Skip to content
This repository was archived by the owner on Dec 10, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 20 additions & 17 deletions src/DoubleDouble.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,16 @@ end
Double{T<:AbstractFloat}(u::T, v::T) = Double{T}(u, v)
Double{T<:AbstractFloat}(x::T) = Double(x, zero(T))


const half64 = 1.34217729e8
const half32 = 4097f0
const half16 = Float16(33.0)
const halfBig = 3.402823669209384634633746074317682114570000000000000000000000000000000000000000e+38
# 6.805647338418769269267492148635364229120000000000000000000000000000000000000000e38

# round floats to half-precision
# TODO: fix overflow for large values
halfprec(x::Float64) = (p = x*half64; (x-p)+p) # signif(x,26,2) for 26 is 6.7108865e7, this seems like 27
halfprec(x::Float32) = (p = x*half32; (x-p)+p) # float32(signif(x,12,2))
halfprec(x::Float16) = (p = x*half16; (x-p)+p) # float16(signif(x,5,2))
halfprec(x::BigFloat) = (p = x*halfBig; (x-p)+p) # BigFloat(signif(x,128,2))
# use Veltkamp splitting to remove trailing digits
# see "Handbook of Floating-point Arithmetic", §4.4.1
# TODO:
# - fix overflow for large values
# - use fma when available
function halfprec{T<:AbstractFloat}(x::T)
c = T((1 << cld(precision(T),2)) + 1)
p = x*c
(x-p)+p
end

function splitprec(x::AbstractFloat)
h = halfprec(x)
Expand Down Expand Up @@ -139,10 +136,16 @@ end

# Dekker mul12
function *{T}(x::Single{T},y::Single{T})
hx,lx = splitprec(x.hi)
hy,ly = splitprec(y.hi)
z = x.hi*y.hi
Double(z, ((hx*hy-z) + hx*ly + lx*hy) + lx*ly)
xs, xe = frexp(x.hi)
ys, ye = frexp(y.hi)
z = xs*ys
zexp = ldexp(z, xe+ye)
if (iszero(zexp) | !isfinite(zexp))
return Double(zexp, zexp)
end
hx,lx = splitprec(xs)
hy,ly = splitprec(ys)
Double(zexp, ldexp(((hx*hy-z) + hx*ly + lx*hy) + lx*ly, xe+ye))
end

# Dekker mul2
Expand Down
81 changes: 81 additions & 0 deletions test/extratests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
using DoubleDouble, ProgressMeter, Base.Test

# Compare precision in a manner sensitive to subnormals, which lose
# precision compared to widening.
function cmp_sn(w, hi, lo, slopbits=0)
if !isfinite(hi)
if abs(w) > realmax(typeof(hi))
return isinf(hi) && sign(w) == sign(hi)
end
if isnan(w) && isnan(hi)
return true
end
return w == hi
end
if abs(w) < subnormalmin(typeof(hi))
return (hi == zero(hi) || abs(w - widen(hi)) < abs(w)) && lo == zero(hi)
end
# Compare w == hi + lo unless `lo` issubnormal
z = widen(hi) + widen(lo)
if !issubnormal(lo) && lo != 0
if slopbits == 0
return z == w
end
wr, zr = roundshift(w, slopbits), roundshift(z, slopbits)
return max(wr-1, zero(wr)) <= zr <= wr+1
end
# round w to the same number of bits as z
zu = asbits(z)
wu = asbits(w)
lastbit = false
while zu > 0 && !isodd(zu)
lastbit = isodd(wu)
zu = zu >> 1
wu = wu >> 1
end
return wu <= zu <= wu + lastbit
end

asbits(x) = reinterpret(Base.fpinttype(typeof(x)), x)

function roundshift(x, n)
xu = asbits(x)
lastbit = false
for i = 1:n
lastbit = isodd(xu)
xu = xu >> 1
end
xu + lastbit
end

subnormalmin(::Type{T}) where T = reinterpret(T, Base.fpinttype(T)(1))

function Base.issubnormal(x::Float16)
y = reinterpret(UInt16, x)
(y & Base.exponent_mask(Float16) == 0) & (y & Base.significand_mask(Float16) != 0)
end

Base.iszero(x::Float16) = reinterpret(UInt16, x) & ~Base.sign_mask(Float16) == 0x0000

# Troublesome values
x = Float16(6.45e4)
y = Float16(2112.0)
d = Single(x) * Single(y)
@test cmp_sn(widen(x)*widen(y), d.hi, d.lo)

function pair16()
@showprogress 1 "testing" for yu in 0x0000:0xffff
for xu in 0x0000:0xffff
x, y = reinterpret(Float16, xu), reinterpret(Float16, yu)
try
d = Single(x) * Single(y)
@test cmp_sn(widen(x)*widen(y), d.hi, d.lo)
catch
@show x y
rethrow()
end
end
end
end

pair16()
6 changes: 6 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,9 @@ a = Double(big"3.1")
@test Double{Float32}(3) === Double{Float32}(3.0f0, 0.0f0)
@test Single{Float32}(BigFloat(3)) === Single{Float32}(3.0f0)
@test Double{Float32}(BigFloat(3)) === Double{Float32}(3.0f0, 0.0f0)


# issue #30
x, y = Single(Float16(0.9775)), Single(Float16(0.5156))
d = x*y
@test widen(d.hi) + widen(d.lo) == widen(x.hi) * widen(y.hi)