JuliaMath · timholy · Aug 14, 2017 · Aug 15, 2017
diff --git a/src/DoubleDouble.jl b/src/DoubleDouble.jl
@@ -31,19 +31,16 @@ end
 Double{T<:AbstractFloat}(u::T, v::T) = Double{T}(u, v)
 Double{T<:AbstractFloat}(x::T) = Double(x, zero(T))
 
-
-const half64 = 1.34217729e8
-const half32 = 4097f0
-const half16 = Float16(33.0)
-const halfBig = 3.402823669209384634633746074317682114570000000000000000000000000000000000000000e+38
-# 6.805647338418769269267492148635364229120000000000000000000000000000000000000000e38
-
-# round floats to half-precision
-# TODO: fix overflow for large values
-halfprec(x::Float64) = (p = x*half64; (x-p)+p) # signif(x,26,2) for 26 is 6.7108865e7, this seems like 27
-halfprec(x::Float32) = (p = x*half32; (x-p)+p) # float32(signif(x,12,2))
-halfprec(x::Float16) = (p = x*half16; (x-p)+p) # float16(signif(x,5,2))
-halfprec(x::BigFloat) = (p = x*halfBig; (x-p)+p) # BigFloat(signif(x,128,2))
+# use Veltkamp splitting to remove trailing digits
+# see "Handbook of Floating-point Arithmetic", §4.4.1
+# TODO:
+#  - fix overflow for large values
+#  - use fma when available
+function halfprec{T<:AbstractFloat}(x::T)
+    c = T((1 << cld(precision(T),2)) + 1)
+    p = x*c
+    (x-p)+p
+end
 
 function splitprec(x::AbstractFloat)
     h = halfprec(x)
@@ -139,10 +136,16 @@ end
 
 # Dekker mul12
 function *{T}(x::Single{T},y::Single{T})
-    hx,lx = splitprec(x.hi)
-    hy,ly = splitprec(y.hi)
-    z = x.hi*y.hi
-    Double(z, ((hx*hy-z) + hx*ly + lx*hy) + lx*ly)
+    xs, xe = frexp(x.hi)
+    ys, ye = frexp(y.hi)
+    z = xs*ys
+    zexp = ldexp(z, xe+ye)
+    if (iszero(zexp) | !isfinite(zexp))
+        return Double(zexp, zexp)
+    end
+    hx,lx = splitprec(xs)
+    hy,ly = splitprec(ys)
+    Double(zexp, ldexp(((hx*hy-z) + hx*ly + lx*hy) + lx*ly, xe+ye))
 end
 
 # Dekker mul2

diff --git a/test/extratests.jl b/test/extratests.jl
@@ -0,0 +1,81 @@
+using DoubleDouble, ProgressMeter, Base.Test
+
+# Compare precision in a manner sensitive to subnormals, which lose
+# precision compared to widening.
+function cmp_sn(w, hi, lo, slopbits=0)
+    if !isfinite(hi)
+        if abs(w) > realmax(typeof(hi))
+            return isinf(hi) && sign(w) == sign(hi)
+        end
+        if isnan(w) && isnan(hi)
+            return true
+        end
+        return w == hi
+    end
+    if abs(w) < subnormalmin(typeof(hi))
+        return (hi == zero(hi) || abs(w - widen(hi)) < abs(w)) && lo == zero(hi)
+    end
+    # Compare w == hi + lo unless `lo` issubnormal
+    z = widen(hi) + widen(lo)
+    if !issubnormal(lo) && lo != 0
+        if slopbits == 0
+            return z == w
+        end
+        wr, zr = roundshift(w, slopbits), roundshift(z, slopbits)
+        return max(wr-1, zero(wr)) <= zr <= wr+1
+    end
+    # round w to the same number of bits as z
+    zu = asbits(z)
+    wu = asbits(w)
+    lastbit = false
+    while zu > 0 && !isodd(zu)
+        lastbit = isodd(wu)
+        zu = zu >> 1
+        wu = wu >> 1
+    end
+    return wu <= zu <= wu + lastbit
+end
+
+asbits(x) = reinterpret(Base.fpinttype(typeof(x)), x)
+
+function roundshift(x, n)
+    xu = asbits(x)
+    lastbit = false
+    for i = 1:n
+        lastbit = isodd(xu)
+        xu = xu >> 1
+    end
+    xu + lastbit
+end
+
+subnormalmin(::Type{T}) where T = reinterpret(T, Base.fpinttype(T)(1))
+
+function Base.issubnormal(x::Float16)
+    y = reinterpret(UInt16, x)
+    (y & Base.exponent_mask(Float16) == 0) & (y & Base.significand_mask(Float16) != 0)
+end
+
+Base.iszero(x::Float16) = reinterpret(UInt16, x) & ~Base.sign_mask(Float16) == 0x0000
+
+# Troublesome values
+x = Float16(6.45e4)
+y = Float16(2112.0)
+d = Single(x) * Single(y)
+@test cmp_sn(widen(x)*widen(y), d.hi, d.lo)
+
+function pair16()
+    @showprogress 1 "testing" for yu in 0x0000:0xffff
+        for xu in 0x0000:0xffff
+            x, y = reinterpret(Float16, xu), reinterpret(Float16, yu)
+            try
+                d = Single(x) * Single(y)
+                @test cmp_sn(widen(x)*widen(y), d.hi, d.lo)
+            catch
+                @show x y
+                rethrow()
+            end
+        end
+    end
+end
+
+pair16()
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -64,3 +64,9 @@ a = Double(big"3.1")
 @test Double{Float32}(3) === Double{Float32}(3.0f0, 0.0f0)
 @test Single{Float32}(BigFloat(3)) === Single{Float32}(3.0f0)
 @test Double{Float32}(BigFloat(3)) === Double{Float32}(3.0f0, 0.0f0)
+
+
+# issue #30
+x, y = Single(Float16(0.9775)), Single(Float16(0.5156))
+d = x*y
+@test widen(d.hi) + widen(d.lo) == widen(x.hi) * widen(y.hi)