fix subnormal conversion; add tests for IEEE formats

AntonOresten · AntonOresten · commit 7e5c14429704 · 2025-08-08T13:23:03.000+02:00
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Microfloats"
 uuid = "31c70f10-a750-4521-b13c-797315ae2933"
 authors = ["Anton Oresten <antonoresten@gmail.com> and contributors"]
-version = "0.0.1"
+version = "0.0.2"
 
 [compat]
 julia = "1.10"
diff --git a/src/MX/MX.jl b/src/MX/MX.jl
@@ -98,8 +98,10 @@ function create_base_shifttable(::Type{T}) where {T<:MXMicrofloat}
         if e < e_subnormal(T)
             basetable[i|0x000+1] = zero(T)
             basetable[i|0x100+1] = -zero(T)
-            shifttable[i|0x000+1] = n_mantissa_bits(T)+1
-            shifttable[i|0x100+1] = n_mantissa_bits(T)+1
+            # Provide a large shift so rounding logic can raise to the minimal subnormal when appropriate
+            sh = -e + e_shift_subnorm
+            shifttable[i|0x000+1] = sh
+            shifttable[i|0x100+1] = sh
         elseif e < e_normal(T)
             basetable[i|0x000+1] = zero(T)
             basetable[i|0x100+1] = -zero(T)
diff --git a/src/conversion/to_microfloat.jl b/src/conversion/to_microfloat.jl
@@ -17,8 +17,11 @@ function create_base_shifttable(::Type{T}) where {T<:Microfloat}
         if e < e_subnormal(T)                   # Very small numbers map to +- zero
             basetable[i|0x000+1] = zero(T)
             basetable[i|0x100+1] = -zero(T)
-            shifttable[i|0x000+1] = n_mantissa_bits(T)+1
-            shifttable[i|0x100+1] = n_mantissa_bits(T)+1
+            # Use a large shift that depends on how far below the subnormal threshold we are,
+            # so mantissa contribution is zero and rounding behaves correctly (may bump to min subnormal).
+            sh = -e + e_shift_subnorm
+            shifttable[i|0x000+1] = sh
+            shifttable[i|0x100+1] = sh
         elseif e < e_normal(T)                  # Small numbers map to denorms
             basetable[i|0x000+1] = zero(T)
             basetable[i|0x100+1] = -zero(T)
diff --git a/test/IEEE_properties.jl b/test/IEEE_properties.jl
@@ -0,0 +1,95 @@
+using Test
+using Microfloats
+
+# Formats exercised by the tests below. Each layout is an (exponent bits,
+# mantissa bits) pair, instantiated with 0 and then 1 as the leading `Microfloat`
+# argument (presumably the number of sign bits -- confirm against `Microfloat`).
+# Argument meanings are inferred from names like E2M3 = Microfloat(1, 2, 3, :MX).
+const TYPES = [
+    Microfloat(lead, ebits, mbits) for lead in (0, 1) for (ebits, mbits) in (
+        # exponent + mantissa = 7 bits
+        (3, 4),
+        (4, 3),
+        # 6 bits
+        (3, 3),
+        (4, 2),
+        (5, 1),
+        # 5 bits
+        (3, 2),
+        (2, 3),
+        # 4 bits
+        (2, 2),
+        (3, 1),
+        (1, 3),
+        # 3 bits
+        (2, 1),
+    )
+]
+
+# Rounding behaviour around the smallest positive subnormal of each type in TYPES.
+@testset "IEEE microfloats: subnormals and rounding" begin
+    @testset "$T boundaries" for T in TYPES
+        exponent_bias = Microfloats.bias(T)
+        mantissa_bits = Microfloats.n_mantissa_bits(T)
+        lsb_position = Microfloats.mantissa_offset(T)
+
+        # Smallest positive subnormal: only the mantissa LSB is set in the encoding.
+        tiny = reinterpret(T, UInt8(1) << lsb_position)
+
+        # Its expected Float32 value, and the midpoint between that value and zero.
+        tiny_f32 = 2.0f0^(1 - exponent_bias - mantissa_bits)
+        midpoint = tiny_f32 / 2
+
+        # Float32 probes on either side of the two boundaries.
+        under_midpoint = prevfloat(midpoint)
+        over_midpoint = nextfloat(midpoint)
+        under_tiny = prevfloat(tiny_f32)
+        over_tiny = nextfloat(tiny_f32)
+
+        # The encoding decodes to exactly the expected power of two.
+        @test Float32(tiny) == tiny_f32
+
+        # Far below the midpoint the result is zero.
+        @test T(midpoint / 4) == zero(T)
+
+        # The midpoint is a tie and goes to the even neighbour, zero; the Float32
+        # immediately below it must give zero as well.
+        @test T(midpoint) == zero(T)
+        @test T(under_midpoint) == zero(T)
+
+        # Strictly between the midpoint and `tiny`, conversion rounds up to `tiny`.
+        @test T(over_midpoint) == tiny
+        @test T(under_tiny) == tiny
+
+        # Just above `tiny` the result is `tiny` or the next representable value,
+        # depending on the spacing of T, so only a lower bound is asserted.
+        @test tiny_f32 <= Float32(T(over_tiny))
+    end
+end
+
+# After sorting by Float32 value, neighbours must differ unless both are zeros.
+@testset "IEEE microfloats: monotonic Float32 mapping (canonical encodings)" begin
+    @testset "$T" for T in TYPES
+        lsb_position = Microfloats.mantissa_offset(T)
+        mant_mask = UInt8(Microfloats.mantissa_mask(T))
+        # Bits below the mantissa LSB are padding; canonical encodings keep them zero.
+        pad_mask = UInt8(1) << lsb_position - UInt8(1)
+
+        decoded = Tuple{UInt8,Float32,Any}[]
+        for bits in typemin(UInt8):typemax(UInt8)
+            x = reinterpret(T, bits)
+            isnan(x) && continue
+            iszero(bits & ~mant_mask & pad_mask) || continue
+            push!(decoded, (bits, Float32(x), x))
+        end
+        sort!(decoded; by = entry -> entry[2])
+        for ((_, lo, lo_x), (_, hi, hi_x)) in zip(decoded, Iterators.drop(decoded, 1))
+            if lo != hi
+                @test lo < hi
+            else
+                # Equal neighbours are acceptable only when both are zeros.
+                @test iszero(lo_x) && iszero(hi_x)
+            end
+        end
+    end
+end
diff --git a/test/MX/MX_properties.jl b/test/MX/MX_properties.jl
@@ -5,8 +5,6 @@ const E2M3 = Microfloat(1, 2, 3, :MX)
 const E2M1 = Microfloat(1, 2, 1, :MX)
 const E8M0 = Microfloat(0, 8, 0, :MX)
 
-uint8(x) = reinterpret(UInt8, x)
-
 @testset "MX: no Infs" begin
     for T in (E4M3, E3M2, E2M3, E2M1, E8M0)
         @testset "$T no isinf()" begin
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -3,9 +3,12 @@ using Test
 
 a ≡ b = isnan(a) || isnan(b) ? true : a == b
 
+uint8(x) = reinterpret(UInt8, x)
+
 @testset "Microfloats" begin
 
     include("Float8s/runtests.jl")
+    include("IEEE_properties.jl")
     include("MX/runtests.jl")
 
 end