Skip to content

Commit 7e5c144

Browse files
committed
fix subnormal conversion; add tests for IEEE formats
1 parent cc1e9e2 commit 7e5c144

File tree

6 files changed

+108
-7
lines changed

6 files changed

+108
-7
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "Microfloats"
22
uuid = "31c70f10-a750-4521-b13c-797315ae2933"
33
authors = ["Anton Oresten <[email protected]> and contributors"]
4-
version = "0.0.1"
4+
version = "0.0.2"
55

66
[compat]
77
julia = "1.10"

src/MX/MX.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,10 @@ function create_base_shifttable(::Type{T}) where {T<:MXMicrofloat}
9898
if e < e_subnormal(T)
9999
basetable[i|0x000+1] = zero(T)
100100
basetable[i|0x100+1] = -zero(T)
101-
shifttable[i|0x000+1] = n_mantissa_bits(T)+1
102-
shifttable[i|0x100+1] = n_mantissa_bits(T)+1
101+
# Use a shift that grows with the distance below the subnormal threshold, so the rounding logic can still round up to the minimum subnormal when appropriate
102+
sh = -e + e_shift_subnorm
103+
shifttable[i|0x000+1] = sh
104+
shifttable[i|0x100+1] = sh
103105
elseif e < e_normal(T)
104106
basetable[i|0x000+1] = zero(T)
105107
basetable[i|0x100+1] = -zero(T)

src/conversion/to_microfloat.jl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,11 @@ function create_base_shifttable(::Type{T}) where {T<:Microfloat}
1717
if e < e_subnormal(T) # Very small numbers map to +- zero (rounding may bump to the min subnormal)
1818
basetable[i|0x000+1] = zero(T)
1919
basetable[i|0x100+1] = -zero(T)
20-
shifttable[i|0x000+1] = n_mantissa_bits(T)+1
21-
shifttable[i|0x100+1] = n_mantissa_bits(T)+1
20+
# Use a large shift that grows with how far below the subnormal threshold we are,
21+
# so the mantissa contribution is zero and rounding behaves correctly (it may round up to the min subnormal).
22+
sh = -e + e_shift_subnorm
23+
shifttable[i|0x000+1] = sh
24+
shifttable[i|0x100+1] = sh
2225
elseif e < e_normal(T) # Small numbers map to denorms
2326
basetable[i|0x000+1] = zero(T)
2427
basetable[i|0x100+1] = -zero(T)

test/IEEE_properties.jl

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
using Test
2+
using Microfloats
3+
4+
const TYPES = [
5+
Microfloat(0, 3, 4),
6+
Microfloat(0, 4, 3),
7+
Microfloat(0, 3, 3),
8+
Microfloat(0, 4, 2),
9+
Microfloat(0, 5, 1),
10+
Microfloat(0, 3, 2),
11+
Microfloat(0, 2, 3),
12+
Microfloat(0, 2, 2),
13+
Microfloat(0, 3, 1),
14+
Microfloat(0, 1, 3),
15+
Microfloat(0, 2, 1),
16+
Microfloat(1, 3, 4),
17+
Microfloat(1, 4, 3),
18+
Microfloat(1, 3, 3),
19+
Microfloat(1, 4, 2),
20+
Microfloat(1, 5, 1),
21+
Microfloat(1, 3, 2),
22+
Microfloat(1, 2, 3),
23+
Microfloat(1, 2, 2),
24+
Microfloat(1, 3, 1),
25+
Microfloat(1, 1, 3),
26+
Microfloat(1, 2, 1),
27+
]
28+
29+
@testset "IEEE microfloats: subnormals and rounding" begin
30+
for T in TYPES
31+
@testset "$T boundaries" begin
32+
bias = Microfloats.bias(T)
33+
M = Microfloats.n_mantissa_bits(T)
34+
mo = Microfloats.mantissa_offset(T)
35+
# Encoding for the minimum positive subnormal (mantissa LSB only)
36+
min_sub_u = UInt8(1) << mo
37+
min_sub = reinterpret(T, min_sub_u)
38+
39+
# Real values
40+
min_sub_val = Float32(2.0)^(1 - bias - M)
41+
half = min_sub_val/2
42+
just_below_half = prevfloat(half)
43+
just_above_half = nextfloat(half)
44+
just_below = prevfloat(min_sub_val)
45+
just_above = nextfloat(min_sub_val)
46+
47+
# Exact min subnormal
48+
@test Float32(min_sub) == min_sub_val
49+
50+
# Values well below half of min subnormal should round to +0
51+
@test T(half/4) == zero(T)
52+
53+
# Exactly half is a tie and rounds to even (zero); anything below half also rounds to zero
54+
@test T(half) == zero(T)
55+
@test T(just_below_half) == zero(T)
56+
57+
# Values just above half of min subnormal should round to min subnormal
58+
@test T(just_above_half) == min_sub
59+
60+
# Values just below min subnormal round up to min subnormal when converted from Float32
61+
@test T(just_below) == min_sub
62+
63+
# Values just above min subnormal quantize to min subnormal or the next representable
64+
# depending on spacing; at least should be >= min_sub
65+
@test Float32(T(just_above)) >= min_sub_val
66+
end
67+
end
68+
end
69+
70+
@testset "IEEE microfloats: monotonic Float32 mapping (canonical encodings)" begin
71+
for T in TYPES
72+
@testset "$T" begin
73+
vals = Tuple{UInt8,Float32,Any}[]
74+
mshift = Microfloats.mantissa_offset(T)
75+
mmask = UInt8(Microfloats.mantissa_mask(T))
76+
for u in UInt8(0):UInt8(0xff)
77+
x = reinterpret(T, u)
78+
isnan(x) && continue
79+
# Only include canonical encodings: mantissa padding bits zero
80+
((u & ~mmask) != (u & ~mmask & ~(UInt8(1)<<mshift - UInt8(1)))) && continue
81+
push!(vals, (u, Float32(x), x))
82+
end
83+
sort!(vals, by = t -> t[2])
84+
for i in 1:length(vals)-1
85+
a = vals[i]; b = vals[i+1]
86+
if a[2] == b[2]
87+
# duplicate values can only come from the signed zeros (+0 and -0)
88+
@test iszero(a[3]) && iszero(b[3])
89+
else
90+
@test a[2] < b[2]
91+
end
92+
end
93+
end
94+
end
95+
end

test/MX/MX_properties.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ const E2M3 = Microfloat(1, 2, 3, :MX)
55
const E2M1 = Microfloat(1, 2, 1, :MX)
66
const E8M0 = Microfloat(0, 8, 0, :MX)
77

8-
uint8(x) = reinterpret(UInt8, x)
9-
108
@testset "MX: no Infs" begin
119
for T in (E4M3, E3M2, E2M3, E2M1, E8M0)
1210
@testset "$T no isinf()" begin

test/runtests.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@ using Test
33

44
a b = isnan(a) || isnan(b) ? true : a == b
55

6+
uint8(x) = reinterpret(UInt8, x)
7+
68
@testset "Microfloats" begin
79

810
include("Float8s/runtests.jl")
11+
include("IEEE_properties.jl")
912
include("MX/runtests.jl")
1013

1114
end

0 commit comments

Comments
 (0)