Skip to content

Commit f140eea

Browse files
committed
remove NV
1 parent bedbccb commit f140eea

File tree

4 files changed

+25
-36
lines changed

4 files changed

+25
-36
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "Microfloats"
22
uuid = "31c70f10-a750-4521-b13c-797315ae2933"
33
authors = ["Anton Oresten <[email protected]> and contributors"]
4-
version = "0.0.4+1"
4+
version = "0.0.5"
55

66
[deps]
77
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

README.md

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,11 @@ const Float8_4 = Microfloat(1, 4, 3)
2323
const Float8_5 = Microfloat(1, 5, 2)
2424
```
2525

26-
### MX format
26+
### Microscaling (MX) Element Types
2727

28-
Microfloats additionally implements the `E4M3`, `E5M2`, `E2M3`, `E3M2`, `E2M1`, and `E8M0` formats from the [Open Compute Project Microscaling Formats (MX) Specification](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf), with most of these using saturated arithmetic (no infinities), and different bit layouts for NaNs. These can be constructed by passing an additional `:MX` argument to the `Microfloat` constructor:
28+
Microfloats implements the E4M3, E5M2, E2M3, E3M2, E2M1, and E8M0 element types from the [Open Compute Project Microscaling Formats (MX) Specification](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf). Most of these use saturating arithmetic (no infinities) and different bit layouts for NaNs. They are exported as `MX_E4M3`, `MX_E5M2`, `MX_E2M3`, `MX_E3M2`, `MX_E2M1`, and `MX_E8M0`, respectively.
2929

30-
```julia
31-
const E4M3 = Microfloat(1, 4, 3, :MX)
32-
const E5M2 = Microfloat(1, 5, 2, :MX)
33-
const E2M3 = Microfloat(1, 2, 3, :MX)
34-
const E3M2 = Microfloat(1, 3, 2, :MX)
35-
const E2M1 = Microfloat(1, 2, 1, :MX)
36-
const E8M0 = Microfloat(0, 8, 0, :MX)
37-
```
38-
39-
For `INT8`, see `FixedPointNumbers.Q1f6`.
30+
For INT8, see `FixedPointNumbers.Q1f6`.
4031

4132
## Installation
4233

src/Microfloats.jl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@ include("float-bits.jl")
44

55
include("Microfloat.jl")
66
export Microfloat
7-
export IEEE
8-
export Float8_E3M4, Float8_E4M3, Float8_E5M2, Float6_E2M3, Float6_E3M2, Float4_E2M1
7+
export Float8_E3M4, Float8_E4M3, Float8_E5M2
8+
export Float6_E2M3, Float6_E3M2
9+
export Float4_E2M1
910

1011
include("microscaled/microscaled.jl")
11-
export MX, NV
12-
export MX_E5M2, MX_E4M3, MX_E3M2, MX_E2M3, MX_E2M1, MX_E8M0, NV_E2M1
12+
export MX_E5M2, MX_E4M3
13+
export MX_E3M2, MX_E2M3
14+
export MX_E2M1
15+
export MX_E8M0
1316

1417
include("conversion/conversion.jl")
1518

src/microscaled/microscaled.jl

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,17 @@
11
abstract type MX <: Variant end
2-
abstract type NV <: Variant end
3-
const Microscaled = Union{MX, NV}
42

5-
const MXMicrofloat{S,E,M} = Microfloat{S,E,M,MX}
6-
const NVMicrofloat{S,E,M} = Microfloat{S,E,M,NV}
7-
const MicroscaledMicrofloat{S,E,M} = Microfloat{S,E,M,<:Microscaled}
3+
const MX_Microfloat{S,E,M} = Microfloat{S,E,M,MX}
84

9-
const MX_E5M2 = MXMicrofloat{1,5,2}
10-
const MX_E4M3 = MXMicrofloat{1,4,3}
11-
const MX_E3M2 = MXMicrofloat{1,3,2}
12-
const MX_E2M3 = MXMicrofloat{1,2,3}
13-
const MX_E2M1 = MXMicrofloat{1,2,1}
14-
const MX_E8M0 = MXMicrofloat{0,8,0}
15-
const NV_E2M1 = NVMicrofloat{1,2,1}
5+
const MX_E5M2 = MX_Microfloat{1,5,2}
6+
const MX_E4M3 = MX_Microfloat{1,4,3}
7+
const MX_E3M2 = MX_Microfloat{1,3,2}
8+
const MX_E2M3 = MX_Microfloat{1,2,3}
9+
const MX_E2M1 = MX_Microfloat{1,2,1}
10+
const MX_E8M0 = MX_Microfloat{0,8,0}
1611

17-
const NO_INF = Union{MX_E4M3, MX_E3M2, MX_E2M3, MX_E2M1, MX_E8M0, NV_E2M1}
18-
const NO_NAN = Union{MX_E3M2, MX_E2M3, MX_E2M1, NV_E2M1}
19-
const NO_NAN_OR_INF = Union{MX_E3M2, MX_E2M3, MX_E2M1, NV_E2M1}
12+
const NO_INF = Union{MX_E4M3, MX_E3M2, MX_E2M3, MX_E2M1, MX_E8M0}
13+
const NO_NAN = Union{MX_E3M2, MX_E2M3, MX_E2M1}
14+
const NO_NAN_OR_INF = Union{MX_E3M2, MX_E2M3, MX_E2M1}
2015

2116
Base.isinf(::NO_INF) = false
2217
Base.isnan(::NO_NAN) = false
@@ -38,7 +33,7 @@ nan(::Type{MX_E8M0}) = reinterpret(MX_E8M0, 0xff)
3833
# Float32 conversion for MX variants:
3934
# - exp=all-ones is "normal" except for the MX NaN sentinel(s)
4035
# - otherwise the mapping is identical to IEEE
41-
function _float32(x::T) where {T<:MicroscaledMicrofloat}
36+
function _float32(x::T) where {T<:MX_Microfloat}
4237
T isa MX_E8M0 && reinterpret(UInt8, x) == 0xff && return NaN32
4338

4439
sgn = UInt32(right_aligned_sign(x))
@@ -78,7 +73,7 @@ function _float32(x::T) where {T<:MicroscaledMicrofloat}
7873
end
7974

8075
# Saturating to_microfloat tables for MX (no Infs; overflow -> ±floatmax)
81-
function create_base_shifttable(::Type{T}) where {T<:MicroscaledMicrofloat}
76+
function create_base_shifttable(::Type{T}) where {T<:MX_Microfloat}
8277
basetable = Vector{T}(undef, 512)
8378
shifttable = Vector{UInt8}(undef, 512)
8479

@@ -115,5 +110,5 @@ function create_base_shifttable(::Type{T}) where {T<:MicroscaledMicrofloat}
115110
end
116111

117112
# Saturating bounds for MX: use finite extrema
118-
Base.typemax(::Type{T}) where {S,E,M,T<:MicroscaledMicrofloat{S,E,M}} = floatmax(T)
119-
Base.typemin(::Type{T}) where {S,E,M,T<:MicroscaledMicrofloat{S,E,M}} = ifelse(n_sign_bits(T) == 0, zero(T), -floatmax(T))
113+
Base.typemax(::Type{T}) where {S,E,M,T<:MX_Microfloat{S,E,M}} = floatmax(T)
114+
Base.typemin(::Type{T}) where {S,E,M,T<:MX_Microfloat{S,E,M}} = ifelse(n_sign_bits(T) == 0, zero(T), -floatmax(T))

0 commit comments

Comments
 (0)