Merge pull request #13 from MurrellGroup/main

AntonOresten · web-flow · commit e35547924172 · 2025-09-18T14:58:41.000+02:00
Update v0.1.0 branch
diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@
 [![Build Status](https://github.com/MurrellGroup/Microfloats.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/MurrellGroup/Microfloats.jl/actions/workflows/CI.yml?query=branch%3Amain)
 [![Coverage](https://codecov.io/gh/MurrellGroup/Microfloats.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/MurrellGroup/Microfloats.jl)
 
-Microfloats is a Julia package that implements floating point types and arithmetic for sub-8 bit floating point types, supporting arbitrary combinations of sign, exponent, and mantissa bits.
+Microfloats is a Julia package that implements floating point types and arithmetic (through wider intermediates) for sub-8 bit floating point types, supporting arbitrary combinations of sign, exponent, and mantissa (significand) bits.
 
 Instances of a sub-8 bit floating point type are still 8 bits wide in memory; the goal of `Microfloat` is to serve as a base for arithmetic operations and method dispatch, lending downstream packages a good abstraction for doing bitpacking and hardware acceleration.
 
@@ -30,12 +30,12 @@ const UFloat7_5 = Microfloat{0,5,2,IEEE_754_like}
 
 ### Microscaling (MX)
 
-Microfloats implements the E4M3, E5M2, E2M3, E3M2, E2M1, and E8M0 types from the [Open Compute Project Microscaling Formats (MX) Specification](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf), with most of these using saturated arithmetic (no infinities), and different bit layouts for NaNs. These are exported as `MX_E4M3`, `MX_E5M2`, `MX_E2M3`, `MX_E3M2`, `MX_E2M1`, and `MX_E8M0`, respectively.
+Microfloats implements the E4M3, E5M2, E2M3, E3M2, E2M1, and E8M0 types from the [Open Compute Project Microscaling Formats (MX) Specification](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf). These are exported as `MX_E4M3`, `MX_E5M2`, `MX_E2M3`, `MX_E3M2`, `MX_E2M1`, and `MX_E8M0`, respectively, with most of these using saturated arithmetic (no Inf or NaN), and a different encoding for the types that do have NaNs.
 
 For INT8, see `FixedPointNumbers.Q1f6`.
 
-> [!WARNING]
-> MX types may not yet be fully OCP compliant. See issues with the [![MX-compliance](https://img.shields.io/github/labels/MurrellGroup/Microfloats.jl/mx-compliance)](https://github.com/MurrellGroup/Microfloats.jl/labels/mx-compliance) label.
+> [!NOTE]
+> MX types may not yet fully comply with the MX specification, but efforts have been and continue to be made to adhere to it. See issues with the [![MX-compliance](https://img.shields.io/github/labels/MurrellGroup/Microfloats.jl/mx-compliance)](https://github.com/MurrellGroup/Microfloats.jl/labels/mx-compliance) label.
 
 Since Microfloats.jl only implements the primitive types, microscaling itself may be done with [Microscaling.jl](https://github.com/MurrellGroup/Microscaling.jl), which includes quantization and bitpacking.
 
diff --git a/docs/src/assets/icon.svg b/docs/src/assets/icon.svg
@@ -1,36 +1,37 @@
 <svg
   xmlns="http://www.w3.org/2000/svg"
-  viewBox="0 0 400 240"
+  viewBox="0 0 400 200"
   width="400"
-  height="240"
+  height="200"
   aria-labelledby="title desc"
   role="img"
 >
+
+  <!-- Julia palette -->
+  <!-- Blue  : #4063D8  | Green : #389826  | Purple : #9558B2  | Red : #CB3C33 -->
+
   <title id="title">Microfloats.jl mark</title>
   <desc id="desc">
     Two base circles (red left, purple right), a symmetric blue sine wave above them,
-    and a small green circle floating above the wave’s center crest.
+    and a small (micro) green circle floating above the wave’s center crest.
   </desc>
 
 
   <!-- Bottom circles (adjust cx/cy/r for proportions) -->
-  <circle cx="120" cy="170" r="60" fill="#CB3C33" />  <!-- red, bottom-left -->
-  <circle cx="280" cy="170" r="60" fill="#9558B2" />  <!-- purple, bottom-right -->
+  <circle cx="99" cy="116" r="42" fill="#CB3C33" />  <!-- red, bottom-left -->
+  <circle cx="301" cy="116" r="42" fill="#9558B2" />  <!-- purple, bottom-right -->
 
-  <!-- Julia palette -->
-  <!-- Blue  : #4063D8  | Green : #389826  | Purple : #9558B2  | Red : #CB3C33 -->
+  <!-- Blue sine wave -->
   <path
-    d="M 50 40 Q 75 68 100 68 Q 125 68 150 40 Q 200 -16 250 40 Q 275 68 300 68 Q 325 68 350 40"
+    d="M 50 -44 Q 75 -66 100 -66 Q 125 -66 150 -40 Q 200 16 250 -40 Q 275 -66 300 -66 Q 325 -66 350 -44"
     fill="none"
     stroke="#4063d8"
-    stroke-width="64"
+    stroke-width="24"
     stroke-linecap="round"
     stroke-linejoin="round"
-    transform="translate(0, 60)"
+    transform="translate(0, 120)"
   />
 
-  <!-- Micro-floating green circle
-       Placed above the wave’s center crest (x=200, crest y≈86)
-       Adjust r and cy to sit tighter/looser to the wave. -->
-  <circle cx="200" cy="36" r="30" fill="#389826" />
+  <!-- Micro-floating green circle -->
+  <circle cx="200" cy="72" r="18" fill="#389826" />
 </svg>
diff --git a/docs/src/conversion.md b/docs/src/conversion.md
@@ -4,7 +4,7 @@
 ## BFloat16
 
 Conversion to and from `Microfloat` uses `BFloat16` as an intermediate type,
-since BFloat16 has 1 sign bit, 8 exponent bits, and 7 significand bits,
+since BFloat16 has 1 sign bit, 8 exponent bits, and 7 significand (mantissa) bits,
 and is therefore able to represent all `Microfloat` types.
 
 ## Rounding
diff --git a/src/Microfloats.jl b/src/Microfloats.jl
@@ -41,7 +41,7 @@ include("random.jl")
     Microfloat{S,E,M,V}
 
 A `Microfloat` type has `S` sign bits (between 0 and 1),
-`E` exponent bits (between 1 and 8), and `M` significand bits (between 0 and 7).
+`E` exponent bits (between 1 and 8), and `M` mantissa bits (between 0 and 7).
 """
 Microfloat
 
@@ -70,8 +70,8 @@ for T in (
         - Has NaN: `$(hasnan($T))`
         - Max normal: `$(Float64(floatmax($T)))`
         - Min normal: `$(Float64(floatmin($T)))`
-        - Max subnormal: `$(Float64(prevfloat(floatmin($T))))`
-        - Min subnormal: `$(Float64(nextfloat(zero($T))))`
+        - Max subnormal: `$(significand_bits($T) > 0 ? Float64(prevfloat(floatmin($T))) : "N/A")`
+        - Min subnormal: `$(significand_bits($T) > 0 ? Float64(nextfloat(zero($T))) : "N/A")`
         """
         $T
     end
diff --git a/src/conversion.jl b/src/conversion.jl
@@ -3,11 +3,12 @@ abstract type SAT <: OverflowPolicy end
 abstract type OVF <: OverflowPolicy end
 
 function rshift_round_to_even(x::UInt16, n::Int)
-    n <= 0 && return x << (-n)
-    lower = x & ((UInt16(1) << n) - UInt16(1))
-    half = UInt16(1) << (n - 1)
-    up = (lower > half) | ((lower == half) & (((x >> n) & UInt16(1)) == UInt16(1)))
-    (x >> n) + (up ? UInt16(1) : UInt16(0))
+    n <= 0 && return x >> n
+    x_32 = UInt32(x)
+    lower = x_32 & ((UInt32(1) << n) - UInt32(1))
+    half = UInt32(1) << (n - 1)
+    up = (lower > half) | ((lower == half) & (((x_32 >> n) & UInt32(1)) == UInt32(1)))
+    UInt16((x_32 >> n) + (up ? 1 : 0))
 end
 
 is_outside_floatmax(xb::BFloat16, ::Type{T}) where T<:Microfloat = reinterpret(Unsigned, abs(xb)) > reinterpret(Unsigned, BFloat16(floatmax(T)))