diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index e3a51a575..39ad45c7a 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -7,6 +7,7 @@
 #![no_std]
 
 #[allow(dead_code)]
+#[allow(unused_imports)]
 #[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy.
 #[path = "../../../src/math/mod.rs"]
 pub mod libm;
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 16547404f..a195f69ed 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -92,6 +92,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &["copysignf128"],
     ),
+    (
+        // `(f16, f16, f16) -> f16`
+        FloatTy::F16,
+        Signature { args: &[Ty::F16, Ty::F16, Ty::F16], returns: &[Ty::F16] },
+        None,
+        &["fmaf16"],
+    ),
     (
         // `(f32, f32, f32) -> f32`
         FloatTy::F32,
@@ -106,6 +113,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &["fma"],
     ),
+    (
+        // `(f128, f128, f128) -> f128`
+        FloatTy::F128,
+        Signature { args: &[Ty::F128, Ty::F128, Ty::F128], returns: &[Ty::F128] },
+        None,
+        &["fmaf128"],
+    ),
     (
         // `(f32) -> i32`
         FloatTy::F32,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index cd1e2d2cc..c9d51cf04 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -117,7 +117,8 @@ libm_macros::for_each_function! {
         exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)),
 
         // Musl does not provide `f16` and `f128` functions
-        copysignf16 | copysignf128 | fabsf16 | fabsf128 => (false, None),
+        copysignf16 | copysignf128 | fabsf16 | fabsf128 |
+            fmaf16 | fmaf128  => (false, None),
 
         // By default we never skip (false) and always have a musl function available
         _ => (false, Some(musl_math_sys::MACRO_FN_NAME))
diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
index d378863f2..9a18a4a69 100644
--- a/crates/libm-test/src/f8_impl.rs
+++ b/crates/libm-test/src/f8_impl.rs
@@ -20,7 +20,6 @@ pub struct f8(u8);
 impl Float for f8 {
     type Int = u8;
     type SignedInt = i8;
-    type ExpInt = i8;
 
     const ZERO: Self = Self(0b0_0000_000);
     const NEG_ZERO: Self = Self(0b1_0000_000);
@@ -62,8 +61,8 @@ impl Float for f8 {
         self.0 & Self::SIGN_MASK != 0
     }
 
-    fn exp(self) -> Self::ExpInt {
-        unimplemented!()
+    fn exp(self) -> i32 {
+        ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as i32
     }
 
     fn from_bits(a: Self::Int) -> Self {
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index f2b7b2f25..c3a23cc97 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -144,7 +144,7 @@ libm_macros::for_each_function! {
         expm1 | expm1f => exp_m1,
         fabs | fabsf => abs,
         fdim | fdimf => positive_diff,
-        fma | fmaf => mul_add,
+        fma | fmaf | fmaf16 | fmaf128 => mul_add,
         fmax | fmaxf => max,
         fmin | fminf => min,
         lgamma | lgammaf => ln_gamma,
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index f8c3a7b8f..8dad34354 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -459,7 +459,12 @@ fn bessel_prec_dropoff<F: Float>(
     None
 }
 
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16, f16, f16)> for SpecialCase {}
 impl MaybeOverride<(f32, f32, f32)> for SpecialCase {}
 impl MaybeOverride<(f64, f64, f64)> for SpecialCase {}
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128, f128, f128)> for SpecialCase {}
+
 impl MaybeOverride<(f32, i32)> for SpecialCase {}
 impl MaybeOverride<(f64, i32)> for SpecialCase {}
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 3e11d322a..c33d4eda1 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -47,7 +47,7 @@ where
 libm_macros::for_each_function! {
     callback: musl_rand_tests,
     // Musl does not support `f16` and `f128` on all platforms.
-    skip: [copysignf16, copysignf128, fabsf16, fabsf128],
+    skip: [copysignf16, copysignf128, fabsf16, fabsf128, fmaf16, fmaf128],
     attributes: [
         #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586
         [exp10, exp10f, exp2, exp2f, rint]
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 7961b0802..478a9879c 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -126,6 +126,8 @@ libm_macros::for_each_function! {
         fdimf,
         fma,
         fmaf,
+        fmaf16,
+        fmaf128,
         fmax,
         fmaxf,
         fmin,
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 0b2d6214f..808f92d89 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -318,16 +318,32 @@
     "fma": {
         "sources": [
             "src/libm_helper.rs",
-            "src/math/fma.rs"
+            "src/math/fma.rs",
+            "src/math/generic/fma.rs"
         ],
         "type": "f64"
     },
     "fmaf": {
         "sources": [
-            "src/math/fmaf.rs"
+            "src/math/fmaf.rs",
+            "src/math/generic/fma.rs"
         ],
         "type": "f32"
     },
+    "fmaf128": {
+        "sources": [
+            "src/math/fmaf128.rs",
+            "src/math/generic/fma.rs"
+        ],
+        "type": "f128"
+    },
+    "fmaf16": {
+        "sources": [
+            "src/math/fmaf16.rs",
+            "src/math/generic/fma.rs"
+        ],
+        "type": "f16"
+    },
     "fmax": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 0a1bbab24..2fb2ed9aa 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -47,6 +47,8 @@ floor
 floorf
 fma
 fmaf
+fmaf128
+fmaf16
 fmax
 fmaxf
 fmin
diff --git a/src/math/fma.rs b/src/math/fma.rs
index 826143d5a..b482c0f9c 100644
--- a/src/math/fma.rs
+++ b/src/math/fma.rs
@@ -42,6 +42,10 @@ fn mul(x: u64, y: u64) -> (u64, u64) {
 /// according to the rounding mode characterized by the value of FLT_ROUNDS.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fma(x: f64, y: f64, z: f64) -> f64 {
+    if true {
+        return super::generic::fma(x, y, z, scalbn);
+    }
+
     let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
     let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
 
diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs
index 79371c836..7c42a1b6d 100644
--- a/src/math/fmaf.rs
+++ b/src/math/fmaf.rs
@@ -47,6 +47,10 @@ use super::fenv::{
 /// according to the rounding mode characterized by the value of FLT_ROUNDS.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 {
+    if true {
+        return super::generic::fma_big::<f32, f64>(x, y, z);
+    }
+
     let xy: f64;
     let mut result: f64;
     let mut ui: u64;
diff --git a/src/math/fmaf128.rs b/src/math/fmaf128.rs
new file mode 100644
index 000000000..6272ef677
--- /dev/null
+++ b/src/math/fmaf128.rs
@@ -0,0 +1,6 @@
+#[expect(unused)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
+    // super::generic::fma(x, y, z)
+    todo!()
+}
diff --git a/src/math/fmaf16.rs b/src/math/fmaf16.rs
new file mode 100644
index 000000000..e7adab304
--- /dev/null
+++ b/src/math/fmaf16.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf16(x: f16, y: f16, z: f16) -> f16 {
+    super::generic::fma_big::<f16, f32>(x, y, z)
+}
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
new file mode 100644
index 000000000..0b0292d78
--- /dev/null
+++ b/src/math/generic/fma.rs
@@ -0,0 +1,261 @@
+#![allow(unused)]
+
+use core::ops::{Shl, Shr};
+
+use super::super::fenv::{
+    FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept,
+};
+use super::super::support::{DInt, HInt, Int};
+use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
+
+const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
+
+/// Fused multiply add.
+pub fn fma<F: Float>(x: F, y: F, z: F, scbn: impl FnOnce(F, i32) -> F) -> F
+where
+    F::Int: CastFrom<u32>,
+    F::Int: HInt,
+    F::Int: Shr<i32, Output = F::Int>,
+    F::Int: Shl<i32, Output = F::Int>,
+    F::SignedInt: CastInto<F>,
+    u32: CastInto<F::Int>,
+    bool: CastInto<F::Int>,
+{
+    let one = F::Int::ONE;
+    let zero = F::Int::ZERO;
+
+    let nx = Norm::from_float(x);
+    let ny = Norm::from_float(y);
+    let nz = Norm::from_float(z);
+
+    if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
+        return x * y + z;
+    }
+    if nz.e >= ZEROINFNAN {
+        if nz.e > ZEROINFNAN {
+            /* z==0 */
+            return x * y + z;
+        }
+        return z;
+    }
+
+    let zhi: F::Int;
+    let zlo: F::Int;
+
+    let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
+
+    let mut e: i32 = nx.e + ny.e;
+    let mut d: i32 = nz.e - e;
+
+    let fbits = F::BITS as i32;
+
+    if d > 0 {
+        if d < fbits {
+            zlo = nz.m << d;
+            zhi = nz.m >> (fbits - d);
+        } else {
+            zlo = zero;
+            zhi = nz.m;
+            e = nz.e - fbits;
+            d -= fbits;
+            if d == 0 {
+            } else if d < fbits {
+                rlo = (rhi << (fbits - d)) | (rlo >> d) | ((rlo << (fbits - d)) != zero).cast();
+                rhi = rhi >> d;
+            } else {
+                rlo = one;
+                rhi = zero;
+            }
+        }
+    } else {
+        zhi = zero;
+        d = -d;
+        if d == 0 {
+            zlo = nz.m;
+        } else if d < fbits {
+            zlo = (nz.m >> d) | ((nz.m << (fbits - d)) != zero).cast();
+        } else {
+            zlo = one;
+        }
+    }
+
+    /* add */
+    let mut neg: bool = nx.neg ^ ny.neg;
+    let samesign: bool = neg ^ nz.neg;
+    let mut nonzero: i32 = 1;
+    if samesign {
+        /* r += z */
+        rlo = rlo.wrapping_add(zlo);
+        rhi += zhi + (rlo < zlo).cast();
+    } else {
+        /* r -= z */
+        let (res, borrow) = rlo.overflowing_sub(zlo);
+        rlo = res;
+        rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow.cast()));
+        if (rhi >> (F::BITS - 1)) != zero {
+            rlo = (rlo.signed()).wrapping_neg().unsigned();
+            rhi = (rhi.signed()).wrapping_neg().unsigned() - (rlo != zero).cast();
+            neg = !neg;
+        }
+        nonzero = (rhi != zero) as i32;
+    }
+
+    /* set rhi to top 63bit of the result (last bit is sticky) */
+    if nonzero != 0 {
+        e += fbits;
+        d = rhi.leading_zeros() as i32 - 1;
+        /* note: d > 0 */
+        rhi = (rhi << d) | (rlo >> (fbits - d)) | ((rlo << d) != zero).cast();
+    } else if rlo != zero {
+        d = rlo.leading_zeros() as i32 - 1;
+        if d < 0 {
+            rhi = (rlo >> 1) | (rlo & one);
+        } else {
+            rhi = rlo << d;
+        }
+    } else {
+        /* exact +-0 */
+        return x * y + z;
+    }
+    e -= d;
+
+    /* convert to double */
+    let mut i: F::SignedInt = rhi.signed(); /* i is in [1<<62,(1<<63)-1] */
+    if neg {
+        i = -i;
+    }
+    let mut r: F = i.cast(); /* |r| is in [0x1p62,0x1p63] */
+
+    if e < -1022 - 62 {
+        /* result is subnormal before rounding */
+        if e == -1022 - 63 {
+            let mut c: F = foo::<F>();
+            if neg {
+                c = -c;
+            }
+            if r == c {
+                /* min normal after rounding, underflow depends
+                on arch behaviour which can be imitated by
+                a double to float conversion */
+                // let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
+                // return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
+                todo!()
+            }
+            /* one bit is lost when scaled, add another top bit to
+            only round once at conversion if it is inexact */
+            if (rhi << (F::SIG_BITS + 1)) != zero {
+                let tmp: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2));
+                i = tmp.signed();
+                if neg {
+                    i = -i;
+                }
+                r = i.cast();
+                r = (F::ONE + F::ONE) * r - c; /* remove top bit */
+
+                /* raise underflow portably, such that it
+                cannot be optimized away */
+                {
+                    // let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
+                    // r += (tiny * tiny) * (r - r);
+                    todo!()
+                }
+            }
+        } else {
+            /* only round once when scaled */
+            d = 10;
+            i = (((rhi >> d) | ((rhi << (fbits - d)) != zero).cast()) << d).signed();
+            if neg {
+                i = -i;
+            }
+            r = i.cast();
+        }
+    }
+
+    // todo!()
+    //
+    scbn(r, e)
+}
+
+struct Norm<F: Float> {
+    m: F::Int,
+    e: i32,
+    neg: bool,
+}
+
+impl<F: Float> Norm<F> {
+    fn from_float(x: F) -> Self
+    where
+        F::Int: CastFrom<u32>,
+        u32: CastInto<F::Int>,
+    {
+        let mut ix = x.to_bits();
+        let mut e = x.exp();
+        let neg = x.is_sign_negative();
+        if e.is_zero() {
+            ix = (x * foo::<F>()).to_bits();
+            e = x.exp();
+            e = if e != 0 { e - (F::BITS as i32) } else { 0x800 };
+        }
+        ix &= F::SIG_MASK;
+        ix |= F::IMPLICIT_BIT;
+        ix <<= 1;
+        e -= 0x3ff + 52 + 1;
+
+        Self { m: ix, e, neg }
+    }
+}
+
+// 1p63 magic number
+fn foo<F: Float>() -> F
+where
+    u32: CastInto<F::Int>,
+{
+    F::from_parts(false, (F::BITS - 1).cast(), F::Int::ZERO)
+}
+
+/// FMA implementation when there is a larger float type available.
+pub fn fma_big<F, B>(x: F, y: F, z: F) -> F
+where
+    F: Float + CastInto<B>,
+    B: Float + CastInto<F> + CastFrom<F>,
+    B::Int: CastInto<i32>,
+    i32: CastFrom<i32>,
+{
+    let one = IntTy::<B>::ONE;
+
+    let xy: B;
+    let mut result: B;
+    let mut ui: B::Int;
+    let e: i32;
+
+    xy = x.cast() * y.cast();
+    result = xy + z.cast();
+    ui = result.to_bits();
+    e = i32::cast_from(ui >> F::SIG_BITS) & F::EXP_MAX as i32;
+    let zb = B::cast_from(z);
+
+    let prec_diff = B::SIG_BITS - F::SIG_BITS;
+    let excess_prec = ui & ((one << prec_diff) - one);
+    let x = one << (prec_diff - 1);
+
+    // Common case: the larger precision is fine
+    if excess_prec != x
+        || e == i32::cast_from(F::EXP_MAX)
+        || (result - xy == zb && result - zb == xy)
+        || fegetround() != FE_TONEAREST
+    {
+        // TODO: feclearexcept
+
+        return result.cast();
+    }
+
+    let neg = ui & B::SIGN_MASK > IntTy::<B>::ZERO;
+    let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
+    if neg == (err < B::ZERO) {
+        ui += one;
+    } else {
+        ui -= one;
+    }
+
+    B::from_bits(ui).cast()
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 08524b685..07a73f37b 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -1,5 +1,7 @@
 mod copysign;
 mod fabs;
+mod fma;
 
 pub use copysign::copysign;
 pub use fabs::fabs;
+pub use fma::{fma, fma_big};
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 5baf35e42..de6bef828 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -121,7 +121,7 @@ use self::rem_pio2::rem_pio2;
 use self::rem_pio2_large::rem_pio2_large;
 use self::rem_pio2f::rem_pio2f;
 #[allow(unused_imports)]
-use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
+use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, IntTy, MinInt};
 
 // Public modules
 mod acos;
@@ -343,9 +343,11 @@ cfg_if! {
     if #[cfg(f16_enabled)] {
         mod copysignf16;
         mod fabsf16;
+        mod fmaf16;
 
         pub use self::copysignf16::copysignf16;
         pub use self::fabsf16::fabsf16;
+        pub use self::fmaf16::fmaf16;
     }
 }
 
@@ -353,9 +355,11 @@ cfg_if! {
     if #[cfg(f128_enabled)] {
         mod copysignf128;
         mod fabsf128;
+        mod fmaf128;
 
         pub use self::copysignf128::copysignf128;
         pub use self::fabsf128::fabsf128;
+        pub use self::fmaf128::fmaf128;
     }
 }
 
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 3b5be4fa3..01676bd79 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -1,4 +1,5 @@
-use core::{fmt, mem, ops};
+use core::ops::{self, Neg};
+use core::{fmt, mem};
 
 use super::int_traits::{Int, MinInt};
 
@@ -23,10 +24,9 @@ pub trait Float:
     type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
 
     /// A int of the same width as the float
-    type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
-
-    /// An int capable of containing the exponent bits plus a sign bit. This is signed.
-    type ExpInt: Int;
+    type SignedInt: Int
+        + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
+        + Neg<Output = Self::SignedInt>;
 
     const ZERO: Self;
     const NEG_ZERO: Self;
@@ -98,7 +98,7 @@ pub trait Float:
     }
 
     /// Returns the exponent, not adjusting for bias.
-    fn exp(self) -> Self::ExpInt;
+    fn exp(self) -> i32;
 
     /// Returns the significand with no implicit bit (or the "fractional" part)
     fn frac(self) -> Self::Int {
@@ -138,7 +138,6 @@ pub trait Float:
 }
 
 /// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
-#[allow(dead_code)]
 pub type IntTy<F> = <F as Float>::Int;
 
 macro_rules! float_impl {
@@ -146,7 +145,6 @@ macro_rules! float_impl {
         $ty:ident,
         $ity:ident,
         $sity:ident,
-        $expty:ident,
         $bits:expr,
         $significand_bits:expr,
         $from_bits:path
@@ -154,7 +152,6 @@ macro_rules! float_impl {
         impl Float for $ty {
             type Int = $ity;
             type SignedInt = $sity;
-            type ExpInt = $expty;
 
             const ZERO: Self = 0.0;
             const NEG_ZERO: Self = -0.0;
@@ -191,8 +188,8 @@ macro_rules! float_impl {
             fn is_sign_negative(self) -> bool {
                 self.is_sign_negative()
             }
-            fn exp(self) -> Self::ExpInt {
-                ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
+            fn exp(self) -> i32 {
+                ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as i32
             }
             fn from_bits(a: Self::Int) -> Self {
                 Self::from_bits(a)
@@ -226,11 +223,11 @@ macro_rules! float_impl {
 }
 
 #[cfg(f16_enabled)]
-float_impl!(f16, u16, i16, i8, 16, 10, f16::from_bits);
-float_impl!(f32, u32, i32, i16, 32, 23, f32_from_bits);
-float_impl!(f64, u64, i64, i16, 64, 52, f64_from_bits);
+float_impl!(f16, u16, i16, 16, 10, f16::from_bits);
+float_impl!(f32, u32, i32, 32, 23, f32_from_bits);
+float_impl!(f64, u64, i64, 64, 52, f64_from_bits);
 #[cfg(f128_enabled)]
-float_impl!(f128, u128, i128, i16, 128, 112, f128::from_bits);
+float_impl!(f128, u128, i128, 128, 112, f128::from_bits);
 
 /* FIXME(msrv): vendor some things that are not const stable at our MSRV */
 
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index 380313c1e..e50459525 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -82,6 +82,7 @@ pub trait Int:
     fn wrapping_shr(self, other: u32) -> Self;
     fn rotate_left(self, other: u32) -> Self;
     fn overflowing_add(self, other: Self) -> (Self, bool);
+    fn overflowing_sub(self, other: Self) -> (Self, bool);
     fn leading_zeros(self) -> u32;
     fn ilog2(self) -> u32;
 }
@@ -140,6 +141,10 @@ macro_rules! int_impl_common {
             <Self>::overflowing_add(self, other)
         }
 
+        fn overflowing_sub(self, other: Self) -> (Self, bool) {
+            <Self>::overflowing_sub(self, other)
+        }
+
         fn leading_zeros(self) -> u32 {
             <Self>::leading_zeros(self)
         }
@@ -382,3 +387,29 @@ cast_into!(u64);
 cast_into!(i64);
 cast_into!(u128);
 cast_into!(i128);
+
+cast_into!(i64; f32);
+cast_into!(i64; f64);
+cast_into!(f32; f64);
+cast_into!(f64; f32);
+
+cast_into!(bool; u16);
+cast_into!(bool; u32);
+cast_into!(bool; u64);
+cast_into!(bool; u128);
+
+cfg_if! {
+    if #[cfg(f16_enabled)] {
+        cast_into!(f16; f32, f64);
+        cast_into!(f32; f16);
+        cast_into!(f64; f16);
+    }
+}
+
+cfg_if! {
+    if #[cfg(f128_enabled)] {
+        cast_into!(f128; f32, f64);
+        cast_into!(f32; f128);
+        cast_into!(f64; f128);
+    }
+}