diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs index e3a51a575..39ad45c7a 100644 --- a/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/crates/compiler-builtins-smoke-test/src/lib.rs @@ -7,6 +7,7 @@ #![no_std] #[allow(dead_code)] +#[allow(unused_imports)] #[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy. #[path = "../../../src/math/mod.rs"] pub mod libm; diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs index 16547404f..a195f69ed 100644 --- a/crates/libm-macros/src/shared.rs +++ b/crates/libm-macros/src/shared.rs @@ -92,6 +92,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &["copysignf128"], ), + ( + // `(f16, f16, f16) -> f16` + FloatTy::F16, + Signature { args: &[Ty::F16, Ty::F16, Ty::F16], returns: &[Ty::F16] }, + None, + &["fmaf16"], + ), ( // `(f32, f32, f32) -> f32` FloatTy::F32, @@ -106,6 +113,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &["fma"], ), + ( + // `(f128, f128, f128) -> f128` + FloatTy::F128, + Signature { args: &[Ty::F128, Ty::F128, Ty::F128], returns: &[Ty::F128] }, + None, + &["fmaf128"], + ), ( // `(f32) -> i32` FloatTy::F32, diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs index cd1e2d2cc..c9d51cf04 100644 --- a/crates/libm-test/benches/random.rs +++ b/crates/libm-test/benches/random.rs @@ -117,7 +117,8 @@ libm_macros::for_each_function! { exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)), // Musl does not provide `f16` and `f128` functions - copysignf16 | copysignf128 | fabsf16 | fabsf128 => (false, None), + copysignf16 | copysignf128 | fabsf16 | fabsf128 | + fmaf16 | fmaf128 => (false, None), // By default we never skip (false) and always have a musl function available _ => (false, Some(musl_math_sys::MACRO_FN_NAME)) diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs index d378863f2..9a18a4a69 100644 --- a/crates/libm-test/src/f8_impl.rs +++ b/crates/libm-test/src/f8_impl.rs @@ -20,7 +20,6 @@ pub struct f8(u8); impl Float for f8 { type Int = u8; type SignedInt = i8; - type ExpInt = i8; const ZERO: Self = Self(0b0_0000_000); const NEG_ZERO: Self = Self(0b1_0000_000); @@ -62,8 +61,8 @@ impl Float for f8 { self.0 & Self::SIGN_MASK != 0 } - fn exp(self) -> Self::ExpInt { - unimplemented!() + fn exp(self) -> i32 { + ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as i32 } fn from_bits(a: Self::Int) -> Self { diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs index f2b7b2f25..c3a23cc97 100644 --- a/crates/libm-test/src/mpfloat.rs +++ b/crates/libm-test/src/mpfloat.rs @@ -144,7 +144,7 @@ libm_macros::for_each_function! { expm1 | expm1f => exp_m1, fabs | fabsf => abs, fdim | fdimf => positive_diff, - fma | fmaf => mul_add, + fma | fmaf | fmaf16 | fmaf128 => mul_add, fmax | fmaxf => max, fmin | fminf => min, lgamma | lgammaf => ln_gamma, diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs index f8c3a7b8f..8dad34354 100644 --- a/crates/libm-test/src/precision.rs +++ b/crates/libm-test/src/precision.rs @@ -459,7 +459,12 @@ fn bessel_prec_dropoff( None } +#[cfg(f16_enabled)] +impl MaybeOverride<(f16, f16, f16)> for SpecialCase {} impl MaybeOverride<(f32, f32, f32)> for SpecialCase {} impl MaybeOverride<(f64, f64, f64)> for SpecialCase {} +#[cfg(f128_enabled)] +impl MaybeOverride<(f128, f128, f128)> for SpecialCase {} + impl MaybeOverride<(f32, i32)> for SpecialCase {} impl MaybeOverride<(f64, i32)> for SpecialCase {} diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs index 3e11d322a..c33d4eda1 100644 --- a/crates/libm-test/tests/compare_built_musl.rs +++ b/crates/libm-test/tests/compare_built_musl.rs @@ -47,7 +47,7 @@ where libm_macros::for_each_function! { callback: musl_rand_tests, // Musl does not support `f16` and `f128` on all platforms. - skip: [copysignf16, copysignf128, fabsf16, fabsf128], + skip: [copysignf16, copysignf128, fabsf16, fabsf128, fmaf16, fmaf128], attributes: [ #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586 [exp10, exp10f, exp2, exp2f, rint] diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs index 7961b0802..478a9879c 100644 --- a/crates/libm-test/tests/multiprecision.rs +++ b/crates/libm-test/tests/multiprecision.rs @@ -126,6 +126,8 @@ libm_macros::for_each_function! { fdimf, fma, fmaf, + fmaf16, + fmaf128, fmax, fmaxf, fmin, diff --git a/etc/function-definitions.json b/etc/function-definitions.json index 0b2d6214f..808f92d89 100644 --- a/etc/function-definitions.json +++ b/etc/function-definitions.json @@ -318,16 +318,32 @@ "fma": { "sources": [ "src/libm_helper.rs", - "src/math/fma.rs" + "src/math/fma.rs", + "src/math/generic/fma.rs" ], "type": "f64" }, "fmaf": { "sources": [ - "src/math/fmaf.rs" + "src/math/fmaf.rs", + "src/math/generic/fma.rs" ], "type": "f32" }, + "fmaf128": { + "sources": [ + "src/math/fmaf128.rs", + "src/math/generic/fma.rs" + ], + "type": "f128" + }, + "fmaf16": { + "sources": [ + "src/math/fmaf16.rs", + "src/math/generic/fma.rs" + ], + "type": "f16" + }, "fmax": { "sources": [ "src/libm_helper.rs", diff --git a/etc/function-list.txt b/etc/function-list.txt index 0a1bbab24..2fb2ed9aa 100644 --- a/etc/function-list.txt +++ b/etc/function-list.txt @@ -47,6 +47,8 @@ floor floorf fma fmaf +fmaf128 +fmaf16 fmax fmaxf fmin diff --git a/src/math/fma.rs b/src/math/fma.rs index 826143d5a..b482c0f9c 100644 --- a/src/math/fma.rs +++ b/src/math/fma.rs @@ -42,6 +42,10 @@ fn mul(x: u64, y: u64) -> (u64, u64) { /// according to the rounding mode characterized by the value of FLT_ROUNDS. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fma(x: f64, y: f64, z: f64) -> f64 { + if true { + return super::generic::fma(x, y, z, scalbn); + } + let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs index 79371c836..7c42a1b6d 100644 --- a/src/math/fmaf.rs +++ b/src/math/fmaf.rs @@ -47,6 +47,10 @@ use super::fenv::{ /// according to the rounding mode characterized by the value of FLT_ROUNDS. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { + if true { + return super::generic::fma_big::(x, y, z); + } + let xy: f64; let mut result: f64; let mut ui: u64; diff --git a/src/math/fmaf128.rs b/src/math/fmaf128.rs new file mode 100644 index 000000000..6272ef677 --- /dev/null +++ b/src/math/fmaf128.rs @@ -0,0 +1,6 @@ +#[expect(unused)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { + // super::generic::fma(x, y, z) + todo!() +} diff --git a/src/math/fmaf16.rs b/src/math/fmaf16.rs new file mode 100644 index 000000000..e7adab304 --- /dev/null +++ b/src/math/fmaf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf16(x: f16, y: f16, z: f16) -> f16 { + super::generic::fma_big::(x, y, z) +} diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs new file mode 100644 index 000000000..0b0292d78 --- /dev/null +++ b/src/math/generic/fma.rs @@ -0,0 +1,261 @@ +#![allow(unused)] + +use core::ops::{Shl, Shr}; + +use super::super::fenv::{ + FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept, +}; +use super::super::support::{DInt, HInt, Int}; +use super::super::{CastFrom, CastInto, Float, IntTy, MinInt}; + +const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1; + +/// Fused multiply add. +pub fn fma(x: F, y: F, z: F, scbn: impl FnOnce(F, i32) -> F) -> F +where + F::Int: CastFrom, + F::Int: HInt, + F::Int: Shr, + F::Int: Shl, + F::SignedInt: CastInto, + u32: CastInto, + bool: CastInto, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + let nx = Norm::from_float(x); + let ny = Norm::from_float(y); + let nz = Norm::from_float(z); + + if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN { + return x * y + z; + } + if nz.e >= ZEROINFNAN { + if nz.e > ZEROINFNAN { + /* z==0 */ + return x * y + z; + } + return z; + } + + let zhi: F::Int; + let zlo: F::Int; + + let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi(); + + let mut e: i32 = nx.e + ny.e; + let mut d: i32 = nz.e - e; + + let fbits = F::BITS as i32; + + if d > 0 { + if d < fbits { + zlo = nz.m << d; + zhi = nz.m >> (fbits - d); + } else { + zlo = zero; + zhi = nz.m; + e = nz.e - fbits; + d -= fbits; + if d == 0 { + } else if d < fbits { + rlo = (rhi << (fbits - d)) | (rlo >> d) | ((rlo << (fbits - d)) != zero).cast(); + rhi = rhi >> d; + } else { + rlo = one; + rhi = zero; + } + } + } else { + zhi = zero; + d = -d; + if d == 0 { + zlo = nz.m; + } else if d < fbits { + zlo = (nz.m >> d) | ((nz.m << (fbits - d)) != zero).cast(); + } else { + zlo = one; + } + } + + /* add */ + let mut neg: bool = nx.neg ^ ny.neg; + let samesign: bool = neg ^ nz.neg; + let mut nonzero: i32 = 1; + if samesign { + /* r += z */ + rlo = rlo.wrapping_add(zlo); + rhi += zhi + (rlo < zlo).cast(); + } else { + /* r -= z */ + let (res, borrow) = rlo.overflowing_sub(zlo); + rlo = res; + rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow.cast())); + if (rhi >> (F::BITS - 1)) != zero { + rlo = (rlo.signed()).wrapping_neg().unsigned(); + rhi = (rhi.signed()).wrapping_neg().unsigned() - (rlo != zero).cast(); + neg = !neg; + } + nonzero = (rhi != zero) as i32; + } + + /* set rhi to top 63bit of the result (last bit is sticky) */ + if nonzero != 0 { + e += fbits; + d = rhi.leading_zeros() as i32 - 1; + /* note: d > 0 */ + rhi = (rhi << d) | (rlo >> (fbits - d)) | ((rlo << d) != zero).cast(); + } else if rlo != zero { + d = rlo.leading_zeros() as i32 - 1; + if d < 0 { + rhi = (rlo >> 1) | (rlo & one); + } else { + rhi = rlo << d; + } + } else { + /* exact +-0 */ + return x * y + z; + } + e -= d; + + /* convert to double */ + let mut i: F::SignedInt = rhi.signed(); /* i is in [1<<62,(1<<63)-1] */ + if neg { + i = -i; + } + let mut r: F = i.cast(); /* |r| is in [0x1p62,0x1p63] */ + + if e < -1022 - 62 { + /* result is subnormal before rounding */ + if e == -1022 - 63 { + let mut c: F = foo::(); + if neg { + c = -c; + } + if r == c { + /* min normal after rounding, underflow depends + on arch behaviour which can be imitated by + a double to float conversion */ + // let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32; + // return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64; + todo!() + } + /* one bit is lost when scaled, add another top bit to + only round once at conversion if it is inexact */ + if (rhi << (F::SIG_BITS + 1)) != zero { + let tmp: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2)); + i = tmp.signed(); + if neg { + i = -i; + } + r = i.cast(); + r = (F::ONE + F::ONE) * r - c; /* remove top bit */ + + /* raise underflow portably, such that it + cannot be optimized away */ + { + // let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r; + // r += (tiny * tiny) * (r - r); + todo!() + } + } + } else { + /* only round once when scaled */ + d = 10; + i = (((rhi >> d) | ((rhi << (fbits - d)) != zero).cast()) << d).signed(); + if neg { + i = -i; + } + r = i.cast(); + } + } + + // todo!() + // + scbn(r, e) +} + +struct Norm { + m: F::Int, + e: i32, + neg: bool, +} + +impl Norm { + fn from_float(x: F) -> Self + where + F::Int: CastFrom, + u32: CastInto, + { + let mut ix = x.to_bits(); + let mut e = x.exp(); + let neg = x.is_sign_negative(); + if e.is_zero() { + ix = (x * foo::()).to_bits(); + e = x.exp(); + e = if e != 0 { e - (F::BITS as i32) } else { 0x800 }; + } + ix &= F::SIG_MASK; + ix |= F::IMPLICIT_BIT; + ix <<= 1; + e -= 0x3ff + 52 + 1; + + Self { m: ix, e, neg } + } +} + +// 1p63 magic number +fn foo() -> F +where + u32: CastInto, +{ + F::from_parts(false, (F::BITS - 1).cast(), F::Int::ZERO) +} + +/// FMA implementation when there is a larger float type available. +pub fn fma_big(x: F, y: F, z: F) -> F +where + F: Float + CastInto, + B: Float + CastInto + CastFrom, + B::Int: CastInto, + i32: CastFrom, +{ + let one = IntTy::::ONE; + + let xy: B; + let mut result: B; + let mut ui: B::Int; + let e: i32; + + xy = x.cast() * y.cast(); + result = xy + z.cast(); + ui = result.to_bits(); + e = i32::cast_from(ui >> F::SIG_BITS) & F::EXP_MAX as i32; + let zb = B::cast_from(z); + + let prec_diff = B::SIG_BITS - F::SIG_BITS; + let excess_prec = ui & ((one << prec_diff) - one); + let x = one << (prec_diff - 1); + + // Common case: the larger precision is fine + if excess_prec != x + || e == i32::cast_from(F::EXP_MAX) + || (result - xy == zb && result - zb == xy) + || fegetround() != FE_TONEAREST + { + // TODO: feclearexcept + + return result.cast(); + } + + let neg = ui & B::SIGN_MASK > IntTy::::ZERO; + let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy }; + if neg == (err < B::ZERO) { + ui += one; + } else { + ui -= one; + } + + B::from_bits(ui).cast() +} diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs index 08524b685..07a73f37b 100644 --- a/src/math/generic/mod.rs +++ b/src/math/generic/mod.rs @@ -1,5 +1,7 @@ mod copysign; mod fabs; +mod fma; pub use copysign::copysign; pub use fabs::fabs; +pub use fma::{fma, fma_big}; diff --git a/src/math/mod.rs b/src/math/mod.rs index 5baf35e42..de6bef828 100644 --- a/src/math/mod.rs +++ b/src/math/mod.rs @@ -121,7 +121,7 @@ use self::rem_pio2::rem_pio2; use self::rem_pio2_large::rem_pio2_large; use self::rem_pio2f::rem_pio2f; #[allow(unused_imports)] -use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt}; +use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, IntTy, MinInt}; // Public modules mod acos; @@ -343,9 +343,11 @@ cfg_if! { if #[cfg(f16_enabled)] { mod copysignf16; mod fabsf16; + mod fmaf16; pub use self::copysignf16::copysignf16; pub use self::fabsf16::fabsf16; + pub use self::fmaf16::fmaf16; } } @@ -353,9 +355,11 @@ cfg_if! { if #[cfg(f128_enabled)] { mod copysignf128; mod fabsf128; + mod fmaf128; pub use self::copysignf128::copysignf128; pub use self::fabsf128::fabsf128; + pub use self::fmaf128::fmaf128; } } diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs index 3b5be4fa3..01676bd79 100644 --- a/src/math/support/float_traits.rs +++ b/src/math/support/float_traits.rs @@ -1,4 +1,5 @@ -use core::{fmt, mem, ops}; +use core::ops::{self, Neg}; +use core::{fmt, mem}; use super::int_traits::{Int, MinInt}; @@ -23,10 +24,9 @@ pub trait Float: type Int: Int; /// A int of the same width as the float - type SignedInt: Int + MinInt; - - /// An int capable of containing the exponent bits plus a sign bit. This is signed. - type ExpInt: Int; + type SignedInt: Int + + MinInt + + Neg; const ZERO: Self; const NEG_ZERO: Self; @@ -98,7 +98,7 @@ pub trait Float: } /// Returns the exponent, not adjusting for bias. - fn exp(self) -> Self::ExpInt; + fn exp(self) -> i32; /// Returns the significand with no implicit bit (or the "fractional" part) fn frac(self) -> Self::Int { @@ -138,7 +138,6 @@ pub trait Float: } /// Access the associated `Int` type from a float (helper to avoid ambiguous associated types). -#[allow(dead_code)] pub type IntTy = ::Int; macro_rules! float_impl { @@ -146,7 +145,6 @@ macro_rules! float_impl { $ty:ident, $ity:ident, $sity:ident, - $expty:ident, $bits:expr, $significand_bits:expr, $from_bits:path @@ -154,7 +152,6 @@ macro_rules! float_impl { impl Float for $ty { type Int = $ity; type SignedInt = $sity; - type ExpInt = $expty; const ZERO: Self = 0.0; const NEG_ZERO: Self = -0.0; @@ -191,8 +188,8 @@ macro_rules! float_impl { fn is_sign_negative(self) -> bool { self.is_sign_negative() } - fn exp(self) -> Self::ExpInt { - ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt + fn exp(self) -> i32 { + ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as i32 } fn from_bits(a: Self::Int) -> Self { Self::from_bits(a) @@ -226,11 +223,11 @@ macro_rules! float_impl { } #[cfg(f16_enabled)] -float_impl!(f16, u16, i16, i8, 16, 10, f16::from_bits); -float_impl!(f32, u32, i32, i16, 32, 23, f32_from_bits); -float_impl!(f64, u64, i64, i16, 64, 52, f64_from_bits); +float_impl!(f16, u16, i16, 16, 10, f16::from_bits); +float_impl!(f32, u32, i32, 32, 23, f32_from_bits); +float_impl!(f64, u64, i64, 64, 52, f64_from_bits); #[cfg(f128_enabled)] -float_impl!(f128, u128, i128, i16, 128, 112, f128::from_bits); +float_impl!(f128, u128, i128, 128, 112, f128::from_bits); /* FIXME(msrv): vendor some things that are not const stable at our MSRV */ diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs index 380313c1e..e50459525 100644 --- a/src/math/support/int_traits.rs +++ b/src/math/support/int_traits.rs @@ -82,6 +82,7 @@ pub trait Int: fn wrapping_shr(self, other: u32) -> Self; fn rotate_left(self, other: u32) -> Self; fn overflowing_add(self, other: Self) -> (Self, bool); + fn overflowing_sub(self, other: Self) -> (Self, bool); fn leading_zeros(self) -> u32; fn ilog2(self) -> u32; } @@ -140,6 +141,10 @@ macro_rules! int_impl_common { ::overflowing_add(self, other) } + fn overflowing_sub(self, other: Self) -> (Self, bool) { + ::overflowing_sub(self, other) + } + fn leading_zeros(self) -> u32 { ::leading_zeros(self) } @@ -382,3 +387,29 @@ cast_into!(u64); cast_into!(i64); cast_into!(u128); cast_into!(i128); + +cast_into!(i64; f32); +cast_into!(i64; f64); +cast_into!(f32; f64); +cast_into!(f64; f32); + +cast_into!(bool; u16); +cast_into!(bool; u32); +cast_into!(bool; u64); +cast_into!(bool; u128); + +cfg_if! { + if #[cfg(f16_enabled)] { + cast_into!(f16; f32, f64); + cast_into!(f32; f16); + cast_into!(f64; f16); + } +} + +cfg_if! { + if #[cfg(f128_enabled)] { + cast_into!(f128; f32, f64); + cast_into!(f32; f128); + cast_into!(f64; f128); + } +}