Make a subset of libm symbols weakly available on all platforms

tgross35 · tgross35 · commit 63050d8f5239 · 2025-02-23T07:02:55.000Z
018616e ("Always have math functions but with `weak` linking attribute if we can") made all math symbols available on platforms that support weak linkage. This caused some unexpected regressions, however, because our less accurate and slow `libm` implementations were being preferred over the system `libm`, which also tends to be weak. Thus, 0fab77e ("Don't include `math` for `unix` and `wasi` targets") was applied to undo these changes on many platforms. Add back a subset of these functions: * cbrt * ceil * copysign * fabs * fdim * floor * fma * fmax * fmaximum * fmin * fminimum * fmod * rint * round * roundeven * sqrt * trunc This list includes only functions that produce exact results (verified with exhaustive / extensive tests, and also required by IEEE in most cases), and for which benchmarks indicate performance similar to or better than Musl's `libm`. All except `cbrt` also have `f16` and `f128` implementations. Once more routines meet these criteria, we can move them from platform-specific availability to always available. Once this change makes it to rust-lang/rust, we will also be able to move the relevant functions from `std` to `core`. [1]: rust-lang/rust#128386
diff --git a/src/lib.rs b/src/lib.rs
@@ -40,23 +40,6 @@ mod macros;
 
 pub mod float;
 pub mod int;
-
-// Disable for any of the following:
-// - x86 without sse2 due to ABI issues
-//   - <https://github.com/rust-lang/rust/issues/114479>
-//   - but exclude UEFI since it is a soft-float target
-//     - <https://github.com/rust-lang/rust/issues/128533>
-// - All unix targets (linux, macos, freebsd, android, etc)
-// - wasm with known target_os
-#[cfg(not(any(
-    all(
-        target_arch = "x86",
-        not(target_feature = "sse2"),
-        not(target_os = "uefi"),
-    ),
-    unix,
-    all(target_family = "wasm", not(target_os = "unknown"))
-)))]
 pub mod math;
 pub mod mem;
 
diff --git a/src/math.rs b/src/math.rs
@@ -6,109 +6,195 @@
 mod libm;
 
 #[allow(unused_macros)]
-macro_rules! no_mangle {
+macro_rules! libm_intrinsics {
     ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => {
         intrinsics! {
             $(
                 pub extern "C" fn $fun($($iid: $ity),+) -> $oty {
-                    self::libm::$fun($($iid),+)
+                    $crate::math::libm::$fun($($iid),+)
                 }
             )+
         }
     }
 }
 
-#[cfg(not(windows))]
-no_mangle! {
-    fn acos(x: f64) -> f64;
-    fn asin(x: f64) -> f64;
-    fn cbrt(x: f64) -> f64;
-    fn expm1(x: f64) -> f64;
-    fn hypot(x: f64, y: f64) -> f64;
-    fn tan(x: f64) -> f64;
-    fn cos(x: f64) -> f64;
-    fn expf(x: f32) -> f32;
-    fn log2(x: f64) -> f64;
-    fn log2f(x: f32) -> f32;
-    fn log10(x: f64) -> f64;
-    fn log10f(x: f32) -> f32;
-    fn log(x: f64) -> f64;
-    fn logf(x: f32) -> f32;
-    fn round(x: f64) -> f64;
-    fn roundf(x: f32) -> f32;
-    fn rint(x: f64) -> f64;
-    fn rintf(x: f32) -> f32;
-    fn sin(x: f64) -> f64;
-    fn pow(x: f64, y: f64) -> f64;
-    fn powf(x: f32, y: f32) -> f32;
-    fn acosf(n: f32) -> f32;
-    fn atan2f(a: f32, b: f32) -> f32;
-    fn atanf(n: f32) -> f32;
-    fn coshf(n: f32) -> f32;
-    fn expm1f(n: f32) -> f32;
-    fn fdim(a: f64, b: f64) -> f64;
-    fn fdimf(a: f32, b: f32) -> f32;
-    fn log1pf(n: f32) -> f32;
-    fn sinhf(n: f32) -> f32;
-    fn tanhf(n: f32) -> f32;
-    fn ldexp(f: f64, n: i32) -> f64;
-    fn ldexpf(f: f32, n: i32) -> f32;
-    fn tgamma(x: f64) -> f64;
-    fn tgammaf(x: f32) -> f32;
-    fn atan(x: f64) -> f64;
-    fn atan2(x: f64, y: f64) -> f64;
-    fn cosh(x: f64) -> f64;
-    fn log1p(x: f64) -> f64;
-    fn sinh(x: f64) -> f64;
-    fn tanh(x: f64) -> f64;
-    fn cosf(x: f32) -> f32;
-    fn exp(x: f64) -> f64;
-    fn sinf(x: f32) -> f32;
-    fn exp2(x: f64) -> f64;
-    fn exp2f(x: f32) -> f32;
-    fn fma(x: f64, y: f64, z: f64) -> f64;
-    fn fmaf(x: f32, y: f32, z: f32) -> f32;
-    fn asinf(n: f32) -> f32;
-    fn cbrtf(n: f32) -> f32;
-    fn hypotf(x: f32, y: f32) -> f32;
-    fn tanf(n: f32) -> f32;
+/// This set of functions is well tested in `libm` and known to provide similar performance to
+/// system `libm`, as well as the same or better accuracy.
+mod full_availability {
+    #[cfg(f16_enabled)]
+    libm_intrinsics! {
+        fn ceilf16(x: f16) -> f16;
+        fn copysignf16(x: f16, y: f16) -> f16;
+        fn fabsf16(x: f16) -> f16;
+        fn fdimf16(x: f16, y: f16) -> f16;
+        fn floorf16(x: f16) -> f16;
+        fn fmaxf16(x: f16, y: f16) -> f16;
+        fn fmaximumf16(x: f16, y: f16) -> f16;
+        fn fminf16(x: f16, y: f16) -> f16;
+        fn fminimumf16(x: f16, y: f16) -> f16;
+        fn fmodf16(x: f16, y: f16) -> f16;
+        fn rintf16(x: f16) -> f16;
+        fn roundevenf16(x: f16) -> f16;
+        fn roundf16(x: f16) -> f16;
+        fn sqrtf16(x: f16) -> f16;
+        fn truncf16(x: f16) -> f16;
+    }
+
+    /* Weak linkage is unreliable on Windows and Apple, so we don't expose symbols that we know
+     * the system libc provides in order to avoid conflicts. */
 
-    fn sqrtf(x: f32) -> f32;
-    fn sqrt(x: f64) -> f64;
+    #[cfg(all(not(windows), not(target_vendor = "apple")))]
+    libm_intrinsics! {
+        /* f32 */
+        fn cbrtf(n: f32) -> f32;
+        fn ceilf(x: f32) -> f32;
+        fn copysignf(x: f32, y: f32) -> f32;
+        fn fabsf(x: f32) -> f32;
+        fn fdimf(a: f32, b: f32) -> f32;
+        fn floorf(x: f32) -> f32;
+        fn fmaf(x: f32, y: f32, z: f32) -> f32;
+        fn fmaxf(x: f32, y: f32) -> f32;
+        fn fminf(x: f32, y: f32) -> f32;
+        fn fmodf(x: f32, y: f32) -> f32;
+        fn rintf(x: f32) -> f32;
+        fn roundf(x: f32) -> f32;
+        fn sqrtf(x: f32) -> f32;
+        fn truncf(x: f32) -> f32;
 
-    fn ceil(x: f64) -> f64;
-    fn ceilf(x: f32) -> f32;
-    fn floor(x: f64) -> f64;
-    fn floorf(x: f32) -> f32;
-    fn trunc(x: f64) -> f64;
-    fn truncf(x: f32) -> f32;
+        /* f64 */
+        fn cbrt(x: f64) -> f64;
+        fn ceil(x: f64) -> f64;
+        fn copysign(x: f64, y: f64) -> f64;
+        fn fabs(x: f64) -> f64;
+        fn fdim(a: f64, b: f64) -> f64;
+        fn floor(x: f64) -> f64;
+        fn fma(x: f64, y: f64, z: f64) -> f64;
+        fn fmax(x: f64, y: f64) -> f64;
+        fn fmin(x: f64, y: f64) -> f64;
+        fn fmod(x: f64, y: f64) -> f64;
+        fn rint(x: f64) -> f64;
+        fn round(x: f64) -> f64;
+        fn sqrt(x: f64) -> f64;
+        fn trunc(x: f64) -> f64;
+    }
 
-    fn fmin(x: f64, y: f64) -> f64;
-    fn fminf(x: f32, y: f32) -> f32;
-    fn fmax(x: f64, y: f64) -> f64;
-    fn fmaxf(x: f32, y: f32) -> f32;
-    // `f64 % f64`
-    fn fmod(x: f64, y: f64) -> f64;
-    // `f32 % f32`
-    fn fmodf(x: f32, y: f32) -> f32;
+    // Windows and MacOS do not yet expose roundeven and IEEE 754-2019 `maximum` / `minimum`,
+    // however, so we still provide a fallback.
+    libm_intrinsics! {
+        fn fmaximum(x: f64, y: f64) -> f64;
+        fn fmaximumf(x: f32, y: f32) -> f32;
+        fn fminimum(x: f64, y: f64) -> f64;
+        fn fminimumf(x: f32, y: f32) -> f32;
+        fn roundeven(x: f64) -> f64;
+        fn roundevenf(x: f32) -> f32;
+    }
 
-    fn erf(x: f64) -> f64;
-    fn erff(x: f32) -> f32;
-    fn erfc(x: f64) -> f64;
-    fn erfcf(x: f32) -> f32;
+    #[cfg(f128_enabled)]
+    libm_intrinsics! {
+        fn ceilf128(x: f128) -> f128;
+        fn copysignf128(x: f128, y: f128) -> f128;
+        fn fabsf128(x: f128) -> f128;
+        fn fdimf128(x: f128, y: f128) -> f128;
+        fn floorf128(x: f128) -> f128;
+        fn fmaf128(x: f128, y: f128, z: f128) -> f128;
+        fn fmaxf128(x: f128, y: f128) -> f128;
+        fn fmaximumf128(x: f128, y: f128) -> f128;
+        fn fminf128(x: f128, y: f128) -> f128;
+        fn fminimumf128(x: f128, y: f128) -> f128;
+        fn fmodf128(x: f128, y: f128) -> f128;
+        fn rintf128(x: f128) -> f128;
+        fn roundevenf128(x: f128) -> f128;
+        fn roundf128(x: f128) -> f128;
+        fn sqrtf128(x: f128) -> f128;
+        fn truncf128(x: f128) -> f128;
+    }
 }
 
-// allow for windows (and other targets)
-intrinsics! {
-    pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
-        let r = self::libm::lgamma_r(x);
-        *s = r.1;
-        r.0
+/// This group of functions has more performance or precision issues than system versions, or
+/// are otherwise less well tested. Provide them only on platforms that have problems with the
+/// system `libm`.
+///
+/// As `libm` improves, more functions will be moved from this group to the first group.
+///
+/// Do not supply for any of the following:
+/// - x86 without sse2 due to ABI issues
+///   - <https://github.com/rust-lang/rust/issues/114479>
+///   - but exclude UEFI since it is a soft-float target
+///     - <https://github.com/rust-lang/rust/issues/128533>
+/// - All unix targets (linux, macos, freebsd, android, etc)
+/// - wasm with known target_os
+#[cfg(not(any(
+    all(
+        target_arch = "x86",
+        not(target_feature = "sse2"),
+        not(target_os = "uefi"),
+    ),
+    unix,
+    all(target_family = "wasm", not(target_os = "unknown"))
+)))]
+mod partial_availability {
+    #[cfg(not(windows))]
+    libm_intrinsics! {
+        fn acos(x: f64) -> f64;
+        fn acosf(n: f32) -> f32;
+        fn asin(x: f64) -> f64;
+        fn asinf(n: f32) -> f32;
+        fn atan(x: f64) -> f64;
+        fn atan2(x: f64, y: f64) -> f64;
+        fn atan2f(a: f32, b: f32) -> f32;
+        fn atanf(n: f32) -> f32;
+        fn cos(x: f64) -> f64;
+        fn cosf(x: f32) -> f32;
+        fn cosh(x: f64) -> f64;
+        fn coshf(n: f32) -> f32;
+        fn erf(x: f64) -> f64;
+        fn erfc(x: f64) -> f64;
+        fn erfcf(x: f32) -> f32;
+        fn erff(x: f32) -> f32;
+        fn exp(x: f64) -> f64;
+        fn exp2(x: f64) -> f64;
+        fn exp2f(x: f32) -> f32;
+        fn expf(x: f32) -> f32;
+        fn expm1(x: f64) -> f64;
+        fn expm1f(n: f32) -> f32;
+        fn hypot(x: f64, y: f64) -> f64;
+        fn hypotf(x: f32, y: f32) -> f32;
+        fn ldexp(f: f64, n: i32) -> f64;
+        fn ldexpf(f: f32, n: i32) -> f32;
+        fn log(x: f64) -> f64;
+        fn log10(x: f64) -> f64;
+        fn log10f(x: f32) -> f32;
+        fn log1p(x: f64) -> f64;
+        fn log1pf(n: f32) -> f32;
+        fn log2(x: f64) -> f64;
+        fn log2f(x: f32) -> f32;
+        fn logf(x: f32) -> f32;
+        fn pow(x: f64, y: f64) -> f64;
+        fn powf(x: f32, y: f32) -> f32;
+        fn sin(x: f64) -> f64;
+        fn sinf(x: f32) -> f32;
+        fn sinh(x: f64) -> f64;
+        fn sinhf(n: f32) -> f32;
+        fn tan(x: f64) -> f64;
+        fn tanf(n: f32) -> f32;
+        fn tanh(x: f64) -> f64;
+        fn tanhf(n: f32) -> f32;
+        fn tgamma(x: f64) -> f64;
+        fn tgammaf(x: f32) -> f32;
     }
 
-    pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
-        let r = self::libm::lgammaf_r(x);
-        *s = r.1;
-        r.0
+    // allow for windows (and other targets)
+    intrinsics! {
+        pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
+            let r = self::libm::lgamma_r(x);
+            *s = r.1;
+            r.0
+        }
+
+        pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
+            let r = self::libm::lgammaf_r(x);
+            *s = r.1;
+            r.0
+        }
     }
 }