Skip to content

Commit 63050d8

Browse files
committed
Make a subset of libm symbols weakly available on all platforms
018616e ("Always have math functions but with `weak` linking attribute if we can") made all math symbols available on platforms that support weak linkage. This caused some unexpected regressions, however, because our less accurate and slow `libm` implementations were being preferred over the system `libm`, which also tends to be weak. Thus, 0fab77e ("Don't include `math` for `unix` and `wasi` targets") was applied to undo these changes on many platforms.

Add back a subset of these functions:

* cbrt
* ceil
* copysign
* fabs
* fdim
* floor
* fma
* fmax
* fmaximum
* fmin
* fminimum
* fmod
* rint
* round
* roundeven
* sqrt
* trunc

This list includes only functions that produce exact results (verified with exhaustive / extensive tests, and also required by IEEE in most cases), and for which benchmarks indicate performance similar to or better than Musl's `libm`. All except `cbrt` also have `f16` and `f128` implementations.

Once more routines meet these criteria, we can move them from platform-specific availability to always available.

Once this change makes it to rust-lang/rust, we will also be able to move the relevant functions from `std` to `core`.

[1]: rust-lang/rust#128386
1 parent ddd1a09 commit 63050d8

File tree

2 files changed

+172
-103
lines changed

2 files changed

+172
-103
lines changed

src/lib.rs

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,23 +40,6 @@ mod macros;
4040

4141
pub mod float;
4242
pub mod int;
43-
44-
// Disable for any of the following:
45-
// - x86 without sse2 due to ABI issues
46-
// - <https://github.com/rust-lang/rust/issues/114479>
47-
// - but exclude UEFI since it is a soft-float target
48-
// - <https://github.com/rust-lang/rust/issues/128533>
49-
// - All unix targets (linux, macos, freebsd, android, etc)
50-
// - wasm with known target_os
51-
#[cfg(not(any(
52-
all(
53-
target_arch = "x86",
54-
not(target_feature = "sse2"),
55-
not(target_os = "uefi"),
56-
),
57-
unix,
58-
all(target_family = "wasm", not(target_os = "unknown"))
59-
)))]
6043
pub mod math;
6144
pub mod mem;
6245

src/math.rs

Lines changed: 172 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -6,109 +6,195 @@
66
mod libm;
77

88
#[allow(unused_macros)]
9-
macro_rules! no_mangle {
9+
macro_rules! libm_intrinsics {
1010
($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => {
1111
intrinsics! {
1212
$(
1313
pub extern "C" fn $fun($($iid: $ity),+) -> $oty {
14-
self::libm::$fun($($iid),+)
14+
$crate::math::libm::$fun($($iid),+)
1515
}
1616
)+
1717
}
1818
}
1919
}
2020

21-
#[cfg(not(windows))]
22-
no_mangle! {
23-
fn acos(x: f64) -> f64;
24-
fn asin(x: f64) -> f64;
25-
fn cbrt(x: f64) -> f64;
26-
fn expm1(x: f64) -> f64;
27-
fn hypot(x: f64, y: f64) -> f64;
28-
fn tan(x: f64) -> f64;
29-
fn cos(x: f64) -> f64;
30-
fn expf(x: f32) -> f32;
31-
fn log2(x: f64) -> f64;
32-
fn log2f(x: f32) -> f32;
33-
fn log10(x: f64) -> f64;
34-
fn log10f(x: f32) -> f32;
35-
fn log(x: f64) -> f64;
36-
fn logf(x: f32) -> f32;
37-
fn round(x: f64) -> f64;
38-
fn roundf(x: f32) -> f32;
39-
fn rint(x: f64) -> f64;
40-
fn rintf(x: f32) -> f32;
41-
fn sin(x: f64) -> f64;
42-
fn pow(x: f64, y: f64) -> f64;
43-
fn powf(x: f32, y: f32) -> f32;
44-
fn acosf(n: f32) -> f32;
45-
fn atan2f(a: f32, b: f32) -> f32;
46-
fn atanf(n: f32) -> f32;
47-
fn coshf(n: f32) -> f32;
48-
fn expm1f(n: f32) -> f32;
49-
fn fdim(a: f64, b: f64) -> f64;
50-
fn fdimf(a: f32, b: f32) -> f32;
51-
fn log1pf(n: f32) -> f32;
52-
fn sinhf(n: f32) -> f32;
53-
fn tanhf(n: f32) -> f32;
54-
fn ldexp(f: f64, n: i32) -> f64;
55-
fn ldexpf(f: f32, n: i32) -> f32;
56-
fn tgamma(x: f64) -> f64;
57-
fn tgammaf(x: f32) -> f32;
58-
fn atan(x: f64) -> f64;
59-
fn atan2(x: f64, y: f64) -> f64;
60-
fn cosh(x: f64) -> f64;
61-
fn log1p(x: f64) -> f64;
62-
fn sinh(x: f64) -> f64;
63-
fn tanh(x: f64) -> f64;
64-
fn cosf(x: f32) -> f32;
65-
fn exp(x: f64) -> f64;
66-
fn sinf(x: f32) -> f32;
67-
fn exp2(x: f64) -> f64;
68-
fn exp2f(x: f32) -> f32;
69-
fn fma(x: f64, y: f64, z: f64) -> f64;
70-
fn fmaf(x: f32, y: f32, z: f32) -> f32;
71-
fn asinf(n: f32) -> f32;
72-
fn cbrtf(n: f32) -> f32;
73-
fn hypotf(x: f32, y: f32) -> f32;
74-
fn tanf(n: f32) -> f32;
21+
/// This set of functions is well tested in `libm` and known to provide similar performance to
22+
/// system `libm`, as well as the same or better accuracy.
23+
mod full_availability {
24+
#[cfg(f16_enabled)]
25+
libm_intrinsics! {
26+
fn ceilf16(x: f16) -> f16;
27+
fn copysignf16(x: f16, y: f16) -> f16;
28+
fn fabsf16(x: f16) -> f16;
29+
fn fdimf16(x: f16, y: f16) -> f16;
30+
fn floorf16(x: f16) -> f16;
31+
fn fmaxf16(x: f16, y: f16) -> f16;
32+
fn fmaximumf16(x: f16, y: f16) -> f16;
33+
fn fminf16(x: f16, y: f16) -> f16;
34+
fn fminimumf16(x: f16, y: f16) -> f16;
35+
fn fmodf16(x: f16, y: f16) -> f16;
36+
fn rintf16(x: f16) -> f16;
37+
fn roundevenf16(x: f16) -> f16;
38+
fn roundf16(x: f16) -> f16;
39+
fn sqrtf16(x: f16) -> f16;
40+
fn truncf16(x: f16) -> f16;
41+
}
42+
43+
/* Weak linkage is unreliable on Windows and Apple, so we don't expose symbols that we know
44+
* the system libc provides in order to avoid conflicts. */
7545

76-
fn sqrtf(x: f32) -> f32;
77-
fn sqrt(x: f64) -> f64;
46+
#[cfg(all(not(windows), not(target_vendor = "apple")))]
47+
libm_intrinsics! {
48+
/* f32 */
49+
fn cbrtf(n: f32) -> f32;
50+
fn ceilf(x: f32) -> f32;
51+
fn copysignf(x: f32, y: f32) -> f32;
52+
fn fabsf(x: f32) -> f32;
53+
fn fdimf(a: f32, b: f32) -> f32;
54+
fn floorf(x: f32) -> f32;
55+
fn fmaf(x: f32, y: f32, z: f32) -> f32;
56+
fn fmaxf(x: f32, y: f32) -> f32;
57+
fn fminf(x: f32, y: f32) -> f32;
58+
fn fmodf(x: f32, y: f32) -> f32;
59+
fn rintf(x: f32) -> f32;
60+
fn roundf(x: f32) -> f32;
61+
fn sqrtf(x: f32) -> f32;
62+
fn truncf(x: f32) -> f32;
7863

79-
fn ceil(x: f64) -> f64;
80-
fn ceilf(x: f32) -> f32;
81-
fn floor(x: f64) -> f64;
82-
fn floorf(x: f32) -> f32;
83-
fn trunc(x: f64) -> f64;
84-
fn truncf(x: f32) -> f32;
64+
/* f64 */
65+
fn cbrt(x: f64) -> f64;
66+
fn ceil(x: f64) -> f64;
67+
fn copysign(x: f64, y: f64) -> f64;
68+
fn fabs(x: f64) -> f64;
69+
fn fdim(a: f64, b: f64) -> f64;
70+
fn floor(x: f64) -> f64;
71+
fn fma(x: f64, y: f64, z: f64) -> f64;
72+
fn fmax(x: f64, y: f64) -> f64;
73+
fn fmin(x: f64, y: f64) -> f64;
74+
fn fmod(x: f64, y: f64) -> f64;
75+
fn rint(x: f64) -> f64;
76+
fn round(x: f64) -> f64;
77+
fn sqrt(x: f64) -> f64;
78+
fn trunc(x: f64) -> f64;
79+
}
8580

86-
fn fmin(x: f64, y: f64) -> f64;
87-
fn fminf(x: f32, y: f32) -> f32;
88-
fn fmax(x: f64, y: f64) -> f64;
89-
fn fmaxf(x: f32, y: f32) -> f32;
90-
// `f64 % f64`
91-
fn fmod(x: f64, y: f64) -> f64;
92-
// `f32 % f32`
93-
fn fmodf(x: f32, y: f32) -> f32;
81+
// Windows and MacOS do not yet expose roundeven and IEEE 754-2019 `maximum` / `minimum`,
82+
// however, so we still provide a fallback.
83+
libm_intrinsics! {
84+
fn fmaximum(x: f64, y: f64) -> f64;
85+
fn fmaximumf(x: f32, y: f32) -> f32;
86+
fn fminimum(x: f64, y: f64) -> f64;
87+
fn fminimumf(x: f32, y: f32) -> f32;
88+
fn roundeven(x: f64) -> f64;
89+
fn roundevenf(x: f32) -> f32;
90+
}
9491

95-
fn erf(x: f64) -> f64;
96-
fn erff(x: f32) -> f32;
97-
fn erfc(x: f64) -> f64;
98-
fn erfcf(x: f32) -> f32;
92+
#[cfg(f128_enabled)]
93+
libm_intrinsics! {
94+
fn ceilf128(x: f128) -> f128;
95+
fn copysignf128(x: f128, y: f128) -> f128;
96+
fn fabsf128(x: f128) -> f128;
97+
fn fdimf128(x: f128, y: f128) -> f128;
98+
fn floorf128(x: f128) -> f128;
99+
fn fmaf128(x: f128, y: f128, z: f128) -> f128;
100+
fn fmaxf128(x: f128, y: f128) -> f128;
101+
fn fmaximumf128(x: f128, y: f128) -> f128;
102+
fn fminf128(x: f128, y: f128) -> f128;
103+
fn fminimumf128(x: f128, y: f128) -> f128;
104+
fn fmodf128(x: f128, y: f128) -> f128;
105+
fn rintf128(x: f128) -> f128;
106+
fn roundevenf128(x: f128) -> f128;
107+
fn roundf128(x: f128) -> f128;
108+
fn sqrtf128(x: f128) -> f128;
109+
fn truncf128(x: f128) -> f128;
110+
}
99111
}
100112

101-
// allow for windows (and other targets)
102-
intrinsics! {
103-
pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
104-
let r = self::libm::lgamma_r(x);
105-
*s = r.1;
106-
r.0
113+
/// This group of functions has more performance or precision issues than system versions, or
114+
/// are otherwise less well tested. Provide them only on platforms that have problems with the
115+
/// system `libm`.
116+
///
117+
/// As `libm` improves, more functions will be moved from this group to the first group.
118+
///
119+
/// Do not supply for any of the following:
120+
/// - x86 without sse2 due to ABI issues
121+
/// - <https://github.com/rust-lang/rust/issues/114479>
122+
/// - but exclude UEFI since it is a soft-float target
123+
/// - <https://github.com/rust-lang/rust/issues/128533>
124+
/// - All unix targets (linux, macos, freebsd, android, etc)
125+
/// - wasm with known target_os
126+
#[cfg(not(any(
127+
all(
128+
target_arch = "x86",
129+
not(target_feature = "sse2"),
130+
not(target_os = "uefi"),
131+
),
132+
unix,
133+
all(target_family = "wasm", not(target_os = "unknown"))
134+
)))]
135+
mod partial_availability {
136+
#[cfg(not(windows))]
137+
libm_intrinsics! {
138+
fn acos(x: f64) -> f64;
139+
fn acosf(n: f32) -> f32;
140+
fn asin(x: f64) -> f64;
141+
fn asinf(n: f32) -> f32;
142+
fn atan(x: f64) -> f64;
143+
fn atan2(x: f64, y: f64) -> f64;
144+
fn atan2f(a: f32, b: f32) -> f32;
145+
fn atanf(n: f32) -> f32;
146+
fn cos(x: f64) -> f64;
147+
fn cosf(x: f32) -> f32;
148+
fn cosh(x: f64) -> f64;
149+
fn coshf(n: f32) -> f32;
150+
fn erf(x: f64) -> f64;
151+
fn erfc(x: f64) -> f64;
152+
fn erfcf(x: f32) -> f32;
153+
fn erff(x: f32) -> f32;
154+
fn exp(x: f64) -> f64;
155+
fn exp2(x: f64) -> f64;
156+
fn exp2f(x: f32) -> f32;
157+
fn expf(x: f32) -> f32;
158+
fn expm1(x: f64) -> f64;
159+
fn expm1f(n: f32) -> f32;
160+
fn hypot(x: f64, y: f64) -> f64;
161+
fn hypotf(x: f32, y: f32) -> f32;
162+
fn ldexp(f: f64, n: i32) -> f64;
163+
fn ldexpf(f: f32, n: i32) -> f32;
164+
fn log(x: f64) -> f64;
165+
fn log10(x: f64) -> f64;
166+
fn log10f(x: f32) -> f32;
167+
fn log1p(x: f64) -> f64;
168+
fn log1pf(n: f32) -> f32;
169+
fn log2(x: f64) -> f64;
170+
fn log2f(x: f32) -> f32;
171+
fn logf(x: f32) -> f32;
172+
fn pow(x: f64, y: f64) -> f64;
173+
fn powf(x: f32, y: f32) -> f32;
174+
fn sin(x: f64) -> f64;
175+
fn sinf(x: f32) -> f32;
176+
fn sinh(x: f64) -> f64;
177+
fn sinhf(n: f32) -> f32;
178+
fn tan(x: f64) -> f64;
179+
fn tanf(n: f32) -> f32;
180+
fn tanh(x: f64) -> f64;
181+
fn tanhf(n: f32) -> f32;
182+
fn tgamma(x: f64) -> f64;
183+
fn tgammaf(x: f32) -> f32;
107184
}
108185

109-
pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
110-
let r = self::libm::lgammaf_r(x);
111-
*s = r.1;
112-
r.0
186+
// allow for windows (and other targets)
187+
intrinsics! {
188+
pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
189+
let r = self::libm::lgamma_r(x);
190+
*s = r.1;
191+
r.0
192+
}
193+
194+
pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
195+
let r = self::libm::lgammaf_r(x);
196+
*s = r.1;
197+
r.0
198+
}
113199
}
114200
}

0 commit comments

Comments
 (0)