rust-lang
diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs
Lines changed: 6 additions & 288 deletions
@@ -11,6 +11,10 @@ use core_simd::simd;
 
 use simd::{LaneCount, Simd, SupportedLaneCount};
 
+mod libm32;
+#[cfg(test)]
+mod test_libm32;
+
 #[cfg(feature = "as_crate")]
 mod experimental {
     pub trait Sealed {}
@@ -115,7 +119,9 @@ pub trait StdFloat: Sealed + Sized {
     /// Returns the floating point's fractional value, with its integer part removed.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn fract(self) -> Self;
+}
 
+pub trait StdLibm : StdFloat {
     fn sin(self) -> Self;
 
     fn cos(self) -> Self;
@@ -143,112 +149,6 @@ where
     fn fract(self) -> Self {
         self - self.trunc()
     }
-
-    /// Calculate the sine of the angle
-    /// Note: this is hand-edited from generated scalar code.
-    /// In an ideal world, we would generate this directly by code transformation.
-    #[inline]
-    fn sin(self) -> Self {
-        #[allow(non_snake_case)]
-        let RECIP_2PI = Self::splat(0.15915494);
-
-        let scaled = self * RECIP_2PI;
-        let x = scaled - scaled.round();
-        Self::splat(-12.26885994095919635608)
-            .mul_add(x * x, Self::splat(41.21624105096574396575))
-            .mul_add(x * x, Self::splat(-76.58672703333290836700))
-            .mul_add(x * x, Self::splat(81.59746095374827019356))
-            .mul_add(x * x, Self::splat(-41.34151143437582891705))
-            .mul_add(x * x, Self::splat(6.28318452581127506328))
-            * x
-    }
-
-    fn cos(self) -> Self {
-        #[allow(non_snake_case)]
-        let RECIP_2PI = Self::splat(0.15915494);
-
-        let scaled = self * RECIP_2PI;
-        let x = scaled - scaled.round();
-        Self::splat(6.52865816174499269880)
-            .mul_add(x * x, Self::splat(-25.97327546890330396608))
-            .mul_add(x * x, Self::splat(60.17118230812820383560))
-            .mul_add(x * x, Self::splat(-85.45091743827674607508))
-            .mul_add(x * x, Self::splat(64.93918704099473042873))
-            .mul_add(x * x, Self::splat(-19.73920667935656472596))
-            .mul_add(x * x, Self::splat(1.00000000000000000000))
-    }
-
-    fn tan(self) -> Self {
-        use core::f32::consts::PI;
-        let scaled: Self = self * Self::splat(1.0 / PI);
-        let x: Self = scaled - scaled.round();
-        let recip: Self = (x * x - Self::splat(0.25)).recip();
-        let y: Self = Self::splat(0.01439730036301634345)
-            .mul_add(x * x, Self::splat(0.02101734538976238579))
-            .mul_add(x * x, Self::splat(0.05285888255895108345))
-            .mul_add(x * x, Self::splat(0.13475448281475060771))
-            .mul_add(x * x, Self::splat(0.55773663386075044866))
-            .mul_add(x * x, Self::splat(-0.78539816491781455948))
-            * x;
-        y * recip
-    }
-    
-    fn asin(self) -> Self {
-        use core::f32::consts::PI;
-        let lim: Self = Self::splat(0.9);
-        let c: Self = self.lanes_lt(Self::splat(0.0)).select(Self::splat(-PI / 2.0), Self::splat(PI / 2.0));
-        let s: Self = self.lanes_lt(Self::splat(0.0)).select(Self::splat(-1.0), Self::splat(1.0));
-        let x: Self = (self * self).lanes_lt(lim * lim).select(self, (Self::splat(1.0) - self * self).sqrt());
-        let y: Self = Self::splat(4374.97702992533695457424)
-            .mul_add(x * x, Self::splat(-13781.55764426881951685974))
-            .mul_add(x * x, Self::splat(17105.69475701115952774357))
-            .mul_add(x * x, Self::splat(-10486.64894150265898388567))
-            .mul_add(x * x, Self::splat(3231.76028705607279348342))
-            .mul_add(x * x, Self::splat(-447.56480696327035255708))
-            .mul_add(x * x, Self::splat(21.78206149264184872939))
-            .mul_add(x * x, Self::splat(0.84158415752395745675))
-            * x;
-        (self * self).lanes_lt(lim * lim).select(y, c - y * s)
-    }
-    
-    fn acos(self) -> Self {
-        use core::f32::consts::PI;
-        let lim: Self = Self::splat(0.9);
-        let c: Self = self.lanes_lt(Self::splat(0.0)).select(Self::splat(PI), Self::splat(0.0));
-        let s: Self = self.lanes_lt(Self::splat(0.0)).select(Self::splat(1.0), Self::splat(-1.0));
-        let x: Self = (self * self).lanes_lt(lim * lim).select(self, (Self::splat(1.0) - self * self).sqrt());
-        // let c: Self = select(self < 0.0, PI, 0.0);
-        // let s: Self = select(self < 0.0, 1.0, -1.0);
-        // let x: Self = select(self * self < lim * lim, self, (1.0 - self * self).sqrt());
-        let y: Self = Self::splat(4374.97702992533695457424)
-            .mul_add(x * x, Self::splat(-13781.55764426881951685974))
-            .mul_add(x * x, Self::splat(17105.69475701115952774357))
-            .mul_add(x * x, Self::splat(-10486.64894150265898388567))
-            .mul_add(x * x, Self::splat(3231.76028705607279348342))
-            .mul_add(x * x, Self::splat(-447.56480696327035255708))
-            .mul_add(x * x, Self::splat(21.78206149264184872939))
-            .mul_add(x * x, Self::splat(0.84158415752395745675))
-            * x;
-        (self * self).lanes_lt(lim * lim).select(y, c - y * s)
-    }
-    
-    fn atan(self) -> Self {
-        use core::f32::consts::PI;
-        let lim: Self = Self::splat(1.0);
-        let c: Self = self.lanes_lt(Self::splat(0.0)).select(Self::splat(-PI / 2.0), Self::splat(PI / 2.0));
-        let small = self.abs().lanes_lt(lim);
-        let x: Self = small.select(self, self.recip());
-        let y: Self = Self::splat(95.70126383842530559360)
-            .mul_add(x * x, Self::splat(424.99907022806059540464))
-            .mul_add(x * x, Self::splat(-767.48259680040570156003))
-            .mul_add(x * x, Self::splat(714.51953012224223415829))
-            .mul_add(x * x, Self::splat(-354.32654395426962592865))
-            .mul_add(x * x, Self::splat(83.96179897148539189638))
-            .mul_add(x * x, Self::splat(-6.23958170715441509270))
-            .mul_add(x * x, Self::splat(1.05498514186427524914))
-            * x;
-        small.select(y, c - y)
-    }
 }
 
 impl<const N: usize> StdFloat for Simd<f64, N>
@@ -261,36 +161,6 @@ where
     fn fract(self) -> Self {
         self - self.trunc()
     }
-
-    #[inline]
-    fn sin(self) -> Self {
-        self
-    }
-
-    #[inline]
-    fn cos(self) -> Self {
-        self
-    }
-
-    #[inline]
-    fn tan(self) -> Self {
-        self
-    }
-
-    #[inline]
-    fn asin(self) -> Self {
-        self
-    }
-
-    #[inline]
-    fn acos(self) -> Self {
-        self
-    }
-
-    #[inline]
-    fn atan(self) -> Self {
-        self
-    }
 }
 
 #[cfg(test)]
@@ -311,156 +181,4 @@ mod tests {
         let _ = x2.abs() * x2;
         let _ = x.sin();
     }
-
-    const NUM_ITER: usize = 0x10000;
-
-    macro_rules! test_range {
-        (
-                min: $min: expr,
-                max: $max: expr,
-                limit: $limit: expr,
-                scalar_fn: $scalar_fn: expr,
-                vector_fn: $vector_fn: expr,
-                scalar_type: $scalar_type: ty,
-                vector_type: $vector_type: ty,
-            ) => {{
-            let limit = <$vector_type>::splat($limit);
-            let b = (($max) - ($min)) * (1.0 / NUM_ITER as $scalar_type);
-            let a = $min;
-            let sf = $scalar_fn;
-            let vf = $vector_fn;
-            for i in (0..NUM_ITER / 4) {
-                let fi = (i * 4) as $scalar_type;
-                let x = <$vector_type>::from_array([
-                    (fi + 0.0) * b + a,
-                    (fi + 1.0) * b + a,
-                    (fi + 2.0) * b + a,
-                    (fi + 3.0) * b + a,
-                ]);
-                let yref = <$vector_type>::from_array([sf(x[0]), sf(x[1]), sf(x[2]), sf(x[3])]);
-                let y = vf(x);
-                let e = (y - yref);
-                if !(e.abs().lanes_le(limit)).all() {
-                    panic!("\nx     ={:20.16?}\ne     ={:20.16?}\nlimit ={:20.16?}\nvector={:20.16?}\nscalar={:20.16?}\nvector_fn={}", x, e, limit, y, yref, stringify!($vector_fn));
-                }
-            }
-        }};
-    }
-
-    #[test]
-    fn sin_f32() {
-        use core::f32::consts::PI;
-        let one_ulp = (2.0_f32).powi(-23);
-
-        test_range!(
-            min: -PI/4.0,
-            max: PI/4.0,
-            limit: one_ulp * 1.0,
-            scalar_fn: |x : f32| x.sin(),
-            vector_fn: |x : f32x4| x.sin(),
-            scalar_type: f32,
-            vector_type: f32x4,
-        );
-
-        test_range!(
-            min: -PI/2.0,
-            max: PI/2.0,
-            limit: one_ulp * 2.0,
-            scalar_fn: |x : f32| x.sin(),
-            vector_fn: |x : f32x4| x.sin(),
-            scalar_type: f32,
-            vector_type: f32x4,
-        );
-
-        test_range!(
-            min: -PI,
-            max: PI,
-            limit: one_ulp * 8.0,
-            scalar_fn: |x : f32| x.sin(),
-            vector_fn: |x : f32x4| x.sin(),
-            scalar_type: f32,
-            vector_type: f32x4,
-        );
-    }
-
-    #[test]
-    fn cos_f32() {
-        use core::f32::consts::PI;
-        let one_ulp = (2.0_f32).powi(-23);
-
-        // In the range +/- pi/4 the input has 1 ulp of error.
-        test_range!(
-            min: -PI/4.0,
-            max: PI/4.0,
-            limit: one_ulp * 1.0,
-            scalar_fn: |x : f32| x.cos(),
-            vector_fn: |x : f32x4| x.cos(),
-            scalar_type: f32,
-            vector_type: f32x4,
-        );
-
-        // In the range +/- pi/2 the input and output has 2 ulp of error.
-        test_range!(
-            min: -PI/2.0,
-            max: PI/2.0,
-            limit: one_ulp * 2.0,
-            scalar_fn: |x : f32| x.cos(),
-            vector_fn: |x : f32x4| x.cos(),
-            scalar_type: f32,
-            vector_type: f32x4,
-        );
-
-        // In the range +/- pi the input has 4 ulp of error and the output has 5.
-        // Note that the scalar cos also has this error but the implementation
-        // is different.
-        test_range!(
-            min: -PI,
-            max: PI,
-            limit: one_ulp * 8.0,
-            scalar_fn: |x : f32| x.cos(),
-            vector_fn: |x : f32x4| x.cos(),
-            scalar_type: f32,
-            vector_type: f32x4,
-        );
-    }
-
-    #[test]
-    fn tan_f32() {
-        use core::f32::consts::PI;
-        let one_ulp = (2.0_f32).powi(-23);
-
-        // For the outsides, reciprocal accuracy is important.
-        // Note that the vector function correctly gets -inf for -PI/2
-        // but the scalar function does not.
-        test_range!(
-            min: -PI/2.0 + 0.00001,
-            max: -PI/4.0,
-            limit: one_ulp * 3.0,
-            scalar_fn: |x : f32| x.tan().recip(),
-            vector_fn: |x : f32x4| x.tan().recip(),
-            scalar_type: f32,
-            vector_type: f32x4,
-        );
-
-        // For the insides, absolute accuracy is important.
-        test_range!(
-            min: -PI/4.0,
-            max: PI/4.0,
-            limit: one_ulp * 2.0,
-            scalar_fn: |x : f32| x.tan(),
-            vector_fn: |x : f32x4| x.tan(),
-            scalar_type: f32,
-            vector_type: f32x4,
-        );
-
-        test_range!(
-            min: PI/4.0,
-            max: PI/2.0 - 0.00001,
-            limit: one_ulp * 3.0,
-            scalar_fn: |x : f32| x.tan().recip(),
-            vector_fn: |x : f32x4| x.tan().recip(),
-            scalar_type: f32,
-            vector_type: f32x4,
-        );
-    }
 }