This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit d4ab195

Add cr_hypot from core-math
1 parent 8f2a9ae commit d4ab195

2 files changed: +279 -59 lines changed

src/lib.rs (+2)

@@ -5,6 +5,8 @@
 #![cfg_attr(all(intrinsics_enabled, target_family = "wasm"), feature(wasm_numeric_instr))]
 #![cfg_attr(f128_enabled, feature(f128))]
 #![cfg_attr(f16_enabled, feature(f16))]
+#![feature(core_intrinsics)]
+#![allow(internal_features)]
 #![allow(clippy::assign_op_pattern)]
 #![allow(clippy::deprecated_cfg_attr)]
 #![allow(clippy::eq_op)]
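
The two added crate attributes exist because the ported code below calls core::intrinsics::fmaf64 from its __builtin_fma helper; core_intrinsics is an unstable internal feature, and the allow(internal_features) silences the lint that warns against relying on it. For illustration only (not part of this commit), the following sketch uses the stable f64::mul_add purely as a stand-in for that intrinsic, to show the error-free squaring trick the new hypot relies on:

// Illustration only: f64::mul_add stands in for the crate's __builtin_fma shim.
// With a fused multiply-add, fma(x, x, -hi) recovers the exact rounding error of
// the product x*x; cr_hypot uses this to keep x^2 + y^2 accurate beyond one double.
fn exact_square(x: f64) -> (f64, f64) {
    let hi = x * x;             // rounded square
    let lo = x.mul_add(x, -hi); // exact error term x*x - hi, no intermediate rounding
    (hi, lo)
}

fn main() {
    let (hi, lo) = exact_square(1.0 + f64::EPSILON);
    println!("hi = {hi:e}, lo = {lo:e}");
    assert_eq!(hi + lo, hi); // lo is far below the precision of hi on its own...
    assert!(lo != 0.0);      // ...yet it is nonzero, so it still carries information
}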

src/math/hypot.rs (+277 -59)

@@ -1,74 +1,292 @@
-use core::f64;
+/*
+Copyright (c) 2022 Alexei Sibidanov.
+
+This file is part of the CORE-MATH project
+(https://core-math.gitlabpages.inria.fr/).
+*/
 
 use super::sqrt;
 
-const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn hypot(x: f64, y: f64) -> f64 {
+    return cr_hypot(x, y);
+}
+
+fn cr_hypot(mut x: f64, mut y: f64) -> f64 {
+    let flag = get_flags();
+
+    let xi = x.to_bits();
+    let yi = y.to_bits();
+
+    let emsk: u64 = 0x7ffu64 << 52;
+    let mut ex: u64 = xi & emsk;
+    let mut ey: u64 = yi & emsk;
+    /* emsk corresponds to the upper bits of NaN and Inf (apart the sign bit) */
+    x = __builtin_fabs(x);
+    y = __builtin_fabs(y);
+    if __builtin_expect(ex == emsk || ey == emsk, false) {
+        /* Either x or y is NaN or Inf */
+        let wx: u64 = xi << 1;
+        let wy: u64 = yi << 1;
+        let wm: u64 = emsk << 1;
+        let ninf: i32 = ((wx == wm) ^ (wy == wm)) as i32;
+        let nqnn: i32 = (((wx >> 52) == 0xfff) ^ ((wy >> 52) == 0xfff)) as i32;
+        /* ninf is 1 when only one of x and y is +/-Inf
+           nqnn is 1 when only one of x and y is qNaN
+           IEEE 754 says that hypot(+/-Inf,qNaN)=hypot(qNaN,+/-Inf)=+Inf. */
+        if ninf != 0 && nqnn != 0 {
+            return f64::INFINITY;
+        }
+        return x + y; /* inf, nan */
+    }
+
+    let u: f64 = x.max(y);
+    let v: f64 = x.min(y);
+    let mut xd: u64 = u.to_bits();
+    let mut yd: u64 = v.to_bits();
+    ey = yd;
+
+    if __builtin_expect(ey >> 52 == 0, false) {
+        if yd == 0 {
+            return f64::from_bits(xd);
+        }
+
+        ex = xd;
+
+        if __builtin_expect(ex >> 52 == 0, false) {
+            if ex == 0 {
+                return 0.0;
+            }
+
+            return as_hypot_denorm(ex, ey);
+        }
+
+        let nz: u32 = ey.leading_zeros();
+        ey <<= nz - 11;
+        ey &= u64::MAX >> 12;
+        ey = ey.wrapping_sub(((nz as i64 - 12i64) << 52) as u64);
+        let t = ey; // why did they do this?
+        yd = t;
+    }
 
-fn sq(x: f64) -> (f64, f64) {
-    let xh: f64;
-    let xl: f64;
-    let xc: f64;
+    let de: u64 = xd.wrapping_sub(yd);
+    if __builtin_expect(de > (27_u64 << 52), false) {
+        return __builtin_fma(hf64!("0x1p-27"), v, u);
+    }
 
-    xc = x * SPLIT;
-    xh = x - xc + xc;
-    xl = x - xh;
-    let hi = x * x;
-    let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
-    (hi, lo)
+    let off: i64 = (0x3ff_i64 << 52) - (xd & emsk) as i64;
+    xd = xd.wrapping_add(off as u64);
+    yd = yd.wrapping_add(off as u64);
+    x = f64::from_bits(xd);
+    y = f64::from_bits(yd);
+    let x2: f64 = x * x;
+    let dx2: f64 = __builtin_fma(x, x, -x2);
+    let y2: f64 = y * y;
+    let dy2: f64 = __builtin_fma(y, y, -y2);
+    let r2: f64 = x2 + y2;
+    let ir2: f64 = 0.5 / r2;
+    let dr2: f64 = ((x2 - r2) + y2) + (dx2 + dy2);
+    let mut th: f64 = sqrt(r2);
+    let rsqrt: f64 = th * ir2;
+    let dz: f64 = dr2 - __builtin_fma(th, th, -r2);
+    let mut tl: f64 = rsqrt * dz;
+    th = fasttwosum(th, tl, &mut tl);
+    let mut thd: u64 = th.to_bits();
+    let tld = __builtin_fabs(tl).to_bits();
+    ex = thd;
+    ey = tld;
+    ex &= 0x7ff_u64 << 52;
+    let aidr: u64 = ey + (0x3fe_u64 << 52) - ex;
+    let mid: u64 = (aidr.wrapping_sub(0x3c90000000000000) + 16) >> 5;
+    if __builtin_expect(
+        mid == 0 || aidr < 0x39b0000000000000_u64 || aidr > 0x3c9fffffffffff80_u64,
+        false,
+    ) {
+        thd = as_hypot_hard(x, y, flag).to_bits();
+    }
+    thd = thd.wrapping_sub(off as u64);
+    if __builtin_expect(thd >= (0x7ff_u64 << 52), false) {
+        return as_hypot_overflow();
+    }
+
+    f64::from_bits(thd)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn hypot(mut x: f64, mut y: f64) -> f64 {
-    let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700
-    let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700
-
-    let mut uxi = x.to_bits();
-    let mut uyi = y.to_bits();
-    let uti;
-    let ex: i64;
-    let ey: i64;
-    let mut z: f64;
-
-    /* arrange |x| >= |y| */
-    uxi &= -1i64 as u64 >> 1;
-    uyi &= -1i64 as u64 >> 1;
-    if uxi < uyi {
-        uti = uxi;
-        uxi = uyi;
-        uyi = uti;
+fn fasttwosum(x: f64, y: f64, e: &mut f64) -> f64 {
+    let s: f64 = x + y;
+    let z: f64 = s - x;
+    *e = y - z;
+    s
+}
+
+fn as_hypot_overflow() -> f64 {
+    let z: f64 = hf64!("0x1.fffffffffffffp1023");
+    let f = z + z;
+    if f > z {
+        // errno = ERANGE
+    }
+    f
+}
+
+/* Here the square root is refined by Newton iterations: x^2+y^2 is exact
+   and fits in a 128-bit integer, so the approximation is squared (which
+   also fits in a 128-bit integer), compared and adjusted if necessary using
+   the exact value of x^2+y^2. */
+fn as_hypot_hard(x: f64, y: f64, flag: FExcept) -> f64 {
+    let op: f64 = 1.0 + hf64!("0x1p-54");
+    let om: f64 = 1.0 - hf64!("0x1p-54");
+    let mut xi: u64 = x.to_bits();
+    let yi: u64 = y.to_bits();
+    let mut bm: u64 = (xi & (u64::MAX >> 12)) | 1u64 << 52;
+    let mut lm: u64 = (yi & (u64::MAX >> 12)) | 1u64 << 52;
+    let be: i32 = (xi >> 52) as i32;
+    let le: i32 = (yi >> 52) as i32;
+    let ri: u64 = sqrt(x * x + y * y).to_bits();
+    let bs: i32 = 2;
+    let mut rm: u64 = ri & (u64::MAX >> 12);
+    let mut re: i32 = (ri >> 52) as i32 - 0x3ff;
+    rm |= 1u64 << 52;
+
+    for _ in 0..3 {
+        if __builtin_expect(rm == 1u64 << 52, true) {
+            rm = u64::MAX >> 11;
+            re -= 1;
+        } else {
+            rm -= 1;
+        }
+    }
+
+    bm <<= bs;
+    let mut m2: u64 = bm.wrapping_mul(bm);
+    let de: i32 = be - le;
+    let mut ls: i32 = bs - de;
+
+    if __builtin_expect(ls >= 0, true) {
+        lm <<= ls;
+        m2 += lm.wrapping_mul(lm);
+    } else {
+        let lm2: u128 = (lm as u128) * (lm as u128);
+        ls *= 2;
+        m2 += (lm2 >> -ls) as u64;
+        m2 |= ((lm2 << (128 + ls)) != 0) as u64;
+        extern crate std;
+        std::println!("here");
     }
 
-    /* special cases */
-    ex = (uxi >> 52) as i64;
-    ey = (uyi >> 52) as i64;
-    x = f64::from_bits(uxi);
-    y = f64::from_bits(uyi);
-    /* note: hypot(inf,nan) == inf */
-    if ey == 0x7ff {
-        return y;
+    let k: i32 = bs + re;
+    let mut d: i64;
+
+    loop {
+        rm += 1 + (rm >= (1u64 << 53)) as u64;
+        let tm: u64 = rm << k;
+        let rm2: u64 = tm * tm;
+        d = m2 as i64 - rm2 as i64;
+
+        if d <= 0 {
+            break;
+        }
     }
-    if ex == 0x7ff || uyi == 0 {
-        return x;
+
+    if d == 0 {
+        set_flags(flag);
+    } else {
+        if __builtin_expect(op == om, true) {
+            let tm: u64 = (rm << k) - (1 << (k - (rm <= (1u64 << 53)) as i32));
+            d = m2 as i64 - (tm * tm) as i64;
+
+            if __builtin_expect(d != 0, true) {
+                rm += d as u64 >> 63;
+            } else {
+                rm -= rm & 1;
+            }
+        } else {
+            rm -= ((op == 1.0) as u64) << (rm > (1u64 << 53)) as u32;
+        }
     }
-    /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
-    /* 64 difference is enough for ld80 double_t */
-    if ex - ey > 64 {
-        return x + y;
+
+    if rm >= (1u64 << 53) {
+        rm >>= 1;
+        re += 1;
     }
 
-    /* precise sqrt argument in nearest rounding mode without overflow */
-    /* xh*xh must not overflow and xl*xl must not underflow in sq */
-    z = 1.;
-    if ex > 0x3ff + 510 {
-        z = x1p700;
-        x *= x1p_700;
-        y *= x1p_700;
-    } else if ey < 0x3ff - 450 {
-        z = x1p_700;
-        x *= x1p700;
-        y *= x1p700;
+    let e: u64 = (be - 1 + re) as u64;
+    xi = (e << 52) + rm;
+
+    f64::from_bits(xi)
+}
+
+fn as_hypot_denorm(mut a: u64, mut b: u64) -> f64 {
+    let op: f64 = 1.0 + hf64!("0x1p-54");
+    let om: f64 = 1.0 - hf64!("0x1p-54");
+    let af: f64 = a as i64 as f64;
+    let bf: f64 = b as i64 as f64;
+    a <<= 1;
+    b <<= 1;
+    // Is this casting right?
+    let mut rm: u64 = sqrt(af * af + bf * bf) as u64;
+    let tm: u64 = rm << 1;
+    let mut d: i64 = (a.wrapping_mul(a) as i64)
+        .wrapping_sub(tm.wrapping_mul(tm) as i64)
+        .wrapping_add(b.wrapping_mul(b) as i64);
+    let sd: i64 = d >> 63;
+    let um: i64 = ((rm as i64) ^ sd) - sd;
+    let mut drm: i64 = sd + 1;
+    let mut dd: i64 = (um << 3) + 4;
+    let mut p_d: i64;
+    rm -= drm as u64;
+    drm += sd;
+    loop {
+        p_d = d;
+        rm = rm.wrapping_add(drm as u64);
+        d = d.wrapping_sub(dd);
+        dd = d.wrapping_add(8);
+        if !__builtin_expect((d ^ p_d) > 0, false) {
+            break;
+        }
     }
-    let (hx, lx) = sq(x);
-    let (hy, ly) = sq(y);
-    z * sqrt(ly + lx + hy + hx)
+    p_d = (sd & d) + (!sd & p_d);
+    if __builtin_expect(p_d != 0, true) {
+        if __builtin_expect(op == om, true) {
+            let sum: i64 = p_d.wrapping_sub(4_i64.wrapping_mul(rm as i64)).wrapping_sub(1);
+
+            if __builtin_expect(sum != 0, true) {
+                rm = rm.wrapping_add((sum as u64 >> 63) + 1);
+            } else {
+                rm += rm & 1;
+            }
+        } else {
+            rm += (op > 1.0) as u64;
+        }
+    }
+    let xi: u64 = rm;
+    f64::from_bits(xi)
+}
+
+fn __builtin_expect<T>(v: T, _exp: T) -> T {
+    v
+}
+
+fn __builtin_fabs(x: f64) -> f64 {
+    x.abs()
+}
+
+fn __builtin_copysign(x: f64, y: f64) -> f64 {
+    x.copysign(y)
+}
+
+fn __builtin_fma(x: f64, y: f64, z: f64) -> f64 {
+    unsafe { core::intrinsics::fmaf64(x, y, z) }
+}
+
+type FExcept = u32;
+
+fn get_rounding_mode(_flag: &mut FExcept) -> i32 {
+    // Always nearest
+    0
+}
+
+fn set_flags(_flag: FExcept) {}
+
+fn get_flags() -> FExcept {
+    0
 }
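
Aside (not part of the commit): fasttwosum above is the classic Fast2Sum error-free transformation. Provided |x| >= |y|, it returns the correctly rounded sum while storing the exact rounding error in e, which is how cr_hypot folds the correction term tl into the square-root approximation th. A minimal standalone sketch of the same idea:

// Standalone restatement of the Fast2Sum step used in cr_hypot.
// Precondition: |x| >= |y|; then x + y == s + e exactly.
fn fast_two_sum(x: f64, y: f64) -> (f64, f64) {
    let s = x + y; // rounded sum
    let z = s - x; // portion of y actually absorbed into s (exact under the precondition)
    let e = y - z; // rounding error of the addition, recovered exactly
    (s, e)
}

fn main() {
    let (s, e) = fast_two_sum(1.0, 1e-20);
    assert_eq!(s, 1.0);   // the tiny addend disappears from the rounded sum...
    assert_eq!(e, 1e-20); // ...but is recovered exactly in the error term
    println!("s = {s}, e = {e}");
}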

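Also for illustration (no such test is added by this commit): a few spot checks, assuming the crate's test setup can call this hypot directly, covering the special cases the new code documents, in particular the IEEE 754 rule that hypot(+/-Inf, qNaN) = hypot(qNaN, +/-Inf) = +Inf.

// Hypothetical spot checks; hypot refers to the function added above.
#[test]
fn hypot_special_cases() {
    // IEEE 754: an infinity paired with a quiet NaN still gives +Inf.
    assert_eq!(hypot(f64::INFINITY, f64::NAN), f64::INFINITY);
    assert_eq!(hypot(f64::NAN, f64::NEG_INFINITY), f64::INFINITY);
    // A NaN paired with a finite value propagates as NaN.
    assert!(hypot(f64::NAN, 1.0).is_nan());
    // Zeros and signs: the result is the magnitude of the other argument.
    assert_eq!(hypot(-3.0, 0.0), 3.0);
    assert_eq!(hypot(0.0, 0.0), 0.0);
    // An exact Pythagorean triple should round to exactly 5.
    assert_eq!(hypot(3.0, 4.0), 5.0);
}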