
Commit f679094

Add cr_hypot from core-math

1 parent b67b4cc

1 file changed, 289 insertions(+), 13 deletions(-)

src/math/hypot.rs (+289 -13)
@@ -1,24 +1,22 @@
+/*
+Copyright (c) 2022 Alexei Sibidanov.
+
+This file is part of the CORE-MATH project
+(https://core-math.gitlabpages.inria.fr/).
+*/
+
 use core::f64;
 
 use super::sqrt;
 
 const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1
 
-fn sq(x: f64) -> (f64, f64) {
-    let xh: f64;
-    let xl: f64;
-    let xc: f64;
-
-    xc = x * SPLIT;
-    xh = x - xc + xc;
-    xl = x - xh;
-    let hi = x * x;
-    let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
-    (hi, lo)
-}
-
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn hypot(mut x: f64, mut y: f64) -> f64 {
+    if true {
+        return cr_hypot(x, y);
+    }
+
     let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700
     let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700
 
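The if true { return cr_hypot(x, y); } gate above routes every call through the new correctly rounded path, leaving the rest of the old routine unreachable. Careful scaling matters because the naive formula overflows even when the true result is representable; a minimal sketch, not part of this diff, using std's f64::hypot as a stand-in for this crate's hypot:

    fn main() {
        let (x, y) = (1.0e200_f64, 1.0e200_f64);
        let naive = (x * x + y * y).sqrt(); // x * x already overflows to +inf
        let careful = x.hypot(y);           // scaled computation stays finite
        assert!(naive.is_infinite());
        assert!(careful.is_finite());       // about 1.4142135623730951e200
        println!("naive = {naive}, careful = {careful:e}");
    }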
@@ -72,3 +70,281 @@ pub fn hypot(mut x: f64, mut y: f64) -> f64 {
     let (hy, ly) = sq(y);
     z * sqrt(ly + lx + hy + hx)
 }
+
+fn sq(x: f64) -> (f64, f64) {
+    let xh: f64;
+    let xl: f64;
+    let xc: f64;
+
+    xc = x * SPLIT;
+    xh = x - xc + xc;
+    xl = x - xh;
+    let hi = x * x;
+    let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
+    (hi, lo)
+}
+
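sq is Dekker-style exact squaring: SPLIT = 2^27 + 1 splits x into a high half xh of at most 26 significant bits and a low half xl with xh + xl == x, so every partial product is exact and lo recovers the rounding error of x * x. A minimal check, not part of this diff, assuming round-to-nearest and no overflow, with f64::mul_add standing in for a fused multiply-add:

    fn sq(x: f64) -> (f64, f64) {
        // Copy of the helper above, kept here so the sketch is self-contained.
        const SPLIT: f64 = 134217728. + 1.; // 2^27 + 1
        let xc = x * SPLIT;
        let xh = x - xc + xc; // high half
        let xl = x - xh;      // low half, xh + xl == x exactly
        let hi = x * x;
        let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
        (hi, lo)
    }

    fn main() {
        let x = 1.1_f64;
        let (hi, lo) = sq(x);
        // A fused multiply-add computes the exact rounding error of x * x,
        // which is what lo reconstructs from the split halves.
        assert_eq!(lo, f64::mul_add(x, x, -hi));
    }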
+fn cr_hypot(mut x: f64, mut y: f64) -> f64 {
+    let flag = get_flags();
+
+    let xi = x.to_bits();
+    let yi = y.to_bits();
+
+    let emsk: u64 = 0x7ffu64 << 52;
+    let mut ex: u64 = xi & emsk;
+    let mut ey: u64 = yi & emsk;
+    /* emsk corresponds to the upper bits of NaN and Inf (apart the sign bit) */
+    x = __builtin_fabs(x);
+    y = __builtin_fabs(y);
+    if __builtin_expect(ex == emsk || ey == emsk, false) {
+        /* Either x or y is NaN or Inf */
+        let wx: u64 = xi << 1;
+        let wy: u64 = yi << 1;
+        let wm: u64 = emsk << 1;
+        let ninf: i32 = ((wx == wm) ^ (wy == wm)) as i32;
+        let nqnn: i32 = (((wx >> 52) == 0xfff) ^ ((wy >> 52) == 0xfff)) as i32;
+        /* ninf is 1 when only one of x and y is +/-Inf
+           nqnn is 1 when only one of x and y is qNaN
+           IEEE 754 says that hypot(+/-Inf,qNaN)=hypot(qNaN,+/-Inf)=+Inf. */
+        if ninf > 0 && nqnn > 0 {
+            return f64::INFINITY;
+        }
+        return x + y; /* inf, nan */
+    }
+
+    let u: f64 = x.max(y);
+    let v: f64 = x.min(y);
+    let mut xd: u64 = u.to_bits();
+    let mut yd: u64 = v.to_bits();
+    ey = yd;
+    if __builtin_expect(ey >> 52 == 0, false) {
+        if yd == 0 {
+            return f64::from_bits(xd);
+        }
+        ex = xd;
+        if __builtin_expect(ex >> 52 == 0, false) {
+            if ex == 0 {
+                return 0.0;
+            }
+            return as_hypot_denorm(ex, ey);
+        }
+
+        let nz: u32 = ey.leading_zeros();
+        ey <<= nz - 11;
+        ey &= u64::MAX >> 12;
+        ey -= ((nz as i64 - 12i64) << 52) as u64;
+        let t = ey; // why did they do this?
+        yd = t;
+    }
+
+    let de: u64 = xd - yd;
+    if __builtin_expect(de > (27_u64 << 52), false) {
+        return __builtin_fma(hf64!("0x1p-27"), v, u);
+    }
+
+    let off: i64 = (0x3ff_i64 << 52) - (xd & emsk) as i64;
+    xd = xd.wrapping_mul(off as u64);
+    yd = yd.wrapping_mul(off as u64);
+    x = f64::from_bits(xd);
+    y = f64::from_bits(yd);
+    let x2: f64 = x * x;
+    let dx2: f64 = __builtin_fma(x, x, -x2);
+    let y2: f64 = y * y;
+    let dy2: f64 = __builtin_fma(y, y, -y2);
+    let r2: f64 = x2 + y2;
+    let ir2: f64 = 0.5 / r2;
+    let dr2: f64 = ((x2 - r2) + y2) + (dx2 + dy2);
+    let mut th: f64 = sqrt(r2);
+    let rsqrt: f64 = th * ir2;
+    let dz: f64 = dr2 - __builtin_fma(th, th, -r2);
+    let mut tl: f64 = rsqrt * dz;
+    th = fasttwosum(th, tl, &mut tl);
+    let mut thd: u64 = th.to_bits();
+    let tld = __builtin_fabs(tl).to_bits();
+    ex = thd;
+    ey = tld;
+    ex &= 0x7ff_u64 << 52;
+    let aidr: u64 = ey + (0x3fe_u64 << 52) - ex;
+    let mid: u64 = (aidr.wrapping_sub(0x3c90000000000000) + 16) >> 5;
+    if __builtin_expect(
+        mid == 0 || aidr < 0x39b0000000000000_u64 || aidr > 0x3c9fffffffffff80_u64,
+        false,
+    ) {
+        thd = as_hypot_hard(x, y, flag).to_bits();
+    }
+    thd -= off as u64;
+    if __builtin_expect(thd >= (0x7ff_u64 << 52), false) {
+        return as_hypot_overflow();
+    }
+
+    f64::from_bits(thd)
+}
+
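The NaN/Inf block implements the IEEE 754 convention quoted in its comment: hypot(+/-Inf, qNaN) and hypot(qNaN, +/-Inf) are +Inf, any other NaN operand propagates, and hypot of two zeros is +0. A few spot checks, not part of this diff, written against std's f64::hypot, which follows the same convention:

    fn main() {
        assert_eq!(f64::INFINITY.hypot(f64::NAN), f64::INFINITY);
        assert_eq!(f64::NAN.hypot(f64::NEG_INFINITY), f64::INFINITY);
        assert!(f64::NAN.hypot(1.0).is_nan());
        assert_eq!(0.0_f64.hypot(-0.0), 0.0);
    }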
+fn fasttwosum(x: f64, y: f64, e: &mut f64) -> f64 {
+    let s: f64 = x + y;
+    let z: f64 = s - x;
+    *e = y - z;
+    s
+}
+
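fasttwosum is the classic fast-two-sum step: when |x| >= |y|, the rounded sum s = x + y and the term e = y - (s - x) satisfy x + y == s + e exactly, which is how the rounded square root th and its correction tl are renormalized in cr_hypot. A minimal illustration, not part of this diff, assuming round-to-nearest:

    fn fasttwosum(x: f64, y: f64, e: &mut f64) -> f64 {
        let s = x + y; // rounded sum
        let z = s - x; // the part of y that made it into s
        *e = y - z;    // exact rounding error of the sum
        s
    }

    fn main() {
        let (x, y) = (1.0_f64, 1.0e-20_f64);
        let mut e = 0.0;
        let s = fasttwosum(x, y, &mut e);
        assert_eq!(s, 1.0);     // y is lost in the rounded sum...
        assert_eq!(e, 1.0e-20); // ...but recovered exactly in the error term
    }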
+fn as_hypot_overflow() -> f64 {
+    let z: f64 = hf64!("0x1.fffffffffffffp1023");
+    let f = z + z;
+    if f > z {
+        // errno = ERANGE
+    }
+    f
+}
+
+/* Here the square root is refined by Newton iterations: x^2+y^2 is exact
+   and fits in a 128-bit integer, so the approximation is squared (which
+   also fits in a 128-bit integer), compared and adjusted if necessary using
+   the exact value of x^2+y^2. */
+fn as_hypot_hard(x: f64, y: f64, flag: FExcept) -> f64 {
+    let op: f64 = 1.0 + hf64!("0x1p-54");
+    let om: f64 = 1.0 - hf64!("0x1p-54");
+    let mut xi: u64 = x.to_bits();
+    let yi: u64 = y.to_bits();
+    let mut bm: u64 = (xi & (u64::MAX >> 12)) | 1u64 << 52;
+    let mut lm: u64 = (yi & (u64::MAX >> 12)) | 1u64 << 52;
+    let be: i32 = (xi >> 52) as i32;
+    let le: i32 = (yi >> 52) as i32;
+    let ri: u64 = sqrt(x * x + y * y).to_bits();
+    let bs: i32 = 2;
+    let mut rm: u64 = ri & (u64::MAX >> 12);
+    let mut re: i32 = ((ri >> 52) - 0x3ff) as i32;
+    rm |= 1u64 << 52;
+
+    for _ in 0..3 {
+        if __builtin_expect(rm == 1u64 << 52, true) {
+            rm = u64::MAX >> 11;
+            re -= 1;
+        } else {
+            rm -= 1;
+        }
+    }
+    bm <<= bs;
+    let mut m2: u64 = bm * bm;
+    let de: i32 = be - le;
+    let mut ls: i32 = bs - de;
+    if __builtin_expect(ls >= 0, true) {
+        lm <<= ls;
+        m2 += lm * lm;
+    } else {
+        let lm2: u128 = (lm as u128) * (lm as u128);
+        ls *= 2;
+        m2 += (lm2 >> -ls) as u64;
+        m2 |= ((lm2 << (128 + ls)) != 0) as u64;
+    }
+    let k: i32 = bs + re;
+    let mut d: i64;
+    loop {
+        rm += 1 + (rm >= (1u64 << 53)) as u64;
+        let tm: u64 = rm << k;
+        let rm2: u64 = tm * tm;
+        d = m2 as i64 - rm2 as i64;
+
+        if d <= 0 {
+            break;
+        }
+    }
+
+    if d == 0 {
+        set_flags(flag);
+    } else {
+        if __builtin_expect(op == om, true) {
+            let tm: u64 = (rm << k) - (1 << (k - (rm <= (1u64 << 53)) as i32));
+            d = m2 as i64 - (tm * tm) as i64;
+
+            if __builtin_expect(d != 0, true) {
+                rm += d as u64 >> 63;
+            } else {
+                rm -= rm & 1;
+            }
+        } else {
+            rm -= ((op == 1.0) as u64) << (rm > (1u64 << 53)) as u32;
+        }
+    }
+    if rm >= (1u64 << 53) {
+        rm >>= 1;
+        re += 1;
+    }
+
+    let e: u64 = (be - 1 + re) as u64;
+    xi = (e << 52) + rm;
+
+    f64::from_bits(xi)
+}
+
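The comment above as_hypot_hard gives the hard-case strategy: with the significands treated as integers, x^2 + y^2 is exact in 128 bits, so a floating-point square-root guess can be verified and nudged by comparing its square against that exact value. A stripped-down illustration of the same check-and-adjust idea, not part of this diff and without the directed-rounding and tie handling of the real code:

    // Floor integer square root: start from a floating-point guess, then fix
    // the last units by exact integer comparison against n.
    fn isqrt(n: u128) -> u128 {
        let mut r = (n as f64).sqrt() as u128; // approximate root
        while r.checked_mul(r).map_or(true, |r2| r2 > n) {
            r -= 1; // guess too large (or its square overflows): step down
        }
        while (r + 1).checked_mul(r + 1).map_or(false, |r2| r2 <= n) {
            r += 1; // guess too small: step up
        }
        r
    }

    fn main() {
        let (x, y) = (3_u128 << 40, 4_u128 << 40);
        // x^2 + y^2 is exact as an integer, so the root can be checked exactly.
        assert_eq!(isqrt(x * x + y * y), 5 << 40);
    }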
+fn as_hypot_denorm(mut a: u64, mut b: u64) -> f64 {
+    let op: f64 = 1.0 + hf64!("0x1p-54");
+    let om: f64 = 1.0 - hf64!("0x1p-54");
+    let af: f64 = a as f64;
+    let bf: f64 = b as f64;
+    a <<= 1;
+    b <<= 1;
+    // Is this casting right?
+    let mut rm: u64 = sqrt(af * af + bf * bf) as u64;
+    let tm: u64 = rm << 1;
+    let mut d: i64 = ((a * a) as i64) - ((tm * tm) as i64) + ((b * b) as i64);
+    let s_d: i64 = d >> 63;
+    let um: i64 = ((rm as i64) ^ s_d) - s_d;
+    let mut drm: i64 = s_d + 1;
+    let mut dd: i64 = (um << 3) + 4;
+    let mut p_d: i64;
+    rm -= drm as u64;
+    drm += s_d;
+    loop {
+        p_d = d;
+        rm += drm as u64;
+        d -= dd;
+        dd += 8;
+        if !__builtin_expect((d ^ p_d) > 0, false) {
+            break;
+        }
+    }
+    p_d = (s_d & d) + (!s_d & p_d);
+    if __builtin_expect(p_d != 0, true) {
+        if __builtin_expect(op == om, true) {
+            let sum: i64 = p_d - 4 * (rm as i64) - 1;
+            if __builtin_expect(sum != 0, true) {
+                rm += (sum as u64 >> 63) + 1;
+            } else {
+                rm += rm & 1;
+            }
+        } else {
+            rm += (op > 1.0) as u64;
+        }
+    }
+    let xi: u64 = rm;
+    f64::from_bits(xi)
+}
+
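On the "Is this casting right?" question above: Rust's as conversion from f64 to u64 truncates toward zero and saturates at the type's bounds, and if the C reference assigns the sqrt result to an unsigned 64-bit integer, that implicit conversion also truncates toward zero, so the two agree for in-range values. A small illustration, not part of this diff:

    fn main() {
        // f64 -> u64 with `as` truncates toward zero and saturates.
        assert_eq!(2.9_f64 as u64, 2);
        assert_eq!((-1.5_f64) as u64, 0);
        assert_eq!(f64::INFINITY as u64, u64::MAX);
    }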
+fn __builtin_expect<T>(v: T, _exp: T) -> T {
+    v
+}
+
+fn __builtin_fabs(x: f64) -> f64 {
+    unsafe { core::intrinsics::fabsf64(x) }
+}
+
+fn __builtin_copysign(x: f64, y: f64) -> f64 {
+    unsafe { core::intrinsics::copysignf64(x, y) }
+}
+
+fn __builtin_fma(x: f64, y: f64, z: f64) -> f64 {
+    unsafe { core::intrinsics::fmaf64(x, y, z) }
+}
+
+type FExcept = u32;
+
+fn get_rounding_mode(_flag: &mut FExcept) -> i32 {
+    // Always nearest
+    0
+}
+
+fn set_flags(_flag: FExcept) {}
+
+fn get_flags() -> FExcept {
+    0
+}
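The remaining items are shims for the GCC builtins and fenv hooks used by the C reference: the branch hints become no-ops, fabs/copysign/fma map to core intrinsics, and the exception-flag type is a placeholder. A quick way to exercise the new path, not part of this diff and assuming the crate is published as libm with a public libm::hypot, is to compare it against std, which is not guaranteed to be correctly rounded and may differ by an ulp:

    fn main() {
        let cases: [(f64, f64); 4] = [
            (3.0, 4.0),
            (1.0e200, 1.0e200),
            (f64::MIN_POSITIVE, f64::MIN_POSITIVE),
            (f64::INFINITY, f64::NAN),
        ];
        for (x, y) in cases {
            let ours = libm::hypot(x, y); // the function this commit rewires to cr_hypot
            let reference = x.hypot(y);   // platform/std hypot for comparison
            println!("hypot({x:e}, {y:e}) = {ours:e} (std: {reference:e})");
        }
    }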
