Skip to content

Commit 8fae33d

Browse files
committed
Add simd_as intrinsic
1 parent d32ca64 commit 8fae33d

File tree

7 files changed

+261
-173
lines changed

7 files changed

+261
-173
lines changed

compiler/rustc_codegen_llvm/src/builder.rs

+41-18
Original file line numberDiff line numberDiff line change
@@ -731,27 +731,11 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
731731
}
732732

733733
fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
734-
if !self.fptoint_sat_broken_in_llvm() {
735-
let src_ty = self.cx.val_ty(val);
736-
let float_width = self.cx.float_width(src_ty);
737-
let int_width = self.cx.int_width(dest_ty);
738-
let name = format!("llvm.fptoui.sat.i{}.f{}", int_width, float_width);
739-
return Some(self.call_intrinsic(&name, &[val]));
740-
}
741-
742-
None
734+
self.fptoint_sat(false, val, dest_ty)
743735
}
744736

745737
fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
746-
if !self.fptoint_sat_broken_in_llvm() {
747-
let src_ty = self.cx.val_ty(val);
748-
let float_width = self.cx.float_width(src_ty);
749-
let int_width = self.cx.int_width(dest_ty);
750-
let name = format!("llvm.fptosi.sat.i{}.f{}", int_width, float_width);
751-
return Some(self.call_intrinsic(&name, &[val]));
752-
}
753-
754-
None
738+
self.fptoint_sat(true, val, dest_ty)
755739
}
756740

757741
fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
@@ -1455,4 +1439,43 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
14551439
_ => false,
14561440
}
14571441
}
1442+
1443+
fn fptoint_sat(
1444+
&mut self,
1445+
signed: bool,
1446+
val: &'ll Value,
1447+
dest_ty: &'ll Type,
1448+
) -> Option<&'ll Value> {
1449+
if !self.fptoint_sat_broken_in_llvm() {
1450+
let src_ty = self.cx.val_ty(val);
1451+
let (float_ty, int_ty, vector_length) = if self.cx.type_kind(src_ty) == TypeKind::Vector
1452+
{
1453+
assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
1454+
(
1455+
self.cx.element_type(src_ty),
1456+
self.cx.element_type(dest_ty),
1457+
Some(self.cx.vector_length(src_ty)),
1458+
)
1459+
} else {
1460+
(src_ty, dest_ty, None)
1461+
};
1462+
let float_width = self.cx.float_width(float_ty);
1463+
let int_width = self.cx.int_width(int_ty);
1464+
1465+
let instr = if signed { "fptosi" } else { "fptoui" };
1466+
let name = if let Some(vector_length) = vector_length {
1467+
format!(
1468+
"llvm.{}.sat.v{}i{}.v{}f{}",
1469+
instr, vector_length, int_width, vector_length, float_width
1470+
)
1471+
} else {
1472+
format!("llvm.{}.sat.i{}.f{}", instr, int_width, float_width)
1473+
};
1474+
let f =
1475+
self.declare_cfn(&name, llvm::UnnamedAddr::No, self.type_func(&[src_ty], dest_ty));
1476+
return Some(self.call(self.type_func(&[src_ty], dest_ty), f, &[val], None));
1477+
}
1478+
1479+
None
1480+
}
14581481
}

compiler/rustc_codegen_llvm/src/intrinsic.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -1689,7 +1689,7 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
16891689
bitwise_red!(simd_reduce_all: vector_reduce_and, true);
16901690
bitwise_red!(simd_reduce_any: vector_reduce_or, true);
16911691

1692-
if name == sym::simd_cast {
1692+
if name == sym::simd_cast || name == sym::simd_as {
16931693
require_simd!(ret_ty, "return");
16941694
let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
16951695
require!(
@@ -1761,10 +1761,10 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
17611761
});
17621762
}
17631763
(Style::Float, Style::Int(out_is_signed)) => {
1764-
return Ok(if out_is_signed {
1765-
bx.fptosi(args[0].immediate(), llret_ty)
1766-
} else {
1767-
bx.fptoui(args[0].immediate(), llret_ty)
1764+
return Ok(match (out_is_signed, name == sym::simd_as) {
1765+
(false, false) => bx.fptoui(args[0].immediate(), llret_ty),
1766+
(true, false) => bx.fptosi(args[0].immediate(), llret_ty),
1767+
(_, true) => bx.cast_float_to_int(out_is_signed, args[0].immediate(), llret_ty),
17681768
});
17691769
}
17701770
(Style::Float, Style::Float) => {

compiler/rustc_codegen_ssa/src/mir/rvalue.rs

+3-147
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@ use super::place::PlaceRef;
33
use super::{FunctionCx, LocalRef};
44

55
use crate::base;
6-
use crate::common::{self, IntPredicate, RealPredicate};
6+
use crate::common::{self, IntPredicate};
77
use crate::traits::*;
88
use crate::MemFlags;
99

10-
use rustc_apfloat::{ieee, Float, Round, Status};
1110
use rustc_middle::mir;
1211
use rustc_middle::ty::cast::{CastTy, IntTy};
1312
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
@@ -368,10 +367,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
368367
bx.inttoptr(usize_llval, ll_t_out)
369368
}
370369
(CastTy::Float, CastTy::Int(IntTy::I)) => {
371-
cast_float_to_int(&mut bx, true, llval, ll_t_in, ll_t_out)
370+
bx.cast_float_to_int(true, llval, ll_t_out)
372371
}
373372
(CastTy::Float, CastTy::Int(_)) => {
374-
cast_float_to_int(&mut bx, false, llval, ll_t_in, ll_t_out)
373+
bx.cast_float_to_int(false, llval, ll_t_out)
375374
}
376375
_ => bug!("unsupported cast: {:?} to {:?}", operand.layout.ty, cast.ty),
377376
};
@@ -768,146 +767,3 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
768767
// (*) this is only true if the type is suitable
769768
}
770769
}
771-
772-
fn cast_float_to_int<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
773-
bx: &mut Bx,
774-
signed: bool,
775-
x: Bx::Value,
776-
float_ty: Bx::Type,
777-
int_ty: Bx::Type,
778-
) -> Bx::Value {
779-
if let Some(false) = bx.cx().sess().opts.debugging_opts.saturating_float_casts {
780-
return if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
781-
}
782-
783-
let try_sat_result = if signed { bx.fptosi_sat(x, int_ty) } else { bx.fptoui_sat(x, int_ty) };
784-
if let Some(try_sat_result) = try_sat_result {
785-
return try_sat_result;
786-
}
787-
788-
let int_width = bx.cx().int_width(int_ty);
789-
let float_width = bx.cx().float_width(float_ty);
790-
// LLVM's fpto[su]i returns undef when the input x is infinite, NaN, or does not fit into the
791-
// destination integer type after rounding towards zero. This `undef` value can cause UB in
792-
// safe code (see issue #10184), so we implement a saturating conversion on top of it:
793-
// Semantically, the mathematical value of the input is rounded towards zero to the next
794-
// mathematical integer, and then the result is clamped into the range of the destination
795-
// integer type. Positive and negative infinity are mapped to the maximum and minimum value of
796-
// the destination integer type. NaN is mapped to 0.
797-
//
798-
// Define f_min and f_max as the smallest and largest (finite) floats that are exactly equal to
799-
// a value representable in int_ty.
800-
// They are exactly equal to int_ty::{MIN,MAX} if float_ty has enough significand bits.
801-
// Otherwise, int_ty::MAX must be rounded towards zero, as it is one less than a power of two.
802-
// int_ty::MIN, however, is either zero or a negative power of two and is thus exactly
803-
// representable. Note that this only works if float_ty's exponent range is sufficiently large.
804-
// f16 or 256 bit integers would break this property. Right now the smallest float type is f32
805-
// with exponents ranging up to 127, which is barely enough for i128::MIN = -2^127.
806-
// On the other hand, f_max works even if int_ty::MAX is greater than float_ty::MAX. Because
807-
// we're rounding towards zero, we just get float_ty::MAX (which is always an integer).
808-
// This already happens today with u128::MAX = 2^128 - 1 > f32::MAX.
809-
let int_max = |signed: bool, int_width: u64| -> u128 {
810-
let shift_amount = 128 - int_width;
811-
if signed { i128::MAX as u128 >> shift_amount } else { u128::MAX >> shift_amount }
812-
};
813-
let int_min = |signed: bool, int_width: u64| -> i128 {
814-
if signed { i128::MIN >> (128 - int_width) } else { 0 }
815-
};
816-
817-
let compute_clamp_bounds_single = |signed: bool, int_width: u64| -> (u128, u128) {
818-
let rounded_min = ieee::Single::from_i128_r(int_min(signed, int_width), Round::TowardZero);
819-
assert_eq!(rounded_min.status, Status::OK);
820-
let rounded_max = ieee::Single::from_u128_r(int_max(signed, int_width), Round::TowardZero);
821-
assert!(rounded_max.value.is_finite());
822-
(rounded_min.value.to_bits(), rounded_max.value.to_bits())
823-
};
824-
let compute_clamp_bounds_double = |signed: bool, int_width: u64| -> (u128, u128) {
825-
let rounded_min = ieee::Double::from_i128_r(int_min(signed, int_width), Round::TowardZero);
826-
assert_eq!(rounded_min.status, Status::OK);
827-
let rounded_max = ieee::Double::from_u128_r(int_max(signed, int_width), Round::TowardZero);
828-
assert!(rounded_max.value.is_finite());
829-
(rounded_min.value.to_bits(), rounded_max.value.to_bits())
830-
};
831-
832-
let mut float_bits_to_llval = |bits| {
833-
let bits_llval = match float_width {
834-
32 => bx.cx().const_u32(bits as u32),
835-
64 => bx.cx().const_u64(bits as u64),
836-
n => bug!("unsupported float width {}", n),
837-
};
838-
bx.bitcast(bits_llval, float_ty)
839-
};
840-
let (f_min, f_max) = match float_width {
841-
32 => compute_clamp_bounds_single(signed, int_width),
842-
64 => compute_clamp_bounds_double(signed, int_width),
843-
n => bug!("unsupported float width {}", n),
844-
};
845-
let f_min = float_bits_to_llval(f_min);
846-
let f_max = float_bits_to_llval(f_max);
847-
// To implement saturation, we perform the following steps:
848-
//
849-
// 1. Cast x to an integer with fpto[su]i. This may result in undef.
850-
// 2. Compare x to f_min and f_max, and use the comparison results to select:
851-
// a) int_ty::MIN if x < f_min or x is NaN
852-
// b) int_ty::MAX if x > f_max
853-
// c) the result of fpto[su]i otherwise
854-
// 3. If x is NaN, return 0, otherwise return the result of step 2.
855-
//
856-
// This avoids resulting undef because values in range [f_min, f_max] by definition fit into the
857-
// destination type. It creates an undef temporary, but *producing* undef is not UB. Our use of
858-
// undef does not introduce any non-determinism either.
859-
// More importantly, the above procedure correctly implements saturating conversion.
860-
// Proof (sketch):
861-
// If x is NaN, 0 is returned by definition.
862-
// Otherwise, x is finite or infinite and thus can be compared with f_min and f_max.
863-
// This yields three cases to consider:
864-
// (1) if x in [f_min, f_max], the result of fpto[su]i is returned, which agrees with
865-
// saturating conversion for inputs in that range.
866-
// (2) if x > f_max, then x is larger than int_ty::MAX. This holds even if f_max is rounded
867-
// (i.e., if f_max < int_ty::MAX) because in those cases, nextUp(f_max) is already larger
868-
// than int_ty::MAX. Because x is larger than int_ty::MAX, the return value of int_ty::MAX
869-
// is correct.
870-
// (3) if x < f_min, then x is smaller than int_ty::MIN. As shown earlier, f_min exactly equals
871-
// int_ty::MIN and therefore the return value of int_ty::MIN is correct.
872-
// QED.
873-
874-
let int_max = bx.cx().const_uint_big(int_ty, int_max(signed, int_width));
875-
let int_min = bx.cx().const_uint_big(int_ty, int_min(signed, int_width) as u128);
876-
let zero = bx.cx().const_uint(int_ty, 0);
877-
878-
// Step 1 ...
879-
let fptosui_result = if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
880-
let less_or_nan = bx.fcmp(RealPredicate::RealULT, x, f_min);
881-
let greater = bx.fcmp(RealPredicate::RealOGT, x, f_max);
882-
883-
// Step 2: We use two comparisons and two selects, with %s1 being the
884-
// result:
885-
// %less_or_nan = fcmp ult %x, %f_min
886-
// %greater = fcmp ogt %x, %f_max
887-
// %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
888-
// %s1 = select %greater, int_ty::MAX, %s0
889-
// Note that %less_or_nan uses an *unordered* comparison. This
890-
// comparison is true if the operands are not comparable (i.e., if x is
891-
// NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
892-
// x is NaN.
893-
//
894-
// Performance note: Unordered comparison can be lowered to a "flipped"
895-
// comparison and a negation, and the negation can be merged into the
896-
// select. Therefore, it is not necessarily any more expensive than an
897-
// ordered ("normal") comparison. Whether these optimizations will be
898-
// performed is ultimately up to the backend, but at least x86 does
899-
// perform them.
900-
let s0 = bx.select(less_or_nan, int_min, fptosui_result);
901-
let s1 = bx.select(greater, int_max, s0);
902-
903-
// Step 3: NaN replacement.
904-
// For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
905-
// Therefore we only need to execute this step for signed integer types.
906-
if signed {
907-
// LLVM has no isNaN predicate, so we use (x == x) instead
908-
let cmp = bx.fcmp(RealPredicate::RealOEQ, x, x);
909-
bx.select(cmp, s1, zero)
910-
} else {
911-
s1
912-
}
913-
}

0 commit comments

Comments
 (0)