diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index fdbc463f498d..e6cbab668ee0 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -166,7 +166,7 @@ fn in_int_reg(ty: Type) -> bool { fn in_flt_reg(ty: Type) -> bool { match ty { - types::F32 | types::F64 => true, + types::F16 | types::F32 | types::F64 => true, _ => false, } } diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index 0badf6785494..e7bbb56c7cef 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -552,6 +552,11 @@ (rn Reg) (rm Reg)) + ;; Load floating-point constant, half-precision (16 bit). + (LoadFpuConst16 + (rd WritableReg) + (const_data u16)) + ;; Load floating-point constant, single-precision (32 bit). (LoadFpuConst32 (rd WritableReg) @@ -2836,6 +2841,7 @@ (rule (arg_store $I16 reg mem) (store16 reg mem)) (rule (arg_store $I32 reg mem) (store32 reg mem)) (rule (arg_store $I64 reg mem) (store64 reg mem)) +(rule (arg_store $F16 reg mem) (vec_store_lane $F16X8 reg mem 0)) (rule (arg_store $F32 reg mem) (vec_store_lane $F32X4 reg mem 0)) (rule (arg_store $F64 reg mem) (vec_store_lane $F64X2 reg mem 0)) (rule -1 (arg_store (vr128_ty ty) reg mem) (vec_store reg mem)) @@ -2861,6 +2867,7 @@ (rule 5 (abi_vec_elt_rev _ (gpr32_ty ty) reg) reg) (rule 4 (abi_vec_elt_rev _ (gpr64_ty ty) reg) reg) (rule 3 (abi_vec_elt_rev _ $I128 reg) reg) +(rule 3 (abi_vec_elt_rev _ $F128 reg) reg) (rule 2 (abi_vec_elt_rev _ (ty_scalar_float ty) reg) reg) (rule 0 (abi_vec_elt_rev callee_lane_order _ reg) (if-let true (lane_order_equal callee_lane_order (lane_order))) @@ -2925,7 +2932,7 @@ (decl imm (Type u64) Reg) ;; 16-bit (or smaller) result type, any value -(rule 7 (imm (fits_in_16 ty) n) +(rule 7 (imm (fits_in_16 (ty_int ty)) n) (let ((dst WritableReg (temp_writable_reg ty)) (_ Unit (emit (MInst.Mov32SImm16 dst (u64_as_i16 n))))) 
dst)) @@ -2986,6 +2993,13 @@ (_ Unit (emit (MInst.Insert64UImm32Shifted dst src n)))) dst)) +;; 16-bit floating-point type, any value. Loaded from literal pool. +;; TODO: use LZER to load 0.0 +(rule 8 (imm $F16 n) + (let ((dst WritableReg (temp_writable_reg $F16)) + (_ Unit (emit (MInst.LoadFpuConst16 dst (u64_as_u16 n))))) + dst)) + ;; 32-bit floating-point type, any value. Loaded from literal pool. ;; TODO: use LZER to load 0.0 (rule 8 (imm $F32 n) @@ -3222,6 +3236,10 @@ (let ((dst WritableReg (temp_writable_reg ty)) (inst MInst (MInst.CMov64 dst cond reg_false reg_true))) (ConsumesFlags.ConsumesFlagsReturnsReg inst dst))) +(rule 3 (cmov_reg_reg $F16 cond reg_true reg_false) + (let ((dst WritableReg (temp_writable_reg $F16)) + (inst MInst (MInst.FpuCMov32 dst cond reg_false reg_true))) + (ConsumesFlags.ConsumesFlagsReturnsReg inst dst))) (rule 3 (cmov_reg_reg $F32 cond reg_true reg_false) (let ((dst WritableReg (temp_writable_reg $F32)) (inst MInst (MInst.FpuCMov32 dst cond reg_false reg_true))) diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index 45cb6b88c705..9c5b6e7caf6b 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -38,6 +38,11 @@ macro_rules! debug_assert_valid_regpair { }; } +const OPCODE_BRAS: u16 = 0xa75; +const OPCODE_BCR: u16 = 0xa74; +const OPCODE_LDR: u16 = 0x28; +const OPCODE_VLR: u16 = 0xe756; + /// Type(s) of memory instructions available for mem_finalize. pub struct MemInstType { /// True if 12-bit unsigned displacement is supported. 
@@ -2298,9 +2303,8 @@ impl Inst { rd, ref symbol_reloc, } => { - let opcode = 0xa75; // BRAS let reg = writable_spilltmp_reg().to_reg(); - put(sink, &enc_ri_b(opcode, reg, 12)); + put(sink, &enc_ri_b(OPCODE_BRAS, reg, 12)); let (reloc, name, offset) = match &**symbol_reloc { SymbolReloc::Absolute { name, offset } => (Reloc::Abs8, name, *offset), SymbolReloc::TlsGd { name } => (Reloc::S390xTlsGd64, name, 0), @@ -2319,53 +2323,54 @@ impl Inst { let opcode = 0x38; // LER put(sink, &enc_rr(opcode, rd.to_reg(), rn)); } else { - let opcode = 0xe756; // VLR - put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0)); + put(sink, &enc_vrr_a(OPCODE_VLR, rd.to_reg(), rn, 0, 0, 0)); } } &Inst::FpuMove64 { rd, rn } => { if is_fpr(rd.to_reg()) && is_fpr(rn) { - let opcode = 0x28; // LDR - put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + put(sink, &enc_rr(OPCODE_LDR, rd.to_reg(), rn)); } else { - let opcode = 0xe756; // VLR - put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0)); + put(sink, &enc_vrr_a(OPCODE_VLR, rd.to_reg(), rn, 0, 0, 0)); } } &Inst::FpuCMov32 { rd, cond, ri, rm } => { debug_assert_eq!(rd.to_reg(), ri); if is_fpr(rd.to_reg()) && is_fpr(rm) { - let opcode = 0xa74; // BCR - put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2)); + put(sink, &enc_ri_c(OPCODE_BCR, cond.invert().bits(), 4 + 2)); let opcode = 0x38; // LER put(sink, &enc_rr(opcode, rd.to_reg(), rm)); } else { - let opcode = 0xa74; // BCR - put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 6)); - let opcode = 0xe756; // VLR - put(sink, &enc_vrr_a(opcode, rd.to_reg(), rm, 0, 0, 0)); + put(sink, &enc_ri_c(OPCODE_BCR, cond.invert().bits(), 4 + 6)); + put(sink, &enc_vrr_a(OPCODE_VLR, rd.to_reg(), rm, 0, 0, 0)); } } &Inst::FpuCMov64 { rd, cond, ri, rm } => { debug_assert_eq!(rd.to_reg(), ri); if is_fpr(rd.to_reg()) && is_fpr(rm) { - let opcode = 0xa74; // BCR - put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2)); - let opcode = 0x28; // LDR - put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + 
put(sink, &enc_ri_c(OPCODE_BCR, cond.invert().bits(), 4 + 2)); + put(sink, &enc_rr(OPCODE_LDR, rd.to_reg(), rm)); } else { - let opcode = 0xa74; // BCR - put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 6)); - let opcode = 0xe756; // VLR - put(sink, &enc_vrr_a(opcode, rd.to_reg(), rm, 0, 0, 0)); + put(sink, &enc_ri_c(OPCODE_BCR, cond.invert().bits(), 4 + 6)); + put(sink, &enc_vrr_a(OPCODE_VLR, rd.to_reg(), rm, 0, 0, 0)); } } + &Inst::LoadFpuConst16 { rd, const_data } => { + let reg = writable_spilltmp_reg().to_reg(); + put(sink, &enc_ri_b(OPCODE_BRAS, reg, 6)); + sink.put2(const_data.swap_bytes()); + let inst = Inst::VecLoadLaneUndef { + size: 16, + rd, + mem: MemArg::reg(reg, MemFlags::trusted()), + lane_imm: 0, + }; + inst.emit(sink, emit_info, state); + } &Inst::LoadFpuConst32 { rd, const_data } => { - let opcode = 0xa75; // BRAS let reg = writable_spilltmp_reg().to_reg(); - put(sink, &enc_ri_b(opcode, reg, 8)); + put(sink, &enc_ri_b(OPCODE_BRAS, reg, 8)); sink.put4(const_data.swap_bytes()); let inst = Inst::VecLoadLaneUndef { size: 32, @@ -2376,9 +2381,8 @@ impl Inst { inst.emit(sink, emit_info, state); } &Inst::LoadFpuConst64 { rd, const_data } => { - let opcode = 0xa75; // BRAS let reg = writable_spilltmp_reg().to_reg(); - put(sink, &enc_ri_b(opcode, reg, 12)); + put(sink, &enc_ri_b(OPCODE_BRAS, reg, 12)); sink.put8(const_data.swap_bytes()); let inst = Inst::VecLoadLaneUndef { size: 64, @@ -2780,8 +2784,7 @@ impl Inst { put(sink, &enc_vrr_a(opcode, rm, rn, m3, 0, 0)); // If CC != 0, we'd done, so jump over the next instruction. - let opcode = 0xa74; // BCR - put(sink, &enc_ri_c(opcode, 7, 4 + 6)); + put(sink, &enc_ri_c(OPCODE_BCR, 7, 4 + 6)); // Otherwise, use VECTOR COMPARE HIGH LOGICAL. 
// Since we already know the high parts are equal, the CC @@ -2864,25 +2867,21 @@ impl Inst { } &Inst::VecMov { rd, rn } => { - let opcode = 0xe756; // VLR - put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0)); + put(sink, &enc_vrr_a(OPCODE_VLR, rd.to_reg(), rn, 0, 0, 0)); } &Inst::VecCMov { rd, cond, ri, rm } => { debug_assert_eq!(rd.to_reg(), ri); - let opcode = 0xa74; // BCR - put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 6)); - let opcode = 0xe756; // VLR - put(sink, &enc_vrr_a(opcode, rd.to_reg(), rm, 0, 0, 0)); + put(sink, &enc_ri_c(OPCODE_BCR, cond.invert().bits(), 4 + 6)); + put(sink, &enc_vrr_a(OPCODE_VLR, rd.to_reg(), rm, 0, 0, 0)); } &Inst::MovToVec128 { rd, rn, rm } => { let opcode = 0xe762; // VLVGP put(sink, &enc_vrr_f(opcode, rd.to_reg(), rn, rm)); } &Inst::VecLoadConst { rd, const_data } => { - let opcode = 0xa75; // BRAS let reg = writable_spilltmp_reg().to_reg(); - put(sink, &enc_ri_b(opcode, reg, 20)); + put(sink, &enc_ri_b(OPCODE_BRAS, reg, 20)); for i in const_data.to_be_bytes().iter() { sink.put1(*i); } @@ -2897,9 +2896,8 @@ impl Inst { rd, const_data, } => { - let opcode = 0xa75; // BRAS let reg = writable_spilltmp_reg().to_reg(); - put(sink, &enc_ri_b(opcode, reg, (4 + size / 8) as i32)); + put(sink, &enc_ri_b(OPCODE_BRAS, reg, (4 + size / 8) as i32)); for i in 0..size / 8 { sink.put1((const_data >> (size - 8 - 8 * i)) as u8); } diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs index 64b2c44d55fe..a93f71abdbee 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs @@ -7817,6 +7817,24 @@ fn test_s390x_binemit() { "wfcdb %v24, %f12", )); + // FIXME(#8312): Use `1.0_f16.to_bits()` once `f16` is stabilised. 
+ let f16_1_0 = 0x3c00; + insns.push(( + Inst::LoadFpuConst16 { + rd: writable_vr(8), + const_data: f16_1_0, + }, + "A71500033C00E78010000001", + "bras %r1, 8 ; data.f16 0x1.000p0 ; vleh %v8, 0(%r1), 0", + )); + insns.push(( + Inst::LoadFpuConst16 { + rd: writable_vr(24), + const_data: f16_1_0, + }, + "A71500033C00E78010000801", + "bras %r1, 8 ; data.f16 0x1.000p0 ; vleh %v24, 0(%r1), 0", + )); insns.push(( Inst::LoadFpuConst32 { rd: writable_vr(8), diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 70ab352c8d0c..280739c740ef 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -1,6 +1,7 @@ //! This module defines s390x-specific machine instruction types. use crate::binemit::{Addend, CodeOffset, Reloc}; +use crate::ir::immediates::Ieee16; use crate::ir::{types, ExternalName, Type}; use crate::isa::s390x::abi::S390xMachineDeps; use crate::isa::{CallConv, FunctionAlignment}; @@ -177,6 +178,7 @@ impl Inst { | Inst::FpuRRRR { .. } | Inst::FpuCmp32 { .. } | Inst::FpuCmp64 { .. } + | Inst::LoadFpuConst16 { .. } | Inst::LoadFpuConst32 { .. } | Inst::LoadFpuConst64 { .. } | Inst::VecRRR { .. 
} @@ -324,6 +326,12 @@ impl Inst { types::I16 => Inst::Load64ZExt16 { rd: into_reg, mem }, types::I32 => Inst::Load64ZExt32 { rd: into_reg, mem }, types::I64 => Inst::Load64 { rd: into_reg, mem }, + types::F16 => Inst::VecLoadLaneUndef { + size: 16, + rd: into_reg, + mem, + lane_imm: 0, + }, types::F32 => Inst::VecLoadLaneUndef { size: 32, rd: into_reg, @@ -336,8 +344,7 @@ impl Inst { mem, lane_imm: 0, }, - _ if ty.is_vector() && ty.bits() == 128 => Inst::VecLoad { rd: into_reg, mem }, - types::I128 => Inst::VecLoad { rd: into_reg, mem }, + _ if ty.bits() == 128 => Inst::VecLoad { rd: into_reg, mem }, _ => unimplemented!("gen_load({})", ty), } } @@ -349,6 +356,12 @@ impl Inst { types::I16 => Inst::Store16 { rd: from_reg, mem }, types::I32 => Inst::Store32 { rd: from_reg, mem }, types::I64 => Inst::Store64 { rd: from_reg, mem }, + types::F16 => Inst::VecStoreLane { + size: 16, + rd: from_reg, + mem, + lane_imm: 0, + }, types::F32 => Inst::VecStoreLane { size: 32, rd: from_reg, @@ -361,8 +374,7 @@ impl Inst { mem, lane_imm: 0, }, - _ if ty.is_vector() && ty.bits() == 128 => Inst::VecStore { rd: from_reg, mem }, - types::I128 => Inst::VecStore { rd: from_reg, mem }, + _ if ty.bits() == 128 => Inst::VecStore { rd: from_reg, mem }, _ => unimplemented!("gen_store({})", ty), } } @@ -646,7 +658,9 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor { + Inst::LoadFpuConst16 { rd, .. } + | Inst::LoadFpuConst32 { rd, .. } + | Inst::LoadFpuConst64 { rd, .. 
} => { collector.reg_def(rd); collector.reg_fixed_nonallocatable(gpr_preg(1)); } @@ -1119,8 +1133,10 @@ impl MachInst for Inst { types::I16 => Ok((&[RegClass::Int], &[types::I16])), types::I32 => Ok((&[RegClass::Int], &[types::I32])), types::I64 => Ok((&[RegClass::Int], &[types::I64])), + types::F16 => Ok((&[RegClass::Float], &[types::F16])), types::F32 => Ok((&[RegClass::Float], &[types::F32])), types::F64 => Ok((&[RegClass::Float], &[types::F64])), + types::F128 => Ok((&[RegClass::Float], &[types::F128])), types::I128 => Ok((&[RegClass::Float], &[types::I128])), _ if ty.is_vector() && ty.bits() == 128 => Ok((&[RegClass::Float], &[types::I8X16])), _ => Err(CodegenError::Unsupported(format!( @@ -2267,6 +2283,18 @@ impl Inst { format!("wfcdb {}, {}", rn_fpr.unwrap_or(rn), rm_fpr.unwrap_or(rm)) } } + &Inst::LoadFpuConst16 { rd, const_data } => { + let (rd, _rd_fpr) = pretty_print_fpr(rd.to_reg()); + let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg()); + // FIXME(#8312): Use `f16::from_bits` once it is stabilised. 
+ format!( + "bras {}, 8 ; data.f16 {} ; vleh {}, 0({}), 0", + tmp, + Ieee16::with_bits(const_data), + rd, + tmp + ) + } &Inst::LoadFpuConst32 { rd, const_data } => { let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg()); let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg()); diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index c61c4d6f003c..dd62de2ce704 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -16,6 +16,12 @@ (imm ty n)) +;;;; Rules for `f16const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f16const (u16_from_ieee16 x))) + (imm $F16 x)) + + ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (f32const (u32_from_ieee32 x))) @@ -28,6 +34,12 @@ (imm $F64 x)) +;;;; Rules for `f128const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f128const (u128_from_constant x))) + (vec_imm $F128 (be_vec_const $F128 x))) + + ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (vconst (u128_from_constant x)))) @@ -1731,6 +1743,14 @@ (rule (lower (has_type $I32 (bitcast _ x @ (value_type $F32)))) (vec_extract_lane $F32X4 x 0 (zero_reg))) +;; Reinterpret a 16-bit integer value as floating-point. +(rule (lower (has_type $F16 (bitcast _ x @ (value_type $I16)))) + (vec_insert_lane_undef $F16X8 x 0 (zero_reg))) + +;; Reinterpret a 16-bit floating-point value as integer. +(rule (lower (has_type $I16 (bitcast _ x @ (value_type $F16)))) + (vec_extract_lane $F16X8 x 0 (zero_reg))) + ;; Bitcast between types residing in GPRs is a no-op. (rule 1 (lower (has_type (gpr32_ty _) (bitcast _ x @ (value_type (gpr32_ty _))))) @@ -2323,6 +2343,14 @@ (rule -1 (lower (has_type $I64 (load flags @ (littleendian) addr offset))) (loadrev64 (lower_address flags addr offset))) +;; Load 16-bit big-endian floating-point values (as vector lane). 
+(rule (lower (has_type $F16 (load flags @ (bigendian) addr offset))) + (vec_load_lane_undef $F16X8 (lower_address flags addr offset) 0)) + +;; Load 16-bit little-endian floating-point values (as vector lane). +(rule -1 (lower (has_type $F16 (load flags @ (littleendian) addr offset))) + (vec_load_lane_little_undef $F16X8 (lower_address flags addr offset) 0)) + ;; Load 32-bit big-endian floating-point values (as vector lane). (rule (lower (has_type $F32 (load flags @ (bigendian) addr offset))) (vec_load_lane_undef $F32X4 (lower_address flags addr offset) 0)) @@ -2383,6 +2411,10 @@ (rule -1 (vec_load_byte_rev $I128 flags addr offset) (vec_load_full_rev $I128 flags addr offset)) +;; Same for `f128`. +(rule -1 (vec_load_byte_rev $F128 flags addr offset) + (vec_load_full_rev $F128 flags addr offset)) + ;; Element-wise byte-reversed 16x8-bit load is a direct load. (rule (vec_load_byte_rev ty @ (multi_lane 8 16) flags addr offset) (vec_load ty (lower_address flags addr offset))) @@ -2418,6 +2450,10 @@ (rule -1 (vec_load_elt_rev $I128 flags addr offset) (vec_load $I128 (lower_address flags addr offset))) +;; Same for `f128`. +(rule -1 (vec_load_elt_rev $F128 flags addr offset) + (vec_load $F128 (lower_address flags addr offset))) + ;; Element-reversed 16x8-bit load is a full byte-reversed load. (rule (vec_load_elt_rev ty @ (multi_lane 8 16) flags addr offset) (vec_load_full_rev ty flags addr offset)) @@ -2641,6 +2677,18 @@ (rule (lower (store flags val @ (value_type $I64) addr offset)) (side_effect (istore64_impl flags val addr offset))) +;; Store 16-bit big-endian floating-point type (as vector lane). +(rule -1 (lower (store flags @ (bigendian) + val @ (value_type $F16) addr offset)) + (side_effect (vec_store_lane $F16X8 val + (lower_address flags addr offset) 0))) + +;; Store 16-bit little-endian floating-point type (as vector lane). 
+(rule (lower (store flags @ (littleendian) + val @ (value_type $F16) addr offset)) + (side_effect (vec_store_lane_little $F16X8 val + (lower_address flags addr offset) 0))) + ;; Store 32-bit big-endian floating-point type (as vector lane). (rule -1 (lower (store flags @ (bigendian) val @ (value_type $F32) addr offset)) @@ -2714,6 +2762,10 @@ (rule -1 (vec_store_byte_rev $I128 val flags addr offset) (vec_store_full_rev $I128 val flags addr offset)) +;; Same for `f128`. +(rule -1 (vec_store_byte_rev $F128 val flags addr offset) + (vec_store_full_rev $F128 val flags addr offset)) + ;; Element-wise byte-reversed 16x8-bit store is a direct store. (rule (vec_store_byte_rev (multi_lane 8 16) val flags addr offset) (vec_store val (lower_address flags addr offset))) @@ -2748,6 +2800,10 @@ (rule -1 (vec_store_elt_rev $I128 val flags addr offset) (vec_store val (lower_address flags addr offset))) +;; Same for `f128`. +(rule -1 (vec_store_elt_rev $F128 val flags addr offset) + (vec_store val (lower_address flags addr offset))) + ;; Element-reversed 16x8-bit store is a full byte-reversed store. 
(rule (vec_store_elt_rev ty @ (multi_lane 8 16) val flags addr offset) (vec_store_full_rev ty val flags addr offset)) diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index 462ca46ce09f..2fddc89e81e4 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -342,7 +342,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { #[inline] fn vr128_ty(&mut self, ty: Type) -> Option { match ty { - I128 => Some(ty), + I128 | F128 => Some(ty), _ if ty.is_vector() && ty.bits() == 128 => Some(ty), _ => None, } @@ -496,6 +496,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { fn be_vec_const(&mut self, ty: Type, n: u128) -> u128 { match self.lane_order() { LaneOrder::LittleEndian => n, + LaneOrder::BigEndian if ty.lane_count() == 1 => n, LaneOrder::BigEndian => { let lane_count = ty.lane_count(); let lane_bits = ty.lane_bits(); diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 0ae32dbfe47e..64a27ecddc94 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -425,6 +425,7 @@ (extern const $I32X4 Type) (extern const $I64X2 Type) +(extern const $F16X8 Type) (extern const $F32X4 Type) (extern const $F64X2 Type) diff --git a/cranelift/filetests/filetests/isa/s390x/bitcast.clif b/cranelift/filetests/filetests/isa/s390x/bitcast.clif index 3deeef1d7d90..e13c50d6e98f 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitcast.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitcast.clif @@ -77,3 +77,204 @@ block0(v0: i128): ; vl %v1, 0(%r3) ; vst %v1, 0(%r2) ; br %r14 + +function %bitcast_f16_to_i16(f16) -> i16 { +block0(v0: f16): + v1 = bitcast.i16 v0 + return v1 +} + +; VCode: +; block0: +; vlgvh %r2, %v0, 0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vlgvh %r2, %v0, 0 +; br %r14 + +function %bitcast_i16_to_f16(i16) -> f16 { 
+block0(v0: i16): + v1 = bitcast.f16 v0 + return v1 +} + +; VCode: +; block0: +; vlvgh %v0, %r2, 0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vlvgh %v0, %r2, 0 +; br %r14 + +function %bitcast_f32_to_i32(f32) -> i32 { +block0(v0: f32): + v1 = bitcast.i32 v0 + return v1 +} + +; VCode: +; block0: +; vlgvf %r2, %v0, 0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vlgvf %r2, %v0, 0 +; br %r14 + +function %bitcast_i32_to_f32(i32) -> f32 { +block0(v0: i32): + v1 = bitcast.f32 v0 + return v1 +} + +; VCode: +; block0: +; vlvgf %v0, %r2, 0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vlvgf %v0, %r2, 0 +; br %r14 + +function %bitcast_f64_to_i64(f64) -> i64 { +block0(v0: f64): + v1 = bitcast.i64 v0 + return v1 +} + +; VCode: +; block0: +; lgdr %r2, %f0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; lgdr %r2, %f0 +; br %r14 + +function %bitcast_i64_to_f64(i64) -> f64 { +block0(v0: i64): + v1 = bitcast.f64 v0 + return v1 +} + +; VCode: +; block0: +; ldgr %f0, %r2 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; ldgr %f0, %r2 +; br %r14 + +function %bitcast_f128_to_i128(f128) -> i128 { +block0(v0: f128): + v1 = bitcast.i128 v0 + return v1 +} + +; VCode: +; block0: +; vl %v1, 0(%r3) +; vst %v1, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v1, 0(%r3) +; vst %v1, 0(%r2) +; br %r14 + +function %bitcast_i128_to_f128(i128) -> f128 { +block0(v0: i128): + v1 = bitcast.f128 v0 + return v1 +} + +; VCode: +; block0: +; vl %v1, 0(%r3) +; vst %v1, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v1, 0(%r3) +; vst %v1, 0(%r2) +; br %r14 + +function %bitcast_f128_to_i64x2_little(f128) -> i64x2 { +block0(v0: f128): + v1 = bitcast.i64x2 little v0 + return v1 +} + +; VCode: +; block0: +; vl %v1, 0(%r2) +; vpdi %v24, %v1, %v1, 4 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v1, 0(%r2) +; vpdi %v24, %v1, %v1, 4 +; br %r14 + +function %bitcast_i64x2_to_f128_little(i64x2) -> f128 { 
+block0(v0: i64x2): + v1 = bitcast.f128 little v0 + return v1 +} + +; VCode: +; block0: +; vpdi %v3, %v24, %v24, 4 +; vst %v3, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vpdi %v3, %v24, %v24, 4 +; vst %v3, 0(%r2) +; br %r14 + +function %bitcast_f128_to_i64x2(f128) -> i64x2 { +block0(v0: f128): + v1 = bitcast.i64x2 big v0 + return v1 +} + +; VCode: +; block0: +; vl %v24, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v24, 0(%r2) +; br %r14 + +function %bitcast_i64x2_to_f128(i64x2) -> f128 { +block0(v0: i64x2): + v1 = bitcast.f128 big v0 + return v1 +} + +; VCode: +; block0: +; vst %v24, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vst %v24, 0(%r2) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/call.clif b/cranelift/filetests/filetests/isa/s390x/call.clif index 41f1a909d88e..f376592721ff 100644 --- a/cranelift/filetests/filetests/isa/s390x/call.clif +++ b/cranelift/filetests/filetests/isa/s390x/call.clif @@ -392,3 +392,85 @@ block0: ; brasl %r14, 0xe ; reloc_external PLTRel32Dbl %g 2 ; .byte 0x00, 0x00 ; trap: user1 +function %second_f16(f16, f16) -> f16 { +block0(v0: f16, v1: f16): + return v1 +} + +; VCode: +; block0: +; vlr %v0, %v2 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vlr %v0, %v2 +; br %r14 + +function %second_f128(f128, f128) -> f128 { +block0(v0: f128, v1: f128): + return v1 +} + +; VCode: +; block0: +; vl %v1, 0(%r4) +; vst %v1, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v1, 0(%r4) +; vst %v1, 0(%r2) +; br %r14 + +function %call_f128(f128) -> f128 { + fn0 = %g(f128) -> f128 + +block0(v0: f128): + v1 = call fn0(v0) + return v1 +} + +; VCode: +; stmg %r6, %r15, 48(%r15) +; aghi %r15, -208 +; block0: +; lgr %r6, %r2 +; vl %v1, 0(%r3) +; vst %v1, 160(%r15) +; la %r3, 160(%r15) +; la %r2, 176(%r15) +; bras %r1, 12 ; data %g + 0 ; lg %r4, 0(%r1) +; basr %r14, %r4 ; vl %v1, 176(%r15) ; vst %v1, 192(%r15) +; lgr %r2, %r6 +; vl %v19, 
192(%r15) +; vst %v19, 0(%r2) +; lmg %r6, %r15, 256(%r15) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; stmg %r6, %r15, 0x30(%r15) +; aghi %r15, -0xd0 +; block1: ; offset 0xa +; lgr %r6, %r2 +; vl %v1, 0(%r3) +; vst %v1, 0xa0(%r15) +; la %r3, 0xa0(%r15) +; la %r2, 0xb0(%r15) +; bras %r1, 0x2e +; .byte 0x00, 0x00 ; reloc_external Abs8 %g 0 +; .byte 0x00, 0x00 +; .byte 0x00, 0x00 +; .byte 0x00, 0x00 +; lg %r4, 0(%r1) +; basr %r14, %r4 +; vl %v1, 0xb0(%r15) +; vst %v1, 0xc0(%r15) +; lgr %r2, %r6 +; vl %v19, 0xc0(%r15) +; vst %v19, 0(%r2) +; lmg %r6, %r15, 0x100(%r15) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/floating-point.clif b/cranelift/filetests/filetests/isa/s390x/floating-point.clif index 7745374b39a2..8154bd1d9c7a 100644 --- a/cranelift/filetests/filetests/isa/s390x/floating-point.clif +++ b/cranelift/filetests/filetests/isa/s390x/floating-point.clif @@ -1,13 +1,32 @@ test compile precise-output +set enable_multi_ret_implicit_sret target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; F32CONST/F64CONST +;; f16const/f32const/f64const/f128const ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; FIXME: should use FZERO instruction ; FIXME: should use out-of-line literal pool +function %f16const_zero() -> f16 { +block0: + v1 = f16const 0x0.0 + return v1 +} + +; VCode: +; block0: +; bras %r1, 8 ; data.f16 0.0 ; vleh %v0, 0(%r1), 0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; bras %r1, 6 +; .byte 0x00, 0x00 +; vleh %v0, 0(%r1), 0 +; br %r14 + function %f32const_zero() -> f32 { block0: v1 = f32const 0x0.0 @@ -48,6 +67,42 @@ block0: ; ld %f0, 0(%r1) ; br %r14 +function %f128const_zero() -> f128 { +block0: + v1 = f128const 0x0.0 + return v1 +} + +; VCode: +; block0: +; vgbm %v2, 0 +; vst %v2, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vzero %v2 +; vst %v2, 0(%r2) +; br %r14 + +function %f16const_one() -> f16 { +block0: + v1 = f16const 0x1.0 + return v1 +} + +; VCode: +; block0: +; bras %r1, 8 ; 
data.f16 0x1.000p0 ; vleh %v0, 0(%r1), 0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; bras %r1, 6 +; mder %f0, %f0 +; vleh %v0, 0(%r1), 0 +; br %r14 + function %f32const_one() -> f32 { block0: v1 = f32const 0x1.0 @@ -88,6 +143,33 @@ block0: ; ld %f0, 0(%r1) ; br %r14 +function %f128const_one() -> f128 { +block0: + v1 = f128const 0x1.0 + return v1 +} + +; VCode: +; block0: +; bras %r1, 20 ; data.u128 0x3fff0000000000000000000000000000 ; vl %v2, 0(%r1) +; vst %v2, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; bras %r1, 0x14 +; sur %f15, %f15 +; .byte 0x00, 0x00 +; .byte 0x00, 0x00 +; .byte 0x00, 0x00 +; .byte 0x00, 0x00 +; .byte 0x00, 0x00 +; .byte 0x00, 0x00 +; .byte 0x00, 0x00 +; vl %v2, 0(%r1) +; vst %v2, 0(%r2) +; br %r14 + function %fadd_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): v2 = fadd v0, v1 diff --git a/cranelift/filetests/filetests/isa/s390x/load-little.clif b/cranelift/filetests/filetests/isa/s390x/load-little.clif index abbd3baeae5d..237dcfcc090f 100644 --- a/cranelift/filetests/filetests/isa/s390x/load-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/load-little.clif @@ -1,4 +1,5 @@ test compile precise-output +set enable_multi_ret_implicit_sret target s390x function %load_i64(i64) -> i64 { @@ -452,3 +453,79 @@ block0(v0: i64): ; llc %r2, 0(%r2) ; trap: heap_oob ; br %r14 +function %load_f16(i64) -> f16 { +block0(v0: i64): + v1 = load.f16 little v0 + return v1 +} + +; VCode: +; block0: +; lrvh %r4, 0(%r2) +; vlvgh %v0, %r4, 0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; lrvh %r4, 0(%r2) ; trap: heap_oob +; vlvgh %v0, %r4, 0 +; br %r14 + +function %load_f32(i64) -> f32 { +block0(v0: i64): + v1 = load.f32 little v0 + return v1 +} + +; VCode: +; block0: +; lrv %r4, 0(%r2) +; vlvgf %v0, %r4, 0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; lrv %r4, 0(%r2) ; trap: heap_oob +; vlvgf %v0, %r4, 0 +; br %r14 + +function %load_f64(i64) -> f64 { +block0(v0: i64): + v1 = load.f64 little v0 + 
return v1 +} + +; VCode: +; block0: +; lrvg %r4, 0(%r2) +; ldgr %f0, %r4 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; lrvg %r4, 0(%r2) ; trap: heap_oob +; ldgr %f0, %r4 +; br %r14 + +function %load_f128(i64) -> f128 { +block0(v0: i64): + v1 = load.f128 little v0 + return v1 +} + +; VCode: +; block0: +; lrvg %r5, 0(%r3) +; lrvg %r3, 8(%r3) +; vlvgp %v7, %r3, %r5 +; vst %v7, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; lrvg %r5, 0(%r3) ; trap: heap_oob +; lrvg %r3, 8(%r3) ; trap: heap_oob +; vlvgp %v7, %r3, %r5 +; vst %v7, 0(%r2) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/load.clif b/cranelift/filetests/filetests/isa/s390x/load.clif index fb5c459ea38a..962eb40f5eba 100644 --- a/cranelift/filetests/filetests/isa/s390x/load.clif +++ b/cranelift/filetests/filetests/isa/s390x/load.clif @@ -1,4 +1,5 @@ test compile precise-output +set enable_multi_ret_implicit_sret target s390x function %load_i64(i64) -> i64 { @@ -451,3 +452,69 @@ block0(v0: i64): ; llc %r2, 0(%r2) ; trap: heap_oob ; br %r14 +function %load_f16(i64) -> f16 { +block0(v0: i64): + v1 = load.f16 v0 + return v1 +} + +; VCode: +; block0: +; vleh %v0, 0(%r2), 0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vleh %v0, 0(%r2), 0 ; trap: heap_oob +; br %r14 + +function %load_f32(i64) -> f32 { +block0(v0: i64): + v1 = load.f32 v0 + return v1 +} + +; VCode: +; block0: +; le %f0, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; le %f0, 0(%r2) ; trap: heap_oob +; br %r14 + +function %load_f64(i64) -> f64 { +block0(v0: i64): + v1 = load.f64 v0 + return v1 +} + +; VCode: +; block0: +; ld %f0, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; ld %f0, 0(%r2) ; trap: heap_oob +; br %r14 + +function %load_f128(i64) -> f128 { +block0(v0: i64): + v1 = load.f128 v0 + return v1 +} + +; VCode: +; block0: +; vl %v3, 0(%r3) +; vst %v3, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v3, 0(%r3) ; trap: heap_oob +; 
vst %v3, 0(%r2) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/select-float.clif b/cranelift/filetests/filetests/isa/s390x/select-float.clif new file mode 100644 index 000000000000..a9ce284cac04 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/select-float.clif @@ -0,0 +1,567 @@ +test compile precise-output +set enable_multi_ret_implicit_sret +target s390x + + +function %select_icmp_i8_f16(i8, f16, f16) -> f16 { +block0(v0: i8, v1: f16, v2: f16): + v3 = iconst.i8 42 + v4 = icmp eq v0, v3 + v5 = select.f16 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; llcr %r2, %r2 +; clfi %r2, 42 +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 10 ; vlr %v0, %v16 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; llcr %r2, %r2 +; clfi %r2, 0x2a +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 0x20 +; vlr %v0, %v16 +; br %r14 + +function %select_icmp_i8_f32(i8, f32, f32) -> f32 { +block0(v0: i8, v1: f32, v2: f32): + v3 = iconst.i8 42 + v4 = icmp eq v0, v3 + v5 = select.f32 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; llcr %r2, %r2 +; clfi %r2, 42 +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 10 ; vlr %v0, %v16 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; llcr %r2, %r2 +; clfi %r2, 0x2a +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 0x20 +; vlr %v0, %v16 +; br %r14 + +function %select_icmp_i8_f64(i8, f64, f64) -> f64 { +block0(v0: i8, v1: f64, v2: f64): + v3 = iconst.i8 42 + v4 = icmp eq v0, v3 + v5 = select.f64 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; llcr %r2, %r2 +; clfi %r2, 42 +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 10 ; vlr %v0, %v16 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; llcr %r2, %r2 +; clfi %r2, 0x2a +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 0x20 +; vlr %v0, %v16 +; br %r14 + +function %select_icmp_i8_f128(i8, f128, f128) -> f128 { +block0(v0: i8, v1: f128, v2: f128): + v3 = iconst.i8 42 + v4 = icmp eq v0, v3 + v5 = select.f128 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; vl %v2, 0(%r4) +; vl %v7, 0(%r5) +; 
llcr %r5, %r3 +; clfi %r5, 42 +; jne 10 ; vlr %v7, %v2 +; vst %v7, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v2, 0(%r4) +; vl %v7, 0(%r5) +; llcr %r5, %r3 +; clfi %r5, 0x2a +; jne 0x20 +; vlr %v7, %v2 +; vst %v7, 0(%r2) +; br %r14 + +function %select_icmp_i16_f16(i16, f16, f16) -> f16 { +block0(v0: i16, v1: f16, v2: f16): + v3 = iconst.i16 42 + v4 = icmp eq v0, v3 + v5 = select.f16 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; llhr %r2, %r2 +; clfi %r2, 42 +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 10 ; vlr %v0, %v16 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; llhr %r2, %r2 +; clfi %r2, 0x2a +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 0x20 +; vlr %v0, %v16 +; br %r14 + +function %select_icmp_i16_f32(i16, f32, f32) -> f32 { +block0(v0: i16, v1: f32, v2: f32): + v3 = iconst.i16 42 + v4 = icmp eq v0, v3 + v5 = select.f32 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; llhr %r2, %r2 +; clfi %r2, 42 +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 10 ; vlr %v0, %v16 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; llhr %r2, %r2 +; clfi %r2, 0x2a +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 0x20 +; vlr %v0, %v16 +; br %r14 + +function %select_icmp_i16_f64(i16, f64, f64) -> f64 { +block0(v0: i16, v1: f64, v2: f64): + v3 = iconst.i16 42 + v4 = icmp eq v0, v3 + v5 = select.f64 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; llhr %r2, %r2 +; clfi %r2, 42 +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 10 ; vlr %v0, %v16 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; llhr %r2, %r2 +; clfi %r2, 0x2a +; vlr %v16, %v0 +; vlr %v0, %v2 +; jne 0x20 +; vlr %v0, %v16 +; br %r14 + +function %select_icmp_i16_f128(i16, f128, f128) -> f128 { +block0(v0: i16, v1: f128, v2: f128): + v3 = iconst.i16 42 + v4 = icmp eq v0, v3 + v5 = select.f128 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; vl %v2, 0(%r4) +; vl %v7, 0(%r5) +; llhr %r5, %r3 +; clfi %r5, 42 +; jne 10 ; vlr %v7, %v2 +; vst %v7, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; 
offset 0x0 +; vl %v2, 0(%r4) +; vl %v7, 0(%r5) +; llhr %r5, %r3 +; clfi %r5, 0x2a +; jne 0x20 +; vlr %v7, %v2 +; vst %v7, 0(%r2) +; br %r14 + +function %select_icmp_i32_f16(i32, f16, f16) -> f16 { +block0(v0: i32, v1: f16, v2: f16): + v3 = iconst.i32 42 + v4 = icmp eq v0, v3 + v5 = select.f16 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; clfi %r2, 42 +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 6 ; ler %f0, %f6 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; clfi %r2, 0x2a +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 0x18 +; ler %f0, %f6 +; br %r14 + +function %select_icmp_i32_f32(i32, f32, f32) -> f32 { +block0(v0: i32, v1: f32, v2: f32): + v3 = iconst.i32 42 + v4 = icmp eq v0, v3 + v5 = select.f32 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; clfi %r2, 42 +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 6 ; ler %f0, %f6 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; clfi %r2, 0x2a +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 0x18 +; ler %f0, %f6 +; br %r14 + +function %select_icmp_i32_f64(i32, f64, f64) -> f64 { +block0(v0: i32, v1: f64, v2: f64): + v3 = iconst.i32 42 + v4 = icmp eq v0, v3 + v5 = select.f64 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; clfi %r2, 42 +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 6 ; ldr %f0, %f6 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; clfi %r2, 0x2a +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 0x18 +; ldr %f0, %f6 +; br %r14 + +function %select_icmp_i32_f128(i32, f128, f128) -> f128 { +block0(v0: i32, v1: f128, v2: f128): + v3 = iconst.i32 42 + v4 = icmp eq v0, v3 + v5 = select.f128 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; vl %v2, 0(%r4) +; vl %v6, 0(%r5) +; clfi %r3, 42 +; jne 10 ; vlr %v6, %v2 +; vst %v6, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v2, 0(%r4) +; vl %v6, 0(%r5) +; clfi %r3, 0x2a +; jne 0x1c +; vlr %v6, %v2 +; vst %v6, 0(%r2) +; br %r14 + +function %select_icmp_i64_f16(i64, f16, f16) -> f16 { +block0(v0: i64, v1: f16, v2: f16): + v3 = iconst.i64 42 + v4 = icmp eq 
v0, v3 + v5 = select.f16 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; clgfi %r2, 42 +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 6 ; ler %f0, %f6 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; clgfi %r2, 0x2a +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 0x18 +; ler %f0, %f6 +; br %r14 + +function %select_icmp_i64_f32(i64, f32, f32) -> f32 { +block0(v0: i64, v1: f32, v2: f32): + v3 = iconst.i64 42 + v4 = icmp eq v0, v3 + v5 = select.f32 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; clgfi %r2, 42 +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 6 ; ler %f0, %f6 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; clgfi %r2, 0x2a +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 0x18 +; ler %f0, %f6 +; br %r14 + +function %select_icmp_i64_f64(i64, f64, f64) -> f64 { +block0(v0: i64, v1: f64, v2: f64): + v3 = iconst.i64 42 + v4 = icmp eq v0, v3 + v5 = select.f64 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; clgfi %r2, 42 +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 6 ; ldr %f0, %f6 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; clgfi %r2, 0x2a +; vlr %v6, %v0 +; vlr %v0, %v2 +; jne 0x18 +; ldr %f0, %f6 +; br %r14 + +function %select_icmp_i64_f128(i64, f128, f128) -> f128 { +block0(v0: i64, v1: f128, v2: f128): + v3 = iconst.i64 42 + v4 = icmp eq v0, v3 + v5 = select.f128 v4, v1, v2 + return v5 +} + +; VCode: +; block0: +; vl %v2, 0(%r4) +; vl %v6, 0(%r5) +; clgfi %r3, 42 +; jne 10 ; vlr %v6, %v2 +; vst %v6, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v2, 0(%r4) +; vl %v6, 0(%r5) +; clgfi %r3, 0x2a +; jne 0x1c +; vlr %v6, %v2 +; vst %v6, 0(%r2) +; br %r14 + +function %select_icmp_i128_f16(i128, f16, f16) -> f16 { +block0(v0: i128, v1: f16, v2: f16): + v3 = iconst.i64 42 + v4 = uextend.i128 v3 + v5 = icmp eq v0, v4 + v6 = select.f16 v5, v1, v2 + return v6 +} + +; VCode: +; block0: +; vl %v1, 0(%r2) +; lghi %r3, 42 +; vgbm %v17, 0 +; vlvgg %v17, %r3, 1 +; vceqgs %v16, %v1, %v17 +; vlr %v23, %v0 +; vlr %v0, %v2 +; jne 10 ; vlr %v0, %v23 +; 
br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v1, 0(%r2) +; lghi %r3, 0x2a +; vzero %v17 +; vlvgg %v17, %r3, 1 +; vceqgs %v16, %v1, %v17 +; vlr %v23, %v0 +; vlr %v0, %v2 +; jne 0x32 +; vlr %v0, %v23 +; br %r14 + +function %select_icmp_i128_f32(i128, f32, f32) -> f32 { +block0(v0: i128, v1: f32, v2: f32): + v3 = iconst.i64 42 + v4 = uextend.i128 v3 + v5 = icmp eq v0, v4 + v6 = select.f32 v5, v1, v2 + return v6 +} + +; VCode: +; block0: +; vl %v1, 0(%r2) +; lghi %r3, 42 +; vgbm %v17, 0 +; vlvgg %v17, %r3, 1 +; vceqgs %v16, %v1, %v17 +; vlr %v23, %v0 +; vlr %v0, %v2 +; jne 10 ; vlr %v0, %v23 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v1, 0(%r2) +; lghi %r3, 0x2a +; vzero %v17 +; vlvgg %v17, %r3, 1 +; vceqgs %v16, %v1, %v17 +; vlr %v23, %v0 +; vlr %v0, %v2 +; jne 0x32 +; vlr %v0, %v23 +; br %r14 + +function %select_icmp_i128_f64(i128, f64, f64) -> f64 { +block0(v0: i128, v1: f64, v2: f64): + v3 = iconst.i64 42 + v4 = uextend.i128 v3 + v5 = icmp eq v0, v4 + v6 = select.f64 v5, v1, v2 + return v6 +} + +; VCode: +; block0: +; vl %v1, 0(%r2) +; lghi %r3, 42 +; vgbm %v17, 0 +; vlvgg %v17, %r3, 1 +; vceqgs %v16, %v1, %v17 +; vlr %v23, %v0 +; vlr %v0, %v2 +; jne 10 ; vlr %v0, %v23 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v1, 0(%r2) +; lghi %r3, 0x2a +; vzero %v17 +; vlvgg %v17, %r3, 1 +; vceqgs %v16, %v1, %v17 +; vlr %v23, %v0 +; vlr %v0, %v2 +; jne 0x32 +; vlr %v0, %v23 +; br %r14 + +function %select_icmp_i128_f128(i128, f128, f128) -> f128 { +block0(v0: i128, v1: f128, v2: f128): + v3 = iconst.i64 42 + v4 = uextend.i128 v3 + v5 = icmp eq v0, v4 + v6 = select.f128 v5, v1, v2 + return v6 +} + +; VCode: +; block0: +; vl %v1, 0(%r3) +; vl %v3, 0(%r4) +; vl %v16, 0(%r5) +; lghi %r3, 42 +; vgbm %v20, 0 +; vlvgg %v20, %r3, 1 +; vceqgs %v19, %v1, %v20 +; jne 10 ; vlr %v16, %v3 +; vst %v16, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v1, 0(%r3) +; vl %v3, 0(%r4) +; vl %v16, 0(%r5) +; lghi %r3, 0x2a +; 
vzero %v20 +; vlvgg %v20, %r3, 1 +; vceqgs %v19, %v1, %v20 +; jne 0x32 +; vlr %v16, %v3 +; vst %v16, 0(%r2) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/store-little.clif b/cranelift/filetests/filetests/isa/s390x/store-little.clif index 72ae91940ba0..c99669a18e4d 100644 --- a/cranelift/filetests/filetests/isa/s390x/store-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/store-little.clif @@ -484,3 +484,81 @@ block0(v0: i64): ; mviy 0x1000(%r2), 0x7b ; trap: heap_oob ; br %r14 +function %store_f16(f16, i64) { +block0(v0: f16, v1: i64): + store.f16 little v0, v1 + return +} + +; VCode: +; block0: +; vlgvh %r5, %v0, 0 +; strvh %r5, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vlgvh %r5, %v0, 0 +; strvh %r5, 0(%r2) ; trap: heap_oob +; br %r14 + +function %store_f32(f32, i64) { +block0(v0: f32, v1: i64): + store.f32 little v0, v1 + return +} + +; VCode: +; block0: +; vlgvf %r5, %v0, 0 +; strv %r5, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vlgvf %r5, %v0, 0 +; strv %r5, 0(%r2) ; trap: heap_oob +; br %r14 + +function %store_f64(f64, i64) { +block0(v0: f64, v1: i64): + store.f64 little v0, v1 + return +} + +; VCode: +; block0: +; lgdr %r5, %f0 +; strvg %r5, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; lgdr %r5, %f0 +; strvg %r5, 0(%r2) ; trap: heap_oob +; br %r14 + +function %store_f128(f128, i64) { +block0(v0: f128, v1: i64): + store.f128 little v0, v1 + return +} + +; VCode: +; block0: +; vl %v1, 0(%r2) +; vlgvg %r2, %v1, 1 +; lgdr %r4, %f1 +; strvg %r2, 0(%r3) +; strvg %r4, 8(%r3) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v1, 0(%r2) +; vlgvg %r2, %v1, 1 +; lgdr %r4, %f1 +; strvg %r2, 0(%r3) ; trap: heap_oob +; strvg %r4, 8(%r3) ; trap: heap_oob +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/store.clif b/cranelift/filetests/filetests/isa/s390x/store.clif index 31e6f540b5ee..cfbfc17c3773 100644 ---
a/cranelift/filetests/filetests/isa/s390x/store.clif +++ b/cranelift/filetests/filetests/isa/s390x/store.clif @@ -504,3 +504,69 @@ block0(v0: i64): ; mviy 0x1000(%r2), 0x7b ; trap: heap_oob ; br %r14 +function %store_f16(f16, i64) { +block0(v0: f16, v1: i64): + store.f16 v0, v1 + return +} + +; VCode: +; block0: +; vsteh %v0, 0(%r2), 0 +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vsteh %v0, 0(%r2), 0 ; trap: heap_oob +; br %r14 + +function %store_f32(f32, i64) { +block0(v0: f32, v1: i64): + store.f32 v0, v1 + return +} + +; VCode: +; block0: +; ste %f0, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; ste %f0, 0(%r2) ; trap: heap_oob +; br %r14 + +function %store_f64(f64, i64) { +block0(v0: f64, v1: i64): + store.f64 v0, v1 + return +} + +; VCode: +; block0: +; std %f0, 0(%r2) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; std %f0, 0(%r2) ; trap: heap_oob +; br %r14 + +function %store_f128(f128, i64) { +block0(v0: f128, v1: i64): + store.f128 v0, v1 + return +} + +; VCode: +; block0: +; vl %v1, 0(%r2) +; vst %v1, 0(%r3) +; br %r14 +; +; Disassembled: +; block0: ; offset 0x0 +; vl %v1, 0(%r2) +; vst %v1, 0(%r3) ; trap: heap_oob +; br %r14 + diff --git a/cranelift/filetests/filetests/runtests/f128-bitcast.clif b/cranelift/filetests/filetests/runtests/f128-bitcast.clif index f2682c21fc8a..098574b39d39 100644 --- a/cranelift/filetests/filetests/runtests/f128-bitcast.clif +++ b/cranelift/filetests/filetests/runtests/f128-bitcast.clif @@ -1,9 +1,11 @@ test interpret test run set enable_llvm_abi_extensions +set enable_multi_ret_implicit_sret target x86_64 target aarch64 target riscv64 +target s390x function %bitcast_i128_f128(i128) -> f128 fast { block0(v0: i128): diff --git a/cranelift/filetests/filetests/runtests/f128-select.clif b/cranelift/filetests/filetests/runtests/f128-select.clif index bd8f80d379d6..b3f78091c0d2 100644 ---
b/cranelift/filetests/filetests/runtests/f128-select.clif @@ -1,9 +1,11 @@ test interpret test run set enable_llvm_abi_extensions +set enable_multi_ret_implicit_sret target x86_64 target aarch64 target riscv64 +target s390x function %select_icmp_i8_f128(i8, f128, f128) -> f128 { block0(v0: i8, v1: f128, v2: f128): diff --git a/cranelift/filetests/filetests/runtests/f128const.clif b/cranelift/filetests/filetests/runtests/f128const.clif index b7c1e709793a..1a325096a2ad 100644 --- a/cranelift/filetests/filetests/runtests/f128const.clif +++ b/cranelift/filetests/filetests/runtests/f128const.clif @@ -5,6 +5,7 @@ set enable_multi_ret_implicit_sret target x86_64 target aarch64 target riscv64 +target s390x ;; These values are special for RISC-V since it has a dedicated ;; instruction to generate them. diff --git a/cranelift/filetests/filetests/runtests/f16-bitcast.clif b/cranelift/filetests/filetests/runtests/f16-bitcast.clif index 62adbb6c84a6..5296c7435c60 100644 --- a/cranelift/filetests/filetests/runtests/f16-bitcast.clif +++ b/cranelift/filetests/filetests/runtests/f16-bitcast.clif @@ -6,6 +6,7 @@ target aarch64 target aarch64 has_fp16 target riscv64 target riscv64 has_zfhmin +target s390x function %bitcast_i16_f16(i16) -> f16 fast { block0(v0: i16): diff --git a/cranelift/filetests/filetests/runtests/f16-memory.clif b/cranelift/filetests/filetests/runtests/f16-memory.clif index eed1407532b0..b760a33d4874 100644 --- a/cranelift/filetests/filetests/runtests/f16-memory.clif +++ b/cranelift/filetests/filetests/runtests/f16-memory.clif @@ -2,7 +2,11 @@ test interpret test run set enable_llvm_abi_extensions target x86_64 -target riscv64 has_zfhmin has_zfh +target aarch64 +target aarch64 has_fp16 +target riscv64 +target riscv64 has_zfhmin +target s390x function %f16_load(i16) -> f16 { ss0 = explicit_slot 4 diff --git a/cranelift/filetests/filetests/runtests/f16-select.clif b/cranelift/filetests/filetests/runtests/f16-select.clif index 0d7be177c966..c57d085e5b3a 100644 --- 
a/cranelift/filetests/filetests/runtests/f16-select.clif +++ b/cranelift/filetests/filetests/runtests/f16-select.clif @@ -6,6 +6,7 @@ target aarch64 target aarch64 has_fp16 target riscv64 target riscv64 has_zfhmin +target s390x function %select_icmp_i8_f16(i8, f16, f16) -> f16 { block0(v0: i8, v1: f16, v2: f16): diff --git a/cranelift/filetests/filetests/runtests/f16const.clif b/cranelift/filetests/filetests/runtests/f16const.clif index 3ebfe91a0424..62c0a8beed99 100644 --- a/cranelift/filetests/filetests/runtests/f16const.clif +++ b/cranelift/filetests/filetests/runtests/f16const.clif @@ -12,6 +12,7 @@ target riscv64 has_zfa target riscv64 has_zfhmin has_zfa target riscv64 has_zfhmin has_zfh has_zfa target riscv64 has_zfhmin has_zvfh has_zfa +target s390x ;; These values are special for RISC-V since it has a dedicated diff --git a/cranelift/filetests/filetests/runtests/simd-bitcast-i128.clif b/cranelift/filetests/filetests/runtests/simd-bitcast-128.clif similarity index 56% rename from cranelift/filetests/filetests/runtests/simd-bitcast-i128.clif rename to cranelift/filetests/filetests/runtests/simd-bitcast-128.clif index 41cf5012f6f6..5e7f89a44a74 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitcast-i128.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitcast-128.clif @@ -24,3 +24,20 @@ block0(v0: i128): } ; run: %bitcast_i128_to_i64x2(0x0000000000c0ffee_000000000000beef) == [0xBEEF 0xC0FFEE] ; run: %bitcast_i128_to_i64x2(0x000000000000007f_ffffffffffffffff) == [-1 127] + +function %bitcast_i64x2_to_f128(i64x2) -> f128 { +block0(v0: i64x2): + v1 = bitcast.f128 little v0 + return v1 +} +; run: %bitcast_i64x2_to_f128([0xBEEF 0xC0FFEE]) == 0x0.000000c0ffee000000000000beefp-16382 +; run: %bitcast_i64x2_to_f128([-1 127]) == 0x0.00000000007fffffffffffffffffp-16382 + + +function %bitcast_f128_to_i64x2(f128) -> i64x2 { +block0(v0: f128): + v1 = bitcast.i64x2 little v0 + return v1 +} +; run: 
%bitcast_f128_to_i64x2(0x0.000000c0ffee000000000000beefp-16382) == [0xBEEF 0xC0FFEE] +; run: %bitcast_f128_to_i64x2(0x0.00000000007fffffffffffffffffp-16382) == [-1 127]