From e33c4269589a4f97c572d579240b3349534872fa Mon Sep 17 00:00:00 2001 From: Rahul Date: Fri, 28 Feb 2025 22:49:48 -0800 Subject: [PATCH] asm: sse orpd implementation (#10273) * sse orpd implementation assembler integration with isle format add clippy reason, reorder avx priority in isle bless tests for orpd create separate xmm module validate function rewrite sse condition add quote from manual for sse prefix format changes move Xmm bits under Reg * use new isle constructors for sse * remove unused function * minor changes --- .../assembler-x64/meta/src/dsl/encoding.rs | 14 ++- .../assembler-x64/meta/src/dsl/format.rs | 17 ++- cranelift/assembler-x64/meta/src/generate.rs | 12 ++ .../assembler-x64/meta/src/generate/format.rs | 74 +++++++++--- .../assembler-x64/meta/src/generate/inst.rs | 109 ++++++++++++++---- .../meta/src/generate/operand.rs | 84 +++++++++++--- .../assembler-x64/meta/src/instructions/or.rs | 1 + cranelift/assembler-x64/src/api.rs | 24 +++- cranelift/assembler-x64/src/fuzz.rs | 18 ++- cranelift/assembler-x64/src/inst.rs | 3 +- cranelift/assembler-x64/src/lib.rs | 8 +- cranelift/assembler-x64/src/mem.rs | 23 ++++ cranelift/assembler-x64/src/xmm.rs | 90 +++++++++++++++ cranelift/codegen/src/isa/x64/inst.isle | 11 +- .../codegen/src/isa/x64/inst/external.rs | 76 ++++++++++-- cranelift/codegen/src/isa/x64/lower/isle.rs | 31 ++++- .../filetests/isa/x64/fcopysign.clif | 2 +- .../isa/x64/nan-canonicalization.clif | 2 +- .../isa/x64/simd-bitwise-compile.clif | 4 +- .../filetests/isa/x64/simd-float-min-max.clif | 6 +- 20 files changed, 520 insertions(+), 89 deletions(-) create mode 100644 cranelift/assembler-x64/src/xmm.rs diff --git a/cranelift/assembler-x64/meta/src/dsl/encoding.rs b/cranelift/assembler-x64/meta/src/dsl/encoding.rs index cb5a21ead099..6ad3599a6294 100644 --- a/cranelift/assembler-x64/meta/src/dsl/encoding.rs +++ b/cranelift/assembler-x64/meta/src/dsl/encoding.rs @@ -194,10 +194,11 @@ impl Rex { if self.opcodes.prefix.contains_66() { 
assert!( - operands - .iter() - .all(|&op| matches!(op.location.kind(), OperandKind::Imm(_) | OperandKind::FixedReg(_)) - || op.location.bits() == 16), + operands.iter().all(|&op| matches!( + op.location.kind(), + OperandKind::Imm(_) | OperandKind::FixedReg(_) + ) || op.location.bits() == 16 + || op.location.bits() == 128), "when we encode the 66 prefix, we expect all operands to be 16-bit wide" ); } @@ -366,12 +367,15 @@ impl From<[u8; 4]> for Opcodes { pub enum LegacyPrefix { /// No prefix bytes. NoPrefix, - /// An operand size override typically denoting "16-bit operation". But the + /// An operand size override typically denoting "16-bit operation" or "SSE instructions". But the /// reference manual is more nuanced: /// /// > The operand-size override prefix allows a program to switch between /// > 16- and 32-bit operand sizes. Either size can be the default; use of /// > the prefix selects the non-default. + /// > Some SSE2/SSE3/SSSE3/SSE4 instructions and instructions using a three-byte + /// > sequence of primary opcode bytes may use 66H as a mandatory prefix to express + /// > distinct functionality. _66, /// The lock prefix. 
_F0, diff --git a/cranelift/assembler-x64/meta/src/dsl/format.rs b/cranelift/assembler-x64/meta/src/dsl/format.rs index 3eedcc889a64..42290bf940c1 100644 --- a/cranelift/assembler-x64/meta/src/dsl/format.rs +++ b/cranelift/assembler-x64/meta/src/dsl/format.rs @@ -212,10 +212,13 @@ pub enum Location { r32, r64, + xmm, + rm8, rm16, rm32, rm64, + rm128, } impl Location { @@ -228,6 +231,7 @@ impl Location { ax | imm16 | r16 | rm16 => 16, eax | imm32 | r32 | rm32 => 32, rax | r64 | rm64 => 64, + xmm | rm128 => 128, } } @@ -242,8 +246,8 @@ impl Location { pub fn uses_memory(&self) -> bool { use Location::*; match self { - al | cl | ax | eax | rax | imm8 | imm16 | imm32 | r8 | r16 | r32 | r64 => false, - rm8 | rm16 | rm32 | rm64 => true, + al | cl | ax | eax | rax | imm8 | imm16 | imm32 | r8 | r16 | r32 | r64 | xmm => false, + rm8 | rm16 | rm32 | rm64 | rm128 => true, } } @@ -254,7 +258,7 @@ impl Location { use Location::*; match self { al | ax | eax | rax | cl | imm8 | imm16 | imm32 => false, - r8 | r16 | r32 | r64 | rm8 | rm16 | rm32 | rm64 => true, + r8 | r16 | r32 | r64 | xmm | rm8 | rm16 | rm32 | rm64 | rm128 => true, } } @@ -265,8 +269,8 @@ impl Location { match self { al | ax | eax | rax | cl => OperandKind::FixedReg(*self), imm8 | imm16 | imm32 => OperandKind::Imm(*self), - r8 | r16 | r32 | r64 => OperandKind::Reg(*self), - rm8 | rm16 | rm32 | rm64 => OperandKind::RegMem(*self), + r8 | r16 | r32 | r64 | xmm => OperandKind::Reg(*self), + rm8 | rm16 | rm32 | rm64 | rm128 => OperandKind::RegMem(*self), } } } @@ -291,10 +295,13 @@ impl core::fmt::Display for Location { r32 => write!(f, "r32"), r64 => write!(f, "r64"), + xmm => write!(f, "xmm"), + rm8 => write!(f, "rm8"), rm16 => write!(f, "rm16"), rm32 => write!(f, "rm32"), rm64 => write!(f, "rm64"), + rm128 => write!(f, "rm128"), } } } diff --git a/cranelift/assembler-x64/meta/src/generate.rs b/cranelift/assembler-x64/meta/src/generate.rs index f5bedb8e723d..30571b976836 100644 --- 
a/cranelift/assembler-x64/meta/src/generate.rs +++ b/cranelift/assembler-x64/meta/src/generate.rs @@ -63,6 +63,10 @@ pub fn isle_definitions(f: &mut Formatter, insts: &[dsl::Inst]) { f.line("(type AssemblerReadGprMem extern (enum))", None); f.line("(type AssemblerReadWriteGprMem extern (enum))", None); f.line("(type AssemblerInst extern (enum))", None); + f.line("(type AssemblerReadXmm extern (enum))", None); + f.line("(type AssemblerReadWriteXmm extern (enum))", None); + f.line("(type AssemblerReadXmmMem extern (enum))", None); + f.line("(type AssemblerReadWriteXmmMem extern (enum))", None); f.empty_line(); f.line("(type AssemblerOutputs (enum", None); @@ -72,6 +76,8 @@ pub fn isle_definitions(f: &mut Formatter, insts: &[dsl::Inst]) { f.line(" ;; Used for instructions that return a GPR (including `GprMem` variants with", None); f.line(" ;; a GPR as the first argument).", None); f.line(" (RetGpr (inst MInst) (gpr Gpr))", None); + f.line(" ;; Used for instructions that return an XMM register.", None); + f.line(" (RetXmm (inst MInst) (xmm Xmm))", None); f.line(" ;; TODO: eventually add more variants for multi-return, XMM, etc.; see", None); f.line(" ;; https://github.com/bytecodealliance/wasmtime/pull/10276", None); f.line("))", None); @@ -83,6 +89,12 @@ pub fn isle_definitions(f: &mut Formatter, insts: &[dsl::Inst]) { f.line(" (let ((_ Unit (emit inst))) gpr))", None); f.empty_line(); + f.line(";; Directly emit instructions that return an XMM register.", None); + f.line("(decl emit_ret_xmm (AssemblerOutputs) Xmm)", None); + f.line("(rule (emit_ret_xmm (AssemblerOutputs.RetXmm inst xmm))", None); + f.line(" (let ((_ Unit (emit inst))) xmm))", None); + f.empty_line(); + f.line(";; Pass along the side-effecting instruction for later emission.", None); f.line("(decl defer_side_effect (AssemblerOutputs) SideEffectNoResult)", None); f.line("(rule (defer_side_effect (AssemblerOutputs.SideEffect inst))", None); diff --git 
a/cranelift/assembler-x64/meta/src/generate/format.rs b/cranelift/assembler-x64/meta/src/generate/format.rs index 6bf55891930b..922a5937a044 100644 --- a/cranelift/assembler-x64/meta/src/generate/format.rs +++ b/cranelift/assembler-x64/meta/src/generate/format.rs @@ -109,8 +109,16 @@ impl dsl::Format { fmtln!(f, "let {dst} = self.{dst}.enc();"); fmtln!(f, "match &self.{src} {{"); f.indent(|f| { - fmtln!(f, "GprMem::Gpr({src}) => rex.emit_two_op(buf, {dst}, {src}.enc()),"); - fmtln!(f, "GprMem::Mem({src}) => {src}.emit_rex_prefix(rex, {dst}, buf),"); + match dst.bits() { + 128 => { + fmtln!(f, "XmmMem::Xmm({src}) => rex.emit_two_op(buf, {dst}, {src}.enc()),"); + fmtln!(f, "XmmMem::Mem({src}) => {src}.emit_rex_prefix(rex, {dst}, buf),"); + } + _ => { + fmtln!(f, "GprMem::Gpr({src}) => rex.emit_two_op(buf, {dst}, {src}.enc()),"); + fmtln!(f, "GprMem::Mem({src}) => {src}.emit_rex_prefix(rex, {dst}, buf),"); + } + }; }); fmtln!(f, "}}"); } @@ -119,9 +127,15 @@ impl dsl::Format { | [RegMem(dst), Reg(src), FixedReg(_)] => { fmtln!(f, "let {src} = self.{src}.enc();"); fmtln!(f, "match &self.{dst} {{"); - f.indent(|f| { - fmtln!(f, "GprMem::Gpr({dst}) => rex.emit_two_op(buf, {src}, {dst}.enc()),"); - fmtln!(f, "GprMem::Mem({dst}) => {dst}.emit_rex_prefix(rex, {src}, buf),"); + f.indent(|f| match src.bits() { + 128 => { + fmtln!(f, "XmmMem::Xmm({dst}) => rex.emit_two_op(buf, {src}, {dst}.enc()),"); + fmtln!(f, "XmmMem::Mem({dst}) => {dst}.emit_rex_prefix(rex, {src}, buf),"); + } + _ => { + fmtln!(f, "GprMem::Gpr({dst}) => rex.emit_two_op(buf, {src}, {dst}.enc()),"); + fmtln!(f, "GprMem::Mem({dst}) => {dst}.emit_rex_prefix(rex, {src}, buf),"); + } }); fmtln!(f, "}}"); } @@ -160,8 +174,22 @@ impl dsl::Format { fmtln!(f, "let {dst} = self.{dst}.enc();"); fmtln!(f, "match &self.{src} {{"); f.indent(|f| { - fmtln!(f, "GprMem::Gpr({src}) => emit_modrm(buf, {dst}, {src}.enc()),"); - fmtln!(f, "GprMem::Mem({src}) => emit_modrm_sib_disp(buf, off, {dst}, {src}, 0, None),"); + match 
dst.bits() { + 128 => { + fmtln!(f, "XmmMem::Xmm({src}) => emit_modrm(buf, {dst}, {src}.enc()),"); + fmtln!( + f, + "XmmMem::Mem({src}) => emit_modrm_sib_disp(buf, off, {dst}, {src}, 0, None)," + ); + } + _ => { + fmtln!(f, "GprMem::Gpr({src}) => emit_modrm(buf, {dst}, {src}.enc()),"); + fmtln!( + f, + "GprMem::Mem({src}) => emit_modrm_sib_disp(buf, off, {dst}, {src}, 0, None)," + ); + } + }; }); fmtln!(f, "}}"); } @@ -171,12 +199,25 @@ impl dsl::Format { fmtln!(f, "let {src} = self.{src}.enc();"); fmtln!(f, "match &self.{dst} {{"); f.indent(|f| { - fmtln!(f, "GprMem::Gpr({dst}) => emit_modrm(buf, {src}, {dst}.enc()),"); - fmtln!(f, "GprMem::Mem({dst}) => emit_modrm_sib_disp(buf, off, {src}, {dst}, 0, None),"); + match src.bits() { + 128 => { + fmtln!(f, "XmmMem::Xmm({dst}) => emit_modrm(buf, {src}, {dst}.enc()),"); + fmtln!( + f, + "XmmMem::Mem({dst}) => emit_modrm_sib_disp(buf, off, {src}, {dst}, 0, None)," + ); + } + _ => { + fmtln!(f, "GprMem::Gpr({dst}) => emit_modrm(buf, {src}, {dst}.enc()),"); + fmtln!( + f, + "GprMem::Mem({dst}) => emit_modrm_sib_disp(buf, off, {src}, {dst}, 0, None)," + ); + } + }; }); fmtln!(f, "}}"); } - unknown => unimplemented!("unknown pattern: {unknown:?}"), } } @@ -218,13 +259,20 @@ impl dsl::Format { Read => unreachable!(), ReadWrite => match one.location.kind() { Imm(_) => unreachable!(), + FixedReg(_) => vec![IsleConstructor::RetGpr], // One read/write register output? Output the instruction // and that register. - FixedReg(_) | Reg(_) => vec![IsleConstructor::RetGpr], + Reg(r) => match r.bits() { + 128 => vec![IsleConstructor::RetXmm], + _ => vec![IsleConstructor::RetGpr], + }, // One read/write reg-mem output? We need constructors for // both variants. 
- RegMem(_) => vec![IsleConstructor::RetGpr, IsleConstructor::RetMemorySideEffect], - }, + RegMem(rm) => match rm.bits() { + 128 => vec![IsleConstructor::RetXmm, IsleConstructor::RetMemorySideEffect], + _ => vec![IsleConstructor::RetGpr, IsleConstructor::RetMemorySideEffect], + }, + } }, other => panic!("unsupported number of write operands {other:?}"), } diff --git a/cranelift/assembler-x64/meta/src/generate/inst.rs b/cranelift/assembler-x64/meta/src/generate/inst.rs index 9ccaa70e0d43..aa69b72f5dbb 100644 --- a/cranelift/assembler-x64/meta/src/generate/inst.rs +++ b/cranelift/assembler-x64/meta/src/generate/inst.rs @@ -107,7 +107,14 @@ impl dsl::Inst { if let Some(op) = self.format.uses_memory() { f.empty_line(); f.comment("Emit trap."); - fmtln!(f, "if let GprMem::Mem({op}) = &self.{op} {{"); + match op { + crate::dsl::Location::rm128 => { + fmtln!(f, "if let XmmMem::Mem({op}) = &self.{op} {{"); + } + _ => { + fmtln!(f, "if let GprMem::Mem({op}) = &self.{op} {{"); + } + } f.indent(|f| { fmtln!(f, "if let Some(trap_code) = {op}.trap_code() {{"); f.indent(|f| { @@ -151,19 +158,42 @@ impl dsl::Inst { fmtln!(f, "visitor.fixed_{call}(&R::{ty}Gpr::new({fixed}));"); } Reg(reg) => { - let call = o.mutability.generate_regalloc_call(); - fmtln!(f, "visitor.{call}(self.{reg}.as_mut());"); + match reg.bits() { + 128 => { + let call = o.mutability.generate_xmm_regalloc_call(); + fmtln!(f, "visitor.{call}(self.{reg}.as_mut());"); + } + _ => { + let call = o.mutability.generate_regalloc_call(); + fmtln!(f, "visitor.{call}(self.{reg}.as_mut());"); + } + }; } RegMem(rm) => { - let call = o.mutability.generate_regalloc_call(); - fmtln!(f, "match &mut self.{rm} {{"); - f.indent(|f| { - fmtln!(f, "GprMem::Gpr(r) => visitor.{call}(r),"); - fmtln!( - f, - "GprMem::Mem(m) => m.registers_mut().iter_mut().for_each(|r| visitor.read(r))," - ); - }); + match rm.bits() { + 128 => { + let call = o.mutability.generate_xmm_regalloc_call(); + fmtln!(f, "match &mut self.{rm} {{"); + f.indent(|f| 
{ + fmtln!(f, "XmmMem::Xmm(r) => visitor.{call}(r),"); + fmtln!( + f, + "XmmMem::Mem(m) => m.registers_mut().iter_mut().for_each(|r| visitor.read(r))," + ); + }); + } + _ => { + let call = o.mutability.generate_regalloc_call(); + fmtln!(f, "match &mut self.{rm} {{"); + f.indent(|f| { + fmtln!(f, "GprMem::Gpr(r) => visitor.{call}(r),"); + fmtln!( + f, + "GprMem::Mem(m) => m.registers_mut().iter_mut().for_each(|r| visitor.read(r))," + ); + }); + } + }; fmtln!(f, "}}"); } } @@ -278,10 +308,16 @@ impl dsl::Inst { OperandKind::FixedReg(_) => fmtln!(f, "todo!()"), // One read/write register output? Output the instruction // and that register. - OperandKind::Reg(_) => { - fmtln!(f, "let gpr = {}.as_ref().write.to_reg();", results[0].location); - fmtln!(f, "AssemblerOutputs::RetGpr {{ inst, gpr }}") - } + OperandKind::Reg(r) => match r.bits() { + 128 => { + fmtln!(f, "let xmm = {}.as_ref().write.to_reg();", results[0].location); + fmtln!(f, "AssemblerOutputs::RetXmm {{ inst, xmm }}") + } + _ => { + fmtln!(f, "let gpr = {}.as_ref().write.to_reg();", results[0].location); + fmtln!(f, "AssemblerOutputs::RetGpr {{ inst, gpr }}") + } + }, // One read/write regmem output? We need to output // everything and it'll internally disambiguate which was // emitted (e.g. the mem variant or the register variant). 
@@ -289,16 +325,32 @@ impl dsl::Inst { assert_eq!(results.len(), 1); let l = results[0].location; fmtln!(f, "match {l} {{"); - f.indent(|f| { - fmtln!(f, "asm::GprMem::Gpr(reg) => {{"); - fmtln!(f, "let gpr = reg.write.to_reg();"); - fmtln!(f, "AssemblerOutputs::RetGpr {{ inst, gpr }} "); - fmtln!(f, "}}"); + match l.bits() { + 128 => { + f.indent(|f| { + fmtln!(f, "asm::XmmMem::Xmm(reg) => {{"); + fmtln!(f, "let xmm = reg.write.to_reg();"); + fmtln!(f, "AssemblerOutputs::RetXmm {{ inst, xmm }} "); + fmtln!(f, "}}"); - fmtln!(f, "asm::GprMem::Mem(_) => {{"); - fmtln!(f, "AssemblerOutputs::SideEffect {{ inst }} "); - fmtln!(f, "}}"); - }); + fmtln!(f, "asm::XmmMem::Mem(_) => {{"); + fmtln!(f, "AssemblerOutputs::SideEffect {{ inst }} "); + fmtln!(f, "}}"); + }); + } + _ => { + f.indent(|f| { + fmtln!(f, "asm::GprMem::Gpr(reg) => {{"); + fmtln!(f, "let gpr = reg.write.to_reg();"); + fmtln!(f, "AssemblerOutputs::RetGpr {{ inst, gpr }} "); + fmtln!(f, "}}"); + + fmtln!(f, "asm::GprMem::Mem(_) => {{"); + fmtln!(f, "AssemblerOutputs::SideEffect {{ inst }} "); + fmtln!(f, "}}"); + }); + } + }; fmtln!(f, "}}"); } }, @@ -413,6 +465,10 @@ pub enum IsleConstructor { /// This constructor produces a `Gpr` value, meaning that it will write the /// result to a `Gpr`. RetGpr, + + /// This constructor produces an `Xmm` value, meaning that it will write the + /// result to an `Xmm`. 
+ RetXmm, } impl IsleConstructor { @@ -421,6 +477,7 @@ impl IsleConstructor { match self { IsleConstructor::RetMemorySideEffect => "SideEffectNoResult", IsleConstructor::RetGpr => "Gpr", + IsleConstructor::RetXmm => "Xmm", } } @@ -430,6 +487,7 @@ impl IsleConstructor { match self { IsleConstructor::RetMemorySideEffect => "defer_side_effect", IsleConstructor::RetGpr => "emit_ret_gpr", + IsleConstructor::RetXmm => "emit_ret_xmm", } } @@ -438,6 +496,7 @@ impl IsleConstructor { match self { IsleConstructor::RetMemorySideEffect => "_mem", IsleConstructor::RetGpr => "", + IsleConstructor::RetXmm => "", } } } diff --git a/cranelift/assembler-x64/meta/src/generate/operand.rs b/cranelift/assembler-x64/meta/src/generate/operand.rs index 351dd70bd66b..69df8630aba4 100644 --- a/cranelift/assembler-x64/meta/src/generate/operand.rs +++ b/cranelift/assembler-x64/meta/src/generate/operand.rs @@ -15,8 +15,14 @@ impl dsl::Operand { Some(format!("Imm{bits}")) } } - Reg(_) => Some(format!("Gpr", self.mutability.generate_type())), - RegMem(_) => Some(format!("GprMem", self.mutability.generate_type())), + Reg(r) => match r.bits() { + 128 => Some(format!("Xmm", self.mutability.generate_type())), + _ => Some(format!("Gpr", self.mutability.generate_type())), + }, + RegMem(rm) => match rm.bits() { + 128 => Some(format!("XmmMem", self.mutability.generate_type())), + _ => Some(format!("GprMem", self.mutability.generate_type())), + }, } } @@ -38,8 +44,14 @@ impl dsl::Operand { Some(format!("Imm{bits}")) } } - Reg(_) => Some(format!("Gpr<{pick_ty}>")), - RegMem(_) => Some(format!("GprMem<{pick_ty}, {read_ty}>")), + Reg(r) => match r.bits() { + 128 => Some(format!("Xmm<{pick_ty}>")), + _ => Some(format!("Gpr<{pick_ty}>")), + }, + RegMem(rm) => match rm.bits() { + 128 => Some(format!("XmmMem<{pick_ty}, Gpr>")), + _ => Some(format!("GprMem<{pick_ty}, {read_ty}>")), + }, } } @@ -55,9 +67,15 @@ impl dsl::Operand { format!("AssemblerImm{bits}") } } - OperandKind::Reg(_) => "Gpr".to_string(), + 
OperandKind::Reg(r) => match r.bits() { + 128 => "Xmm".to_string(), + _ => "Gpr".to_string(), + }, OperandKind::FixedReg(_) => "Gpr".to_string(), - OperandKind::RegMem(_) => "GprMem".to_string(), + OperandKind::RegMem(rm) => match rm.bits() { + 128 => "XmmMem".to_string(), + _ => "GprMem".to_string(), + }, } } @@ -72,6 +90,7 @@ impl dsl::Operand { OperandKind::RegMem(_) if self.mutability.is_write() => match ctor { IsleConstructor::RetMemorySideEffect => "Amode".to_string(), IsleConstructor::RetGpr => "Gpr".to_string(), + IsleConstructor::RetXmm => "Xmm".to_string(), }, // everything else is the same as the "raw" variant @@ -90,8 +109,15 @@ impl dsl::Operand { format!("&cranelift_assembler_x64::Imm{bits}") } } - OperandKind::RegMem(_) => "&GprMem".to_string(), - OperandKind::Reg(_) | OperandKind::FixedReg(_) => "Gpr".to_string(), + OperandKind::RegMem(rm) => match rm.bits() { + 128 => "&XmmMem".to_string(), + _ => "&GprMem".to_string(), + }, + OperandKind::Reg(r) => match r.bits() { + 128 => "Xmm".to_string(), + _ => "Gpr".to_string(), + }, + OperandKind::FixedReg(_) => "Gpr".to_string(), } } @@ -100,14 +126,26 @@ impl dsl::Operand { /// Effectively converts `self.rust_param_raw()` to the assembler type. 
pub fn rust_convert_isle_to_assembler(&self) -> Option<&'static str> { match self.location.kind() { - OperandKind::Reg(_) => Some(match self.mutability { - Mutability::Read => "cranelift_assembler_x64::Gpr::new", - Mutability::ReadWrite => "self.convert_gpr_to_assembler_read_write_gpr", - }), - OperandKind::RegMem(_) => Some(match self.mutability { - Mutability::Read => "self.convert_gpr_mem_to_assembler_read_gpr_mem", - Mutability::ReadWrite => "self.convert_gpr_mem_to_assembler_read_write_gpr_mem", - }), + OperandKind::Reg(r) => match r.bits() { + 128 => Some(match self.mutability { + Mutability::Read => "cranelift_assembler_x64::Xmm::new", + Mutability::ReadWrite => "self.convert_xmm_to_assembler_read_write_xmm", + }), + _ => Some(match self.mutability { + Mutability::Read => "cranelift_assembler_x64::Gpr::new", + Mutability::ReadWrite => "self.convert_gpr_to_assembler_read_write_gpr", + }), + }, + OperandKind::RegMem(rm) => match rm.bits() { + 128 => Some(match self.mutability { + Mutability::Read => "self.convert_xmm_mem_to_assembler_read_xmm_mem", + Mutability::ReadWrite => "self.convert_xmm_mem_to_assembler_read_write_xmm_mem", + }), + _ => Some(match self.mutability { + Mutability::Read => "self.convert_gpr_mem_to_assembler_read_gpr_mem", + Mutability::ReadWrite => "self.convert_gpr_mem_to_assembler_read_write_gpr_mem", + }), + }, OperandKind::FixedReg(_) | OperandKind::Imm(_) => None, } } @@ -129,6 +167,8 @@ impl dsl::Location { imm32 => Some("Imm32".into()), r8 | r16 | r32 | r64 => Some(format!("Gpr{generic}")), rm8 | rm16 | rm32 | rm64 => Some(format!("GprMem{generic}")), + xmm => Some(format!("Xmm{generic}")), + rm128 => Some(format!("XmmMem{generic}")), } } @@ -154,6 +194,7 @@ impl dsl::Location { Some(size) => format!("self.{self}.to_string({size})"), None => unreachable!(), }, + xmm | rm128 => format!("self.{self}.to_string()"), } } @@ -167,6 +208,7 @@ impl dsl::Location { r16 | rm16 => Some("Size::Word"), r32 | rm32 => Some("Size::Doubleword"), r64 
| rm64 => Some("Size::Quadword"), + xmm | rm128 => panic!("no need to generate a size for XMM-sized access"), } } @@ -177,7 +219,7 @@ impl dsl::Location { match self { al | ax | eax | rax => Some("reg::enc::RAX"), cl => Some("reg::enc::RCX"), - imm8 | imm16 | imm32 | r8 | r16 | r32 | r64 | rm8 | rm16 | rm32 | rm64 => None, + imm8 | imm16 | imm32 | r8 | r16 | r32 | r64 | xmm | rm8 | rm16 | rm32 | rm64 | rm128 => None, } } } @@ -198,6 +240,14 @@ impl dsl::Mutability { dsl::Mutability::ReadWrite => "ReadWrite", } } + + #[must_use] + pub fn generate_xmm_regalloc_call(&self) -> &str { + match self { + dsl::Mutability::Read => "read_xmm", + dsl::Mutability::ReadWrite => "read_write_xmm", + } + } } impl dsl::Extension { diff --git a/cranelift/assembler-x64/meta/src/instructions/or.rs b/cranelift/assembler-x64/meta/src/instructions/or.rs index ccff10f0d742..2b918991ef24 100644 --- a/cranelift/assembler-x64/meta/src/instructions/or.rs +++ b/cranelift/assembler-x64/meta/src/instructions/or.rs @@ -21,5 +21,6 @@ pub fn list() -> Vec { inst("orw", fmt("RM", [rw(r16), r(rm16)]), rex([0x66, 0x0B]).r(), _64b | compat), inst("orl", fmt("RM", [rw(r32), r(rm32)]), rex(0x0B).r(), _64b | compat), inst("orq", fmt("RM", [rw(r64), r(rm64)]), rex(0x0B).w().r(), _64b), + inst("orpd", fmt("A", [rw(xmm), r(rm128)]), rex([0x66, 0x0F, 0x56]).r(), _64b), ] } diff --git a/cranelift/assembler-x64/src/api.rs b/cranelift/assembler-x64/src/api.rs index 6b0d9e2da93c..384c76c25b92 100644 --- a/cranelift/assembler-x64/src/api.rs +++ b/cranelift/assembler-x64/src/api.rs @@ -1,6 +1,7 @@ //! Contains traits that a user of this assembler must implement. use crate::reg; +use crate::xmm; use std::{num::NonZeroU8, ops::Index, vec::Vec}; /// Describe how an instruction is emitted into a code buffer. @@ -109,6 +110,12 @@ pub trait Registers { /// An x64 general purpose register that may be read and written. type ReadWriteGpr: AsReg; + + /// An x64 SSE register that may be read. 
+ type ReadXmm: AsReg; + + /// An x64 SSE register that may be read and written. + type ReadWriteXmm: AsReg; } /// Describe how to interact with an external register type. @@ -123,8 +130,11 @@ pub trait AsReg: Clone + std::fmt::Debug { fn enc(&self) -> u8; /// Return the register name. - fn to_string(&self, size: reg::Size) -> &str { - reg::enc::to_string(self.enc(), size) + fn to_string(&self, size: Option) -> &str { + match size { + Some(size) => reg::enc::to_string(self.enc(), size), + None => xmm::enc::to_string(self.enc()), + } } } @@ -155,4 +165,14 @@ pub trait RegisterVisitor { /// Visit a read-write fixed register; for safety, this register cannot be /// modified in-place. fn fixed_read_write(&mut self, reg: &R::ReadWriteGpr); + /// Visit a read-only SSE register. + fn read_xmm(&mut self, reg: &mut R::ReadXmm); + /// Visit a read-write SSE register. + fn read_write_xmm(&mut self, reg: &mut R::ReadWriteXmm); + /// Visit a read-only fixed SSE register; for safety, this register cannot + /// be modified in-place. + fn fixed_read_xmm(&mut self, reg: &R::ReadXmm); + /// Visit a read-write fixed SSE register; for safety, this register cannot + /// be modified in-place. + fn fixed_read_write_xmm(&mut self, reg: &R::ReadWriteXmm); } diff --git a/cranelift/assembler-x64/src/fuzz.rs b/cranelift/assembler-x64/src/fuzz.rs index 1b472146f06a..f4d703a46bc7 100644 --- a/cranelift/assembler-x64/src/fuzz.rs +++ b/cranelift/assembler-x64/src/fuzz.rs @@ -4,7 +4,7 @@ //! throughout this crate to avoid depending on the `arbitrary` crate //! unconditionally (use the `fuzz` feature instead). 
-use crate::{AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, Gpr, Inst, NonRspGpr, Registers}; +use crate::{AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, Gpr, Inst, NonRspGpr, Registers, Xmm}; use arbitrary::{Arbitrary, Result, Unstructured}; use capstone::{arch::x86, arch::BuildsCapstone, arch::BuildsCapstoneSyntax, Capstone}; @@ -165,6 +165,8 @@ pub struct FuzzRegs; impl Registers for FuzzRegs { type ReadGpr = FuzzReg; type ReadWriteGpr = FuzzReg; + type ReadXmm = FuzzReg; + type ReadWriteXmm = FuzzReg; } /// A simple `u8` register type for fuzzing only. @@ -209,11 +211,21 @@ impl<'a, R: AsReg> Arbitrary<'a> for Gpr { Ok(Self(R::new(u.int_in_range(0..=15)?))) } } +impl<'a, R: AsReg> Arbitrary<'a> for Xmm { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(Self(R::new(u.int_in_range(0..=15)?))) + } +} /// Helper trait that's used to be the same as `Registers` except with an extra /// `for<'a> Arbitrary<'a>` bound on all of the associated types. pub trait RegistersArbitrary: - Registers Arbitrary<'a>, ReadWriteGpr: for<'a> Arbitrary<'a>> + Registers< + ReadGpr: for<'a> Arbitrary<'a>, + ReadWriteGpr: for<'a> Arbitrary<'a>, + ReadXmm: for<'a> Arbitrary<'a>, + ReadWriteXmm: for<'a> Arbitrary<'a>, +> { } @@ -222,6 +234,8 @@ where R: Registers, R::ReadGpr: for<'a> Arbitrary<'a>, R::ReadWriteGpr: for<'a> Arbitrary<'a>, + R::ReadXmm: for<'a> Arbitrary<'a>, + R::ReadWriteXmm: for<'a> Arbitrary<'a>, { } diff --git a/cranelift/assembler-x64/src/inst.rs b/cranelift/assembler-x64/src/inst.rs index 6f0284bf4181..487b458c3f2b 100644 --- a/cranelift/assembler-x64/src/inst.rs +++ b/cranelift/assembler-x64/src/inst.rs @@ -5,9 +5,10 @@ use crate::api::{AsReg, CodeSink, KnownOffsetTable, RegisterVisitor, Registers}; use crate::imm::{Extension, Imm16, Imm32, Imm8, Simm32, Simm8}; -use crate::mem::{emit_modrm_sib_disp, GprMem}; +use crate::mem::{emit_modrm_sib_disp, GprMem, XmmMem}; use crate::reg::{self, Gpr, Size}; use crate::rex::{self, RexFlags}; +use crate::xmm::Xmm; // 
Include code generated by the `meta` crate. include!(concat!(env!("OUT_DIR"), "/assembler.rs")); diff --git a/cranelift/assembler-x64/src/lib.rs b/cranelift/assembler-x64/src/lib.rs index 72c93320a584..796c6713f565 100644 --- a/cranelift/assembler-x64/src/lib.rs +++ b/cranelift/assembler-x64/src/lib.rs @@ -13,6 +13,8 @@ //! impl Registers for Regs { //! type ReadGpr = u8; //! type ReadWriteGpr = u8; +//! type ReadXmm = u8; +//! type ReadWriteXmm = u8; //! } //! //! // Then, build one of the `AND` instructions; this one operates on an @@ -48,6 +50,7 @@ pub mod isle; mod mem; mod reg; mod rex; +mod xmm; #[cfg(any(test, feature = "fuzz"))] pub mod fuzz; @@ -75,9 +78,12 @@ pub use api::{ TrapCode, }; pub use imm::{Extension, Imm16, Imm32, Imm8, Simm16, Simm32, Simm8}; -pub use mem::{Amode, AmodeOffset, AmodeOffsetPlusKnownOffset, DeferredTarget, GprMem, Scale}; +pub use mem::{ + Amode, AmodeOffset, AmodeOffsetPlusKnownOffset, DeferredTarget, GprMem, Scale, XmmMem, +}; pub use reg::{Gpr, NonRspGpr, Size}; pub use rex::RexFlags; +pub use xmm::Xmm; /// List the files generated to create this assembler. pub fn generated_files() -> Vec { diff --git a/cranelift/assembler-x64/src/mem.rs b/cranelift/assembler-x64/src/mem.rs index b6396ea5bf7c..2e9f6a39f6c5 100644 --- a/cranelift/assembler-x64/src/mem.rs +++ b/cranelift/assembler-x64/src/mem.rs @@ -3,6 +3,7 @@ use crate::api::{AsReg, CodeSink, Constant, KnownOffset, KnownOffsetTable, Label, TrapCode}; use crate::reg::{self, NonRspGpr, Size}; use crate::rex::{encode_modrm, encode_sib, Imm, RexFlags}; +use crate::xmm; /// x64 memory addressing modes. #[derive(Clone, Debug)] @@ -272,6 +273,28 @@ impl GprMem { } } +/// An XMM register or memory operand. 
+#[derive(Clone, Debug)] +#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))] +#[allow( + clippy::module_name_repetitions, + reason = "'XmmMem' indicates this has Xmm and memory variants" +)] +pub enum XmmMem { + Xmm(R), + Mem(Amode), +} + +impl XmmMem { + /// Pretty-print the operand. + pub fn to_string(&self) -> String { + match self { + XmmMem::Xmm(xmm) => xmm::enc::to_string(xmm.enc()).to_owned(), + XmmMem::Mem(amode) => amode.to_string(), + } + } +} + /// Emit the ModRM/SIB/displacement sequence for a memory operand. pub fn emit_modrm_sib_disp( sink: &mut impl CodeSink, diff --git a/cranelift/assembler-x64/src/xmm.rs b/cranelift/assembler-x64/src/xmm.rs new file mode 100644 index 000000000000..eaa467a4ee6b --- /dev/null +++ b/cranelift/assembler-x64/src/xmm.rs @@ -0,0 +1,90 @@ +//! Xmm register operands; see [`Xmm`]. + +use crate::AsReg; + +/// An x64 SSE register (e.g., `%xmm0`). +#[derive(Clone, Copy, Debug)] +pub struct Xmm(pub(crate) R); + +impl Xmm { + /// Create a new [`Xmm`] register. + pub fn new(reg: R) -> Self { + Self(reg) + } + + /// Return the register's hardware encoding; the underlying type `R` _must_ + /// be a real register at this point. + /// + /// # Panics + /// + /// Panics if the register is not a valid Xmm register. + pub fn enc(&self) -> u8 { + let enc = self.0.enc(); + assert!(enc < 16, "invalid register: {enc}"); + enc + } + + /// Return the register name. + pub fn to_string(&self) -> &str { + enc::to_string(self.enc()) + } +} + +impl AsRef for Xmm { + fn as_ref(&self) -> &R { + &self.0 + } +} + +impl AsMut for Xmm { + fn as_mut(&mut self) -> &mut R { + &mut self.0 + } +} + +/// Encode xmm registers. 
+pub mod enc { + pub const XMM0: u8 = 0; + pub const XMM1: u8 = 1; + pub const XMM2: u8 = 2; + pub const XMM3: u8 = 3; + pub const XMM4: u8 = 4; + pub const XMM5: u8 = 5; + pub const XMM6: u8 = 6; + pub const XMM7: u8 = 7; + pub const XMM8: u8 = 8; + pub const XMM9: u8 = 9; + pub const XMM10: u8 = 10; + pub const XMM11: u8 = 11; + pub const XMM12: u8 = 12; + pub const XMM13: u8 = 13; + pub const XMM14: u8 = 14; + pub const XMM15: u8 = 15; + + /// Return the name of a XMM encoding (`enc`). + /// + /// # Panics + /// + /// This function will panic if the encoding is not a valid x64 register. + pub fn to_string(enc: u8) -> &'static str { + match enc { + XMM0 => "%xmm0", + XMM1 => "%xmm1", + XMM2 => "%xmm2", + XMM3 => "%xmm3", + XMM4 => "%xmm4", + XMM5 => "%xmm5", + XMM6 => "%xmm6", + XMM7 => "%xmm7", + XMM8 => "%xmm8", + XMM9 => "%xmm9", + XMM10 => "%xmm10", + XMM11 => "%xmm11", + XMM12 => "%xmm12", + XMM13 => "%xmm13", + XMM14 => "%xmm14", + XMM15 => "%xmm15", + _ => panic!("%invalid{enc}"), + } + } +} diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 8022158b4562..f6d8ca482def 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -2962,6 +2962,10 @@ (extern extractor is_gpr is_gpr) (decl is_gpr_mem (GprMem) GprMemImm) (extern extractor is_gpr_mem is_gpr_mem) +(decl is_xmm_mem (XmmMem) XmmMem) +(extern extractor is_xmm_mem is_xmm_mem) +(decl is_xmm (Xmm) XmmMem) +(extern extractor is_xmm is_xmm) ;; Helpers to auto-convert to and from assembler types. @@ -3596,11 +3600,12 @@ ;; Helper for creating `orpd` instructions. 
(decl x64_orpd (Xmm XmmMem) Xmm) -(rule 0 (x64_orpd src1 src2) - (xmm_rm_r (SseOpcode.Orpd) src1 src2)) -(rule 1 (x64_orpd src1 src2) +(rule 2 (x64_orpd src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vorpd) src1 src2)) +(rule 1 (x64_orpd src1 (is_xmm_mem src2)) (x64_orpd_a src1 src2)) +(rule 0 (x64_orpd src1 src2) + (xmm_rm_r (SseOpcode.Orpd) src1 src2)) ;; Helper fxor creating `pxor` instructions. (decl x64_pxor (Xmm XmmMem) Xmm) diff --git a/cranelift/codegen/src/isa/x64/inst/external.rs b/cranelift/codegen/src/isa/x64/inst/external.rs index 9d331ef78996..6a111ed136bb 100644 --- a/cranelift/codegen/src/isa/x64/inst/external.rs +++ b/cranelift/codegen/src/isa/x64/inst/external.rs @@ -2,7 +2,7 @@ use super::{ regs, Amode, Gpr, Inst, LabelUse, MachBuffer, MachLabel, OperandVisitor, OperandVisitorImpl, - SyntheticAmode, VCodeConstant, WritableGpr, + SyntheticAmode, VCodeConstant, WritableGpr, WritableXmm, Xmm, }; use crate::ir::TrapCode; use cranelift_assembler_x64 as asm; @@ -13,6 +13,8 @@ pub struct CraneliftRegisters; impl asm::Registers for CraneliftRegisters { type ReadGpr = Gpr; type ReadWriteGpr = PairedGpr; + type ReadXmm = Xmm; + type ReadWriteXmm = PairedXmm; } /// A pair of registers, one for reading and one for writing. @@ -30,8 +32,8 @@ pub struct PairedGpr { impl asm::AsReg for PairedGpr { fn enc(&self) -> u8 { let PairedGpr { read, write } = self; - let read = enc(read); - let write = enc(&write.to_reg()); + let read = enc_gpr(read); + let write = enc_gpr(&write.to_reg()); assert_eq!(read, write); write } @@ -41,10 +43,42 @@ impl asm::AsReg for PairedGpr { } } -/// This bridges the gap between codegen and assembler register types. +/// A pair of XMM registers, one for reading and one for writing. 
+#[derive(Clone, Copy, Debug)] +pub struct PairedXmm { + pub(crate) read: Xmm, + pub(crate) write: WritableXmm, +} + +impl asm::AsReg for PairedXmm { + fn enc(&self) -> u8 { + let PairedXmm { read, write } = self; + let read = enc_xmm(read); + let write = enc_xmm(&write.to_reg()); + assert_eq!(read, write); + write + } + + fn new(_: u8) -> Self { + panic!("disallow creation of new assembler registers") + } +} + +/// This bridges the gap between codegen and assembler for general purpose register types. impl asm::AsReg for Gpr { fn enc(&self) -> u8 { - enc(self) + enc_gpr(self) + } + + fn new(_: u8) -> Self { + panic!("disallow creation of new assembler registers") + } +} + +/// This bridges the gap between codegen and assembler for xmm register types. +impl asm::AsReg for Xmm { + fn enc(&self) -> u8 { + enc_xmm(self) } fn new(_: u8) -> Self { @@ -52,9 +86,9 @@ impl asm::AsReg for Gpr { } } -/// A helper method for extracting the hardware encoding of a register. +/// A helper method for extracting the hardware encoding of a general purpose register. #[inline] -fn enc(gpr: &Gpr) -> u8 { +fn enc_gpr(gpr: &Gpr) -> u8 { if let Some(real) = gpr.to_reg().to_real_reg() { real.hw_enc() } else { @@ -62,6 +96,16 @@ fn enc(gpr: &Gpr) -> u8 { } } +/// A helper method for extracting the hardware encoding of an xmm register. +#[inline] +fn enc_xmm(xmm: &Xmm) -> u8 { + if let Some(real) = xmm.to_reg().to_real_reg() { + real.hw_enc() + } else { + unreachable!() + } +} + /// A wrapper to implement the `cranelift-assembler-x64` register allocation trait, /// `RegallocVisitor`, in terms of the trait used in Cranelift, /// `OperandVisitor`. 
@@ -90,6 +134,24 @@ impl<'a, T: OperandVisitor> asm::RegisterVisitor<CraneliftRegisters> for Regallo fn fixed_read_write(&mut self, _reg: &PairedGpr) { todo!() } + + fn read_xmm(&mut self, reg: &mut Xmm) { + self.collector.reg_use(reg); + } + + fn read_write_xmm(&mut self, reg: &mut PairedXmm) { + let PairedXmm { read, write } = reg; + self.collector.reg_use(read); + self.collector.reg_reuse_def(write, 0); + } + + fn fixed_read_xmm(&mut self, _reg: &Xmm) { + todo!() + } + + fn fixed_read_write_xmm(&mut self, _reg: &PairedXmm) { + todo!() + } } impl Into<asm::Amode<Gpr>> for SyntheticAmode { diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 85d3555f4de8..22f758778cfa 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -6,7 +6,7 @@ use crate::{ir::types, ir::AtomicRmwOp, isa}; use generated_code::{AssemblerOutputs, Context, MInst, RegisterClass}; // Types that the generated ISLE code uses via `use super::*`. -use super::external::{CraneliftRegisters, PairedGpr}; +use super::external::{CraneliftRegisters, PairedGpr, PairedXmm}; use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, MergeableLoadSize}; use crate::ir::condcodes::{FloatCC, IntCC}; use crate::ir::immediates::*; @@ -1019,6 +1019,13 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { } } + fn is_xmm(&mut self, src: &XmmMem) -> Option<Xmm> { + match src.clone().to_reg_mem() { + RegMem::Reg { reg } => Xmm::new(reg), + _ => None, + } + } + fn is_gpr_mem(&mut self, src: &GprMemImm) -> Option<GprMem> { match src.clone().to_reg_mem_imm() { RegMemImm::Reg { reg } => GprMem::new(RegMem::Reg { reg }), @@ -1027,6 +1034,13 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { } } + fn is_xmm_mem(&mut self, src: &XmmMem) -> Option<XmmMem> { + match src.clone().to_reg_mem() { + RegMem::Reg { reg } => XmmMem::new(RegMem::Reg { reg }), + RegMem::Mem { addr } => XmmMem::new(RegMem::Mem { addr }), + } + } + fn u8_to_assembler_imm8(&mut 
self, val: u8) -> AssemblerImm8 { AssemblerImm8::new(val) } @@ -1050,6 +1064,13 @@ impl IsleContext<'_, '_, MInst, X64Backend> { asm::Gpr::new(PairedGpr { read, write }) } + /// Helper used by code generated by the `cranelift-assembler-x64` crate. + fn convert_xmm_to_assembler_read_write_xmm(&mut self, read: Xmm) -> asm::Xmm<PairedXmm> { + let write = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap(); + let write = WritableXmm::from_writable_reg(write).unwrap(); + asm::Xmm::new(PairedXmm { read, write }) + } + /// Helper used by code generated by the `cranelift-assembler-x64` crate. fn convert_gpr_mem_to_assembler_read_gpr_mem(&self, read: &GprMem) -> asm::GprMem<Gpr, Gpr> { match read.clone().into() { @@ -1058,6 +1079,14 @@ impl IsleContext<'_, '_, MInst, X64Backend> { } } + /// Helper used by code generated by the `cranelift-assembler-x64` crate. + fn convert_xmm_mem_to_assembler_read_xmm_mem(&self, read: &XmmMem) -> asm::XmmMem<Xmm, Gpr> { + match read.clone().into() { + RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()), + RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()), + } + } + /// Helper used by code generated by the `cranelift-assembler-x64` crate. 
fn convert_gpr_mem_to_assembler_read_write_gpr_mem( &mut self, diff --git a/cranelift/filetests/filetests/isa/x64/fcopysign.clif b/cranelift/filetests/filetests/isa/x64/fcopysign.clif index c2f39049ac96..19d288482a4f 100644 --- a/cranelift/filetests/filetests/isa/x64/fcopysign.clif +++ b/cranelift/filetests/filetests/isa/x64/fcopysign.clif @@ -54,7 +54,7 @@ block0(v0: f64, v1: f64): ; movdqa %xmm7, %xmm0 ; andnpd %xmm0, %xmm2, %xmm0 ; andpd %xmm7, %xmm1, %xmm7 -; orpd %xmm0, %xmm7, %xmm0 +; orpd %xmm7, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif b/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif index 7454b82fa04d..144a19654be3 100644 --- a/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif +++ b/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif @@ -68,7 +68,7 @@ block0(v0: f64, v1: f64): ; cmppd $3, %xmm0, %xmm7, %xmm0 ; andpd %xmm5, %xmm0, %xmm5 ; andnpd %xmm0, %xmm7, %xmm0 -; orpd %xmm0, %xmm5, %xmm0 +; orpd %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index d2d76ff6de8b..335833332add 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -111,7 +111,7 @@ block0(v0: f64x2, v1: f64x2): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; orpd %xmm0, %xmm1, %xmm0 +; orpd %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -296,7 +296,7 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2): ; block0: ; andpd %xmm1, %xmm0, %xmm1 ; andnpd %xmm0, %xmm2, %xmm0 -; orpd %xmm0, %xmm1, %xmm0 +; orpd %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-float-min-max.clif b/cranelift/filetests/filetests/isa/x64/simd-float-min-max.clif index 6294479465ce..4ca19dce7fa6 100644 --- 
a/cranelift/filetests/filetests/isa/x64/simd-float-min-max.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-float-min-max.clif @@ -110,7 +110,7 @@ block0(v0: i64, v1: f64x2): ; maxpd %xmm4, %xmm6, %xmm4 ; movdqa %xmm0, %xmm1 ; xorpd %xmm1, %xmm4, %xmm1 -; orpd %xmm0, %xmm1, %xmm0 +; orpd %xmm1, %xmm0 ; movdqa %xmm0, %xmm4 ; subpd %xmm4, %xmm1, %xmm4 ; cmppd $3, %xmm0, %xmm0, %xmm0 @@ -157,9 +157,9 @@ block0(v0: i64, v1: f64x2): ; minpd %xmm0, %xmm4, %xmm0 ; minpd %xmm4, %xmm5, %xmm4 ; movdqa %xmm0, %xmm2 -; orpd %xmm2, %xmm4, %xmm2 +; orpd %xmm4, %xmm2 ; cmppd $3, %xmm0, %xmm4, %xmm0 -; orpd %xmm2, %xmm0, %xmm2 +; orpd %xmm0, %xmm2 ; psrlq %xmm0, $13, %xmm0 ; andnpd %xmm0, %xmm2, %xmm0 ; movq %rbp, %rsp