Skip to content

Commit

Permalink
asm: sse orpd implementation (#10273)
Browse files Browse the repository at this point in the history
* sse orpd implementation

assembler integration with isle

format

add clippy reason, reorder avx priority in isle

bless tests for orpd

create separate xmm module

validate function rewrite sse condition

add quote from manual for sse prefix

format changes

move Xmm bits under Reg

* use new isle constructors for sse

* remove unused function

* minor changes
  • Loading branch information
rahulchaphalkar authored Mar 1, 2025
1 parent af31e80 commit e33c426
Show file tree
Hide file tree
Showing 20 changed files with 520 additions and 89 deletions.
14 changes: 9 additions & 5 deletions cranelift/assembler-x64/meta/src/dsl/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,11 @@ impl Rex {

if self.opcodes.prefix.contains_66() {
assert!(
operands
.iter()
.all(|&op| matches!(op.location.kind(), OperandKind::Imm(_) | OperandKind::FixedReg(_))
|| op.location.bits() == 16),
operands.iter().all(|&op| matches!(
op.location.kind(),
OperandKind::Imm(_) | OperandKind::FixedReg(_)
) || op.location.bits() == 16
|| op.location.bits() == 128),
"when we encode the 66 prefix, we expect all operands to be 16-bit wide"
);
}
Expand Down Expand Up @@ -366,12 +367,15 @@ impl From<[u8; 4]> for Opcodes {
pub enum LegacyPrefix {
/// No prefix bytes.
NoPrefix,
/// An operand size override typically denoting "16-bit operation". But the
/// An operand size override, typically denoting a "16-bit operation" or acting as a mandatory prefix for some SSE instructions. But the
/// reference manual is more nuanced:
///
/// > The operand-size override prefix allows a program to switch between
/// > 16- and 32-bit operand sizes. Either size can be the default; use of
/// > the prefix selects the non-default.
/// > Some SSE2/SSE3/SSSE3/SSE4 instructions and instructions using a three-byte
/// > sequence of primary opcode bytes may use 66H as a mandatory prefix to express
/// > distinct functionality.
_66,
/// The lock prefix.
_F0,
Expand Down
17 changes: 12 additions & 5 deletions cranelift/assembler-x64/meta/src/dsl/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,13 @@ pub enum Location {
r32,
r64,

xmm,

rm8,
rm16,
rm32,
rm64,
rm128,
}

impl Location {
Expand All @@ -228,6 +231,7 @@ impl Location {
ax | imm16 | r16 | rm16 => 16,
eax | imm32 | r32 | rm32 => 32,
rax | r64 | rm64 => 64,
xmm | rm128 => 128,
}
}

Expand All @@ -242,8 +246,8 @@ impl Location {
pub fn uses_memory(&self) -> bool {
use Location::*;
match self {
al | cl | ax | eax | rax | imm8 | imm16 | imm32 | r8 | r16 | r32 | r64 => false,
rm8 | rm16 | rm32 | rm64 => true,
al | cl | ax | eax | rax | imm8 | imm16 | imm32 | r8 | r16 | r32 | r64 | xmm => false,
rm8 | rm16 | rm32 | rm64 | rm128 => true,
}
}

Expand All @@ -254,7 +258,7 @@ impl Location {
use Location::*;
match self {
al | ax | eax | rax | cl | imm8 | imm16 | imm32 => false,
r8 | r16 | r32 | r64 | rm8 | rm16 | rm32 | rm64 => true,
r8 | r16 | r32 | r64 | xmm | rm8 | rm16 | rm32 | rm64 | rm128 => true,
}
}

Expand All @@ -265,8 +269,8 @@ impl Location {
match self {
al | ax | eax | rax | cl => OperandKind::FixedReg(*self),
imm8 | imm16 | imm32 => OperandKind::Imm(*self),
r8 | r16 | r32 | r64 => OperandKind::Reg(*self),
rm8 | rm16 | rm32 | rm64 => OperandKind::RegMem(*self),
r8 | r16 | r32 | r64 | xmm => OperandKind::Reg(*self),
rm8 | rm16 | rm32 | rm64 | rm128 => OperandKind::RegMem(*self),
}
}
}
Expand All @@ -291,10 +295,13 @@ impl core::fmt::Display for Location {
r32 => write!(f, "r32"),
r64 => write!(f, "r64"),

xmm => write!(f, "xmm"),

rm8 => write!(f, "rm8"),
rm16 => write!(f, "rm16"),
rm32 => write!(f, "rm32"),
rm64 => write!(f, "rm64"),
rm128 => write!(f, "rm128"),
}
}
}
Expand Down
12 changes: 12 additions & 0 deletions cranelift/assembler-x64/meta/src/generate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ pub fn isle_definitions(f: &mut Formatter, insts: &[dsl::Inst]) {
f.line("(type AssemblerReadGprMem extern (enum))", None);
f.line("(type AssemblerReadWriteGprMem extern (enum))", None);
f.line("(type AssemblerInst extern (enum))", None);
f.line("(type AssemblerReadXmm extern (enum))", None);
f.line("(type AssemblerReadWriteXmm extern (enum))", None);
f.line("(type AssemblerReadXmmMem extern (enum))", None);
f.line("(type AssemblerReadWriteXmmMem extern (enum))", None);
f.empty_line();

f.line("(type AssemblerOutputs (enum", None);
Expand All @@ -72,6 +76,8 @@ pub fn isle_definitions(f: &mut Formatter, insts: &[dsl::Inst]) {
f.line(" ;; Used for instructions that return a GPR (including `GprMem` variants with", None);
f.line(" ;; a GPR as the first argument).", None);
f.line(" (RetGpr (inst MInst) (gpr Gpr))", None);
f.line(" ;; Used for instructions that return an XMM register.", None);
f.line(" (RetXmm (inst MInst) (xmm Xmm))", None);
f.line(" ;; TODO: eventually add more variants for multi-return, XMM, etc.; see", None);
f.line(" ;; https://github.com/bytecodealliance/wasmtime/pull/10276", None);
f.line("))", None);
Expand All @@ -83,6 +89,12 @@ pub fn isle_definitions(f: &mut Formatter, insts: &[dsl::Inst]) {
f.line(" (let ((_ Unit (emit inst))) gpr))", None);
f.empty_line();

f.line(";; Directly emit instructions that return an XMM register.", None);
f.line("(decl emit_ret_xmm (AssemblerOutputs) Xmm)", None);
f.line("(rule (emit_ret_xmm (AssemblerOutputs.RetXmm inst xmm))", None);
f.line(" (let ((_ Unit (emit inst))) xmm))", None);
f.empty_line();

f.line(";; Pass along the side-effecting instruction for later emission.", None);
f.line("(decl defer_side_effect (AssemblerOutputs) SideEffectNoResult)", None);
f.line("(rule (defer_side_effect (AssemblerOutputs.SideEffect inst))", None);
Expand Down
74 changes: 61 additions & 13 deletions cranelift/assembler-x64/meta/src/generate/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,16 @@ impl dsl::Format {
fmtln!(f, "let {dst} = self.{dst}.enc();");
fmtln!(f, "match &self.{src} {{");
f.indent(|f| {
fmtln!(f, "GprMem::Gpr({src}) => rex.emit_two_op(buf, {dst}, {src}.enc()),");
fmtln!(f, "GprMem::Mem({src}) => {src}.emit_rex_prefix(rex, {dst}, buf),");
match dst.bits() {
128 => {
fmtln!(f, "XmmMem::Xmm({src}) => rex.emit_two_op(buf, {dst}, {src}.enc()),");
fmtln!(f, "XmmMem::Mem({src}) => {src}.emit_rex_prefix(rex, {dst}, buf),");
}
_ => {
fmtln!(f, "GprMem::Gpr({src}) => rex.emit_two_op(buf, {dst}, {src}.enc()),");
fmtln!(f, "GprMem::Mem({src}) => {src}.emit_rex_prefix(rex, {dst}, buf),");
}
};
});
fmtln!(f, "}}");
}
Expand All @@ -119,9 +127,15 @@ impl dsl::Format {
| [RegMem(dst), Reg(src), FixedReg(_)] => {
fmtln!(f, "let {src} = self.{src}.enc();");
fmtln!(f, "match &self.{dst} {{");
f.indent(|f| {
fmtln!(f, "GprMem::Gpr({dst}) => rex.emit_two_op(buf, {src}, {dst}.enc()),");
fmtln!(f, "GprMem::Mem({dst}) => {dst}.emit_rex_prefix(rex, {src}, buf),");
f.indent(|f| match src.bits() {
128 => {
fmtln!(f, "XmmMem::Xmm({dst}) => rex.emit_two_op(buf, {src}, {dst}.enc()),");
fmtln!(f, "XmmMem::Mem({dst}) => {dst}.emit_rex_prefix(rex, {src}, buf),");
}
_ => {
fmtln!(f, "GprMem::Gpr({dst}) => rex.emit_two_op(buf, {src}, {dst}.enc()),");
fmtln!(f, "GprMem::Mem({dst}) => {dst}.emit_rex_prefix(rex, {src}, buf),");
}
});
fmtln!(f, "}}");
}
Expand Down Expand Up @@ -160,8 +174,22 @@ impl dsl::Format {
fmtln!(f, "let {dst} = self.{dst}.enc();");
fmtln!(f, "match &self.{src} {{");
f.indent(|f| {
fmtln!(f, "GprMem::Gpr({src}) => emit_modrm(buf, {dst}, {src}.enc()),");
fmtln!(f, "GprMem::Mem({src}) => emit_modrm_sib_disp(buf, off, {dst}, {src}, 0, None),");
match dst.bits() {
128 => {
fmtln!(f, "XmmMem::Xmm({src}) => emit_modrm(buf, {dst}, {src}.enc()),");
fmtln!(
f,
"XmmMem::Mem({src}) => emit_modrm_sib_disp(buf, off, {dst}, {src}, 0, None),"
);
}
_ => {
fmtln!(f, "GprMem::Gpr({src}) => emit_modrm(buf, {dst}, {src}.enc()),");
fmtln!(
f,
"GprMem::Mem({src}) => emit_modrm_sib_disp(buf, off, {dst}, {src}, 0, None),"
);
}
};
});
fmtln!(f, "}}");
}
Expand All @@ -171,12 +199,25 @@ impl dsl::Format {
fmtln!(f, "let {src} = self.{src}.enc();");
fmtln!(f, "match &self.{dst} {{");
f.indent(|f| {
fmtln!(f, "GprMem::Gpr({dst}) => emit_modrm(buf, {src}, {dst}.enc()),");
fmtln!(f, "GprMem::Mem({dst}) => emit_modrm_sib_disp(buf, off, {src}, {dst}, 0, None),");
match src.bits() {
128 => {
fmtln!(f, "XmmMem::Xmm({dst}) => emit_modrm(buf, {src}, {dst}.enc()),");
fmtln!(
f,
"XmmMem::Mem({dst}) => emit_modrm_sib_disp(buf, off, {src}, {dst}, 0, None),"
);
}
_ => {
fmtln!(f, "GprMem::Gpr({dst}) => emit_modrm(buf, {src}, {dst}.enc()),");
fmtln!(
f,
"GprMem::Mem({dst}) => emit_modrm_sib_disp(buf, off, {src}, {dst}, 0, None),"
);
}
};
});
fmtln!(f, "}}");
}

unknown => unimplemented!("unknown pattern: {unknown:?}"),
}
}
Expand Down Expand Up @@ -218,13 +259,20 @@ impl dsl::Format {
Read => unreachable!(),
ReadWrite => match one.location.kind() {
Imm(_) => unreachable!(),
FixedReg(_) => vec![IsleConstructor::RetGpr],
// One read/write register output? Output the instruction
// and that register.
FixedReg(_) | Reg(_) => vec![IsleConstructor::RetGpr],
Reg(r) => match r.bits() {
128 => vec![IsleConstructor::RetXmm],
_ => vec![IsleConstructor::RetGpr],
},
// One read/write reg-mem output? We need constructors for
// both variants.
RegMem(_) => vec![IsleConstructor::RetGpr, IsleConstructor::RetMemorySideEffect],
},
RegMem(rm) => match rm.bits() {
128 => vec![IsleConstructor::RetXmm, IsleConstructor::RetMemorySideEffect],
_ => vec![IsleConstructor::RetGpr, IsleConstructor::RetMemorySideEffect],
},
}
},
other => panic!("unsupported number of write operands {other:?}"),
}
Expand Down
Loading

0 comments on commit e33c426

Please sign in to comment.