From 53b83441f9d5325f6c02b30d0c772167339304c1 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Fri, 5 Aug 2022 11:24:55 -0700 Subject: [PATCH] Cranelift: Remove `ArgumentPurpose::StructReturn` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to legalize multi-value returns to using struct-return pointers where callees would store result values into the given struct-return buffer and callers would provide the struct-return buffer when calling and then load the results out of it. We haven't done that for a while and just rely on the calling convention's normal method of returning multiple values now. The only special casing that `ArgumentPurpose::StructReturn` has now is 1. We legalize signatures that have a `StructReturn` parameter but no `StructReturn` result to add the missing `StructReturn` result 2. We automatically insert a copy from a function's `StructReturn` argument to its `StructReturn` result value This isn't really useful for Cranelift embedders anymore since it doesn't even handle putting the return values into the struct-return buffer or getting them out again, has maintenance and cruft overhead for Cranelift hackers, and the above signature legalization in (1) also imposes performance overhead on all Cranelift compiles regardless of whether they use struct returns or not. It's time we removed the vestigial `ArgumentPurpose::StructReturn`. Finally, here are the Sightglass benchmark wins we get from this removal: ``` compilation :: nanoseconds :: benchmarks/spidermonkey/benchmark.wasm Δ = 214956202.90 ± 31700992.96 (confidence = 99%) main.so is 0.91x to 0.94x faster than no-sret.so! no-sret.so is 1.07x to 1.09x faster than main.so! [2765571620 2866580329.79 3085702646] main.so [2396129997 2651624126.89 2923726602] no-sret.so compilation :: nanoseconds :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 4176509.17 ± 2835408.05 (confidence = 99%) main.so is 0.95x to 0.99x faster than no-sret.so! no-sret.so is 1.01x to 1.05x faster than main.so! [115737735 133448206.82 149712338] main.so [108735836 129271697.65 166386156] no-sret.so compilation :: nanoseconds :: benchmarks/bz2/benchmark.wasm No difference in performance. [77356671 85735828.56 96331117] main.so [75824588 84176414.51 94308652] no-sret.so ``` --- cranelift/codegen/src/ir/extfunc.rs | 24 +------------- cranelift/codegen/src/isa/aarch64/abi.rs | 1 - cranelift/codegen/src/isa/s390x/abi.rs | 1 - cranelift/codegen/src/isa/x64/abi.rs | 1 - cranelift/codegen/src/machinst/abi_impl.rs | 33 ++++--------------- cranelift/codegen/src/machinst/lower.rs | 18 ---------- cranelift/docs/ir.md | 3 +- .../filetests/isa/x64/atomic-cas-bug.clif | 6 ++-- .../filetests/isa/x64/struct-ret.clif | 20 ----------- .../filetests/filetests/parser/call.clif | 6 ++-- cranelift/reader/src/parser.rs | 4 +-- 11 files changed, 16 insertions(+), 101 deletions(-) delete mode 100644 cranelift/filetests/filetests/isa/x64/struct-ret.clif diff --git a/cranelift/codegen/src/ir/extfunc.rs b/cranelift/codegen/src/ir/extfunc.rs index 8baa6bff84da..952e82711f69 100644 --- a/cranelift/codegen/src/ir/extfunc.rs +++ b/cranelift/codegen/src/ir/extfunc.rs @@ -87,11 +87,6 @@ impl Signature { .count() } - /// Does this signature take an struct return pointer parameter? - pub fn uses_struct_return_param(&self) -> bool { - self.uses_special_param(ArgumentPurpose::StructReturn) - } - /// Does this return more than one normal value? (Pre-struct return /// legalization) pub fn is_multi_return(&self) -> bool { @@ -242,16 +237,6 @@ pub enum ArgumentPurpose { /// A C struct passed as argument. StructArgument(u32), - /// Struct return pointer. - /// - /// When a function needs to return more data than will fit in registers, the caller passes a - /// pointer to a memory location where the return value can be written. In some ABIs, this - /// struct return pointer is passed in a specific register. - /// - /// This argument kind can also appear as a return value for ABIs that require a function with - /// a `StructReturn` pointer argument to also return that pointer in a register. - StructReturn, - /// The link register. /// /// Most RISC architectures implement calls by saving the return address in a designated @@ -299,7 +284,6 @@ impl fmt::Display for ArgumentPurpose { f.write_str(match self { Self::Normal => "normal", Self::StructArgument(size) => return write!(f, "sarg({})", size), - Self::StructReturn => "sret", Self::Link => "link", Self::FramePointer => "fp", Self::CalleeSaved => "csr", @@ -315,7 +299,6 @@ impl FromStr for ArgumentPurpose { fn from_str(s: &str) -> Result { match s { "normal" => Ok(Self::Normal), - "sret" => Ok(Self::StructReturn), "link" => Ok(Self::Link), "fp" => Ok(Self::FramePointer), "csr" => Ok(Self::CalleeSaved), @@ -391,20 +374,15 @@ mod tests { fn argument_type() { let t = AbiParam::new(I32); assert_eq!(t.to_string(), "i32"); - let mut t = t.uext(); + let t = t.uext(); assert_eq!(t.to_string(), "i32 uext"); assert_eq!(t.sext().to_string(), "i32 sext"); - t.purpose = ArgumentPurpose::StructReturn; - assert_eq!(t.to_string(), "i32 uext sret"); - t.legalized_to_pointer = true; - assert_eq!(t.to_string(), "i32 ptr uext sret"); } #[test] fn argument_purpose() { let all_purpose = [ (ArgumentPurpose::Normal, "normal"), - (ArgumentPurpose::StructReturn, "sret"), (ArgumentPurpose::Link, "link"), (ArgumentPurpose::FramePointer, "fp"), (ArgumentPurpose::CalleeSaved, "csr"), diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 5d25aaab1c5b..1e7aba4b5b63 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -140,7 +140,6 @@ impl ABIMachineSpec for AArch64MachineDeps { | &ir::ArgumentPurpose::Normal | &ir::ArgumentPurpose::StackLimit | &ir::ArgumentPurpose::SignatureId - | &ir::ArgumentPurpose::StructReturn | &ir::ArgumentPurpose::StructArgument(_) => {} _ => panic!( "Unsupported argument purpose {:?} in signature: {:?}", diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 67fd319b0625..a6ee8e04dbb8 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -255,7 +255,6 @@ impl ABIMachineSpec for S390xMachineDeps { | &ir::ArgumentPurpose::Normal | &ir::ArgumentPurpose::StackLimit | &ir::ArgumentPurpose::SignatureId - | &ir::ArgumentPurpose::StructReturn | &ir::ArgumentPurpose::StructArgument(_) => {} _ => panic!( "Unsupported argument purpose {:?} in signature: {:?}", diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 13cb586c5cbe..0530742da773 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -76,7 +76,6 @@ impl ABIMachineSpec for X64ABIMachineSpec { | &ir::ArgumentPurpose::Normal | &ir::ArgumentPurpose::StackLimit | &ir::ArgumentPurpose::SignatureId - | &ir::ArgumentPurpose::StructReturn | &ir::ArgumentPurpose::StructArgument(_) => {} _ => panic!( "Unsupported argument purpose {:?} in signature: {:?}", diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index 6b8dabbab8ab..a35e53cc12d7 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -104,7 +104,7 @@ use super::abi::*; use crate::binemit::StackMap; use crate::ir::types::*; -use crate::ir::{ArgumentExtension, ArgumentPurpose, DynamicStackSlot, Signature, StackSlot}; +use crate::ir::{ArgumentExtension, DynamicStackSlot, Signature, StackSlot}; use crate::isa::TargetIsa; use crate::settings; use crate::CodegenResult; @@ -548,8 +548,6 @@ impl ABISig { sig: &ir::Signature, flags: &settings::Flags, ) -> CodegenResult { - let sig = ensure_struct_return_ptr_is_returned(sig); - // Compute args and retvals from signature. Handle retvals first, // because we may need to add a return-area arg to the args. let (rets, sized_stack_ret_space, _) = M::compute_arg_locs( @@ -778,7 +776,7 @@ impl ABICalleeImpl { trace!("ABI: func signature {:?}", f.signature); let flags = isa.flags().clone(); - let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature); + let ir_sig = f.signature.clone(); let sig = ABISig::from_func_sig::(&ir_sig, &flags)?; let call_conv = f.signature.call_conv; @@ -1031,23 +1029,6 @@ fn gen_store_stack_multi( ret } -fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature { - let params_structret = sig - .params - .iter() - .find(|p| p.purpose == ArgumentPurpose::StructReturn); - let rets_have_structret = sig.returns.len() > 0 - && sig - .returns - .iter() - .any(|arg| arg.purpose == ArgumentPurpose::StructReturn); - let mut sig = sig.clone(); - if params_structret.is_some() && !rets_have_structret { - sig.returns.insert(0, params_structret.unwrap().clone()); - } - sig -} - impl ABICallee for ABICalleeImpl { type I = M::I; @@ -1633,14 +1614,13 @@ pub enum CallDest { impl ABICallerImpl { /// Create a callsite ABI object for a call directly to the specified function. pub fn from_func( - sig: &ir::Signature, + ir_sig: &ir::Signature, extname: &ir::ExternalName, dist: RelocDistance, caller_conv: isa::CallConv, flags: &settings::Flags, ) -> CodegenResult> { - let ir_sig = ensure_struct_return_ptr_is_returned(sig); - let sig = ABISig::from_func_sig::(&ir_sig, flags)?; + let sig = ABISig::from_func_sig::(ir_sig, flags)?; let (uses, defs, clobbers) = sig.call_uses_defs_clobbers::(); Ok(ABICallerImpl { sig, @@ -1658,14 +1638,13 @@ impl ABICallerImpl { /// Create a callsite ABI object for a call to a function pointer with the /// given signature. pub fn from_ptr( - sig: &ir::Signature, + ir_sig: &ir::Signature, ptr: Reg, opcode: ir::Opcode, caller_conv: isa::CallConv, flags: &settings::Flags, ) -> CodegenResult> { - let ir_sig = ensure_struct_return_ptr_is_returned(sig); - let sig = ABISig::from_func_sig::(&ir_sig, flags)?; + let sig = ABISig::from_func_sig::(ir_sig, flags)?; let (uses, defs, clobbers) = sig.call_uses_defs_clobbers::(); Ok(ABICallerImpl { sig, diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 02eba77bcfd3..c77eedb69860 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -722,24 +722,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> { for insn in self.vcode.abi().gen_copy_arg_to_regs(i, regs).into_iter() { self.emit(insn); } - if self.abi().signature().params[i].purpose == ArgumentPurpose::StructReturn { - assert!(regs.len() == 1); - let ty = self.abi().signature().params[i].value_type; - // The ABI implementation must have ensured that a StructReturn - // arg is present in the return values. - let struct_ret_idx = self - .abi() - .signature() - .returns - .iter() - .position(|ret| ret.purpose == ArgumentPurpose::StructReturn) - .expect("StructReturn return value not present!"); - self.emit(I::gen_move( - Writable::from_reg(self.retval_regs[struct_ret_idx].regs()[0]), - regs.regs()[0].to_reg(), - ty, - )); - } } if let Some(insn) = self.vcode.abi().gen_retval_area_setup() { self.emit(insn); diff --git a/cranelift/docs/ir.md b/cranelift/docs/ir.md index aae0041ffa01..198eca46fb91 100644 --- a/cranelift/docs/ir.md +++ b/cranelift/docs/ir.md @@ -404,7 +404,7 @@ paramlist : param { "," param } retlist : paramlist param : type [paramext] [paramspecial] paramext : "uext" | "sext" -paramspecial : "sret" | "link" | "fp" | "csr" | "vmctx" | "sigid" | "stack_limit" +paramspecial : "link" | "fp" | "csr" | "vmctx" | "sigid" | "stack_limit" callconv : "fast" | "cold" | "system_v" | "windows_fastcall" | "wasmtime_system_v" | "wasmtime_fastcall" | "apple_aarch64" | "wasmtime_apple_aarch64" @@ -419,7 +419,6 @@ system, a function's calling convention is only fully determined by a | Name | Description | | ----------| ---------- | -| sret | pointer to a return value in memory | | link | return address | | fp | the initial value of the frame pointer | | csr | callee-saved register | diff --git a/cranelift/filetests/filetests/isa/x64/atomic-cas-bug.clif b/cranelift/filetests/filetests/isa/x64/atomic-cas-bug.clif index 196bde2b111c..349efff7cf9a 100644 --- a/cranelift/filetests/filetests/isa/x64/atomic-cas-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/atomic-cas-bug.clif @@ -22,10 +22,10 @@ function u0:31(i64, i32, i32, i8, i8) -> i32, i32 system_v { gv6 = symbol colocated u1:35 gv7 = symbol colocated u1:36 gv8 = symbol colocated u1:37 - sig0 = (i64 sret, i64, i64, i64, i64) system_v - sig1 = (i64 sret, i64, i64, i64, i64) system_v + sig0 = (i64, i64, i64, i64, i64) system_v + sig1 = (i64, i64, i64, i64, i64) system_v sig2 = (i64, i64) system_v - sig3 = (i64 sret, i64, i64, i64, i64) system_v + sig3 = (i64, i64, i64, i64, i64) system_v sig4 = (i64, i64) system_v sig5 = (i64, i64) system_v fn0 = colocated u0:11 sig0 diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif deleted file mode 100644 index a13136356908..000000000000 --- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif +++ /dev/null @@ -1,20 +0,0 @@ -test compile precise-output -target x86_64 - -function %f0(i64 sret) { -block0(v0: i64): - v1 = iconst.i64 42 - store v1, v0 - return -} - -; pushq %rbp -; movq %rsp, %rbp -; block0: -; movq %rdi, %rax -; movl $42, %edx -; movq %rdx, 0(%rdi) -; movq %rbp, %rsp -; popq %rbp -; ret - diff --git a/cranelift/filetests/filetests/parser/call.clif b/cranelift/filetests/filetests/parser/call.clif index 9f3ad6eb94e7..8374453be82a 100644 --- a/cranelift/filetests/filetests/parser/call.clif +++ b/cranelift/filetests/filetests/parser/call.clif @@ -16,7 +16,7 @@ block1: v2 = f32const 0.0 return v1, v2 } -; sameln: function %r1() -> i32, f32 +; sameln: function %r1() -> i32, f32 ; nextln: block1: ; nextln: v1 = iconst.i32 3 ; nextln: v2 = f32const 0.0 @@ -84,11 +84,11 @@ block0: ; check: return ; Special purpose function arguments -function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 sret { +function %special1(i32, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 { block0(v1: i32, v2: i32, v3: i32, v4: i32): return v4, v2, v3, v1 } -; check: function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 sret fast { +; check: function %special1(i32, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 fast { ; check: block0(v1: i32, v2: i32, v3: i32, v4: i32): ; check: return v4, v2, v3, v1 ; check: } diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index e2f1df77fb13..ac61e45b5b60 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -3175,12 +3175,12 @@ mod tests { assert_eq!(sig.returns.len(), 0); assert_eq!(sig.call_conv, CallConv::SystemV); - let sig2 = Parser::new("(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 system_v") + let sig2 = Parser::new("(i8 uext, f32, f64, i32) -> i32 sext, f64 system_v") .parse_signature() .unwrap(); assert_eq!( sig2.to_string(), - "(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 system_v" + "(i8 uext, f32, f64, i32) -> i32 sext, f64 system_v" ); assert_eq!(sig2.call_conv, CallConv::SystemV);