From ff232c62df5518f7c983f78c883c24f19959d8bb Mon Sep 17 00:00:00 2001 From: Pavel Kopyl Date: Fri, 15 Nov 2024 14:42:51 +0100 Subject: [PATCH] [EVM] Support commutable operations in BP stackification algorithm --- llvm/lib/Target/EVM/EVMControlFlowGraph.h | 3 + .../Target/EVM/EVMControlFlowGraphBuilder.cpp | 6 +- llvm/lib/Target/EVM/EVMInstrInfo.td | 24 +-- .../Target/EVM/EVMOptimizedCodeTransform.cpp | 70 +++++++-- llvm/test/CodeGen/EVM/stack-ops-commutable.ll | 137 ++++++++++++++++-- llvm/test/CodeGen/EVM/stack-ops.ll | 20 +-- .../CodeGen/EVM/unused_function_arguments.ll | 4 +- 7 files changed, 210 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Target/EVM/EVMControlFlowGraph.h b/llvm/lib/Target/EVM/EVMControlFlowGraph.h index 5ade51546bbd..2f76f93455d4 100644 --- a/llvm/lib/Target/EVM/EVMControlFlowGraph.h +++ b/llvm/lib/Target/EVM/EVMControlFlowGraph.h @@ -169,6 +169,9 @@ struct CFG { struct BuiltinCall { MachineInstr *Builtin = nullptr; + // True if this instruction has commutable operands. In EVM ISA + // commutable operands always take top two stack slots. 
+ bool IsCommutable = false; bool TerminatesOrReverts = false; }; diff --git a/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.cpp b/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.cpp index 35c0cd3426ce..4aaa75841920 100644 --- a/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.cpp +++ b/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.cpp @@ -258,9 +258,9 @@ void ControlFlowGraphBuilder::handleMachineInstr(MachineInstr &MI) { default: { Stack Input, Output; collectInstrOperands(MI, &Input, &Output); - CurrentBlock->Operations.emplace_back( - CFG::Operation{std::move(Input), std::move(Output), - CFG::BuiltinCall{&MI, TerminatesOrReverts}}); + CurrentBlock->Operations.emplace_back(CFG::Operation{ + std::move(Input), std::move(Output), + CFG::BuiltinCall{&MI, MI.isCommutable(), TerminatesOrReverts}}); } break; } diff --git a/llvm/lib/Target/EVM/EVMInstrInfo.td b/llvm/lib/Target/EVM/EVMInstrInfo.td index 8038cb0cd7d2..910e8099cfe9 100644 --- a/llvm/lib/Target/EVM/EVMInstrInfo.td +++ b/llvm/lib/Target/EVM/EVMInstrInfo.td @@ -270,17 +270,19 @@ defm SDIV : BinaryInst; defm MOD : BinaryInst; defm SMOD : BinaryInst; -defm ADDMOD - : I<(outs GPR:$dst), (ins GPR:$add_op1, GPR:$add_op2, GPR:$denom), - [(set GPR:$dst, - (int_evm_addmod GPR:$add_op1, GPR:$add_op2, GPR:$denom))], - "ADDMOD", " $dst, $add_op1, $add_op2, $denom", 0x08, 8>; - -defm MULMOD - : I<(outs GPR:$dst), (ins GPR:$mul_op1, GPR:$mul_op2, GPR:$denom), - [(set GPR:$dst, - (int_evm_mulmod GPR:$mul_op1, GPR:$mul_op2, GPR:$denom))], - "MULMOD", " $dst, $mul_op1, $mul_op2, $denom", 0x09, 8>; +let isCommutable = 1 in { + defm ADDMOD + : I<(outs GPR:$dst), (ins GPR:$add_op1, GPR:$add_op2, GPR:$denom), + [(set GPR:$dst, + (int_evm_addmod GPR:$add_op1, GPR:$add_op2, GPR:$denom))], + "ADDMOD", " $dst, $add_op1, $add_op2, $denom", 0x08, 8>; + + defm MULMOD + : I<(outs GPR:$dst), (ins GPR:$mul_op1, GPR:$mul_op2, GPR:$denom), + [(set GPR:$dst, + (int_evm_mulmod GPR:$mul_op1, GPR:$mul_op2, GPR:$denom))], + "MULMOD", " $dst, 
$mul_op1, $mul_op2, $denom", 0x09, 8>; +} defm EXP : I<(outs GPR:$dst), (ins GPR:$base, GPR:$exp), diff --git a/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.cpp b/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.cpp index 698fb5673f0e..1b168bb1ab7a 100644 --- a/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.cpp +++ b/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.cpp @@ -257,16 +257,54 @@ void EVMOptimizedCodeTransform::createStackLayout(Stack TargetStack) { void EVMOptimizedCodeTransform::createOperationEntryLayout( const CFG::Operation &Op) { // Create required layout for entering the Operation. - createStackLayout(Layout.operationEntryLayout.at(&Op)); + // Check if we can choose cheaper stack shuffling if the Operation is an + // instruction with commutable arguments. + if (const auto *Inst = std::get_if(&Op.Operation); + Inst && Inst->IsCommutable) { + // Get the stack layout before the instruction. + const Stack &DefaultTargetStack = Layout.operationEntryLayout.at(&Op); + size_t DefaultCost = + EvaluateStackTransform(CurrentStack, DefaultTargetStack); + + // Commutable operands always take top two stack slots. + const unsigned OpIdx1 = 0, OpIdx2 = 1; + assert(DefaultTargetStack.size() > 1); + + // Swap the commutable stack items and measure the stack shuffling cost + // again. + Stack CommutedTargetStack = DefaultTargetStack; + std::swap(CommutedTargetStack[CommutedTargetStack.size() - OpIdx1 - 1], + CommutedTargetStack[CommutedTargetStack.size() - OpIdx2 - 1]); + size_t CommutedCost = + EvaluateStackTransform(CurrentStack, CommutedTargetStack); + // Choose the cheapest transformation. + createStackLayout(CommutedCost < DefaultCost ? CommutedTargetStack + : DefaultTargetStack); +#ifndef NDEBUG + // Assert that we have the inputs of the Operation on stack top. 
+ assert(static_cast(CurrentStack.size()) == Assembly.getStackHeight()); + assert(CurrentStack.size() >= Op.Input.size()); + Stack StackInput = + EVMUtils::to_vector(EVMUtils::take_last(CurrentStack, Op.Input.size())); + // Adjust the StackInput to match the commuted stack. + if (CommutedCost < DefaultCost) { + std::swap(StackInput[StackInput.size() - OpIdx1 - 1], + StackInput[StackInput.size() - OpIdx2 - 1]); + } + assert(AreLayoutsCompatible(StackInput, Op.Input)); +#endif // NDEBUG + } else { + createStackLayout(Layout.operationEntryLayout.at(&Op)); #ifndef NDEBUG - // Assert that we have the inputs of the Operation on stack top. - assert(static_cast(CurrentStack.size()) == Assembly.getStackHeight()); - assert(CurrentStack.size() >= Op.Input.size()); - const Stack StackInput = - EVMUtils::to_vector(EVMUtils::take_last(CurrentStack, Op.Input.size())); - assert(AreLayoutsCompatible(StackInput, Op.Input)); + // Assert that we have the inputs of the Operation on stack top. + assert(static_cast(CurrentStack.size()) == Assembly.getStackHeight()); + assert(CurrentStack.size() >= Op.Input.size()); + const Stack StackInput = + EVMUtils::to_vector(EVMUtils::take_last(CurrentStack, Op.Input.size())); + assert(AreLayoutsCompatible(StackInput, Op.Input)); #endif // NDEBUG + } } void EVMOptimizedCodeTransform::operator()(const CFG::BasicBlock &Block) { @@ -280,12 +318,14 @@ void EVMOptimizedCodeTransform::operator()(const CFG::BasicBlock &Block) { auto const &BlockInfo = Layout.blockInfos.at(&Block); - // Assert that the stack is valid for entering the block. - assert(AreLayoutsCompatible(CurrentStack, BlockInfo.entryLayout)); - - // Might set some slots to junk, if not required by the block. - CurrentStack = BlockInfo.entryLayout; - + // Assert that the stack is valid for entering the block. The entry layout + // of the function entry block is fully determined by the first + // instruction, so we can ignore 'BlockInfo.entryLayout'. 
+ if (&Block != FuncInfo->Entry) { + assert(AreLayoutsCompatible(CurrentStack, BlockInfo.entryLayout)); + // Might set some slots to junk, if not required by the block. + CurrentStack = BlockInfo.entryLayout; + } assert(static_cast(CurrentStack.size()) == Assembly.getStackHeight()); // Emit jumpdest, if required. @@ -446,9 +486,7 @@ void EVMOptimizedCodeTransform::operator()() { Assembly.setStackHeight(static_cast(CurrentStack.size())); Assembly.appendLabel(); - // Create the entry layout of the function body block and visit. - createStackLayout(Layout.blockInfos.at(FuncInfo->Entry).entryLayout); - + // Visit the function entry block. (*this)(*FuncInfo->Entry); Assembly.finalize(); diff --git a/llvm/test/CodeGen/EVM/stack-ops-commutable.ll b/llvm/test/CodeGen/EVM/stack-ops-commutable.ll index 9a14eb4c59cb..93f9e1c34938 100644 --- a/llvm/test/CodeGen/EVM/stack-ops-commutable.ll +++ b/llvm/test/CodeGen/EVM/stack-ops-commutable.ll @@ -8,7 +8,6 @@ define void @no_manipulations_needed_with_junk(i256 %a1, i256 %a2, i256 %a3) nor ; CHECK-LABEL: no_manipulations_needed_with_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP1 ; CHECK-NEXT: ADD ; CHECK-NEXT: PUSH0 ; CHECK-NEXT: REVERT @@ -17,6 +16,75 @@ define void @no_manipulations_needed_with_junk(i256 %a1, i256 %a2, i256 %a3) nor unreachable } +define void @no_manipulations_needed_with_junk_eq(i256 %a1, i256 %a2, i256 %a3) noreturn { + %cmp = icmp eq i256 %a1, %a2 + %x1 = zext i1 %cmp to i256 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable + +; CHECK-LABEL: no_manipulations_needed_with_junk_eq: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: EQ +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT +} + +define i256 @no_manipulations_needed_no_junk_addmod(i256 %a1, i256 %a2, i256 %a3) { +; CHECK-LABEL: no_manipulations_needed_no_junk_addmod: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: ADDMOD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = call i256 
@llvm.evm.addmod(i256 %a2, i256 %a1, i256 %a3) + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_mulmod(i256 %a1, i256 %a2, i256 %a3) { +; CHECK-LABEL: no_manipulations_needed_no_junk_mulmod: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: MULMOD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = call i256 @llvm.evm.mulmod(i256 %a2, i256 %a1, i256 %a3) + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_and(i256 %a1, i256 %a2) { +; CHECK-LABEL: no_manipulations_needed_no_junk_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: AND +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = and i256 %a2, %a1 + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_or(i256 %a1, i256 %a2) { +; CHECK-LABEL: no_manipulations_needed_no_junk_or: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: OR +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = or i256 %a2, %a1 + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_xor(i256 %a1, i256 %a2) { +; CHECK-LABEL: no_manipulations_needed_no_junk_xor: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: XOR +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = xor i256 %a2, %a1 + ret i256 %x1 +} + define i256 @no_manipulations_needed_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK-LABEL: no_manipulations_needed_no_junk: ; CHECK: ; %bb.0: @@ -34,7 +102,6 @@ define void @reorder_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { ; CHECK-LABEL: reorder_with_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP1 ; CHECK-NEXT: ADD ; CHECK-NEXT: PUSH0 ; CHECK-NEXT: REVERT @@ -61,7 +128,6 @@ define void @swap_first_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 ; CHECK-NEXT: ADD ; CHECK-NEXT: PUSH0 ; CHECK-NEXT: REVERT @@ -70,6 +136,20 @@ define void @swap_first_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { unreachable } +define i256 @two_commutable(i256 %a1, 
i256 %a2, i256 %a3) { + %x1 = add i256 %a3, %a2 + %x2 = add i256 %a1, %x1 + ret i256 %x2 +; CHECK-LABEL: two_commutable: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: ADD +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP +} + define void @swap_second_with_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) noreturn { ; CHECK-LABEL: swap_second_with_junk: ; CHECK: ; %bb.0: @@ -87,7 +167,6 @@ define i256 @swap_first_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwind ; CHECK-LABEL: swap_first_no_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP3 ; CHECK-NEXT: SWAP2 ; CHECK-NEXT: POP ; CHECK-NEXT: POP @@ -102,7 +181,6 @@ define i256 @swap_second_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwin ; CHECK-LABEL: swap_second_no_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP3 ; CHECK-NEXT: SWAP2 ; CHECK-NEXT: POP ; CHECK-NEXT: POP @@ -179,11 +257,10 @@ define i256 @second_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: DUP2 +; CHECK-NEXT: PUSH1 4 ; CHECK-NEXT: SWAP3 +; CHECK-NEXT: SWAP4 ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: PUSH1 4 -; CHECK-NEXT: SWAP2 ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP2 ; CHECK-NEXT: SUB @@ -220,10 +297,10 @@ define i256 @both_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK-LABEL: both_arg_alive_no_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP2 +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 ; CHECK-NEXT: POP ; CHECK-NEXT: DUP2 -; CHECK-NEXT: DUP2 ; CHECK-NEXT: DIV ; CHECK-NEXT: SWAP2 ; CHECK-NEXT: ADD @@ -241,9 +318,9 @@ define i256 @same_arg_dead_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 ; CHECK-NEXT: DUP2 @@ -255,4 +332,40 @@ define i256 
@same_arg_dead_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ret i256 %x1 } +define void @commutable_not_in_function_entry() noreturn { + +; CHECK-LABEL: .BB{{[0-9]+}}_3: +; CHECK: JUMPDEST +; CHECK-NEXT: PUSH4 4294967295 +; CHECK-NEXT: AND +; CHECK-NEXT: PUSH0 + +enter: + %offset = inttoptr i256 0 to ptr addrspace(2) + %load = call i256 @llvm.evm.calldataload(ptr addrspace(2) %offset) + %calldata = trunc i256 %load to i32 + br label %header + +header: + %phi = phi i32 [ %calldata, %enter ], [ %inc, %do ] + %phi2 = phi i32 [ 1, %enter ], [ %mul, %do ] + %cmp = icmp sgt i32 %phi, 0 + br i1 %cmp, label %do, label %exit + +do: + %mul = mul nsw i32 %phi2, %phi + %inc = add nsw i32 %phi, -1 + br label %header + +exit: + %res = zext i32 %phi2 to i256 + store i256 %res, ptr addrspace(1) null, align 4 + call void @llvm.evm.return(ptr addrspace(1) null, i256 32) + unreachable +} + +declare i256 @llvm.evm.addmod(i256, i256, i256) +declare i256 @llvm.evm.mulmod(i256, i256, i256) +declare i256 @llvm.evm.calldataload(ptr addrspace(2)) +declare void @llvm.evm.return(ptr addrspace(1), i256) declare void @llvm.evm.revert(ptr addrspace(1), i256) diff --git a/llvm/test/CodeGen/EVM/stack-ops.ll b/llvm/test/CodeGen/EVM/stack-ops.ll index 92dfaf24887f..40fe299cf9f8 100644 --- a/llvm/test/CodeGen/EVM/stack-ops.ll +++ b/llvm/test/CodeGen/EVM/stack-ops.ll @@ -247,10 +247,10 @@ define i256 @both_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK-LABEL: both_arg_alive_no_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP2 +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 ; CHECK-NEXT: POP ; CHECK-NEXT: DUP2 -; CHECK-NEXT: DUP2 ; CHECK-NEXT: DIV ; CHECK-NEXT: SWAP2 ; CHECK-NEXT: SUB @@ -268,9 +268,9 @@ define i256 @same_arg_dead_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; 
CHECK-NEXT: SWAP1 ; CHECK-NEXT: DUP2 @@ -287,9 +287,9 @@ define i256 @same_arg_dead_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 ; CHECK-NEXT: JUMP @@ -302,10 +302,10 @@ define i256 @same_arg_alive_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 @@ -324,10 +324,10 @@ define i256 @same_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 diff --git a/llvm/test/CodeGen/EVM/unused_function_arguments.ll b/llvm/test/CodeGen/EVM/unused_function_arguments.ll index 0aa142b88b10..50e68f31e91c 100644 --- a/llvm/test/CodeGen/EVM/unused_function_arguments.ll +++ b/llvm/test/CodeGen/EVM/unused_function_arguments.ll @@ -22,9 +22,9 @@ define i256 @wat(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK-LABEL: @wat ; CHECK: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 ; CHECK-NEXT: JUMP