diff --git a/llvm/lib/Target/EVM/CMakeLists.txt b/llvm/lib/Target/EVM/CMakeLists.txt index ceed4bf575e0..18c208f185c1 100644 --- a/llvm/lib/Target/EVM/CMakeLists.txt +++ b/llvm/lib/Target/EVM/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(EVMCodeGen EVMAllocaHoisting.cpp EVMArgumentMove.cpp EVMAsmPrinter.cpp + EVMBackwardPropagationStackification.cpp EVMCodegenPrepare.cpp EVMFrameLowering.cpp EVMISelDAGToDAG.cpp @@ -27,13 +28,19 @@ add_llvm_target(EVMCodeGen EVMInstrInfo.cpp EVMLinkRuntime.cpp EVMLowerIntrinsics.cpp + EVMMachineCFGInfo.cpp EVMMachineFunctionInfo.cpp EVMMCInstLower.cpp EVMOptimizeLiveIntervals.cpp EVMRegColoring.cpp EVMRegisterInfo.cpp EVMSingleUseExpression.cpp + EVMSplitCriticalEdges.cpp + EVMStackDebug.cpp + EVMStackLayoutGenerator.cpp + EVMStackModel.cpp EVMStackify.cpp + EVMStackifyCodeEmitter.cpp EVMSubtarget.cpp EVMTargetMachine.cpp EVMTargetTransformInfo.cpp diff --git a/llvm/lib/Target/EVM/EVM.h b/llvm/lib/Target/EVM/EVM.h index 2ff8488a5089..15bed82879b1 100644 --- a/llvm/lib/Target/EVM/EVM.h +++ b/llvm/lib/Target/EVM/EVM.h @@ -50,7 +50,9 @@ ModulePass *createEVMLinkRuntimePass(); FunctionPass *createEVMOptimizeLiveIntervals(); FunctionPass *createEVMRegColoring(); FunctionPass *createEVMSingleUseExpression(); +FunctionPass *createEVMSplitCriticalEdges(); FunctionPass *createEVMStackify(); +FunctionPass *createEVMBPStackification(); // PassRegistry initialization declarations. void initializeEVMCodegenPreparePass(PassRegistry &); @@ -61,7 +63,9 @@ void initializeEVMLinkRuntimePass(PassRegistry &); void initializeEVMOptimizeLiveIntervalsPass(PassRegistry &); void initializeEVMRegColoringPass(PassRegistry &); void initializeEVMSingleUseExpressionPass(PassRegistry &); +void initializeEVMSplitCriticalEdgesPass(PassRegistry &); void initializeEVMStackifyPass(PassRegistry &); +void initializeEVMBPStackificationPass(PassRegistry &); struct EVMLinkRuntimePass : PassInfoMixin { EVMLinkRuntimePass() = default; diff --git a/llvm/lib/Target/EVM/EVMArgumentMove.cpp b/llvm/lib/Target/EVM/EVMArgumentMove.cpp index b76122b5336f..5da6aeb6caf2 100644 --- a/llvm/lib/Target/EVM/EVMArgumentMove.cpp +++ b/llvm/lib/Target/EVM/EVMArgumentMove.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file moves ARGUMENT instructions after ScheduleDAG scheduling. +// This file moves and orders ARGUMENT instructions after ScheduleDAG +// scheduling. // // Arguments are really live-in registers, however, since we use virtual // registers and LLVM doesn't support live-in virtual registers, we're @@ -67,21 +68,24 @@ bool EVMArgumentMove::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; MachineBasicBlock &EntryMBB = MF.front(); + SmallVector Args; + for (MachineInstr &MI : EntryMBB) { + if (EVM::ARGUMENT == MI.getOpcode()) + Args.push_back(&MI); + } - // Look for the first NonArg instruction. - const auto InsertPt = - std::find_if_not(EntryMBB.begin(), EntryMBB.end(), [](auto &MI) { - return EVM::ARGUMENT == MI.getOpcode(); - }); + // Sort ARGUMENT instructions in ascending order of their arguments. + std::sort(Args.begin(), Args.end(), + [](const MachineInstr *MI1, const MachineInstr *MI2) { + int64_t Arg1Idx = MI1->getOperand(1).getImm(); + int64_t Arg2Idx = MI2->getOperand(1).getImm(); + return Arg1Idx < Arg2Idx; + }); - // Now move any argument instructions later in the block - // to before our first NonArg instruction. 
- for (MachineInstr &MI : llvm::make_range(InsertPt, EntryMBB.end())) { - if (EVM::ARGUMENT == MI.getOpcode()) { - EntryMBB.insert(InsertPt, MI.removeFromParent()); - Changed = true; - } + for (MachineInstr *MI : reverse(Args)) { + MachineInstr *Arg = MI->removeFromParent(); + EntryMBB.insert(EntryMBB.begin(), Arg); + Changed = true; } - return Changed; } diff --git a/llvm/lib/Target/EVM/EVMAsmPrinter.cpp b/llvm/lib/Target/EVM/EVMAsmPrinter.cpp index b8536b8e351c..9328c5802bef 100644 --- a/llvm/lib/Target/EVM/EVMAsmPrinter.cpp +++ b/llvm/lib/Target/EVM/EVMAsmPrinter.cpp @@ -31,6 +31,8 @@ using namespace llvm; +extern cl::opt EVMKeepRegisters; + #define DEBUG_TYPE "asm-printer" namespace { @@ -52,17 +54,11 @@ class EVMAsmPrinter : public AsmPrinter { StringRef getPassName() const override { return "EVM Assembly "; } - void SetupMachineFunction(MachineFunction &MF) override; - void emitInstruction(const MachineInstr *MI) override; - void emitFunctionEntryLabel() override; + void emitBasicBlockStart(const MachineBasicBlock &MBB) override; - /// Return true if the basic block has exactly one predecessor and the control - /// transfer mechanism between the predecessor and this block is a - /// fall-through. - bool isBlockOnlyReachableByFallthrough( - const MachineBasicBlock *MBB) const override; + void emitFunctionEntryLabel() override; void emitEndOfAsmFile(Module &) override; @@ -70,25 +66,10 @@ class EVMAsmPrinter : public AsmPrinter { void emitAssemblySymbol(const MachineInstr *MI); void emitWideRelocatableSymbol(const MachineInstr *MI); void emitLoadImmutableLabel(const MachineInstr *MI); + void emitJumpDest(); }; } // end of anonymous namespace -void EVMAsmPrinter::SetupMachineFunction(MachineFunction &MF) { - // Unbundle bundles. - for (MachineBasicBlock &MBB : MF) { - MachineBasicBlock::instr_iterator I = MBB.instr_begin(), - E = MBB.instr_end(); - for (; I != E; ++I) { - if (I->isBundledWithPred()) { - assert(I->isConditionalBranch() || I->isUnconditionalBranch()); - I->unbundleFromPred(); - } - } - } - - AsmPrinter::SetupMachineFunction(MF); -} - void EVMAsmPrinter::emitFunctionEntryLabel() { AsmPrinter::emitFunctionEntryLabel(); @@ -111,19 +92,84 @@ void EVMAsmPrinter::emitFunctionEntryLabel() { } } +void EVMAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { + AsmPrinter::emitBasicBlockStart(MBB); + + // Emit JUMPDEST instruction at the beginning of the basic block, if + // this is not a block that is only reachable by fallthrough. + if (!EVMKeepRegisters && !AsmPrinter::isBlockOnlyReachableByFallthrough(&MBB)) + emitJumpDest(); +} + void EVMAsmPrinter::emitInstruction(const MachineInstr *MI) { EVMMCInstLower MCInstLowering(OutContext, *this, VRegMapping, MF->getRegInfo()); - unsigned Opc = MI->getOpcode(); - if (Opc == EVM::DATASIZE_S || Opc == EVM::DATAOFFSET_S) { - emitAssemblySymbol(MI); + + switch (MI->getOpcode()) { + default: + break; + case EVM::PseudoCALL: { + // Generate push instruction with the address of a function. + MCInst Push; + Push.setOpcode(EVM::PUSH4_S); + assert(MI->getOperand(0).isGlobal() && + "The first operand of PseudoCALL should be a GlobalValue."); + + // TODO: #745: Refactor EVMMCInstLower::Lower so we could use lowerOperand + // instead of creating a MCOperand directly. + MCOperand MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( + getSymbol(MI->getOperand(0).getGlobal()), OutContext)); + Push.addOperand(MCOp); + EmitToStreamer(*OutStreamer, Push); + + // Jump to a function. 
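+    // Taken together, the PseudoCALL expansion emitted here has the shape
+    // (illustrative; the return address is presumably pushed separately,
+    // before the call, by the stackification code):
+    //   PUSH4 @callee
+    //   JUMP
+    //   JUMPDEST        ; only when the call site has a return label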
+    MCInst Jump;
+    Jump.setOpcode(EVM::JUMP_S);
+    EmitToStreamer(*OutStreamer, Jump);
+
+    // If the function has a return label, emit it, followed by a JUMPDEST
+    // instruction.
+    if (MI->getNumExplicitOperands() > 1) {
+      assert(MI->getOperand(1).isMCSymbol() &&
+             "The second operand of PseudoCALL should be an MCSymbol.");
+      OutStreamer->emitLabel(MI->getOperand(1).getMCSymbol());
+      emitJumpDest();
+    }
     return;
   }
-  if (Opc == EVM::LINKERSYMBOL_S) {
-    emitWideRelocatableSymbol(MI);
+  case EVM::PseudoRET: {
+    // TODO: #746: Use PseudoInstExpansion and do this expansion in tblgen.
+    MCInst Jump;
+    Jump.setOpcode(EVM::JUMP_S);
+    EmitToStreamer(*OutStreamer, Jump);
     return;
   }
-  if (Opc == EVM::LOADIMMUTABLE_S) {
+  case EVM::PseudoJUMP:
+  case EVM::PseudoJUMPI: {
+    MCInst Push;
+    Push.setOpcode(EVM::PUSH4_S);
+
+    // TODO: #745: Refactor EVMMCInstLower::Lower so we could use lowerOperand
+    // instead of creating an MCOperand directly.
+    MCOperand MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
+        MI->getOperand(0).getMBB()->getSymbol(), OutContext));
+    Push.addOperand(MCOp);
+    EmitToStreamer(*OutStreamer, Push);
+
+    MCInst Jump;
+    Jump.setOpcode(MI->getOpcode() == EVM::PseudoJUMP ? EVM::JUMP_S
+                                                      : EVM::JUMPI_S);
+    EmitToStreamer(*OutStreamer, Jump);
+    return;
+  }
+  case EVM::LINKERSYMBOL_S:
+    emitWideRelocatableSymbol(MI);
+    return;
+  case EVM::DATASIZE_S:
+  case EVM::DATAOFFSET_S:
+    emitAssemblySymbol(MI);
+    return;
+  case EVM::LOADIMMUTABLE_S:
     emitLoadImmutableLabel(MI);
     return;
   }
@@ -133,12 +179,6 @@ void EVMAsmPrinter::emitInstruction(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, TmpInst);
 }
 
-bool EVMAsmPrinter::isBlockOnlyReachableByFallthrough(
-    const MachineBasicBlock *MBB) const {
-  // For simplicity, always emit BB labels.
-  return false;
-}
-
 // Lowers LOADIMMUTABLE_S as shown below:
 //   LOADIMMUTABLE_S @immutable_id
 //   ->
@@ -247,6 +287,12 @@ void EVMAsmPrinter::emitEndOfAsmFile(Module &) {
   ImmutablesMap.clear();
 }
 
+void EVMAsmPrinter::emitJumpDest() {
+  MCInst JumpDest;
+  JumpDest.setOpcode(EVM::JUMPDEST_S);
+  EmitToStreamer(*OutStreamer, JumpDest);
+}
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeEVMAsmPrinter() {
   const RegisterAsmPrinter<EVMAsmPrinter> X(getTheEVMTarget());
 }
diff --git a/llvm/lib/Target/EVM/EVMBackwardPropagationStackification.cpp b/llvm/lib/Target/EVM/EVMBackwardPropagationStackification.cpp
new file mode 100644
index 000000000000..265cc0a5eecd
--- /dev/null
+++ b/llvm/lib/Target/EVM/EVMBackwardPropagationStackification.cpp
@@ -0,0 +1,100 @@
+//===----- EVMBPStackification.cpp - BP stackification ---------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements backward propagation (BP) stackification.
+// The original idea is taken from the Ethereum Solidity compiler (solc)
+// stackification algorithm.
+// The algorithm is broken into the following components:
+// - CFG (Control Flow Graph) and CFG builder. The stackification CFG has a
+//   structure similar to the LLVM CFG, but employs a wider notion of
+//   instruction.
+// - Stack layout generator. Contains information about the stack layout at
+//   the entry and exit of each CFG::BasicBlock. It also contains the
+//   input/output stack layout for each operation.
+// - Code transformation into stackified form.
This component uses both CFG +// and the stack layout information to get stackified LLVM MIR. +// - Stack shuffler. Finds optimal (locally) transformation between two stack +// layouts using three primitives: POP, PUSHn, DUPn. The stack shuffler +// is used by the components above. +// +//===----------------------------------------------------------------------===// + +#include "EVM.h" +#include "EVMStackifyCodeEmitter.h" +#include "EVMSubtarget.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "evm-ethereum-stackify" + +namespace { +class EVMBPStackification final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + + EVMBPStackification() : MachineFunctionPass(ID) {} + +private: + StringRef getPassName() const override { + return "EVM Ethereum stackification"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::TracksLiveness); + } +}; +} // end anonymous namespace + +char EVMBPStackification::ID = 0; + +INITIALIZE_PASS_BEGIN(EVMBPStackification, DEBUG_TYPE, + "Backward propagation stackification", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(EVMBPStackification, DEBUG_TYPE, + "Backward propagation stackification", false, false) + +FunctionPass *llvm::createEVMBPStackification() { + return new EVMBPStackification(); +} + +bool EVMBPStackification::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG({ + dbgs() << "********** Backward propagation stackification **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + auto &LIS = getAnalysis(); + MachineLoopInfo *MLI = &getAnalysis(); + + // We don't preserve SSA form. + MRI.leaveSSA(); + + assert(MRI.tracksLiveness() && "Stackification expects liveness"); + EVMMachineCFGInfo CFGInfo(MF, MLI); + EVMStackModel StackModel(MF, LIS); + std::unique_ptr Layout = + EVMStackLayoutGenerator(MF, MLI, StackModel, CFGInfo).run(); + EVMStackifyCodeEmitter(*Layout, StackModel, CFGInfo, MF).run(); + return true; +} diff --git a/llvm/lib/Target/EVM/EVMInstrFormats.td b/llvm/lib/Target/EVM/EVMInstrFormats.td index bd2e6b8b7b0b..c8068ebe0352 100644 --- a/llvm/lib/Target/EVM/EVMInstrFormats.td +++ b/llvm/lib/Target/EVM/EVMInstrFormats.td @@ -53,6 +53,7 @@ class NI pattern, bit stack, let Opc = inst; let Inst{7-0} = Opc; let GasCost = cost; + let Defs = !if(stack, [], [ARGUMENTS]); } // Generates both register and stack based versions of one actual instruction. 
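// For example, `defm ADDMOD : I<...>` below yields a register form ADDMOD
// (isCodeGenOnly, used until stackification) and a stack form ADDMOD_S; the
// `_S` opcodes used by the asm printer above (PUSH4_S, JUMP_S, JUMPDEST_S)
// are such stack forms. The Defs change above makes every register-form
// instruction implicitly clobber the special ARGUMENTS register, which
// ARGUMENT instructions use, apparently to keep ARGUMENTs pinned at the
// function entry during scheduling (stack forms leave Defs empty).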
@@ -61,7 +62,7 @@ multiclass I pattern_r, int cost = 0, dag oops_s = (outs), dag iops_s = (ins), string argstr_s = ""> { let isCodeGenOnly = 1 in def "" : NI; - let BaseName = NAME in + let BaseName = NAME, Defs = [] in def _S : NI; } @@ -73,8 +74,8 @@ class NRI pattern, string asmstr> : NI { } -class EVMPseudo pattern> - : NI { +class EVMPseudo pattern, bit stack = 0> + : NI { let isPseudo = 1; let isCodeGenOnly = 1; } diff --git a/llvm/lib/Target/EVM/EVMInstrInfo.cpp b/llvm/lib/Target/EVM/EVMInstrInfo.cpp index 340ecff24868..7e66f0a9e481 100644 --- a/llvm/lib/Target/EVM/EVMInstrInfo.cpp +++ b/llvm/lib/Target/EVM/EVMInstrInfo.cpp @@ -174,7 +174,7 @@ unsigned EVMInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *FBB, ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { - assert(!BytesAdded && "Code is size not handled"); + assert(!BytesAdded && "Code size not handled"); // The number of instructions inserted. unsigned InstrCount = 0; diff --git a/llvm/lib/Target/EVM/EVMInstrInfo.td b/llvm/lib/Target/EVM/EVMInstrInfo.td index b2d3b9691436..33dbd9b77881 100644 --- a/llvm/lib/Target/EVM/EVMInstrInfo.td +++ b/llvm/lib/Target/EVM/EVMInstrInfo.td @@ -200,8 +200,7 @@ def ADJCALLSTACKUP [(EVMcallseq_end timm:$amt1, timm:$amt2)]>; } -let isCodeGenOnly = 1 in { -let hasSideEffects = 1 in +let hasSideEffects = 1, Defs = [], Uses = [ARGUMENTS] in def ARGUMENT : NRI<(outs GPR:$res), (ins i256imm:$argno), [(set GPR:$res, (EVMargument timm:$argno))], @@ -219,7 +218,6 @@ def CONST_I256 let isAsCheapAsAMove = 1 in def COPY_I256 : NRI<(outs GPR:$res), (ins GPR:$src), [], "COPY_I256 $res, $src">; -} def : Pat<(i256 (EVMTargetAddrWrapper tglobaladdr:$addr)), (CONST_I256 tglobaladdr:$addr)>; @@ -252,6 +250,8 @@ def FCALL "FCALL\t$callee">; } // Uses = [SP], isCall = 1 +let isCall = 1 in +def PseudoCALL : EVMPseudo<(outs), (ins jmptarget:$callee, variable_ops), [], true>; //===----------------------------------------------------------------------===// // EVM arithmetic instructions. 
@@ -272,17 +272,19 @@ defm SDIV : BinaryInst; defm MOD : BinaryInst; defm SMOD : BinaryInst; -defm ADDMOD - : I<(outs GPR:$dst), (ins GPR:$add_op1, GPR:$add_op2, GPR:$denom), - [(set GPR:$dst, - (int_evm_addmod GPR:$add_op1, GPR:$add_op2, GPR:$denom))], - "ADDMOD", " $dst, $add_op1, $add_op2, $denom", 0x08, 8>; - -defm MULMOD - : I<(outs GPR:$dst), (ins GPR:$mul_op1, GPR:$mul_op2, GPR:$denom), - [(set GPR:$dst, - (int_evm_mulmod GPR:$mul_op1, GPR:$mul_op2, GPR:$denom))], - "MULMOD", " $dst, $mul_op1, $mul_op2, $denom", 0x09, 8>; +let isCommutable = 1 in { + defm ADDMOD + : I<(outs GPR:$dst), (ins GPR:$add_op1, GPR:$add_op2, GPR:$denom), + [(set GPR:$dst, + (int_evm_addmod GPR:$add_op1, GPR:$add_op2, GPR:$denom))], + "ADDMOD", " $dst, $add_op1, $add_op2, $denom", 0x08, 8>; + + defm MULMOD + : I<(outs GPR:$dst), (ins GPR:$mul_op1, GPR:$mul_op2, GPR:$denom), + [(set GPR:$dst, + (int_evm_mulmod GPR:$mul_op1, GPR:$mul_op2, GPR:$denom))], + "MULMOD", " $dst, $mul_op1, $mul_op2, $denom", 0x09, 8>; +} defm EXP : I<(outs GPR:$dst), (ins GPR:$base, GPR:$exp), @@ -405,18 +407,23 @@ let isBranch = 1, isTerminator = 1 in { defm JUMPI : I<(outs), (ins jmptarget:$dst, GPR:$cond), [(brcond GPR:$cond, bb:$dst)], "JUMPI", " $dst, $cond", 0x57, 10>; +def PseudoJUMPI : EVMPseudo<(outs), (ins jmptarget:$dst), [], true>; -let isBarrier = 1 in +let isBarrier = 1 in { defm JUMP : I<(outs), (ins jmptarget:$dst), [(br bb:$dst)], "JUMP", " $dst", 0x56, 8>; +def PseudoJUMP : EVMPseudo<(outs), (ins jmptarget:$dst), [], true>; +} // isBarrier = 1 } // isBranch = 1, isTerminator = 1 // This isn't really a control flow instruction, but it should be used to mark // destination of jump instructions. defm JUMPDEST : I<(outs), (ins), [], "JUMPDEST", "", 0x5B, 1>; -let isBarrier = 1, isTerminator = 1, isReturn = 1 in +let isBarrier = 1, isTerminator = 1, isReturn = 1 in { def RET : NRI<(outs), (ins variable_ops), [(EVMret)], "RET">; +def PseudoRET : EVMPseudo<(outs), (ins), [], true>; +} //===----------------------------------------------------------------------===// @@ -750,7 +757,7 @@ defm CREATE2 // EVM instructions to return with error. 
//===----------------------------------------------------------------------===// -let isTerminator = 1, isBarrier = 1, isReturn = 1 in { +let isTerminator = 1, isBarrier = 1 in { defm REVERT : I<(outs), (ins GPR:$offset, GPR:$size), [(int_evm_revert GPR:$offset, GPR:$size)], @@ -767,7 +774,7 @@ let hasSideEffects = 1 in { : I<(outs), (ins GPR:$addr), [(int_evm_selfdestruct GPR:$addr)], "SELFDESTRUCT", " $addr", 0xFF, 5000>; - let isTerminator = 1, isBarrier = 1, isReturn = 1 in { + let isTerminator = 1, isBarrier = 1 in { defm STOP : I<(outs), (ins), [(int_evm_stop)], "STOP", "", 0x00, 0>; defm INVALID : I<(outs), (ins), [(int_evm_invalid)], "INVALID", "", 0xFE, 0>; } @@ -794,7 +801,7 @@ foreach I = {1-16} in { defm PUSH0 : I<(outs), (ins), [], "PUSH0", "", 0x5F, 2>; -def PUSH_LABEL : NI<(outs), (ins jmptarget:$dst), [], false, "", 0, 0> { +def PUSH_LABEL : NI<(outs), (ins jmptarget:$dst), [], true, "", 0, 0> { let isCodeGenOnly = 1; } diff --git a/llvm/lib/Target/EVM/EVMLinkRuntime.cpp b/llvm/lib/Target/EVM/EVMLinkRuntime.cpp index a758131d9c7e..69d4c4b62709 100644 --- a/llvm/lib/Target/EVM/EVMLinkRuntime.cpp +++ b/llvm/lib/Target/EVM/EVMLinkRuntime.cpp @@ -81,12 +81,6 @@ static bool EVMLinkRuntimeImpl(Module &M, const char *ModuleToLink) { exit(1); } - for (auto &F : M.functions()) { - if (!F.isDeclaration()) { - F.addFnAttr(Attribute::NoInline); - } - } - bool LinkErr = false; LinkErr = L.linkInModule( std::move(RTM), Flags, [](Module &M, const StringSet<> &GVS) { diff --git a/llvm/lib/Target/EVM/EVMMachineCFGInfo.cpp b/llvm/lib/Target/EVM/EVMMachineCFGInfo.cpp new file mode 100644 index 000000000000..8a2ad98da818 --- /dev/null +++ b/llvm/lib/Target/EVM/EVMMachineCFGInfo.cpp @@ -0,0 +1,192 @@ +//===--------- EVMMachineCFGInfo.cpp - Machine CFG info ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides some information about machine Control Flow Graph. 
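+// Specifically, for every MachineBasicBlock it records how the block exits
+// (conditional branch, unconditional branch, function return, or terminate)
+// together with the relevant terminator instructions, whether the block lies
+// on a path to a function return, and whether it is a cut vertex of the CFG.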
+//
+//===----------------------------------------------------------------------===//
+
+#include "EVMMachineCFGInfo.h"
+#include "EVMMachineFunctionInfo.h"
+#include "EVMSubtarget.h"
+#include "MCTargetDesc/EVMMCTargetDesc.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+
+using namespace llvm;
+
+static std::pair<MachineInstr *, MachineInstr *>
+getBranchInstructions(MachineBasicBlock &MBB) {
+  MachineInstr *ConditionalBranch = nullptr;
+  MachineInstr *UnconditionalBranch = nullptr;
+  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
+  while (I != E && I->isTerminator()) {
+    if (I->isUnconditionalBranch())
+      UnconditionalBranch = &*I;
+    else if (I->isConditionalBranch())
+      ConditionalBranch = &*I;
+    ++I;
+  }
+  return {ConditionalBranch, UnconditionalBranch};
+}
+
+static bool isTerminate(const MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+  case EVM::REVERT:
+  case EVM::RETURN:
+  case EVM::STOP:
+  case EVM::INVALID:
+    return true;
+  }
+}
+
+EVMMachineCFGInfo::EVMMachineCFGInfo(MachineFunction &MF,
+                                     MachineLoopInfo *MLI) {
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  for (MachineBasicBlock &MBB : MF)
+    collectTerminatorsInfo(TII, MLI, MBB);
+
+  SmallVector<const MachineBasicBlock *> ReturnBlocks;
+  for (const MachineBasicBlock &MBB : MF) {
+    const EVMMBBTerminatorsInfo *TermInfo = getTerminatorsInfo(&MBB);
+    if (TermInfo->getExitType() == MBBExitType::FunctionReturn)
+      ReturnBlocks.emplace_back(&MBB);
+  }
+  collectBlocksLeadingToFunctionReturn(ReturnBlocks);
+  collectCutVertexes(&MF.front());
+}
+
+const EVMMBBTerminatorsInfo *
+EVMMachineCFGInfo::getTerminatorsInfo(const MachineBasicBlock *MBB) const {
+  return MBBTerminatorsInfoMap.at(MBB).get();
+}
+
+void EVMMachineCFGInfo::collectTerminatorsInfo(const TargetInstrInfo *TII,
+                                               const MachineLoopInfo *MLI,
+                                               MachineBasicBlock &MBB) {
+  assert(MBBTerminatorsInfoMap.count(&MBB) == 0);
+
+  MBBTerminatorsInfoMap.try_emplace(&MBB,
+                                    std::make_unique<EVMMBBTerminatorsInfo>());
+  EVMMBBTerminatorsInfo *Info = MBBTerminatorsInfoMap.at(&MBB).get();
+  SmallVector<MachineOperand> Cond;
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  if (TII->analyzeBranch(MBB, TBB, FBB, Cond)) {
+    MachineInstr *LastMI = &MBB.back();
+    Info->LastTerm = LastMI;
+    if (LastMI->isReturn())
+      Info->ExitType = MBBExitType::FunctionReturn;
+    else if (isTerminate(LastMI))
+      Info->ExitType = MBBExitType::Terminate;
+    else
+      llvm_unreachable("Unexpected MBB exit");
+    return;
+  }
+
+  // Either a non-terminator instruction is followed by 'unreachable', or a
+  // 'noreturn' function call ends the MBB.
+  if (!TBB && !FBB && MBB.succ_empty()) {
+    Info->ExitType = MBBExitType::Terminate;
+    return;
+  }
+
+  auto [CondBr, UncondBr] = getBranchInstructions(MBB);
+  if (!TBB || (TBB && Cond.empty())) {
+    // Fall through, or unconditional jump.
+    assert(!CondBr);
+    if (!TBB) {
+      assert(!UncondBr);
+      assert(MBB.getSingleSuccessor());
+      TBB = MBB.getFallThrough();
+      assert(TBB);
+    }
+
+    Info->ExitType = MBBExitType::UnconditionalBranch;
+    Info->BranchInfo.Unconditional = {TBB, UncondBr};
+  } else if (TBB && !Cond.empty()) {
+    // Conditional jump + fallthrough, or a conditional jump followed by an
+    // unconditional jump.
+    if (!FBB) {
+      FBB = MBB.getFallThrough();
+      assert(FBB);
+    }
+    Info->ExitType = MBBExitType::ConditionalBranch;
+    assert(Cond[0].isIdenticalTo(CondBr->getOperand(1)));
+    Info->BranchInfo.Conditional = {&CondBr->getOperand(1), TBB, FBB, CondBr,
+                                    UncondBr};
+  }
+}
+
+// Mark basic blocks that have outgoing paths to function returns.
+void EVMMachineCFGInfo::collectBlocksLeadingToFunctionReturn(
+    const SmallVector<const MachineBasicBlock *> &Returns) {
+  SmallPtrSet<const MachineBasicBlock *, 16> Visited;
+  SmallVector<const MachineBasicBlock *> WorkList = Returns;
+  while (!WorkList.empty()) {
+    const MachineBasicBlock *MBB = WorkList.pop_back_val();
+    if (!Visited.insert(MBB).second)
+      continue;
+
+    ToFuncReturnVertexes.insert(MBB);
+    WorkList.append(MBB->pred_begin(), MBB->pred_end());
+  }
+}
+
+// Collect the cut vertexes of the CFG, i.e. the blocks that begin a
+// disconnected sub-graph of the CFG. Entering a cut-vertex block means that
+// control flow never returns to a previously visited block.
+void EVMMachineCFGInfo::collectCutVertexes(const MachineBasicBlock *Entry) {
+  DenseSet<const MachineBasicBlock *> Visited;
+  DenseMap<const MachineBasicBlock *, size_t> Disc;
+  DenseMap<const MachineBasicBlock *, size_t> Low;
+  DenseMap<const MachineBasicBlock *, const MachineBasicBlock *> Parent;
+  size_t Time = 0;
+  auto Dfs = [&](const MachineBasicBlock *U, auto Recurse) -> void {
+    Visited.insert(U);
+    Disc[U] = Low[U] = Time;
+    Time++;
+
+    SmallVector<const MachineBasicBlock *> Children(U->predecessors());
+    const EVMMBBTerminatorsInfo *UTermInfo = getTerminatorsInfo(U);
+    switch (UTermInfo->getExitType()) {
+    case MBBExitType::Terminate:
+      CutVertexes.insert(U);
+      break;
+    default:
+      Children.append(U->succ_begin(), U->succ_end());
+      break;
+    }
+
+    for (const MachineBasicBlock *V : Children) {
+      // Ignore self-loop edges, as they cannot be bridges.
+      if (V == U)
+        continue;
+
+      if (!Visited.count(V)) {
+        Parent[V] = U;
+        Recurse(V, Recurse);
+        Low[U] = std::min(Low[U], Low[V]);
+        if (Low[V] > Disc[U]) {
+          // U <-> V is a cut edge in the undirected graph.
+          bool EdgeVtoU = std::count(U->pred_begin(), U->pred_end(), V);
+          bool EdgeUtoV = std::count(V->pred_begin(), V->pred_end(), U);
+          if (EdgeVtoU && !EdgeUtoV)
+            // Cut edge V -> U.
+            CutVertexes.insert(U);
+          else if (EdgeUtoV && !EdgeVtoU)
+            // Cut edge U -> V.
+            CutVertexes.insert(V);
+        }
+      } else if (V != Parent[U])
+        Low[U] = std::min(Low[U], Disc[V]);
+    }
+  };
+  Dfs(Entry, Dfs);
+}
diff --git a/llvm/lib/Target/EVM/EVMMachineCFGInfo.h b/llvm/lib/Target/EVM/EVMMachineCFGInfo.h
new file mode 100644
index 000000000000..fdb97ae1785e
--- /dev/null
+++ b/llvm/lib/Target/EVM/EVMMachineCFGInfo.h
@@ -0,0 +1,120 @@
+//===-------- EVMMachineCFGInfo.h - Machine CFG info -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides information about the machine Control Flow Graph.
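// A minimal, self-contained sketch of the classic bridge-finding DFS that
// collectCutVertexes above builds on. Assumptions of the sketch: a plain
// adjacency-list graph instead of MachineBasicBlocks, with every edge listed
// in both directions (mirroring how the pass walks both successors and
// predecessors); the pass additionally turns terminating blocks into cut
// vertexes and uses the original edge direction to decide which endpoint of
// a cut edge becomes the cut vertex.
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

using Graph = std::vector<std::vector<std::size_t>>;

// Tarjan-style low-link DFS: an edge U-V is a bridge iff Low[V] > Disc[U].
static void dfsBridges(std::size_t U, std::size_t Parent, const Graph &Adj,
                       std::vector<std::size_t> &Disc,
                       std::vector<std::size_t> &Low, std::vector<bool> &Seen,
                       std::size_t &Time,
                       std::vector<std::pair<std::size_t, std::size_t>> &Bridges) {
  Seen[U] = true;
  Disc[U] = Low[U] = Time++;
  for (std::size_t V : Adj[U]) {
    if (V == U)
      continue; // A self-loop can never be a bridge.
    if (!Seen[V]) {
      dfsBridges(V, U, Adj, Disc, Low, Seen, Time, Bridges);
      Low[U] = std::min(Low[U], Low[V]);
      if (Low[V] > Disc[U])
        Bridges.push_back({U, V}); // Removing U-V disconnects the graph.
    } else if (V != Parent) {
      Low[U] = std::min(Low[U], Disc[V]); // Back edge: update the low link.
    }
  }
}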
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_EVM_EVMMACHINE_CFG_INFO_H +#define LLVM_LIB_TARGET_EVM_EVMMACHINE_CFG_INFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/CodeGen/MachineOperand.h" + +#include + +namespace llvm { + +class LiveIntervals; +class MachineFunction; +class MachineBasicBlock; +class MachineLoopInfo; +class TargetInstrInfo; + +enum class MBBExitType { + Invalid, + ConditionalBranch, + UnconditionalBranch, + FunctionReturn, + Terminate +}; + +class EVMMBBTerminatorsInfo { + friend class EVMMachineCFGInfo; + + union BranchInfoUnion { + BranchInfoUnion() {} + struct { + const MachineOperand *Condition; + MachineBasicBlock *TrueBB; + MachineBasicBlock *FalseBB; + MachineInstr *CondBr; + MachineInstr *UncondBr; + } Conditional; + + struct { + MachineBasicBlock *TargetBB; + MachineInstr *Br; + } Unconditional; + } BranchInfo; + + MBBExitType ExitType = MBBExitType::Invalid; + MachineInstr *LastTerm = nullptr; + +public: + MBBExitType getExitType() const { + assert(ExitType != MBBExitType::Invalid); + return ExitType; + } + + std::tuple + getConditionalBranch() const { + assert(ExitType == MBBExitType::ConditionalBranch); + return {BranchInfo.Conditional.CondBr, BranchInfo.Conditional.UncondBr, + BranchInfo.Conditional.TrueBB, BranchInfo.Conditional.FalseBB, + BranchInfo.Conditional.Condition}; + } + + std::pair + getUnconditionalBranch() const { + assert(ExitType == MBBExitType::UnconditionalBranch); + return {BranchInfo.Unconditional.Br, BranchInfo.Unconditional.TargetBB}; + } + + MachineInstr *getFunctionReturn() const { + assert(ExitType == MBBExitType::FunctionReturn); + return LastTerm; + } +}; + +class EVMMachineCFGInfo { +public: + EVMMachineCFGInfo(const EVMMachineCFGInfo &) = delete; + EVMMachineCFGInfo &operator=(const EVMMachineCFGInfo &) = delete; + EVMMachineCFGInfo(MachineFunction &MF, MachineLoopInfo *MLI); + + const EVMMBBTerminatorsInfo * + getTerminatorsInfo(const MachineBasicBlock *MBB) const; + + bool isCutVertex(const MachineBasicBlock *MBB) const { + return CutVertexes.count(MBB) > 0; + } + + bool isOnPathToFuncReturn(const MachineBasicBlock *MBB) const { + return ToFuncReturnVertexes.count(MBB) > 0; + } + +private: + DenseMap> + MBBTerminatorsInfoMap; + DenseSet ToFuncReturnVertexes; + DenseSet CutVertexes; + + void collectTerminatorsInfo(const TargetInstrInfo *TII, + const MachineLoopInfo *MLI, + MachineBasicBlock &MBB); + void collectBlocksLeadingToFunctionReturn( + const SmallVector &Returns); + void collectCutVertexes(const MachineBasicBlock *Entry); +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_EVM_EVMMACHINE_CFG_INFO_H diff --git a/llvm/lib/Target/EVM/EVMSingleUseExpression.cpp b/llvm/lib/Target/EVM/EVMSingleUseExpression.cpp index 8337dc7875d1..f31a49b53033 100644 --- a/llvm/lib/Target/EVM/EVMSingleUseExpression.cpp +++ b/llvm/lib/Target/EVM/EVMSingleUseExpression.cpp @@ -311,6 +311,10 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, if (NextI == Insert) return true; + // Don't move ARGUMENT instructions, as stackification pass relies on this. + if (DefI->getOpcode() == EVM::ARGUMENT) + return false; + // Check for register dependencies. 
SmallVector MutableRegisters; for (const MachineOperand &MO : DefI->operands()) { diff --git a/llvm/lib/Target/EVM/EVMSplitCriticalEdges.cpp b/llvm/lib/Target/EVM/EVMSplitCriticalEdges.cpp new file mode 100644 index 000000000000..f96d0deddca7 --- /dev/null +++ b/llvm/lib/Target/EVM/EVMSplitCriticalEdges.cpp @@ -0,0 +1,100 @@ +//===----- EVMSplitCriticalEdges.cpp - Split Critical Edges ----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file performs splitting of CFG critical edges. +// +//===----------------------------------------------------------------------===// + +#include "EVM.h" +#include "EVMMachineFunctionInfo.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "evm-split-critical-edges" + +namespace { +class EVMSplitCriticalEdges final : public MachineFunctionPass { +public: + static char ID; + + EVMSplitCriticalEdges() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { return "EVM split critical edges"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + MachineFunction *MF = nullptr; + + bool splitCriticalEdges(); +}; +} // end anonymous namespace + +char EVMSplitCriticalEdges::ID = 0; + +INITIALIZE_PASS_BEGIN(EVMSplitCriticalEdges, DEBUG_TYPE, "Split critical edges", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(EVMSplitCriticalEdges, DEBUG_TYPE, "Split critical edges", + false, false) + +FunctionPass *llvm::createEVMSplitCriticalEdges() { + return new EVMSplitCriticalEdges(); +} + +bool EVMSplitCriticalEdges::splitCriticalEdges() { + SetVector> ToSplit; + for (MachineBasicBlock &MBB : *MF) { + if (MBB.pred_size() > 1) { + for (MachineBasicBlock *Pred : MBB.predecessors()) { + if (Pred->succ_size() > 1) + ToSplit.insert(std::make_pair(Pred, &MBB)); + } + } + } + + bool Changed = false; + for (const auto &Pair : ToSplit) { + auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this); + if (NewSucc != nullptr) { + Pair.first->updateTerminator(NewSucc); + NewSucc->updateTerminator(Pair.second); + LLVM_DEBUG(dbgs() << " *** Splitting critical edge: " + << printMBBReference(*Pair.first) << " -- " + << printMBBReference(*NewSucc) << " -- " + << printMBBReference(*Pair.second) << '\n'); + Changed = true; + } else { + llvm_unreachable("Cannot break critical edge"); + } + } + return Changed; +} + +bool EVMSplitCriticalEdges::runOnMachineFunction(MachineFunction &Mf) { + MF = &Mf; + LLVM_DEBUG({ + dbgs() << "********** Splitting critical edges **********\n" + << "********** Function: " << Mf.getName() << '\n'; + }); + + bool Changed = splitCriticalEdges(); + return Changed; +} diff --git a/llvm/lib/Target/EVM/EVMStackDebug.cpp b/llvm/lib/Target/EVM/EVMStackDebug.cpp new file mode 100644 index 000000000000..de7a20d351af --- /dev/null +++ b/llvm/lib/Target/EVM/EVMStackDebug.cpp @@ -0,0 +1,106 @@ +//===-- EVMStackDebug.cpp - Debugging of the 
stackification -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes for dumping the state of stackification-related
+// data structures and algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EVMStackDebug.h"
+#include "EVMStackLayoutGenerator.h"
+#include "EVMSubtarget.h"
+#include "MCTargetDesc/EVMMCTargetDesc.h"
+#include
+
+using namespace llvm;
+
+std::string llvm::stackToString(const Stack &S) {
+  std::string Result("[ ");
+  for (const auto *Slot : S)
+    Result += Slot->toString() + ' ';
+  Result += ']';
+  return Result;
+}
+
+#ifndef NDEBUG
+
+void StackLayoutPrinter::operator()() {
+  OS << "Function: " << MF.getName() << "(";
+  for (const StackSlot *ParamSlot : StackModel.getFunctionParameters()) {
+    if (const auto *Slot = dyn_cast<VariableSlot>(ParamSlot))
+      OS << printReg(Slot->getReg(), nullptr, 0, nullptr) << ' ';
+    else if (isa<JunkSlot>(ParamSlot))
+      OS << "[unused param] ";
+    else
+      llvm_unreachable("Unexpected stack slot");
+  }
+  OS << ");\n";
+  OS << "FunctionEntry " << " -> Block" << getBlockId(MF.front()) << ";\n";
+
+  for (const auto &MBB : MF) {
+    printBlock(MBB);
+  }
+}
+
+void StackLayoutPrinter::printBlock(MachineBasicBlock const &Block) {
+  OS << "Block" << getBlockId(Block) << " [\n";
+  OS << stackToString(Layout.getMBBEntryLayout(&Block)) << "\n";
+  for (auto const &Op : StackModel.getOperations(&Block)) {
+    OS << "\n";
+    Stack EntryLayout = Layout.getOperationEntryLayout(&Op);
+    OS << stackToString(EntryLayout) << "\n";
+    OS << Op.toString() << "\n";
+    assert(Op.getInput().size() <= EntryLayout.size());
+    EntryLayout.resize(EntryLayout.size() - Op.getInput().size());
+    EntryLayout.append(Op.getOutput());
+    OS << stackToString(EntryLayout) << "\n";
+  }
+  OS << "\n";
+  OS << stackToString(Layout.getMBBExitLayout(&Block)) << "\n";
+  OS << "];\n";
+
+  const EVMMBBTerminatorsInfo *TermInfo = CFGInfo.getTerminatorsInfo(&Block);
+  MBBExitType ExitType = TermInfo->getExitType();
+  if (ExitType == MBBExitType::UnconditionalBranch) {
+    auto [BranchInstr, Target] = TermInfo->getUnconditionalBranch();
+    OS << "Block" << getBlockId(Block) << "Exit [label=\"";
+    OS << "Jump\"];\n";
+    OS << "Block" << getBlockId(Block) << "Exit -> Block"
+       << getBlockId(*Target) << ";\n";
+  } else if (ExitType == MBBExitType::ConditionalBranch) {
+    auto [CondBr, UncondBr, TrueBB, FalseBB, Condition] =
+        TermInfo->getConditionalBranch();
+    OS << "Block" << getBlockId(Block) << "Exit [label=\"{ ";
+    OS << StackModel.getStackSlot(*Condition)->toString();
+    OS << "| { <0> Zero | <1> NonZero }}\"];\n";
+    OS << "Block" << getBlockId(Block);
+    OS << "Exit:0 -> Block" << getBlockId(*FalseBB) << ";\n";
+    OS << "Block" << getBlockId(Block);
+    OS << "Exit:1 -> Block" << getBlockId(*TrueBB) << ";\n";
+  } else if (ExitType == MBBExitType::FunctionReturn) {
+    OS << "Block" << getBlockId(Block) << "Exit [label=\"FunctionReturn["
+       << MF.getName() << "]\"];\n";
+    const MachineInstr &MI = Block.back();
+    OS << "Return values: " << stackToString(StackModel.getReturnArguments(MI))
+       << ";\n";
+  } else if (ExitType == MBBExitType::Terminate) {
+    OS << "Block" << getBlockId(Block) << "Exit [label=\"Terminated\"];\n";
+  }
+  OS << "\n";
+}
+
+std::string
StackLayoutPrinter::getBlockId(MachineBasicBlock const &Block) {
+  std::string Name =
+      std::to_string(Block.getNumber()) + "." + std::string(Block.getName());
+  if (auto It = BlockIds.find(&Block); It != BlockIds.end())
+    return std::to_string(It->second) + "(" + Name + ")";
+
+  size_t Id = BlockIds[&Block] = BlockCount++;
+  return std::to_string(Id) + "(" + Name + ")";
+}
+#endif // NDEBUG
diff --git a/llvm/lib/Target/EVM/EVMStackDebug.h b/llvm/lib/Target/EVM/EVMStackDebug.h
new file mode 100644
index 000000000000..615ffd9279c7
--- /dev/null
+++ b/llvm/lib/Target/EVM/EVMStackDebug.h
@@ -0,0 +1,55 @@
+//===---- EVMStackDebug.h - Debugging of the stackification -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares classes for dumping the state of stackification-related
+// data structures and algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_EVM_EVMSTACKDEBUG_H
+#define LLVM_LIB_TARGET_EVM_EVMSTACKDEBUG_H
+
+#include "EVMMachineCFGInfo.h"
+#include "EVMStackModel.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include <map>
+
+namespace llvm {
+
+class EVMStackLayout;
+
+std::string stackToString(Stack const &S);
+
+#ifndef NDEBUG
+class StackLayoutPrinter {
+public:
+  StackLayoutPrinter(raw_ostream &OS, const MachineFunction &MF,
+                     const EVMStackLayout &EVMStackLayout,
+                     const EVMMachineCFGInfo &CFGInfo,
+                     const EVMStackModel &StackModel)
+      : OS(OS), MF(MF), Layout(EVMStackLayout), CFGInfo(CFGInfo),
+        StackModel(StackModel) {}
+  void operator()();
+
+private:
+  void printBlock(MachineBasicBlock const &Block);
+  std::string getBlockId(MachineBasicBlock const &Block);
+
+  raw_ostream &OS;
+  const MachineFunction &MF;
+  const EVMStackLayout &Layout;
+  const EVMMachineCFGInfo &CFGInfo;
+  const EVMStackModel &StackModel;
+  size_t BlockCount = 0;
+  std::map<const MachineBasicBlock *, size_t> BlockIds;
+};
+#endif // NDEBUG
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_EVM_EVMSTACKDEBUG_H
diff --git a/llvm/lib/Target/EVM/EVMStackLayoutGenerator.cpp b/llvm/lib/Target/EVM/EVMStackLayoutGenerator.cpp
new file mode 100644
index 000000000000..70aef612ba9f
--- /dev/null
+++ b/llvm/lib/Target/EVM/EVMStackLayoutGenerator.cpp
@@ -0,0 +1,763 @@
+//===-- EVMStackLayoutGenerator.cpp - Stack layout generator ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack layout generator which, for each operation,
+// finds a complete stack layout that:
+// - has the slots required for the operation at the stack top;
+// - leaves the operation result in a layout that makes it easy to achieve the
+//   next desired layout.
+// It also finds an entering/exiting stack layout for each block.
+// +//===----------------------------------------------------------------------===// + +#include "EVMStackLayoutGenerator.h" +#include "EVMRegisterInfo.h" +#include "EVMStackDebug.h" +#include "EVMStackShuffler.h" +#include "MCTargetDesc/EVMMCTargetDesc.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "evm-stack-layout-generator" + +namespace { +template struct Overload : Ts... { + using Ts::operator()...; +}; +template Overload(Ts...) -> Overload; + +/// Return the number of hops from the beginning of the \p RangeOrContainer +/// to the \p Item. If no \p Item is found in the \p RangeOrContainer, +/// std::nullopt is returned. +template +std::optional offset(T &&RangeOrContainer, V &&Item) { + auto It = find(RangeOrContainer, Item); + return (It == adl_end(RangeOrContainer)) + ? std::nullopt + : std::optional(std::distance(adl_begin(RangeOrContainer), It)); +} + +/// Return a range covering the last N elements of \p RangeOrContainer. +template auto take_back(T &&RangeOrContainer, size_t N = 1) { + return make_range(std::prev(adl_end(RangeOrContainer), N), + adl_end(RangeOrContainer)); +} + +/// Returns all stack too deep errors that would occur when shuffling \p Source +/// to \p Target. +SmallVector +findStackTooDeep(Stack const &Source, Stack const &Target) { + Stack CurrentStack = Source; + SmallVector Errors; + + auto getVariableChoices = [](auto &&SlotRange) { + SmallVector Result; + for (auto const *Slot : SlotRange) + if (auto const *VarSlot = dyn_cast(Slot)) + if (!is_contained(Result, VarSlot->getReg())) + Result.push_back(VarSlot->getReg()); + return Result; + }; + + ::createStackLayout( + CurrentStack, Target, + [&](unsigned I) { + if (I > 16) + Errors.emplace_back(EVMStackLayoutGenerator::StackTooDeep{ + I - 16, getVariableChoices(take_back(CurrentStack, I + 1))}); + }, + [&](const StackSlot *Slot) { + if (Slot->isRematerializable()) + return; + + if (auto Depth = offset(reverse(CurrentStack), Slot); + Depth && *Depth >= 16) + Errors.emplace_back(EVMStackLayoutGenerator::StackTooDeep{ + *Depth - 15, + getVariableChoices(take_back(CurrentStack, *Depth + 1))}); + }, + [&]() {}); + return Errors; +} + +/// Returns the ideal stack to have before executing an operation that outputs +/// \p OperationOutput, s.t. shuffling to \p Post is cheap (excluding the +/// input of the operation itself). If \p GenerateSlotOnTheFly returns true for +/// a slot, this slot should not occur in the ideal stack, but rather be +/// generated on the fly during shuffling. +template +Stack createIdealLayout(const Stack &OperationOutput, const Stack &Post, + Callable GenerateSlotOnTheFly) { + struct PreviousSlot { + size_t slot; + }; + using LayoutT = SmallVector>; + + // Determine the number of slots that have to be on stack before executing the + // operation (excluding the inputs of the operation itself). That is slots + // that should not be generated on the fly and are not outputs of the + // operation. 
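+  // Worked micro-example (illustrative): if Post = [ a, b, out0 ] and the
+  // operation outputs exactly [ out0 ] with nothing generated on the fly,
+  // then PreOperationLayoutSize = 3 - 1 = 2, i.e. only 'a' and 'b' must
+  // already be on the stack before the operation executes.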
+ size_t PreOperationLayoutSize = Post.size(); + for (const auto *Slot : Post) + if (is_contained(OperationOutput, Slot) || GenerateSlotOnTheFly(Slot)) + --PreOperationLayoutSize; + + // The symbolic layout directly after the operation has the form + // PreviousSlot{0}, ..., PreviousSlot{n}, [output<0>], ..., [output] + LayoutT Layout; + for (size_t Index = 0; Index < PreOperationLayoutSize; ++Index) + Layout.emplace_back(PreviousSlot{Index}); + Layout.append(OperationOutput.begin(), OperationOutput.end()); + + // Shortcut for trivial case. + if (Layout.empty()) + return Stack{}; + + // Next we will shuffle the layout to the Post stack using ShuffleOperations + // that are aware of PreviousSlot's. + struct ShuffleOperations { + LayoutT &Layout; + const Stack &Post; + std::set Outputs; + Multiplicity Mult; + Callable GenerateSlotOnTheFly; + ShuffleOperations(LayoutT &Layout, Stack const &Post, + Callable GenerateSlotOnTheFly) + : Layout(Layout), Post(Post), + GenerateSlotOnTheFly(GenerateSlotOnTheFly) { + for (const auto &LayoutSlot : Layout) + if (auto Slot = std::get_if(&LayoutSlot)) + Outputs.insert(*Slot); + + for (const auto &LayoutSlot : Layout) + if (auto Slot = std::get_if(&LayoutSlot)) + --Mult[*Slot]; + + for (auto *Slot : Post) + if (Outputs.count(Slot) || GenerateSlotOnTheFly(Slot)) + ++Mult[Slot]; + } + + bool isCompatible(size_t Source, size_t Target) { + return Source < Layout.size() && Target < Post.size() && + (isa(Post[Target]) || + std::visit(Overload{[&](const PreviousSlot &) { + return !Outputs.count(Post[Target]) && + !GenerateSlotOnTheFly(Post[Target]); + }, + [&](const StackSlot *S) { + return S == Post[Target]; + }}, + Layout[Source])); + } + + bool sourceIsSame(size_t Lhs, size_t Rhs) { + if (std::holds_alternative(Layout[Lhs]) && + std::holds_alternative(Layout[Rhs])) + return true; + + auto SlotLHS = std::get_if(&Layout[Lhs]); + auto SlotRHS = std::get_if(&Layout[Rhs]); + return SlotLHS && SlotRHS && *SlotLHS == *SlotRHS; + } + + int sourceMultiplicity(size_t Offset) { + return std::visit( + Overload{[&](PreviousSlot const &) { return 0; }, + [&](const StackSlot *S) { return Mult.at(S); }}, + Layout[Offset]); + } + + int targetMultiplicity(size_t Offset) { + if (!Outputs.count(Post[Offset]) && !GenerateSlotOnTheFly(Post[Offset])) + return 0; + return Mult.at(Post[Offset]); + } + + bool targetIsArbitrary(size_t Offset) { + return Offset < Post.size() && isa(Post[Offset]); + } + + void swap(size_t I) { + assert(!std::holds_alternative( + Layout[Layout.size() - I - 1]) || + !std::holds_alternative(Layout.back())); + std::swap(Layout[Layout.size() - I - 1], Layout.back()); + } + + size_t sourceSize() { return Layout.size(); } + + size_t targetSize() { return Post.size(); } + + void pop() { Layout.pop_back(); } + + void pushOrDupTarget(size_t Offset) { Layout.push_back(Post[Offset]); } + }; + + Shuffler::shuffle(Layout, Post, GenerateSlotOnTheFly); + + // Now we can construct the ideal layout before the operation. + // "layout" has shuffled the PreviousSlot{x} to new places using minimal + // operations to move the operation output in place. The resulting permutation + // of the PreviousSlot yields the ideal positions of slots before the + // operation, i.e. if PreviousSlot{2} is at a position at which Post contains + // VariableSlot{"tmp"}, then we want the variable tmp in the slot at offset 2 + // in the layout before the operation. 
+  assert(Layout.size() == Post.size());
+  SmallVector<StackSlot *> IdealLayout(Post.size(), nullptr);
+  for (unsigned Idx = 0; Idx < std::min(Layout.size(), Post.size()); ++Idx) {
+    auto *Slot = Post[Idx];
+    auto &IdealPosition = Layout[Idx];
+    if (PreviousSlot *PrevSlot = std::get_if<PreviousSlot>(&IdealPosition))
+      IdealLayout[PrevSlot->slot] = Slot;
+  }
+
+  // The tail of the layout must have contained the operation outputs, and
+  // will not have been assigned slots in the last loop.
+  while (!IdealLayout.empty() && !IdealLayout.back())
+    IdealLayout.pop_back();
+
+  assert(IdealLayout.size() == PreOperationLayoutSize);
+
+  Stack Result;
+  for (auto *Item : IdealLayout) {
+    assert(Item);
+    Result.push_back(Item);
+  }
+
+  return Result;
+}
+
+} // end anonymous namespace
+
+EVMStackLayoutGenerator::EVMStackLayoutGenerator(
+    const MachineFunction &MF, const MachineLoopInfo *MLI,
+    const EVMStackModel &StackModel, const EVMMachineCFGInfo &CFGInfo)
+    : MF(MF), MLI(MLI), StackModel(StackModel), CFGInfo(CFGInfo) {}
+
+std::unique_ptr<EVMStackLayout> EVMStackLayoutGenerator::run() {
+  runPropagation();
+
+  auto Layout = std::make_unique<EVMStackLayout>(
+      MBBEntryLayoutMap, MBBExitLayoutMap, OperationEntryLayoutMap);
+
+  LLVM_DEBUG({
+    dbgs() << "************* Stack Layout *************\n";
+    StackLayoutPrinter P(dbgs(), MF, *Layout, CFGInfo, StackModel);
+    P();
+  });
+  return Layout;
+}
+
+Stack EVMStackLayoutGenerator::propagateStackThroughOperation(
+    Stack ExitStack, const Operation &Op, bool AggressiveStackCompression) {
+  // Do not use aggressive stack compression for function calls for now.
+  if (Op.isFunctionCall())
+    // TODO: compress stack for recursive functions.
+    AggressiveStackCompression = false;
+
+  // This is a huge tradeoff between code size, gas cost and stack size.
+  auto generateSlotOnTheFly = [&](const StackSlot *Slot) {
+    return AggressiveStackCompression && Slot->isRematerializable();
+  };
+
+  // Determine the ideal permutation of the slots in ExitLayout that are not
+  // operation outputs (and not to be generated on the fly), s.t. shuffling the
+  // 'IdealStack + Operation.output' to ExitLayout is cheap.
+  Stack IdealStack =
+      createIdealLayout(Op.getOutput(), ExitStack, generateSlotOnTheFly);
+
+  // Make sure the resulting previous slots do not overlap with any assigned
+  // variables.
+  if (Op.isAssignment())
+    for (auto *StackSlot : IdealStack)
+      if (const auto *VarSlot = dyn_cast<VariableSlot>(StackSlot))
+        assert(!is_contained(Op.getOutput(), VarSlot));
+
+  // Since stack+Operation.output can be easily shuffled to ExitLayout, the
+  // desired layout before the operation is stack+Operation.input.
+  IdealStack.append(Op.getInput());
+
+  // Store the exact desired operation entry layout. The stored layout will be
+  // recreated by the code transform before executing the operation. However,
+  // this recreation can produce slots that can be freely generated or are
+  // duplicated, i.e. we can compress the stack afterwards without causing
+  // problems for code generation later.
+  OperationEntryLayoutMap[&Op] = IdealStack;
+
+  // Remove anything from the stack top that can be freely generated or dupped
+  // from deeper on the stack.
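+  // E.g. a rematerializable slot on top (a constant that can simply be
+  // re-pushed later) is dropped here outright, and a duplicated slot is
+  // dropped only while its other copy stays within reach of the 16-deep
+  // DUP window.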
+ while (!IdealStack.empty()) { + if (IdealStack.back()->isRematerializable()) + IdealStack.pop_back(); + else if (auto Offset = offset(drop_begin(reverse(IdealStack), 1), + IdealStack.back())) { + if (*Offset + 2 < 16) + IdealStack.pop_back(); + else + break; + } else + break; + } + + return IdealStack; +} + +Stack EVMStackLayoutGenerator::propagateStackThroughBlock( + Stack ExitStack, const MachineBasicBlock *Block, + bool AggressiveStackCompression) { + Stack CurrentStack = ExitStack; + for (const Operation &Op : reverse(StackModel.getOperations(Block))) { + Stack NewStack = propagateStackThroughOperation(CurrentStack, Op, + AggressiveStackCompression); + if (!AggressiveStackCompression && + !findStackTooDeep(NewStack, CurrentStack).empty()) + // If we had stack errors, run again with aggressive stack compression. + return propagateStackThroughBlock(std::move(ExitStack), Block, true); + CurrentStack = std::move(NewStack); + } + return CurrentStack; +} + +// Returns the number of junk slots to be prepended to \p TargetLayout for +// an optimal transition from \p EntryLayout to \p TargetLayout. +static size_t getOptimalNumberOfJunks(const Stack &EntryLayout, + const Stack &TargetLayout) { + size_t BestCost = EvaluateStackTransform(EntryLayout, TargetLayout); + size_t BestNumJunk = 0; + size_t MaxJunk = EntryLayout.size(); + for (size_t NumJunk = 1; NumJunk <= MaxJunk; ++NumJunk) { + Stack JunkedTarget(NumJunk, EVMStackModel::getJunkSlot()); + JunkedTarget.append(TargetLayout); + size_t Cost = EvaluateStackTransform(EntryLayout, JunkedTarget); + if (Cost < BestCost) { + BestCost = Cost; + BestNumJunk = NumJunk; + } + } + return BestNumJunk; +} + +void EVMStackLayoutGenerator::runPropagation() { + std::deque ToVisit{&MF.front()}; + DenseSet Visited; + + // Collect all the backedges in the MF. + // TODO: CPR-1847. Consider changing CFG before the stackification such that + // every loop has only one backedge. + SmallVector, + 64> + Backedges; + for (const MachineLoop *TopLevelLoop : *MLI) { + // TODO: CPR-1847. Investigate in which order it's better to traverse + // loops. + for (const MachineLoop *L : depth_first(TopLevelLoop)) { + SmallVector Latches; + L->getLoopLatches(Latches); + const MachineBasicBlock *Header = L->getHeader(); + transform(Latches, std::back_inserter(Backedges), + [Header](const MachineBasicBlock *MBB) { + return std::make_pair(MBB, Header); + }); + } + } + + while (!ToVisit.empty()) { + // First calculate stack layouts without walking backwards jumps, i.e. + // assuming the current preliminary entry layout of the backwards jump + // target as the initial exit layout of the backwards-jumping block. + while (!ToVisit.empty()) { + const MachineBasicBlock *Block = *ToVisit.begin(); + ToVisit.pop_front(); + if (Visited.count(Block)) + continue; + + if (std::optional ExitLayout = + getExitLayoutOrStageDependencies(Block, Visited, ToVisit)) { + Visited.insert(Block); + MBBExitLayoutMap[Block] = *ExitLayout; + MBBEntryLayoutMap[Block] = + propagateStackThroughBlock(*ExitLayout, Block); + for (auto Pred : Block->predecessors()) + ToVisit.emplace_back(Pred); + } + } + + // Revisit these blocks again. 
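+    // That is, for every latch -> header backedge whose latch exit layout is
+    // missing slots required by the header entry layout, re-run the backward
+    // propagation over the sub-CFG between the header and the latch.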
+ for (auto [Latch, Header] : Backedges) { + const Stack &HeaderEntryLayout = MBBEntryLayoutMap[Header]; + const Stack &LatchExitLayout = MBBExitLayoutMap[Latch]; + if (all_of(HeaderEntryLayout, [LatchExitLayout](const StackSlot *Slot) { + return is_contained(LatchExitLayout, Slot); + })) + continue; + + // The latch block does not provide all slots required by the loop + // header. Therefore we need to visit the subgraph between the latch + // and header again. We will visit blocks backwards starting from latch + // and mark all MBBs to-be-visited again until we reach the header. + + ToVisit.emplace_back(Latch); + + // Since we are likely to permute the entry layout of 'Header', we + // also visit its entries again. This is not required for correctness, + // since the set of stack slots will match, but it may move some + // required stack shuffling from the loop condition to outside the loop. + for (const MachineBasicBlock *Pred : Header->predecessors()) + Visited.erase(Pred); + + // DFS upwards traversal from latch to the header. + for (auto I = idf_begin(Latch), E = idf_end(Latch); I != E;) { + const MachineBasicBlock *MBB = *I; + Visited.erase(MBB); + if (MBB == Header) { + I.skipChildren(); + continue; + } + ++I; + } + // TODO: Consider revisiting the entire graph to propagate the optimal + // layout above the loop. + } + } + + // At this point layouts at conditional jumps are merely + // compatible, i.e. the exit layout of the jumping block is a superset of the + // entry layout of the target block. We need to modify the entry layouts + // of conditional jump targets, s.t., the entry layout of target blocks match + // the exit layout of the jumping block exactly, except that slots not + // required after the jump are marked as 'JunkSlot'. + for (const MachineBasicBlock &MBB : MF) { + const EVMMBBTerminatorsInfo *TermInfo = CFGInfo.getTerminatorsInfo(&MBB); + MBBExitType ExitType = TermInfo->getExitType(); + if (ExitType != MBBExitType::ConditionalBranch) + continue; + + Stack ExitLayout = MBBExitLayoutMap.at(&MBB); + +#ifndef NDEBUG + // The last block must have produced the condition at the stack top. + auto [CondBr, UncondBr, TrueBB, FalseBB, Condition] = + TermInfo->getConditionalBranch(); + assert(ExitLayout.back() == StackModel.getStackSlot(*Condition)); +#endif // NDEBUG + + // The condition is consumed by the conditional jump. + ExitLayout.pop_back(); + for (const MachineBasicBlock *Succ : MBB.successors()) { + const Stack &SuccEntryLayout = MBBEntryLayoutMap.at(Succ); + Stack NewSuccEntryLayout = ExitLayout; + // Whatever the block being jumped to does not actually require, + // can be marked as junk. + for (StackSlot *&Slot : NewSuccEntryLayout) + if (!is_contained(SuccEntryLayout, Slot)) + Slot = EVMStackModel::getJunkSlot(); + +#ifndef NDEBUG + // Make sure everything the block being jumped to requires is + // actually present or can be generated. + for (const StackSlot *Slot : SuccEntryLayout) + assert(Slot->isRematerializable() || + is_contained(NewSuccEntryLayout, Slot)); +#endif // NDEBUG + + MBBEntryLayoutMap[Succ] = NewSuccEntryLayout; + } + } + + // Create the function entry layout. + Stack EntryStack; + bool IsNoReturn = MF.getFunction().hasFnAttribute(Attribute::NoReturn); + if (!IsNoReturn) + EntryStack.push_back(StackModel.getFunctionReturnLabelSlot(&MF)); + + // Calling convention: input arguments are passed in stack such that the + // first one specified in the function declaration is passed on the stack TOP. 
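+  // E.g. for a returning function f(a, b, c), the entry layout built here is,
+  // bottom to top, [ retaddr, c, b, a ], i.e. the first declared parameter
+  // 'a' ends up on the stack top.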
+ EntryStack.append(StackModel.getFunctionParameters()); + std::reverse(IsNoReturn ? EntryStack.begin() : std::next(EntryStack.begin()), + EntryStack.end()); + MBBEntryLayoutMap[&MF.front()] = std::move(EntryStack); + + // Traverse the CFG and at each block that allows junk, i.e. that is a + // cut-vertex that never leads to a function return, checks if adding junk + // reduces the shuffling cost upon entering and if so recursively adds junk + // to the spanned subgraph. This is needed only for optimization purposes, + // not for correctness. + for (const MachineBasicBlock &MBB : MF) { + if (!CFGInfo.isCutVertex(&MBB) || CFGInfo.isOnPathToFuncReturn(&MBB)) + continue; + + const Stack EntryLayout = MBBEntryLayoutMap.at(&MBB); + const Stack &ExitLayout = MBBExitLayoutMap.at(&MBB); + const SmallVector &Ops = StackModel.getOperations(&MBB); + Stack const &NextLayout = + Ops.empty() ? ExitLayout : OperationEntryLayoutMap.at(&Ops.front()); + if (EntryLayout != NextLayout) { + size_t OptimalNumJunks = getOptimalNumberOfJunks(EntryLayout, NextLayout); + if (OptimalNumJunks > 0) { + addJunksToStackBottom(&MBB, OptimalNumJunks); + MBBEntryLayoutMap[&MBB] = EntryLayout; + } + } + } +} + +std::optional EVMStackLayoutGenerator::getExitLayoutOrStageDependencies( + const MachineBasicBlock *Block, + const DenseSet &Visited, + std::deque &ToVisit) const { + const EVMMBBTerminatorsInfo *TermInfo = CFGInfo.getTerminatorsInfo(Block); + MBBExitType ExitType = TermInfo->getExitType(); + if (ExitType == MBBExitType::UnconditionalBranch) { + auto [_, Target] = TermInfo->getUnconditionalBranch(); + if (MachineLoop *ML = MLI->getLoopFor(Block); + ML && ML->isLoopLatch(Block)) { + // Choose the best currently known entry layout of the jump target + // as initial exit. Note that this may not yet be the final + // layout. + auto It = MBBEntryLayoutMap.find(Target); + return It == MBBEntryLayoutMap.end() ? Stack{} : It->second; + } + // If the current iteration has already visited the jump target, + // start from its entry layout. + if (Visited.count(Target)) + return MBBEntryLayoutMap.at(Target); + // Otherwise stage the jump target for visit and defer the current + // block. + ToVisit.emplace_front(Target); + return std::nullopt; + } + if (ExitType == MBBExitType::ConditionalBranch) { + auto [CondBr, UncondBr, TrueBB, FalseBB, Condition] = + TermInfo->getConditionalBranch(); + bool FalseBBVisited = Visited.count(FalseBB); + bool TrueBBVisited = Visited.count(TrueBB); + + if (FalseBBVisited && TrueBBVisited) { + // If the current iteration has already Visited both jump targets, + // start from its entry layout. + Stack CombinedStack = combineStack(MBBEntryLayoutMap.at(FalseBB), + MBBEntryLayoutMap.at(TrueBB)); + // Additionally, the jump condition has to be at the stack top at + // exit. + CombinedStack.emplace_back(StackModel.getStackSlot(*Condition)); + return CombinedStack; + } + + // If one of the jump targets has not been visited, stage it for + // visit and defer the current block. + if (!FalseBBVisited) + ToVisit.emplace_front(FalseBB); + + if (!TrueBBVisited) + ToVisit.emplace_front(TrueBB); + + return std::nullopt; + } + if (ExitType == MBBExitType::FunctionReturn) { + const MachineInstr &MI = Block->back(); + return StackModel.getReturnArguments(MI); + } + + return Stack{}; +} + +Stack EVMStackLayoutGenerator::combineStack(Stack const &Stack1, + Stack const &Stack2) { + // TODO: it would be nicer to replace this by a constructive algorithm. 
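+  // (Context: combineStack merges the entry layouts desired by the two
+  // successors of a conditional branch into a single exit layout for the
+  // predecessor that is cheap to specialize into either successor layout.)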
+  // Currently it uses a reduced version of Heap's algorithm to partly
+  // brute-force the search, which seems to work decently well.
+
+  Stack CommonPrefix;
+  for (unsigned Idx = 0; Idx < std::min(Stack1.size(), Stack2.size()); ++Idx) {
+    StackSlot *Slot1 = Stack1[Idx];
+    const StackSlot *Slot2 = Stack2[Idx];
+    if (Slot1 != Slot2)
+      break;
+    CommonPrefix.push_back(Slot1);
+  }
+
+  Stack Stack1Tail, Stack2Tail;
+  for (auto *Slot : drop_begin(Stack1, CommonPrefix.size()))
+    Stack1Tail.push_back(Slot);
+
+  for (auto *Slot : drop_begin(Stack2, CommonPrefix.size()))
+    Stack2Tail.push_back(Slot);
+
+  if (Stack1Tail.empty()) {
+    CommonPrefix.append(compressStack(Stack2Tail));
+    return CommonPrefix;
+  }
+
+  if (Stack2Tail.empty()) {
+    CommonPrefix.append(compressStack(Stack1Tail));
+    return CommonPrefix;
+  }
+
+  Stack Candidate;
+  for (auto *Slot : Stack1Tail)
+    if (!is_contained(Candidate, Slot))
+      Candidate.push_back(Slot);
+
+  for (auto *Slot : Stack2Tail)
+    if (!is_contained(Candidate, Slot))
+      Candidate.push_back(Slot);
+
+  {
+    // Remove the rematerializable slots, as they can be pushed at any
+    // position.
+    auto RemIt = std::remove_if(
+        Candidate.begin(), Candidate.end(), [](const StackSlot *Slot) {
+          return isa<LiteralSlot>(Slot) || isa<SymbolSlot>(Slot) ||
+                 isa<FunctionCallReturnLabelSlot>(Slot);
+        });
+    Candidate.erase(RemIt, Candidate.end());
+  }
+
+  auto evaluate = [&](Stack const &Candidate) -> size_t {
+    size_t NumOps = 0;
+    Stack TestStack = Candidate;
+    auto Swap = [&](unsigned SwapDepth) {
+      ++NumOps;
+      if (SwapDepth > 16)
+        NumOps += 1000;
+    };
+
+    auto DupOrPush = [&](const StackSlot *Slot) {
+      if (Slot->isRematerializable())
+        return;
+
+      Stack Tmp = CommonPrefix;
+      Tmp.append(TestStack);
+      auto Depth = offset(reverse(Tmp), Slot);
+      if (Depth && *Depth >= 16)
+        NumOps += 1000;
+    };
+    createStackLayout(TestStack, Stack1Tail, Swap, DupOrPush, [&]() {});
+    TestStack = Candidate;
+    createStackLayout(TestStack, Stack2Tail, Swap, DupOrPush, [&]() {});
+    return NumOps;
+  };
+
+  // See https://en.wikipedia.org/wiki/Heap's_algorithm
+  size_t N = Candidate.size();
+  Stack BestCandidate = Candidate;
+  size_t BestCost = evaluate(Candidate);
+  SmallVector<size_t> C(N, 0);
+  size_t I = 1;
+  while (I < N) {
+    if (C[I] < I) {
+      if (I & 1)
+        std::swap(Candidate.front(), Candidate[I]);
+      else
+        std::swap(Candidate[C[I]], Candidate[I]);
+
+      size_t Cost = evaluate(Candidate);
+      if (Cost < BestCost) {
+        BestCost = Cost;
+        BestCandidate = Candidate;
+      }
+      ++C[I];
+      // Note that a proper implementation of Heap's algorithm would need to
+      // revert back to 'I = 1' here. However, the incorrect implementation
+      // produces decent results, while the proper version would have N!
+      // complexity and is therefore not feasible.
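+      // For reference, a faithful Heap's algorithm step would look roughly
+      // like this (a sketch):
+      //   if (C[I] < I) { swap; evaluate; ++C[I]; I = 1; }
+      //   else          { C[I] = 0; ++I; }
+      // i.e. it would reset 'I' after a swap instead of advancing it.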
+      ++I;
+    } else {
+      C[I] = 0;
+      ++I;
+    }
+  }
+
+  CommonPrefix.append(BestCandidate);
+  return CommonPrefix;
+}
+
+Stack EVMStackLayoutGenerator::compressStack(Stack CurStack) {
+  std::optional<size_t> FirstDupOffset;
+  do {
+    if (FirstDupOffset) {
+      if (*FirstDupOffset != (CurStack.size() - 1))
+        std::swap(CurStack[*FirstDupOffset], CurStack.back());
+      CurStack.pop_back();
+      FirstDupOffset.reset();
+    }
+
+    auto I = CurStack.rbegin(), E = CurStack.rend();
+    for (size_t Depth = 0; I < E; ++I, ++Depth) {
+      StackSlot *Slot = *I;
+      if (Slot->isRematerializable()) {
+        FirstDupOffset = CurStack.size() - Depth - 1;
+        break;
+      }
+
+      if (auto DupDepth =
+              offset(drop_begin(reverse(CurStack), Depth + 1), Slot)) {
+        if (Depth + *DupDepth <= 16) {
+          FirstDupOffset = CurStack.size() - Depth - 1;
+          break;
+        }
+      }
+    }
+  } while (FirstDupOffset);
+  return CurStack;
+}
+
+/// Returns the number of operations required to transform stack \p Source to
+/// \p Target.
+size_t llvm::EvaluateStackTransform(Stack Source, Stack const &Target) {
+  size_t OpGas = 0;
+  auto Swap = [&](unsigned SwapDepth) {
+    if (SwapDepth > 16)
+      OpGas += 1000;
+    else
+      OpGas += 3; // SWAP* gas price.
+  };
+
+  auto DupOrPush = [&](const StackSlot *Slot) {
+    if (Slot->isRematerializable())
+      OpGas += 3;
+    else {
+      auto Depth = offset(reverse(Source), Slot);
+      if (!Depth)
+        llvm_unreachable("No slot in the stack");
+
+      if (*Depth < 16)
+        OpGas += 3; // DUP* gas price.
+      else
+        OpGas += 1000;
+    }
+  };
+  auto Pop = [&]() { OpGas += 2; };
+
+  createStackLayout(Source, Target, Swap, DupOrPush, Pop);
+  return OpGas;
+}
+
+void EVMStackLayoutGenerator::addJunksToStackBottom(
+    const MachineBasicBlock *Entry, size_t NumJunk) {
+  for (const MachineBasicBlock *MBB : depth_first(Entry)) {
+    Stack EntryTmp(NumJunk, EVMStackModel::getJunkSlot());
+    EntryTmp.append(MBBEntryLayoutMap.at(MBB));
+    MBBEntryLayoutMap[MBB] = std::move(EntryTmp);
+
+    for (const Operation &Op : StackModel.getOperations(MBB)) {
+      Stack OpEntryTmp(NumJunk, EVMStackModel::getJunkSlot());
+      OpEntryTmp.append(OperationEntryLayoutMap.at(&Op));
+      OperationEntryLayoutMap[&Op] = std::move(OpEntryTmp);
+    }
+
+    Stack ExitTmp(NumJunk, EVMStackModel::getJunkSlot());
+    ExitTmp.append(MBBExitLayoutMap.at(MBB));
+    MBBExitLayoutMap[MBB] = std::move(ExitTmp);
+  }
+}
diff --git a/llvm/lib/Target/EVM/EVMStackLayoutGenerator.h b/llvm/lib/Target/EVM/EVMStackLayoutGenerator.h
new file mode 100644
index 000000000000..280d7c1e0fe3
--- /dev/null
+++ b/llvm/lib/Target/EVM/EVMStackLayoutGenerator.h
@@ -0,0 +1,139 @@
+//===---- EVMStackLayoutGenerator.h - Stack layout generator ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack layout generator which, for each operation,
+// finds a complete stack layout that:
+// - has the slots required for the operation at the stack top.
+// - will have the operation result in a layout that makes it easy to achieve
+//   the next desired layout.
+// It also finds an entering/exiting stack layout for each block.
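+// For example, for a binary operation 'c = add a, b' the generator picks an
+// operation entry layout of [.., b, a], with 'a' on the stack top, so that
+// emitting ADD leaves [.., c] (a sketch; the actual operand order is defined
+// by EVMStackModel::getInstrInput).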
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_EVM_EVMSTACKLAYOUTGENERATOR_H
+#define LLVM_LIB_TARGET_EVM_EVMSTACKLAYOUTGENERATOR_H
+
+#include "EVMMachineCFGInfo.h"
+#include "EVMStackModel.h"
+#include "llvm/ADT/DenseMap.h"
+
+#include <deque>
+
+namespace llvm {
+
+/// Returns the number of operations required to transform stack \p Source to
+/// \p Target.
+size_t EvaluateStackTransform(Stack Source, Stack const &Target);
+
+class EVMStackLayout {
+public:
+  EVMStackLayout(DenseMap<const MachineBasicBlock *, Stack> &MBBEntryLayout,
+                 DenseMap<const MachineBasicBlock *, Stack> &MBBExitLayout,
+                 DenseMap<const Operation *, Stack> &OpsEntryLayout)
+      : MBBEntryLayoutMap(MBBEntryLayout), MBBExitLayoutMap(MBBExitLayout),
+        OperationEntryLayoutMap(OpsEntryLayout) {}
+  EVMStackLayout(const EVMStackLayout &) = delete;
+  EVMStackLayout &operator=(const EVMStackLayout &) = delete;
+
+  const Stack &getMBBEntryLayout(const MachineBasicBlock *MBB) const {
+    return MBBEntryLayoutMap.at(MBB);
+  }
+
+  const Stack &getMBBExitLayout(const MachineBasicBlock *MBB) const {
+    return MBBExitLayoutMap.at(MBB);
+  }
+
+  const Stack &getOperationEntryLayout(const Operation *Op) const {
+    return OperationEntryLayoutMap.at(Op);
+  }
+
+private:
+  // Complete stack layout required at MBB entry.
+  DenseMap<const MachineBasicBlock *, Stack> MBBEntryLayoutMap;
+  // Complete stack layout required at MBB exit.
+  DenseMap<const MachineBasicBlock *, Stack> MBBExitLayoutMap;
+  // Complete stack layout that has the slots required for the operation at
+  // the stack top.
+  DenseMap<const Operation *, Stack> OperationEntryLayoutMap;
+};
+
+class EVMStackLayoutGenerator {
+public:
+  struct StackTooDeep {
+    /// Number of slots that need to be saved.
+    size_t deficit = 0;
+    /// Set of variables, eliminating which would decrease the stack deficit.
+    SmallVector<Register> variableChoices;
+  };
+
+  EVMStackLayoutGenerator(const MachineFunction &MF, const MachineLoopInfo *MLI,
+                          const EVMStackModel &StackModel,
+                          const EVMMachineCFGInfo &CFGInfo);
+
+  std::unique_ptr<EVMStackLayout> run();
+
+private:
+  /// Returns the optimal entry stack layout such that \p Operation can be
+  /// applied to it and the result can be transformed to \p ExitStack with
+  /// minimal stack shuffling. Simultaneously stores the entry layout required
+  /// for executing the operation in the map.
+  Stack propagateStackThroughOperation(Stack ExitStack,
+                                       Operation const &Operation,
+                                       bool AggressiveStackCompression = false);
+
+  /// Returns the desired stack layout at the entry of \p Block, assuming the
+  /// layout after executing the block should be \p ExitStack.
+  Stack propagateStackThroughBlock(Stack ExitStack,
+                                   const MachineBasicBlock *Block,
+                                   bool AggressiveStackCompression = false);
+
+  /// Main algorithm walking the graph from entry to exit and propagating back
+  /// the stack layouts to the entries. Iteratively reruns itself along
+  /// backwards jumps until the layout is stabilized.
+  void runPropagation();
+
+  /// Adds junks to the subgraph starting at \p Entry. It should only be
+  /// called on cut-vertices, so the full subgraph retains proper stack
+  /// balance.
+  void addJunksToStackBottom(const MachineBasicBlock *Entry, size_t NumJunk);
+
+  /// Returns the best known exit layout of \p Block, if all dependencies are
+  /// already \p Visited. If not, adds the dependencies to \p DependencyList
+  /// and returns std::nullopt.
+  std::optional<Stack> getExitLayoutOrStageDependencies(
+      const MachineBasicBlock *Block,
+      const DenseSet<const MachineBasicBlock *> &Visited,
+      std::deque<const MachineBasicBlock *> &DependencyList) const;
+
+  /// Calculates the ideal stack layout, such that both \p Stack1 and
+  /// \p Stack2 can be achieved with minimal stack shuffling when starting
+  /// from the returned layout.
+  static Stack combineStack(const Stack &Stack1, const Stack &Stack2);
+
+  /// Walks through the CFG and reports any stack-too-deep errors that would
+  /// occur when generating code for it without countermeasures.
+  SmallVector<StackTooDeep>
+  reportStackTooDeep(const MachineBasicBlock &Entry) const;
+
+  /// Returns a copy of \p Stack stripped of all duplicates and slots that can
+  /// be freely generated. Attempts to create a layout that requires a minimal
+  /// amount of operations to reconstruct the original stack \p Stack.
+  static Stack compressStack(Stack Stack);
+
+  const MachineFunction &MF;
+  const MachineLoopInfo *MLI;
+  const EVMStackModel &StackModel;
+  const EVMMachineCFGInfo &CFGInfo;
+
+  DenseMap<const MachineBasicBlock *, Stack> MBBEntryLayoutMap;
+  DenseMap<const MachineBasicBlock *, Stack> MBBExitLayoutMap;
+  DenseMap<const Operation *, Stack> OperationEntryLayoutMap;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_EVM_EVMSTACKLAYOUTGENERATOR_H
diff --git a/llvm/lib/Target/EVM/EVMStackModel.cpp b/llvm/lib/Target/EVM/EVMStackModel.cpp
new file mode 100644
index 000000000000..94459074ca7b
--- /dev/null
+++ b/llvm/lib/Target/EVM/EVMStackModel.cpp
@@ -0,0 +1,239 @@
+//===------ EVMStackModel.cpp - EVM Stack Model -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack model used by the backward propagation
+// stackification.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EVMStackModel.h"
+#include "EVM.h"
+#include "EVMMachineFunctionInfo.h"
+#include "EVMSubtarget.h"
+#include "MCTargetDesc/EVMMCTargetDesc.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+#include <string>
+
+using namespace llvm;
+
+static const Function *getCalledFunction(const MachineInstr &MI) {
+  for (const MachineOperand &MO : MI.operands()) {
+    if (!MO.isGlobal())
+      continue;
+    if (const auto *Func = dyn_cast<Function>(MO.getGlobal()))
+      return Func;
+  }
+  return nullptr;
+}
+
+static std::string getInstName(const MachineInstr *MI) {
+  const MachineFunction *MF = MI->getParent()->getParent();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+  return TII->getName(MI->getOpcode()).str();
+}
+
+std::string SymbolSlot::toString() const {
+  return getInstName(MI) + ":" + std::string(Symbol->getName());
+}
+std::string FunctionCallReturnLabelSlot::toString() const {
+  return "RET[" + std::string(getCalledFunction(*Call)->getName()) + "]";
+}
+std::string TemporarySlot::toString() const {
+  SmallString<128> S;
+  raw_svector_ostream OS(S);
+  OS << "TMP[" << getInstName(MI) << ", " << Index << "]";
+  return std::string(S.str());
+}
+std::string Operation::toString() const {
+  if (isFunctionCall()) {
+    const MachineOperand *Callee = MI->explicit_uses().begin();
+    return Callee->getGlobal()->getName().str();
+  }
+  if (isBuiltinCall())
+    return getInstName(MI);
+
+  assert(isAssignment());
+  SmallString<128> S;
+  raw_svector_ostream OS(S);
+  OS << "Assignment(";
+  for (const auto *Slot : Output)
+    OS << printReg(cast<VariableSlot>(Slot)->getReg(), nullptr, 0, nullptr)
+       << ", ";
+  OS << ")";
+  return std::string(S.str());
+}
+
+EVMStackModel::EVMStackModel(MachineFunction &MF, const LiveIntervals &LIS)
+    : MF(MF), LIS(LIS) {
+  for (MachineBasicBlock &MBB : MF) {
+    SmallVector<Operation> Ops;
+    for (MachineInstr &MI : MBB)
+      createOperation(MI, Ops);
+    OperationsMap[&MBB] = std::move(Ops);
+  }
+}
+
+Stack EVMStackModel::getFunctionParameters() const {
+  auto *MFI = MF.getInfo<EVMMachineFunctionInfo>();
+  SmallVector<StackSlot *> Parameters(MFI->getNumParams(),
+                                      EVMStackModel::getJunkSlot());
+  for (const MachineInstr &MI : MF.front()) {
+    if (MI.getOpcode() == EVM::ARGUMENT) {
+      int64_t ArgIdx = MI.getOperand(1).getImm();
+      Parameters[ArgIdx] = getVariableSlot(MI.getOperand(0).getReg());
+    }
+  }
+  return Parameters;
+}
+
+StackSlot *EVMStackModel::getStackSlot(const MachineOperand &MO) const {
+  // If the virtual register defines a constant and this is the only
+  // definition, emit the literal slot as MI's input.
+  const LiveInterval *LI = &LIS.getInterval(MO.getReg());
+  if (LI->containsOneValue()) {
+    SlotIndex Idx = LIS.getInstructionIndex(*MO.getParent());
+    const VNInfo *VNI = LI->Query(Idx).valueIn();
+    assert(VNI && "Use of non-existing value");
+    assert(!VNI->isPHIDef());
+    const MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
+    assert(DefMI && "Dead valno in interval");
+    if (DefMI->getOpcode() == EVM::CONST_I256) {
+      const APInt Imm = DefMI->getOperand(1).getCImm()->getValue();
+      return getLiteralSlot(Imm);
+    }
+  }
+  return getVariableSlot(MO.getReg());
+}
+
+Stack EVMStackModel::getInstrInput(const MachineInstr &MI) const {
+  Stack In;
+  for (const auto &MO : reverse(MI.explicit_uses())) {
+    // All the non-register operands are handled in instruction specific
+    // handlers.
+    if (!MO.isReg())
+      continue;
+
+    // SP is not used anyway.
+    if (MO.getReg() == EVM::SP)
+      continue;
+
+    In.push_back(getStackSlot(MO));
+  }
+  return In;
+}
+
+Stack EVMStackModel::getInstrOutput(const MachineInstr &MI) const {
+  Stack Out;
+  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I < E; ++I)
+    Out.push_back(getTemporarySlot(&MI, I));
+  return Out;
+}
+
+void EVMStackModel::createOperation(MachineInstr &MI,
+                                    SmallVector<Operation> &Ops) const {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case EVM::STACK_LOAD:
+  case EVM::STACK_STORE:
+    llvm_unreachable("Unexpected stack memory instruction");
+    return;
+  case EVM::ARGUMENT:
+    // ARGUMENT instructions are handled in getFunctionParameters().
+    return;
+  case EVM::FCALL: {
+    Stack Input;
+    for (const MachineOperand &MO : MI.operands()) {
+      if (MO.isGlobal()) {
+        const auto *Func = cast<Function>(MO.getGlobal());
+        if (!Func->hasFnAttribute(Attribute::NoReturn))
+          Input.push_back(getFunctionCallReturnLabelSlot(&MI));
+        break;
+      }
+    }
+    const Stack &Tmp = getInstrInput(MI);
+    Input.insert(Input.end(), Tmp.begin(), Tmp.end());
+    Ops.emplace_back(Operation::FunctionCall, std::move(Input),
+                     getInstrOutput(MI), &MI);
+  } break;
+  case EVM::RET:
+  case EVM::JUMP:
+  case EVM::JUMPI:
+    // These instructions are handled separately.
+    return;
+  case EVM::COPY_I256:
+  case EVM::DATASIZE:
+  case EVM::DATAOFFSET:
+  case EVM::LINKERSYMBOL:
+  case EVM::LOADIMMUTABLE:
+    // The copy/data instructions just represent an assignment. This case is
+    // handled below.
+    break;
+  case EVM::CONST_I256: {
+    const LiveInterval *LI = &LIS.getInterval(MI.getOperand(0).getReg());
+    // If the virtual register has only one definition, ignore this
+    // instruction; literal slots are created from the immediate value at the
+    // register uses.
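+    // For example (a sketch): given '%1 = CONST_I256 7' whose only use is
+    // '%2 = ADD %0, %1', no Operation is created for the CONST_I256; the
+    // ADD input contains literal(7) instead of a variable slot, and the
+    // literal is rematerialized by a PUSH at the use.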
+    if (LI->containsOneValue())
+      return;
+  } break;
+  default: {
+    Ops.emplace_back(Operation::BuiltinCall, getInstrInput(MI),
+                     getInstrOutput(MI), &MI);
+  } break;
+  }
+
+  // Create an Operation::Assignment for the MI.
+  Stack Input, Output;
+  switch (MI.getOpcode()) {
+  case EVM::CONST_I256: {
+    const Register DefReg = MI.getOperand(0).getReg();
+    const APInt Imm = MI.getOperand(1).getCImm()->getValue();
+    Input.push_back(getLiteralSlot(Imm));
+    Output.push_back(getVariableSlot(DefReg));
+  } break;
+  case EVM::DATASIZE:
+  case EVM::DATAOFFSET:
+  case EVM::LINKERSYMBOL:
+  case EVM::LOADIMMUTABLE: {
+    const Register DefReg = MI.getOperand(0).getReg();
+    MCSymbol *Sym = MI.getOperand(1).getMCSymbol();
+    Input.push_back(getSymbolSlot(Sym, &MI));
+    Output.push_back(getVariableSlot(DefReg));
+  } break;
+  case EVM::COPY_I256: {
+    // A copy instruction corresponds to the assignment operator, so
+    // we do not need to create intermediate TmpSlots.
+    Input = getInstrInput(MI);
+    const Register DefReg = MI.getOperand(0).getReg();
+    Output.push_back(getVariableSlot(DefReg));
+  } break;
+  default: {
+    unsigned ArgsNumber = 0;
+    for (const auto &MO : MI.defs()) {
+      assert(MO.isReg());
+      const Register Reg = MO.getReg();
+      Input.push_back(getTemporarySlot(&MI, ArgsNumber++));
+      Output.push_back(getVariableSlot(Reg));
+    }
+  } break;
+  }
+  // We don't need the assignment part for instructions that do not write
+  // results.
+  if (!Input.empty() || !Output.empty())
+    Ops.emplace_back(Operation::Assignment, std::move(Input),
+                     std::move(Output), &MI);
+}
+
+Stack EVMStackModel::getReturnArguments(const MachineInstr &MI) const {
+  assert(MI.getOpcode() == EVM::RET);
+  Stack Input = getInstrInput(MI);
+  // Reverse the input operands to restore their original ordering in the
+  // instruction.
+  // Calling convention: return values are passed on the stack such that the
+  // last one specified in the RET instruction is passed on the stack TOP.
+  std::reverse(Input.begin(), Input.end());
+  Input.push_back(getFunctionReturnLabelSlot(&MF));
+  return Input;
+}
diff --git a/llvm/lib/Target/EVM/EVMStackModel.h b/llvm/lib/Target/EVM/EVMStackModel.h
new file mode 100644
index 000000000000..dce7765bcaf2
--- /dev/null
+++ b/llvm/lib/Target/EVM/EVMStackModel.h
@@ -0,0 +1,315 @@
+//===------------ EVMStackModel.h - Stack Model -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a representation used by the backward propagation
+// stackification algorithm. It consists of 'Operation', 'StackSlot' and
+// 'Stack' entities.
+// The new stack representation is derived from Machine IR as follows:
+//   MachineInstr -> Operation
+//   MachineOperand -> StackSlot
+//   MI's defs/uses -> Stack (array of StackSlot)
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_EVM_EVMSTACKMODEL_H
+#define LLVM_LIB_TARGET_EVM_EVMSTACKMODEL_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+
+#include <memory>
+
+namespace llvm {
+
+class MachineFunction;
+class MachineBasicBlock;
+
+class StackSlot {
+public:
+  enum SlotKind {
+    SK_Literal,
+    SK_Variable,
+    SK_Symbol,
+    SK_FunctionCallReturnLabel,
+    SK_FunctionReturnLabel,
+    SK_Temporary,
+    SK_Junk,
+  };
+
+private:
+  const SlotKind KindID;
+
+protected:
+  StackSlot(SlotKind KindID) : KindID(KindID) {}
+
+public:
+  virtual ~StackSlot() = default;
+
+  unsigned getSlotKind() const { return KindID; }
+
+  // 'isRematerializable()' returns true if a slot always has a known value
+  // at compile time and can therefore safely be removed from the stack at
+  // any time and then regenerated later.
+  virtual bool isRematerializable() const = 0;
+  virtual std::string toString() const = 0;
+};
+
+/// A slot containing a literal value.
+class LiteralSlot final : public StackSlot {
+  APInt Value;
+
+public:
+  LiteralSlot(const APInt &V) : StackSlot(SK_Literal), Value(V) {}
+  const APInt &getValue() const { return Value; }
+
+  bool isRematerializable() const override { return true; }
+  std::string toString() const override {
+    SmallString<64> S;
+    Value.toStringSigned(S);
+    return std::string(S.str());
+  }
+  static bool classof(const StackSlot *S) {
+    return S->getSlotKind() == SK_Literal;
+  }
+};
+
+/// A slot containing the current value of a particular variable.
+class VariableSlot final : public StackSlot {
+  Register VirtualReg;
+
+public:
+  VariableSlot(const Register &R) : StackSlot(SK_Variable), VirtualReg(R) {}
+  const Register &getReg() const { return VirtualReg; }
+
+  bool isRematerializable() const override { return false; }
+  std::string toString() const override {
+    SmallString<64> S;
+    raw_svector_ostream OS(S);
+    OS << printReg(VirtualReg, nullptr, 0, nullptr);
+    return std::string(S.str());
+  }
+  static bool classof(const StackSlot *S) {
+    return S->getSlotKind() == SK_Variable;
+  }
+};
+
+/// A slot containing an MCSymbol.
+class SymbolSlot final : public StackSlot {
+  MCSymbol *Symbol;
+  const MachineInstr *MI = nullptr;
+
+public:
+  SymbolSlot(MCSymbol *S, const MachineInstr *MI)
+      : StackSlot(SK_Symbol), Symbol(S), MI(MI) {}
+  const MachineInstr *getMachineInstr() const { return MI; }
+  MCSymbol *getSymbol() const { return Symbol; }
+
+  bool isRematerializable() const override { return true; }
+  std::string toString() const override;
+
+  static bool classof(const StackSlot *S) {
+    return S->getSlotKind() == SK_Symbol;
+  }
+};
+
+/// The label pushed as the return label before a function call, i.e. the
+/// label the call is supposed to return to.
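+/// A call site therefore expands to roughly the following sequence
+/// (a sketch):
+///   PUSH @FUNC_RET   ; the FunctionCallReturnLabelSlot
+///   PUSH @callee
+///   JUMP
+/// FUNC_RET:
+///   JUMPDEST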
+class FunctionCallReturnLabelSlot final : public StackSlot {
+  const MachineInstr *Call = nullptr;
+
+public:
+  FunctionCallReturnLabelSlot(const MachineInstr *Call)
+      : StackSlot(SK_FunctionCallReturnLabel), Call(Call) {}
+  const MachineInstr *getCall() const { return Call; }
+
+  bool isRematerializable() const override { return true; }
+  std::string toString() const override;
+
+  static bool classof(const StackSlot *S) {
+    return S->getSlotKind() == SK_FunctionCallReturnLabel;
+  }
+};
+
+/// The return jump target of a function while generating the code of the
+/// function body. That is, the caller of a function pushes a
+/// 'FunctionCallReturnLabelSlot' (see above) before jumping to the function;
+/// inside the function body this very slot is viewed as the
+/// 'FunctionReturnLabelSlot' and is jumped to when returning from the
+/// function.
+class FunctionReturnLabelSlot final : public StackSlot {
+  const MachineFunction *MF = nullptr;
+
+public:
+  FunctionReturnLabelSlot(const MachineFunction *MF)
+      : StackSlot(SK_FunctionReturnLabel), MF(MF) {}
+  const MachineFunction *getMachineFunction() { return MF; }
+
+  bool isRematerializable() const override { return false; }
+  std::string toString() const override { return "RET"; }
+
+  static bool classof(const StackSlot *S) {
+    return S->getSlotKind() == SK_FunctionReturnLabel;
+  }
+};
+
+/// A slot containing the index-th return value of a previous call.
+class TemporarySlot final : public StackSlot {
+  /// The call that returned this slot.
+  const MachineInstr *MI = nullptr;
+
+  /// Specifies to which of the values returned by the call this slot refers.
+  /// Index == 0 refers to the slot deepest in the stack after the call.
+  size_t Index = 0;
+
+public:
+  TemporarySlot(const MachineInstr *MI, size_t Idx)
+      : StackSlot(SK_Temporary), MI(MI), Index(Idx) {}
+
+  bool isRematerializable() const override { return false; }
+  std::string toString() const override;
+
+  static bool classof(const StackSlot *S) {
+    return S->getSlotKind() == SK_Temporary;
+  }
+};
+
+/// A slot containing an arbitrary value that is always eventually popped and
+/// never used. Used to maintain stack balance on control flow joins.
+class JunkSlot final : public StackSlot {
+public:
+  JunkSlot() : StackSlot(SK_Junk) {}
+
+  bool isRematerializable() const override { return true; }
+  std::string toString() const override { return "JUNK"; }
+
+  static bool classof(const StackSlot *S) {
+    return S->getSlotKind() == SK_Junk;
+  }
+};
+
+/// The stack top is the last element of the vector.
+using Stack = SmallVector<StackSlot *>;
+
+class Operation {
+public:
+  enum OpType { BuiltinCall, FunctionCall, Assignment };
+
+private:
+  OpType Type;
+  // Stack slots this operation expects at the top of the stack and consumes.
+  Stack Input;
+  // Stack slots this operation leaves on the stack as output.
+  Stack Output;
+  // The emulated machine instruction.
+  MachineInstr *MI = nullptr;
+
+public:
+  Operation(OpType Type, Stack Input, Stack Output, MachineInstr *MI)
+      : Type(Type), Input(std::move(Input)), Output(std::move(Output)),
+        MI(MI) {}
+
+  const Stack &getInput() const { return Input; }
+  const Stack &getOutput() const { return Output; }
+  MachineInstr *getMachineInstr() const { return MI; }
+
+  bool isBuiltinCall() const { return Type == BuiltinCall; }
+  bool isFunctionCall() const { return Type == FunctionCall; }
+  bool isAssignment() const { return Type == Assignment; }
+
+  std::string toString() const;
+};
+
+class EVMStackModel {
+  MachineFunction &MF;
+  const LiveIntervals &LIS;
+  DenseMap<const MachineBasicBlock *, SmallVector<Operation>> OperationsMap;
+
+  // Storage for stack slots.
+  mutable DenseMap<APInt, std::unique_ptr<LiteralSlot>> LiteralStorage;
+  mutable DenseMap<Register, std::unique_ptr<VariableSlot>> VariableStorage;
+  mutable DenseMap<std::pair<MCSymbol *, const MachineInstr *>,
                   std::unique_ptr<SymbolSlot>>
+      SymbolStorage;
+  mutable DenseMap<const MachineInstr *,
+                   std::unique_ptr<FunctionCallReturnLabelSlot>>
+      FunctionCallReturnLabelStorage;
+  mutable DenseMap<std::pair<const MachineInstr *, size_t>,
+                   std::unique_ptr<TemporarySlot>>
+      TemporaryStorage;
+
+  // There should be a single FunctionReturnLabelSlot for the MF.
+  mutable std::unique_ptr<FunctionReturnLabelSlot> TheFunctionReturnLabelSlot;
+
+public:
+  EVMStackModel(MachineFunction &MF, const LiveIntervals &LIS);
+  Stack getFunctionParameters() const;
+  Stack getInstrInput(const MachineInstr &MI) const;
+  Stack getInstrOutput(const MachineInstr &MI) const;
+  Stack getReturnArguments(const MachineInstr &MI) const;
+  const SmallVector<Operation> &
+  getOperations(const MachineBasicBlock *MBB) const {
+    return OperationsMap.at(MBB);
+  }
+
+  // Get or create a requested stack slot.
+  StackSlot *getStackSlot(const MachineOperand &MO) const;
+  LiteralSlot *getLiteralSlot(const APInt &V) const {
+    if (LiteralStorage.count(V) == 0)
+      LiteralStorage[V] = std::make_unique<LiteralSlot>(V);
+    return LiteralStorage[V].get();
+  }
+  VariableSlot *getVariableSlot(const Register &R) const {
+    if (VariableStorage.count(R) == 0)
+      VariableStorage[R] = std::make_unique<VariableSlot>(R);
+    return VariableStorage[R].get();
+  }
+  SymbolSlot *getSymbolSlot(MCSymbol *S, const MachineInstr *MI) const {
+    auto Key = std::make_pair(S, MI);
+    if (SymbolStorage.count(Key) == 0)
+      SymbolStorage[Key] = std::make_unique<SymbolSlot>(S, MI);
+    return SymbolStorage[Key].get();
+  }
+  FunctionCallReturnLabelSlot *
+  getFunctionCallReturnLabelSlot(const MachineInstr *Call) const {
+    if (FunctionCallReturnLabelStorage.count(Call) == 0)
+      FunctionCallReturnLabelStorage[Call] =
+          std::make_unique<FunctionCallReturnLabelSlot>(Call);
+    return FunctionCallReturnLabelStorage[Call].get();
+  }
+  FunctionReturnLabelSlot *
+  getFunctionReturnLabelSlot(const MachineFunction *MF) const {
+    if (!TheFunctionReturnLabelSlot)
+      TheFunctionReturnLabelSlot =
+          std::make_unique<FunctionReturnLabelSlot>(MF);
+    assert(MF == TheFunctionReturnLabelSlot->getMachineFunction());
+    return TheFunctionReturnLabelSlot.get();
+  }
+  TemporarySlot *getTemporarySlot(const MachineInstr *MI, size_t Idx) const {
+    auto Key = std::make_pair(MI, Idx);
+    if (TemporaryStorage.count(Key) == 0)
+      TemporaryStorage[Key] = std::make_unique<TemporarySlot>(MI, Idx);
+    return TemporaryStorage[Key].get();
+  }
+  // Junk is always the same slot.
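+  // A single static instance suffices because a JunkSlot carries no state
+  // and slots are compared by pointer identity.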
+  static JunkSlot *getJunkSlot() {
+    static JunkSlot TheJunkSlot;
+    return &TheJunkSlot;
+  }
+
+private:
+  void createOperation(MachineInstr &MI, SmallVector<Operation> &Ops) const;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_EVM_EVMSTACKMODEL_H
diff --git a/llvm/lib/Target/EVM/EVMStackShuffler.h b/llvm/lib/Target/EVM/EVMStackShuffler.h
new file mode 100644
index 000000000000..e95a9554b0de
--- /dev/null
+++ b/llvm/lib/Target/EVM/EVMStackShuffler.h
@@ -0,0 +1,540 @@
+//===-- EVMStackShuffler.h - Implementation of stack shuffling --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares template algorithms to find an optimal (cheapest)
+// transition between two stack layouts using three shuffling primitives:
+// `swap`, `dup`, and `pop`.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_EVM_EVMSTACKSHUFFLER_H
+#define LLVM_LIB_TARGET_EVM_EVMSTACKSHUFFLER_H
+
+#include "EVMStackModel.h"
+#include <algorithm>
+#include <list>
+#include <map>
+#include <set>
+
+namespace llvm {
+
+// Abstraction of stack shuffling operations. Can be defined as an actual
+// concept once we switch to C++20. Used as an interface for the stack
+// shuffler below. The shuffle operation class is expected to internally keep
+// track of a current stack layout (the "source layout") that the shuffler is
+// supposed to shuffle to a fixed target stack layout. The shuffler works
+// iteratively. At each iteration it instantiates an instance of the shuffle
+// operations and queries it for various information about the current source
+// stack layout and the target layout, as described in the interface below.
+// Based on that information the shuffler decides which is the next optimal
+// operation to perform on the stack and calls the corresponding entry point
+// in the shuffling operations (swap, pushOrDupTarget or pop).
+/*
+template <class ShuffleOperations>
+concept ShuffleOperationConcept =
+    requires(ShuffleOperations ops, size_t sourceOffset,
+             size_t targetOffset, size_t depth) {
+
+  // Returns true, iff the current slot at sourceOffset in the source layout
+  // is a suitable slot at targetOffset.
+  { ops.isCompatible(sourceOffset, targetOffset) }
+      -> std::convertible_to<bool>;
+
+  // Returns true, iff the slots at the two given source offsets are
+  // identical.
+  { ops.sourceIsSame(sourceOffset, sourceOffset) }
+      -> std::convertible_to<bool>;
+
+  // Returns a positive integer n, if the slot at the given source offset
+  // needs n more copies. Returns a negative integer -n, if the slot at the
+  // given source offset occurs n times too many. Returns zero if the amount
+  // of occurrences, in the current source layout, of the slot at the given
+  // source offset matches the desired amount of occurrences in the target.
+  { ops.sourceMultiplicity(sourceOffset) } -> std::convertible_to<int>;
+
+  // Returns a positive integer n, if the slot at the given target offset
+  // needs n more copies. Returns a negative integer -n, if the slot at the
+  // given target offset occurs n times too many. Returns zero if the amount
+  // of occurrences, in the current source layout, of the slot at the given
+  // target offset matches the desired amount of occurrences in the target.
+  { ops.targetMultiplicity(targetOffset) } -> std::convertible_to<int>;
+
+  // Returns true, iff any slot is compatible with the given target offset.
+  { ops.targetIsArbitrary(targetOffset) } -> std::convertible_to<bool>;
+
+  // Returns the number of slots in the source layout.
+  { ops.sourceSize() } -> std::convertible_to<size_t>;
+
+  // Returns the number of slots in the target layout.
+  { ops.targetSize() } -> std::convertible_to<size_t>;
+
+  // Swaps the topmost slot in the source with the slot `depth` slots below
+  // the top. In terms of EVM opcodes this is supposed to be a `SWAP`.
+  // In terms of vectors this is supposed to be
+  // `std::swap(source.at(source.size() - depth - 1), source.top())`.
+  { ops.swap(depth) };
+
+  // Pops the topmost slot in the source, i.e. the slot at offset
+  // ops.sourceSize() - 1. In terms of EVM opcodes this is `POP`.
+  // In terms of vectors this is `source.pop();`.
+  { ops.pop() };
+
+  // Dups or pushes the slot that is supposed to end up at the given
+  // target offset.
+  { ops.pushOrDupTarget(targetOffset) };
+};
+*/
+
+/// Helper class that can perform shuffling of a source stack layout to a
+/// target stack layout via abstracted shuffle operations.
+template <class ShuffleOperations> class Shuffler {
+public:
+  /// Executes the stack shuffling operations. Instantiates an instance of
+  /// ShuffleOperations in each iteration. Each iteration performs exactly one
+  /// operation that modifies the stack. After `shuffle`, source and target
+  /// have the same size and all slots in the source layout are compatible
+  /// with the slots at the same target offset.
+  template <class... Args> static void shuffle(Args &&...args) {
+    bool NeedsMoreShuffling = true;
+    // The shuffling algorithm should always terminate in polynomial time, but
+    // we provide a limit in case it does not terminate due to a bug.
+    size_t IterationCount = 0;
+    while (IterationCount < 1000 &&
+           (NeedsMoreShuffling = shuffleStep(std::forward<Args>(args)...)))
+      ++IterationCount;
+
+    if (NeedsMoreShuffling)
+      llvm_unreachable("Could not create stack layout after 1000 iterations.");
+  }
+
+private:
+  // If dupping an ideal slot causes a slot that will still be required to
+  // become unreachable, then dup the latter slot first.
+  // Returns true, if it performed a dup.
+  static bool dupDeepSlotIfRequired(ShuffleOperations &Ops) {
+    // Check if the stack is large enough for anything to potentially become
+    // unreachable.
+    if (Ops.sourceSize() < 15)
+      return false;
+    // Check whether any deep slot might still be needed later (i.e. we still
+    // need to reach it with a DUP or SWAP).
+    for (size_t SourceOffset = 0; SourceOffset < (Ops.sourceSize() - 15);
+         ++SourceOffset) {
+      // This slot needs to be moved.
+      if (!Ops.isCompatible(SourceOffset, SourceOffset)) {
+        // If the current top fixes the slot, swap it down now.
+        if (Ops.isCompatible(Ops.sourceSize() - 1, SourceOffset)) {
+          Ops.swap(Ops.sourceSize() - SourceOffset - 1);
+          return true;
+        }
+        // Bring up a slot to fix this now, if possible.
+        if (bringUpTargetSlot(Ops, SourceOffset))
+          return true;
+        // Otherwise swap up the slot that will fix the offending slot.
+        for (auto Offset = SourceOffset + 1; Offset < Ops.sourceSize();
+             ++Offset)
+          if (Ops.isCompatible(Offset, SourceOffset)) {
+            Ops.swap(Ops.sourceSize() - Offset - 1);
+            return true;
+          }
+        // Otherwise give up; we will need stack compression or stack-limit
+        // evasion.
+      }
+      // We need another copy of this slot.
+      else if (Ops.sourceMultiplicity(SourceOffset) > 0) {
+        // If this slot occurs again later, we skip this occurrence.
+        // TODO: use C++20 ranges::views::iota.
+        if (const auto &R =
+                llvm::seq(SourceOffset + 1, Ops.sourceSize());
+            std::any_of(R.begin(), R.end(), [&](size_t Offset) {
+              return Ops.sourceIsSame(SourceOffset, Offset);
+            }))
+          continue;
+
+        // Bring up the target slot that would otherwise become unreachable.
+        for (size_t TargetOffset = 0; TargetOffset < Ops.targetSize();
+             ++TargetOffset)
+          if (!Ops.targetIsArbitrary(TargetOffset) &&
+              Ops.isCompatible(SourceOffset, TargetOffset)) {
+            Ops.pushOrDupTarget(TargetOffset);
+            return true;
+          }
+      }
+    }
+    return false;
+  }
+
+  /// Finds a slot to dup or push with the aim of eventually fixing \p
+  /// TargetOffset in the target. In the simplest case, the slot at \p
+  /// TargetOffset has a multiplicity > 0, i.e. it can directly be dupped or
+  /// pushed and the next iteration will fix \p TargetOffset. But, in general,
+  /// there may already be enough copies of the slot that is supposed to end
+  /// up at \p TargetOffset on the stack, such that it cannot be dupped again.
+  /// In that case there has to be a copy of the desired slot on the stack
+  /// already elsewhere that is not yet in place (``NextOffset`` below). The
+  /// fact that ``NextOffset`` is not in place means that we can (recursively)
+  /// try bringing up the slot that is supposed to end up at ``NextOffset`` in
+  /// the *target*. When the target slot at ``NextOffset`` is fixed, the
+  /// current source slot at ``NextOffset`` will be at the stack top, which is
+  /// the slot required at \p TargetOffset.
+  static bool bringUpTargetSlot(ShuffleOperations &Ops, size_t TargetOffset) {
+    std::list<size_t> ToVisit{TargetOffset};
+    std::set<size_t> Visited;
+
+    while (!ToVisit.empty()) {
+      size_t Offset = *ToVisit.begin();
+      ToVisit.erase(ToVisit.begin());
+      Visited.emplace(Offset);
+      if (Ops.targetMultiplicity(Offset) > 0) {
+        Ops.pushOrDupTarget(Offset);
+        return true;
+      }
+      // There must be another slot we can dup/push that will lead to the
+      // target slot at ``Offset`` being fixed.
+      for (size_t NextOffset = 0;
+           NextOffset < std::min(Ops.sourceSize(), Ops.targetSize());
+           ++NextOffset)
+        if (!Ops.isCompatible(NextOffset, NextOffset) &&
+            Ops.isCompatible(NextOffset, Offset))
+          if (!Visited.count(NextOffset))
+            ToVisit.emplace_back(NextOffset);
+    }
+    return false;
+  }
+
+  /// Performs a single stack operation, transforming the source layout closer
+  /// to the target layout.
+  template <class... Args> static bool shuffleStep(Args &&...args) {
+    ShuffleOperations Ops{std::forward<Args>(args)...};
+
+    // All source slots are final.
+    if (const auto &R = llvm::seq<size_t>(0u, Ops.sourceSize());
+        std::all_of(R.begin(), R.end(), [&](size_t Index) {
+          return Ops.isCompatible(Index, Index);
+        })) {
+      // Bring up all remaining target slots, if any, or terminate otherwise.
+      if (Ops.sourceSize() < Ops.targetSize()) {
+        if (!dupDeepSlotIfRequired(Ops)) {
+          [[maybe_unused]] bool Res = bringUpTargetSlot(Ops, Ops.sourceSize());
+          assert(Res);
+        }
+        return true;
+      }
+      return false;
+    }
+
+    size_t SourceTop = Ops.sourceSize() - 1;
+    // If we no longer need the current stack top, we pop it, unless we need
+    // an arbitrary slot at this position in the target.
+    if (Ops.sourceMultiplicity(SourceTop) < 0 &&
+        !Ops.targetIsArbitrary(SourceTop)) {
+      Ops.pop();
+      return true;
+    }
+
+    assert(Ops.targetSize() > 0);
+
+    // If the top is not supposed to be exactly what is on top right now, try
+    // to find a lower position to swap it to.
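+    // For example (a sketch): with source [a, b, c] ('c' on top) and target
+    // [c, b, a], 'c' is wanted at offset 0, so a single swap of depth 2
+    // yields [c, b, a].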
+    if (!Ops.isCompatible(SourceTop, SourceTop) ||
+        Ops.targetIsArbitrary(SourceTop))
+      for (size_t Offset = 0;
+           Offset < std::min(Ops.sourceSize(), Ops.targetSize()); ++Offset)
+        // It makes sense to swap to a lower position, if
+        if (!Ops.isCompatible(
+                Offset, Offset) && // The lower slot is not already in position.
+            !Ops.sourceIsSame(
+                Offset, SourceTop) && // We would not just swap identical slots.
+            Ops.isCompatible(
+                SourceTop,
+                Offset)) { // The lower position wants to have this slot.
+          // We cannot swap that deep.
+          if (Ops.sourceSize() - Offset - 1 > 16) {
+            // If there is a reachable slot to be removed, park the current
+            // top there.
+            for (size_t SwapDepth = 16; SwapDepth > 0; --SwapDepth)
+              if (Ops.sourceMultiplicity(Ops.sourceSize() - 1 - SwapDepth) <
+                  0) {
+                Ops.swap(SwapDepth);
+                if (Ops.targetIsArbitrary(SourceTop))
+                  // Usually we keep a slot that is to-be-removed, if the
+                  // current top is arbitrary. However, since we are in a
+                  // stack-too-deep situation, pop it immediately to compress
+                  // the stack (we can always push back junk in the end).
+                  Ops.pop();
+                return true;
+              }
+            // TODO: otherwise we rely on stack compression or stack-to-memory.
+          }
+          Ops.swap(Ops.sourceSize() - Offset - 1);
+          return true;
+        }
+
+    // Ops.sourceSize() > Ops.targetSize() cannot be true anymore, since if
+    // the source top is no longer required, we already popped it, and if it
+    // is required, we already swapped it down to a suitable target position.
+    assert(Ops.sourceSize() <= Ops.targetSize());
+
+    // If a lower slot should be removed, try to bring up the slot that
+    // should end up there. Note that after the cases above, there will
+    // always be a target slot to duplicate in this case.
+    for (size_t Offset = 0; Offset < Ops.sourceSize(); ++Offset)
+      if (!Ops.isCompatible(
+              Offset, Offset) && // The lower slot is not already in position.
+          Ops.sourceMultiplicity(Offset) <
+              0 && // We have too many copies of this slot.
+          Offset <=
+              Ops.targetSize() && // There is a target slot at this position.
+          !Ops.targetIsArbitrary(
+              Offset)) { // And that target slot is not arbitrary.
+        if (!dupDeepSlotIfRequired(Ops)) {
+          [[maybe_unused]] bool Res = bringUpTargetSlot(Ops, Offset);
+          assert(Res);
+        }
+        return true;
+      }
+
+    // At this point we want to keep all slots.
+    for (size_t I = 0; I < Ops.sourceSize(); ++I)
+      assert(Ops.sourceMultiplicity(I) >= 0);
+    assert(Ops.sourceSize() <= Ops.targetSize());
+
+    // If the top is not in position, try to find a slot that wants to be at
+    // the top and swap it up.
+    if (!Ops.isCompatible(SourceTop, SourceTop))
+      for (size_t SourceOffset = 0; SourceOffset < Ops.sourceSize();
+           ++SourceOffset)
+        if (!Ops.isCompatible(SourceOffset, SourceOffset) &&
+            Ops.isCompatible(SourceOffset, SourceTop)) {
+          Ops.swap(Ops.sourceSize() - SourceOffset - 1);
+          return true;
+        }
+
+    // If we still need more slots, produce a suitable one.
+    if (Ops.sourceSize() < Ops.targetSize()) {
+      if (!dupDeepSlotIfRequired(Ops)) {
+        [[maybe_unused]] bool Res = bringUpTargetSlot(Ops, Ops.sourceSize());
+        assert(Res);
+      }
+      return true;
+    }
+
+    // The stack has the correct size, each slot has the correct number of
+    // copies and the top is in position.
+ assert(Ops.sourceSize() == Ops.targetSize()); + size_t Size = Ops.sourceSize(); + for (size_t I = 0; I < Ops.sourceSize(); ++I) + assert(Ops.sourceMultiplicity(I) == 0 && + (Ops.targetIsArbitrary(I) || Ops.targetMultiplicity(I) == 0)); + assert(Ops.isCompatible(SourceTop, SourceTop)); + + const auto &SwappableOffsets = + llvm::seq(Size > 17 ? Size - 17 : 0u, Size); + + // If we find a lower slot that is out of position, but also compatible with + // the top, swap that up. + for (size_t Offset : SwappableOffsets) + if (!Ops.isCompatible(Offset, Offset) && + Ops.isCompatible(SourceTop, Offset)) { + Ops.swap(Size - Offset - 1); + return true; + } + + // Swap up any reachable slot that is still out of position. + for (size_t Offset : SwappableOffsets) + if (!Ops.isCompatible(Offset, Offset) && + !Ops.sourceIsSame(Offset, SourceTop)) { + Ops.swap(Size - Offset - 1); + return true; + } + + // We are in a stack-too-deep situation and try to reduce the stack size. + // If the current top is merely kept since the target slot is arbitrary, pop + // it. + if (Ops.targetIsArbitrary(SourceTop) && + Ops.sourceMultiplicity(SourceTop) <= 0) { + Ops.pop(); + return true; + } + + // If any reachable slot is merely kept, since the target slot is arbitrary, + // swap it up and pop it. + for (size_t Offset : SwappableOffsets) + if (Ops.targetIsArbitrary(Offset) && + Ops.sourceMultiplicity(Offset) <= 0) { + Ops.swap(Size - Offset - 1); + Ops.pop(); + return true; + } + + // We cannot avoid a stack-too-deep error. Repeat the above without + // restricting to reachable slots. + for (size_t Offset = 0; Offset < Size; ++Offset) + if (!Ops.isCompatible(Offset, Offset) && + Ops.isCompatible(SourceTop, Offset)) { + Ops.swap(Size - Offset - 1); + return true; + } + + for (size_t Offset = 0; Offset < Size; ++Offset) + if (!Ops.isCompatible(Offset, Offset) && + !Ops.sourceIsSame(Offset, SourceTop)) { + Ops.swap(Size - Offset - 1); + return true; + } + + llvm_unreachable("Unexpected state"); + } +}; + +/// A simple optimized map for mapping StackSlot to ints. 
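+/// Usage sketch (this is how createStackLayout below employs it): decrement
+/// the count of every slot in the current stack and increment the count of
+/// every slot in the target stack (junk target slots are special-cased);
+/// afterwards a positive count means more copies of a slot are needed, while
+/// a negative count means surplus copies can be popped.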
+class Multiplicity {
+public:
+  int &operator[](const StackSlot *Slot) {
+    if (auto *p = dyn_cast<FunctionCallReturnLabelSlot>(Slot))
+      return FunctionCallReturnLabelSlotMultiplicity[p];
+    if (isa<FunctionReturnLabelSlot>(Slot))
+      return FunctionReturnLabelSlotMultiplicity;
+    if (auto *p = dyn_cast<VariableSlot>(Slot))
+      return VariableSlotMultiplicity[p];
+    if (auto *p = dyn_cast<LiteralSlot>(Slot))
+      return LiteralSlotMultiplicity[p];
+    if (auto *p = dyn_cast<SymbolSlot>(Slot))
+      return SymbolSlotMultiplicity[p];
+    if (auto *p = dyn_cast<TemporarySlot>(Slot))
+      return TemporarySlotMultiplicity[p];
+
+    assert(isa<JunkSlot>(Slot));
+    return JunkSlotMultiplicity;
+  }
+
+  int at(const StackSlot *Slot) const {
+    if (auto *p = dyn_cast<FunctionCallReturnLabelSlot>(Slot))
+      return FunctionCallReturnLabelSlotMultiplicity.at(p);
+    if (isa<FunctionReturnLabelSlot>(Slot))
+      return FunctionReturnLabelSlotMultiplicity;
+    if (auto *p = dyn_cast<VariableSlot>(Slot))
+      return VariableSlotMultiplicity.at(p);
+    if (auto *p = dyn_cast<LiteralSlot>(Slot))
+      return LiteralSlotMultiplicity.at(p);
+    if (auto *p = dyn_cast<SymbolSlot>(Slot))
+      return SymbolSlotMultiplicity.at(p);
+    if (auto *p = dyn_cast<TemporarySlot>(Slot))
+      return TemporarySlotMultiplicity.at(p);
+
+    assert(isa<JunkSlot>(Slot));
+    return JunkSlotMultiplicity;
+  }
+
+private:
+  std::map<const FunctionCallReturnLabelSlot *, int>
+      FunctionCallReturnLabelSlotMultiplicity;
+  int FunctionReturnLabelSlotMultiplicity = 0;
+  std::map<const VariableSlot *, int> VariableSlotMultiplicity;
+  std::map<const LiteralSlot *, int> LiteralSlotMultiplicity;
+  std::map<const SymbolSlot *, int> SymbolSlotMultiplicity;
+  std::map<const TemporarySlot *, int> TemporarySlotMultiplicity;
+  int JunkSlotMultiplicity = 0;
+};
+
+/// Transforms \p CurrentStack to \p TargetStack, invoking the provided
+/// shuffling operations. Modifies `CurrentStack` itself after each invocation
+/// of the shuffling operations.
+/// \p Swap is a function with signature void(unsigned) that is called when
+/// the topmost slot is swapped with the slot `depth` slots below the top. In
+/// terms of EVM opcodes this is supposed to be a `SWAP`.
+/// \p PushOrDup is a function with signature void(StackSlot const&) that is
+/// called to push or dup the slot given as its argument to the stack top.
+/// \p Pop is a function with signature void() that is called when the topmost
+/// slot is popped.
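+/// A minimal usage sketch that merely counts the emitted operations (the
+/// callback bodies are illustrative):
+///   size_t NumOps = 0;
+///   createStackLayout(
+///       Current, Target,
+///       [&](unsigned /*Depth*/) { ++NumOps; },         // swap
+///       [&](const StackSlot * /*Slot*/) { ++NumOps; }, // push or dup
+///       [&]() { ++NumOps; });                          // pop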
+template <typename SwapT, typename PushOrDupT, typename PopT>
+void createStackLayout(Stack &CurrentStack, Stack const &TargetStack,
+                       SwapT Swap, PushOrDupT PushOrDup, PopT Pop) {
+  struct ShuffleOperations {
+    Stack &currentStack;
+    Stack const &targetStack;
+    SwapT swapCallback;
+    PushOrDupT pushOrDupCallback;
+    PopT popCallback;
+    Multiplicity multiplicity;
+
+    ShuffleOperations(Stack &CurrentStack, Stack const &TargetStack, SwapT Swap,
+                      PushOrDupT PushOrDup, PopT Pop)
+        : currentStack(CurrentStack), targetStack(TargetStack),
+          swapCallback(Swap), pushOrDupCallback(PushOrDup), popCallback(Pop) {
+      for (auto const &slot : currentStack)
+        --multiplicity[slot];
+
+      for (unsigned Offset = 0; Offset < targetStack.size(); ++Offset) {
+        auto *Slot = targetStack[Offset];
+        if (isa<JunkSlot>(Slot) && Offset < currentStack.size())
+          ++multiplicity[currentStack[Offset]];
+        else
+          ++multiplicity[Slot];
+      }
+    }
+
+    bool isCompatible(size_t Source, size_t Target) {
+      return Source < currentStack.size() && Target < targetStack.size() &&
+             (isa<JunkSlot>(targetStack[Target]) ||
+              currentStack[Source] == targetStack[Target]);
+    }
+
+    bool sourceIsSame(size_t Lhs, size_t Rhs) {
+      return currentStack[Lhs] == currentStack[Rhs];
+    }
+
+    int sourceMultiplicity(size_t Offset) {
+      return multiplicity.at(currentStack[Offset]);
+    }
+
+    int targetMultiplicity(size_t Offset) {
+      return multiplicity.at(targetStack[Offset]);
+    }
+
+    bool targetIsArbitrary(size_t Offset) {
+      return Offset < targetStack.size() && isa<JunkSlot>(targetStack[Offset]);
+    }
+
+    void swap(size_t I) {
+      swapCallback(static_cast<unsigned>(I));
+      std::swap(currentStack[currentStack.size() - I - 1], currentStack.back());
+    }
+
+    size_t sourceSize() { return currentStack.size(); }
+
+    size_t targetSize() { return targetStack.size(); }
+
+    void pop() {
+      popCallback();
+      currentStack.pop_back();
+    }
+
+    void pushOrDupTarget(size_t Offset) {
+      auto *targetSlot = targetStack[Offset];
+      pushOrDupCallback(targetSlot);
+      currentStack.push_back(targetSlot);
+    }
+  };
+
+  Shuffler<ShuffleOperations>::shuffle(CurrentStack, TargetStack, Swap,
+                                       PushOrDup, Pop);
+
+  assert(CurrentStack.size() == TargetStack.size());
+  for (unsigned I = 0; I < CurrentStack.size(); ++I) {
+    StackSlot *&Current = CurrentStack[I];
+    auto *Target = TargetStack[I];
+    if (isa<JunkSlot>(Target))
+      Current = EVMStackModel::getJunkSlot();
+    else
+      assert(Current == Target);
+  }
+}
+
+} // end namespace llvm
+#endif // LLVM_LIB_TARGET_EVM_EVMSTACKSHUFFLER_H
diff --git a/llvm/lib/Target/EVM/EVMStackify.cpp b/llvm/lib/Target/EVM/EVMStackify.cpp
index 3797c5fe7f98..d5ff664dc712 100644
--- a/llvm/lib/Target/EVM/EVMStackify.cpp
+++ b/llvm/lib/Target/EVM/EVMStackify.cpp
@@ -720,21 +720,21 @@ void StackModel::handleArgument(MachineInstr *MI) {
 void StackModel::handleLStackAtJump(MachineBasicBlock *MBB, MachineInstr *MI,
                                     const Register &Reg) {
+  assert(MI->getOpcode() == EVM::JUMP || MI->getOpcode() == EVM::JUMPI);
+
   // If the condition register is in the L-stack, we need to move it to
   // the bottom of the L-stack. After that we should clean clean the L-stack.
   // In case of an unconditional jump, the Reg value should be
   // EVM::NoRegister.
   clearPhysStackAtInst(StackType::L, MI, Reg);
 
-  // Insert "PUSH_LABEL %bb" instruction that should be be replaced with
-  // the actual PUSH* one in the MC layer to contain actual jump target
-  // offset.
-  BuildMI(*MI->getParent(), MI, DebugLoc(), TII->get(EVM::PUSH_LABEL))
+  // Insert a pseudo jump instruction that will be replaced with PUSH and JUMP
+  // instructions in the AsmPrinter.
+  ToErase.push_back(MI);
+  unsigned PseudoJumpOpc =
+      MI->getOpcode() == EVM::JUMP ? EVM::PseudoJUMP : EVM::PseudoJUMPI;
+  BuildMI(*MI->getParent(), MI, DebugLoc(), TII->get(PseudoJumpOpc))
       .addMBB(MBB);
-
-  // Add JUMPDEST at the beginning of the target MBB.
-  if (MBB->empty() || MBB->begin()->getOpcode() != EVM::JUMPDEST)
-    BuildMI(*MBB, MBB->begin(), DebugLoc(), TII->get(EVM::JUMPDEST));
 }
 
 void StackModel::handleCondJump(MachineInstr *MI) {
@@ -752,7 +752,7 @@ void StackModel::handleJump(MachineInstr *MI) {
 void StackModel::handleReturn(MachineInstr *MI) {
   ToErase.push_back(MI);
   BuildMI(*MI->getParent(), std::next(MIIter(MI)), DebugLoc(),
-          TII->get(EVM::JUMP));
+          TII->get(EVM::PseudoRET));
 
   // Collect the use registers of the RET instruction.
   SmallVector<Register> ReturnRegs;
@@ -864,25 +864,21 @@ void StackModel::handleCall(MachineInstr *MI) {
   // Callee removes them form the stack and pushes return values.
 
   MachineBasicBlock &MBB = *MI->getParent();
-  // Create return destination.
-  MIIter It = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(EVM::JUMPDEST));
 
   // Add symbol just after the jump that will be used as the return
   // address from the function.
   MCSymbol *RetSym = MF->getContext().createTempSymbol("FUNC_RET", true);
 
-  // Create jump to the callee.
-  It = BuildMI(MBB, It, MI->getDebugLoc(), TII->get(EVM::JUMP));
-  It->setPostInstrSymbol(*MF, RetSym);
+  // Create a pseudo jump to the callee that will be expanded into PUSH,
+  // JUMP, return-label and JUMPDEST instructions in the AsmPrinter.
+  const MachineOperand *CalleeOp = MI->explicit_uses().begin();
+  assert(CalleeOp->isGlobal());
+  MIIter It = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(EVM::PseudoCALL))
+                  .addGlobalAddress(CalleeOp->getGlobal())
+                  .addSym(RetSym);
 
   // Create push of the return address.
   BuildMI(MBB, It, MI->getDebugLoc(), TII->get(EVM::PUSH_LABEL)).addSym(RetSym);
-
-  // Create push of the callee's address.
-  const MachineOperand *CalleeOp = MI->explicit_uses().begin();
-  assert(CalleeOp->isGlobal());
-  BuildMI(MBB, It, MI->getDebugLoc(), TII->get(EVM::PUSH_LABEL))
-      .addGlobalAddress(CalleeOp->getGlobal());
 }
 
 void StackModel::clearFrameObjsAtInst(MachineInstr *MI) {
@@ -982,10 +978,6 @@ void StackModel::preProcess() {
   assert(!MF->empty());
   allocateFrameObjects();
   allocateXStack();
-  // Add JUMPDEST at the beginning of the first MBB,
-  // so this function can be jumped to.
-  MachineBasicBlock &MBB = MF->front();
-  BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(EVM::JUMPDEST));
 }
 
 // Remove all registers operands of the \p MI and repaces the opcode with
@@ -995,7 +987,9 @@ void StackModel::stackifyInstruction(MachineInstr *MI) {
     return;
 
   unsigned RegOpcode = MI->getOpcode();
-  if (RegOpcode == EVM::PUSH_LABEL)
+  if (RegOpcode == EVM::PUSH_LABEL || RegOpcode == EVM::PseudoJUMP ||
+      RegOpcode == EVM::PseudoJUMPI || RegOpcode == EVM::PseudoCALL ||
+      RegOpcode == EVM::PseudoRET)
     return;
 
   // Remove register operands.
@@ -1048,31 +1042,6 @@ void StackModel::postProcess() {
   // In a stackified code register liveness has no meaning.
   MachineRegisterInfo &MRI = MF->getRegInfo();
   MRI.invalidateLiveness();
-
-  // In EVM architecture jump target is set up using one of PUSH* instructions
-  // that come right before the jump instruction.
-  // For example:
-
-  // PUSH_LABEL %bb.10
-  // JUMPI_S
-  // PUSH_LABEL %bb.9
-  // JUMP_S
-  //
-  // The problem here is that such MIR is not valid. There should not be
-  // non-terminator (PUSH) instructions between terminator (JUMP) ones.
-  // To overcome this issue, we bundle adjacent instructions
-  // together and unbundle them in the AsmPrinter.
-  for (MachineBasicBlock &MBB : *MF) {
-    MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
-                                      E = MBB.instr_end();
-    for (; I != E; ++I) {
-      if (I->isBranch()) {
-        auto P = std::next(I);
-        if (P != E && P->getOpcode() == EVM::PUSH_LABEL)
-          I->bundleWithPred();
-      }
-    }
-  }
 }
 
 void StackModel::dumpState() const {
diff --git a/llvm/lib/Target/EVM/EVMStackifyCodeEmitter.cpp b/llvm/lib/Target/EVM/EVMStackifyCodeEmitter.cpp
new file mode 100644
index 000000000000..189a689dc3f5
--- /dev/null
+++ b/llvm/lib/Target/EVM/EVMStackifyCodeEmitter.cpp
@@ -0,0 +1,516 @@
+//===--- EVMStackifyCodeEmitter.cpp - Create stackified MIR -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file transforms MIR to the 'stackified' MIR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EVMStackifyCodeEmitter.h"
+#include "EVMMachineFunctionInfo.h"
+#include "EVMStackDebug.h"
+#include "EVMStackShuffler.h"
+#include "TargetInfo/EVMTargetInfo.h"
+#include "llvm/MC/MCContext.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "evm-stackify-code-emitter"
+
+// Return whether the function of the call instruction will return.
+static bool callWillReturn(const MachineInstr *Call) {
+  assert(Call->getOpcode() == EVM::FCALL && "Unexpected call instruction");
+  const MachineOperand *FuncOp = Call->explicit_uses().begin();
+  assert(FuncOp->isGlobal() && "Expected a global value");
+  const auto *Func = cast<Function>(FuncOp->getGlobal());
+  assert(Func && "Expected a function");
+  return !Func->hasFnAttribute(Attribute::NoReturn);
+}
+
+// Return the number of input arguments of the call instruction.
+static size_t getCallArgCount(const MachineInstr *Call) {
+  assert(Call->getOpcode() == EVM::FCALL && "Unexpected call instruction");
+  assert(Call->explicit_uses().begin()->isGlobal() &&
+         "First operand must be a function");
+  size_t NumExplicitInputs =
+      Call->getNumExplicitOperands() - Call->getNumExplicitDefs();
+
+  // The first operand is a function, so don't count it. If the function
+  // will return, we need to account for the return label.
+  constexpr size_t NumFuncOp = 1;
+  return NumExplicitInputs - NumFuncOp + callWillReturn(Call);
+}
+
+size_t EVMStackifyCodeEmitter::CodeEmitter::stackHeight() const {
+  return StackHeight;
+}
+
+void EVMStackifyCodeEmitter::CodeEmitter::enterMBB(MachineBasicBlock *MBB,
+                                                   int Height) {
+  StackHeight = Height;
+  CurMBB = MBB;
+  LLVM_DEBUG(dbgs() << "\n"
+                    << "Set stack height: " << StackHeight << "\n");
+  LLVM_DEBUG(dbgs() << "Setting current location to: " << MBB->getNumber()
+                    << "." << MBB->getName() << "\n");
+}
+
+void EVMStackifyCodeEmitter::CodeEmitter::emitInst(const MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  assert(Opc != EVM::JUMP && Opc != EVM::JUMPI && Opc != EVM::ARGUMENT &&
+         Opc != EVM::RET && Opc != EVM::CONST_I256 && Opc != EVM::COPY_I256 &&
+         Opc != EVM::FCALL && "Unexpected instruction");
+
+  size_t NumInputs = MI->getNumExplicitOperands() - MI->getNumExplicitDefs();
+  assert(StackHeight >= NumInputs && "Not enough operands on the stack");
+  StackHeight -= NumInputs;
+  StackHeight += MI->getNumExplicitDefs();
+
+  auto NewMI = BuildMI(*CurMBB, CurMBB->end(), MI->getDebugLoc(),
+                       TII->get(EVM::getStackOpcode(Opc)));
+  verify(NewMI);
+}
+
+void EVMStackifyCodeEmitter::CodeEmitter::emitSWAP(unsigned Depth) {
+  unsigned Opc = EVM::getSWAPOpcode(Depth);
+  auto NewMI = BuildMI(*CurMBB, CurMBB->end(), DebugLoc(),
+                       TII->get(EVM::getStackOpcode(Opc)));
+  verify(NewMI);
+}
+
+void EVMStackifyCodeEmitter::CodeEmitter::emitDUP(unsigned Depth) {
+  StackHeight += 1;
+  unsigned Opc = EVM::getDUPOpcode(Depth);
+  auto NewMI = BuildMI(*CurMBB, CurMBB->end(), DebugLoc(),
+                       TII->get(EVM::getStackOpcode(Opc)));
+  verify(NewMI);
+}
+
+void EVMStackifyCodeEmitter::CodeEmitter::emitPOP() {
+  assert(StackHeight > 0 && "Expected at least one operand on the stack");
+  StackHeight -= 1;
+  auto NewMI =
+      BuildMI(*CurMBB, CurMBB->end(), DebugLoc(), TII->get(EVM::POP_S));
+  verify(NewMI);
+}
+
+void EVMStackifyCodeEmitter::CodeEmitter::emitConstant(const APInt &Val) {
+  StackHeight += 1;
+  unsigned Opc = EVM::getPUSHOpcode(Val);
+  auto NewMI = BuildMI(*CurMBB, CurMBB->end(), DebugLoc(),
+                       TII->get(EVM::getStackOpcode(Opc)));
+  if (Opc != EVM::PUSH0)
+    NewMI.addCImm(ConstantInt::get(MF.getFunction().getContext(), Val));
+  verify(NewMI);
+}
+
+void EVMStackifyCodeEmitter::CodeEmitter::emitConstant(uint64_t Val) {
+  emitConstant(APInt(256, Val));
+}
+
+void EVMStackifyCodeEmitter::CodeEmitter::emitSymbol(const MachineInstr *MI,
+                                                     MCSymbol *Symbol) {
+  unsigned Opc = MI->getOpcode();
+  assert((Opc == EVM::DATASIZE || Opc == EVM::DATAOFFSET ||
+          Opc == EVM::LINKERSYMBOL || Opc == EVM::LOADIMMUTABLE) &&
+         "Unexpected symbol instruction");
+  StackHeight += 1;
+  // This is a codegen-only instruction that will be converted into PUSH4.
+  auto NewMI = BuildMI(*CurMBB, CurMBB->end(), MI->getDebugLoc(),
+                       TII->get(EVM::getStackOpcode(Opc)))
+                   .addSym(Symbol);
+  verify(NewMI);
+}
+
+void EVMStackifyCodeEmitter::CodeEmitter::emitLabelReference(
+    const MachineInstr *Call) {
+  assert(Call->getOpcode() == EVM::FCALL && "Unexpected call instruction");
+  StackHeight += 1;
+  auto [It, Inserted] = CallReturnSyms.try_emplace(Call);
+  if (Inserted)
+    It->second = MF.getContext().createTempSymbol("FUNC_RET", true);
+  auto NewMI =
+      BuildMI(*CurMBB, CurMBB->end(), DebugLoc(), TII->get(EVM::PUSH_LABEL))
+          .addSym(It->second);
+  verify(NewMI);
+}
+
+void EVMStackifyCodeEmitter::CodeEmitter::emitFuncCall(const MachineInstr *MI) {
+  assert(MI->getOpcode() == EVM::FCALL && "Unexpected call instruction");
+  assert(CurMBB == MI->getParent());
+
+  size_t NumInputs = getCallArgCount(MI);
+  assert(StackHeight >= NumInputs && "Not enough operands on the stack");
+  StackHeight -= NumInputs;
+
+  // PUSH_LABEL increases the stack height by 1, but we don't increase it
+  // explicitly here, as the label will be consumed by the following JUMP.
+  StackHeight += MI->getNumExplicitDefs();
+
+  // Create a pseudo jump to the function that will be expanded into PUSH and
+  // JUMP instructions in the AsmPrinter.
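+  // The AsmPrinter emits roughly the following for it (a sketch):
+  //   PUSH4 @callee
+  //   JUMP
+  //   FUNC_RET: JUMPDEST   ; emitted only if the callee returns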
+ auto NewMI = BuildMI(*CurMBB, CurMBB->end(), MI->getDebugLoc(), + TII->get(EVM::PseudoCALL)) + .addGlobalAddress(MI->explicit_uses().begin()->getGlobal()); + + // If this function returns, add a return label so we can emit it together + // with JUMPDEST. This is taken care of in the AsmPrinter. + if (callWillReturn(MI)) + NewMI.addSym(CallReturnSyms.at(MI)); + verify(NewMI); +} + +void EVMStackifyCodeEmitter::CodeEmitter::emitRet(const MachineInstr *MI) { + assert(MI->getOpcode() == EVM::RET && "Unexpected ret instruction"); + auto NewMI = BuildMI(*CurMBB, CurMBB->end(), MI->getDebugLoc(), + TII->get(EVM::PseudoRET)); + verify(NewMI); +} + +void EVMStackifyCodeEmitter::CodeEmitter::emitUncondJump( + const MachineInstr *MI, MachineBasicBlock *Target) { + assert(MI->getOpcode() == EVM::JUMP && + "Unexpected unconditional jump instruction"); + auto NewMI = BuildMI(*CurMBB, CurMBB->end(), MI->getDebugLoc(), + TII->get(EVM::PseudoJUMP)) + .addMBB(Target); + verify(NewMI); +} + +void EVMStackifyCodeEmitter::CodeEmitter::emitCondJump( + const MachineInstr *MI, MachineBasicBlock *Target) { + assert(MI->getOpcode() == EVM::JUMPI && + "Unexpected conditional jump instruction"); + assert(StackHeight > 0 && "Expected at least one operand on the stack"); + StackHeight -= 1; + auto NewMI = BuildMI(*CurMBB, CurMBB->end(), MI->getDebugLoc(), + TII->get(EVM::PseudoJUMPI)) + .addMBB(Target); + verify(NewMI); +} + +// Verify that a stackified instruction doesn't have registers and dump it. +void EVMStackifyCodeEmitter::CodeEmitter::verify(const MachineInstr *MI) const { + assert(EVMInstrInfo::isStack(MI) && + "Only stackified instructions are allowed"); + assert(all_of(MI->operands(), + [](const MachineOperand &MO) { return !MO.isReg(); }) && + "Registers are not allowed in stackified instructions"); + + LLVM_DEBUG(dbgs() << "Adding: " << *MI << " stack height: " << StackHeight + << "\n"); +} + +void EVMStackifyCodeEmitter::CodeEmitter::finalize() { + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : make_early_inc_range(MBB)) + // Remove all the instructions that are not stackified. + // TODO: #749: Fix debug info for stackified instructions and don't + // remove debug instructions. + if (!EVMInstrInfo::isStack(&MI)) + MI.eraseFromParent(); + + auto *MFI = MF.getInfo<EVMMachineFunctionInfo>(); + MFI->setIsStackified(); + + // In stackified code, register liveness has no meaning. + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.invalidateLiveness(); +} + +void EVMStackifyCodeEmitter::adjustStackForInst(const MachineInstr *MI, + size_t NumArgs) { + // Remove arguments from CurrentStack. + CurrentStack.erase(CurrentStack.end() - NumArgs, CurrentStack.end()); + + // Push return values to CurrentStack. + unsigned Idx = 0; + for (const auto &MO : MI->defs()) { + assert(MO.isReg()); + CurrentStack.push_back(StackModel.getTemporarySlot(MI, Idx++)); + } + assert(Emitter.stackHeight() == CurrentStack.size()); +} + +void EVMStackifyCodeEmitter::processCall(const Operation &Call) { + assert(Call.isFunctionCall()); + auto *MI = Call.getMachineInstr(); + size_t NumArgs = getCallArgCount(MI); + // Validate stack. + assert(Emitter.stackHeight() == CurrentStack.size()); + assert(CurrentStack.size() >= NumArgs); + + // Assert that we got the correct return label on the stack. + if (callWillReturn(MI)) { + [[maybe_unused]] const auto *ReturnLabelSlot = + dyn_cast<FunctionCallReturnLabelSlot>( + CurrentStack[CurrentStack.size() - NumArgs]); + assert(ReturnLabelSlot && ReturnLabelSlot->getCall() == MI); + } + + // Emit call.
+ Emitter.emitFuncCall(MI); + adjustStackForInst(MI, NumArgs); +} + +void EVMStackifyCodeEmitter::processInst(const Operation &Call) { + assert(Call.isBuiltinCall()); + auto *MI = Call.getMachineInstr(); + size_t NumArgs = MI->getNumExplicitOperands() - MI->getNumExplicitDefs(); + // Validate stack. + assert(Emitter.stackHeight() == CurrentStack.size()); + assert(CurrentStack.size() >= NumArgs); + // TODO: assert that we got a correct stack for the call. + + // Emit instruction. + Emitter.emitInst(MI); + adjustStackForInst(MI, NumArgs); +} + +void EVMStackifyCodeEmitter::processAssign(const Operation &Assignment) { + assert(Assignment.isAssignment()); + assert(Emitter.stackHeight() == CurrentStack.size()); + + // Invalidate occurrences of the assigned variables. + for (auto *&CurrentSlot : CurrentStack) + if (const auto *VarSlot = dyn_cast<VariableSlot>(CurrentSlot)) + if (is_contained(Assignment.getOutput(), VarSlot)) + CurrentSlot = EVMStackModel::getJunkSlot(); + + // Assign variables to current stack top. + assert(CurrentStack.size() >= Assignment.getOutput().size()); + llvm::copy(Assignment.getOutput(), + CurrentStack.end() - Assignment.getOutput().size()); +} + +bool EVMStackifyCodeEmitter::areLayoutsCompatible(const Stack &SourceStack, + const Stack &TargetStack) { + return SourceStack.size() == TargetStack.size() && + all_of(zip_equal(SourceStack, TargetStack), [](const auto &Pair) { + const auto [Src, Tgt] = Pair; + return isa<JunkSlot>(Tgt) || (Src == Tgt); + }); +} + +void EVMStackifyCodeEmitter::createStackLayout(const Stack &TargetStack) { + assert(Emitter.stackHeight() == CurrentStack.size()); + // ::createStackLayout asserts that it has successfully achieved the target + // layout. + ::createStackLayout( + CurrentStack, TargetStack, + // Swap callback. + [&](unsigned I) { + assert(CurrentStack.size() == Emitter.stackHeight()); + assert(I > 0 && I < CurrentStack.size()); + if (I <= 16) { + Emitter.emitSWAP(I); + } else { + int Deficit = static_cast<int>(I) - 16; + const StackSlot *DeepSlot = CurrentStack[CurrentStack.size() - I - 1]; + std::string Msg = + (Twine("cannot swap ") + + (isa<VariableSlot>(DeepSlot) ? "variable " : "slot ") + + DeepSlot->toString() + " with " + + (isa<VariableSlot>(CurrentStack.back()) ? "variable " + : "slot ") + + CurrentStack.back()->toString() + ": too deep in the stack by " + + std::to_string(Deficit) + " slots in " + + stackToString(CurrentStack)) + .str(); + + report_fatal_error(MF.getName() + Twine(": ") + Msg); + } + }, + // Push or dup callback. + [&](const StackSlot *Slot) { + assert(CurrentStack.size() == Emitter.stackHeight()); + + // Dup the slot, if already on stack and reachable. + auto SlotIt = llvm::find(llvm::reverse(CurrentStack), Slot); + if (SlotIt != CurrentStack.rend()) { + unsigned Depth = std::distance(CurrentStack.rbegin(), SlotIt); + if (Depth < 16) { + Emitter.emitDUP(static_cast<unsigned>(Depth + 1)); + return; + } + if (!Slot->isRematerializable()) { + std::string Msg = + (isa<VariableSlot>(Slot) ? "variable " : "slot ") + + Slot->toString() + " is " + std::to_string(Depth - 15) + + " too deep in the stack " + stackToString(CurrentStack); + + report_fatal_error(MF.getName() + ": " + Msg); + return; + } + // else: the slot is too deep in the stack, but can be freely + // generated, so we fall through to push it again. + } + + // The slot can be freely generated or is an unassigned return variable. + // Push it.
+ if (const auto *L = dyn_cast<LiteralSlot>(Slot)) { + Emitter.emitConstant(L->getValue()); + } else if (const auto *S = dyn_cast<SymbolSlot>(Slot)) { + Emitter.emitSymbol(S->getMachineInstr(), S->getSymbol()); + } else if (const auto *CallRet = + dyn_cast<FunctionCallReturnLabelSlot>(Slot)) { + Emitter.emitLabelReference(CallRet->getCall()); + } else if (isa<FunctionReturnLabelSlot>(Slot)) { + llvm_unreachable("Cannot produce function return label"); + } else if (isa<VariableSlot>(Slot)) { + llvm_unreachable("Variable not found on stack"); + } else if (isa<TemporarySlot>(Slot)) { + llvm_unreachable("Function call result requested, but " + "not found on stack."); + } else { + assert(isa<JunkSlot>(Slot)); + // Note: this will always be popped, so we can push anything. + Emitter.emitConstant(0); + } + }, + // Pop callback. + [&]() { Emitter.emitPOP(); }); + + assert(Emitter.stackHeight() == CurrentStack.size()); +} + +void EVMStackifyCodeEmitter::createOperationLayout(const Operation &Op) { + // Create required layout for entering the Operation. + // Check if we can choose cheaper stack shuffling if the Operation is an + // instruction with commutable arguments. + bool SwapCommutable = false; + if (Op.isBuiltinCall() && Op.getMachineInstr()->isCommutable()) { + // Get the stack layout before the instruction. + const Stack &DefaultTargetStack = Layout.getOperationEntryLayout(&Op); + size_t DefaultCost = + EvaluateStackTransform(CurrentStack, DefaultTargetStack); + + // Commutable operands always take top two stack slots. + const unsigned OpIdx1 = 0, OpIdx2 = 1; + assert(DefaultTargetStack.size() > 1); + + // Swap the commutable stack items and measure the stack shuffling cost + // again. + Stack CommutedTargetStack = DefaultTargetStack; + std::swap(CommutedTargetStack[CommutedTargetStack.size() - OpIdx1 - 1], + CommutedTargetStack[CommutedTargetStack.size() - OpIdx2 - 1]); + size_t CommutedCost = + EvaluateStackTransform(CurrentStack, CommutedTargetStack); + // Choose the cheapest transformation. + SwapCommutable = CommutedCost < DefaultCost; + createStackLayout(SwapCommutable ? CommutedTargetStack + : DefaultTargetStack); + } else { + createStackLayout(Layout.getOperationEntryLayout(&Op)); + } + + // Assert that we have the inputs of the Operation on stack top. + assert(CurrentStack.size() == Emitter.stackHeight()); + assert(CurrentStack.size() >= Op.getInput().size()); + Stack StackInput(CurrentStack.end() - Op.getInput().size(), + CurrentStack.end()); + // Adjust the StackInput if needed. + if (SwapCommutable) { + std::swap(StackInput[StackInput.size() - 1], + StackInput[StackInput.size() - 2]); + } + assert(areLayoutsCompatible(StackInput, Op.getInput())); +} + +void EVMStackifyCodeEmitter::run() { + assert(CurrentStack.empty() && Emitter.stackHeight() == 0); + + SmallPtrSet<MachineBasicBlock *, 16> Visited; + SmallVector<MachineBasicBlock *> WorkList{&MF.front()}; + while (!WorkList.empty()) { + auto *Block = WorkList.pop_back_val(); + if (!Visited.insert(Block).second) + continue; + + // Might set some slots to junk, if not required by the block. + CurrentStack = Layout.getMBBEntryLayout(Block); + Emitter.enterMBB(Block, CurrentStack.size()); + + for (const auto &Op : StackModel.getOperations(Block)) { + createOperationLayout(Op); + + [[maybe_unused]] size_t BaseHeight = + CurrentStack.size() - Op.getInput().size(); + + // Perform the Operation. + if (Op.isFunctionCall()) + processCall(Op); + else if (Op.isBuiltinCall()) + processInst(Op); + else if (Op.isAssignment()) + processAssign(Op); + else + llvm_unreachable("Unexpected operation type."); + + // Assert that the Operation produced its proclaimed output.
+ assert(CurrentStack.size() == Emitter.stackHeight()); + assert(CurrentStack.size() == BaseHeight + Op.getOutput().size()); + assert(CurrentStack.size() >= Op.getOutput().size()); + assert(areLayoutsCompatible( + Stack(CurrentStack.end() - Op.getOutput().size(), CurrentStack.end()), + Op.getOutput())); + } + + // Exit the block. + const EVMMBBTerminatorsInfo *TermInfo = CFGInfo.getTerminatorsInfo(Block); + MBBExitType ExitType = TermInfo->getExitType(); + if (ExitType == MBBExitType::UnconditionalBranch) { + auto [UncondBr, Target] = TermInfo->getUnconditionalBranch(); + // Create the stack expected at the jump target. + createStackLayout(Layout.getMBBEntryLayout(Target)); + + // Assert that we have a valid stack for the target. + assert( + areLayoutsCompatible(CurrentStack, Layout.getMBBEntryLayout(Target))); + + if (UncondBr) + Emitter.emitUncondJump(UncondBr, Target); + WorkList.emplace_back(Target); + } else if (ExitType == MBBExitType::ConditionalBranch) { + auto [CondBr, UncondBr, TrueBB, FalseBB, Condition] = + TermInfo->getConditionalBranch(); + // Create the shared entry layout of the jump targets, which is + // stored as exit layout of the current block. + createStackLayout(Layout.getMBBExitLayout(Block)); + + // Assert that we have the correct condition on stack. + assert(!CurrentStack.empty()); + assert(CurrentStack.back() == StackModel.getStackSlot(*Condition)); + + // Emit the conditional jump to the non-zero label and update the + // stored stack. + assert(CondBr); + Emitter.emitCondJump(CondBr, TrueBB); + CurrentStack.pop_back(); + + // Assert that we have a valid stack for both jump targets. + assert( + areLayoutsCompatible(CurrentStack, Layout.getMBBEntryLayout(TrueBB))); + assert(areLayoutsCompatible(CurrentStack, + Layout.getMBBEntryLayout(FalseBB))); + + // Generate unconditional jump if needed. + if (UncondBr) + Emitter.emitUncondJump(UncondBr, FalseBB); + WorkList.emplace_back(TrueBB); + WorkList.emplace_back(FalseBB); + } else if (ExitType == MBBExitType::FunctionReturn) { + assert(!MF.getFunction().hasFnAttribute(Attribute::NoReturn)); + // Create the function return layout and jump. + const MachineInstr *MI = TermInfo->getFunctionReturn(); + assert(StackModel.getReturnArguments(*MI) == + Layout.getMBBExitLayout(Block)); + createStackLayout(Layout.getMBBExitLayout(Block)); + Emitter.emitRet(MI); + } + } + Emitter.finalize(); +} diff --git a/llvm/lib/Target/EVM/EVMStackifyCodeEmitter.h b/llvm/lib/Target/EVM/EVMStackifyCodeEmitter.h new file mode 100644 index 000000000000..0b612e9dd5a7 --- /dev/null +++ b/llvm/lib/Target/EVM/EVMStackifyCodeEmitter.h @@ -0,0 +1,101 @@ +//===--- EVMStackifyCodeEmitter.h - Create stackified MIR ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file transforms MIR to the 'stackified' MIR. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_EVM_EVMSTACKIFYCODEEMITTER_H +#define LLVM_LIB_TARGET_EVM_EVMSTACKIFYCODEEMITTER_H + +#include "EVMStackLayoutGenerator.h" +#include "EVMSubtarget.h" + +namespace llvm { + +class MachineInstr; +class MCSymbol; + +class EVMStackifyCodeEmitter { +public: + EVMStackifyCodeEmitter(const EVMStackLayout &Layout, + const EVMStackModel &StackModel, + const EVMMachineCFGInfo &CFGInfo, MachineFunction &MF) + : Emitter(MF), Layout(Layout), StackModel(StackModel), CFGInfo(CFGInfo), + MF(MF) {} + + /// Stackify instructions, starting from the MF's first MBB. + void run(); + +private: + class CodeEmitter { + public: + explicit CodeEmitter(MachineFunction &MF) + : MF(MF), TII(MF.getSubtarget<EVMSubtarget>().getInstrInfo()) {} + size_t stackHeight() const; + void enterMBB(MachineBasicBlock *MBB, int Height); + void emitInst(const MachineInstr *MI); + void emitSWAP(unsigned Depth); + void emitDUP(unsigned Depth); + void emitPOP(); + void emitConstant(const APInt &Val); + void emitConstant(uint64_t Val); + void emitSymbol(const MachineInstr *MI, MCSymbol *Symbol); + void emitFuncCall(const MachineInstr *MI); + void emitRet(const MachineInstr *MI); + void emitCondJump(const MachineInstr *MI, MachineBasicBlock *Target); + void emitUncondJump(const MachineInstr *MI, MachineBasicBlock *Target); + void emitLabelReference(const MachineInstr *Call); + /// Remove all the instructions that are not stackified and mark the + /// function as stackified from now on. Also, invalidate register + /// liveness, as it has no meaning in stackified code. + void finalize(); + + private: + MachineFunction &MF; + const EVMInstrInfo *TII; + size_t StackHeight = 0; + MachineBasicBlock *CurMBB{}; + DenseMap<const MachineInstr *, MCSymbol *> CallReturnSyms; + + void verify(const MachineInstr *MI) const; + }; + + CodeEmitter Emitter; + const EVMStackLayout &Layout; + const EVMStackModel &StackModel; + const EVMMachineCFGInfo &CFGInfo; + MachineFunction &MF; + Stack CurrentStack; + + /// Checks if it's valid to transition from \p SourceStack to \p + /// TargetStack, that is, \p SourceStack exactly matches each slot in \p + /// TargetStack that is not a JunkSlot. + bool areLayoutsCompatible(const Stack &SourceStack, const Stack &TargetStack); + + /// Shuffles CurrentStack to the desired \p TargetStack. + void createStackLayout(const Stack &TargetStack); + + /// Creates the Op.Input stack layout from the 'CurrentStack', taking into + /// account the commutative property of the operation. + void createOperationLayout(const Operation &Op); + + /// Remove the arguments from the stack and push the return values. + void adjustStackForInst(const MachineInstr *MI, size_t NumArgs); + + /// Generate code for the function call \p Call. + void processCall(const Operation &Call); + /// Generate code for the builtin call \p Call. + void processInst(const Operation &Call); + /// Generate code for the assignment \p Assignment.
+ void processAssign(const Operation &Assignment); +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_EVM_EVMSTACKIFYCODEEMITTER_H diff --git a/llvm/lib/Target/EVM/EVMTargetMachine.cpp b/llvm/lib/Target/EVM/EVMTargetMachine.cpp index 8e28d24346b6..e1991ddfb969 100644 --- a/llvm/lib/Target/EVM/EVMTargetMachine.cpp +++ b/llvm/lib/Target/EVM/EVMTargetMachine.cpp @@ -39,6 +39,10 @@ cl::opt cl::desc("EVM: output stack registers in" " instruction output for test purposes only."), cl::init(false)); +cl::opt<bool> + EVMUseLocalStackify("evm-use-local-stackify", cl::Hidden, + cl::desc("EVM: use the local stackification algorithm"), + cl::init(false)); extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeEVMTarget() { // Register the target. @@ -51,7 +55,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeEVMTarget() { initializeEVMOptimizeLiveIntervalsPass(PR); initializeEVMRegColoringPass(PR); initializeEVMSingleUseExpressionPass(PR); + initializeEVMSplitCriticalEdgesPass(PR); initializeEVMStackifyPass(PR); + initializeEVMBPStackificationPass(PR); } static std::string computeDataLayout() { @@ -75,7 +81,6 @@ EVMTargetMachine::EVMTargetMachine(const Target &T, const Triple &TT, getEffectiveCodeModel(CM, CodeModel::Small), OL), TLOF(std::make_unique()), Subtarget(TT, std::string(CPU), std::string(FS), *this) { - setRequiresStructuredCFG(true); initAsmInfo(); } @@ -200,11 +205,17 @@ void EVMPassConfig::addPreEmitPass() { // FIXME: enable all the passes below, but the Stackify with EVMKeepRegisters. if (!EVMKeepRegisters) { + addPass(createEVMSplitCriticalEdges()); + addPass(&MachineBlockPlacementID); addPass(createEVMOptimizeLiveIntervals()); addPass(createEVMSingleUseExpression()); - // Run the register coloring pass to reduce the total number of registers. - addPass(createEVMRegColoring()); - addPass(createEVMStackify()); + if (EVMUseLocalStackify) { + // Run the register coloring pass to reduce the total number of registers. + addPass(createEVMRegColoring()); + addPass(createEVMStackify()); + } else { + addPass(createEVMBPStackification()); + } } } diff --git a/llvm/lib/Target/EVM/EVMTargetTransformInfo.h b/llvm/lib/Target/EVM/EVMTargetTransformInfo.h index a0f0f62168b1..0f1b41d429f6 100644 --- a/llvm/lib/Target/EVM/EVMTargetTransformInfo.h +++ b/llvm/lib/Target/EVM/EVMTargetTransformInfo.h @@ -35,7 +35,7 @@ class EVMTTIImpl final : public BasicTTIImplBase { const EVMTargetLowering *getTLI() const { return TLI; } public: - enum SyncVMRegisterClass { Vector /* Unsupported */, GPR }; + enum EVMRegisterClass { Vector /* Unsupported */, GPR }; EVMTTIImpl(const EVMTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), @@ -100,9 +100,6 @@ class EVMTTIImpl final : public BasicTTIImplBase { OpsOut.push_back(Type::getIntNTy(Context, RemainingBytes * 8)); } - // TODO: The value is copied from SyncVM, needs to be checked.
- unsigned getInliningThresholdMultiplier() const { return 11; } - void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE); diff --git a/llvm/lib/Target/EVM/TargetInfo/EVMTargetInfo.cpp b/llvm/lib/Target/EVM/TargetInfo/EVMTargetInfo.cpp index d9f73e7a1de7..cb4ae74f2672 100644 --- a/llvm/lib/Target/EVM/TargetInfo/EVMTargetInfo.cpp +++ b/llvm/lib/Target/EVM/TargetInfo/EVMTargetInfo.cpp @@ -122,3 +122,81 @@ unsigned llvm::EVM::getPUSHOpcode(const APInt &Imm) { llvm_unreachable("Unexpected stack depth"); } } + +unsigned llvm::EVM::getDUPOpcode(unsigned Depth) { + switch (Depth) { + case 1: + return EVM::DUP1; + case 2: + return EVM::DUP2; + case 3: + return EVM::DUP3; + case 4: + return EVM::DUP4; + case 5: + return EVM::DUP5; + case 6: + return EVM::DUP6; + case 7: + return EVM::DUP7; + case 8: + return EVM::DUP8; + case 9: + return EVM::DUP9; + case 10: + return EVM::DUP10; + case 11: + return EVM::DUP11; + case 12: + return EVM::DUP12; + case 13: + return EVM::DUP13; + case 14: + return EVM::DUP14; + case 15: + return EVM::DUP15; + case 16: + return EVM::DUP16; + default: + llvm_unreachable("Unexpected stack depth"); + } +} + +unsigned llvm::EVM::getSWAPOpcode(unsigned Depth) { + switch (Depth) { + case 1: + return EVM::SWAP1; + case 2: + return EVM::SWAP2; + case 3: + return EVM::SWAP3; + case 4: + return EVM::SWAP4; + case 5: + return EVM::SWAP5; + case 6: + return EVM::SWAP6; + case 7: + return EVM::SWAP7; + case 8: + return EVM::SWAP8; + case 9: + return EVM::SWAP9; + case 10: + return EVM::SWAP10; + case 11: + return EVM::SWAP11; + case 12: + return EVM::SWAP12; + case 13: + return EVM::SWAP13; + case 14: + return EVM::SWAP14; + case 15: + return EVM::SWAP15; + case 16: + return EVM::SWAP16; + default: + llvm_unreachable("Unexpected stack depth"); + } +} diff --git a/llvm/lib/Target/EVM/TargetInfo/EVMTargetInfo.h b/llvm/lib/Target/EVM/TargetInfo/EVMTargetInfo.h index 5b4abc84d6cc..e0c6998bbb5a 100644 --- a/llvm/lib/Target/EVM/TargetInfo/EVMTargetInfo.h +++ b/llvm/lib/Target/EVM/TargetInfo/EVMTargetInfo.h @@ -25,6 +25,8 @@ namespace EVM { unsigned getStackOpcode(unsigned Opcode); unsigned getRegisterOpcode(unsigned Opcode); unsigned getPUSHOpcode(const APInt &Imm); +unsigned getDUPOpcode(unsigned Depth); +unsigned getSWAPOpcode(unsigned Depth); } // namespace EVM diff --git a/llvm/test/CodeGen/EVM/add.ll b/llvm/test/CodeGen/EVM/add.ll index a4a49063d0b9..c20432a02b05 100644 --- a/llvm/test/CodeGen/EVM/add.ll +++ b/llvm/test/CodeGen/EVM/add.ll @@ -5,8 +5,8 @@ target triple = "evm" define i256 @addrrr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @addrrr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ADD [[REG:\$[0-9]+]], [[IN1]], [[IN2]] %res = add i256 %rs1, %rs2 diff --git a/llvm/test/CodeGen/EVM/call.ll b/llvm/test/CodeGen/EVM/call.ll index ab6b309e16bc..4e57b307631c 100644 --- a/llvm/test/CodeGen/EVM/call.ll +++ b/llvm/test/CodeGen/EVM/call.ll @@ -8,8 +8,8 @@ declare void @foo2(i256) define i256 @call(i256 %a, i256 %b) nounwind { ; CHECK-LABEL: @call -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ADD [[TMP1:\$[0-9]+]], [[IN1]], [[IN2]] ; CHECK: FCALL 1 [[RES1:\$[0-9]+]], @foo, [[TMP1]] @@ -20,8 +20,8 @@ define i256 @call(i256 %a, i256 %b) nounwind { define void @call2(i256 %a, i256 %b) nounwind { ; CHECK-LABEL: @call2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; 
CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ADD [[TMP1:\$[0-9]+]], [[IN1]], [[IN2]] ; CHECK: FCALL 0 @foo2, [[TMP1]] diff --git a/llvm/test/CodeGen/EVM/div.ll b/llvm/test/CodeGen/EVM/div.ll index af3296475505..f5965662751e 100644 --- a/llvm/test/CodeGen/EVM/div.ll +++ b/llvm/test/CodeGen/EVM/div.ll @@ -5,8 +5,8 @@ target triple = "evm" define i256 @udivrrr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @udivrrr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: DIV [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = udiv i256 %rs1, %rs2 @@ -15,8 +15,8 @@ define i256 @udivrrr(i256 %rs1, i256 %rs2) nounwind { define i256 @sdivrrr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @sdivrrr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SDIV [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = sdiv i256 %rs1, %rs2 diff --git a/llvm/test/CodeGen/EVM/fallthrough.mir b/llvm/test/CodeGen/EVM/fallthrough.mir deleted file mode 100644 index b5efb4f62911..000000000000 --- a/llvm/test/CodeGen/EVM/fallthrough.mir +++ /dev/null @@ -1,45 +0,0 @@ -# RUN: llc -x mir --start-after=evm-stackify < %s | FileCheck %s - - ---- | - - target datalayout = "E-p:256:256-i256:256:256-S256-a:256:256" - target triple = "evm" - define void @test_fallthrough() { ret void } - -... ---- -# CHECK: PUSH4 @.BB0_1 -# CHECK-NEXT: JUMPI -# CHECK-NEXT: PUSH4 @.BB0_2 -# CHECK-NEXT: JUMP -# CHECK-NEXT: .BB0_1: - -name: test_fallthrough -tracksRegLiveness: false -machineFunctionInfo: - isStackified: true -body: | - bb.0: - JUMPDEST_S - PUSH_LABEL %bb.10 { - JUMPI_S - } - PUSH_LABEL %bb.13 { - JUMP_S - } - - bb.10: - liveins: $value_stack - JUMPDEST_S - PUSH0_S - PUSH0_S - REVERT_S - - bb.13: - liveins: $value_stack - JUMPDEST_S - PUSH0_S - PUSH0_S - REVERT_S -... 
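
The dense getDUPOpcode/getSWAPOpcode switch tables above stop at depth 16 because the EVM instruction set encodes each family as sixteen contiguous opcodes; this hard ISA limit is also why the stackifier's shuffle callbacks report a fatal error for slots deeper than DUP16/SWAP16 can reach. A minimal standalone sketch of the underlying encoding (hypothetical helper names, not part of this patch):

#include <cassert>
#include <cstdint>

// DUP1..DUP16 are encoded as bytes 0x80..0x8F and SWAP1..SWAP16 as
// 0x90..0x9F, so the switch tables are equivalent to simple arithmetic
// on the depth.
static uint8_t dupOpcodeByte(unsigned Depth) {
  assert(Depth >= 1 && Depth <= 16 && "Unexpected stack depth");
  return 0x80 + (Depth - 1);
}

static uint8_t swapOpcodeByte(unsigned Depth) {
  assert(Depth >= 1 && Depth <= 16 && "Unexpected stack depth");
  return 0x90 + (Depth - 1);
}

The backend goes through named opcodes rather than raw bytes so that the MC layer owns the encoding; the arithmetic above is only meant to show why the tables end at 16.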
diff --git a/llvm/test/CodeGen/EVM/globals.ll b/llvm/test/CodeGen/EVM/globals.ll index 996282f7b566..df70cab339df 100644 --- a/llvm/test/CodeGen/EVM/globals.ll +++ b/llvm/test/CodeGen/EVM/globals.ll @@ -57,8 +57,8 @@ define i256 @load.fromarray(i256 %i) nounwind { define void @store.toarray(i256 %val, i256 %i) nounwind { ; CHECK-LABEL: store.toarray -; CHECK: ARGUMENT [[IDX:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[VAL:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IDX:\$[0-9]+]], 1 ; CHECK: CONST_I256 [[C:\$[0-9]+]], 5 ; CHECK: SHL [[SHL:\$[0-9]+]], [[IDX]], [[C]] ; CHECK: CONST_I256 [[TMP:\$[0-9]+]], @val.arr diff --git a/llvm/test/CodeGen/EVM/intrinsic.ll b/llvm/test/CodeGen/EVM/intrinsic.ll index d871191f3ab5..8398c33cf70a 100644 --- a/llvm/test/CodeGen/EVM/intrinsic.ll +++ b/llvm/test/CodeGen/EVM/intrinsic.ll @@ -5,8 +5,8 @@ target triple = "evm" define i256 @sdiv(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @sdiv -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SDIV [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = call i256 @llvm.evm.sdiv(i256 %rs1, i256 %rs2) @@ -15,8 +15,8 @@ define i256 @sdiv(i256 %rs1, i256 %rs2) nounwind { define i256 @div(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @div -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: DIV [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = call i256 @llvm.evm.div(i256 %rs1, i256 %rs2) @@ -25,8 +25,8 @@ define i256 @div(i256 %rs1, i256 %rs2) nounwind { define i256 @smod(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @smod -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SMOD [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = call i256 @llvm.evm.smod(i256 %rs1, i256 %rs2) @@ -35,8 +35,8 @@ define i256 @smod(i256 %rs1, i256 %rs2) nounwind { define i256 @mod(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @mod -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: MOD [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = call i256 @llvm.evm.mod(i256 %rs1, i256 %rs2) @@ -45,8 +45,8 @@ define i256 @mod(i256 %rs1, i256 %rs2) nounwind { define i256 @shl(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @shl -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SHL [[TMP:\$[0-9]+]], [[IN2]], [[IN1]] %res = call i256 @llvm.evm.shl(i256 %rs1, i256 %rs2) @@ -55,8 +55,8 @@ define i256 @shl(i256 %rs1, i256 %rs2) nounwind { define i256 @shr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @shr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SHR [[TMP:\$[0-9]+]], [[IN2]], [[IN1]] %res = call i256 @llvm.evm.shr(i256 %rs1, i256 %rs2) @@ -65,8 +65,8 @@ define i256 @shr(i256 %rs1, i256 %rs2) nounwind { define i256 @sar(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @sar -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SAR [[TMP:\$[0-9]+]], [[IN2]], [[IN1]] %res = call i256 @llvm.evm.sar(i256 %rs1, i256 %rs2) @@ -75,9 +75,9 @@ define i256 @sar(i256 %rs1, i256 %rs2) nounwind { define i256 @addmod(i256 %rs1, i256 %rs2, i256 %rs3) nounwind { ; CHECK-LABEL: @addmod -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: 
ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 ; CHECK: ADDMOD [[TMP:\$[0-9]+]], [[IN1]], [[IN2]], [[IN3]] %res = call i256 @llvm.evm.addmod(i256 %rs1, i256 %rs2, i256 %rs3) @@ -86,9 +86,9 @@ define i256 @addmod(i256 %rs1, i256 %rs2, i256 %rs3) nounwind { define i256 @mulmod(i256 %rs1, i256 %rs2, i256 %rs3) nounwind { ; CHECK-LABEL: @mulmod -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 ; CHECK: MULMOD [[TMP:\$[0-9]+]], [[IN1]], [[IN2]], [[IN3]] %res = call i256 @llvm.evm.mulmod(i256 %rs1, i256 %rs2, i256 %rs3) @@ -97,8 +97,8 @@ define i256 @mulmod(i256 %rs1, i256 %rs2, i256 %rs3) nounwind { define i256 @exp(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @exp -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: EXP [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = call i256 @llvm.evm.exp(i256 %rs1, i256 %rs2) @@ -107,8 +107,8 @@ define i256 @exp(i256 %rs1, i256 %rs2) nounwind { define i256 @sha3(ptr addrspace(1) %offset, i256 %size) nounwind { ; CHECK-LABEL: @sha3 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SHA3 [[RES1:\$[0-9]+]], [[IN1]], [[IN2]] %res = call i256 @llvm.evm.sha3(ptr addrspace(1) %offset, i256 %size) @@ -117,8 +117,8 @@ define i256 @sha3(ptr addrspace(1) %offset, i256 %size) nounwind { define i256 @signextend(i256 %bytesize, i256 %val) nounwind { ; CHECK-LABEL: @signextend -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SIGNEXTEND [[RES1:\$[0-9]+]], [[IN1]], [[IN2]] %res = call i256 @llvm.evm.signextend(i256 %bytesize, i256 %val) @@ -127,8 +127,8 @@ define i256 @signextend(i256 %bytesize, i256 %val) nounwind { define i256 @byte(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @byte -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: BYTE [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = call i256 @llvm.evm.byte(i256 %rs1, i256 %rs2) @@ -236,10 +236,10 @@ define i256 @extcodesize(i256 %rs1) nounwind { define void @extcodecopy(i256 %addr, ptr addrspace(1) %dst, ptr addrspace(4) %src, i256 %size) nounwind { ; CHECK-LABEL: @extcodecopy -; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 +; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 ; CHECK: EXTCODECOPY [[IN1]], [[IN2]], [[IN3]], [[IN4]] call void @llvm.evm.extcodecopy(i256 %addr, ptr addrspace(1) %dst, ptr addrspace(4) %src, i256 %size) @@ -355,8 +355,8 @@ define i256 @blobbasefee() nounwind { define void @log0(ptr addrspace(1) %off, i256 %size) nounwind { ; CHECK-LABEL: @log0 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: LOG0 [[IN1]], [[IN2]] call void @llvm.evm.log0(ptr addrspace(1) %off, i256 %size) @@ -365,9 +365,9 @@ define void @log0(ptr addrspace(1) %off, i256 %size) nounwind { define void @log1(ptr addrspace(1) %off, i256 %size, i256 %t1) nounwind { ; CHECK-LABEL: @log1 -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; 
CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 ; CHECK: LOG1 [[IN1]], [[IN2]], [[IN3]] call void @llvm.evm.log1(ptr addrspace(1) %off, i256 %size, i256 %t1) @@ -376,10 +376,10 @@ define void @log1(ptr addrspace(1) %off, i256 %size, i256 %t1) nounwind { define void @log2(ptr addrspace(1) %off, i256 %size, i256 %t1, i256 %t2) nounwind { ; CHECK-LABEL: @log2 -; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 +; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 ; CHECK: LOG2 [[IN1]], [[IN2]], [[IN3]], [[IN4]] call void @llvm.evm.log2(ptr addrspace(1) %off, i256 %size, i256 %t1, i256 %t2) @@ -388,11 +388,11 @@ define void @log2(ptr addrspace(1) %off, i256 %size, i256 %t1, i256 %t2) nounwin define void @log3(ptr addrspace(1) %off, i256 %size, i256 %t1, i256 %t2, i256 %t3) nounwind { ; CHECK-LABEL: @log3 -; CHECK: ARGUMENT [[IN5:\$[0-9]+]], 4 -; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 +; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 +; CHECK: ARGUMENT [[IN5:\$[0-9]+]], 4 ; CHECK: LOG3 [[IN1]], [[IN2]], [[IN3]], [[IN4]], [[IN5]] call void @llvm.evm.log3(ptr addrspace(1) %off, i256 %size, i256 %t1, i256 %t2, i256 %t3) @@ -401,12 +401,12 @@ define void @log3(ptr addrspace(1) %off, i256 %size, i256 %t1, i256 %t2, i256 %t define void @log4(ptr addrspace(1) %off, i256 %size, i256 %t1, i256 %t2, i256 %t3, i256 %t4) nounwind { ; CHECK-LABEL: @log4 -; CHECK: ARGUMENT [[IN6:\$[0-9]+]], 5 -; CHECK: ARGUMENT [[IN5:\$[0-9]+]], 4 -; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 +; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 +; CHECK: ARGUMENT [[IN5:\$[0-9]+]], 4 +; CHECK: ARGUMENT [[IN6:\$[0-9]+]], 5 ; CHECK: LOG4 [[IN1]], [[IN2]], [[IN3]], [[IN4]], [[IN5]], [[IN6]] call void @llvm.evm.log4(ptr addrspace(1) %off, i256 %size, i256 %t1, i256 %t2, i256 %t3, i256 %t4) @@ -415,9 +415,9 @@ define void @log4(ptr addrspace(1) %off, i256 %size, i256 %t1, i256 %t2, i256 %t define i256 @create(i256 %val, ptr addrspace(1) %off, i256 %size) nounwind { ; CHECK-LABEL: @create -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 ; CHECK: CREATE [[RES1:\$[0-9]+]], [[IN1]], [[IN2]], [[IN3]] %ret = call i256 @llvm.evm.create(i256 %val, ptr addrspace(1) %off, i256 %size) @@ -426,13 +426,13 @@ define i256 @create(i256 %val, ptr addrspace(1) %off, i256 %size) nounwind { define i256 @call(i256 %gas, i256 %addr, i256 %val, ptr addrspace(1) %arg_off, i256 %arg_size, ptr addrspace(1) %ret_off, i256 %ret_size) nounwind { ; CHECK-LABEL: @call -; CHECK: ARGUMENT [[IN7:\$[0-9]+]], 6 -; CHECK: ARGUMENT [[IN6:\$[0-9]+]], 5 -; CHECK: ARGUMENT [[IN5:\$[0-9]+]], 4 -; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 +; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 +; CHECK: ARGUMENT [[IN5:\$[0-9]+]], 4 +; CHECK: ARGUMENT 
[[IN6:\$[0-9]+]], 5 +; CHECK: ARGUMENT [[IN7:\$[0-9]+]], 6 ; CHECK: CALL [[RES:\$[0-9]+]], [[IN1]], [[IN2]], [[IN3]], [[IN4]], [[IN5]], [[IN6]], [[IN7]] %ret = call i256 @llvm.evm.call(i256 %gas, i256 %addr, i256 %val, ptr addrspace(1) %arg_off, i256 %arg_size, ptr addrspace(1) %ret_off, i256 %ret_size) @@ -441,12 +441,12 @@ define i256 @call(i256 %gas, i256 %addr, i256 %val, ptr addrspace(1) %arg_off, i define i256 @delegatecall(i256 %gas, i256 %addr, ptr addrspace(1) %arg_off, i256 %arg_size, ptr addrspace(1) %ret_off, i256 %ret_size) nounwind { ; CHECK-LABEL: @delegatecall -; CHECK: ARGUMENT [[IN6:\$[0-9]+]], 5 -; CHECK: ARGUMENT [[IN5:\$[0-9]+]], 4 -; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 +; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 +; CHECK: ARGUMENT [[IN5:\$[0-9]+]], 4 +; CHECK: ARGUMENT [[IN6:\$[0-9]+]], 5 ; CHECK: DELEGATECALL [[RES:\$[0-9]+]], [[IN1]], [[IN2]], [[IN3]], [[IN4]], [[IN5]], [[IN6]] %ret = call i256 @llvm.evm.delegatecall(i256 %gas, i256 %addr, ptr addrspace(1) %arg_off, i256 %arg_size, ptr addrspace (1) %ret_off, i256 %ret_size) @@ -455,10 +455,10 @@ define i256 @delegatecall(i256 %gas, i256 %addr, ptr addrspace(1) %arg_off, i256 define i256 @create2(i256 %val, ptr addrspace(1) %off, i256 %size, i256 %salt) nounwind { ; CHECK-LABEL: @create2 -; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 +; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 ; CHECK: CREATE2 [[RES1:\$[0-9]+]], [[IN1]], [[IN2]], [[IN3]], [[IN4]] %ret = call i256 @llvm.evm.create2(i256 %val, ptr addrspace(1) %off, i256 %size, i256 %salt) @@ -467,12 +467,12 @@ define i256 @create2(i256 %val, ptr addrspace(1) %off, i256 %size, i256 %salt) n define i256 @staticcall(i256 %gas, i256 %addr, ptr addrspace(1) %arg_off, i256 %arg_size, ptr addrspace(1) %ret_off, i256 %ret_size) nounwind { ; CHECK-LABEL: @staticcall -; CHECK: ARGUMENT [[IN6:\$[0-9]+]], 5 -; CHECK: ARGUMENT [[IN5:\$[0-9]+]], 4 -; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 +; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 +; CHECK: ARGUMENT [[IN5:\$[0-9]+]], 4 +; CHECK: ARGUMENT [[IN6:\$[0-9]+]], 5 ; CHECK: STATICCALL [[RES:\$[0-9]+]], [[IN1]], [[IN2]], [[IN3]], [[IN4]], [[IN5]], [[IN6]] %ret = call i256 @llvm.evm.staticcall(i256 %gas, i256 %addr, ptr addrspace(1) %arg_off, i256 %arg_size, ptr addrspace(1) %ret_off, i256 %ret_size) @@ -490,8 +490,8 @@ define void @selfdestruct(i256 %addr) nounwind { define void @return(ptr addrspace(1) %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @return -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: RETURN [[IN1]], [[IN2]] call void @llvm.evm.return(ptr addrspace(1) %rs1, i256 %rs2) @@ -500,8 +500,8 @@ define void @return(ptr addrspace(1) %rs1, i256 %rs2) nounwind { define void @revert(ptr addrspace(1) %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @revert -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: REVERT [[IN1]], [[IN2]] call void 
@llvm.evm.revert(ptr addrspace(1) %rs1, i256 %rs2) diff --git a/llvm/test/CodeGen/EVM/logical.ll b/llvm/test/CodeGen/EVM/logical.ll index 4839fb4e6ffa..43b75a30a4fe 100644 --- a/llvm/test/CodeGen/EVM/logical.ll +++ b/llvm/test/CodeGen/EVM/logical.ll @@ -5,8 +5,8 @@ target triple = "evm" define i256 @andrrr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @andrrr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: AND [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = and i256 %rs1, %rs2 @@ -15,8 +15,8 @@ define i256 @andrrr(i256 %rs1, i256 %rs2) nounwind { define i256 @orrrr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @orrrr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: OR [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = or i256 %rs1, %rs2 @@ -25,8 +25,8 @@ define i256 @orrrr(i256 %rs1, i256 %rs2) nounwind { define i256 @xorrrr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @xorrrr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: XOR [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = xor i256 %rs1, %rs2 diff --git a/llvm/test/CodeGen/EVM/memory.ll b/llvm/test/CodeGen/EVM/memory.ll index 2cfbf3f906c4..6c60d756ea85 100644 --- a/llvm/test/CodeGen/EVM/memory.ll +++ b/llvm/test/CodeGen/EVM/memory.ll @@ -5,8 +5,8 @@ target triple = "evm" define void @mstore8(ptr addrspace(1) %offset, i256 %val) nounwind { ; CHECK-LABEL: @mstore8 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: MSTORE8 [[IN1]], [[IN2]] call void @llvm.evm.mstore8(ptr addrspace(1) %offset, i256 %val) @@ -15,8 +15,8 @@ define void @mstore8(ptr addrspace(1) %offset, i256 %val) nounwind { define void @mstore(ptr addrspace(1) %offset, i256 %val) nounwind { ; CHECK-LABEL: @mstore -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: MSTORE [[IN1]], [[IN2]] store i256 %val, ptr addrspace(1) %offset, align 32 diff --git a/llvm/test/CodeGen/EVM/mod.ll b/llvm/test/CodeGen/EVM/mod.ll index c0b4e9976952..8ab6f36ba61a 100644 --- a/llvm/test/CodeGen/EVM/mod.ll +++ b/llvm/test/CodeGen/EVM/mod.ll @@ -5,8 +5,8 @@ target triple = "evm" define i256 @umodrrr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @umodrrr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: MOD [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = urem i256 %rs1, %rs2 @@ -15,8 +15,8 @@ define i256 @umodrrr(i256 %rs1, i256 %rs2) nounwind { define i256 @smodrrr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @smodrrr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SMOD [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = srem i256 %rs1, %rs2 diff --git a/llvm/test/CodeGen/EVM/mul.ll b/llvm/test/CodeGen/EVM/mul.ll index 9c286d256707..e2b89fc873bd 100644 --- a/llvm/test/CodeGen/EVM/mul.ll +++ b/llvm/test/CodeGen/EVM/mul.ll @@ -5,8 +5,8 @@ target triple = "evm" define i256 @mulrrr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @mulrrr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: MUL [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = mul i256 %rs1, %rs2 diff --git a/llvm/test/CodeGen/EVM/select.ll b/llvm/test/CodeGen/EVM/select.ll index 
084ec33481a4..d5cb9af50ba7 100644 --- a/llvm/test/CodeGen/EVM/select.ll +++ b/llvm/test/CodeGen/EVM/select.ll @@ -5,10 +5,10 @@ target triple = "evm" define i256 @select(i256 %v1, i256 %v2, i256 %v3, i256 %v4) { ; CHECK-LABEL: @select -; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 -; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 +; CHECK: ARGUMENT [[IN3:\$[0-9]+]], 2 +; CHECK: ARGUMENT [[IN4:\$[0-9]+]], 3 ; CHECK: EQ [[TMP1:\$[0-9]+]], [[IN3]], [[IN4]] ; CHECK: ISZERO [[COND:\$[0-9]+]], [[TMP1]] ; CHECK: JUMPI @.BB0_2, [[COND]] diff --git a/llvm/test/CodeGen/EVM/stack-ops-commutable.ll b/llvm/test/CodeGen/EVM/stack-ops-commutable.ll new file mode 100644 index 000000000000..93f9e1c34938 --- /dev/null +++ b/llvm/test/CodeGen/EVM/stack-ops-commutable.ll @@ -0,0 +1,371 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s | FileCheck %s + +target datalayout = "E-p:256:256-i256:256:256-S256-a:256:256" +target triple = "evm" + +define void @no_manipulations_needed_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: no_manipulations_needed_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: ADD +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = add i256 %a1, %a2 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable +} + +define void @no_manipulations_needed_with_junk_eq(i256 %a1, i256 %a2, i256 %a3) noreturn { + %cmp = icmp eq i256 %a1, %a2 + %x1 = zext i1 %cmp to i256 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable + +; CHECK-LABEL: no_manipulations_needed_with_junk_eq: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: EQ +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT +} + +define i256 @no_manipulations_needed_no_junk_addmod(i256 %a1, i256 %a2, i256 %a3) { +; CHECK-LABEL: no_manipulations_needed_no_junk_addmod: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: ADDMOD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = call i256 @llvm.evm.addmod(i256 %a2, i256 %a1, i256 %a3) + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_mulmod(i256 %a1, i256 %a2, i256 %a3) { +; CHECK-LABEL: no_manipulations_needed_no_junk_mulmod: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: MULMOD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = call i256 @llvm.evm.mulmod(i256 %a2, i256 %a1, i256 %a3) + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_and(i256 %a1, i256 %a2) { +; CHECK-LABEL: no_manipulations_needed_no_junk_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: AND +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = and i256 %a2, %a1 + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_or(i256 %a1, i256 %a2) { +; CHECK-LABEL: no_manipulations_needed_no_junk_or: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: OR +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = or i256 %a2, %a1 + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_xor(i256 %a1, i256 %a2) { +; CHECK-LABEL: no_manipulations_needed_no_junk_xor: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: XOR +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = xor i256 %a2, %a1 + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: no_manipulations_needed_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; 
CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = add i256 %a1, %a2 + ret i256 %x1 +} + +define void @reorder_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: reorder_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: ADD +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = add i256 %a2, %a1 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable +} + +define i256 @reorder_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: reorder_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = add i256 %a2, %a1 + ret i256 %x1 +} + +define void @swap_first_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: swap_first_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = add i256 %a3, %a2 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable +} + +define i256 @two_commutable(i256 %a1, i256 %a2, i256 %a3) { + %x1 = add i256 %a3, %a2 + %x2 = add i256 %a1, %x1 + ret i256 %x2 +; CHECK-LABEL: two_commutable: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: ADD +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP +} + +define void @swap_second_with_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) noreturn { +; CHECK-LABEL: swap_second_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: DUP4 +; CHECK-NEXT: ADD +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = add i256 %a1, %a4 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable +} + +define i256 @swap_first_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwind { +; CHECK-LABEL: swap_first_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = add i256 %a1, %a4 + ret i256 %x1 +} + +define i256 @swap_second_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwind { +; CHECK-LABEL: swap_second_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = add i256 %a4, %a1 + ret i256 %x1 +} + +define void @first_arg_alive_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: first_arg_alive_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: PUSH1 4 +; CHECK-NEXT: DUP2 +; CHECK-NEXT: DUP4 +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: DIV +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = add i256 %a1, %a2 + %x2 = sub i256 %a1, 4 + %x3 = udiv i256 %x2, %x1 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x3) + unreachable +} + +define i256 @first_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: first_arg_alive_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: POP +; CHECK-NEXT: PUSH1 4 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: DIV +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = add i256 %a1, %a2 + %x2 = sub i256 %a1, 4 + %x3 = udiv i256 %x2, %x1 + ret i256 %x3 +} + +define void @second_arg_alive_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: second_arg_alive_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; 
CHECK-NEXT: DUP2 +; CHECK-NEXT: PUSH1 4 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: DIV +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = add i256 %a1, %a2 + %x2 = sub i256 %a2, 4 + %x3 = udiv i256 %x2, %x1 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x3) + unreachable +} + +define i256 @second_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: second_arg_alive_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: DUP2 +; CHECK-NEXT: PUSH1 4 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: SWAP4 +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: DIV +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = add i256 %a1, %a2 + %x2 = sub i256 %a2, 4 + %x3 = udiv i256 %x2, %x1 + ret i256 %x3 +} + +define void @both_arg_alive_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: both_arg_alive_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: DUP2 +; CHECK-NEXT: DUP2 +; CHECK-NEXT: DIV +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: ADD +; CHECK-NEXT: ADD +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = add i256 %a1, %a2 + %x2 = udiv i256 %a2, %a1 + %x3 = add i256 %x1, %x2 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x3) + unreachable +} + +define i256 @both_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: both_arg_alive_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: POP +; CHECK-NEXT: DUP2 +; CHECK-NEXT: DIV +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: ADD +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = add i256 %a1, %a2 + %x2 = udiv i256 %a2, %a1 + %x3 = add i256 %x1, %x2 + ret i256 %x3 +} + +define i256 @same_arg_dead_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: same_arg_dead_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: POP +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: DUP2 +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT +; CHECK-NEXT: JUMP + %x1 = add i256 %a2, %a2 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + ret i256 %x1 +} + +define void @commutable_not_in_function_entry() noreturn { + +; CHECK-LABEL: .BB{{[0-9]+}}_3: +; CHECK: JUMPDEST +; CHECK-NEXT: PUSH4 4294967295 +; CHECK-NEXT: AND +; CHECK-NEXT: PUSH0 + +enter: + %offset = inttoptr i256 0 to ptr addrspace(2) + %load = call i256 @llvm.evm.calldataload(ptr addrspace(2) %offset) + %calldata = trunc i256 %load to i32 + br label %header + +header: + %phi = phi i32 [ %calldata, %enter ], [ %inc, %do ] + %phi2 = phi i32 [ 1, %enter ], [ %mul, %do ] + %cmp = icmp sgt i32 %phi, 0 + br i1 %cmp, label %do, label %exit + +do: + %mul = mul nsw i32 %phi2, %phi + %inc = add nsw i32 %phi, -1 + br label %header + +exit: + %res = zext i32 %phi2 to i256 + store i256 %res, ptr addrspace(1) null, align 4 + call void @llvm.evm.return(ptr addrspace(1) null, i256 32) + unreachable +} + +declare i256 @llvm.evm.addmod(i256, i256, i256) +declare i256 @llvm.evm.mulmod(i256, i256, i256) +declare i256 @llvm.evm.calldataload(ptr addrspace(2)) +declare void @llvm.evm.return(ptr addrspace(1), i256) +declare void @llvm.evm.revert(ptr addrspace(1), i256) diff --git a/llvm/test/CodeGen/EVM/stack-ops.ll b/llvm/test/CodeGen/EVM/stack-ops.ll new file mode 100644 index 000000000000..40fe299cf9f8 --- /dev/null +++ b/llvm/test/CodeGen/EVM/stack-ops.ll @@ -0,0 +1,340 @@ +; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s | FileCheck %s + +target datalayout = "E-p:256:256-i256:256:256-S256-a:256:256" +target triple = "evm" + +define void @no_manipulations_needed_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: no_manipulations_needed_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SUB +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = sub i256 %a1, %a2 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable +} + +define i256 @no_manipulations_needed_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: no_manipulations_needed_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: SUB +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = sub i256 %a1, %a2 + ret i256 %x1 +} + +define void @reorder_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: reorder_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: SUB +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = sub i256 %a2, %a1 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable +} + +define i256 @reorder_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: reorder_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: SUB +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = sub i256 %a2, %a1 + ret i256 %x1 +} + +define void @swap_first_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: swap_first_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: POP +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: SUB +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = sub i256 %a3, %a2 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable +} + +define void @swap_second_with_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) noreturn { +; CHECK-LABEL: swap_second_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: DUP4 +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: SUB +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = sub i256 %a1, %a4 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable +} + +define i256 @swap_first_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwind { +; CHECK-LABEL: swap_first_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: POP +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: SUB +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = sub i256 %a3, %a2 + ret i256 %x1 +} + +define i256 @swap_second_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwind { +; CHECK-LABEL: swap_second_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: POP +; CHECK-NEXT: SUB +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = sub i256 %a1, %a4 + ret i256 %x1 +} + +define void @swap_both_with_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) noreturn { +; CHECK-LABEL: swap_both_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: DUP4 +; CHECK-NEXT: SUB +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = sub i256 %a4, %a1 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable +} + +define i256 @swap_both_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwind { +; CHECK-LABEL: swap_both_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: POP +; 
CHECK-NEXT: SUB +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = sub i256 %a4, %a1 + ret i256 %x1 +} + +define void @first_arg_alive_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: first_arg_alive_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: PUSH1 4 +; CHECK-NEXT: DUP3 +; CHECK-NEXT: DUP3 +; CHECK-NEXT: SUB +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: DIV +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = sub i256 %a1, %a2 + %x2 = sub i256 %a1, 4 + %x3 = udiv i256 %x2, %x1 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x3) + unreachable +} + +define i256 @first_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: first_arg_alive_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: PUSH1 4 +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: DUP3 +; CHECK-NEXT: SUB +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: DIV +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = sub i256 %a1, %a2 + %x2 = sub i256 %a1, 4 + %x3 = udiv i256 %x2, %x1 + ret i256 %x3 +} + +define void @second_arg_alive_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: second_arg_alive_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: DUP2 +; CHECK-NEXT: PUSH1 4 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: DIV +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = sub i256 %a1, %a2 + %x2 = sub i256 %a2, 4 + %x3 = udiv i256 %x2, %x1 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x3) + unreachable +} + +define i256 @second_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: second_arg_alive_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: DUP2 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: POP +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: PUSH1 4 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: DIV +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = sub i256 %a1, %a2 + %x2 = sub i256 %a2, 4 + %x3 = udiv i256 %x2, %x1 + ret i256 %x3 +} + +define void @both_arg_alive_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { +; CHECK-LABEL: both_arg_alive_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: DUP2 +; CHECK-NEXT: DUP2 +; CHECK-NEXT: DIV +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: ADD +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT + %x1 = sub i256 %a1, %a2 + %x2 = udiv i256 %a2, %a1 + %x3 = add i256 %x1, %x2 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x3) + unreachable +} + +define i256 @both_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: both_arg_alive_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: POP +; CHECK-NEXT: DUP2 +; CHECK-NEXT: DIV +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: SUB +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = sub i256 %a1, %a2 + %x2 = udiv i256 %a2, %a1 + %x3 = add i256 %x1, %x2 + ret i256 %x3 +} + +define i256 @same_arg_dead_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: same_arg_dead_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: POP +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: DUP2 +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT +; CHECK-NEXT: JUMP + %x1 = add i256 %a2, %a2 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + ret i256 %x1 +} + 
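
The SWAP/DUP/POP sequences checked throughout these tests are produced by the shuffle callbacks shown earlier in EVMStackifyCodeEmitter::createStackLayout: a needed slot is duplicated when a copy is within DUP reach, re-pushed when it is rematerializable (a constant, symbol, or label), and a fatal error is raised otherwise. A simplified sketch of that push-or-dup decision under an assumed toy slot model (illustrative names, not the patch's API):

#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

struct Slot {
  int Id;                // identity of the value on the stack
  bool Rematerializable; // constants, symbols, return labels
};

// Decide how to bring Wanted to the top of Stack: DUP a reachable copy,
// otherwise re-push a rematerializable slot, otherwise fail.
std::string materialize(const std::vector<Slot> &Stack, const Slot &Wanted) {
  // Search from the top of the stack for an existing copy.
  auto It = std::find_if(Stack.rbegin(), Stack.rend(),
                         [&](const Slot &S) { return S.Id == Wanted.Id; });
  if (It != Stack.rend()) {
    unsigned Depth = std::distance(Stack.rbegin(), It);
    if (Depth < 16)
      return "DUP" + std::to_string(Depth + 1); // DUP1 copies the top slot
    if (!Wanted.Rematerializable)
      return "error: slot too deep in the stack";
    // Too deep, but freely regenerable: fall through and push it again.
  }
  return "PUSH"; // freely generated constant, symbol, or label
}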
+define i256 @same_arg_dead_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: same_arg_dead_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: POP +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = add i256 %a2, %a2 + ret i256 %x1 +} + +define i256 @same_arg_alive_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: same_arg_alive_with_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: POP +; CHECK-NEXT: DUP1 +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: DUP2 +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT +; CHECK-NEXT: JUMP + %x1 = add i256 %a2, %a2 + %x2 = add i256 %a2, %x1 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x2) + ret i256 %x2 +} + +define i256 @same_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: same_arg_alive_no_junk: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: POP +; CHECK-NEXT: DUP1 +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = add i256 %a2, %a2 + %x2 = add i256 %a2, %x1 + ret i256 %x2 +} + +declare void @llvm.evm.revert(ptr addrspace(1), i256) diff --git a/llvm/test/CodeGen/EVM/storage.ll b/llvm/test/CodeGen/EVM/storage.ll index 24336cd0164f..e66d6adfa95d 100644 --- a/llvm/test/CodeGen/EVM/storage.ll +++ b/llvm/test/CodeGen/EVM/storage.ll @@ -5,8 +5,8 @@ target triple = "evm" define void @sstore(ptr addrspace(5) %key, i256 %val) nounwind { ; CHECK-LABEL: @sstore -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SSTORE [[IN1]], [[IN2]] store i256 %val, ptr addrspace(5) %key, align 32 diff --git a/llvm/test/CodeGen/EVM/sub.ll b/llvm/test/CodeGen/EVM/sub.ll index 6b88285ef48d..ef913e9a9434 100644 --- a/llvm/test/CodeGen/EVM/sub.ll +++ b/llvm/test/CodeGen/EVM/sub.ll @@ -5,8 +5,8 @@ target triple = "evm" define i256 @subrrr(i256 %rs1, i256 %rs2) nounwind { ; CHECK-LABEL: @subrrr -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: SUB [[TMP:\$[0-9]+]], [[IN1]], [[IN2]] %res = sub i256 %rs1, %rs2 diff --git a/llvm/test/CodeGen/EVM/tstorage.ll b/llvm/test/CodeGen/EVM/tstorage.ll index c100d67d9992..8bb70f90e0e0 100644 --- a/llvm/test/CodeGen/EVM/tstorage.ll +++ b/llvm/test/CodeGen/EVM/tstorage.ll @@ -5,8 +5,8 @@ target triple = "evm" define void @tstore(ptr addrspace(6) %key, i256 %val) nounwind { ; CHECK-LABEL: @tstore -; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: ARGUMENT [[IN1:\$[0-9]+]], 0 +; CHECK: ARGUMENT [[IN2:\$[0-9]+]], 1 ; CHECK: TSTORE [[IN1]], [[IN2]] store i256 %val, ptr addrspace(6) %key, align 32 diff --git a/llvm/test/CodeGen/EVM/unused_function_arguments.ll b/llvm/test/CodeGen/EVM/unused_function_arguments.ll new file mode 100644 index 000000000000..50e68f31e91c --- /dev/null +++ b/llvm/test/CodeGen/EVM/unused_function_arguments.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "E-p:256:256-i256:256:256-S256-a:256:256" +target triple = "evm" + +define i256 @foo(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: @foo +; CHECK: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: POP +; CHECK-NEXT: DUP1 +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + + %x1 = add i256 %a1, %a1 + ret i256 %x1 +} + 
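+; Same as @foo, but the live argument is %a2: the leading POP discards dead %a1
+; from the stack top before DUP1 copies %a2 for the ADD; dead %a3 is popped too.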
+define i256 @wat(i256 %a1, i256 %a2, i256 %a3) nounwind { +; CHECK-LABEL: @wat +; CHECK: JUMPDEST +; CHECK-NEXT: POP +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + + %x1 = add i256 %a2, %a2 + ret i256 %x1 +} + +define i256 @bar() nounwind { +; CHECK-LABEL: @bar +; CHECK: JUMPDEST +; CHECK-NEXT: PUSH4 @.FUNC_RET0 +; CHECK-NEXT: PUSH1 3 +; CHECK-NEXT: PUSH1 2 +; CHECK-NEXT: PUSH1 1 +; CHECK-NEXT: PUSH4 @foo +; CHECK-NEXT: JUMP +; CHECK-LABEL: .FUNC_RET0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + + %res = call i256 @foo(i256 1, i256 2, i256 3) + ret i256 %res +} diff --git a/llvm/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll b/llvm/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll index 0bd23db7c62c..1d667d736a70 100644 --- a/llvm/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll +++ b/llvm/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s ; PR1114 +; UNSUPPORTED: target=evm{{.*}} declare i1 @foo() diff --git a/llvm/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll b/llvm/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll index c3fb54e3dd53..b3196227c86c 100644 --- a/llvm/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll +++ b/llvm/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll @@ -1,4 +1,5 @@ -; XFAIL: target=eravm{{.*}}, target=evm{{.*}} +; XFAIL: target=eravm{{.*}} +; UNSUPPORTED: target=evm{{.*}} ; TODO: CPR-920 support operators ; RUN: llc < %s ; PR2603 diff --git a/llvm/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll b/llvm/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll index 605fe346c9d3..bed186acb538 100644 --- a/llvm/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll +++ b/llvm/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll @@ -2,6 +2,7 @@ ; rdar://6836460 ; rdar://7516906 ; PR5963 +; UNSUPPORTED: target=evm{{.*}} define i32 @test(ptr %P) nounwind { entry: diff --git a/llvm/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll b/llvm/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll index f5f2276ac820..f72452fc0cbf 100644 --- a/llvm/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll +++ b/llvm/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s +; UNSUPPORTED: target=evm{{.*}} ; This caused ScheduleDAG to crash in EmitPhysRegCopy when searching ; the uses of a copy to a physical register without ignoring non-data diff --git a/llvm/test/CodeGen/Generic/2012-06-08-APIntCrash.ll b/llvm/test/CodeGen/Generic/2012-06-08-APIntCrash.ll index f08923669ece..ce3c4ec680df 100644 --- a/llvm/test/CodeGen/Generic/2012-06-08-APIntCrash.ll +++ b/llvm/test/CodeGen/Generic/2012-06-08-APIntCrash.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s +; UNSUPPORTED: target=evm{{.*}} define void @test1(ptr %ptr) { diff --git a/llvm/test/CodeGen/Generic/i128-addsub.ll b/llvm/test/CodeGen/Generic/i128-addsub.ll index e61658ed2430..7a5e3e6f1e1f 100644 --- a/llvm/test/CodeGen/Generic/i128-addsub.ll +++ b/llvm/test/CodeGen/Generic/i128-addsub.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s +; UNSUPPORTED: target=evm{{.*}} define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, ptr %RL, ptr %RH) { entry: diff --git a/llvm/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/llvm/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll index 6cc2b4040d18..e9a4fe5a1955 100644 --- a/llvm/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll +++ 
b/llvm/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s
+; UNSUPPORTED: target=evm{{.*}}
declare { i64, double } @wild()
define void @foo(ptr %p, ptr %q) nounwind personality ptr @__gxx_personality_v0 {
diff --git a/llvm/test/CodeGen/Generic/undef-phi.ll b/llvm/test/CodeGen/Generic/undef-phi.ll
index 0e221fe612ab..89d73901436d 100644
--- a/llvm/test/CodeGen/Generic/undef-phi.ll
+++ b/llvm/test/CodeGen/Generic/undef-phi.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -verify-machineinstrs -verify-coalescing
+; UNSUPPORTED: target=evm{{.*}}
;
; This function has a PHI with one undefined input. Verify that PHIElimination
; inserts an IMPLICIT_DEF instruction in the predecessor so all paths to the use
diff --git a/llvm/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll b/llvm/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
index 3a601d0c3f4a..865c60038f8f 100644
--- a/llvm/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
+++ b/llvm/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
@@ -1,6 +1,7 @@
; RUN: opt < %s -O3 | llc -no-integrated-as | FileCheck %s
; REQUIRES: default_triple
; XFAIL: target=eravm{{.*}}
+; UNSUPPORTED: target=evm{{.*}}
;; We don't want branch folding to fold asm directives.
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/evm-basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/evm-basic.ll.expected
index 4b6359646f18..4a4f1e007b5d 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/evm-basic.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/evm-basic.ll.expected
@@ -5,17 +5,12 @@ define i256 @swap_second_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwin
; CHECK-LABEL: swap_second_no_junk:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
-; CHECK-NEXT: DUP2
-; CHECK-NEXT: DUP6
-; CHECK-NEXT: SUB
-; CHECK-NEXT: SWAP5
-; CHECK-NEXT: POP
-; CHECK-NEXT: DUP1
-; CHECK-NEXT: SWAP4
-; CHECK-NEXT: POP
-; CHECK-NEXT: POP
; CHECK-NEXT: POP
+; CHECK-NEXT: SWAP3
+; CHECK-NEXT: SWAP2
; CHECK-NEXT: POP
; CHECK-NEXT: POP
+; CHECK-NEXT: SUB
+; CHECK-NEXT: SWAP1
; CHECK-NEXT: JUMP
%x1 = sub i256 %a4, %a1
ret i256 %x1
diff --git a/llvm/unittests/Target/EVM/CMakeLists.txt b/llvm/unittests/Target/EVM/CMakeLists.txt
new file mode 100644
index 000000000000..315aef1b0650
--- /dev/null
+++ b/llvm/unittests/Target/EVM/CMakeLists.txt
@@ -0,0 +1,25 @@
+include_directories(
+  ${LLVM_MAIN_SRC_DIR}/lib/Target/EVM
+  ${LLVM_BINARY_DIR}/lib/Target/EVM
+  )
+
+set(LLVM_LINK_COMPONENTS
+  EVM
+  EVMDesc
+  EVMInfo
+  CodeGen
+  Core
+  MC
+  MIRParser
+  SelectionDAG
+  Support
+  Target
+  TargetParser
+)
+
+add_llvm_target_unittest(EVMTests
+  StackShuffler.cpp
+  StackModel.cpp
+  )
+
+set_property(TARGET EVMTests PROPERTY FOLDER "Tests/UnitTests/TargetTests")
diff --git a/llvm/unittests/Target/EVM/StackModel.cpp b/llvm/unittests/Target/EVM/StackModel.cpp
new file mode 100644
index 000000000000..aaf9da9debe7
--- /dev/null
+++ b/llvm/unittests/Target/EVM/StackModel.cpp
@@ -0,0 +1,157 @@
+//===---------- llvm/unittests/Target/EVM/StackModel.cpp ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "EVMStackModel.h"
+#include "EVMTargetMachine.h"
+#include "MCTargetDesc/EVMMCTargetDesc.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Target/CodeGenCWrappers.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+#include <memory>
+static TargetMachine *unwrap(LLVMTargetMachineRef P) {
+  return reinterpret_cast<TargetMachine *>(P);
+}
+
+class EVMStackModelTest : public testing::Test {
+  void SetUp() override {
+    LLVMInitializeEVMTargetInfo();
+    LLVMInitializeEVMTarget();
+    LLVMInitializeEVMTargetMC();
+
+    LLVMTargetRef Target = 0;
+    const char *Triple = "evm";
+    char *ErrMsg = 0;
+    if (LLVMGetTargetFromTriple(Triple, &Target, &ErrMsg)) {
+      FAIL() << "Failed to create target from the triple (" << Triple
+             << "): " << ErrMsg;
+      return;
+    }
+    ASSERT_TRUE(Target);
+
+    // Construct a TargetMachine.
+    TM =
+        LLVMCreateTargetMachine(Target, Triple, "", "", LLVMCodeGenLevelDefault,
+                                LLVMRelocDefault, LLVMCodeModelDefault);
+    Context = std::make_unique<LLVMContext>();
+    Mod = std::make_unique<Module>("TestModule", *Context);
+    Mod->setDataLayout(unwrap(TM)->createDataLayout());
+    const LLVMTargetMachine &LLVMTM =
+        static_cast<const LLVMTargetMachine &>(*unwrap(TM));
+    MMIWP = std::make_unique<MachineModuleInfoWrapperPass>(&LLVMTM);
+
+    Type *const ReturnType = Type::getVoidTy(Mod->getContext());
+    FunctionType *FunctionType = FunctionType::get(ReturnType, false);
+    Function *const F = Function::Create(
+        FunctionType, GlobalValue::InternalLinkage, "TestFunction", Mod.get());
+    MF = &MMIWP->getMMI().getOrCreateMachineFunction(*F);
+
+    LIS = std::make_unique<LiveIntervals>();
+    StackModel = std::make_unique<EVMStackModel>(*MF, *LIS.get());
+  }
+
+  void TearDown() override { LLVMDisposeTargetMachine(TM); }
+
+public:
+  LLVMTargetMachineRef TM;
+  std::unique_ptr<LLVMContext> Context;
+  std::unique_ptr<MachineModuleInfoWrapperPass> MMIWP;
+  std::unique_ptr<Module> Mod;
+  std::unique_ptr<LiveIntervals> LIS;
+  std::unique_ptr<EVMStackModel> StackModel;
+  MachineFunction *MF = nullptr;
+};
+
+TEST_F(EVMStackModelTest, LiteralSlot) {
+  APInt Int0 = APInt(32, 0);
+  APInt Int42 = APInt(32, 42);
+
+  auto *LiteralSlot0 = StackModel->getLiteralSlot(Int0);
+  auto *LiteralSlot0Copy = StackModel->getLiteralSlot(Int0);
+  EXPECT_TRUE(LiteralSlot0 == LiteralSlot0Copy);
+
+  auto *LiteralSlot42 = StackModel->getLiteralSlot(Int42);
+  EXPECT_TRUE(LiteralSlot0 != LiteralSlot42);
+  EXPECT_TRUE(LiteralSlot0->getValue() != LiteralSlot42->getValue());
+}
+
+TEST_F(EVMStackModelTest, VariableSlot) {
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  Register Reg1 = MRI.createVirtualRegister(&EVM::GPRRegClass);
+  Register Reg2 = MRI.createVirtualRegister(&EVM::GPRRegClass);
+
+  auto *VarSlot1 = StackModel->getVariableSlot(Reg1);
+  auto *VarSlot1Copy = StackModel->getVariableSlot(Reg1);
+  EXPECT_TRUE(VarSlot1 == VarSlot1Copy);
+
+  auto *VarSlot2 = StackModel->getVariableSlot(Reg2);
+  EXPECT_TRUE(VarSlot1 != VarSlot2);
+  EXPECT_TRUE(VarSlot1->getReg() != VarSlot2->getReg());
+}
+
+TEST_F(EVMStackModelTest, SymbolSlot) {
+  MCInstrDesc MCID = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  auto MI = MF->CreateMachineInstr(MCID, DebugLoc());
+  auto MAI = MCAsmInfo();
+  auto MC = std::make_unique<MCContext>(Triple("evm"), &MAI, nullptr, nullptr,
+                                        nullptr, nullptr, false);
+  MCSymbol *Sym1 = MC->createTempSymbol("sym1", false);
+  MCSymbol *Sym2 = MC->createTempSymbol("sym2", false);
+
+  auto *SymSlot1 = StackModel->getSymbolSlot(Sym1, MI);
+  auto *SymSlot1Copy = StackModel->getSymbolSlot(Sym1, MI);
+  EXPECT_TRUE(SymSlot1 == SymSlot1Copy);
+
+  auto *SymSlot2 = StackModel->getSymbolSlot(Sym2, MI);
+  EXPECT_TRUE(SymSlot1 != SymSlot2);
+  EXPECT_TRUE(SymSlot1->getSymbol() != SymSlot2->getSymbol());
+}
+
+TEST_F(EVMStackModelTest, FunctionCallReturnLabelSlot) {
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+  auto Call = MF->CreateMachineInstr(TII->get(EVM::FCALL), DebugLoc());
+  auto Call2 = MF->CreateMachineInstr(TII->get(EVM::FCALL), DebugLoc());
+
+  auto *RetSlot1 = StackModel->getFunctionCallReturnLabelSlot(Call);
+  auto *RetSlot1Copy = StackModel->getFunctionCallReturnLabelSlot(Call);
+  EXPECT_TRUE(RetSlot1 == RetSlot1Copy);
+
+  auto *RetSlot2 = StackModel->getFunctionCallReturnLabelSlot(Call2);
+  EXPECT_TRUE(RetSlot1 != RetSlot2);
+  EXPECT_TRUE(RetSlot1->getCall() != RetSlot2->getCall());
+}
+
+TEST_F(EVMStackModelTest, FunctionReturnLabelSlot) {
+  // Ensure the function return label slot is a singleton.
+  EXPECT_TRUE(StackModel->getFunctionReturnLabelSlot(MF) ==
+              StackModel->getFunctionReturnLabelSlot(MF));
+}
+
+TEST_F(EVMStackModelTest, TemporarySlot) {
+  MCInstrDesc MCID = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  auto MI = MF->CreateMachineInstr(MCID, DebugLoc());
+
+  auto *TempSlot1 = StackModel->getTemporarySlot(MI, 0);
+  auto *TempSlot1Copy = StackModel->getTemporarySlot(MI, 0);
+  EXPECT_TRUE(TempSlot1 == TempSlot1Copy);
+
+  auto *TempSlot2 = StackModel->getTemporarySlot(MI, 1);
+  EXPECT_TRUE(TempSlot1 != TempSlot2);
+}
+
+TEST_F(EVMStackModelTest, JunkSlot) {
+  // Ensure the junk slot is a singleton.
+  EXPECT_TRUE(EVMStackModel::getJunkSlot() == EVMStackModel::getJunkSlot());
+}
diff --git a/llvm/unittests/Target/EVM/StackShuffler.cpp b/llvm/unittests/Target/EVM/StackShuffler.cpp
new file mode 100644
index 000000000000..2e99bb685ee8
--- /dev/null
+++ b/llvm/unittests/Target/EVM/StackShuffler.cpp
@@ -0,0 +1,204 @@
+//===---------- llvm/unittests/Target/EVM/StackShuffler.cpp ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "EVMRegisterInfo.h"
+#include "EVMStackDebug.h"
+#include "EVMStackModel.h"
+#include "EVMStackShuffler.h"
+#include "EVMTargetMachine.h"
+#include "MCTargetDesc/EVMMCTargetDesc.h"
+#include "llvm-c/Core.h"
+#include "llvm-c/IRReader.h"
+#include "llvm-c/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Target/CodeGenCWrappers.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+#include <iostream>
+#include <sstream>
+
+using namespace llvm;
+
+#include <memory>
+static TargetMachine *unwrap(LLVMTargetMachineRef P) {
+  return reinterpret_cast<TargetMachine *>(P);
+}
+
+class EVMStackShufflerTest : public testing::Test {
+  void SetUp() override {
+    LLVMInitializeEVMTargetInfo();
+    LLVMInitializeEVMTarget();
+    LLVMInitializeEVMTargetMC();
+
+    LLVMTargetRef Target = 0;
+    const char *Triple = "evm";
+    char *ErrMsg = 0;
+    if (LLVMGetTargetFromTriple(Triple, &Target, &ErrMsg)) {
+      FAIL() << "Failed to create target from the triple (" << Triple
+             << "): " << ErrMsg;
+      return;
+    }
+    ASSERT_TRUE(Target);
+
+    // Construct a TargetMachine.
+    TM =
+        LLVMCreateTargetMachine(Target, Triple, "", "", LLVMCodeGenLevelDefault,
+                                LLVMRelocDefault, LLVMCodeModelDefault);
+    Context = std::make_unique<LLVMContext>();
+    Mod = std::make_unique<Module>("TestModule", *Context);
+    Mod->setDataLayout(unwrap(TM)->createDataLayout());
+    const LLVMTargetMachine &LLVMTM =
+        static_cast<const LLVMTargetMachine &>(*unwrap(TM));
+    MMIWP = std::make_unique<MachineModuleInfoWrapperPass>(&LLVMTM);
+
+    Type *const ReturnType = Type::getVoidTy(Mod->getContext());
+    FunctionType *FunctionType = FunctionType::get(ReturnType, false);
+    Function *const F = Function::Create(
+        FunctionType, GlobalValue::InternalLinkage, "TestFunction", Mod.get());
+    MF = &MMIWP->getMMI().getOrCreateMachineFunction(*F);
+
+    LIS = std::make_unique<LiveIntervals>();
+    StackModel = std::make_unique<EVMStackModel>(*MF, *LIS.get());
+  }
+
+  void TearDown() override { LLVMDisposeTargetMachine(TM); }
+
+public:
+  LLVMTargetMachineRef TM;
+  MachineFunction *MF = nullptr;
+  std::unique_ptr<LLVMContext> Context;
+  std::unique_ptr<Module> Mod;
+  std::unique_ptr<MachineModuleInfoWrapperPass> MMIWP;
+  std::unique_ptr<LiveIntervals> LIS;
+  std::unique_ptr<EVMStackModel> StackModel;
+};
+
+TEST_F(EVMStackShufflerTest, Basic) {
+  Stack SourceStack;
+  Stack TargetStack;
+
+  MachineBasicBlock *MBB = MF->CreateMachineBasicBlock(nullptr);
+  MF->push_back(MBB);
+
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+  const MCInstrDesc &MCID = TII->get(EVM::SELFBALANCE);
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  auto CreateInstr = [&]() {
+    Register Reg = MRI.createVirtualRegister(&EVM::GPRRegClass);
+    MachineInstr *MI = BuildMI(MBB, DebugLoc(), MCID, Reg);
+    return std::pair(MI, Reg);
+  };
+  SmallVector<std::pair<MachineInstr *, Register>> Instrs;
+  for (unsigned I = 0; I < 17; ++I)
+    Instrs.emplace_back(CreateInstr());
+
+  // Create the source stack layout:
+  // [ %0 %1 %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 RET RET %5 ]
+  SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[0].second));
+  SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[1].second));
+  SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[2].second));
+  SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[3].second));
+  SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[4].second));
+ 
SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[5].second)); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[6].second)); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[7].second)); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[9].second)); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[10].second)); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[11].second)); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[12].second)); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[13].second)); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[14].second)); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[15].second)); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[16].second)); + SourceStack.emplace_back( + StackModel->getFunctionReturnLabelSlot(MBB->getParent())); + SourceStack.emplace_back( + StackModel->getFunctionReturnLabelSlot(MBB->getParent())); + SourceStack.emplace_back(StackModel->getVariableSlot(Instrs[5].second)); + + // [ %1 %0 %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 RET JUNK JUNK ] + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[1].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[0].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[2].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[3].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[4].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[5].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[6].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[7].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[9].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[10].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[11].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[12].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[13].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[14].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[15].second)); + TargetStack.emplace_back(StackModel->getVariableSlot(Instrs[16].second)); + TargetStack.emplace_back( + StackModel->getFunctionReturnLabelSlot(MBB->getParent())); + TargetStack.emplace_back(StackModel->getJunkSlot()); + TargetStack.emplace_back(StackModel->getJunkSlot()); + + StringRef Reference("\ +[ %0 %1 %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 RET RET %5 ]\n\ +POP\n\ +[ %0 %1 %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 RET RET ]\n\ +SWAP16\n\ +[ %0 RET %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 RET %1 ]\n\ +SWAP16\n\ +[ %0 %1 %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 RET RET ]\n\ +POP\n\ +[ %0 %1 %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 RET ]\n\ +SWAP15\n\ +[ %0 RET %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 %1 ]\n\ +SWAP16\n\ +[ %1 RET %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 %0 ]\n\ +SWAP15\n\ +[ %1 %0 %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 RET ]\n\ +PUSH JUNK\n\ +[ %1 %0 %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 RET JUNK ]\n\ +PUSH JUNK\n\ +[ %1 %0 %2 %3 %4 %5 %6 %7 %9 %10 %11 %12 %13 %14 %15 %16 RET JUNK JUNK ]\n"); + + std::ostringstream Output; + createStackLayout( + SourceStack, TargetStack, + [&](unsigned SwapDepth) { // swap + Output << 
stackToString(SourceStack) << '\n'; + Output << "SWAP" << SwapDepth << '\n'; + }, + [&](const StackSlot *Slot) { // dupOrPush + Output << stackToString(SourceStack) << '\n'; + if (Slot->isRematerializable()) + Output << "PUSH " << Slot->toString() << '\n'; + else { + Stack TmpStack = SourceStack; + std::reverse(TmpStack.begin(), TmpStack.end()); + auto It = std::find(TmpStack.begin(), TmpStack.end(), Slot); + if (It == TmpStack.end()) + FAIL() << "Invalid DUP operation."; + + auto Depth = std::distance(TmpStack.begin(), It); + Output << "DUP" << Depth + 1 << '\n'; + } + }, + [&]() { // pop + Output << stackToString(SourceStack) << '\n'; + Output << "POP" << '\n'; + }); + + Output << stackToString(SourceStack) << '\n'; + std::cerr << Output.str(); + EXPECT_TRUE(Reference == Output.str()); +}