llvm · tzuralon · Jul 19, 2025 · Jul 20, 2025 · Jul 20, 2025 · Jul 27, 2025
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -17,6 +17,7 @@
 
 #include "CoroInternal.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Analysis/StackLifetime.h"
 #include "llvm/IR/DIBuilder.h"
@@ -35,6 +36,7 @@
 #include "llvm/Transforms/Coroutines/SpillUtils.h"
 #include "llvm/Transforms/Coroutines/SuspendCrossingInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
@@ -973,6 +975,178 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
   return FrameTy;
 }
 
+// This function assumes that it is being called on basic block in reversed
+// post-order, meaning predecessors are visited before successors failing to do
+// so will cause VMap to be non-valid and will cause instructions to fail
+// mapping to their corresponding clones
+static void finalizeBasicBlockCloneAndTrackSuccessors(
+    BasicBlock *InitialBlock, BasicBlock *ClonedBlock, ValueToValueMapTy &VMap,
+    SmallSet<BasicBlock *, 20> &SuccessorBlocksSet) {
+  // This code will examine the basic block, fix issues caused by clones
+  //  for example - tailor cleanupret to the corresponding cleanuppad
+  //  it will use VMap to do so
+  // in addition, it will add the node successors to SuccessorBlocksSet
+
+  // Remap the instructions, VMap here aggregates instructions across
+  // multiple BasicBlocks, and we assume that traversal is reversed post-order,
+  // therefore successor blocks (for example instructions having funclet
+  // tags) will be mapped correctly to the new cloned cleanuppad
+  for (Instruction &ClonedBlockInstruction : *ClonedBlock) {
+    RemapInstruction(&ClonedBlockInstruction, VMap,
+                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+  }
+
+  const auto &InitialBlockTerminator = InitialBlock->getTerminator();
+
+  // If it's cleanupret, find the correspondant cleanuppad (use the VMap to
+  // find it).
+  if (auto *InitialBlockTerminatorCleanupReturn =
+          dyn_cast<CleanupReturnInst>(InitialBlockTerminator)) {
+    // if none found do nothing
+    if (VMap.find(InitialBlockTerminatorCleanupReturn->getCleanupPad()) ==
+        VMap.end()) {
+      return;
+    }
+
+    auto *ClonedBlockTerminatorCleanupReturn =
+        cast<CleanupReturnInst>(ClonedBlock->getTerminator());
+
+    // Assuming reversed post-order traversal
+    llvm::Value *ClonedBlockCleanupPadValue =
+        VMap[InitialBlockTerminatorCleanupReturn->getCleanupPad()];
+    auto *ClonedBlockCleanupPad =
+        cast<CleanupPadInst>(ClonedBlockCleanupPadValue);
+    ClonedBlockTerminatorCleanupReturn->setCleanupPad(ClonedBlockCleanupPad);
+
+    // If it's a branch/invoke, keep track of its successors, we want to
+    // calculate dominance between CoroBegin and them also
+  } else if (auto *InitialBlockTerminatorBranch =
+                 dyn_cast<BranchInst>(InitialBlockTerminator)) {
+    for (unsigned int successorIdx = 0;
+         successorIdx < InitialBlockTerminatorBranch->getNumSuccessors();
+         ++successorIdx) {
+      SuccessorBlocksSet.insert(
+          InitialBlockTerminatorBranch->getSuccessor(successorIdx));
+    }
+  } else if (auto *InitialBlockTerminatorInvoke =
+                 dyn_cast<InvokeInst>(InitialBlockTerminator)) {
+    SuccessorBlocksSet.insert(InitialBlockTerminatorInvoke->getUnwindDest());
+  } else if (isa<ReturnInst>(InitialBlockTerminator)) {
+    // No action needed
+  } else {
+    InitialBlockTerminator->print(dbgs());
+    report_fatal_error("Terminator is not implemented in "
+                       "finalizeBasicBlockCloneAndTrackSuccessors");
+  }
+}
+
+// Dominance issue fixer for each predecessor satisfying predicate function
+void splitIfBasicBlockPredecessors(
+    BasicBlock *InitialBlock, BasicBlock *ReplacementBlock,
+    std::function<bool(BasicBlock *)> Predicate) {
+
+  SmallVector<BasicBlock *> InitialBlockPredecessors;
+
+  auto Predecessors = predecessors(InitialBlock);
+  std::copy_if(Predecessors.begin(), Predecessors.end(),
+               std::back_inserter(InitialBlockPredecessors), Predicate);
+
+  // Fixups on the predecessors terminator - point them to ReplacementBlock.
+  for (auto *InitialBlockPredecessor : InitialBlockPredecessors) {
+    auto *InitialBlockPredecessorTerminator =
+        InitialBlockPredecessor->getTerminator();
+    if (auto *InitialBlockPredecessorTerminatorInvoke =
+            dyn_cast<InvokeInst>(InitialBlockPredecessorTerminator)) {
+      if (InitialBlock ==
+          InitialBlockPredecessorTerminatorInvoke->getUnwindDest()) {
+        InitialBlockPredecessorTerminatorInvoke->setUnwindDest(
+            ReplacementBlock);
+      } else {
+        InitialBlockPredecessorTerminatorInvoke->setNormalDest(
+            ReplacementBlock);
+      }
+    } else if (auto *InitialBlockPredecessorTerminatorBranch =
+                   dyn_cast<BranchInst>(InitialBlockPredecessorTerminator)) {
+      for (unsigned int successorIdx = 0;
+           successorIdx <
+           InitialBlockPredecessorTerminatorBranch->getNumSuccessors();
+           ++successorIdx) {
+        if (InitialBlock ==
+            InitialBlockPredecessorTerminatorBranch->getSuccessor(
+                successorIdx)) {
+          InitialBlockPredecessorTerminatorBranch->setSuccessor(
+              successorIdx, ReplacementBlock);
+        }
+      }
+    } else if (auto *InitialBlockPredecessorTerminatorCleanupReturn =
+                   dyn_cast<CleanupReturnInst>(
+                       InitialBlockPredecessorTerminator)) {
+      InitialBlockPredecessorTerminatorCleanupReturn->setUnwindDest(
+          ReplacementBlock);
+    } else {
+      InitialBlockPredecessorTerminator->print(dbgs());
+      report_fatal_error(
+          "Terminator is not implemented in splitIfBasicBlockPredecessors");
+    }
+  }
+}
+
+// Fixer for the "Instruction does not dominate all uses!" bug
+// The fix consists of mapping problematic paths (where CoroBegin does not
+// dominate cleanup nodes) and duplicates them to 2 flows - the one that
+// insertSpills intended to create (using the spill) and another one, preserving
+// the logics of pre-splitting, which would be triggered if unwinding happened
+// before CoroBegin
+static void
+splitBasicBlocksNotDominatedByCoroBegin(const FrameDataInfo &FrameData,
+                                        const coro::Shape &Shape, Function *F,
+                                        DominatorTree &DT) {
+  ValueToValueMapTy VMap;
+  SmallSet<BasicBlock *, 20> SpillUserBlocksSet;
+
+  // Prepare the node set, logics will be run only on those nodes
+  for (const auto &E : FrameData.Spills) {
+    for (auto *U : E.second) {
+      auto CurrentBlock = U->getParent();
+      if (!DT.dominates(Shape.CoroBegin, CurrentBlock)) {
+        SpillUserBlocksSet.insert(CurrentBlock);
+      }
+    }
+  }
+
+  // Run is in reversed post order, to enforce visiting predecessors before
+  // successors
+  for (BasicBlock *CurrentBlock : ReversePostOrderTraversal<Function *>(F)) {
+    if (!SpillUserBlocksSet.contains(CurrentBlock)) {
+      continue;
+    }
+    SpillUserBlocksSet.erase(CurrentBlock);
+
+    // Preserve the current node. the duplicate will become the unspilled
+    // alternative
+    auto UnspilledAlternativeBlock =
+        CloneBasicBlock(CurrentBlock, VMap, ".unspilled_alternative", F);
+
+    // Remap node instructions, keep track of successors to visit them in next
+    // iterations
+    finalizeBasicBlockCloneAndTrackSuccessors(
+        CurrentBlock, UnspilledAlternativeBlock, VMap, SpillUserBlocksSet);
+
+    // Split only if predecessor breaks dominance against CoroBegin
+    splitIfBasicBlockPredecessors(CurrentBlock, UnspilledAlternativeBlock,
+                                  [&DT, &Shape](BasicBlock *PredecessorNode) {
+                                    return !DT.dominates(Shape.CoroBegin,
+                                                         PredecessorNode);
+                                  });
+
+    // We changed dominance tree, so recalculate
+    DT.recalculate(*F);
+  }
+
+  assert(SpillUserBlocksSet.empty() &&
+         "Graph is corrupted by SpillUserBlocksSet");
+}
+
 // Replace all alloca and SSA values that are accessed across suspend points
 // with GetElementPointer from coroutine frame + loads and stores. Create an
 // AllocaSpillBB that will become the new entry block for the resume parts of
@@ -1052,6 +1226,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
     return GEP;
   };
 
+  splitBasicBlocksNotDominatedByCoroBegin(FrameData, Shape, F, DT);
+
   for (auto const &E : FrameData.Spills) {
     Value *Def = E.first;
     auto SpillAlignment = Align(FrameData.getAlign(Def));

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -376,7 +376,16 @@ static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
   // If coro.end has an associated bundle, add cleanupret instruction.
   if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
     auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
-    auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
+
+    // If the terminator is an invoke,
+    // set the cleanupret unwind destination the same as the other edges, to
+    // avoid validation errors
+    BasicBlock *UBB = nullptr;
+    if (auto II = dyn_cast<InvokeInst>(FromPad->getParent()->getTerminator())) {
+      UBB = II->getUnwindDest();
+    }
+
+    auto *CleanupRet = Builder.CreateCleanupRet(FromPad, UBB);
     End->getParent()->splitBasicBlock(End);
     CleanupRet->getParent()->getTerminator()->eraseFromParent();
   }

diff --git a/llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate.ll b/llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate.ll
@@ -0,0 +1,45 @@
+; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions
+; crashed before fix because of the validation mismatch of Instruction does not dominate all uses!
+; RUN: opt < %s -passes='coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"
+; RUN: opt < %s -passes='default<Os>,coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"
+
+; Function Attrs: presplitcoroutine
+define i8 @"?resuming_on_new_thread@@YA?AUtask@@Vunique_ptr@@@Z"(ptr %0) #0 personality ptr null {
+  invoke void @llvm.seh.scope.begin()
+          to label %2 unwind label %14
+
+2:                                                ; preds = %1
+  %3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %4 = load volatile ptr, ptr null, align 8
+  %5 = call ptr @llvm.coro.begin(token %3, ptr %4)
+  %6 = call token @llvm.coro.save(ptr null)
+  %7 = call i8 @llvm.coro.suspend(token none, i1 false)
+  invoke void @llvm.seh.try.begin()
+          to label %common.ret unwind label %8
+
+common.ret:                                       ; preds = %12, %10, %2
+  ret i8 0
+
+8:                                                ; preds = %2
+  %9 = catchswitch within none [label %10] unwind label %12
+
+10:                                               ; preds = %8
+  %11 = catchpad within %9 [ptr null, i32 0, ptr null]
+  br label %common.ret
+
+12:                                               ; preds = %8
+  %13 = cleanuppad within none []
+  invoke void @llvm.seh.scope.end()
+          to label %common.ret unwind label %14
+
+14:                                               ; preds = %12, %1
+  %15 = cleanuppad within none []
+  store i32 0, ptr %0, align 4
+  cleanupret from %15 unwind to caller
+}
+
+attributes #0 = { presplitcoroutine }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 2, !"eh-asynch", i32 1}
diff --git a/llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate_2.ll b/llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate_2.ll
@@ -0,0 +1,45 @@
+; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions
+; crashed before fix because of the validation mismatch of Instruction does not dominate all uses!
+; RUN: opt < %s -passes='coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"
+; RUN: opt < %s -passes='default<Os>,coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"
+
+; Function Attrs: presplitcoroutine
+define i8 @"?resuming_on_new_thread@@YA?AUtask@@Vunique_ptr@@@Z"(ptr %0) #0 personality ptr null {
+  invoke void @llvm.seh.scope.begin()
+          to label %2 unwind label %14
+
+2:                                                ; preds = %1
+  %3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %4 = load volatile ptr, ptr null, align 8
+  %5 = call ptr @llvm.coro.begin(token %3, ptr %4)
+  %6 = call token @llvm.coro.save(ptr null)
+  %7 = call i8 @llvm.coro.suspend(token none, i1 false)
+  invoke void @llvm.seh.try.begin()
+          to label %common.ret unwind label %8
+
+common.ret:                                       ; preds = %12, %10, %2
+  ret i8 0
+
+8:                                                ; preds = %2
+  %9 = catchswitch within none [label %10] unwind label %12
+
+10:                                               ; preds = %8
+  %11 = catchpad within %9 [ptr null, i32 0, ptr null]
+  br label %common.ret
+
+12:                                               ; preds = %8
+  %13 = cleanuppad within none []
+  invoke void @llvm.seh.scope.end()
+          to label %common.ret unwind label %14
+
+14:                                               ; preds = %12, %1
+  %15 = cleanuppad within none []
+  store i32 0, ptr %0, align 4
+  br label %common.ret
+}
+
+attributes #0 = { presplitcoroutine }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 2, !"eh-asynch", i32 1}
diff --git a/llvm/test/Transforms/Coroutines/pr148035_not_implemented_terminator_for_pred.ll b/llvm/test/Transforms/Coroutines/pr148035_not_implemented_terminator_for_pred.ll
@@ -0,0 +1,51 @@
+; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions
+; crashed after first phase of fix because the terminator cleanupret was not implemented on predecessor fixer at the time
+; RUN: opt < %s -passes='coro-split' -S
+; RUN: opt < %s -passes='default<Os>,coro-split' -S
+
+; Function Attrs: presplitcoroutine
+define i8 @"?resuming_on_new_thread@@YA?AUtask@@V?$unique_ptr@HU?$default_delete@H@std@@@std@@0@Z"(ptr %0) #0 personality ptr null {
+  invoke void @llvm.seh.scope.begin()
+          to label %2 unwind label %15
+
+2:                                                ; preds = %1
+  %3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %4 = call ptr @llvm.coro.begin(token %3, ptr null)
+  %5 = call token @llvm.coro.save(ptr null)
+  %6 = call i8 @llvm.coro.suspend(token none, i1 false)
+  invoke void @llvm.seh.try.begin()
+          to label %7 unwind label %8
+
+7:                                                ; preds = %2
+  ret i8 0
+
+8:                                                ; preds = %2
+  %9 = catchswitch within none [label %10] unwind label %12
+
+10:                                               ; preds = %8
+  %11 = catchpad within %9 [ptr null, i32 0, ptr null]
+  ret i8 0
+
+12:                                               ; preds = %8
+  %13 = cleanuppad within none []
+  invoke void @llvm.seh.scope.end()
+          to label %14 unwind label %15
+
+14:                                               ; preds = %12
+  ret i8 0
+
+15:                                               ; preds = %12, %1
+  %16 = cleanuppad within none []
+  cleanupret from %16 unwind label %17
+
+17:                                               ; preds = %15
+  %18 = cleanuppad within none []
+  store i32 0, ptr %0, align 4
+  cleanupret from %18 unwind to caller
+}
+
+attributes #0 = { presplitcoroutine }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 2, !"eh-asynch", i32 1}