Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
6ccbdcd
[Coroutines] added 5 tests that fail on the validations errors in #14…
tzuralon Jul 19, 2025
1d303ca
[Coroutines] fixed a bug when coro.end had an associated bundle, and …
tzuralon Jul 20, 2025
887bcb2
[Coroutines] fixed a bug when insertSpills performed spilling of Coro…
tzuralon Jul 20, 2025
7f00eeb
[Coroutines] added more 3 tests that are still failing due to complex…
tzuralon Jul 27, 2025
5899a93
[Coroutines] fixed the bugs introduced in the last 3 tests, improved …
tzuralon Jul 27, 2025
0b8e37a
Merge branch 'main' into users/tzuralon/coro-async-exceptions-fixes
tzuralon Jul 28, 2025
428d6b4
[Coroutines] removed 3 non-reduced tests that are redundant due to th…
tzuralon Jul 31, 2025
ffe4587
[Coroutines] reduced a test IR that introduces an unhandled edge-case…
tzuralon Jul 31, 2025
c8b944c
[Coroutines] reduced the IR tests a bit more by reducing unneccesary …
tzuralon Aug 3, 2025
8fa7b44
[Coroutines] applied formatter comments
tzuralon Aug 10, 2025
9ec807f
Merge branch 'main' into users/tzuralon/coro-async-exceptions-fixes
tzuralon Aug 18, 2025
3c5929a
[Coroutines] Reduce storage size for SmallSet, enhance naming
tzuralon Aug 22, 2025
3551cc8
[Coroutines] make comments more coherent
tzuralon Aug 22, 2025
c4795ae
[Coroutines] some formatting enhancements, improved performance by no…
tzuralon Aug 22, 2025
dbafc22
[Coroutines] - Remove redundant function finalizeBasicBlockCloneAndTr…
tzuralon Aug 22, 2025
0e1afdf
[Coroutines] - Make variable names more meaningful
tzuralon Sep 2, 2025
380a643
[Coroutines] - aligned the IR tests to have basic verification
tzuralon Sep 2, 2025
5baf5aa
[Coroutines] - Moved the fixer from CoroFrame to CoroSplit
tzuralon Nov 2, 2025
0be3d04
[Coroutines] Moved the fixer to newly SwitchABI Preprocess
tzuralon Nov 25, 2025
8cbad36
[Coroutines] Added fix and test for nodes cloning bug
tzuralon Nov 27, 2025
92c7c6b
[Coroutines] Changed tests IR to be more code-gen friendly
tzuralon Nov 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 176 additions & 0 deletions llvm/lib/Transforms/Coroutines/CoroFrame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "CoroInternal.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/DIBuilder.h"
Expand All @@ -35,6 +36,7 @@
#include "llvm/Transforms/Coroutines/SpillUtils.h"
#include "llvm/Transforms/Coroutines/SuspendCrossingInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
Expand Down Expand Up @@ -973,6 +975,178 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
return FrameTy;
}

// This function assumes that it is being called on basic block in reversed
// post-order, meaning predecessors are visited before successors failing to do
// so will cause VMap to be non-valid and will cause instructions to fail
// mapping to their corresponding clones
static void finalizeBasicBlockCloneAndTrackSuccessors(
BasicBlock *InitialBlock, BasicBlock *ClonedBlock, ValueToValueMapTy &VMap,
SmallSet<BasicBlock *, 20> &SuccessorBlocksSet) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could use SmallSetImpl to avoid explicitly passing 20.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you meant SmallPtrSetImpl? It is also bound to storage (in ctor), it expects the caller to pass a storage buffer

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, SmallPtrSetImpl is correct.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually I changed to SmallPtrSet (which was implicitly used anyway in practice when I used SmallSet, as the Set is of ptrs), and not SmallPtrSetImpl, because SmallPtrSetImpl does not spare the storage management as thought initially.

// This code will examine the basic block, fix issues caused by clones
// for example - tailor cleanupret to the corresponding cleanuppad
// it will use VMap to do so
// in addition, it will add the node successors to SuccessorBlocksSet

// Remap the instructions, VMap here aggregates instructions across
// multiple BasicBlocks, and we assume that traversal is reversed post-order,
// therefore successor blocks (for example instructions having funclet
// tags) will be mapped correctly to the new cloned cleanuppad
for (Instruction &ClonedBlockInstruction : *ClonedBlock) {
RemapInstruction(&ClonedBlockInstruction, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}

const auto &InitialBlockTerminator = InitialBlock->getTerminator();

// If it's cleanupret, find the correspondant cleanuppad (use the VMap to
// find it).
if (auto *InitialBlockTerminatorCleanupReturn =
dyn_cast<CleanupReturnInst>(InitialBlockTerminator)) {
// if none found do nothing
if (VMap.find(InitialBlockTerminatorCleanupReturn->getCleanupPad()) ==
VMap.end()) {
return;
}

auto *ClonedBlockTerminatorCleanupReturn =
cast<CleanupReturnInst>(ClonedBlock->getTerminator());

// Assuming reversed post-order traversal
llvm::Value *ClonedBlockCleanupPadValue =
VMap[InitialBlockTerminatorCleanupReturn->getCleanupPad()];
auto *ClonedBlockCleanupPad =
cast<CleanupPadInst>(ClonedBlockCleanupPadValue);
ClonedBlockTerminatorCleanupReturn->setCleanupPad(ClonedBlockCleanupPad);

// If it's a branch/invoke, keep track of its successors, we want to
// calculate dominance between CoroBegin and them also
} else if (auto *InitialBlockTerminatorBranch =
dyn_cast<BranchInst>(InitialBlockTerminator)) {
for (unsigned int successorIdx = 0;
successorIdx < InitialBlockTerminatorBranch->getNumSuccessors();
++successorIdx) {
SuccessorBlocksSet.insert(
InitialBlockTerminatorBranch->getSuccessor(successorIdx));
}
} else if (auto *InitialBlockTerminatorInvoke =
dyn_cast<InvokeInst>(InitialBlockTerminator)) {
SuccessorBlocksSet.insert(InitialBlockTerminatorInvoke->getUnwindDest());
} else if (isa<ReturnInst>(InitialBlockTerminator)) {
// No action needed
} else {
InitialBlockTerminator->print(dbgs());
report_fatal_error("Terminator is not implemented in "
"finalizeBasicBlockCloneAndTrackSuccessors");
}
}

// Dominance issue fixer for each predecessor satisfying predicate function
void splitIfBasicBlockPredecessors(
Copy link
Contributor

@NewSigma NewSigma Aug 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

static void?

BasicBlock *InitialBlock, BasicBlock *ReplacementBlock,
std::function<bool(BasicBlock *)> Predicate) {

SmallVector<BasicBlock *> InitialBlockPredecessors;

auto Predecessors = predecessors(InitialBlock);
std::copy_if(Predecessors.begin(), Predecessors.end(),
std::back_inserter(InitialBlockPredecessors), Predicate);

// Fixups on the predecessors terminator - point them to ReplacementBlock.
for (auto *InitialBlockPredecessor : InitialBlockPredecessors) {
auto *InitialBlockPredecessorTerminator =
InitialBlockPredecessor->getTerminator();
if (auto *InitialBlockPredecessorTerminatorInvoke =
dyn_cast<InvokeInst>(InitialBlockPredecessorTerminator)) {
if (InitialBlock ==
InitialBlockPredecessorTerminatorInvoke->getUnwindDest()) {
InitialBlockPredecessorTerminatorInvoke->setUnwindDest(
ReplacementBlock);
} else {
InitialBlockPredecessorTerminatorInvoke->setNormalDest(
ReplacementBlock);
}
} else if (auto *InitialBlockPredecessorTerminatorBranch =
dyn_cast<BranchInst>(InitialBlockPredecessorTerminator)) {
for (unsigned int successorIdx = 0;
successorIdx <
InitialBlockPredecessorTerminatorBranch->getNumSuccessors();
++successorIdx) {
if (InitialBlock ==
InitialBlockPredecessorTerminatorBranch->getSuccessor(
successorIdx)) {
InitialBlockPredecessorTerminatorBranch->setSuccessor(
successorIdx, ReplacementBlock);
}
}
} else if (auto *InitialBlockPredecessorTerminatorCleanupReturn =
dyn_cast<CleanupReturnInst>(
InitialBlockPredecessorTerminator)) {
InitialBlockPredecessorTerminatorCleanupReturn->setUnwindDest(
ReplacementBlock);
} else {
InitialBlockPredecessorTerminator->print(dbgs());
report_fatal_error(
"Terminator is not implemented in splitIfBasicBlockPredecessors");
}
}
}

// Fixer for the "Instruction does not dominate all uses!" bug
// The fix consists of mapping problematic paths (where CoroBegin does not
// dominate cleanup nodes) and duplicates them to 2 flows - the one that
// insertSpills intended to create (using the spill) and another one, preserving
// the logics of pre-splitting, which would be triggered if unwinding happened
// before CoroBegin
static void
splitBasicBlocksNotDominatedByCoroBegin(const FrameDataInfo &FrameData,
const coro::Shape &Shape, Function *F,
DominatorTree &DT) {
ValueToValueMapTy VMap;
SmallSet<BasicBlock *, 20> SpillUserBlocksSet;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the value 20 intentional? I suspect the number of basic blocks that are not dominated by coro.begin is small.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In all my tests, I found <=2, so I will change it to 3


// Prepare the node set, logics will be run only on those nodes
for (const auto &E : FrameData.Spills) {
for (auto *U : E.second) {
auto CurrentBlock = U->getParent();
if (!DT.dominates(Shape.CoroBegin, CurrentBlock)) {
SpillUserBlocksSet.insert(CurrentBlock);
}
}
}

// Run is in reversed post order, to enforce visiting predecessors before
// successors
for (BasicBlock *CurrentBlock : ReversePostOrderTraversal<Function *>(F)) {
if (!SpillUserBlocksSet.contains(CurrentBlock)) {
continue;
}
SpillUserBlocksSet.erase(CurrentBlock);

// Preserve the current node. the duplicate will become the unspilled
// alternative
auto UnspilledAlternativeBlock =
CloneBasicBlock(CurrentBlock, VMap, ".unspilled_alternative", F);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we clear VMap before reuse it?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought it's mandatory to keep it across traverse (when tested it previously), but I don't find an actual example to prove it, so I will clear it for now.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found an example, and restored :)
The nodes loop traverses in reverse post-order, so VMap remembers instructions from predecessors.
I added a test where a duplicated node contains a call instruction with a funclet being duplicated, the VMap only being generic handles the funclet token duplication correctly.


// Remap node instructions, keep track of successors to visit them in next
// iterations
finalizeBasicBlockCloneAndTrackSuccessors(
CurrentBlock, UnspilledAlternativeBlock, VMap, SpillUserBlocksSet);

// Split only if predecessor breaks dominance against CoroBegin
splitIfBasicBlockPredecessors(CurrentBlock, UnspilledAlternativeBlock,
[&DT, &Shape](BasicBlock *PredecessorNode) {
return !DT.dominates(Shape.CoroBegin,
PredecessorNode);
});

// We changed dominance tree, so recalculate
DT.recalculate(*F);
}

assert(SpillUserBlocksSet.empty() &&
"Graph is corrupted by SpillUserBlocksSet");
}

// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
Expand Down Expand Up @@ -1052,6 +1226,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
return GEP;
};

splitBasicBlocksNotDominatedByCoroBegin(FrameData, Shape, F, DT);

for (auto const &E : FrameData.Spills) {
Value *Def = E.first;
auto SpillAlignment = Align(FrameData.getAlign(Def));
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Transforms/Coroutines/CoroSplit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,16 @@ static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
// If coro.end has an associated bundle, add cleanupret instruction.
if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);

// If the terminator is an invoke,
// set the cleanupret unwind destination the same as the other edges, to
// avoid validation errors
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The "validation errors" is confusing to maintainers that not familiar to this bug. Maybe we could drop it.

BTW, there are some similar comments that are context-dependent. It would be great if you could reword them.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I rewritten comments

BasicBlock *UBB = nullptr;
if (auto II = dyn_cast<InvokeInst>(FromPad->getParent()->getTerminator())) {
UBB = II->getUnwindDest();
}

auto *CleanupRet = Builder.CreateCleanupRet(FromPad, UBB);
End->getParent()->splitBasicBlock(End);
CleanupRet->getParent()->getTerminator()->eraseFromParent();
}
Expand Down
45 changes: 45 additions & 0 deletions llvm/test/Transforms/Coroutines/pr148035_inst_does_not_dominate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions
; crashed before fix because of the validation mismatch of Instruction does not dominate all uses!
; RUN: opt < %s -passes='coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"
; RUN: opt < %s -passes='default<Os>,coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"
Copy link
Contributor

@NewSigma NewSigma Aug 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking for crashes/errors is already handled implicitly, so it doesn't need explicit mention. Perhaps we could go further by using utils/update_test_checks.py to generate more robust tests?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fix handles 2 different bugs, I wanted to make sure that each IR test file validates that it's correspondent bug is being fixed, but it is the situation also after simplifying the RUN line.


; Function Attrs: presplitcoroutine
define i8 @"?resuming_on_new_thread@@YA?AUtask@@Vunique_ptr@@@Z"(ptr %0) #0 personality ptr null {
invoke void @llvm.seh.scope.begin()
to label %2 unwind label %14

2: ; preds = %1
%3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%4 = load volatile ptr, ptr null, align 8
%5 = call ptr @llvm.coro.begin(token %3, ptr %4)
%6 = call token @llvm.coro.save(ptr null)
%7 = call i8 @llvm.coro.suspend(token none, i1 false)
invoke void @llvm.seh.try.begin()
to label %common.ret unwind label %8

common.ret: ; preds = %12, %10, %2
ret i8 0

8: ; preds = %2
%9 = catchswitch within none [label %10] unwind label %12

10: ; preds = %8
%11 = catchpad within %9 [ptr null, i32 0, ptr null]
br label %common.ret

12: ; preds = %8
%13 = cleanuppad within none []
invoke void @llvm.seh.scope.end()
to label %common.ret unwind label %14

14: ; preds = %12, %1
%15 = cleanuppad within none []
store i32 0, ptr %0, align 4
cleanupret from %15 unwind to caller
}

attributes #0 = { presplitcoroutine }

!llvm.module.flags = !{!0}

!0 = !{i32 2, !"eh-asynch", i32 1}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions
; crashed before fix because of the validation mismatch of Instruction does not dominate all uses!
; RUN: opt < %s -passes='coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"
; RUN: opt < %s -passes='default<Os>,coro-split' -S 2>&1 | FileCheck %s --implicit-check-not="Instruction does not dominate all uses!"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain why do we need coro-split after default<Os>?

Copy link
Author

@tzuralon tzuralon Sep 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need, it reproduces also just with coro-split, I removed the default<Os>.


; Function Attrs: presplitcoroutine
define i8 @"?resuming_on_new_thread@@YA?AUtask@@Vunique_ptr@@@Z"(ptr %0) #0 personality ptr null {
invoke void @llvm.seh.scope.begin()
to label %2 unwind label %14

2: ; preds = %1
%3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%4 = load volatile ptr, ptr null, align 8
%5 = call ptr @llvm.coro.begin(token %3, ptr %4)
%6 = call token @llvm.coro.save(ptr null)
%7 = call i8 @llvm.coro.suspend(token none, i1 false)
invoke void @llvm.seh.try.begin()
to label %common.ret unwind label %8

common.ret: ; preds = %12, %10, %2
ret i8 0

8: ; preds = %2
%9 = catchswitch within none [label %10] unwind label %12

10: ; preds = %8
%11 = catchpad within %9 [ptr null, i32 0, ptr null]
br label %common.ret

12: ; preds = %8
%13 = cleanuppad within none []
invoke void @llvm.seh.scope.end()
to label %common.ret unwind label %14

14: ; preds = %12, %1
%15 = cleanuppad within none []
store i32 0, ptr %0, align 4
br label %common.ret
}

attributes #0 = { presplitcoroutine }

!llvm.module.flags = !{!0}

!0 = !{i32 2, !"eh-asynch", i32 1}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
; In coro-split, this coroutine code reduced IR, produced using clang with async-exceptions
; crashed after first phase of fix because the terminator cleanupret was not implemented on predecessor fixer at the time
; RUN: opt < %s -passes='coro-split' -S
; RUN: opt < %s -passes='default<Os>,coro-split' -S

; Function Attrs: presplitcoroutine
define i8 @"?resuming_on_new_thread@@YA?AUtask@@V?$unique_ptr@HU?$default_delete@H@std@@@std@@0@Z"(ptr %0) #0 personality ptr null {
invoke void @llvm.seh.scope.begin()
to label %2 unwind label %15

2: ; preds = %1
%3 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%4 = call ptr @llvm.coro.begin(token %3, ptr null)
%5 = call token @llvm.coro.save(ptr null)
%6 = call i8 @llvm.coro.suspend(token none, i1 false)
invoke void @llvm.seh.try.begin()
to label %7 unwind label %8

7: ; preds = %2
ret i8 0

8: ; preds = %2
%9 = catchswitch within none [label %10] unwind label %12

10: ; preds = %8
%11 = catchpad within %9 [ptr null, i32 0, ptr null]
ret i8 0

12: ; preds = %8
%13 = cleanuppad within none []
invoke void @llvm.seh.scope.end()
to label %14 unwind label %15

14: ; preds = %12
ret i8 0

15: ; preds = %12, %1
%16 = cleanuppad within none []
cleanupret from %16 unwind label %17

17: ; preds = %15
%18 = cleanuppad within none []
store i32 0, ptr %0, align 4
cleanupret from %18 unwind to caller
}

attributes #0 = { presplitcoroutine }

!llvm.module.flags = !{!0}

!0 = !{i32 2, !"eh-asynch", i32 1}
Loading