Skip to content

Commit bb2089c

Browse files
committed
[AMDGPU][FixIrreducible][UnifyLoopExits] Support callbr with inline-asm
First batch of changes to add support for basic inline-asm callbr for the AMDGPU backend.
1 parent 6b5c38d commit bb2089c

File tree

16 files changed

+2850
-52
lines changed

16 files changed

+2850
-52
lines changed

llvm/include/llvm/Support/GenericLoopInfoImpl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
355355
if (BB == getHeader()) {
356356
assert(!OutsideLoopPreds.empty() && "Loop is unreachable!");
357357
} else if (!OutsideLoopPreds.empty()) {
358-
// A non-header loop shouldn't be reachable from outside the loop,
358+
// A non-header loop block shouldn't be reachable from outside the loop,
359359
// though it is permitted if the predecessor is not itself actually
360360
// reachable.
361361
BlockT *EntryBB = &BB->getParent()->front();

llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -607,10 +607,17 @@ LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F,
607607
// successors
608608
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
609609

610-
// Check whether the function only has simple terminator:
610+
template <typename... TermInst>
611+
LLVM_ABI bool hasOnlyGivenTerminators(const Function &F);
612+
613+
// Check whether the function only has blocks with simple terminators:
611614
// br/brcond/unreachable/ret
612615
LLVM_ABI bool hasOnlySimpleTerminator(const Function &F);
613616

617+
// Check whether the function only has blocks with simple terminators
618+
// (br/brcond/unreachable/ret) or callbr.
619+
LLVM_ABI bool hasOnlySimpleTerminatorOrCallBr(const Function &F);
620+
614621
} // end namespace llvm
615622

616623
#endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H

llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515

1616
#include "llvm/ADT/SmallVector.h"
1717
#include "llvm/ADT/StringRef.h"
18+
#include "llvm/IR/CycleInfo.h"
1819

1920
namespace llvm {
2021

2122
class BasicBlock;
23+
class CallBrInst;
24+
class LoopInfo;
2225
class DomTreeUpdater;
2326

2427
/// Given a set of branch descriptors [BB, Succ0, Succ1], create a "hub" such
@@ -104,7 +107,8 @@ struct ControlFlowHub {
104107
: BB(BB), Succ0(Succ0), Succ1(Succ1) {}
105108
};
106109

107-
void addBranch(BasicBlock *BB, BasicBlock *Succ0, BasicBlock *Succ1) {
110+
void addBranch(BasicBlock *BB, BasicBlock *Succ0,
111+
BasicBlock *Succ1 = nullptr) {
108112
assert(BB);
109113
assert(Succ0 || Succ1);
110114
Branches.emplace_back(BB, Succ0, Succ1);
@@ -118,6 +122,35 @@ struct ControlFlowHub {
118122
std::optional<unsigned> MaxControlFlowBooleans = std::nullopt);
119123

120124
SmallVector<BranchDescriptor> Branches;
125+
126+
/**
127+
* \brief Create a new intermediate target block for a callbr edge.
128+
*
129+
* This function creates a new basic block (the "target block") that sits
130+
* between a callbr instruction and one of its successors. The callbr's
131+
* successor is rewired to this new block, and the new block unconditionally
132+
* branches to the original successor. This is useful for normalizing control
133+
* flow, e.g., when transforming irreducible loops.
134+
*
135+
* \param CallBr The callbr instruction whose edge is to be split.
136+
* \param Succ The original successor basic block to be reached.
137+
* \param SuccIdx The index of the successor in the callbr instruction.
138+
* \param AttachToCallBr If true, the new block is associated with the
139+
* callbr's parent for loop/cycle info. If false, the new block is associated
140+
* with the callbr's successor for loop/cycle info.
141+
* \param CI Optional CycleInfo for updating cycle membership.
142+
* \param DTU Optional DomTreeUpdater for updating the dominator tree.
143+
* \param LI Optional LoopInfo for updating loop membership.
144+
*
145+
* \returns The newly created intermediate target block.
146+
*
147+
* \note This function updates PHI nodes, dominator tree, loop info, and cycle
148+
* info as needed.
149+
*/
150+
static BasicBlock *
151+
createCallBrTarget(CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx,
152+
bool AttachToCallBr = true, CycleInfo *CI = nullptr,
153+
DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr);
121154
};
122155

123156
} // end namespace llvm

llvm/lib/Transforms/Utils/BasicBlockUtils.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1766,12 +1766,21 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
17661766
PBI->swapSuccessors();
17671767
}
17681768

1769-
bool llvm::hasOnlySimpleTerminator(const Function &F) {
1769+
template <typename... TermInst>
1770+
bool llvm::hasOnlyGivenTerminators(const Function &F) {
17701771
for (auto &BB : F) {
17711772
auto *Term = BB.getTerminator();
1772-
if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
1773-
isa<BranchInst>(Term)))
1773+
if (!(isa<TermInst>(Term) || ...))
17741774
return false;
17751775
}
17761776
return true;
17771777
}
1778+
1779+
bool llvm::hasOnlySimpleTerminator(const Function &F) {
1780+
return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst>(F);
1781+
}
1782+
1783+
bool llvm::hasOnlySimpleTerminatorOrCallBr(const Function &F) {
1784+
return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst,
1785+
CallBrInst>(F);
1786+
}

llvm/lib/Transforms/Utils/ControlFlowUtils.cpp

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/SetVector.h"
1515
#include "llvm/ADT/SmallSet.h"
1616
#include "llvm/Analysis/DomTreeUpdater.h"
17+
#include "llvm/Analysis/LoopInfo.h"
1718
#include "llvm/IR/Constants.h"
1819
#include "llvm/IR/Instructions.h"
1920
#include "llvm/IR/ValueHandle.h"
@@ -282,7 +283,9 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
282283

283284
for (auto [BB, Succ0, Succ1] : Branches) {
284285
#ifndef NDEBUG
285-
assert(Incoming.insert(BB).second && "Duplicate entry for incoming block.");
286+
assert(
287+
(Incoming.insert(BB).second || isa<CallBrInst>(BB->getTerminator())) &&
288+
"Duplicate entry for incoming block.");
286289
#endif
287290
if (Succ0)
288291
Outgoing.insert(Succ0);
@@ -342,3 +345,55 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
342345

343346
return {FirstGuardBlock, true};
344347
}
348+
349+
BasicBlock *ControlFlowHub::createCallBrTarget(
350+
CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx, bool AttachToCallBr,
351+
CycleInfo *CI, DomTreeUpdater *DTU, LoopInfo *LI) {
352+
BasicBlock *CallBrBlock = CallBr->getParent();
353+
BasicBlock *CallBrTarget =
354+
BasicBlock::Create(CallBrBlock->getContext(),
355+
CallBrBlock->getName() + ".target." + Succ->getName(),
356+
CallBrBlock->getParent());
357+
// Rewire control flow from callbr to the new target block.
358+
Succ->replacePhiUsesWith(CallBrBlock, CallBrTarget);
359+
CallBr->setSuccessor(SuccIdx, CallBrTarget);
360+
// Jump from the new target block to the original successor.
361+
BranchInst::Create(Succ, CallBrTarget);
362+
if (LI) {
363+
if (Loop *L = LI->getLoopFor(AttachToCallBr ? CallBrBlock : Succ); L) {
364+
bool AddToLoop = true;
365+
if (AttachToCallBr) {
366+
// Check if the loops are disjoint. In that case, we do not add the
367+
// intermediate target to any loop.
368+
if (auto *LL = LI->getLoopFor(Succ);
369+
LL && !L->contains(LL) && !LL->contains(L))
370+
AddToLoop = false;
371+
}
372+
if (AddToLoop)
373+
L->addBasicBlockToLoop(CallBrTarget, *LI);
374+
}
375+
}
376+
if (CI) {
377+
if (auto *C = CI->getCycle(AttachToCallBr ? CallBrBlock : Succ); C) {
378+
bool AddToCycle = true;
379+
if (AttachToCallBr) {
380+
// Check if the cycles are disjoint. In that case, we do not add the
381+
// intermediate target to any cycle.
382+
if (auto *CC = CI->getCycle(Succ); CC) {
383+
auto *CommonC = CI->getSmallestCommonCycle(C, CC);
384+
if (CommonC != C && CommonC != CC)
385+
AddToCycle = false;
386+
}
387+
}
388+
if (AddToCycle)
389+
CI->addBlockToCycle(CallBrTarget, C);
390+
}
391+
}
392+
if (DTU) {
393+
DTU->applyUpdates({{DominatorTree::Insert, CallBrBlock, CallBrTarget}});
394+
if (DTU->getDomTree().dominates(CallBrBlock, Succ))
395+
DTU->applyUpdates({{DominatorTree::Delete, CallBrBlock, Succ},
396+
{DominatorTree::Insert, CallBrTarget, Succ}});
397+
}
398+
return CallBrTarget;
399+
}

llvm/lib/Transforms/Utils/FixIrreducible.cpp

Lines changed: 102 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,53 @@
7979
// Limitation: The pass cannot handle switch statements and indirect
8080
// branches. Both must be lowered to plain branches first.
8181
//
82+
// CallBr support: CallBr is handled as a more general branch instruction which
83+
// can have multiple successors. The pass redirects the edges to intermediate
84+
// target blocks that unconditionally branch to the original callbr target
85+
// blocks. This allows the control flow hub to know which of the original
86+
// target blocks to jump to.
87+
// Example input CFG:
88+
// Entry (callbr)
89+
// / \
90+
// v v
91+
// H ----> B
92+
// ^ /|
93+
// `----' |
94+
// v
95+
// Exit
96+
//
97+
// becomes:
98+
// Entry (callbr)
99+
// / \
100+
// v v
101+
// target.H target.B
102+
// | |
103+
// v v
104+
// H ----> B
105+
// ^ /|
106+
// `----' |
107+
// v
108+
// Exit
109+
//
110+
// Note: the CFG above is only an intermediate step.
111+
// OUTPUT CFG: Converted to a natural loop with a new header N.
112+
//
113+
// Entry (callbr)
114+
// / \
115+
// v v
116+
// target.H target.B
117+
// \ /
118+
// \ /
119+
// v v
120+
// N <---.
121+
// / \ \
122+
// / \ |
123+
// v v /
124+
// H --> B --'
125+
// |
126+
// v
127+
// Exit
128+
//
82129
//===----------------------------------------------------------------------===//
83130

84131
#include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -231,6 +278,7 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
231278
return false;
232279
LLVM_DEBUG(dbgs() << "Processing cycle:\n" << CI.print(&C) << "\n";);
233280

281+
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
234282
ControlFlowHub CHub;
235283
SetVector<BasicBlock *> Predecessors;
236284

@@ -242,18 +290,33 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
242290
}
243291

244292
for (BasicBlock *P : Predecessors) {
245-
auto *Branch = cast<BranchInst>(P->getTerminator());
246-
// Exactly one of the two successors is the header.
247-
BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
248-
BasicBlock *Succ1 = Succ0 ? nullptr : Header;
249-
if (!Succ0)
250-
assert(Branch->getSuccessor(1) == Header);
251-
assert(Succ0 || Succ1);
252-
CHub.addBranch(P, Succ0, Succ1);
253-
254-
LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
255-
<< (Succ0 ? Succ0->getName() : "") << " "
256-
<< (Succ1 ? Succ1->getName() : "") << "\n");
293+
if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator()); Branch) {
294+
// Exactly one of the two successors is the header.
295+
BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
296+
BasicBlock *Succ1 = Succ0 ? nullptr : Header;
297+
if (!Succ0)
298+
assert(Branch->getSuccessor(1) == Header);
299+
assert(Succ0 || Succ1);
300+
CHub.addBranch(P, Succ0, Succ1);
301+
302+
LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
303+
<< (Succ0 ? Succ0->getName() : "") << " "
304+
<< (Succ1 ? Succ1->getName() : "") << "\n");
305+
} else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator());
306+
CallBr) {
307+
for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
308+
BasicBlock *Succ = CallBr->getSuccessor(I);
309+
if (Succ != Header)
310+
continue;
311+
BasicBlock *NewSucc = llvm::ControlFlowHub::createCallBrTarget(
312+
CallBr, Succ, I, false, &CI, &DTU, LI);
313+
CHub.addBranch(NewSucc, Succ);
314+
LLVM_DEBUG(dbgs() << "Added internal branch: " << NewSucc->getName()
315+
<< " -> " << Succ->getName() << "\n");
316+
}
317+
} else {
318+
llvm_unreachable("Unsupported block terminator.");
319+
}
257320
}
258321

259322
// Redirect external incoming edges. This includes the edges on the header.
@@ -266,17 +329,32 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
266329
}
267330

268331
for (BasicBlock *P : Predecessors) {
269-
auto *Branch = cast<BranchInst>(P->getTerminator());
270-
BasicBlock *Succ0 = Branch->getSuccessor(0);
271-
Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
272-
BasicBlock *Succ1 =
273-
Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
274-
Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
275-
CHub.addBranch(P, Succ0, Succ1);
276-
277-
LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
278-
<< (Succ0 ? Succ0->getName() : "") << " "
279-
<< (Succ1 ? Succ1->getName() : "") << "\n");
332+
if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator()); Branch) {
333+
BasicBlock *Succ0 = Branch->getSuccessor(0);
334+
Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
335+
BasicBlock *Succ1 =
336+
Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
337+
Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
338+
CHub.addBranch(P, Succ0, Succ1);
339+
340+
LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
341+
<< (Succ0 ? Succ0->getName() : "") << " "
342+
<< (Succ1 ? Succ1->getName() : "") << "\n");
343+
} else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator());
344+
CallBr) {
345+
for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
346+
BasicBlock *Succ = CallBr->getSuccessor(I);
347+
if (!C.contains(Succ))
348+
continue;
349+
BasicBlock *NewSucc = llvm::ControlFlowHub::createCallBrTarget(
350+
CallBr, Succ, I, true, &CI, &DTU, LI);
351+
CHub.addBranch(NewSucc, Succ);
352+
LLVM_DEBUG(dbgs() << "Added external branch: " << NewSucc->getName()
353+
<< " -> " << Succ->getName() << "\n");
354+
}
355+
} else {
356+
llvm_unreachable("Unsupported block terminator.");
357+
}
280358
}
281359

282360
// Redirect all the backedges through a "hub" consisting of a series
@@ -292,7 +370,6 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
292370
SetVector<BasicBlock *> Entries;
293371
Entries.insert(C.entry_rbegin(), C.entry_rend());
294372

295-
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
296373
CHub.finalize(&DTU, GuardBlocks, "irr");
297374
#if defined(EXPENSIVE_CHECKS)
298375
assert(DT.verify(DominatorTree::VerificationLevel::Full));
@@ -325,7 +402,7 @@ static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT,
325402
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
326403
<< F.getName() << "\n");
327404

328-
assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
405+
assert(hasOnlySimpleTerminatorOrCallBr(F) && "Unsupported block terminator.");
329406

330407
bool Changed = false;
331408
for (Cycle *TopCycle : CI.toplevel_cycles()) {

0 commit comments

Comments
 (0)