
Commit 9023e09

YuriPlyakhin authored and igcbot committed
Support Predicated Memory intrinsics in codegen memory optimization passes
Implemented support for predicated memory intrinsics in the codegen memory optimization passes: MemOpt, MemOpt2, CombineLdSt, AdvMemOpt, and PrepareLoadsStoresPass. Disabled PromoteToPredicatedMemory for illegal integer types and for instructions/calls that may create unexpected behavior.
1 parent dafcd41 · commit 9023e09


10 files changed · +392 -98 lines changed

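The diffs below all follow the same pattern: direct dyn_cast<LoadInst> / dyn_cast<StoreInst> checks are replaced by ALoadInst::get() / AStoreInst::get(), wrapper types from MemOptUtils.h (newly included by MemOpt2.h further down) that cover both plain load/store instructions and the predicated memory intrinsics. MemOptUtils.h itself is not part of this excerpt, so the following is only a rough sketch of the shape the load-side wrapper appears to have, inferred from the call sites; the intrinsic check against GenISA_PredicatedLoad and the operand layout are assumptions, not taken from this commit.

// Illustrative sketch only -- not the real IGC::ALoadInst from MemOptUtils.h.
#include <optional>
#include <llvm/IR/Instructions.h>
#include "GenISAIntrinsics/GenIntrinsicInst.h"

namespace sketch {
using namespace llvm;

class ALoadInst {
    Instruction *I;
    explicit ALoadInst(Instruction *Inst) : I(Inst) {}

public:
    // Wrap a plain LoadInst or (assumed) a GenISA.PredicatedLoad intrinsic.
    static std::optional<ALoadInst> get(Instruction *Inst) {
        if (isa<LoadInst>(Inst))
            return ALoadInst(Inst);
        if (auto *GII = dyn_cast<GenIntrinsicInst>(Inst))
            if (GII->getIntrinsicID() == GenISAIntrinsic::GenISA_PredicatedLoad)
                return ALoadInst(Inst);
        return std::nullopt;
    }

    Instruction *inst() const { return I; }

    Value *getPointerOperand() const {
        if (auto *LD = dyn_cast<LoadInst>(I))
            return LD->getPointerOperand();
        return I->getOperand(0); // assumption: pointer is operand 0 of the intrinsic
    }

    unsigned getPointerAddressSpace() const {
        return getPointerOperand()->getType()->getPointerAddressSpace();
    }

    bool isSimple() const {
        if (auto *LD = dyn_cast<LoadInst>(I))
            return LD->isSimple();
        return true; // assumption: predicated loads carry no volatile/atomic flags
    }
};
} // namespace sketch

With a wrapper shaped like this, the call-site idiom used throughout the diffs, if (auto LD = ALoadInst::get(I); LD.has_value()) { ... LD->inst() ... }, is a drop-in replacement for the old dyn_cast<LoadInst>(I) checks, and stores are handled symmetrically by AStoreInst (getValueOperand(), getPointerOperand()).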

IGC/Compiler/CISACodeGen/AdvMemOpt.cpp

Lines changed: 68 additions & 28 deletions
@@ -13,6 +13,7 @@ SPDX-License-Identifier: MIT
 #include <llvm/Pass.h>
 #include <llvm/Transforms/Utils/Local.h>
 #include <llvm/ADT/Optional.h>
+#include "llvmWrapper/Analysis/TargetLibraryInfo.h"
 #include "llvmWrapper/Transforms/Utils/LoopUtils.h"
 #include "common/LLVMWarningsPop.hpp"
 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
@@ -29,13 +30,16 @@ using namespace llvm::PatternMatch;
 using namespace IGC;
 using namespace IGC::IGCMD;

+#define DEBUG_TYPE "AdvMemOpt"
+
 namespace {

     class AdvMemOpt : public FunctionPass {
         DominatorTree* DT = nullptr;
         LoopInfo* LI = nullptr;
         PostDominatorTree* PDT = nullptr;
         WIAnalysis* WI = nullptr;
+        TargetLibraryInfo* TLI = nullptr;

     public:
         static char ID;
@@ -58,6 +62,7 @@ namespace {
             AU.addRequired<DominatorTreeWrapperPass>();
             AU.addRequired<LoopInfoWrapperPass>();
             AU.addRequired<PostDominatorTreeWrapperPass>();
+            AU.addRequired<TargetLibraryInfoWrapperPass>();
         }

         bool collectOperandInst(SmallPtrSetImpl<Instruction*>&,
@@ -96,10 +101,13 @@ namespace IGC {
 IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 IGC_INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass);
+IGC_INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 IGC_INITIALIZE_PASS_END(AdvMemOpt, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS)
 } // End namespace IGC

 bool AdvMemOpt::runOnFunction(Function& F) {
+    bool Changed = false;
+
     // Skip non-kernel function.
     MetaDataUtils* MDU = nullptr;
     MDU = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
@@ -111,6 +119,7 @@ bool AdvMemOpt::runOnFunction(Function& F) {
     PDT = &getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
     LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     WI = &getAnalysis<WIAnalysis>();
+    TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

     SmallVector<Loop*, 8> InnermostLoops;
     for (auto I = LI->begin(), E = LI->end(); I != E; ++I)
@@ -138,7 +147,7 @@ bool AdvMemOpt::runOnFunction(Function& F) {
                 }
             }
         }
-        hoistUniformLoad(Line);
+        Changed |= hoistUniformLoad(Line);
     }

     auto* Ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
@@ -151,8 +160,8 @@ bool AdvMemOpt::runOnFunction(Function& F) {
         // because, once ballot-loop is added, vISA finalizer cannot schedule
         // those sample operations.
         auto& DL = F.getParent()->getDataLayout();
-        IRBuilder<> IRB(F.getContext());
-        Cluster.init(Ctx, &DL, nullptr/*AA*/, 32);
+        IGCIRBuilder<> IRB(F.getContext());
+        Cluster.init(Ctx, &DL, nullptr/*AA*/, TLI, 32);
         for (Function::iterator I = F.begin(), E = F.end(); I != E;
             ++I) {
             BasicBlock *BB = &*I;
@@ -182,24 +191,26 @@ bool AdvMemOpt::runOnFunction(Function& F) {
                     if (!WI->isUniform(LI->getResourceValue())) {
                         NumResourceVarying++;
                     }
-                } else if (auto *SI = dyn_cast<StoreInst>(I)) {
-                    if (!WI->isUniform(SI))
+                } else if (auto SI = AStoreInst::get(I); SI.has_value()) {
+                    if (!WI->isUniform(SI->inst()))
                         continue;

                     unsigned AS = SI->getPointerAddressSpace();
                     if (AS != ADDRESS_SPACE_PRIVATE &&
                         AS != ADDRESS_SPACE_GLOBAL)
                         continue;

-                    IRB.SetInsertPoint(SI);
+                    IRB.SetInsertPoint(SI->inst());

-                    if (auto NewSI = expand64BitStore(IRB, DL, SI)) {
+                    if (auto NewSI = expand64BitStore(IRB, DL, SI.value())) {
+                        auto NewASI = AStoreInst::get(NewSI);
                         WI->incUpdateDepend(NewSI, WIAnalysis::UNIFORM_THREAD);
-                        WI->incUpdateDepend(NewSI->getValueOperand(),
+                        WI->incUpdateDepend(NewASI->getValueOperand(),
                             WIAnalysis::UNIFORM_THREAD);
-                        WI->incUpdateDepend(NewSI->getPointerOperand(),
+                        WI->incUpdateDepend(NewASI->getPointerOperand(),
                             WIAnalysis::UNIFORM_THREAD);
-                        SI->eraseFromParent();
+                        SI->inst()->eraseFromParent();
+                        Changed = true;
                     }
                 }
             }
@@ -209,24 +220,29 @@ bool AdvMemOpt::runOnFunction(Function& F) {
                 NumResourceVarying;
             // clustering method cannot handle memory dependence
             if (!HasStore)
-                Cluster.runForGFX(BB);
+                Changed |= Cluster.runForGFX(BB);
             }
         }
     }
-    return false;
+    return Changed;
 }

 bool AdvMemOpt::isLeadCandidate(BasicBlock* BB) const {
-    // A candidate lead should have at least one uniform loads. In addition,
+    // A candidate lead should have at least one uniform load. In addition,
     // there's no instruction might to write memory from the last uniform loads
     // to the end.
+    LLVM_DEBUG(dbgs() << "Check lead candidate: " << BB->getName() << "\n");
     for (auto II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
-        if (II->mayWriteToMemory())
+        if (II->mayWriteToMemory()) {
+            LLVM_DEBUG(dbgs() <<" - May write to memory. Bail out: " << *II << "\n");
             return false;
-        LoadInst* LD = dyn_cast<LoadInst>(&*II);
-        if (!LD || !WI->isUniform(LD))
+        }
+        std::optional<ALoadInst> LD = ALoadInst::get(&*II);
+        if (!LD.has_value() || !WI->isUniform(LD->inst())) {
+            LLVM_DEBUG(dbgs() << " - Not uniform load. Skip: " << *II << "\n");
             continue;
-        // Found uniform loads.
+        }
+        LLVM_DEBUG(dbgs() << "Found uniform loads.\n");
         return true;
     }
     return false;
@@ -351,42 +367,66 @@ bool AdvMemOpt::hoistInst(Instruction* LD, BasicBlock* BB) const {

 bool AdvMemOpt::hoistUniformLoad(ArrayRef<BasicBlock*> Line) const {
     bool Changed = false;
-    // Find the lead BB where to hoist uniform load.
+    LLVM_DEBUG(dbgs() << "Find the lead BB where to hoist uniform load.\n");
+
     auto BI = Line.begin();
     auto BE = Line.end();
+
     while (BI != BE) {
         if (!isLeadCandidate(*BI)) {
             ++BI;
             continue;
         }
+
         // Found lead.
         BasicBlock* Lead = *BI++;
-        BasicBlock* Prev = Lead;
+        LLVM_DEBUG(dbgs() << "Found lead to hoist to: " << Lead->getName() << "\n");
+
         for (; BI != BE; ++BI) {
             BasicBlock* Curr = *BI;
+            LLVM_DEBUG(dbgs() << " - Try to hoist from: " << Curr->getName() << "\n");
             // Check whether it's safe to hoist uniform loads from Curr to Lead by
             // checking all blocks between Prev and Curr.
-            if (hasMemoryWrite(Prev, Curr))
+            if (hasMemoryWrite(Lead, Curr)) {
+                LLVM_DEBUG(dbgs() << "- Memory write between Lead and Curr. Bail out.\n");
                 break;
+            }
+
             // Hoist uniform loads from Curr into Lead.
             for (auto II = Curr->getFirstNonPHI()->getIterator(),
                 IE = Curr->end(); II != IE; /*EMPTY*/) {
-                if (II->mayWriteToMemory())
+                LLVM_DEBUG(dbgs() << " - - Try hoisting: " << *II << "\n");
+
+                if (II->mayWriteToMemory()) {
+                    LLVM_DEBUG(dbgs() << " - - May write to memory. Bail out.\n");
                     break;
-                LoadInst* LD = dyn_cast<LoadInst>(&*II++);
-                if (!LD || !WI->isUniform(LD))
+                }
+
+                std::optional<ALoadInst> LD = ALoadInst::get(&*II++);
+                if (!LD.has_value() || !WI->isUniform(LD->inst())) {
+                    LLVM_DEBUG(dbgs() << " - - Not uniform load. Skip.\n");
                     continue;
-                if (!hoistInst(LD, Lead))
-                    break; // Bail out if any uniform load could not be hoisted safely.
-                // Reset iterator
-                II = Curr->getFirstNonPHI()->getIterator();
+                }
+
+                if (!hoistInst(LD->inst(), Lead)) {
+                    LLVM_DEBUG(dbgs() << " - - Uniform load could not be hoisted safely. Bail out.\n");
+                    break;
+                }
                 Changed = true;
+                LLVM_DEBUG(dbgs() << " - - Hoisted!\n");
+
+                // Reset iterator
+                II = Curr->getFirstNonPHI()->getIterator();
             }
+
             // After hoisting uniform loads safely, if Curr has memory write, stop
             // hoisting further.
-            if (hasMemoryWrite(Curr))
+            if (hasMemoryWrite(Curr)) {
+                LLVM_DEBUG(dbgs() << "- Curr has memory write. Bail out.\n");
                 break;
+            }
         }
     }
+
     return Changed;
 }
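
Two threads run through the AdvMemOpt.cpp changes above: the pass now reports whether it changed anything (Changed) instead of always returning false, and it wires in TargetLibraryInfo so MemInstCluster can build TLI-aware MemoryLocations. Below is a minimal, generic legacy-pass-manager sketch of that TLI wiring (pass name and body are illustrative, not IGC code). Upstream LLVM's wrapper exposes getTLI(Function&); the llvmWrapper/Analysis/TargetLibraryInfo.h include is presumably what lets the diff call getTLI() with no arguments.

// Generic sketch (not IGC code): requiring and fetching TargetLibraryInfo
// from a legacy-PM FunctionPass, mirroring the dependency AdvMemOpt now declares.
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/Function.h>
#include <llvm/Pass.h>

using namespace llvm;

namespace {
struct TLIUserSketch : public FunctionPass {
    static char ID;
    TLIUserSketch() : FunctionPass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.setPreservesCFG();
        AU.addRequired<TargetLibraryInfoWrapperPass>();
    }

    bool runOnFunction(Function &F) override {
        // Upstream form: per-function TLI (LLVM >= 10 requires the Function argument).
        TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
        (void)TLI; // a real pass would hand &TLI to MemoryLocation/alias queries
        return false;
    }
};
} // namespace

char TLIUserSketch::ID = 0;

Separately, the new #define DEBUG_TYPE "AdvMemOpt" plus the LLVM_DEBUG tracing means an assertions-enabled build can dump the hoisting decisions with -debug-only=AdvMemOpt.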

IGC/Compiler/CISACodeGen/MemOpt2.cpp

Lines changed: 19 additions & 15 deletions
@@ -11,6 +11,7 @@ SPDX-License-Identifier: MIT
 #include "common/LLVMWarningsPush.hpp"
 #include <llvm/Pass.h>
 #include <llvm/Transforms/Utils/Local.h>
+#include <llvmWrapper/Analysis/TargetLibraryInfo.h>
 #include <llvmWrapper/IR/DerivedTypes.h>
 #include "common/LLVMWarningsPop.hpp"

@@ -117,9 +118,9 @@ bool MemInstCluster::runForOCL(Function& F) {
 bool MemInstCluster::isSafeToMoveTo(Instruction* I, Instruction* Pos, const SmallVectorImpl<Instruction*>* CheckList) const {
     // TODO: So far, we simply don't allow rescheduling load/atomic operations.
     // Add alias analysis to allow memory operations to be rescheduled.
-    if (auto LD = dyn_cast<LoadInst>(I)) {
+    if (auto LD = ALoadInst::get(I); LD.has_value()) {
         if (CheckList)
-            return isSafeToScheduleLoad(LD, CheckList);
+            return isSafeToScheduleLoad(LD.value(), CheckList);
         return false;
     }
     if (GenIntrinsicInst * GII = dyn_cast<GenIntrinsicInst>(I)) {
@@ -292,26 +293,26 @@ bool MemInstCluster::clusterLoad(BasicBlock* BB) {
     unsigned MaxLiveOutByte = getMaxLiveOutThreshold() * 4;
     unsigned CountByte = 0;
     for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
-        LoadInst* Lead = dyn_cast<LoadInst>(BI);
-        if (!Lead || !Lead->isSimple())
+        std::optional<ALoadInst> Lead = ALoadInst::get(&(*BI));
+        if (!Lead.has_value() || !Lead->isSimple())
             continue;
         if (Lead->getPointerAddressSpace() != ADDRESS_SPACE_LOCAL &&
             Lead->getPointerAddressSpace() != ADDRESS_SPACE_GLOBAL)
             continue;

-        CountByte = getNumLiveOutBytes(Lead);
+        CountByte = getNumLiveOutBytes(Lead->inst());
         if (CountByte > MaxLiveOutByte)
             continue;

         SmallVector<Instruction*, 8> CheckList;
-        InsertPos = Lead;
+        InsertPos = Lead->inst();
         // Find candidate the cluster them.
         BasicBlock::iterator I = BasicBlock::iterator(InsertPos), E;
         for (I = std::next(I), E = BB->end(); I != E; ++I) {
             if (I->mayWriteToMemory())
                 CheckList.push_back(&(*I));
-            LoadInst* Next = dyn_cast<LoadInst>(I);
-            if (!Next || !Next->isSimple())
+            std::optional<ALoadInst> Next = ALoadInst::get(&(*I));
+            if (!Next.has_value() || !Next->isSimple())
                 continue;
             // Skip memory accesses on different memory address space.
             // FIXME: GetUnderlyingObject() cannot track through `inttoptr`
@@ -320,21 +321,21 @@ bool MemInstCluster::clusterLoad(BasicBlock* BB) {
             // same buffer.
             if (Next->getPointerAddressSpace() != Lead->getPointerAddressSpace())
                 continue;
-            CountByte += getNumLiveOutBytes(Next);
+            CountByte += getNumLiveOutBytes(Next->inst());
             if (CountByte > MaxLiveOutByte) {
-                BasicBlock::iterator I = BasicBlock::iterator(Next);
+                BasicBlock::iterator I = BasicBlock::iterator(Next->inst());
                 BI = std::prev(I);
                 break;
             }
-            Changed |= schedule(BB, Next, InsertPos, &CheckList);
+            Changed |= schedule(BB, Next->inst(), InsertPos, &CheckList);
         }
     }
     return Changed;
 }

-bool MemInstCluster::isSafeToScheduleLoad(const LoadInst* LD,
+bool MemInstCluster::isSafeToScheduleLoad(const ALoadInst& LD,
     const SmallVectorImpl<Instruction*>* CheckList) const {
-    MemoryLocation A = MemoryLocation::get(LD);
+    MemoryLocation A = getLocation(LD.inst(), TLI);

     for (auto* I : *CheckList) {
         // Skip instructions never writing to memory.
@@ -343,7 +344,7 @@ bool MemInstCluster::isSafeToScheduleLoad(const LoadInst* LD,
         if (!AA)
             return false;
         // Unsafe if there's alias.
-        MemoryLocation B = getLocation(I);
+        MemoryLocation B = getLocation(I, TLI);
         if (!A.Ptr || !B.Ptr || AA->alias(A, B))
             return false;
     }
@@ -419,6 +420,7 @@ class MemOpt2 : public FunctionPass {
         AU.addRequired<AAResultsWrapperPass>();
         AU.addRequired<CodeGenContextWrapper>();
         AU.addRequired<MetaDataUtilsWrapper>();
+        AU.addRequired<TargetLibraryInfoWrapperPass>();
     }
     MemInstCluster Cluster;
     unsigned MaxLiveOutThreshold = 16;
@@ -438,6 +440,7 @@ IGC_INITIALIZE_PASS_BEGIN(MemOpt2, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY,
     PASS_ANALYSIS)
 IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
 IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
+IGC_INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 IGC_INITIALIZE_PASS_END(MemOpt2, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY,
     PASS_ANALYSIS)

@@ -456,7 +459,8 @@ bool MemOpt2::runOnFunction(Function &F) {
         getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
     auto DL = &F.getParent()->getDataLayout();
     auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+    auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

-    Cluster.init(cgCtx, DL, AA, MaxLiveOutThreshold);
+    Cluster.init(cgCtx, DL, AA, TLI, MaxLiveOutThreshold);
     return Cluster.runForOCL(F);
 }
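
In MemOpt2.cpp the member helper getLocation(Instruction*) is replaced by calls to a free getLocation(I, TLI) (the member itself is deleted in MemOpt2.h below), presumably provided by MemOptUtils.h alongside ALoadInst/AStoreInst so that predicated loads and stores get proper memory locations too. As a rough idea of what a TLI-aware variant of the old helper could look like -- hypothetical name, plain loads/stores and TLI-recognized library calls only, not the IGC implementation:

// Hypothetical stand-in for a TLI-aware getLocation(); not the IGC implementation.
#include <llvm/Analysis/MemoryLocation.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/InstrTypes.h>
#include <llvm/IR/Instructions.h>

using namespace llvm;

static MemoryLocation getLocationSketch(Instruction *I, const TargetLibraryInfo *TLI) {
    if (auto *LD = dyn_cast<LoadInst>(I))
        return MemoryLocation::get(LD);
    if (auto *ST = dyn_cast<StoreInst>(I))
        return MemoryLocation::get(ST);
    // For a call recognized by TLI, describe its first pointer argument;
    // everything else stays an empty location, which isSafeToScheduleLoad()
    // treats conservatively (it bails out when B.Ptr is null).
    if (auto *CB = dyn_cast<CallBase>(I)) {
        LibFunc LF;
        const Function *Callee = CB->getCalledFunction();
        if (TLI && Callee && TLI->getLibFunc(*Callee, LF) &&
            CB->arg_size() > 0 && CB->getArgOperand(0)->getType()->isPointerTy())
            return MemoryLocation::getForArgument(CB, 0, TLI);
    }
    return MemoryLocation();
}

The behavioral point visible in the diff is that isSafeToScheduleLoad() now takes an ALoadInst and builds both locations through the TLI-aware helper, so predicated loads go through the same alias checks as plain ones.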

IGC/Compiler/CISACodeGen/MemOpt2.h

Lines changed: 8 additions & 12 deletions
@@ -23,10 +23,13 @@ SPDX-License-Identifier: MIT
 #include "Compiler/IGCPassSupport.h"
 #include "Compiler/MetaDataUtilsWrapper.h"

+#include "MemOptUtils.h"
+
 class MemInstCluster {
     IGC::CodeGenContext *CTX = nullptr;
     const DataLayout *DL = nullptr;
     AliasAnalysis *AA = nullptr;
+    TargetLibraryInfo* TLI = nullptr;
     unsigned MaxLiveOutThreshold = 0;
     llvm::DenseSet<Instruction *> Scheduled;
@@ -35,14 +38,15 @@ class MemInstCluster {
     ~MemInstCluster() {}

     MemInstCluster(IGC::CodeGenContext *pCTX, const DataLayout *pDL,
-                   AliasAnalysis *pAA, unsigned MLT) {
-        init(pCTX, pDL, pAA, MLT);
+                   AliasAnalysis *pAA, TargetLibraryInfo* pTLI, unsigned MLT) {
+        init(pCTX, pDL, pAA, pTLI, MLT);
     }
     void init(IGC::CodeGenContext *pCTX, const DataLayout *pDL,
-              AliasAnalysis *pAA, unsigned MLT) {
+              AliasAnalysis *pAA, TargetLibraryInfo* pTLI, unsigned MLT) {
         CTX = pCTX;
         DL = pDL;
         AA = pAA;
+        TLI = pTLI;
         MaxLiveOutThreshold = MLT;
     }
     /// Called by MemOpt2 to cluster GPGPU kernels
@@ -61,19 +65,11 @@ class MemInstCluster {
     bool clusterLoad(BasicBlock *BB);
     bool isDefinedBefore(BasicBlock *BB, Instruction *I, Instruction *Pos) const;
     bool
-    isSafeToScheduleLoad(const LoadInst *LD,
+    isSafeToScheduleLoad(const IGC::ALoadInst& LD,
                          const SmallVectorImpl<Instruction *> *CheckList) const;
     bool schedule(BasicBlock *BB, Value *V, Instruction *&InsertPos,
                   const SmallVectorImpl<Instruction *> *CheckList = nullptr);

-    MemoryLocation getLocation(Instruction *I) const {
-        if (LoadInst *LD = dyn_cast<LoadInst>(I))
-            return MemoryLocation::get(LD);
-        if (StoreInst *ST = dyn_cast<StoreInst>(I))
-            return MemoryLocation::get(ST);
-        return MemoryLocation();
-    }
-
     unsigned getNumLiveOuts(Instruction *I) const;

     unsigned getNumLiveOutBytes(Instruction *I) const;
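
For reference, a call-site sketch of the widened init() signature (placeholder function and variable names; the thresholds mirror what the MemOpt2 and AdvMemOpt diffs above actually pass):

// Sketch of how callers feed the new TargetLibraryInfo parameter to MemInstCluster.
#include "Compiler/CISACodeGen/MemOpt2.h" // assumed to pull in MemOptUtils.h as shown above

void initClusters(IGC::CodeGenContext *Ctx, const llvm::DataLayout *DL,
                  llvm::AliasAnalysis *AA, llvm::TargetLibraryInfo *TLI,
                  MemInstCluster &OclCluster, MemInstCluster &GfxCluster) {
    // MemOpt2 (OpenCL path): alias analysis available, default threshold of 16.
    OclCluster.init(Ctx, DL, AA, TLI, /*MaxLiveOutThreshold=*/16);
    // AdvMemOpt (GFX path): no alias analysis, threshold of 32.
    GfxCluster.init(Ctx, DL, /*AA=*/nullptr, TLI, /*MaxLiveOutThreshold=*/32);
}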
