Skip to content

Commit 5639d3f

Browse files
admitricpszymich
authored and committed
Fix incorrect 2d loads rescheduling rollback functionality
Fix the incorrect rollback of 2d loads rescheduling by ensuring that the uses of a potential candidate are dominated by the undo point. Also add an option that makes the splitting of candidates finer in order to reduce register pressure more effectively. (cherry picked from commit 8a50745)
1 parent d8504b7 commit 5639d3f

File tree

5 files changed

+85
-17
lines changed

5 files changed

+85
-17
lines changed

IGC/Compiler/CISACodeGen/CodeSinking.cpp

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -789,9 +789,9 @@ namespace IGC {
789789
#define LOOPSINK_PREHEADER_IMPACT_THRESHOLD 0.2
790790

791791
// Helper functions for loop sink debug dumps
792-
#define PrintDump(Contents) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {LogStream << Contents;}
793-
#define PrintInstructionDump(Inst) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {(Inst)->print(LogStream, false); LogStream << "\n";}
794-
#define PrintOUGDump(OUG) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {OUG.print(LogStream); LogStream << "\n";}
792+
#define PrintDump(Contents) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {*LogStream << Contents;}
793+
#define PrintInstructionDump(Inst) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {(Inst)->print(*LogStream, false); *LogStream << "\n";}
794+
#define PrintOUGDump(OUG) if (IGC_IS_FLAG_ENABLED(DumpLoopSink)) {OUG.print(*LogStream); *LogStream << "\n";}
795795

796796

797797
// Register pass to igc-opt
@@ -812,7 +812,11 @@ namespace IGC {
812812
IGC_INITIALIZE_PASS_END(CodeLoopSinking, PASS_FLAG1, PASS_DESCRIPTION1, PASS_CFG_ONLY1, PASS_ANALYSIS1)
813813

814814
char CodeLoopSinking::ID = 0;
815-
CodeLoopSinking::CodeLoopSinking() : FunctionPass(ID), LogStream(Log) {
815+
CodeLoopSinking::CodeLoopSinking() : FunctionPass(ID), LogStringStream(Log) {
816+
if (IGC_IS_FLAG_ENABLED(PrintToConsole))
817+
LogStream = &IGC::Debug::ods();
818+
else
819+
LogStream = &LogStringStream;
816820
initializeCodeLoopSinkingPass(*PassRegistry::getPassRegistry());
817821
}
818822

@@ -834,7 +838,7 @@ namespace IGC {
834838

835839
if (IGC_IS_FLAG_ENABLED(DumpLoopSink))
836840
{
837-
auto printGlobalSettings = [](llvm::raw_string_ostream &LogStream)
841+
auto printGlobalSettings = [](llvm::raw_ostream &LogStream)
838842
{
839843
// print every value to the dump
840844
LogStream << "ForceLoopSink: " << IGC_GET_FLAG_VALUE(ForceLoopSink) << "\n";
@@ -859,7 +863,7 @@ namespace IGC {
859863

860864
Log.clear();
861865

862-
printGlobalSettings(LogStream);
866+
printGlobalSettings(*LogStream);
863867

864868
PrintDump("=====================================\n");
865869
PrintDump("Function " << F.getName() << "\n");
@@ -900,12 +904,9 @@ namespace IGC {
900904
IGC_ASSERT(false == verifyFunction(F, &dbgs()));
901905
}
902906

903-
if (IGC_IS_FLAG_ENABLED(DumpLoopSink))
907+
if (IGC_IS_FLAG_ENABLED(DumpLoopSink) && IGC_IS_FLAG_DISABLED(PrintToConsole))
904908
{
905-
if (IGC_IS_FLAG_ENABLED(PrintToConsole))
906-
IGC::Debug::ods() << Log;
907-
else
908-
dumpToFile(Log);
909+
dumpToFile(Log);
909910
}
910911

911912
return Changed;
@@ -1749,7 +1750,7 @@ namespace IGC {
17491750
auto allUsesAreDominatedByRemainingUses = [&](SmallVector<Instruction *, 16> &CurrentCandidateInsts,
17501751
SmallPtrSet<Instruction *, 16> &RemainingCandidateInsts)
17511752
{
1752-
for (Instruction *RI : RemainingCandidateInsts)
1753+
auto instUsesDominateAllCurrentCandidateUses = [&](Instruction *RI)
17531754
{
17541755
for (User *RU : RI->users())
17551756
{
@@ -1768,13 +1769,38 @@ namespace IGC {
17681769
}
17691770
}
17701771
}
1772+
return true;
1773+
};
1774+
1775+
if (IGC_IS_FLAG_ENABLED(LoopSinkCoarserLoadsRescheduling))
1776+
return std::all_of(RemainingCandidateInsts.begin(), RemainingCandidateInsts.end(), instUsesDominateAllCurrentCandidateUses);
1777+
else
1778+
return std::any_of(RemainingCandidateInsts.begin(), RemainingCandidateInsts.end(), instUsesDominateAllCurrentCandidateUses);
1779+
};
1780+
1781+
// If the uses are not dominated by the UndoPoint,
1782+
// a rollback could place some instructions after their uses,
1783+
// so this must be checked whenever we are not sinking from the PH
1784+
auto allUsesAreDominatedByUndoPoint = [&](SmallVector<Instruction *, 16> &CurrentCandidateInsts, Instruction *UndoPoint)
1785+
{
1786+
for (Instruction *CI : CurrentCandidateInsts)
1787+
{
1788+
for (User *CU : CI->users())
1789+
{
1790+
Instruction *CUI = dyn_cast<Instruction>(CU);
1791+
if (!CUI)
1792+
return false;
1793+
if (!DT->dominates(UndoPoint, CUI))
1794+
return false;
1795+
}
17711796
}
17721797
return true;
17731798
};
17741799

17751800
// All the uses are a candidate
17761801
// Try splitting them into separate candidates for better scheduling within a BB
1777-
auto Worthiness = I->getParent() == PH ? LoopSinkWorthiness::Sink : LoopSinkWorthiness::IntraLoopSink;
1802+
bool SinkFromPH = I->getParent() == PH;
1803+
auto Worthiness = SinkFromPH ? LoopSinkWorthiness::Sink : LoopSinkWorthiness::IntraLoopSink;
17781804
SmallVector<Instruction *, 16> CurrentCandidateInsts;
17791805
SmallPtrSet<Instruction *, 16> RemainingCandidateInsts(CandidateInsts.begin(), CandidateInsts.end());
17801806

@@ -1789,7 +1815,8 @@ namespace IGC {
17891815

17901816
if (CurrentCandidateInsts.size() > 0 &&
17911817
Id == GenISAIntrinsic::GenISA_LSC2DBlockReadAddrPayload &&
1792-
allUsesAreDominatedByRemainingUses(CurrentCandidateInsts, RemainingCandidateInsts))
1818+
allUsesAreDominatedByRemainingUses(CurrentCandidateInsts, RemainingCandidateInsts) &&
1819+
(SinkFromPH || allUsesAreDominatedByUndoPoint(CurrentCandidateInsts, CurrentCandidateInsts[0]->getNextNode())))
17931820
{
17941821
NCandidates++;
17951822
SinkCandidates.push_back(std::make_unique<Candidate>(CurrentCandidateInsts, TgtBB, Worthiness, CurrentCandidateInsts[0]->getNextNode()));
@@ -1798,7 +1825,8 @@ namespace IGC {
17981825
CurrentCandidateInsts.push_back(I);
17991826
RemainingCandidateInsts.erase(I);
18001827
}
1801-
if (CurrentCandidateInsts.size() > 0)
1828+
if (CurrentCandidateInsts.size() > 0 &&
1829+
(SinkFromPH || allUsesAreDominatedByUndoPoint(CurrentCandidateInsts, CurrentCandidateInsts[0]->getNextNode())))
18021830
{
18031831
NCandidates++;
18041832
SinkCandidates.push_back(std::make_unique<Candidate>(CurrentCandidateInsts, TgtBB, Worthiness, CurrentCandidateInsts[0]->getNextNode()));

IGC/Compiler/CISACodeGen/CodeSinking.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,8 @@ namespace IGC {
239239
llvm::SmallPtrSet<llvm::BasicBlock*, 8> UndoBlkSet;
240240
/// dumping
241241
std::string Log;
242-
llvm::raw_string_ostream LogStream;
242+
llvm::raw_string_ostream LogStringStream;
243+
llvm::raw_ostream *LogStream = nullptr;
243244

244245
void dumpToFile(const std::string& Log);
245246

IGC/Compiler/tests/CodeSinking/LoopSinking/2d-blockload-loopsched.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
;
77
;============================ end_copyright_notice =============================
88
; REQUIRES: regkeys, llvm-14-plus
9-
; RUN: igc_opt --regkey LoopSinkMinSave=0 --regkey LoopSinkEnable2dBlockReads=1 --regkey LoopSinkEnableLoadsRescheduling=1 --regkey ForceLoopSink=1 --regkey CodeLoopSinkingMinSize=10 --regkey LoopSinkDisableRollback=1 --regkey CodeSinkingLoadSchedulingInstr=0 %enable-basic-aa% --igc-code-loop-sinking -S %s 2>&1 | FileCheck %s
9+
; RUN: igc_opt --regkey LoopSinkMinSave=0 --regkey LoopSinkEnable2dBlockReads=1 --regkey LoopSinkCoarserLoadsRescheduling=1 --regkey LoopSinkEnableLoadsRescheduling=1 --regkey ForceLoopSink=1 --regkey CodeLoopSinkingMinSize=10 --regkey LoopSinkDisableRollback=1 --regkey CodeSinkingLoadSchedulingInstr=0 %enable-basic-aa% --igc-code-loop-sinking -S %s 2>&1 | FileCheck %s
1010

1111
declare i8* @llvm.genx.GenISA.LSC2DBlockCreateAddrPayload.p0i8(i64, i32, i32, i32, i32, i32, i32, i32, i32) #0
1212

IGC/Compiler/tests/CodeSinking/LoopSinking/2d-blockload-sch-rollback.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,44 @@ for.body19.i: ; preds = %for.body19.i.for.bo
108108
br label %for.body19.i
109109
}
110110

111+
112+
; If all block reads are considered a single candidate and we don't check that their uses are dominated by the undo point,
113+
; we might generate incorrect IR by placing the first block read after the first dpas.
114+
; Check that this does not happen.
115+
116+
define spir_kernel void @bar() {
117+
; CHECK-LABEL: @bar(
118+
; CHECK: br label [[DOT_CRIT_EDGE:%.*]]
119+
; CHECK: ._crit_edge:
120+
; CHECK: [[BLOCK2D_ADDRPAYLOAD456:%.*]] = call i32* @llvm.genx.GenISA.LSC2DBlockCreateAddrPayload.p0i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
121+
; CHECK: [[BLOCK2D_READADDRPAYLOAD457:%.*]] = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* [[BLOCK2D_ADDRPAYLOAD456]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0)
122+
; CHECK: [[TMP1:%.*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> [[BLOCK2D_READADDRPAYLOAD457]], i32 0, i32 0, i32 0, i32 0, i1 false)
123+
; CHECK: [[TMP2:%.*]] = insertelement <8 x half> zeroinitializer, half 0xH0000, i64 0
124+
; CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half 0xH0000, i64 0
125+
; CHECK: [[BLOCK2D_READADDRPAYLOAD459:%.*]] = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* [[BLOCK2D_ADDRPAYLOAD456]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0)
126+
; CHECK: [[TMP4:%.*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> [[BLOCK2D_READADDRPAYLOAD459]], i32 0, i32 0, i32 0, i32 0, i1 false)
127+
; CHECK: [[TMP5:%.*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> [[BLOCK2D_READADDRPAYLOAD457]], i32 0, i32 0, i32 0, i32 0, i1 false)
128+
; CHECK: br label [[DOT_CRIT_EDGE___CRIT_EDGE_CRIT_EDGE:%.*]]
129+
; CHECK: ._crit_edge.._crit_edge_crit_edge:
130+
; CHECK: br label [[DOT_CRIT_EDGE]]
131+
;
132+
br label %._crit_edge
133+
134+
._crit_edge: ; preds = %._crit_edge.._crit_edge_crit_edge, %0
135+
%Block2D_AddrPayload456 = call i32* @llvm.genx.GenISA.LSC2DBlockCreateAddrPayload.p0i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
136+
%Block2D_ReadAddrPayload457 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* %Block2D_AddrPayload456, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0)
137+
%1 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> %Block2D_ReadAddrPayload457, i32 0, i32 0, i32 0, i32 0, i1 false)
138+
%2 = insertelement <8 x half> zeroinitializer, half 0xH0000, i64 0
139+
%3 = insertelement <8 x half> %2, half 0xH0000, i64 0
140+
%Block2D_ReadAddrPayload459 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* %Block2D_AddrPayload456, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0)
141+
%4 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> %Block2D_ReadAddrPayload459, i32 0, i32 0, i32 0, i32 0, i1 false)
142+
%5 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> %Block2D_ReadAddrPayload457, i32 0, i32 0, i32 0, i32 0, i1 false)
143+
br label %._crit_edge.._crit_edge_crit_edge
144+
145+
._crit_edge.._crit_edge_crit_edge: ; preds = %._crit_edge
146+
br label %._crit_edge
147+
}
148+
111149
declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
112150

113151
declare i32* @llvm.genx.GenISA.LSC2DBlockCreateAddrPayload.p0i32(i64, i32, i32, i32, i32, i32, i32, i32, i32)

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ DECLARE_IGC_REGKEY(DWORD, LoopSinkMinSave, 1, "If loop sink can ha
152152
DECLARE_IGC_REGKEY(DWORD, LoopSinkThresholdDelta, 30, "Do loop sink If the estimated register pressure is higher than this + #avaialble registers", false)
153153
DECLARE_IGC_REGKEY(DWORD, LoopSinkRollbackThreshold, 15, "Rollback loop sinking if the estimated regpressure after the sinking is still higher than this + #available registers, and the number of registers can be increased", false)
154154
DECLARE_IGC_REGKEY(bool, LoopSinkEnableLoadsRescheduling, true, "Allow sinking the loads that are already in the loop", false)
155+
DECLARE_IGC_REGKEY(bool, LoopSinkCoarserLoadsRescheduling, true, "Try to reschedule multi-instruction load candidates in larger chunks", false)
155156
DECLARE_IGC_REGKEY(bool, LoopSinkEnable2dBlockReads, true, "Allow sinking of the 2d block reads", false)
156157
DECLARE_IGC_REGKEY(bool, LoopSinkEnableVectorShuffle, true, "Allow sinking of the lowered vector shuffle pattern", false)
157158
DECLARE_IGC_REGKEY(bool, LoopSinkForceRollback, false, "Rollback every loop sinking change (for debug purposes only)", false)

0 commit comments

Comments
 (0)