Skip to content

Commit 3ca208f

Browse files
sys-igcigcbot
authored and committed
Changes in code.
1 parent 5fb184c commit 3ca208f

File tree

3 files changed

+180
-138
lines changed

3 files changed

+180
-138
lines changed

IGC/AdaptorOCL/UnifyIROCL.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -513,10 +513,10 @@ static void CommonOCLBasedPasses(
513513
mpm.add(createSROAPass());
514514
mpm.add(createIGCInstructionCombiningPass());
515515

516-
// true means selective scalarization
516+
// "false" to createScalarizerPass() means that vector load/stores are NOT scalarized
517517
if (IGC_IS_FLAG_ENABLED(DisableScalarizerGPGPU) == false)
518518
{
519-
mpm.add(createScalarizerPass(true));
519+
mpm.add(createScalarizerPass(false));
520520
}
521521

522522
// Create a dummy kernel to attach the symbol table if necessary

IGC/Compiler/Optimizer/Scalarizer.cpp

Lines changed: 163 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,12 @@ IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG
6969

7070
char ScalarizeFunction::ID = 0;
7171

72-
ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
72+
ScalarizeFunction::ScalarizeFunction(bool scalarizingVectorLDSTType) : FunctionPass(ID)
7373
{
7474
initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());
7575

7676
for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
77-
m_SelectiveScalarization = selectiveScalarization;
77+
m_ScalarizingVectorLDSTType = scalarizingVectorLDSTType;
7878

7979
// Initialize SCM buffers and allocation
8080
m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
@@ -121,13 +121,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
121121
m_SCM.clear();
122122
releaseAllSCMEntries();
123123
m_DRL.clear();
124-
m_Excludes.clear();
125-
126-
// collecting instructions that we want to avoid scalarization
127-
if (m_SelectiveScalarization)
128-
{
129-
buildExclusiveSet();
130-
}
131124

132125
// Scalarization. Iterate over all the instructions
133126
// Always hold the iterator at the instruction following the one being scalarized (so the
@@ -139,14 +132,7 @@ bool ScalarizeFunction::runOnFunction(Function& F)
139132
Instruction* currInst = &*sI;
140133
// Move iterator to next instruction BEFORE scalarizing current instruction
141134
++sI;
142-
if (m_Excludes.count(currInst))
143-
{
144-
recoverNonScalarizableInst(currInst);
145-
}
146-
else
147-
{
148-
dispatchInstructionToScalarize(currInst);
149-
}
135+
dispatchInstructionToScalarize(currInst);
150136
}
151137

152138
resolveVectorValues();
@@ -175,111 +161,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
175161
return true;
176162
}
177163

178-
void ScalarizeFunction::buildExclusiveSet()
179-
{
180-
inst_iterator sI = inst_begin(m_currFunc);
181-
inst_iterator sE = inst_end(m_currFunc);
182-
std::vector<llvm::Value*> workset;
183-
while (sI != sE)
184-
{
185-
Instruction* currInst = &*sI;
186-
++sI;
187-
if (CallInst* CI = dyn_cast<CallInst>(currInst))
188-
{
189-
unsigned numOperands = CI->getNumArgOperands();
190-
for (unsigned i = 0; i < numOperands; i++)
191-
{
192-
Value* operand = CI->getArgOperand(i);
193-
if (isa<VectorType>(operand->getType()))
194-
{
195-
workset.push_back(operand);
196-
}
197-
}
198-
}
199-
else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
200-
{
201-
Value* scalarIndexVal = IEI->getOperand(2);
202-
// If the index is not a constant - we cannot statically remove this inst
203-
if (!isa<ConstantInt>(scalarIndexVal)) {
204-
workset.push_back(IEI);
205-
}
206-
}
207-
else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
208-
{
209-
Value* scalarIndexVal = EEI->getOperand(1);
210-
// If the index is not a constant - we cannot statically remove this inst
211-
if (!isa<ConstantInt>(scalarIndexVal)) {
212-
workset.push_back(EEI->getOperand(0));
213-
}
214-
}
215-
}
216-
while (!workset.empty())
217-
{
218-
auto Def = workset.back();
219-
workset.pop_back();
220-
if (m_Excludes.count(Def))
221-
{
222-
continue;
223-
}
224-
if (auto IEI = dyn_cast<InsertElementInst>(Def))
225-
{
226-
m_Excludes.insert(IEI);
227-
if (!m_Excludes.count(IEI->getOperand(0)) &&
228-
(isa<PHINode>(IEI->getOperand(0)) ||
229-
isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
230-
isa<InsertElementInst>(IEI->getOperand(0))))
231-
{
232-
workset.push_back(IEI->getOperand(0));
233-
}
234-
}
235-
else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
236-
{
237-
m_Excludes.insert(SVI);
238-
if (!m_Excludes.count(SVI->getOperand(0)) &&
239-
(isa<PHINode>(SVI->getOperand(0)) ||
240-
isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
241-
isa<InsertElementInst>(SVI->getOperand(0))))
242-
{
243-
workset.push_back(SVI->getOperand(0));
244-
}
245-
if (!m_Excludes.count(SVI->getOperand(1)) &&
246-
(isa<PHINode>(SVI->getOperand(1)) ||
247-
isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
248-
isa<InsertElementInst>(SVI->getOperand(1))))
249-
{
250-
workset.push_back(SVI->getOperand(1));
251-
}
252-
}
253-
else if (auto PHI = dyn_cast<PHINode>(Def))
254-
{
255-
m_Excludes.insert(PHI);
256-
for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
257-
if (!m_Excludes.count(PHI->getOperand(i)) &&
258-
(isa<PHINode>(PHI->getOperand(i)) ||
259-
isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
260-
isa<InsertElementInst>(PHI->getOperand(i))))
261-
{
262-
workset.push_back(PHI->getOperand(i));
263-
}
264-
}
265-
else
266-
{
267-
continue;
268-
}
269-
// check use
270-
for (auto U : Def->users())
271-
{
272-
if (!m_Excludes.count(U) &&
273-
(isa<PHINode>(U) ||
274-
isa<ShuffleVectorInst>(U) ||
275-
isa<InsertElementInst>(U)))
276-
{
277-
workset.push_back(U);
278-
}
279-
}
280-
}
281-
}
282-
283164
void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
284165
{
285166
V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
@@ -354,6 +235,13 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
354235
case Instruction::GetElementPtr:
355236
scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
356237
break;
238+
case Instruction::Load:
239+
scalarizeInstruction(dyn_cast<LoadInst>(I));
240+
break;
241+
case Instruction::Store:
242+
scalarizeInstruction(dyn_cast<StoreInst>(I));
243+
break;
244+
357245
// The remaining instructions are not supported for scalarization. Keep "as is"
358246
default:
359247
recoverNonScalarizableInst(I);
@@ -1004,6 +892,149 @@ void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
1004892
m_removedInsts.insert(GI);
1005893
}
1006894

895+
void ScalarizeFunction::scalarizeInstruction(LoadInst* LI)
896+
{
897+
V_PRINT(scalarizer, "\t\tLoad instruction\n");
898+
IGC_ASSERT_MESSAGE(LI, "instruction type dynamic cast failed");
899+
900+
VectorType* dataType = dyn_cast<VectorType>(LI->getType());
901+
if (isScalarizableLoadStoreType(dataType) && m_pDL)
902+
{
903+
// Prepare empty SCM entry for the instruction
904+
SCMEntry* newEntry = getSCMEntry(LI);
905+
906+
// Get additional info from instruction
907+
unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
908+
unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
909+
IGC_ASSERT(elementSize);
910+
IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiply of element size");
911+
IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiply of element size");
912+
unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
913+
914+
// Obtain scalarized arguments
915+
// 1 - to allow scalarizing Load with any pointer type
916+
// 0 - to limit scalarizing to special case where packetizer benifit from the scalarizing
917+
#if 1
918+
// Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
919+
Value * GepPtr = LI->getOperand(0);
920+
PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
921+
Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
922+
Type* indexType = Type::getInt32Ty(*m_moduleContext);
923+
// Generate new (scalar) instructions
924+
SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedInsts;
925+
newScalarizedInsts.resize(numDupElements);
926+
for (unsigned dup = 0; dup < numDupElements; dup++)
927+
{
928+
Constant* laneVal = ConstantInt::get(indexType, dup);
929+
Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", LI);
930+
newScalarizedInsts[dup] = new LoadInst(pGEP->getType()->getPointerElementType(), pGEP, LI->getName(), LI);
931+
}
932+
#else
933+
GetElementPtrInst* operand = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
934+
if (!operand || operand->getNumIndices() != 1)
935+
{
936+
return recoverNonScalarizableInst(LI);
937+
}
938+
// Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
939+
Value* GepPtr = operand->getPointerOperand();
940+
PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
941+
Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
942+
Type* indexType = operand->getOperand(1)->getType();
943+
// Generate new (scalar) instructions
944+
Value* newScalarizedInsts[MAX_INPUT_VECTOR_WIDTH];
945+
Constant* elementNumVal = ConstantInt::get(indexType, numElements);
946+
for (unsigned dup = 0; dup < numDupElements; dup++)
947+
{
948+
Constant* laneVal = ConstantInt::get(indexType, dup);
949+
Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", LI);
950+
Value* pIndex = BinaryOperator::CreateMul(operand->getOperand(1), elementNumVal, "GEPIndex_s", LI);
951+
pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", LI);
952+
newScalarizedInsts[dup] = new LoadInst(pGEP, LI->getName(), LI);
953+
}
954+
#endif
955+
// Add new value/s to SCM
956+
updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), LI, true);
957+
958+
// Remove original instruction
959+
m_removedInsts.insert(LI);
960+
return;
961+
}
962+
return recoverNonScalarizableInst(LI);
963+
}
964+
965+
void ScalarizeFunction::scalarizeInstruction(StoreInst* SI)
966+
{
967+
V_PRINT(scalarizer, "\t\tStore instruction\n");
968+
IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
969+
970+
int indexPtr = SI->getPointerOperandIndex();
971+
int indexData = 1 - indexPtr;
972+
VectorType* dataType = dyn_cast<VectorType>(SI->getOperand(indexData)->getType());
973+
if (isScalarizableLoadStoreType(dataType) && m_pDL)
974+
{
975+
// Get additional info from instruction
976+
unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
977+
unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
978+
IGC_ASSERT(elementSize);
979+
IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiply of element size");
980+
IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiply of element size");
981+
982+
unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
983+
984+
// Obtain scalarized arguments
985+
// 1 - to allow scalarizing Load with any pointer type
986+
// 0 - to limit scalarizing to special case where packetizer benifit from the scalarizing
987+
#if 1
988+
SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand0;
989+
990+
bool opIsConst;
991+
obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
992+
993+
// Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
994+
Value* GepPtr = SI->getOperand(indexPtr);
995+
PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
996+
Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
997+
Type* indexType = Type::getInt32Ty(*m_moduleContext);
998+
// Generate new (scalar) instructions
999+
for (unsigned dup = 0; dup < numDupElements; dup++)
1000+
{
1001+
Constant* laneVal = ConstantInt::get(indexType, dup);
1002+
Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", SI);
1003+
new StoreInst(operand0[dup], pGEP, SI);
1004+
}
1005+
#else
1006+
GetElementPtrInst* operand1 = dyn_cast<GetElementPtrInst>(SI->getOperand(indexPtr));
1007+
if (!operand1 || operand1->getNumIndices() != 1)
1008+
{
1009+
return recoverNonScalarizableInst(SI);
1010+
}
1011+
Value* operand0[MAX_INPUT_VECTOR_WIDTH];
1012+
bool opIsConst;
1013+
obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
1014+
1015+
// Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
1016+
Value* GepPtr = operand1->getPointerOperand();
1017+
PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
1018+
Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
1019+
Type* indexType = operand1->getOperand(1)->getType();
1020+
// Generate new (scalar) instructions
1021+
Constant* elementNumVal = ConstantInt::get(indexType, numElements);
1022+
for (unsigned dup = 0; dup < numDupElements; dup++)
1023+
{
1024+
Constant* laneVal = ConstantInt::get(indexType, dup);
1025+
Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", SI);
1026+
Value* pIndex = BinaryOperator::CreateMul(operand1->getOperand(1), elementNumVal, "GEPIndex_s", SI);
1027+
pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", SI);
1028+
new StoreInst(operand0[dup], pGEP, SI);
1029+
}
1030+
#endif
1031+
// Remove original instruction
1032+
m_removedInsts.insert(SI);
1033+
return;
1034+
}
1035+
return recoverNonScalarizableInst(SI);
1036+
}
1037+
10071038
void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
10081039
Value* origValue, Instruction* origInst, int destIdx)
10091040
{
@@ -1380,9 +1411,17 @@ void ScalarizeFunction::resolveDeferredInstructions()
13801411
m_DRL.clear();
13811412
}
13821413

1383-
extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
1414+
bool ScalarizeFunction::isScalarizableLoadStoreType(VectorType* type)
1415+
{
1416+
// Scalarize Load/Store worth doing only if:
1417+
// 1. Gather/Scatter are supported
1418+
// 2. Load/Store type is a vector
1419+
return (m_ScalarizingVectorLDSTType && (NULL != type));
1420+
}
1421+
1422+
extern "C" FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType)
13841423
{
1385-
return new ScalarizeFunction(selectiveScalarization);
1424+
return new ScalarizeFunction(scalarizingVectorLDSTType);
13861425
}
13871426

13881427

0 commit comments

Comments
 (0)