@@ -69,12 +69,12 @@ IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG
char ScalarizeFunction::ID = 0;

- ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
+ ScalarizeFunction::ScalarizeFunction(bool scalarizingVectorLDSTType) : FunctionPass(ID)
{
    initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());

    for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
-     m_SelectiveScalarization = selectiveScalarization;
+     m_ScalarizingVectorLDSTType = scalarizingVectorLDSTType;

    // Initialize SCM buffers and allocation
    m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
@@ -121,13 +121,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
    m_SCM.clear();
    releaseAllSCMEntries();
    m_DRL.clear();
-     m_Excludes.clear();
-
-     // collecting instructions that we want to avoid scalarization
-     if (m_SelectiveScalarization)
-     {
-         buildExclusiveSet();
-     }

    // Scalarization. Iterate over all the instructions
    // Always hold the iterator at the instruction following the one being scalarized (so the
@@ -139,14 +132,7 @@ bool ScalarizeFunction::runOnFunction(Function& F)
        Instruction* currInst = &*sI;
        // Move iterator to next instruction BEFORE scalarizing current instruction
        ++sI;
-         if (m_Excludes.count(currInst))
-         {
-             recoverNonScalarizableInst(currInst);
-         }
-         else
-         {
-             dispatchInstructionToScalarize(currInst);
-         }
+         dispatchInstructionToScalarize(currInst);
    }

    resolveVectorValues();
@@ -175,111 +161,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
    return true;
}

- void ScalarizeFunction::buildExclusiveSet()
- {
-     inst_iterator sI = inst_begin(m_currFunc);
-     inst_iterator sE = inst_end(m_currFunc);
-     std::vector<llvm::Value*> workset;
-     while (sI != sE)
-     {
-         Instruction* currInst = &*sI;
-         ++sI;
-         if (CallInst* CI = dyn_cast<CallInst>(currInst))
-         {
-             unsigned numOperands = CI->getNumArgOperands();
-             for (unsigned i = 0; i < numOperands; i++)
-             {
-                 Value* operand = CI->getArgOperand(i);
-                 if (isa<VectorType>(operand->getType()))
-                 {
-                     workset.push_back(operand);
-                 }
-             }
-         }
-         else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
-         {
-             Value* scalarIndexVal = IEI->getOperand(2);
-             // If the index is not a constant - we cannot statically remove this inst
-             if (!isa<ConstantInt>(scalarIndexVal)) {
-                 workset.push_back(IEI);
-             }
-         }
-         else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
-         {
-             Value* scalarIndexVal = EEI->getOperand(1);
-             // If the index is not a constant - we cannot statically remove this inst
-             if (!isa<ConstantInt>(scalarIndexVal)) {
-                 workset.push_back(EEI->getOperand(0));
-             }
-         }
-     }
-     while (!workset.empty())
-     {
-         auto Def = workset.back();
-         workset.pop_back();
-         if (m_Excludes.count(Def))
-         {
-             continue;
-         }
-         if (auto IEI = dyn_cast<InsertElementInst>(Def))
-         {
-             m_Excludes.insert(IEI);
-             if (!m_Excludes.count(IEI->getOperand(0)) &&
-                 (isa<PHINode>(IEI->getOperand(0)) ||
-                  isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
-                  isa<InsertElementInst>(IEI->getOperand(0))))
-             {
-                 workset.push_back(IEI->getOperand(0));
-             }
-         }
-         else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
-         {
-             m_Excludes.insert(SVI);
-             if (!m_Excludes.count(SVI->getOperand(0)) &&
-                 (isa<PHINode>(SVI->getOperand(0)) ||
-                  isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
-                  isa<InsertElementInst>(SVI->getOperand(0))))
-             {
-                 workset.push_back(SVI->getOperand(0));
-             }
-             if (!m_Excludes.count(SVI->getOperand(1)) &&
-                 (isa<PHINode>(SVI->getOperand(1)) ||
-                  isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
-                  isa<InsertElementInst>(SVI->getOperand(1))))
-             {
-                 workset.push_back(SVI->getOperand(1));
-             }
-         }
-         else if (auto PHI = dyn_cast<PHINode>(Def))
-         {
-             m_Excludes.insert(PHI);
-             for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
-                 if (!m_Excludes.count(PHI->getOperand(i)) &&
-                     (isa<PHINode>(PHI->getOperand(i)) ||
-                      isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
-                      isa<InsertElementInst>(PHI->getOperand(i))))
-                 {
-                     workset.push_back(PHI->getOperand(i));
-                 }
-         }
-         else
-         {
-             continue;
-         }
-         // check use
-         for (auto U : Def->users())
-         {
-             if (!m_Excludes.count(U) &&
-                 (isa<PHINode>(U) ||
-                  isa<ShuffleVectorInst>(U) ||
-                  isa<InsertElementInst>(U)))
-             {
-                 workset.push_back(U);
-             }
-         }
-     }
- }
-

void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
{
    V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
@@ -354,6 +235,13 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
    case Instruction::GetElementPtr:
        scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
        break;
+     case Instruction::Load:
+         scalarizeInstruction(dyn_cast<LoadInst>(I));
+         break;
+     case Instruction::Store:
+         scalarizeInstruction(dyn_cast<StoreInst>(I));
+         break;
+
        // The remaining instructions are not supported for scalarization. Keep "as is"
    default:
        recoverNonScalarizableInst(I);
@@ -1004,6 +892,149 @@ void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
    m_removedInsts.insert(GI);
}

+ void ScalarizeFunction::scalarizeInstruction(LoadInst* LI)
+ {
+     V_PRINT(scalarizer, "\t\tLoad instruction\n");
+     IGC_ASSERT_MESSAGE(LI, "instruction type dynamic cast failed");
+
+     VectorType* dataType = dyn_cast<VectorType>(LI->getType());
+     if (isScalarizableLoadStoreType(dataType) && m_pDL)
+     {
+         // Prepare empty SCM entry for the instruction
+         SCMEntry* newEntry = getSCMEntry(LI);
+
+         // Get additional info from instruction
+         unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
+         unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
+         IGC_ASSERT(elementSize);
+         IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
+         IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
+         unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
+
+         // Obtain scalarized arguments
+         // 1 - to allow scalarizing Load with any pointer type
+         // 0 - to limit scalarizing to the special case where the packetizer benefits from the scalarizing
+ #if 1
+         // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it by numElements (assuming one index only).
+         Value* GepPtr = LI->getOperand(0);
+         PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+         Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
+         Type* indexType = Type::getInt32Ty(*m_moduleContext);
+         // Generate new (scalar) instructions
+         SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> newScalarizedInsts;
+         newScalarizedInsts.resize(numDupElements);
+         for (unsigned dup = 0; dup < numDupElements; dup++)
+         {
+             Constant* laneVal = ConstantInt::get(indexType, dup);
+             Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", LI);
+             newScalarizedInsts[dup] = new LoadInst(pGEP->getType()->getPointerElementType(), pGEP, LI->getName(), LI);
+         }
+ #else
+         GetElementPtrInst* operand = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+         if (!operand || operand->getNumIndices() != 1)
+         {
+             return recoverNonScalarizableInst(LI);
+         }
+         // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it by numElements (assuming one index only).
+         Value* GepPtr = operand->getPointerOperand();
+         PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+         Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
+         Type* indexType = operand->getOperand(1)->getType();
+         // Generate new (scalar) instructions
+         Value* newScalarizedInsts[MAX_INPUT_VECTOR_WIDTH];
+         Constant* elementNumVal = ConstantInt::get(indexType, numElements);
+         for (unsigned dup = 0; dup < numDupElements; dup++)
+         {
+             Constant* laneVal = ConstantInt::get(indexType, dup);
+             Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", LI);
+             Value* pIndex = BinaryOperator::CreateMul(operand->getOperand(1), elementNumVal, "GEPIndex_s", LI);
+             pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", LI);
+             newScalarizedInsts[dup] = new LoadInst(pGEP, LI->getName(), LI);
+         }
+ #endif
+         // Add new value/s to SCM
+         updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), LI, true);
+
+         // Remove original instruction
+         m_removedInsts.insert(LI);
+         return;
+     }
+     return recoverNonScalarizableInst(LI);
+ }
+
+ void ScalarizeFunction::scalarizeInstruction(StoreInst* SI)
+ {
+     V_PRINT(scalarizer, "\t\tStore instruction\n");
+     IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
+
+     int indexPtr = SI->getPointerOperandIndex();
+     int indexData = 1 - indexPtr;
+     VectorType* dataType = dyn_cast<VectorType>(SI->getOperand(indexData)->getType());
+     if (isScalarizableLoadStoreType(dataType) && m_pDL)
+     {
+         // Get additional info from instruction
+         unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
+         unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
+         IGC_ASSERT(elementSize);
+         IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
+         IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
+
+         unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
+
+         // Obtain scalarized arguments
+         // 1 - to allow scalarizing Store with any pointer type
+         // 0 - to limit scalarizing to the special case where the packetizer benefits from the scalarizing
+ #if 1
+         SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> operand0;
+
+         bool opIsConst;
+         obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
+
+         // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it by numElements (assuming one index only).
+         Value* GepPtr = SI->getOperand(indexPtr);
+         PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+         Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
+         Type* indexType = Type::getInt32Ty(*m_moduleContext);
+         // Generate new (scalar) instructions
+         for (unsigned dup = 0; dup < numDupElements; dup++)
+         {
+             Constant* laneVal = ConstantInt::get(indexType, dup);
+             Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", SI);
+             new StoreInst(operand0[dup], pGEP, SI);
+         }
+ #else
+         GetElementPtrInst* operand1 = dyn_cast<GetElementPtrInst>(SI->getOperand(indexPtr));
+         if (!operand1 || operand1->getNumIndices() != 1)
+         {
+             return recoverNonScalarizableInst(SI);
+         }
+         Value* operand0[MAX_INPUT_VECTOR_WIDTH];
+         bool opIsConst;
+         obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
+
+         // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it by numElements (assuming one index only).
+         Value* GepPtr = operand1->getPointerOperand();
+         PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+         Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
+         Type* indexType = operand1->getOperand(1)->getType();
+         // Generate new (scalar) instructions
+         Constant* elementNumVal = ConstantInt::get(indexType, numElements);
+         for (unsigned dup = 0; dup < numDupElements; dup++)
+         {
+             Constant* laneVal = ConstantInt::get(indexType, dup);
+             Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", SI);
+             Value* pIndex = BinaryOperator::CreateMul(operand1->getOperand(1), elementNumVal, "GEPIndex_s", SI);
+             pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", SI);
+             new StoreInst(operand0[dup], pGEP, SI);
+         }
+ #endif
+         // Remove original instruction
+         m_removedInsts.insert(SI);
+         return;
+     }
+     return recoverNonScalarizableInst(SI);
+ }
+

void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
    Value* origValue, Instruction* origInst, int destIdx)
{
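To make the new transform concrete: scalarizeInstruction(LoadInst*) bit-casts the vector pointer to a scalar pointer ("ptrVec2ptrScl"), then emits one per-lane GEP ("GEP_lane") and one scalar load per element. A minimal C-level sketch of the resulting access pattern for a 4-wide float load; Float4 and scalarizedLoad are illustrative names, not part of the pass:

struct Float4 { float lane[4]; };

// Rough C++ equivalent of the IR emitted for "%v = load <4 x float>, <4 x float>* %p"
void scalarizedLoad(const Float4* p, float out[4])
{
    // "ptrVec2ptrScl": reinterpret the vector pointer as a scalar pointer
    const float* base = reinterpret_cast<const float*>(p);
    for (unsigned dup = 0; dup < 4; ++dup)
    {
        // "GEP_lane" plus one scalar LoadInst per element
        out[dup] = base[dup];
    }
}

The store path is symmetric: the vector value is first split via obtainScalarizedValues, then each lane is written through its own "GEP_lane" pointer.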
@@ -1380,9 +1411,17 @@ void ScalarizeFunction::resolveDeferredInstructions()
    m_DRL.clear();
}

- extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
+ bool ScalarizeFunction::isScalarizableLoadStoreType(VectorType* type)
+ {
+     // Scalarizing Load/Store is only worth doing if:
+     // 1. Gather/Scatter are supported
+     // 2. The Load/Store type is a vector
+     return (m_ScalarizingVectorLDSTType && (NULL != type));
+ }
+
+ extern "C" FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType)
{
-     return new ScalarizeFunction(selectiveScalarization);
+     return new ScalarizeFunction(scalarizingVectorLDSTType);
}
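For context, a minimal, hypothetical sketch of how a driver might wire the renamed entry point into the LLVM legacy pass manager; addIGCScalarizer and gatherScatterSupported are illustrative names and are not part of this commit:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"

extern "C" llvm::FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType);

void addIGCScalarizer(llvm::legacy::PassManager& PM, bool gatherScatterSupported)
{
    // Enable vector load/store scalarization only when the target's
    // gather/scatter support makes per-lane accesses worthwhile
    // (see isScalarizableLoadStoreType above).
    PM.add(createScalarizerPass(gatherScatterSupported));
}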