@@ -35,7 +35,6 @@ IN THE SOFTWARE.
35
35
#include " GenXTargetMachine.h"
36
36
#include " GenXUtil.h"
37
37
#include " GenXVisa.h"
38
- #include " vc/GenXCodeGen/GenXInternalMetadata.h"
39
38
40
39
#include " Probe/Assertion.h"
41
40
#include " llvmWrapper/IR/DerivedTypes.h"
@@ -209,7 +208,6 @@ std::pair<Value *, unsigned>
209
208
GenXThreadPrivateMemory::NormalizeVector (Value *From, Type *To,
210
209
Instruction *Inst) {
211
210
Type *I32Ty = Type::getInt32Ty (Inst->getContext ());
212
- Type *I64Ty = Type::getInt64Ty (Inst->getContext ());
213
211
Value *Res = From;
214
212
Type *FromTy = From->getType ();
215
213
IGC_ASSERT (isa<VectorType>(FromTy));
@@ -236,22 +234,22 @@ GenXThreadPrivateMemory::NormalizeVector(Value *From, Type *To,
236
234
To = IGCLLVM::FixedVectorType::get (I32Ty, NumElts);
237
235
EltSz = I32Ty->getPrimitiveSizeInBits () / genx::ByteBits;
238
236
Res = CastInst::Create (Instruction::BitCast, Res, To, " " , Inst);
239
- } else if (m_DL->getTypeSizeInBits (cast<VectorType>(To)->getElementType ()) <
240
- genx::DWordBits) {
237
+ } else if (cast<VectorType>(To)->getElementType ()->getPrimitiveSizeInBits () <
238
+ genx::DWordBits
239
+ // this is required for correct generation of svm.gather/scatter
240
+ // of data of type which size is < i32 because these intrinsics
241
+ // infer their block size from the type of the data they handle
242
+ && !m_useGlobalMem) {
241
243
To = IGCLLVM::FixedVectorType::get (I32Ty, NumElts);
242
- Res = CastInst::CreateZExtOrBitCast (From, To, " " , Inst);
243
- } else if (!m_useGlobalMem &&
244
- m_DL->getTypeSizeInBits (cast<VectorType>(To)->getElementType ()) ==
245
- genx::QWordBits) {
246
- if (From->getType ()->getScalarType ()->isPointerTy ()) {
247
- auto *NewType = IGCLLVM::FixedVectorType::get (I64Ty, NumElts);
248
- From = CastInst::Create (CastInst::PtrToInt, From, NewType, " " , Inst);
249
- }
244
+
245
+ Res = CastInst::Create (Instruction::ZExt, From, To, " " , Inst);
246
+ } else if (cast<VectorType>(To)->getElementType ()->getPrimitiveSizeInBits () ==
247
+ genx::QWordBits) {
250
248
NumElts *= 2 ;
251
249
EltSz = I32Ty->getPrimitiveSizeInBits () / genx::ByteBits;
252
250
To = IGCLLVM::FixedVectorType::get (I32Ty, NumElts);
253
251
254
- Res = CastInst::CreateBitOrPointerCast ( From, To, " " , Inst);
252
+ Res = CastInst::Create (Instruction::BitCast, From, To, " " , Inst);
255
253
}
256
254
257
255
return std::make_pair (Res, EltSz);
@@ -260,8 +258,6 @@ GenXThreadPrivateMemory::NormalizeVector(Value *From, Type *To,
260
258
Instruction *
261
259
GenXThreadPrivateMemory::RestoreVectorAfterNormalization (Instruction *From,
262
260
Type *To) {
263
- if (From->getType () == To)
264
- return From;
265
261
Instruction *Restored = From;
266
262
unsigned EltSz = m_DL->getTypeSizeInBits (To->getScalarType ());
267
263
IGC_ASSERT (EltSz > 0 );
@@ -523,19 +519,35 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
523
519
LdTy = IGCLLVM::FixedVectorType::get (LdTy, 1 );
524
520
525
521
unsigned NumEltsToLoad = cast<VectorType>(LdTy)->getNumElements ();
526
- unsigned ValueEltSz = m_DL->getTypeSizeInBits (LdEltTy) / genx::ByteBits;
522
+ unsigned LdEltTySz = m_DL->getTypeSizeInBits (LdEltTy);
523
+ if (!(m_useGlobalMem && LdEltTy->isIntegerTy (64 )) &&
524
+ LdEltTySz == genx::QWordBits)
525
+ NumEltsToLoad *= 2 ;
527
526
528
527
Value *PredVal = ConstantInt::get (Type::getInt1Ty (*m_ctx), 1 );
529
528
Value *Pred = Builder.CreateVectorSplat (NumEltsToLoad, PredVal);
530
529
531
530
Type *I32Ty = Type::getInt32Ty (*m_ctx);
532
531
Type *I64Ty = Type::getInt64Ty (*m_ctx);
532
+ Type *TyToLoad = (m_useGlobalMem && LdEltTy->isIntegerTy (64 )) ? I64Ty : I32Ty;
533
+ if (LdEltTy->isFloatTy ())
534
+ TyToLoad = LdEltTy;
535
+ Type *RealTyToLoad = LdEltTy;
536
+ if (!(m_useGlobalMem && LdEltTy->isIntegerTy (64 )) &&
537
+ m_DL->getTypeSizeInBits (RealTyToLoad) == genx::QWordBits)
538
+ RealTyToLoad = I32Ty;
539
+ unsigned RealTyToLoadSz =
540
+ m_DL->getTypeSizeInBits (RealTyToLoad) / genx::ByteBits;
541
+ // we don't want to use improper block sizes for loads of i8/i16
542
+ // to make sure we comply with alignment rules for gathers
543
+ bool NoExtToDword =
544
+ m_useGlobalMem &&
545
+ !(LdI->getType ()->isAggregateType () || LdI->getType ()->isVectorTy ()) &&
546
+ m_DL->getTypeSizeInBits (LdI->getType ()) < genx::DWordBits;
547
+ if (NoExtToDword)
548
+ TyToLoad = LdI->getType ();
533
549
Value *OldValOfTheDataRead =
534
- Builder.CreateVectorSplat (NumEltsToLoad, UndefValue::get (LdEltTy));
535
- std::tie (OldValOfTheDataRead, ValueEltSz) =
536
- NormalizeVector (OldValOfTheDataRead, LdTy, LdI);
537
- NumEltsToLoad =
538
- cast<VectorType>(OldValOfTheDataRead->getType ())->getNumElements ();
550
+ Builder.CreateVectorSplat (NumEltsToLoad, UndefValue::get (TyToLoad));
539
551
540
552
Value *PointerOp = LdI->getPointerOperand ();
541
553
Value *Offset = lookForPtrReplacement (PointerOp);
@@ -545,13 +557,10 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
545
557
? llvm::GenXIntrinsic::genx_svm_gather
546
558
: llvm::GenXIntrinsic::genx_gather_scaled;
547
559
548
- Value *EltsOffset = FormEltsOffsetVector (NumEltsToLoad, ValueEltSz , LdI);
560
+ Value *EltsOffset = FormEltsOffsetVector (NumEltsToLoad, RealTyToLoadSz , LdI);
549
561
550
- unsigned NumBlocks = m_DL->getTypeSizeInBits (LdEltTy) / genx::ByteBits;
551
- // This logic is aligned with the on in CisaBuilder and GenXLowering
552
- // The reason behind check for == 2 is that svm intrinsics don't support
553
- // BlockSize of 2, so for ops with i16s we have to use BlockSize == 1 and NumBlocks == 2
554
- Value *logNumBlocks = ConstantInt::get (I32Ty, genx::log2 (NumBlocks == 2 ? NumBlocks : 1 ));
562
+ unsigned SrcSize = genx::log2 (RealTyToLoadSz);
563
+ Value *logNumBlocks = ConstantInt::get (I32Ty, m_useGlobalMem ? 0 : SrcSize);
555
564
Value *Scale = ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 );
556
565
Value *Surface = ConstantInt::get (I32Ty,
557
566
visa::getReservedSurfaceIndex (m_stack));
@@ -592,10 +601,6 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
592
601
ProperGather = LdVal;
593
602
}
594
603
595
- Gather->setMetadata (InstMD::SVMBlockType,
596
- MDNode::get (*m_ctx, llvm::ValueAsMetadata::get (
597
- UndefValue::get (LdEltTy))));
598
-
599
604
LLVM_DEBUG (dbgs () << *Gather << " \n " );
600
605
LdI->replaceAllUsesWith (ProperGather);
601
606
LdI->eraseFromParent ();
@@ -642,9 +647,7 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
642
647
{Pred->getType (),
643
648
(m_useGlobalMem ? Offset : EltsOffset)->getType (),
644
649
ValueOp->getType ()});
645
- unsigned NumBlocks = m_DL->getTypeSizeInBits (ValueOpTy->getScalarType ()) / genx::ByteBits;
646
- // see the comment in replaceLoad above
647
- Value *logNumBlocks = ConstantInt::get (I32Ty, genx::log2 (NumBlocks == 2 ? NumBlocks : 1 ));
650
+ Value *logNumBlocks = ConstantInt::get (I32Ty, m_useGlobalMem ? 0 : genx::log2 (ValueEltSz));
648
651
Value *Scale = ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 );
649
652
Value *Surface = ConstantInt::get (I32Ty,
650
653
visa::getReservedSurfaceIndex (m_stack));
@@ -659,11 +662,6 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
659
662
Scatter->insertAfter (StI);
660
663
StI->eraseFromParent ();
661
664
662
- Scatter->setMetadata (
663
- InstMD::SVMBlockType,
664
- MDNode::get (*m_ctx, llvm::ValueAsMetadata::get (
665
- UndefValue::get (ValueOpTy->getScalarType ()))));
666
-
667
665
LLVM_DEBUG (dbgs () << *Scatter << " \n " );
668
666
m_scatter.push_back (Scatter);
669
667
@@ -1096,12 +1094,6 @@ void SplitScatter(CallInst *CI) {
1096
1094
}
1097
1095
IGC_ASSERT (FirstScatter && SecondScatter);
1098
1096
1099
- auto *MD = CI->getMetadata (InstMD::SVMBlockType);
1100
- if (MD) {
1101
- FirstScatter->setMetadata (InstMD::SVMBlockType, MD);
1102
- SecondScatter->setMetadata (InstMD::SVMBlockType, MD);
1103
- }
1104
-
1105
1097
FirstScatter->insertAfter (CI);
1106
1098
SecondScatter->insertAfter (FirstScatter);
1107
1099
@@ -1171,12 +1163,6 @@ void SplitGather(CallInst *CI) {
1171
1163
}
1172
1164
IGC_ASSERT (FirstGather && SecondGather);
1173
1165
1174
- auto *MD = CI->getMetadata (InstMD::SVMBlockType);
1175
- if (MD) {
1176
- FirstGather->setMetadata (InstMD::SVMBlockType, MD);
1177
- SecondGather->setMetadata (InstMD::SVMBlockType, MD);
1178
- }
1179
-
1180
1166
FirstGather->insertAfter (CI);
1181
1167
SecondGather->insertAfter (FirstGather);
1182
1168
@@ -1294,16 +1280,14 @@ bool GenXThreadPrivateMemory::runOnModule(Module &M) {
1294
1280
m_ST = &getAnalysis<TargetPassConfig>()
1295
1281
.getTM <GenXTargetMachine>()
1296
1282
.getGenXSubtarget ();
1297
- if (!m_ST->isOCLRuntime ())
1298
- m_useGlobalMem = false ;
1299
1283
for (auto &F : M)
1300
1284
visit (F);
1301
- if (m_useGlobalMem ||
1302
- (m_ST-> isOCLRuntime () && std::find_if (m_alloca.begin (), m_alloca.end (),
1303
- SVMChecker ()) != m_alloca.end () )) {
1285
+ if (! m_useGlobalMem &&
1286
+ std::find_if (m_alloca.begin (), m_alloca.end (), SVMChecker ()) !=
1287
+ m_alloca.end ()) {
1304
1288
LLVM_DEBUG (dbgs () << " Switching TPM to SVM\n " );
1305
1289
// TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1306
- M.addModuleFlag (Module::ModFlagBehavior::Error, ModuleMD::UseSVMStack , 1 );
1290
+ M.addModuleFlag (Module::ModFlagBehavior::Error, " genx.useGlobalMem " , 1 );
1307
1291
m_useGlobalMem = true ;
1308
1292
}
1309
1293
bool Result = false ;
0 commit comments