Skip to content

Commit c02698d

Browse files
zuban32 authored and igcbot committed
[Autobackout][FuncReg]Revert of change: 89ca3bd
Switch TPM to SVM entirely
1 parent 3ca208f commit c02698d

File tree

3 files changed

+42
-69
lines changed

3 files changed

+42
-69
lines changed

IGC/VectorCompiler/include/vc/GenXCodeGen/GenXInternalMetadata.h

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -38,14 +38,6 @@ namespace FunctionMD {
3838
inline constexpr const char GenXKernelInternal[] = "genx.kernel.internal";
3939
}
4040

41-
namespace InstMD {
42-
inline constexpr const char SVMBlockType[] = "SVMBlockType";
43-
}
44-
45-
namespace ModuleMD {
46-
inline constexpr const char UseSVMStack[] = "genx.useGlobalMem";
47-
}
48-
4941
namespace internal {
5042

5143
namespace KernelMDOp {

IGC/VectorCompiler/lib/GenXCodeGen/GenXCisaBuilder.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3640,10 +3640,7 @@ void GenXKernelBuilder::buildIntrinsic(CallInst *CI, unsigned IntrinID,
36403640
Value *V = CI;
36413641
if (!AI.isRet())
36423642
V = CI->getArgOperand(AI.getArgIdx());
3643-
auto *EltType = V->getType()->getScalarType();
3644-
if (auto *MDType = CI->getMetadata(InstMD::SVMBlockType))
3645-
EltType = cast<ValueAsMetadata>(MDType->getOperand(0).get())->getType();
3646-
unsigned ElBytes = getResultedTypeSize(EltType, DL);
3643+
unsigned ElBytes = getResultedTypeSize(V->getType()->getScalarType(), DL);
36473644
switch (ElBytes) {
36483645
// For N = 2 byte data type, use block size 1 and block count 2.
36493646
// Otherwise, use block size N and block count 1.

IGC/VectorCompiler/lib/GenXCodeGen/GenXThreadPrivateMemory.cpp

Lines changed: 41 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,6 @@ IN THE SOFTWARE.
3535
#include "GenXTargetMachine.h"
3636
#include "GenXUtil.h"
3737
#include "GenXVisa.h"
38-
#include "vc/GenXCodeGen/GenXInternalMetadata.h"
3938

4039
#include "Probe/Assertion.h"
4140
#include "llvmWrapper/IR/DerivedTypes.h"
@@ -209,7 +208,6 @@ std::pair<Value *, unsigned>
209208
GenXThreadPrivateMemory::NormalizeVector(Value *From, Type *To,
210209
Instruction *Inst) {
211210
Type *I32Ty = Type::getInt32Ty(Inst->getContext());
212-
Type *I64Ty = Type::getInt64Ty(Inst->getContext());
213211
Value *Res = From;
214212
Type *FromTy = From->getType();
215213
IGC_ASSERT(isa<VectorType>(FromTy));
@@ -236,22 +234,22 @@ GenXThreadPrivateMemory::NormalizeVector(Value *From, Type *To,
236234
To = IGCLLVM::FixedVectorType::get(I32Ty, NumElts);
237235
EltSz = I32Ty->getPrimitiveSizeInBits() / genx::ByteBits;
238236
Res = CastInst::Create(Instruction::BitCast, Res, To, "", Inst);
239-
} else if (m_DL->getTypeSizeInBits(cast<VectorType>(To)->getElementType()) <
240-
genx::DWordBits) {
237+
} else if (cast<VectorType>(To)->getElementType()->getPrimitiveSizeInBits() <
238+
genx::DWordBits
239+
// this is required for correct generation of svm.gather/scatter
240+
// of data of type which size is < i32 because these intrinsics
241+
// infer their block size from the type of the data they handle
242+
&& !m_useGlobalMem) {
241243
To = IGCLLVM::FixedVectorType::get(I32Ty, NumElts);
242-
Res = CastInst::CreateZExtOrBitCast(From, To, "", Inst);
243-
} else if (!m_useGlobalMem &&
244-
m_DL->getTypeSizeInBits(cast<VectorType>(To)->getElementType()) ==
245-
genx::QWordBits) {
246-
if (From->getType()->getScalarType()->isPointerTy()) {
247-
auto *NewType = IGCLLVM::FixedVectorType::get(I64Ty, NumElts);
248-
From = CastInst::Create(CastInst::PtrToInt, From, NewType, "", Inst);
249-
}
244+
245+
Res = CastInst::Create(Instruction::ZExt, From, To, "", Inst);
246+
} else if (cast<VectorType>(To)->getElementType()->getPrimitiveSizeInBits() ==
247+
genx::QWordBits) {
250248
NumElts *= 2;
251249
EltSz = I32Ty->getPrimitiveSizeInBits() / genx::ByteBits;
252250
To = IGCLLVM::FixedVectorType::get(I32Ty, NumElts);
253251

254-
Res = CastInst::CreateBitOrPointerCast(From, To, "", Inst);
252+
Res = CastInst::Create(Instruction::BitCast, From, To, "", Inst);
255253
}
256254

257255
return std::make_pair(Res, EltSz);
@@ -260,8 +258,6 @@ GenXThreadPrivateMemory::NormalizeVector(Value *From, Type *To,
260258
Instruction *
261259
GenXThreadPrivateMemory::RestoreVectorAfterNormalization(Instruction *From,
262260
Type *To) {
263-
if (From->getType() == To)
264-
return From;
265261
Instruction *Restored = From;
266262
unsigned EltSz = m_DL->getTypeSizeInBits(To->getScalarType());
267263
IGC_ASSERT(EltSz > 0);
@@ -523,19 +519,35 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
523519
LdTy = IGCLLVM::FixedVectorType::get(LdTy, 1);
524520

525521
unsigned NumEltsToLoad = cast<VectorType>(LdTy)->getNumElements();
526-
unsigned ValueEltSz = m_DL->getTypeSizeInBits(LdEltTy) / genx::ByteBits;
522+
unsigned LdEltTySz = m_DL->getTypeSizeInBits(LdEltTy);
523+
if (!(m_useGlobalMem && LdEltTy->isIntegerTy(64)) &&
524+
LdEltTySz == genx::QWordBits)
525+
NumEltsToLoad *= 2;
527526

528527
Value *PredVal = ConstantInt::get(Type::getInt1Ty(*m_ctx), 1);
529528
Value *Pred = Builder.CreateVectorSplat(NumEltsToLoad, PredVal);
530529

531530
Type *I32Ty = Type::getInt32Ty(*m_ctx);
532531
Type *I64Ty = Type::getInt64Ty(*m_ctx);
532+
Type *TyToLoad = (m_useGlobalMem && LdEltTy->isIntegerTy(64)) ? I64Ty : I32Ty;
533+
if (LdEltTy->isFloatTy())
534+
TyToLoad = LdEltTy;
535+
Type *RealTyToLoad = LdEltTy;
536+
if (!(m_useGlobalMem && LdEltTy->isIntegerTy(64)) &&
537+
m_DL->getTypeSizeInBits(RealTyToLoad) == genx::QWordBits)
538+
RealTyToLoad = I32Ty;
539+
unsigned RealTyToLoadSz =
540+
m_DL->getTypeSizeInBits(RealTyToLoad) / genx::ByteBits;
541+
// we don't want to use improper block sizes for loads of i8/i16
542+
// to make sure we comply with alignment rules for gathers
543+
bool NoExtToDword =
544+
m_useGlobalMem &&
545+
!(LdI->getType()->isAggregateType() || LdI->getType()->isVectorTy()) &&
546+
m_DL->getTypeSizeInBits(LdI->getType()) < genx::DWordBits;
547+
if (NoExtToDword)
548+
TyToLoad = LdI->getType();
533549
Value *OldValOfTheDataRead =
534-
Builder.CreateVectorSplat(NumEltsToLoad, UndefValue::get(LdEltTy));
535-
std::tie(OldValOfTheDataRead, ValueEltSz) =
536-
NormalizeVector(OldValOfTheDataRead, LdTy, LdI);
537-
NumEltsToLoad =
538-
cast<VectorType>(OldValOfTheDataRead->getType())->getNumElements();
550+
Builder.CreateVectorSplat(NumEltsToLoad, UndefValue::get(TyToLoad));
539551

540552
Value *PointerOp = LdI->getPointerOperand();
541553
Value *Offset = lookForPtrReplacement(PointerOp);
@@ -545,13 +557,10 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
545557
? llvm::GenXIntrinsic::genx_svm_gather
546558
: llvm::GenXIntrinsic::genx_gather_scaled;
547559

548-
Value *EltsOffset = FormEltsOffsetVector(NumEltsToLoad, ValueEltSz, LdI);
560+
Value *EltsOffset = FormEltsOffsetVector(NumEltsToLoad, RealTyToLoadSz, LdI);
549561

550-
unsigned NumBlocks = m_DL->getTypeSizeInBits(LdEltTy) / genx::ByteBits;
551-
// This logic is aligned with the on in CisaBuilder and GenXLowering
552-
// The reason behind check for == 2 is that svm intrinsics don't support
553-
// BlockSize of 2, so for ops with i16s we have to use BlockSize == 1 and NumBlocks == 2
554-
Value *logNumBlocks = ConstantInt::get(I32Ty, genx::log2(NumBlocks == 2 ? NumBlocks : 1));
562+
unsigned SrcSize = genx::log2(RealTyToLoadSz);
563+
Value *logNumBlocks = ConstantInt::get(I32Ty, m_useGlobalMem ? 0 : SrcSize);
555564
Value *Scale = ConstantInt::get(Type::getInt16Ty(*m_ctx), 0);
556565
Value *Surface = ConstantInt::get(I32Ty,
557566
visa::getReservedSurfaceIndex(m_stack));
@@ -592,10 +601,6 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
592601
ProperGather = LdVal;
593602
}
594603

595-
Gather->setMetadata(InstMD::SVMBlockType,
596-
MDNode::get(*m_ctx, llvm::ValueAsMetadata::get(
597-
UndefValue::get(LdEltTy))));
598-
599604
LLVM_DEBUG(dbgs() << *Gather << "\n");
600605
LdI->replaceAllUsesWith(ProperGather);
601606
LdI->eraseFromParent();
@@ -642,9 +647,7 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
642647
{Pred->getType(),
643648
(m_useGlobalMem ? Offset : EltsOffset)->getType(),
644649
ValueOp->getType()});
645-
unsigned NumBlocks = m_DL->getTypeSizeInBits(ValueOpTy->getScalarType()) / genx::ByteBits;
646-
// see the comment in replaceLoad above
647-
Value *logNumBlocks = ConstantInt::get(I32Ty, genx::log2(NumBlocks == 2 ? NumBlocks : 1));
650+
Value *logNumBlocks = ConstantInt::get(I32Ty, m_useGlobalMem ? 0 : genx::log2(ValueEltSz));
648651
Value *Scale = ConstantInt::get(Type::getInt16Ty(*m_ctx), 0);
649652
Value *Surface = ConstantInt::get(I32Ty,
650653
visa::getReservedSurfaceIndex(m_stack));
@@ -659,11 +662,6 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
659662
Scatter->insertAfter(StI);
660663
StI->eraseFromParent();
661664

662-
Scatter->setMetadata(
663-
InstMD::SVMBlockType,
664-
MDNode::get(*m_ctx, llvm::ValueAsMetadata::get(
665-
UndefValue::get(ValueOpTy->getScalarType()))));
666-
667665
LLVM_DEBUG(dbgs() << *Scatter << "\n");
668666
m_scatter.push_back(Scatter);
669667

@@ -1096,12 +1094,6 @@ void SplitScatter(CallInst *CI) {
10961094
}
10971095
IGC_ASSERT(FirstScatter && SecondScatter);
10981096

1099-
auto *MD = CI->getMetadata(InstMD::SVMBlockType);
1100-
if (MD) {
1101-
FirstScatter->setMetadata(InstMD::SVMBlockType, MD);
1102-
SecondScatter->setMetadata(InstMD::SVMBlockType, MD);
1103-
}
1104-
11051097
FirstScatter->insertAfter(CI);
11061098
SecondScatter->insertAfter(FirstScatter);
11071099

@@ -1171,12 +1163,6 @@ void SplitGather(CallInst *CI) {
11711163
}
11721164
IGC_ASSERT(FirstGather && SecondGather);
11731165

1174-
auto *MD = CI->getMetadata(InstMD::SVMBlockType);
1175-
if (MD) {
1176-
FirstGather->setMetadata(InstMD::SVMBlockType, MD);
1177-
SecondGather->setMetadata(InstMD::SVMBlockType, MD);
1178-
}
1179-
11801166
FirstGather->insertAfter(CI);
11811167
SecondGather->insertAfter(FirstGather);
11821168

@@ -1294,16 +1280,14 @@ bool GenXThreadPrivateMemory::runOnModule(Module &M) {
12941280
m_ST = &getAnalysis<TargetPassConfig>()
12951281
.getTM<GenXTargetMachine>()
12961282
.getGenXSubtarget();
1297-
if (!m_ST->isOCLRuntime())
1298-
m_useGlobalMem = false;
12991283
for (auto &F : M)
13001284
visit(F);
1301-
if (m_useGlobalMem ||
1302-
(m_ST->isOCLRuntime() && std::find_if(m_alloca.begin(), m_alloca.end(),
1303-
SVMChecker()) != m_alloca.end())) {
1285+
if (!m_useGlobalMem &&
1286+
std::find_if(m_alloca.begin(), m_alloca.end(), SVMChecker()) !=
1287+
m_alloca.end()) {
13041288
LLVM_DEBUG(dbgs() << "Switching TPM to SVM\n");
13051289
// TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1306-
M.addModuleFlag(Module::ModFlagBehavior::Error, ModuleMD::UseSVMStack, 1);
1290+
M.addModuleFlag(Module::ModFlagBehavior::Error, "genx.useGlobalMem", 1);
13071291
m_useGlobalMem = true;
13081292
}
13091293
bool Result = false;

0 commit comments

Comments
 (0)