Skip to content

Commit 69b34ec

Browse files
kychendevigcbot
authored and committed
Improve DF math translation and
rematerialization. Improvements to reduce register pressure: reduce the size of the temp dcl created for DF math translation, and extend RA rematerialization to handle a source with an acc modifier.
1 parent 5b04ba2 commit 69b34ec

File tree

2 files changed

+20
-9
lines changed

2 files changed

+20
-9
lines changed

visa/Rematerialization.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -530,9 +530,9 @@ namespace vISA
530530
(G4_RegFileKind::G4_GRF | G4_RegFileKind::G4_INPUT)) == 0x0)
531531
return false;
532532

533-
// Skip remat if src opnd uses special acc registers
534-
if (src->getAccRegSel() != ACC_UNDEFINED)
535-
return false;
533+
G4_AccRegSel accRegSel = src->getAccRegSel();
534+
if (accRegSel != ACC_UNDEFINED && accRegSel != NOACC)
535+
return false;
536536

537537
// Lookup defs of src in program
538538
auto opIt = operations.find(topdcl);
@@ -1242,6 +1242,11 @@ namespace vISA
12421242
rematSrc = createSrcRgn(src->asSrcRegRegion(), uniqueDef->first->getDst(),
12431243
(*prevRematIt).second.first->getDst()->getTopDcl());
12441244

1245+
if (src->asSrcRegRegion()->getAccRegSel() == NOACC)
1246+
{
1247+
rematSrc->setAccRegSel(NOACC);
1248+
}
1249+
12451250
reduceNumUses(src->getTopDcl());
12461251

12471252
#if 0
@@ -1262,6 +1267,12 @@ namespace vISA
12621267
std::list<G4_INST*> newInsts;
12631268
G4_INST* cacheInst = nullptr;
12641269
rematSrc = rematerialize(src->asSrcRegRegion(), bb, uniqueDef, newInsts, cacheInst);
1270+
1271+
if (src->asSrcRegRegion()->getAccRegSel() == NOACC)
1272+
{
1273+
rematSrc->setAccRegSel(NOACC);
1274+
}
1275+
12651276
while (!newInsts.empty())
12661277
{
12671278
bb->insertBefore(instIt, newInsts.front());

visa/VisaToG4/TranslateMath.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,8 @@ int IR_Builder::translateVISAArithmeticDoubleInst(
238238
G4_Declare *t13 = createTempVarWithNoSpill(element_size, Type_DF, Any);
239239

240240
// r0 = 0.0:df, r1 = 1.0:df
241-
G4_Declare *t0 = getImmDcl(dbl_constant_0, element_size);
242-
G4_Declare *t1 = getImmDcl(dbl_constant_1, element_size);
241+
G4_Declare* t0 = getImmDcl(dbl_constant_0, exsize);
242+
G4_Declare* t1 = getImmDcl(dbl_constant_1, exsize);
243243

244244
inst = createPseudoKills({ t6, t7, t8, t9, t10, t11, t12, t13, tmpFlag }, PseudoKillType::Src);
245245

@@ -1281,10 +1281,10 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
12811281

12821282

12831283
// temp registers
1284-
G4_Declare *t0 = getImmDcl(createDFImm(0.0), element_size);
1285-
G4_Declare *t1 = getImmDcl(createDFImm(1.0), element_size);
1286-
G4_Declare *t2 = getImmDcl(createDFImm(0.5), element_size);
1287-
G4_Declare* t3 = getImmDcl(createDFImm(1.5), element_size);
1284+
G4_Declare *t0 = getImmDcl(createDFImm(0.0), exsize);
1285+
G4_Declare *t1 = getImmDcl(createDFImm(1.0), exsize);
1286+
G4_Declare *t2 = getImmDcl(createDFImm(0.5), exsize);
1287+
G4_Declare *t3 = getImmDcl(createDFImm(1.5), exsize);
12881288
G4_Declare *t6 = createTempVarWithNoSpill(element_size, Type_DF, Any);
12891289
G4_Declare *t7 = createTempVarWithNoSpill(element_size, Type_DF, Any);
12901290
G4_Declare *t8 = createTempVarWithNoSpill(element_size, Type_DF, Any);

0 commit comments

Comments
 (0)