Skip to content

Commit 9731db2

Browse files
fangliu2020igcbot
authored and committed
[Autobackout][FuncReg]Revert of change: 765ac0f
Enable expandMulPostSchedule pass by default
1 parent 26897ed commit 9731db2

File tree

4 files changed

+46
-45
lines changed

4 files changed

+46
-45
lines changed

visa/HWConformity.cpp

Lines changed: 42 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2048,10 +2048,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
20482048

20492049
//need extra move for dst
20502050
if (!IS_DTYPE(origDst->getType()) || origDst->getHorzStride() != 1 ||
2051-
!builder.isOpndAligned(origDst, getGRFSize()))
2051+
!builder.isOpndAligned(origDst, 32))
20522052
{
20532053
// macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2054-
G4_DstRegRegion* tmpDst = insertMovAfter(it, origDst, tmpType, bb, GRFALIGN);
2054+
G4_DstRegRegion* tmpDst = insertMovAfter(it, origDst, tmpType, bb);
20552055
mulInst->setDest(tmpDst);
20562056
}
20572057
}
@@ -2090,10 +2090,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
20902090
machIter = bb->insertBefore(++machIter, maclInst);
20912091

20922092
if (!IS_DTYPE(origDst->getType()) || origDst->getHorzStride() != 1 ||
2093-
!builder.isOpndAligned(origDst, getGRFSize()))
2093+
!builder.isOpndAligned(origDst, 32))
20942094
{
20952095
// macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2096-
G4_DstRegRegion* tmpDst = insertMovAfter(machIter, origDst, tmpType, bb, GRFALIGN);
2096+
G4_DstRegRegion* tmpDst = insertMovAfter(machIter, origDst, tmpType, bb);
20972097
maclInst->setDest(tmpDst);
20982098
}
20992099
}
@@ -2460,9 +2460,10 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
24602460
// Translate MULH into
24612461
// MUL acc src0 src1
24622462
// MACH dst src0 src1
2463-
void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2463+
bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
24642464
{
24652465
G4_INST* inst = *i;
2466+
INST_LIST_ITER iter = i;
24662467
G4_ExecSize execSize = inst->getExecSize();
24672468

24682469
int inst_opt = inst->getOption();
@@ -2529,20 +2530,23 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25292530
execSize > 1 ? builder.getRegionStride2() : builder.getRegionScalar(),
25302531
dst->getType());
25312532

2533+
++iter;
2534+
25322535
G4_INST* tmpMov = builder.createMov(execSize, dst, tmpSrc, inst->getOption(), false);
25332536
tmpMov->setPredicate(builder.duplicateOperand(inst->getPredicate()));
25342537

2535-
bb->insertAfter(i, tmpMov);
2538+
bb->insertBefore(iter, tmpMov);
2539+
//it will decrement back to mov
2540+
i = iter;
25362541

2537-
// Check the new inserted mov inst
2538-
i++;
2539-
2540-
// Need to remove dst from uses list of mulh, and add them to movInst useList
2541-
// add movInst to uselist of mulh.
2542-
// Add mulh to def instruction list of movInst
2542+
/*
2543+
Need to remove dst from uses list of mulh, and add them to movInst useList
2544+
add movInst to uselist of mulh.
2545+
Add mulh to def instruction list of movInst
2546+
*/
25432547
inst->transferUse(tmpMov);
25442548
inst->addDefUse(tmpMov, Opnd_src0);
2545-
return;
2549+
return true;
25462550
}
25472551

25482552
// src1 does not support modifier
@@ -2571,6 +2575,8 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25712575
// Here just create tmp variables to fix srcMod, cond modifier, saturate, etc. And Mul->Mul + Macl expanding will
25722576
// be done in expandMulPostSchedule pass.
25732577

2578+
bool newInstInserted = false;
2579+
25742580
// sat cannot be used at all in the macro sequence
25752581
// this effectivly means sat is broken for mul D D D
25762582
inst->setSaturate(g4::NOSAT);
@@ -2589,30 +2595,32 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25892595
}
25902596

25912597
INST_LIST_ITER end_iter = i;
2592-
// this mul will be expanded into mul+macl in expandMulPostSchedule pass. Since expanded macl
2593-
// must be grf-aligned, so need to make mul to be grf-aligned.
2598+
// check if the ACC source is aligned to mach dst
2599+
// ToDo: this should be checked by fixAcc?
25942600
G4_DstRegRegion* dst = inst->getDst();
25952601
if (inst->getSaturate() ||
25962602
dst->getExecTypeSize() > TypeSize(Type_D) ||
2597-
isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst) ||
2598-
!builder.isOpndAligned(dst, getGRFSize()))
2603+
isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst))
25992604
{
26002605
// add a tmp mov
2601-
inst->setDest(insertMovAfter(i, dst, dst->getType(), bb, GRFALIGN));
2606+
inst->setDest(insertMovAfter(i, dst, dst->getType(), bb));
26022607
end_iter++;
2608+
newInstInserted = true;
26032609
}
26042610

26052611
if (execSize > builder.getNativeExecSize())
26062612
{
26072613
auto start_iter = i;
2608-
splitDWMULInst(start_iter, end_iter, bb);
2609-
// start_iter points to the first half of mulh. Need double check this new inserted mulh to see if need split again
2610-
i = start_iter;
2614+
splitDWMULInst(i, end_iter, bb);
2615+
newInstInserted = true;
26112616
}
2612-
else
2617+
2618+
if (newInstInserted)
26132619
{
2620+
// it will decrease back to mulh
26142621
i++;
26152622
}
2623+
return newInstInserted;
26162624
}
26172625
else
26182626
{
@@ -2626,7 +2634,7 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
26262634
G4_INST* newMul = builder.createBinOp(G4_mul, execSize,
26272635
acc_dst_opnd, builder.duplicateOperand(src0), builder.duplicateOperand(src1), inst_opt, false);
26282636

2629-
bb->insertBefore(i, newMul);
2637+
bb->insertBefore(iter, newMul);
26302638
inst->copyDefsTo(newMul, false);
26312639

26322640
fixMulSrc1(std::prev(i), bb);
@@ -2673,16 +2681,10 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
26732681
{
26742682
auto start_iter = std::prev(i);
26752683
splitDWMULInst(start_iter, end_iter, bb);
2676-
// start_iter ponits to the first half of mul. Need to check the new inserted mul/mach instructions
2677-
i = start_iter;
2678-
}
2679-
else
2680-
{
2681-
// i points to mach, and need to check the new inserted mul before mach
2682-
i = std::prev(i);
2684+
i = end_iter;
26832685
}
2686+
return true;
26842687
}
2685-
return;
26862688
}
26872689

26882690
//
@@ -3567,11 +3569,6 @@ void HWConformity::splitDWMULInst(INST_LIST_ITER& start, INST_LIST_ITER& end, G4
35673569
evenlySplitInst(iter, bb);
35683570
G4_INST* expand_sec_half_op = *iter;
35693571
bb->insertBefore(last_iter, expand_sec_half_op);
3570-
// For the case that only one instruction needed to split, that is to say start equals to end
3571-
if (start == end)
3572-
{
3573-
start--;
3574-
}
35753572
end--;
35763573
bb->erase(iter);
35773574
}
@@ -5274,9 +5271,14 @@ void HWConformity::conformBB(G4_BB* bb)
52745271

52755272
if (inst->opcode() == G4_mulh)
52765273
{
5277-
fixMULHInst(i, bb);
5278-
next_iter = i;
5279-
continue;
5274+
if (fixMULHInst(i, bb))
5275+
{
5276+
// inserted mul before
5277+
// check the newly added MUL inst
5278+
i--;
5279+
next_iter = i;
5280+
continue;
5281+
}
52805282
}
52815283

52825284
#ifdef _DEBUG
@@ -7170,4 +7172,4 @@ void HWConformity::fixSrc1Region(INST_LIST_ITER it, G4_BB* bb)
71707172
G4_Operand* new_src1 = insertMovBefore(it, 1, src1->getType(), bb);
71717173
inst->setSrc(new_src1, 1);
71727174
}
7173-
}
7175+
}

visa/HWConformity.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ namespace vISA
109109
void fixPackedSource(INST_LIST_ITER it, G4_BB *bb, G4_Type extype);
110110
bool fixMathInst(INST_LIST_ITER it, G4_BB *bb);
111111
bool fixMULInst(INST_LIST_ITER &it, G4_BB *bb);
112-
void fixMULHInst(INST_LIST_ITER &i, G4_BB *bb);
112+
bool fixMULHInst(INST_LIST_ITER &i, G4_BB *bb);
113113
void fixMulSrc1(INST_LIST_ITER i, G4_BB* bb);
114114
void splitDWMULInst(INST_LIST_ITER &start, INST_LIST_ITER &end, G4_BB *bb);
115115
void fixOpnds(INST_LIST_ITER it, G4_BB *bb, G4_Type& exType);

visa/Optimizer.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11066,7 +11066,6 @@ void Optimizer::expandMulPostSchedule()
1106611066
}
1106711067
else
1106811068
{
11069-
// create a mach inst
1107011069
inst->setOpcode(G4_mul);
1107111070
maclOrMachInst = builder.createMach(inst->getExecSize(),
1107211071
dst, builder.duplicateOperand(src0), builder.duplicateOperand(src1), origOptions, accType);
@@ -11081,9 +11080,9 @@ void Optimizer::expandMulPostSchedule()
1108111080
auto maclOrMachInstIt = bb->insertAfter(it, maclOrMachInst);
1108211081

1108311082
// Always add a dummy mov after mach/macl for HW read suppresion W/A
11084-
auto dummyMovSrc = builder.createSrc(dst->getBase(),
11083+
auto dummyMovSrc = builder.createSrc(dst->getTopDcl()->getRegVar(),
1108511084
0, 0, builder.getRegionScalar(), Type_D);
11086-
G4_INST* dummyMov = builder.createMov(g4::SIMD1, builder.createNullDst(Type_D),
11085+
G4_INST* dummyMov = builder.createMov(g4::SIMD16, builder.createNullDst(Type_D),
1108711086
dummyMovSrc, InstOpt_WriteEnable, false);
1108811087
bb->insertAfter(maclOrMachInstIt, dummyMov);
1108911088
}

visa/include/VISAOptions.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ DEF_VISA_OPTION(vISA_DstSrcOverlapWA, ET_BOOL, "-dstSrcOverlapWA"
250250
DEF_VISA_OPTION(vISA_noSendSrcDstOverlap, ET_BOOL, "-noSendSrcDstOverlap", UNUSED, false)
251251
DEF_VISA_OPTION(vISA_cloneSampleInst, ET_BOOL, "-cloneSampleInst", UNUSED, false)
252252
DEF_VISA_OPTION(vISA_cloneEvaluateSampleInst, ET_BOOL, "-cloneEvaluateSampleInst", UNUSED, false)
253-
DEF_VISA_OPTION(vISA_expandMulPostSchedule, ET_BOOL, "-expandMulPostSchedule", UNUSED, true)
253+
DEF_VISA_OPTION(vISA_expandMulPostSchedule, ET_BOOL, "-expandMulPostSchedule", UNUSED, false)
254254

255255
//=== HW debugging options ===
256256
DEF_VISA_OPTION(vISA_GenerateDebugInfo, ET_BOOL, "-generateDebugInfo", UNUSED, false)

0 commit comments

Comments
 (0)