@@ -2048,10 +2048,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
2048
2048
2049
2049
// need extra move for dst
2050
2050
if (!IS_DTYPE (origDst->getType ()) || origDst->getHorzStride () != 1 ||
2051
- !builder.isOpndAligned (origDst, getGRFSize () ))
2051
+ !builder.isOpndAligned (origDst, 32 ))
2052
2052
{
2053
2053
// macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2054
- G4_DstRegRegion* tmpDst = insertMovAfter (it, origDst, tmpType, bb, GRFALIGN );
2054
+ G4_DstRegRegion* tmpDst = insertMovAfter (it, origDst, tmpType, bb);
2055
2055
mulInst->setDest (tmpDst);
2056
2056
}
2057
2057
}
@@ -2090,10 +2090,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
2090
2090
machIter = bb->insertBefore (++machIter, maclInst);
2091
2091
2092
2092
if (!IS_DTYPE (origDst->getType ()) || origDst->getHorzStride () != 1 ||
2093
- !builder.isOpndAligned (origDst, getGRFSize () ))
2093
+ !builder.isOpndAligned (origDst, 32 ))
2094
2094
{
2095
2095
// macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2096
- G4_DstRegRegion* tmpDst = insertMovAfter (machIter, origDst, tmpType, bb, GRFALIGN );
2096
+ G4_DstRegRegion* tmpDst = insertMovAfter (machIter, origDst, tmpType, bb);
2097
2097
maclInst->setDest (tmpDst);
2098
2098
}
2099
2099
}
@@ -2460,9 +2460,10 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
2460
2460
// Translate MULH into
2461
2461
// MUL acc src0 src1
2462
2462
// MACH dst src0 src1
2463
- void HWConformity::fixMULHInst (INST_LIST_ITER& i, G4_BB* bb)
2463
+ bool HWConformity::fixMULHInst (INST_LIST_ITER& i, G4_BB* bb)
2464
2464
{
2465
2465
G4_INST* inst = *i;
2466
+ INST_LIST_ITER iter = i;
2466
2467
G4_ExecSize execSize = inst->getExecSize ();
2467
2468
2468
2469
int inst_opt = inst->getOption ();
@@ -2529,20 +2530,23 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2529
2530
execSize > 1 ? builder.getRegionStride2 () : builder.getRegionScalar (),
2530
2531
dst->getType ());
2531
2532
2533
+ ++iter;
2534
+
2532
2535
G4_INST* tmpMov = builder.createMov (execSize, dst, tmpSrc, inst->getOption (), false );
2533
2536
tmpMov->setPredicate (builder.duplicateOperand (inst->getPredicate ()));
2534
2537
2535
- bb->insertAfter (i, tmpMov);
2538
+ bb->insertBefore (iter, tmpMov);
2539
+ // the caller decrements i after we return, bringing it back to the new mov
2540
+ i = iter;
2536
2541
2537
- // Check the new inserted mov inst
2538
- i++;
2539
-
2540
- // Need to remove dst from uses list of mulh, and add them to movInst useList
2541
- // add movInst to uselist of mulh.
2542
- // Add mulh to def instruction list of movInst
2542
+ /*
2543
+ Need to remove dst from uses list of mulh, and add them to movInst useList
2544
+ add movInst to uselist of mulh.
2545
+ Add mulh to def instruction list of movInst
2546
+ */
2543
2547
inst->transferUse (tmpMov);
2544
2548
inst->addDefUse (tmpMov, Opnd_src0);
2545
- return ;
2549
+ return true ;
2546
2550
}
2547
2551
2548
2552
// src1 does not support modifier
@@ -2571,6 +2575,8 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2571
2575
// Here just create tmp variables to fix srcMod, cond modifier, saturate, etc. And Mul->Mul + Macl expanding will
2572
2576
// be done in expandMulPostSchedule pass.
2573
2577
2578
+ bool newInstInserted = false ;
2579
+
2574
2580
// sat cannot be used at all in the macro sequence
2575
2581
// this effectively means sat is broken for mul D D D
2576
2582
inst->setSaturate (g4::NOSAT);
@@ -2589,30 +2595,32 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2589
2595
}
2590
2596
2591
2597
INST_LIST_ITER end_iter = i;
2592
- // this mul will be expanded into mul+macl in expandMulPostSchedule pass. Since expanded macl
2593
- // must be grf-aligned, so need to make mul to be grf-aligned.
2598
+ // check if the ACC source is aligned to mach dst
2599
+ // ToDo: this should be checked by fixAcc?
2594
2600
G4_DstRegRegion* dst = inst->getDst ();
2595
2601
if (inst->getSaturate () ||
2596
2602
dst->getExecTypeSize () > TypeSize (Type_D) ||
2597
- isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst) ||
2598
- !builder.isOpndAligned (dst, getGRFSize ()))
2603
+ isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst))
2599
2604
{
2600
2605
// add a tmp mov
2601
- inst->setDest (insertMovAfter (i, dst, dst->getType (), bb, GRFALIGN ));
2606
+ inst->setDest (insertMovAfter (i, dst, dst->getType (), bb));
2602
2607
end_iter++;
2608
+ newInstInserted = true ;
2603
2609
}
2604
2610
2605
2611
if (execSize > builder.getNativeExecSize ())
2606
2612
{
2607
2613
auto start_iter = i;
2608
- splitDWMULInst (start_iter, end_iter, bb);
2609
- // start_iter points to the first half of mulh. Need double check this new inserted mulh to see if need split again
2610
- i = start_iter;
2614
+ splitDWMULInst (i, end_iter, bb);
2615
+ newInstInserted = true ;
2611
2616
}
2612
- else
2617
+
2618
+ if (newInstInserted)
2613
2619
{
2620
+ // the caller decrements i after we return, bringing it back to mulh
2614
2621
i++;
2615
2622
}
2623
+ return newInstInserted;
2616
2624
}
2617
2625
else
2618
2626
{
@@ -2626,7 +2634,7 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2626
2634
G4_INST* newMul = builder.createBinOp (G4_mul, execSize,
2627
2635
acc_dst_opnd, builder.duplicateOperand (src0), builder.duplicateOperand (src1), inst_opt, false );
2628
2636
2629
- bb->insertBefore (i , newMul);
2637
+ bb->insertBefore (iter , newMul);
2630
2638
inst->copyDefsTo (newMul, false );
2631
2639
2632
2640
fixMulSrc1 (std::prev (i), bb);
@@ -2673,16 +2681,10 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2673
2681
{
2674
2682
auto start_iter = std::prev (i);
2675
2683
splitDWMULInst (start_iter, end_iter, bb);
2676
- // start_iter ponits to the first half of mul. Need to check the new inserted mul/mach instructions
2677
- i = start_iter;
2678
- }
2679
- else
2680
- {
2681
- // i points to mach, and need to check the new inserted mul before mach
2682
- i = std::prev (i);
2684
+ i = end_iter;
2683
2685
}
2686
+ return true ;
2684
2687
}
2685
- return ;
2686
2688
}
2687
2689
2688
2690
//
@@ -3567,11 +3569,6 @@ void HWConformity::splitDWMULInst(INST_LIST_ITER& start, INST_LIST_ITER& end, G4
3567
3569
evenlySplitInst (iter, bb);
3568
3570
G4_INST* expand_sec_half_op = *iter;
3569
3571
bb->insertBefore (last_iter, expand_sec_half_op);
3570
- // For the case that only one instruction needed to split, that is to say start equals to end
3571
- if (start == end)
3572
- {
3573
- start--;
3574
- }
3575
3572
end--;
3576
3573
bb->erase (iter);
3577
3574
}
@@ -5274,9 +5271,14 @@ void HWConformity::conformBB(G4_BB* bb)
5274
5271
5275
5272
if (inst->opcode () == G4_mulh)
5276
5273
{
5277
- fixMULHInst (i, bb);
5278
- next_iter = i;
5279
- continue ;
5274
+ if (fixMULHInst (i, bb))
5275
+ {
5276
+ // inserted mul before
5277
+ // check the newly added MUL inst
5278
+ i--;
5279
+ next_iter = i;
5280
+ continue ;
5281
+ }
5280
5282
}
5281
5283
5282
5284
#ifdef _DEBUG
@@ -7170,4 +7172,4 @@ void HWConformity::fixSrc1Region(INST_LIST_ITER it, G4_BB* bb)
7170
7172
G4_Operand* new_src1 = insertMovBefore (it, 1 , src1->getType (), bb);
7171
7173
inst->setSrc (new_src1, 1 );
7172
7174
}
7173
- }
7175
+ }
0 commit comments