@@ -5171,31 +5171,48 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
5171
5171
if (!comp->opts.MinOpts() && (divisorValue >= 3))
5172
5172
{
5173
5173
size_t magic;
5174
- bool add;
5175
- int shift;
5174
+ bool increment;
5175
+ int preShift;
5176
+ int postShift;
5177
+ bool simpleMul = false;
5176
5178
5177
5179
if (type == TYP_INT)
5178
5180
{
5179
- magic = MagicDivide::GetUnsigned32Magic(static_cast<uint32_t>(divisorValue), &add, &shift);
5181
+ magic =
5182
+ MagicDivide::GetUnsigned32Magic(static_cast<uint32_t>(divisorValue), &increment, &preShift, &postShift);
5183
+
5184
+ #ifdef TARGET_64BIT
5185
+ // avoid inc_saturate/multiple shifts by widening to 32x64 MULHI
5186
+ if (increment || (preShift
5187
+ #ifdef TARGET_XARCH
5188
+ // IMUL reg,reg,imm32 can't be used if magic<0 because of sign-extension
5189
+ && static_cast<int32_t>(magic) < 0
5190
+ #endif
5191
+ ))
5192
+ {
5193
+ magic = MagicDivide::GetUnsigned64Magic(static_cast<uint64_t>(divisorValue), &increment, &preShift,
5194
+ &postShift, 32);
5195
+ }
5196
+ // otherwise just widen to regular multiplication
5197
+ else
5198
+ {
5199
+ postShift += 32;
5200
+ simpleMul = true;
5201
+ }
5202
+ #endif
5180
5203
}
5181
5204
else
5182
5205
{
5183
5206
#ifdef TARGET_64BIT
5184
- magic = MagicDivide::GetUnsigned64Magic(static_cast<uint64_t>(divisorValue), &add, &shift);
5207
+ magic =
5208
+ MagicDivide::GetUnsigned64Magic(static_cast<uint64_t>(divisorValue), &increment, &preShift, &postShift);
5185
5209
#else
5186
5210
unreached();
5187
5211
#endif
5188
5212
}
5189
5213
assert(divMod->MarkedDivideByConstOptimized());
5190
5214
5191
- // Depending on the "add" flag returned by GetUnsignedMagicNumberForDivide we need to generate:
5192
- // add == false (when divisor == 3 for example):
5193
- // div = (dividend MULHI magic) RSZ shift
5194
- // add == true (when divisor == 7 for example):
5195
- // mulhi = dividend MULHI magic
5196
- // div = (((dividend SUB mulhi) RSZ 1) ADD mulhi)) RSZ (shift - 1)
5197
- const bool requiresAdjustment = add;
5198
- const bool requiresDividendMultiuse = requiresAdjustment || !isDiv;
5215
+ const bool requiresDividendMultiuse = !isDiv;
5199
5216
const BasicBlock::weight_t curBBWeight = m_block->getBBWeight(comp);
5200
5217
5201
5218
if (requiresDividendMultiuse)
@@ -5204,62 +5221,107 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
5204
5221
dividend = ReplaceWithLclVar(dividendUse);
5205
5222
}
5206
5223
5207
- // Insert a new GT_MULHI node before the existing GT_UDIV/GT_UMOD node.
5208
- // The existing node will later be transformed into a GT_RSZ/GT_SUB that
5209
- // computes the final result. This way don't need to find and change the use
5210
- // of the existing node.
5211
- GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, dividend, divisor);
5212
- mulhi->gtFlags |= GTF_UNSIGNED;
5213
- divisor->AsIntCon()->SetIconValue(magic);
5214
- BlockRange().InsertBefore(divMod, mulhi);
5215
- GenTree* firstNode = mulhi;
5224
+ GenTree* firstNode = nullptr;
5225
+ GenTree* adjustedDividend = dividend;
5216
5226
5217
- if (requiresAdjustment)
5227
+ // If "increment" flag is returned by GetUnsignedMagic we need to do Saturating Increment first
5228
+ if (increment)
5218
5229
{
5219
- dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet());
5220
- GenTree* sub = comp->gtNewOperNode(GT_SUB, type, dividend, mulhi);
5221
- BlockRange().InsertBefore(divMod, dividend, sub);
5222
-
5223
- GenTree* one = comp->gtNewIconNode(1, TYP_INT);
5224
- GenTree* rsz = comp->gtNewOperNode(GT_RSZ, type, sub, one);
5225
- BlockRange().InsertBefore(divMod, one, rsz);
5226
-
5227
- LIR::Use mulhiUse(BlockRange(), &sub->AsOp()->gtOp2, sub);
5228
- mulhi = ReplaceWithLclVar(mulhiUse);
5229
-
5230
- mulhi = comp->gtNewLclvNode(mulhi->AsLclVar()->GetLclNum(), mulhi->TypeGet());
5231
- GenTree* add = comp->gtNewOperNode(GT_ADD, type, rsz, mulhi);
5232
- BlockRange().InsertBefore(divMod, mulhi, add);
5233
-
5234
- mulhi = add;
5235
- shift -= 1;
5230
+ adjustedDividend = comp->gtNewOperNode(GT_INC_SATURATE, type, adjustedDividend);
5231
+ BlockRange().InsertBefore(divMod, adjustedDividend);
5232
+ firstNode = adjustedDividend;
5233
+ assert(!preShift);
5236
5234
}
5235
+ // if "preShift" is required, then do a right shift before
5236
+ else if (preShift)
5237
+ {
5238
+ GenTree* preShiftBy = comp->gtNewIconNode(preShift, TYP_INT);
5239
+ adjustedDividend = comp->gtNewOperNode(GT_RSZ, type, adjustedDividend, preShiftBy);
5240
+ BlockRange().InsertBefore(divMod, preShiftBy, adjustedDividend);
5241
+ firstNode = preShiftBy;
5242
+ }
5243
+ else if (type != TYP_I_IMPL)
5244
+ {
5245
+ adjustedDividend = comp->gtNewCastNode(TYP_I_IMPL, adjustedDividend, true, TYP_U_IMPL);
5246
+ BlockRange().InsertBefore(divMod, adjustedDividend);
5247
+ firstNode = adjustedDividend;
5248
+ }
5249
+
5250
+ #ifdef TARGET_XARCH
5251
+ // force input transformation to RAX because the following MULHI will kill RDX:RAX anyway and LSRA often causes
5252
+ // redundant copies otherwise
5253
+ if (firstNode && !simpleMul)
5254
+ adjustedDividend->SetRegNum(REG_RAX);
5255
+ #endif
5237
5256
5238
- GenTree* shiftBy = comp->gtNewIconNode(shift, TYP_INT) ;
5239
- BlockRange().InsertBefore(divMod, shiftBy );
5257
+ divisor->gtType = TYP_I_IMPL ;
5258
+ divisor->AsIntCon()->SetIconValue(magic );
5240
5259
5241
- if (isDiv)
5260
+ if (isDiv && !postShift && type == TYP_I_IMPL )
5242
5261
{
5243
- divMod->SetOper(GT_RSZ );
5244
- divMod->gtOp1 = mulhi ;
5245
- divMod->gtOp2 = shiftBy ;
5262
+ divMod->SetOper(GT_MULHI );
5263
+ divMod->gtOp1 = adjustedDividend ;
5264
+ divMod->gtFlags |= GTF_UNSIGNED ;
5246
5265
}
5247
5266
else
5248
5267
{
5249
- GenTree* div = comp->gtNewOperNode(GT_RSZ, type, mulhi, shiftBy);
5268
+ // Insert a new GT_MULHI node before the existing GT_UDIV/GT_UMOD node.
5269
+ // The existing node will later be transformed into a GT_RSZ/GT_SUB that
5270
+ // computes the final result. This way we don't need to find and change the use
5271
+ // of the existing node.
5272
+ GenTree* mulhi = comp->gtNewOperNode(simpleMul ? GT_MUL : GT_MULHI, TYP_I_IMPL, adjustedDividend, divisor);
5273
+ mulhi->gtFlags |= GTF_UNSIGNED;
5274
+ BlockRange().InsertBefore(divMod, mulhi);
5275
+ if (!firstNode)
5276
+ firstNode = mulhi;
5277
+
5278
+ if (postShift)
5279
+ {
5280
+ GenTree* shiftBy = comp->gtNewIconNode(postShift, TYP_INT);
5281
+ BlockRange().InsertBefore(divMod, shiftBy);
5250
5282
5251
- // divisor UMOD dividend = dividend SUB (div MUL divisor)
5252
- GenTree* divisor = comp->gtNewIconNode(divisorValue, type);
5253
- GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor);
5254
- dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet());
5283
+ if (isDiv && type == TYP_I_IMPL)
5284
+ {
5285
+ divMod->SetOper(GT_RSZ);
5286
+ divMod->gtOp1 = mulhi;
5287
+ divMod->gtOp2 = shiftBy;
5288
+ }
5289
+ else
5290
+ {
5291
+ mulhi = comp->gtNewOperNode(GT_RSZ, TYP_I_IMPL, mulhi, shiftBy);
5292
+ BlockRange().InsertBefore(divMod, mulhi);
5293
+ }
5294
+ }
5295
+
5296
+ if (!isDiv)
5297
+ {
5298
+ // divisor UMOD dividend = dividend SUB (div MUL divisor)
5299
+ GenTree* divisor = comp->gtNewIconNode(divisorValue, type);
5300
+ GenTree* mul = comp->gtNewOperNode(GT_MUL, type, mulhi, divisor);
5301
+ dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet());
5255
5302
5256
- divMod->SetOper(GT_SUB);
5257
- divMod->gtOp1 = dividend;
5258
- divMod->gtOp2 = mul;
5303
+ divMod->SetOper(GT_SUB);
5304
+ divMod->gtOp1 = dividend;
5305
+ divMod->gtOp2 = mul;
5259
5306
5260
- BlockRange().InsertBefore(divMod, div, divisor, mul, dividend);
5307
+ BlockRange().InsertBefore(divMod, divisor, mul, dividend);
5308
+ }
5309
+ else if (type != TYP_I_IMPL)
5310
+ {
5311
+ #ifdef TARGET_ARMARCH
5312
+ divMod->SetOper(GT_CAST);
5313
+ divMod->gtFlags |= GTF_UNSIGNED;
5314
+ divMod->AsCast()->gtCastType = TYP_UINT;
5315
+ #else
5316
+ divMod->SetOper(GT_BITCAST);
5317
+ #endif
5318
+ divMod->gtOp1 = mulhi;
5319
+ divMod->gtOp2 = nullptr;
5320
+ }
5261
5321
}
5262
- ContainCheckRange(firstNode, divMod);
5322
+
5323
+ if (firstNode)
5324
+ ContainCheckRange(firstNode, divMod);
5263
5325
return true;
5264
5326
}
5265
5327
#endif
0 commit comments