Skip to content

Commit fcdb6db

Browse files
Centralize the folding logic for ConditionalSelect and ensure side effects aren't dropped (#104175)
* Centralize the folding logic for ConditionalSelect and ensure side effects aren't dropped * Ensure CndSel handles 64-bit operands where possible * Don't fold if op3 has side effects * Ensure that operands are passed into the lowered not->ternarylogic correctly
1 parent a7709e5 commit fcdb6db

File tree

6 files changed

+406
-193
lines changed

6 files changed

+406
-193
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 77 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -23479,18 +23479,14 @@ GenTree* Compiler::gtNewSimdCndSelNode(
2347923479
NamedIntrinsic intrinsic = NI_Illegal;
2348023480

2348123481
#if defined(TARGET_XARCH)
23482-
assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX));
23483-
23484-
if (canUseEvexEncoding())
23482+
if (simdSize == 64)
2348523483
{
23486-
GenTree* control = gtNewIconNode(0xCA); // (B & A) | (C & ~A)
23487-
return gtNewSimdTernaryLogicNode(type, op1, op2, op3, control, simdBaseJitType, simdSize);
23484+
assert(canUseEvexEncodingDebugOnly());
23485+
intrinsic = NI_Vector512_ConditionalSelect;
2348823486
}
23489-
23490-
assert(simdSize != 64);
23491-
23492-
if (simdSize == 32)
23487+
else if (simdSize == 32)
2349323488
{
23489+
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
2349423490
intrinsic = NI_Vector256_ConditionalSelect;
2349523491
}
2349623492
else
@@ -26606,45 +26602,19 @@ GenTree* Compiler::gtNewSimdUnOpNode(
2660626602
{
2660726603
if (simdSize == 64)
2660826604
{
26609-
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
26610-
26611-
if (genTypeSize(simdBaseType) >= 4)
26612-
{
26613-
intrinsic = NI_AVX512F_TernaryLogic;
26614-
}
26615-
}
26616-
else
26617-
{
26618-
if (simdSize == 32)
26619-
{
26620-
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
26621-
}
26622-
if ((genTypeSize(simdBaseType) >= 4) && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
26623-
{
26624-
intrinsic = NI_AVX10v1_TernaryLogic;
26625-
}
26626-
else if ((genTypeSize(simdBaseType) >= 4) && compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
26627-
{
26628-
intrinsic = NI_AVX512F_VL_TernaryLogic;
26629-
}
26605+
assert(canUseEvexEncodingDebugOnly());
26606+
intrinsic = NI_Vector512_op_OnesComplement;
2663026607
}
26631-
26632-
if (intrinsic != NI_Illegal)
26608+
else if (simdSize == 32)
2663326609
{
26634-
// AVX512 allows performing `not` without requiring a memory access
26635-
assert(canUseEvexEncodingDebugOnly());
26636-
26637-
op2 = gtNewZeroConNode(type);
26638-
op3 = gtNewZeroConNode(type);
26639-
26640-
GenTree* cns = gtNewIconNode(static_cast<uint8_t>(~0xAA)); // ~C
26641-
return gtNewSimdTernaryLogicNode(type, op3, op2, op1, cns, simdBaseJitType, simdSize);
26610+
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
26611+
intrinsic = NI_Vector256_op_OnesComplement;
2664226612
}
2664326613
else
2664426614
{
26645-
op2 = gtNewAllBitsSetConNode(type);
26646-
return gtNewSimdBinOpNode(GT_XOR, type, op1, op2, simdBaseJitType, simdSize);
26615+
intrinsic = NI_Vector128_op_OnesComplement;
2664726616
}
26617+
return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
2664826618
}
2664926619
#elif defined(TARGET_ARM64)
2665026620
case GT_NEG:
@@ -28194,12 +28164,16 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet(bool* isScalar) const
2819428164
return GT_AND;
2819528165
}
2819628166

28197-
#if defined(TARGET_ARM64)
28167+
#if defined(TARGET_XARCH)
28168+
case NI_Vector128_op_OnesComplement:
28169+
case NI_Vector256_op_OnesComplement:
28170+
case NI_Vector512_op_OnesComplement:
28171+
#elif defined(TARGET_ARM64)
2819828172
case NI_AdvSimd_Not:
28173+
#endif
2819928174
{
2820028175
return GT_NOT;
2820128176
}
28202-
#endif
2820328177

2820428178
#if defined(TARGET_XARCH)
2820528179
case NI_SSE_Xor:
@@ -30556,6 +30530,65 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
3055630530
}
3055730531
}
3055830532

30533+
if (op3 != nullptr)
30534+
{
30535+
switch (ni)
30536+
{
30537+
case NI_Vector128_ConditionalSelect:
30538+
#if defined(TARGET_XARCH)
30539+
case NI_Vector256_ConditionalSelect:
30540+
case NI_Vector512_ConditionalSelect:
30541+
#elif defined(TARGET_ARM64)
30542+
case NI_Vector64_ConditionalSelect:
30543+
case NI_Sve_ConditionalSelect:
30544+
#endif
30545+
{
30546+
if (cnsNode != op1)
30547+
{
30548+
break;
30549+
}
30550+
30551+
if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
30552+
{
30553+
if ((op3->gtFlags & GTF_SIDE_EFFECT) != 0)
30554+
{
30555+
// op3 has side effects, this would require us to append a new statement
30556+
// to ensure that it isn't lost, which isn't safe to do from the general
30557+
// purpose handler here. We'll recognize this and mark it in VN instead
30558+
}
30559+
30560+
// op3 has no side effects, so we can return op2 directly
30561+
return op2;
30562+
}
30563+
30564+
if (op1->IsVectorZero())
30565+
{
30566+
return gtWrapWithSideEffects(op3, op2, GTF_ALL_EFFECT);
30567+
}
30568+
30569+
if (op2->IsCnsVec() && op3->IsCnsVec())
30570+
{
30571+
// op2 = op2 & op1
30572+
op2->AsVecCon()->EvaluateBinaryInPlace(GT_AND, false, simdBaseType, op1->AsVecCon());
30573+
30574+
// op3 = op2 & ~op1
30575+
op3->AsVecCon()->EvaluateBinaryInPlace(GT_AND_NOT, false, simdBaseType, op1->AsVecCon());
30576+
30577+
// op2 = op2 | op3
30578+
op2->AsVecCon()->EvaluateBinaryInPlace(GT_OR, false, simdBaseType, op3->AsVecCon());
30579+
30580+
resultNode = op2;
30581+
}
30582+
break;
30583+
}
30584+
30585+
default:
30586+
{
30587+
break;
30588+
}
30589+
}
30590+
}
30591+
3055930592
if (resultNode != tree)
3056030593
{
3056130594
if (fgGlobalMorph)

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1911,19 +1911,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
19111911

19121912
switch (intrinsic)
19131913
{
1914-
case NI_Sve_ConditionalSelect:
1915-
{
1916-
if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
1917-
{
1918-
return retNode->AsHWIntrinsic()->Op(2);
1919-
}
1920-
else if (op1->IsVectorZero())
1921-
{
1922-
return retNode->AsHWIntrinsic()->Op(3);
1923-
}
1924-
break;
1925-
}
1926-
19271914
case NI_Sve_CreateMaskForFirstActiveElement:
19281915
case NI_Sve_CreateMaskForNextActiveElement:
19291916
case NI_Sve_GetActiveElementCount:

src/coreclr/jit/hwintrinsiclistxarch.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr,
119119
HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
120120
HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
121121
HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
122-
HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
122+
HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
123123
HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
124124
HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
125125
HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
@@ -221,7 +221,7 @@ HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr,
221221
HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
222222
HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
223223
HARDWARE_INTRINSIC(Vector256, op_Multiply, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
224-
HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
224+
HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible)
225225
HARDWARE_INTRINSIC(Vector256, op_RightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
226226
HARDWARE_INTRINSIC(Vector256, op_Subtraction, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
227227
HARDWARE_INTRINSIC(Vector256, op_UnaryNegation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
@@ -321,7 +321,7 @@ HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr,
321321
HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
322322
HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
323323
HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
324-
HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
324+
HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
325325
HARDWARE_INTRINSIC(Vector512, op_RightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
326326
HARDWARE_INTRINSIC(Vector512, op_Subtraction, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
327327
HARDWARE_INTRINSIC(Vector512, op_UnaryNegation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)

0 commit comments

Comments
 (0)