Skip to content

Commit 6d3b842

Browse files
Add pattern matching for SVE intrinsics that operate on mask operands (#114438)
* Add pattern matching for SVE intrinsics that operate on mask operands Introduces `fgMorphTryUseAllMaskVariant` for ARM64 that looks for various named intrinsics that have operands that look 'mask-like'. E.g. source operands originating from Sve.CreateTrueMask* may be recognized as masks, causing the JIT to prefer to use the predicated version of the instruction as codegen for the intrinsic. It will also inspect ConditionalSelect intrinsic nodes to match instructions with governing predicates. The transform runs during morph. It's possible to emit the following instructions after this patch: * ZIP{1,2} <Pd>.<T>, <Pn>.<T>, <Pm>.<T> (Sve.ZipLow, Sve.ZipHigh) * UZP{1,2} <Pd>.<T>, <Pn>.<T>, <Pm>.<T> (Sve.UnzipEven, Sve.UnzipOdd) * TRN{1,2} <Pd>.<T>, <Pn>.<T>, <Pm>.<T> (Sve.TransposeEven, Sve.TransposeOdd) * REV <Pd>.<T>, <Pn>.<T> (Sve.ReverseElement) * AND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B (Sve.And) * BIC <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B (Sve.BitwiseClear) * EOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B (Sve.Xor) * ORR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B (Sve.Or) * SEL <Pd>.B, <Pg>, <Pn>.B, <Pm>.B (Sve.ConditionalSelect) Contributes towards #101970 * Fix test failure and add FileCheck tests * Don't run tests on OSX * Don't run tests for Mono * Move the transform later in fgOptimizeHWIntrinsic * Rename gtNewSimdAllFalseMaskNode * Re-design using HW_Flag_AllMaskVariant * Add missing function documentation in hwintrinsic.h * Fix integer comparison and add assertion * Refactor to follow similar path to XARCH * fix the refactoring * jit formatting * Move code into morph.cpp --------- Co-authored-by: Kunal Pathak <[email protected]>
1 parent dd33442 commit 6d3b842

9 files changed

+564
-156
lines changed

src/coreclr/jit/compiler.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3132,6 +3132,7 @@ class Compiler
31323132

31333133
#if defined(TARGET_ARM64)
31343134
GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize);
3135+
GenTree* gtNewSimdFalseMaskByteNode(unsigned simdSize);
31353136
#endif
31363137

31373138
GenTree* gtNewSimdBinOpNode(genTreeOps op,
@@ -6691,6 +6692,15 @@ class Compiler
66916692
GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree);
66926693
GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
66936694
GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node);
6695+
#if defined(FEATURE_MASKED_HW_INTRINSICS)
6696+
GenTreeHWIntrinsic* fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* node);
6697+
#endif // FEATURE_MASKED_HW_INTRINSICS
6698+
#ifdef TARGET_ARM64
6699+
bool canMorphVectorOperandToMask(GenTree* node);
6700+
bool canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node);
6701+
GenTree* doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent);
6702+
GenTreeHWIntrinsic* fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node);
6703+
#endif // TARGET_ARM64
66946704
#endif // FEATURE_HW_INTRINSICS
66956705
GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree);
66966706
GenTree* fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp);

src/coreclr/jit/hwintrinsic.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,11 @@ enum HWIntrinsicFlag : unsigned int
233233
// The intrinsic is a reduce operation.
234234
HW_Flag_ReduceOperation = 0x2000000,
235235

236+
// This intrinsic could be implemented with another intrinsic when it is operating on operands that are all of
237+
// type TYP_MASK, and this other intrinsic will produce a value of this type. Used in morph to convert vector
238+
// operations into mask operations when the intrinsic is operating on mask vectors (mainly bitwise operations).
239+
HW_Flag_HasAllMaskVariant = 0x4000000,
240+
236241
#else
237242
#error Unsupported platform
238243
#endif
@@ -1133,6 +1138,67 @@ struct HWIntrinsicInfo
11331138
}
11341139
}
11351140

1141+
#ifdef FEATURE_MASKED_HW_INTRINSICS
1142+
// HasAllMaskVariant: Does the intrinsic have an intrinsic variant that operates on mask types?
1143+
//
1144+
// Arguments:
1145+
// id -- the intrinsic to check for a mask-type variant.
1146+
//
1147+
// Return Value:
1148+
// true when the intrinsic has a mask-type variant, else false
1149+
//
1150+
static bool HasAllMaskVariant(NamedIntrinsic id)
1151+
{
1152+
const HWIntrinsicFlag flags = lookupFlags(id); // flag set recorded for this intrinsic id
1153+
return (flags & HW_Flag_HasAllMaskVariant) != 0; // true only when the HasAllMaskVariant bit is set
1154+
}
1155+
1156+
// GetMaskVariant: Given an intrinsic that has a variant that operates on mask types, return the ID of
1157+
// this variant intrinsic. Call HasAllMaskVariant before using this function, as it will
1158+
// assert if no match is found.
1159+
//
1160+
// Arguments:
1161+
// id -- the intrinsic with a mask-type variant.
1162+
//
1163+
// Return Value:
1164+
// The ID of the mask-type variant for the given intrinsic
1165+
//
1166+
static NamedIntrinsic GetMaskVariant(NamedIntrinsic id)
1167+
{
1168+
assert(HasAllMaskVariant(id)); // callers must pass only intrinsics flagged HW_Flag_HasAllMaskVariant
1169+
switch (id) // each case returns the matching NI_Sve_*_Predicates id for the vector intrinsic
1170+
{
1171+
case NI_Sve_And:
1172+
return NI_Sve_And_Predicates;
1173+
case NI_Sve_BitwiseClear:
1174+
return NI_Sve_BitwiseClear_Predicates;
1175+
case NI_Sve_Xor:
1176+
return NI_Sve_Xor_Predicates;
1177+
case NI_Sve_Or:
1178+
return NI_Sve_Or_Predicates;
1179+
case NI_Sve_ZipHigh:
1180+
return NI_Sve_ZipHigh_Predicates;
1181+
case NI_Sve_ZipLow:
1182+
return NI_Sve_ZipLow_Predicates;
1183+
case NI_Sve_UnzipOdd:
1184+
return NI_Sve_UnzipOdd_Predicates;
1185+
case NI_Sve_UnzipEven:
1186+
return NI_Sve_UnzipEven_Predicates;
1187+
case NI_Sve_TransposeEven:
1188+
return NI_Sve_TransposeEven_Predicates;
1189+
case NI_Sve_TransposeOdd:
1190+
return NI_Sve_TransposeOdd_Predicates;
1191+
case NI_Sve_ReverseElement:
1192+
return NI_Sve_ReverseElement_Predicates;
1193+
case NI_Sve_ConditionalSelect:
1194+
return NI_Sve_ConditionalSelect_Predicates;
1195+
1196+
default: // no mapping listed for this id
1197+
unreached(); // NOTE(review): presumably every intrinsic given HW_Flag_HasAllMaskVariant needs a case above - confirm when adding new ones
1198+
}
1199+
}
1200+
#endif // FEATURE_MASKED_HW_INTRINSICS
1201+
11361202
#endif // TARGET_ARM64
11371203

11381204
static bool HasSpecialSideEffect(NamedIntrinsic id)

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3341,7 +3341,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
33413341
}
33423342

33433343
//------------------------------------------------------------------------
3344-
// gtNewSimdEmbeddedMaskNode: Create an embedded mask
3344+
// gtNewSimdAllTrueMaskNode: Create an embedded mask with all bits set to true
33453345
//
33463346
// Arguments:
33473347
// simdBaseJitType -- the base jit type of the nodes being masked
@@ -3355,4 +3355,18 @@ GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigne
33553355
return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize);
33563356
}
33573357

3358+
//------------------------------------------------------------------------
3359+
// gtNewSimdFalseMaskByteNode: Create an embedded mask with all bits set to false
3360+
//
3361+
// Arguments:
3362+
// simdSize -- the simd size of the nodes being masked
3363+
//
3364+
// Return Value:
3365+
// The mask
3366+
//
3367+
GenTree* Compiler::gtNewSimdFalseMaskByteNode(unsigned simdSize)
3368+
{
3369+
return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskByte, CORINFO_TYPE_UBYTE, simdSize); // TYP_MASK node for CreateFalseMaskByte; base type is fixed to unsigned byte, only simdSize comes from the caller
3370+
}
3371+
33583372
#endif // FEATURE_HW_INTRINSICS

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
702702
break;
703703
}
704704

705+
case NI_Sve_And_Predicates:
706+
case NI_Sve_BitwiseClear_Predicates:
707+
case NI_Sve_Or_Predicates:
708+
case NI_Sve_Xor_Predicates:
709+
GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg,
710+
embMaskOp2Reg, INS_OPTS_SCALABLE_B);
711+
break;
712+
705713
default:
706714
{
707715
GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg,
@@ -2478,6 +2486,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
24782486

24792487
case NI_Sve_CreateBreakAfterPropagateMask:
24802488
case NI_Sve_CreateBreakBeforePropagateMask:
2489+
case NI_Sve_ConditionalSelect_Predicates:
24812490
{
24822491
GetEmitter()->emitInsSve_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, INS_OPTS_SCALABLE_B);
24832492
break;

0 commit comments

Comments
 (0)