From c5922a126b6f1a55484269f456cbe94740829039 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Tue, 25 Mar 2025 10:23:37 +0000 Subject: [PATCH 01/13] Add pattern matching for SVE intrinsics that operate on mask operands Introduces `fgMorphTryUseAllMaskVariant` for ARM64 that looks for various named intrinsics that have operands that look 'mask-like'. E.g. source operands originating from Sve.CreateTrueMask* may be recognized as masks, causing the JIT to prefer to use the predicated version of the instruction as codegen for the intrinsic. It will also inspect ConditionalSelect intrinsic nodes to match instructions with governing predicates. The transform runs during morph. It's possible to emit the following instructions after this patch: * ZIP{1,2} ., ., . (Sve.ZipLow, Sve.ZipHigh) * UZP{1,2} ., ., . (Sve.UnzipEven, Sve.UnzipOdd) * TRN{1,2} ., ., . (Sve.TransposeEven, Sve.TransposeOdd) * REV ., . (Sve.ReverseElement) * AND .B, /Z, .B, .B (Sve.And) * BIC .B, /Z, .B, .B (Sve.BitwiseClear) * EOR .B, /Z, .B, .B (Sve.Xor) * ORR .B, /Z, .B, .B (Sve.Or) * SEL .B, , .B, .B (Sve.ConditionalSelect) Contributes towards #101970 --- src/coreclr/jit/CMakeLists.txt | 1 + src/coreclr/jit/compiler.h | 7 + src/coreclr/jit/gentree.cpp | 19 ++- src/coreclr/jit/gentree.h | 4 + src/coreclr/jit/hwintrinsicarm64.cpp | 16 ++- src/coreclr/jit/morph.cpp | 9 ++ src/coreclr/jit/morpharm64.cpp | 206 +++++++++++++++++++++++++++ 7 files changed, 260 insertions(+), 2 deletions(-) create mode 100644 src/coreclr/jit/morpharm64.cpp diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 288edf637a6dd4..37108f6b5c5be8 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -244,6 +244,7 @@ set( JIT_ARM64_SOURCES unwindarm64.cpp hwintrinsicarm64.cpp hwintrinsiccodegenarm64.cpp + morpharm64.cpp ) set( JIT_ARMV6_SOURCES diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ea7f52fb609ced..25d5bfd44206f8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3130,6 +3130,7 @@ class Compiler #if defined(TARGET_ARM64) GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* gtNewSimdAllFalseMaskNode(unsigned simdSize); #endif GenTree* gtNewSimdBinOpNode(genTreeOps op, @@ -6683,6 +6684,12 @@ class Compiler GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree); GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node); GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node); +#ifdef TARGET_ARM64 + bool canMorphVectorOperandToMask(GenTree* node); + bool canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node); + GenTree* doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent); + GenTree* fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node); +#endif // TARGET_ARM64 #endif // FEATURE_HW_INTRINSICS GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree); GenTree* fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5bf5fb170af2f1..b5977d5c4cfc21 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20569,7 +20569,24 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) } } #elif defined(TARGET_ARM64) - return HWIntrinsicInfo::HasRMWSemantics(AsHWIntrinsic()->GetHWIntrinsicId()); + NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId(); + switch (id) + { + case NI_Sve_And: + case NI_Sve_BitwiseClear: + case NI_Sve_Xor: + case NI_Sve_Or: + // Mask variant is not RMW, 
but the vector variant is. + if (varTypeIsMask(this)) + { + assert(AsHWIntrinsic()->GetOperandCount() == 3); + return false; + } + break; + default: + break; + } + return HWIntrinsicInfo::HasRMWSemantics(id); #else return false; #endif diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index f3fb94b09429e8..ff44a900e54758 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6617,6 +6617,10 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic } } +#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) + bool HasAllMaskVariant(); +#endif + private: void SetHWIntrinsicId(NamedIntrinsic intrinsicId); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index a9c50c029cc22f..c09ccee85b1ceb 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -3248,7 +3248,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } //------------------------------------------------------------------------ -// gtNewSimdEmbeddedMaskNode: Create an embedded mask +// gtNewSimdAllTrueMaskNode: Create an embedded mask with all bits set to true // // Arguments: // simdBaseJitType -- the base jit type of the nodes being masked @@ -3262,4 +3262,18 @@ GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigne return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize); } +//------------------------------------------------------------------------ +// gtNewSimdAllFalseMaskNode: Create an embedded mask with all bits set to false +// +// Arguments: +// simdSize -- the simd size of the nodes being masked +// +// Return Value: +// The mask +// +GenTree* Compiler::gtNewSimdAllFalseMaskNode(unsigned simdSize) +{ + return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskByte, CORINFO_TYPE_BYTE, simdSize); +} + #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 79b9d2743c9375..f57bf948078b0a 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9218,6 +9218,15 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) } } +#ifdef TARGET_ARM64 + optimizedTree = fgMorphTryUseAllMaskVariant(node); + if (optimizedTree != nullptr) + { + optimizedTree->SetMorphed(this); + return optimizedTree; + } +#endif + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); var_types retType = node->TypeGet(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); diff --git a/src/coreclr/jit/morpharm64.cpp b/src/coreclr/jit/morpharm64.cpp new file mode 100644 index 00000000000000..cc7fc42883f313 --- /dev/null +++ b/src/coreclr/jit/morpharm64.cpp @@ -0,0 +1,206 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Arm64 Specific Morph XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef FEATURE_MASKED_HW_INTRINSICS + +//------------------------------------------------------------------------ +// HasAllMaskVariant: Does this intrinsic have a variant where all of it's operands +// are mask types? 
+// +// Return Value: +// true if an all-mask variant exists for the intrinsic, else false. +// +bool GenTreeHWIntrinsic::HasAllMaskVariant() +{ + switch (GetHWIntrinsicId()) + { + // ZIP1 ., ., . + // ZIP2 ., ., . + // UZP1 ., ., . + // UZP2 ., ., . + // TRN1 ., ., . + // TRN2 ., ., . + // REV ., . + case NI_Sve_ZipHigh: + case NI_Sve_ZipLow: + case NI_Sve_UnzipOdd: + case NI_Sve_UnzipEven: + case NI_Sve_TransposeEven: + case NI_Sve_TransposeOdd: + case NI_Sve_ReverseElement: + return true; + + default: + return false; + } +} + +//------------------------------------------------------------------------ +// canMorphVectorOperandToMask: Can this vector operand be converted to a +// node with type TYP_MASK easily? +// +bool Compiler::canMorphVectorOperandToMask(GenTree* node) +{ + return varTypeIsMask(node) || node->OperIsConvertMaskToVector() || node->IsVectorZero(); +} + +//------------------------------------------------------------------------ +// canMorphAllVectorOperandsToMasks: Can all vector operands to this node +// be converted to a node with type +// TYP_MASK easily? +// +bool Compiler::canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node) +{ + bool allMaskConversions = true; + for (size_t i = 1; i <= node->GetOperandCount() && allMaskConversions; i++) + { + allMaskConversions &= canMorphVectorOperandToMask(node->Op(i)); + } + + return allMaskConversions; +} + +//------------------------------------------------------------------------ +// doMorphVectorOperandToMask: Morph a vector node that is close to a mask +// node into a mask node. +// +// Return value: +// The morphed tree, or nullptr if the transform is not applicable. +// +GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent) +{ + if (varTypeIsMask(node)) + { + // Already a mask, nothing to do. + return node; + } + else if (node->OperIsConvertMaskToVector()) + { + // Replace node with op1. + return node->AsHWIntrinsic()->Op(1); + } + else if (node->IsVectorZero()) + { + // Morph the vector of zeroes into mask of zeroes. + GenTree* mask = gtNewSimdAllFalseMaskNode(parent->GetSimdSize()); + mask->SetMorphed(this); + return mask; + } + + return nullptr; +} + +//----------------------------------------------------------------------------------------------------- +// fgMorphTryUseAllMaskVariant: For NamedIntrinsics that have a variant where all operands are +// mask nodes. If all operands to this node are 'suggesting' that they +// originate closely from a mask, but are of vector types, then morph the +// operands as appropriate to use mask types instead. 'Suggesting' +// is defined by the canMorphVectorOperandToMask function. +// +// Arguments: +// tree - The HWIntrinsic to try and optimize. +// +// Return Value: +// The fully morphed tree if a change was made, else nullptr. 
+// +GenTree* Compiler::fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node) +{ + if (node->HasAllMaskVariant() && canMorphAllVectorOperandsToMasks(node)) + { + for (size_t i = 1; i <= node->GetOperandCount(); i++) + { + node->Op(i) = doMorphVectorOperandToMask(node->Op(i), node); + } + + node->gtType = TYP_MASK; + return node; + } + + if (node->OperIsHWIntrinsic(NI_Sve_ConditionalSelect)) + { + GenTree* mask = node->Op(1); + GenTree* left = node->Op(2); + GenTree* right = node->Op(3); + + if (left->OperIsHWIntrinsic()) + { + assert(canMorphVectorOperandToMask(mask)); + + if (canMorphAllVectorOperandsToMasks(left->AsHWIntrinsic())) + { + // At this point we know the 'left' node is a HWINTRINSIC node and all of its operands look like + // mask nodes. + // + // The ConditionalSelect could be substituted for the named intrinsic in it's 'left' operand and + // transformed to a mask-type operation for some named intrinsics. Doing so will encourage codegen + // to emit predicate variants of instructions rather than vector variants, and we can lose some + // unnecessary mask->vector conversion nodes. + GenTreeHWIntrinsic* actualOp = left->AsHWIntrinsic(); + + switch (actualOp->GetHWIntrinsicId()) + { + // AND .B, /Z, .B, .B + // BIC .B, /Z, .B, .B + // EOR .B, /Z, .B, .B + // ORR .B, /Z, .B, .B + case NI_Sve_And: + case NI_Sve_BitwiseClear: + case NI_Sve_Xor: + case NI_Sve_Or: + if (right->IsVectorZero()) + { + // The operation is equivalent for all lane arrangements, because it is a bitwise operation. + // It's safe to bash the type to 8-bit required to assemble the instruction. + actualOp->SetSimdBaseJitType(CORINFO_TYPE_BYTE); + + actualOp->ResetHWIntrinsicId(actualOp->GetHWIntrinsicId(), this, + doMorphVectorOperandToMask(mask, actualOp), + doMorphVectorOperandToMask(actualOp->Op(1), actualOp), + doMorphVectorOperandToMask(actualOp->Op(2), actualOp)); + actualOp->gtType = TYP_MASK; + return actualOp; + } + break; + default: + break; + } + } + } + + // If we got this far, then there was no match on any predicated operation. + // ConditionalSelect itself can be a mask operation for 8-bit lane types, using + // SEL .B, , .B, .B + if (canMorphAllVectorOperandsToMasks(node)) + { + for (size_t i = 1; i <= node->GetOperandCount(); i++) + { + node->Op(i) = doMorphVectorOperandToMask(node->Op(i), node); + } + + // Again this operation is bitwise, so the lane arrangement doesn't matter. + // We can bash the type to 8-bit. 
+ node->SetSimdBaseJitType(CORINFO_TYPE_BYTE); + + node->gtType = TYP_MASK; + return node; + } + } + + return nullptr; +} + +#endif // FEATURE_MASKED_HW_INTRINSICS From 3ee02308f2bfebf2d1f6cc84033ffa6607dccbd3 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Mon, 14 Apr 2025 12:59:19 +0000 Subject: [PATCH 02/13] Fix test failure and add FileCheck tests --- src/coreclr/jit/hwintrinsicarm64.cpp | 2 +- src/tests/JIT/opt/SVE/Directory.Build.props | 8 ++ .../JIT/opt/SVE/PredicateInstructions.cs | 135 ++++++++++++++++++ .../JIT/opt/SVE/PredicateInstructions.csproj | 15 ++ 4 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 src/tests/JIT/opt/SVE/Directory.Build.props create mode 100644 src/tests/JIT/opt/SVE/PredicateInstructions.cs create mode 100644 src/tests/JIT/opt/SVE/PredicateInstructions.csproj diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index c09ccee85b1ceb..60fb311a29324c 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -3273,7 +3273,7 @@ GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigne // GenTree* Compiler::gtNewSimdAllFalseMaskNode(unsigned simdSize) { - return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskByte, CORINFO_TYPE_BYTE, simdSize); + return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskByte, CORINFO_TYPE_UBYTE, simdSize); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/tests/JIT/opt/SVE/Directory.Build.props b/src/tests/JIT/opt/SVE/Directory.Build.props new file mode 100644 index 00000000000000..a5abe9c2f0b119 --- /dev/null +++ b/src/tests/JIT/opt/SVE/Directory.Build.props @@ -0,0 +1,8 @@ + + + + + $(NoWarn);SYSLIB5003 + true + + \ No newline at end of file diff --git a/src/tests/JIT/opt/SVE/PredicateInstructions.cs b/src/tests/JIT/opt/SVE/PredicateInstructions.cs new file mode 100644 index 00000000000000..6d460adbf4beb1 --- /dev/null +++ b/src/tests/JIT/opt/SVE/PredicateInstructions.cs @@ -0,0 +1,135 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Numerics; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +public class PredicateInstructions +{ + [MethodImpl(MethodImplOptions.NoInlining)] + [Fact] + public static void TestPredicateInstructions() + { + ZipLow(); + ZipHigh(); + UnzipOdd(); + UnzipEven(); + TransposeOdd(); + TransposeEven(); + ReverseElement(); + And(); + BitwiseClear(); + Xor(); + Or(); + ConditionalSelect(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector ZipLow() + { + //ARM64-FULL-LINE: zip1 {{p[0-9]+}}.h, {{p[0-9]+}}.h, {{p[0-9]+}}.h + return Sve.ZipLow(Vector.Zero, Sve.CreateTrueMaskInt16()); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector ZipHigh() + { + //ARM64-FULL-LINE: zip2 {{p[0-9]+}}.s, {{p[0-9]+}}.s, {{p[0-9]+}}.s + return Sve.ZipHigh(Sve.CreateTrueMaskUInt32(), Sve.CreateTrueMaskUInt32()); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector UnzipEven() + { + //ARM64-FULL-LINE: uzp1 {{p[0-9]+}}.b, {{p[0-9]+}}.b, {{p[0-9]+}}.b + return Sve.UnzipEven(Sve.CreateTrueMaskSByte(), Vector.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector UnzipOdd() + { + //ARM64-FULL-LINE: uzp2 {{p[0-9]+}}.h, {{p[0-9]+}}.h, {{p[0-9]+}}.h + return Sve.UnzipOdd(Sve.CreateTrueMaskInt16(), Sve.CreateFalseMaskInt16()); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector TransposeEven() + { + //ARM64-FULL-LINE: trn1 {{p[0-9]+}}.d, {{p[0-9]+}}.d, {{p[0-9]+}}.d + return Sve.TransposeEven(Sve.CreateFalseMaskInt64(), Sve.CreateTrueMaskInt64()); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector TransposeOdd() + { + //ARM64-FULL-LINE: trn2 {{p[0-9]+}}.h, {{p[0-9]+}}.h, {{p[0-9]+}}.h + return Sve.TransposeOdd(Vector.Zero, Sve.CreateTrueMaskInt16()); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector ReverseElement() + { + //ARM64-FULL-LINE: rev {{p[0-9]+}}.h, {{p[0-9]+}}.h + return Sve.ReverseElement(Sve.CreateTrueMaskInt16()); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector And() + { + //ARM64-FULL-LINE: and {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b + return Sve.ConditionalSelect( + Sve.CreateTrueMaskInt16(), + Sve.And(Sve.CreateTrueMaskInt16(), Sve.CreateTrueMaskInt16()), + Vector.Zero + ); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector BitwiseClear() + { + //ARM64-FULL-LINE: bic {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b + return Sve.ConditionalSelect( + Sve.CreateFalseMaskInt16(), + Sve.BitwiseClear(Sve.CreateTrueMaskInt16(), Sve.CreateTrueMaskInt16()), + Vector.Zero + ); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector Xor() + { + //ARM64-FULL-LINE: eor {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b + return Sve.ConditionalSelect( + Sve.CreateTrueMaskInt32(), + Sve.Xor(Sve.CreateTrueMaskInt32(), Sve.CreateTrueMaskInt32()), + Vector.Zero + ); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector Or() + { + //ARM64-FULL-LINE: orr {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b + return Sve.ConditionalSelect( + Sve.CreateTrueMaskInt16(), + Sve.Or(Sve.CreateTrueMaskInt16(), Sve.CreateTrueMaskInt16()), + Vector.Zero + ); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector ConditionalSelect() + { + //ARM64-FULL-LINE: sel {{p[0-9]+}}.b, {{p[0-9]+}}, {{p[0-9]+}}.b, {{p[0-9]+}}.b + return Sve.ConditionalSelect( + Vector.Zero, + 
Sve.CreateFalseMaskInt32(), + Sve.CreateTrueMaskInt32() + ); + } +} \ No newline at end of file diff --git a/src/tests/JIT/opt/SVE/PredicateInstructions.csproj b/src/tests/JIT/opt/SVE/PredicateInstructions.csproj new file mode 100644 index 00000000000000..9005125d885bb0 --- /dev/null +++ b/src/tests/JIT/opt/SVE/PredicateInstructions.csproj @@ -0,0 +1,15 @@ + + + true + true + None + True + + + + true + + + + + \ No newline at end of file From 782d7fd2c1d012825919e4ea6f6c2868c1086e48 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Mon, 14 Apr 2025 15:09:15 +0000 Subject: [PATCH 03/13] Don't run tests on OSX --- src/tests/JIT/opt/SVE/Directory.Build.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/JIT/opt/SVE/Directory.Build.props b/src/tests/JIT/opt/SVE/Directory.Build.props index a5abe9c2f0b119..6d54cf233ca0da 100644 --- a/src/tests/JIT/opt/SVE/Directory.Build.props +++ b/src/tests/JIT/opt/SVE/Directory.Build.props @@ -3,6 +3,6 @@ $(NoWarn);SYSLIB5003 - true + true \ No newline at end of file From 8793f723f3e9e9e0651cec021f404fc8ed942710 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Tue, 15 Apr 2025 10:12:55 +0000 Subject: [PATCH 04/13] Don't run tests for Mono --- src/tests/JIT/opt/SVE/Directory.Build.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/JIT/opt/SVE/Directory.Build.props b/src/tests/JIT/opt/SVE/Directory.Build.props index 6d54cf233ca0da..0d03787db07ada 100644 --- a/src/tests/JIT/opt/SVE/Directory.Build.props +++ b/src/tests/JIT/opt/SVE/Directory.Build.props @@ -3,6 +3,6 @@ $(NoWarn);SYSLIB5003 - true + true \ No newline at end of file From 0b567843f1c084fa537604b05b92acde94584192 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Thu, 24 Apr 2025 14:57:22 +0000 Subject: [PATCH 05/13] Move the transform later in fgOptimizeHWIntrinsic --- src/coreclr/jit/morph.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index f57bf948078b0a..f68155ac618ec5 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9218,15 +9218,6 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) } } -#ifdef TARGET_ARM64 - optimizedTree = fgMorphTryUseAllMaskVariant(node); - if (optimizedTree != nullptr) - { - optimizedTree->SetMorphed(this); - return optimizedTree; - } -#endif - NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); var_types retType = node->TypeGet(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); @@ -9677,6 +9668,15 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) } } +#ifdef TARGET_ARM64 + optimizedTree = fgMorphTryUseAllMaskVariant(node); + if (optimizedTree != nullptr) + { + optimizedTree->SetMorphed(this); + return optimizedTree; + } +#endif + return node; } From 037875448bb27687f656c7811adbac99e231595b Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Thu, 24 Apr 2025 15:07:04 +0000 Subject: [PATCH 06/13] Rename gtNewSimdAllFalseMaskNode --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/hwintrinsicarm64.cpp | 4 ++-- src/coreclr/jit/morpharm64.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 25d5bfd44206f8..2daed574a6f5ec 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3130,7 +3130,7 @@ class Compiler #if defined(TARGET_ARM64) GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize); - 
GenTree* gtNewSimdAllFalseMaskNode(unsigned simdSize); + GenTree* gtNewSimdFalseMaskByteNode(unsigned simdSize); #endif GenTree* gtNewSimdBinOpNode(genTreeOps op, diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 60fb311a29324c..529ce0a0faff76 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -3263,7 +3263,7 @@ GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigne } //------------------------------------------------------------------------ -// gtNewSimdAllFalseMaskNode: Create an embedded mask with all bits set to false +// gtNewSimdFalseMaskByteNode: Create an embedded mask with all bits set to false // // Arguments: // simdSize -- the simd size of the nodes being masked @@ -3271,7 +3271,7 @@ GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigne // Return Value: // The mask // -GenTree* Compiler::gtNewSimdAllFalseMaskNode(unsigned simdSize) +GenTree* Compiler::gtNewSimdFalseMaskByteNode(unsigned simdSize) { return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskByte, CORINFO_TYPE_UBYTE, simdSize); } diff --git a/src/coreclr/jit/morpharm64.cpp b/src/coreclr/jit/morpharm64.cpp index cc7fc42883f313..ab7be0fa5a4859 100644 --- a/src/coreclr/jit/morpharm64.cpp +++ b/src/coreclr/jit/morpharm64.cpp @@ -96,7 +96,7 @@ GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* else if (node->IsVectorZero()) { // Morph the vector of zeroes into mask of zeroes. - GenTree* mask = gtNewSimdAllFalseMaskNode(parent->GetSimdSize()); + GenTree* mask = gtNewSimdFalseMaskByteNode(parent->GetSimdSize()); mask->SetMorphed(this); return mask; } From ff2df6c6efa1e373fe5f3d6e65150eae32a246a0 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Fri, 9 May 2025 14:10:53 +0000 Subject: [PATCH 07/13] Re-design using HW_Flag_AllMaskVariant --- src/coreclr/jit/gentree.cpp | 19 +-- src/coreclr/jit/gentree.h | 4 - src/coreclr/jit/hwintrinsic.h | 47 ++++++++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 9 ++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 38 ++++-- src/coreclr/jit/morpharm64.cpp | 127 ++++---------------- 6 files changed, 108 insertions(+), 136 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index b5977d5c4cfc21..5bf5fb170af2f1 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20569,24 +20569,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) } } #elif defined(TARGET_ARM64) - NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId(); - switch (id) - { - case NI_Sve_And: - case NI_Sve_BitwiseClear: - case NI_Sve_Xor: - case NI_Sve_Or: - // Mask variant is not RMW, but the vector variant is. 
- if (varTypeIsMask(this)) - { - assert(AsHWIntrinsic()->GetOperandCount() == 3); - return false; - } - break; - default: - break; - } - return HWIntrinsicInfo::HasRMWSemantics(id); + return HWIntrinsicInfo::HasRMWSemantics(AsHWIntrinsic()->GetHWIntrinsicId()); #else return false; #endif diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ff44a900e54758..f3fb94b09429e8 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6617,10 +6617,6 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic } } -#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_HW_INTRINSICS) - bool HasAllMaskVariant(); -#endif - private: void SetHWIntrinsicId(NamedIntrinsic intrinsicId); diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index d936d579d8e25a..e9e43fd4f7fbaf 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -233,6 +233,11 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic is a reduce operation. HW_Flag_ReduceOperation = 0x2000000, + // This intrinsic could be implemented with another intrinsic when it is operating on operands that are all of + // type TYP_MASK, and this other intrinsic will produces a value of this type. Used in morph to convert vector + // operations into mask operations when the intrinsic is operating on mask vectors (mainly bitwise operations). + HW_Flag_HasAllMaskVariant = 0x4000000, + #else #error Unsupported platform #endif @@ -1133,6 +1138,48 @@ struct HWIntrinsicInfo } } +#ifdef FEATURE_MASKED_HW_INTRINSICS + static bool HasAllMaskVariant(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_HasAllMaskVariant) != 0; + } + + static NamedIntrinsic GetMaskVariant(NamedIntrinsic id) + { + switch (id) + { + case NI_Sve_And: + return NI_Sve_And_Predicates; + case NI_Sve_BitwiseClear: + return NI_Sve_BitwiseClear_Predicates; + case NI_Sve_Xor: + return NI_Sve_Xor_Predicates; + case NI_Sve_Or: + return NI_Sve_Or_Predicates; + case NI_Sve_ZipHigh: + return NI_Sve_ZipHigh_Predicates; + case NI_Sve_ZipLow: + return NI_Sve_ZipLow_Predicates; + case NI_Sve_UnzipOdd: + return NI_Sve_UnzipOdd_Predicates; + case NI_Sve_UnzipEven: + return NI_Sve_UnzipEven_Predicates; + case NI_Sve_TransposeEven: + return NI_Sve_TransposeEven_Predicates; + case NI_Sve_TransposeOdd: + return NI_Sve_TransposeOdd_Predicates; + case NI_Sve_ReverseElement: + return NI_Sve_ReverseElement_Predicates; + case NI_Sve_ConditionalSelect: + return NI_Sve_ConditionalSelect_Predicates; + + default: + unreached(); + } + } +#endif // FEATURE_MASKED_HW_INTRINSICS + #endif // TARGET_ARM64 static bool HasSpecialSideEffect(NamedIntrinsic id) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 9bb7a1e4f39356..b99f89f8a22ddb 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -702,6 +702,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Sve_And_Predicates: + case NI_Sve_BitwiseClear_Predicates: + case NI_Sve_Or_Predicates: + case NI_Sve_Xor_Predicates: + GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, + embMaskOp2Reg, INS_OPTS_SCALABLE_B); + break; + default: { GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, @@ -2478,6 +2486,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_CreateBreakAfterPropagateMask: case NI_Sve_CreateBreakBeforePropagateMask: 
+ case NI_Sve_ConditionalSelect_Predicates: { GetEmitter()->emitInsSve_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, INS_OPTS_SCALABLE_B); break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 9bb76b0ad038a5..8dadeee34bddb4 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -27,9 +27,9 @@ HARDWARE_INTRINSIC(Sve, AddAcross, HARDWARE_INTRINSIC(Sve, AddRotateComplex, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcadd, INS_sve_fcadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, AddSaturate, -1, 2, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, AddSequentialAcross, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fadda, INS_sve_fadda}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, And, -1, -1, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, And, -1, -1, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, AndAcross, -1, -1, {INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_sve_andv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) -HARDWARE_INTRINSIC(Sve, BitwiseClear, -1, -1, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, BitwiseClear, -1, -1, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, BooleanNot, -1, -1, {INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Compact, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, CompareEqual, -1, -1, {INS_sve_cmpeq, INS_sve_cmpeq, 
INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_fcmeq, INS_sve_fcmeq}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_ZeroingMaskedOperation) @@ -47,7 +47,7 @@ HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElement, HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementAndReplicate, -1, 3, {INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElement, -1, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementAndReplicate, -1, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, ConditionalSelect, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(Sve, ConditionalSelect, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, ConvertToDouble, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_scvtf, INS_sve_ucvtf, INS_sve_fcvt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ConvertToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ConvertToInt64, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) @@ -224,7 +224,7 @@ HARDWARE_INTRINSIC(Sve, MultiplyExtended, HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, Negate, -1, -1, {INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, 
INS_invalid, INS_sve_fneg, INS_sve_fneg}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Not, -1, -1, {INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_sve_not, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) -HARDWARE_INTRINSIC(Sve, Or, -1, -1, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Or, -1, -1, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, {INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, PrefetchBytes, -1, 3, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SpecialSideEffect_Other) @@ -237,7 +237,7 @@ HARDWARE_INTRINSIC(Sve, ReciprocalSqrtEstimate, HARDWARE_INTRINSIC(Sve, ReciprocalSqrtStep, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frsqrts, INS_sve_frsqrts}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, ReciprocalStep, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_frecps, INS_sve_frecps}, HW_Category_SIMD, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, ReverseBits, -1, -1, {INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, ReverseElement16, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) 
HARDWARE_INTRINSIC(Sve, ReverseElement32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revw, INS_sve_revw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ReverseElement8, -1, -1, {INS_invalid, INS_invalid, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_sve_revb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) @@ -284,23 +284,23 @@ HARDWARE_INTRINSIC(Sve, SubtractSaturate, HARDWARE_INTRINSIC(Sve, TestAnyTrue, -1, 2, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, TestFirstTrue, -1, 2, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, TestLastTrue, -1, 2, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, TransposeEven, -1, 2, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, TransposeOdd, -1, 2, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, TransposeEven, -1, 2, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, TransposeOdd, -1, 2, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, TrigonometricMultiplyAddCoefficient, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftmad, INS_sve_ftmad}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, TrigonometricSelectCoefficient, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftssel, INS_sve_ftssel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, TrigonometricStartingValue, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftsmul, INS_sve_ftsmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, 
UnzipEven, -1, 2, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, VectorTableLookup, -1, 2, {INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl, INS_sve_tbl}, HW_Category_SIMD, HW_Flag_Scalable) -HARDWARE_INTRINSIC(Sve, Xor, -1, -1, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, Xor, -1, -1, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_HasAllMaskVariant) HARDWARE_INTRINSIC(Sve, XorAcross, -1, -1, {INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_sve_eorv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReduceOperation) HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ZeroExtend8, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ZeroExtendWideningLower, -1, 1, {INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, ZeroExtendWideningUpper, -1, 1, {INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, {INS_sve_zip2, 
INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ZipLow, -1, 2, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) +HARDWARE_INTRINSIC(Sve, ZipLow, -1, 2, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasAllMaskVariant) #define LAST_NI_Sve NI_Sve_ZipLow // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -325,7 +325,19 @@ HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy64BitElementCountScalar, HARDWARE_INTRINSIC(Sve, StoreAndZipx2, -1, 3, {INS_sve_st2b, INS_sve_st2b, INS_sve_st2h, INS_sve_st2h, INS_sve_st2w, INS_sve_st2w, INS_sve_st2d, INS_sve_st2d, INS_sve_st2w, INS_sve_st2d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(Sve, StoreAndZipx3, -1, 3, {INS_sve_st3b, INS_sve_st3b, INS_sve_st3h, INS_sve_st3h, INS_sve_st3w, INS_sve_st3w, INS_sve_st3d, INS_sve_st3d, INS_sve_st3w, INS_sve_st3d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(Sve, StoreAndZipx4, -1, 3, {INS_sve_st4b, INS_sve_st4b, INS_sve_st4h, INS_sve_st4h, INS_sve_st4w, INS_sve_st4w, INS_sve_st4d, INS_sve_st4d, INS_sve_st4w, INS_sve_st4d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters) - +// Predicate variants of intrinsics, these are specialized for operating on TYP_MASK type values. 
+HARDWARE_INTRINSIC(Sve, And_Predicates, -1, 2, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, BitwiseClear_Predicates, -1, 2, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Or_Predicates, -1, 2, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, Xor_Predicates, -1, 2, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ConditionalSelect_Predicates, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ZipHigh_Predicates, -1, 2, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, ZipLow_Predicates, -1, 2, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, UnzipEven_Predicates, -1, 2, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, UnzipOdd_Predicates, -1, 2, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, TransposeEven_Predicates, -1, 2, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, TransposeOdd_Predicates, -1, 2, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, ReverseElement_Predicates, -1, 1, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) #endif // FEATURE_HW_INTRINSIC diff --git a/src/coreclr/jit/morpharm64.cpp b/src/coreclr/jit/morpharm64.cpp index ab7be0fa5a4859..59399a58d2ad29 100644 --- a/src/coreclr/jit/morpharm64.cpp +++ 
b/src/coreclr/jit/morpharm64.cpp @@ -17,38 +17,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifdef FEATURE_MASKED_HW_INTRINSICS -//------------------------------------------------------------------------ -// HasAllMaskVariant: Does this intrinsic have a variant where all of it's operands -// are mask types? -// -// Return Value: -// true if an all-mask variant exists for the intrinsic, else false. -// -bool GenTreeHWIntrinsic::HasAllMaskVariant() -{ - switch (GetHWIntrinsicId()) - { - // ZIP1 ., ., . - // ZIP2 ., ., . - // UZP1 ., ., . - // UZP2 ., ., . - // TRN1 ., ., . - // TRN2 ., ., . - // REV ., . - case NI_Sve_ZipHigh: - case NI_Sve_ZipLow: - case NI_Sve_UnzipOdd: - case NI_Sve_UnzipEven: - case NI_Sve_TransposeEven: - case NI_Sve_TransposeOdd: - case NI_Sve_ReverseElement: - return true; - - default: - return false; - } -} - //------------------------------------------------------------------------ // canMorphVectorOperandToMask: Can this vector operand be converted to a // node with type TYP_MASK easily? @@ -119,84 +87,41 @@ GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* // GenTree* Compiler::fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node) { - if (node->HasAllMaskVariant() && canMorphAllVectorOperandsToMasks(node)) - { - for (size_t i = 1; i <= node->GetOperandCount(); i++) - { - node->Op(i) = doMorphVectorOperandToMask(node->Op(i), node); - } - - node->gtType = TYP_MASK; - return node; - } - - if (node->OperIsHWIntrinsic(NI_Sve_ConditionalSelect)) + if (HWIntrinsicInfo::HasAllMaskVariant(node->GetHWIntrinsicId())) { - GenTree* mask = node->Op(1); - GenTree* left = node->Op(2); - GenTree* right = node->Op(3); - - if (left->OperIsHWIntrinsic()) + NamedIntrinsic maskVariant = HWIntrinsicInfo::GetMaskVariant(node->GetHWIntrinsicId()); + + // As some intrinsics have many variants, check that the count of operands on the node + // matches the number of operands required for the mask variant of the intrinsic. The mask + // variant of the intrinsic must have a fixed number of operands. + int numArgs = HWIntrinsicInfo::lookupNumArgs(maskVariant); + assert(numArgs >= 0); + if (node->GetOperandCount() == numArgs) { - assert(canMorphVectorOperandToMask(mask)); - - if (canMorphAllVectorOperandsToMasks(left->AsHWIntrinsic())) + // We're sure it will work at this point, so perform the pattern match on operands. + if (canMorphAllVectorOperandsToMasks(node)) { - // At this point we know the 'left' node is a HWINTRINSIC node and all of its operands look like - // mask nodes. - // - // The ConditionalSelect could be substituted for the named intrinsic in it's 'left' operand and - // transformed to a mask-type operation for some named intrinsics. Doing so will encourage codegen - // to emit predicate variants of instructions rather than vector variants, and we can lose some - // unnecessary mask->vector conversion nodes. - GenTreeHWIntrinsic* actualOp = left->AsHWIntrinsic(); - - switch (actualOp->GetHWIntrinsicId()) + switch (node->GetOperandCount()) { - // AND .B, /Z, .B, .B - // BIC .B, /Z, .B, .B - // EOR .B, /Z, .B, .B - // ORR .B, /Z, .B, .B - case NI_Sve_And: - case NI_Sve_BitwiseClear: - case NI_Sve_Xor: - case NI_Sve_Or: - if (right->IsVectorZero()) - { - // The operation is equivalent for all lane arrangements, because it is a bitwise operation. - // It's safe to bash the type to 8-bit required to assemble the instruction. 
- actualOp->SetSimdBaseJitType(CORINFO_TYPE_BYTE); - - actualOp->ResetHWIntrinsicId(actualOp->GetHWIntrinsicId(), this, - doMorphVectorOperandToMask(mask, actualOp), - doMorphVectorOperandToMask(actualOp->Op(1), actualOp), - doMorphVectorOperandToMask(actualOp->Op(2), actualOp)); - actualOp->gtType = TYP_MASK; - return actualOp; - } + case 1: + node->ResetHWIntrinsicId(maskVariant, doMorphVectorOperandToMask(node->Op(1), node)); break; - default: + case 2: + node->ResetHWIntrinsicId(maskVariant, doMorphVectorOperandToMask(node->Op(1), node), + doMorphVectorOperandToMask(node->Op(2), node)); + break; + case 3: + node->ResetHWIntrinsicId(maskVariant, this, doMorphVectorOperandToMask(node->Op(1), node), + doMorphVectorOperandToMask(node->Op(2), node), + doMorphVectorOperandToMask(node->Op(3), node)); break; + default: + unreached(); } - } - } - // If we got this far, then there was no match on any predicated operation. - // ConditionalSelect itself can be a mask operation for 8-bit lane types, using - // SEL .B, , .B, .B - if (canMorphAllVectorOperandsToMasks(node)) - { - for (size_t i = 1; i <= node->GetOperandCount(); i++) - { - node->Op(i) = doMorphVectorOperandToMask(node->Op(i), node); + node->gtType = TYP_MASK; + return node; } - - // Again this operation is bitwise, so the lane arrangement doesn't matter. - // We can bash the type to 8-bit. - node->SetSimdBaseJitType(CORINFO_TYPE_BYTE); - - node->gtType = TYP_MASK; - return node; } } From f067baa9ee4dc08c00848c09dd9b70e4a2536749 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Wed, 14 May 2025 09:22:38 +0000 Subject: [PATCH 08/13] Add missing function documentation in hwintrinsic.h --- src/coreclr/jit/hwintrinsic.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index e9e43fd4f7fbaf..548bbde3bb14ec 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -1139,12 +1139,30 @@ struct HWIntrinsicInfo } #ifdef FEATURE_MASKED_HW_INTRINSICS + // HasAllMaskVariant: Does the intrinsic have an intrinsic variant that operates on mask types? + // + // Arguments: + // id -- the intrinsic to check for a mask-type variant. + // + // Return Value: + // true when the intrinsic has a mask-type variant, else false + // static bool HasAllMaskVariant(NamedIntrinsic id) { const HWIntrinsicFlag flags = lookupFlags(id); return (flags & HW_Flag_HasAllMaskVariant) != 0; } + // GetMaskVariant: Given an intrinsic that has a variant that operates on mask types, return the ID of + // this variant intrinsic. Call HasAllMaskVariant before using this function, as it will + // assert if no match is found. + // + // Arguments: + // id -- the intrinsic with a mask-type variant. 
+ // + // Return Value: + // The ID of the mask-type variant for the given intrinsic + // static NamedIntrinsic GetMaskVariant(NamedIntrinsic id) { switch (id) From eae26223bdc893d09c3c78c03291d99c16f94813 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Thu, 15 May 2025 16:06:22 +0000 Subject: [PATCH 09/13] Fix integer comparison and add assertion --- src/coreclr/jit/hwintrinsic.h | 1 + src/coreclr/jit/morpharm64.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 548bbde3bb14ec..470ab85650bf7c 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -1165,6 +1165,7 @@ struct HWIntrinsicInfo // static NamedIntrinsic GetMaskVariant(NamedIntrinsic id) { + assert(HasAllMaskVariant(id)); switch (id) { case NI_Sve_And: diff --git a/src/coreclr/jit/morpharm64.cpp b/src/coreclr/jit/morpharm64.cpp index 59399a58d2ad29..0d6ce79fd77cd3 100644 --- a/src/coreclr/jit/morpharm64.cpp +++ b/src/coreclr/jit/morpharm64.cpp @@ -96,7 +96,7 @@ GenTree* Compiler::fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node) // variant of the intrinsic must have a fixed number of operands. int numArgs = HWIntrinsicInfo::lookupNumArgs(maskVariant); assert(numArgs >= 0); - if (node->GetOperandCount() == numArgs) + if (node->GetOperandCount() == (size_t)numArgs) { // We're sure it will work at this point, so perform the pattern match on operands. if (canMorphAllVectorOperandsToMasks(node)) From e28622a7d69cecaaf60c34ed42e3c891a190c701 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Fri, 16 May 2025 10:43:18 +0000 Subject: [PATCH 10/13] Refactor to follow similar path to XARCH --- src/coreclr/jit/compiler.h | 5 +- src/coreclr/jit/morph.cpp | 312 +++++++++++++++++---------------- src/coreclr/jit/morpharm64.cpp | 2 +- 3 files changed, 167 insertions(+), 152 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 36de68b52171ab..d627fec27bc5a1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6692,11 +6692,14 @@ class Compiler GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree); GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node); GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node); +#if defined(FEATURE_MASKED_HW_INTRINSICS) + GenTreeHWIntrinsic* fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* node); +#endif // FEATURE_MASKED_HW_INTRINSICS #ifdef TARGET_ARM64 bool canMorphVectorOperandToMask(GenTree* node); bool canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node); GenTree* doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent); - GenTree* fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node); + GenTreeHWIntrinsic* fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node); #endif // TARGET_ARM64 #endif // FEATURE_HW_INTRINSICS GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1d79c2b4b021bc..d5faa60589494a 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9353,151 +9353,11 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) default: { #if defined(FEATURE_MASKED_HW_INTRINSICS) - bool isScalar = false; - genTreeOps actualOper = node->GetOperForHWIntrinsicId(&isScalar); - genTreeOps oper = actualOper; - - // We shouldn't find AND_NOT, OR_NOT or XOR_NOT nodes since it should only be produced in lowering - assert((oper != GT_AND_NOT) && (oper != GT_OR_NOT) && (oper != GT_XOR_NOT)); - 
- if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper)) + GenTreeHWIntrinsic* maskedIntrinsic = fgOptimizeForMaskedIntrinsic(node); + if (maskedIntrinsic != nullptr) { - GenTree* op1 = node->Op(1); - - GenTree* op2; - GenTree* actualOp2; - - if (oper == GT_NOT) - { - op2 = op1; - actualOp2 = nullptr; - } - else - { - op2 = node->Op(2); - actualOp2 = op2; - } - - // We need both operands to be ConvertMaskToVector in - // order to optimize this to a direct mask operation - - if (!op1->OperIsConvertMaskToVector()) - { - break; - } - - if (!op2->OperIsHWIntrinsic()) - { -#if defined(TARGET_XARCH) - if ((oper != GT_XOR) || !op2->IsVectorAllBitsSet()) - { - break; - } - - // We want to explicitly recognize op1 ^ AllBitsSet as - // some platforms don't have direct support for ~op1 - - oper = GT_NOT; - op2 = op1; -#else - break; -#endif - } - - GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic(); - GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic(); - - if (!cvtOp2->OperIsConvertMaskToVector()) - { - break; - } - - unsigned simdBaseTypeSize = genTypeSize(simdBaseType); - - if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize) || - (genTypeSize(cvtOp2->GetSimdBaseType()) != simdBaseTypeSize)) - { - // We need both operands to be the same kind of mask; otherwise - // the bitwise operation can differ in how it performs - break; - } - - NamedIntrinsic maskIntrinsicId = NI_Illegal; - -#if defined(TARGET_XARCH) - switch (oper) - { - case GT_AND: - { - maskIntrinsicId = NI_EVEX_AndMask; - break; - } - - case GT_NOT: - { - maskIntrinsicId = NI_EVEX_NotMask; - break; - } - - case GT_OR: - { - maskIntrinsicId = NI_EVEX_OrMask; - break; - } - - case GT_XOR: - { - maskIntrinsicId = NI_EVEX_XorMask; - break; - } - - default: - { - unreached(); - } - } -#elif defined(TARGET_ARM64) - // TODO-ARM64-CQ: Support transforming bitwise operations on masks - break; -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - - if (maskIntrinsicId == NI_Illegal) - { - break; - } - - if (oper == actualOper) - { - node->ChangeHWIntrinsicId(maskIntrinsicId); - node->Op(1) = cvtOp1->Op(1); - } - else - { - assert(oper == GT_NOT); - node->ResetHWIntrinsicId(maskIntrinsicId, this, cvtOp1->Op(1)); - node->gtFlags &= ~GTF_REVERSE_OPS; - } - - node->gtType = TYP_MASK; - DEBUG_DESTROY_NODE(op1); - - if (oper != GT_NOT) - { - assert(actualOp2 != nullptr); - node->Op(2) = cvtOp2->Op(1); - } - - if (actualOp2 != nullptr) - { - DEBUG_DESTROY_NODE(actualOp2); - } - - node->SetMorphed(this); - node = gtNewSimdCvtMaskToVectorNode(retType, node, simdBaseJitType, simdSize)->AsHWIntrinsic(); + node = maskedIntrinsic; node->SetMorphed(this); - return node; } #endif // FEATURE_MASKED_HW_INTRINSICS break; @@ -9679,17 +9539,169 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) } } -#ifdef TARGET_ARM64 - optimizedTree = fgMorphTryUseAllMaskVariant(node); - if (optimizedTree != nullptr) + return node; +} + +#if defined(FEATURE_MASKED_HW_INTRINSICS) +//------------------------------------------------------------------------ +// fgOptimizeForMaskedIntrinsic: Tries to recognize intrinsics that are operating +// on mask types and morphs the tree to use intrinsics +// better suited to this. +// +// Arguments: +// node - the hardware intrinsic tree to try and optimize. +// This tree will be mutated if it is possible to optimize the tree. +// +// Return Value: +// The optimized tree, nullptr if no change was made. 
+// +GenTreeHWIntrinsic* Compiler::fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* node) +{ +#if defined(TARGET_XARCH) + bool isScalar = false; + genTreeOps actualOper = node->GetOperForHWIntrinsicId(&isScalar); + genTreeOps oper = actualOper; + + // We shouldn't find AND_NOT, OR_NOT or XOR_NOT nodes since it should only be produced in lowering + assert((oper != GT_AND_NOT) && (oper != GT_OR_NOT) && (oper != GT_XOR_NOT)); + + if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper)) { - optimizedTree->SetMorphed(this); - return optimizedTree; + GenTree* op1 = node->Op(1); + + GenTree* op2; + GenTree* actualOp2; + + if (oper == GT_NOT) + { + op2 = op1; + actualOp2 = nullptr; + } + else + { + op2 = node->Op(2); + actualOp2 = op2; + } + + // We need both operands to be ConvertMaskToVector in + // order to optimize this to a direct mask operation + + if (!op1->OperIsConvertMaskToVector()) + { + return nullptr; + } + + if (!op2->OperIsHWIntrinsic()) + { + if ((oper != GT_XOR) || !op2->IsVectorAllBitsSet()) + { + return nullptr; + } + + // We want to explicitly recognize op1 ^ AllBitsSet as + // some platforms don't have direct support for ~op1 + + oper = GT_NOT; + op2 = op1; + } + + GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic(); + GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic(); + + if (!cvtOp2->OperIsConvertMaskToVector()) + { + return nullptr; + } + + unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); + + if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize) || + (genTypeSize(cvtOp2->GetSimdBaseType()) != simdBaseTypeSize)) + { + // We need both operands to be the same kind of mask; otherwise + // the bitwise operation can differ in how it performs + return nullptr; + } + + NamedIntrinsic maskIntrinsicId = NI_Illegal; + + switch (oper) + { + case GT_AND: + { + maskIntrinsicId = NI_EVEX_AndMask; + break; + } + + case GT_NOT: + { + maskIntrinsicId = NI_EVEX_NotMask; + break; + } + + case GT_OR: + { + maskIntrinsicId = NI_EVEX_OrMask; + break; + } + + case GT_XOR: + { + maskIntrinsicId = NI_EVEX_XorMask; + break; + } + + default: + { + unreached(); + } + } + + if (maskIntrinsicId == NI_Illegal) + { + return nullptr; + } + + if (oper == actualOper) + { + node->ChangeHWIntrinsicId(maskIntrinsicId); + node->Op(1) = cvtOp1->Op(1); + } + else + { + assert(oper == GT_NOT); + node->ResetHWIntrinsicId(maskIntrinsicId, this, cvtOp1->Op(1)); + node->gtFlags &= ~GTF_REVERSE_OPS; + } + + node->gtType = TYP_MASK; + DEBUG_DESTROY_NODE(op1); + + if (oper != GT_NOT) + { + assert(actualOp2 != nullptr); + node->Op(2) = cvtOp2->Op(1); + } + + if (actualOp2 != nullptr) + { + DEBUG_DESTROY_NODE(actualOp2); + } + + node->SetMorphed(this); + node = gtNewSimdCvtMaskToVectorNode(node->TypeGet(), node, node->GetSimdBaseJitType(), node->GetSimdSize()) + ->AsHWIntrinsic(); + node->SetMorphed(this); + return node; } +#elif defined(TARGET_ARM64) + return fgMorphTryUseAllMaskVariant(node); +#else +#error Unsupported platform #endif - - return node; + return nullptr; } +#endif // FEATURE_MASKED_HW_INTRINSICS //------------------------------------------------------------------------ // fgOptimizeHWIntrinsicAssociative: Morph an associative GenTreeHWIntrinsic tree. 
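To see the shape of the XARCH path of fgOptimizeForMaskedIntrinsic in isolation: when both operands of a bitwise vector operation are ConvertMaskToVector nodes over same-sized masks, the bitwise work moves onto the masks themselves and only a single conversion back to vector remains. Below is a minimal standalone sketch of that idea; the Node/Op types and the helpers mk and tryFoldToMaskOp are invented for illustration and are not the JIT's GenTree or HWIntrinsic API.

    #include <cassert>
    #include <iostream>
    #include <memory>

    // Invented, simplified IR -- not the JIT's GenTree types.
    enum class Op { LeafMask, CvtMaskToVector, AndVector, AndMask };

    struct Node {
        Op op;
        std::shared_ptr<Node> a, b; // operands, may be null
    };
    using NodePtr = std::shared_ptr<Node>;

    static NodePtr mk(Op op, NodePtr a = nullptr, NodePtr b = nullptr) {
        return std::make_shared<Node>(Node{op, a, b});
    }

    // Model of the fold: if both operands of a vector AND are mask->vector
    // conversions, AND the masks directly and convert the result back once.
    static NodePtr tryFoldToMaskOp(const NodePtr& n) {
        if (n->op != Op::AndVector) return nullptr;
        if (n->a->op != Op::CvtMaskToVector || n->b->op != Op::CvtMaskToVector) return nullptr;
        NodePtr maskAnd = mk(Op::AndMask, n->a->a, n->b->a); // operate on the masks
        return mk(Op::CvtMaskToVector, maskAnd);             // single conversion remains
    }

    int main() {
        NodePtr m1 = mk(Op::LeafMask), m2 = mk(Op::LeafMask);
        NodePtr tree = mk(Op::AndVector, mk(Op::CvtMaskToVector, m1), mk(Op::CvtMaskToVector, m2));
        NodePtr folded = tryFoldToMaskOp(tree);
        assert(folded && folded->op == Op::CvtMaskToVector && folded->a->op == Op::AndMask);
        std::cout << "bitwise AND now happens on the masks, with one conversion back\n";
        return 0;
    }

The real transform additionally reuses the existing GenTreeHWIntrinsic node in place, retypes it to TYP_MASK before wrapping it in gtNewSimdCvtMaskToVectorNode, and bails out when the operands' simd base type sizes differ, since the two masks would then not have matching layouts.
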
diff --git a/src/coreclr/jit/morpharm64.cpp b/src/coreclr/jit/morpharm64.cpp index 0d6ce79fd77cd3..a95dc1207c2611 100644 --- a/src/coreclr/jit/morpharm64.cpp +++ b/src/coreclr/jit/morpharm64.cpp @@ -85,7 +85,7 @@ GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* // Return Value: // The fully morphed tree if a change was made, else nullptr. // -GenTree* Compiler::fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node) +GenTreeHWIntrinsic* Compiler::fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node) { if (HWIntrinsicInfo::HasAllMaskVariant(node->GetHWIntrinsicId())) { From 713ab9649b54bda4686d0281af9780365a57a32e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 19 May 2025 11:36:24 -0700 Subject: [PATCH 11/13] fix the refactoring --- src/coreclr/jit/morph.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index d5faa60589494a..0f8d61ac0107c2 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9558,9 +9558,13 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) GenTreeHWIntrinsic* Compiler::fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* node) { #if defined(TARGET_XARCH) - bool isScalar = false; - genTreeOps actualOper = node->GetOperForHWIntrinsicId(&isScalar); - genTreeOps oper = actualOper; + bool isScalar = false; + genTreeOps actualOper = node->GetOperForHWIntrinsicId(&isScalar); + genTreeOps oper = actualOper; + var_types retType = node->TypeGet(); + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + var_types simdBaseType = node->GetSimdBaseType(); + unsigned simdSize = node->GetSimdSize(); // We shouldn't find AND_NOT, OR_NOT or XOR_NOT nodes since it should only be produced in lowering assert((oper != GT_AND_NOT) && (oper != GT_OR_NOT) && (oper != GT_XOR_NOT)); @@ -9689,8 +9693,7 @@ GenTreeHWIntrinsic* Compiler::fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* n } node->SetMorphed(this); - node = gtNewSimdCvtMaskToVectorNode(node->TypeGet(), node, node->GetSimdBaseJitType(), node->GetSimdSize()) - ->AsHWIntrinsic(); + node = gtNewSimdCvtMaskToVectorNode(retType, node, simdBaseJitType, simdSize)->AsHWIntrinsic(); node->SetMorphed(this); return node; } @@ -12646,7 +12649,7 @@ void Compiler::fgMorphStmts(BasicBlock* block) // Compiler::MorphUnreachableInfo::MorphUnreachableInfo(Compiler* comp) : m_traits(comp->m_dfsTree->GetPostOrderCount(), comp) - , m_vec(BitVecOps::MakeEmpty(&m_traits)){}; + , m_vec(BitVecOps::MakeEmpty(&m_traits)) {}; //------------------------------------------------------------------------ // SetUnreachable: during morph, mark a block as unreachable From 0b4e5dbff0bbc6da6049eabce7a17f52989fd107 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 19 May 2025 22:36:16 -0700 Subject: [PATCH 12/13] jit formatting --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a3026704cd43ab..a353b50b1a1cfc 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12698,7 +12698,7 @@ void Compiler::fgMorphStmts(BasicBlock* block) // Compiler::MorphUnreachableInfo::MorphUnreachableInfo(Compiler* comp) : m_traits(comp->m_dfsTree->GetPostOrderCount(), comp) - , m_vec(BitVecOps::MakeEmpty(&m_traits)) {}; + , m_vec(BitVecOps::MakeEmpty(&m_traits)){}; //------------------------------------------------------------------------ // SetUnreachable: during morph, mark a block as unreachable From 
c77c83e666c9e2157215c9ed1c2d2b6fa2877a67 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Tue, 20 May 2025 12:33:43 +0000 Subject: [PATCH 13/13] Move code into morph.cpp --- src/coreclr/jit/CMakeLists.txt | 1 - src/coreclr/jit/morph.cpp | 114 ++++++++++++++++++++++++++++ src/coreclr/jit/morpharm64.cpp | 131 --------------------------------- 3 files changed, 114 insertions(+), 132 deletions(-) delete mode 100644 src/coreclr/jit/morpharm64.cpp diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 9b00d72e0cdaf8..f8167e8ae94241 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -245,7 +245,6 @@ set( JIT_ARM64_SOURCES unwindarm64.cpp hwintrinsicarm64.cpp hwintrinsiccodegenarm64.cpp - morpharm64.cpp ) set( JIT_ARMV6_SOURCES diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a353b50b1a1cfc..37861d2284a2e5 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9748,6 +9748,120 @@ GenTreeHWIntrinsic* Compiler::fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* n #endif return nullptr; } + +#ifdef TARGET_ARM64 +//------------------------------------------------------------------------ +// canMorphVectorOperandToMask: Can this vector operand be converted to a +// node with type TYP_MASK easily? +// +bool Compiler::canMorphVectorOperandToMask(GenTree* node) +{ + return varTypeIsMask(node) || node->OperIsConvertMaskToVector() || node->IsVectorZero(); +} + +//------------------------------------------------------------------------ +// canMorphAllVectorOperandsToMasks: Can all vector operands to this node +// be converted to a node with type +// TYP_MASK easily? +// +bool Compiler::canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node) +{ + bool allMaskConversions = true; + for (size_t i = 1; i <= node->GetOperandCount() && allMaskConversions; i++) + { + allMaskConversions &= canMorphVectorOperandToMask(node->Op(i)); + } + + return allMaskConversions; +} + +//------------------------------------------------------------------------ +// doMorphVectorOperandToMask: Morph a vector node that is close to a mask +// node into a mask node. +// +// Return value: +// The morphed tree, or nullptr if the transform is not applicable. +// +GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent) +{ + if (varTypeIsMask(node)) + { + // Already a mask, nothing to do. + return node; + } + else if (node->OperIsConvertMaskToVector()) + { + // Replace node with op1. + return node->AsHWIntrinsic()->Op(1); + } + else if (node->IsVectorZero()) + { + // Morph the vector of zeroes into mask of zeroes. + GenTree* mask = gtNewSimdFalseMaskByteNode(parent->GetSimdSize()); + mask->SetMorphed(this); + return mask; + } + + return nullptr; +} + +//----------------------------------------------------------------------------------------------------- +// fgMorphTryUseAllMaskVariant: For NamedIntrinsics that have a variant where all operands are +// mask nodes. If all operands to this node are 'suggesting' that they +// originate closely from a mask, but are of vector types, then morph the +// operands as appropriate to use mask types instead. 'Suggesting' +// is defined by the canMorphVectorOperandToMask function. +// +// Arguments: +// tree - The HWIntrinsic to try and optimize. +// +// Return Value: +// The fully morphed tree if a change was made, else nullptr. 
+// +GenTreeHWIntrinsic* Compiler::fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node) +{ + if (HWIntrinsicInfo::HasAllMaskVariant(node->GetHWIntrinsicId())) + { + NamedIntrinsic maskVariant = HWIntrinsicInfo::GetMaskVariant(node->GetHWIntrinsicId()); + + // As some intrinsics have many variants, check that the count of operands on the node + // matches the number of operands required for the mask variant of the intrinsic. The mask + // variant of the intrinsic must have a fixed number of operands. + int numArgs = HWIntrinsicInfo::lookupNumArgs(maskVariant); + assert(numArgs >= 0); + if (node->GetOperandCount() == (size_t)numArgs) + { + // We're sure it will work at this point, so perform the pattern match on operands. + if (canMorphAllVectorOperandsToMasks(node)) + { + switch (node->GetOperandCount()) + { + case 1: + node->ResetHWIntrinsicId(maskVariant, doMorphVectorOperandToMask(node->Op(1), node)); + break; + case 2: + node->ResetHWIntrinsicId(maskVariant, doMorphVectorOperandToMask(node->Op(1), node), + doMorphVectorOperandToMask(node->Op(2), node)); + break; + case 3: + node->ResetHWIntrinsicId(maskVariant, this, doMorphVectorOperandToMask(node->Op(1), node), + doMorphVectorOperandToMask(node->Op(2), node), + doMorphVectorOperandToMask(node->Op(3), node)); + break; + default: + unreached(); + } + + node->gtType = TYP_MASK; + return node; + } + } + } + + return nullptr; +} +#endif // TARGET_ARM64 + #endif // FEATURE_MASKED_HW_INTRINSICS //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/morpharm64.cpp b/src/coreclr/jit/morpharm64.cpp deleted file mode 100644 index a95dc1207c2611..00000000000000 --- a/src/coreclr/jit/morpharm64.cpp +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XX XX -XX Arm64 Specific Morph XX -XX XX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -*/ - -#include "jitpch.h" -#ifdef _MSC_VER -#pragma hdrstop -#endif - -#ifdef FEATURE_MASKED_HW_INTRINSICS - -//------------------------------------------------------------------------ -// canMorphVectorOperandToMask: Can this vector operand be converted to a -// node with type TYP_MASK easily? -// -bool Compiler::canMorphVectorOperandToMask(GenTree* node) -{ - return varTypeIsMask(node) || node->OperIsConvertMaskToVector() || node->IsVectorZero(); -} - -//------------------------------------------------------------------------ -// canMorphAllVectorOperandsToMasks: Can all vector operands to this node -// be converted to a node with type -// TYP_MASK easily? -// -bool Compiler::canMorphAllVectorOperandsToMasks(GenTreeHWIntrinsic* node) -{ - bool allMaskConversions = true; - for (size_t i = 1; i <= node->GetOperandCount() && allMaskConversions; i++) - { - allMaskConversions &= canMorphVectorOperandToMask(node->Op(i)); - } - - return allMaskConversions; -} - -//------------------------------------------------------------------------ -// doMorphVectorOperandToMask: Morph a vector node that is close to a mask -// node into a mask node. -// -// Return value: -// The morphed tree, or nullptr if the transform is not applicable. 
-// -GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* parent) -{ - if (varTypeIsMask(node)) - { - // Already a mask, nothing to do. - return node; - } - else if (node->OperIsConvertMaskToVector()) - { - // Replace node with op1. - return node->AsHWIntrinsic()->Op(1); - } - else if (node->IsVectorZero()) - { - // Morph the vector of zeroes into mask of zeroes. - GenTree* mask = gtNewSimdFalseMaskByteNode(parent->GetSimdSize()); - mask->SetMorphed(this); - return mask; - } - - return nullptr; -} - -//----------------------------------------------------------------------------------------------------- -// fgMorphTryUseAllMaskVariant: For NamedIntrinsics that have a variant where all operands are -// mask nodes. If all operands to this node are 'suggesting' that they -// originate closely from a mask, but are of vector types, then morph the -// operands as appropriate to use mask types instead. 'Suggesting' -// is defined by the canMorphVectorOperandToMask function. -// -// Arguments: -// tree - The HWIntrinsic to try and optimize. -// -// Return Value: -// The fully morphed tree if a change was made, else nullptr. -// -GenTreeHWIntrinsic* Compiler::fgMorphTryUseAllMaskVariant(GenTreeHWIntrinsic* node) -{ - if (HWIntrinsicInfo::HasAllMaskVariant(node->GetHWIntrinsicId())) - { - NamedIntrinsic maskVariant = HWIntrinsicInfo::GetMaskVariant(node->GetHWIntrinsicId()); - - // As some intrinsics have many variants, check that the count of operands on the node - // matches the number of operands required for the mask variant of the intrinsic. The mask - // variant of the intrinsic must have a fixed number of operands. - int numArgs = HWIntrinsicInfo::lookupNumArgs(maskVariant); - assert(numArgs >= 0); - if (node->GetOperandCount() == (size_t)numArgs) - { - // We're sure it will work at this point, so perform the pattern match on operands. - if (canMorphAllVectorOperandsToMasks(node)) - { - switch (node->GetOperandCount()) - { - case 1: - node->ResetHWIntrinsicId(maskVariant, doMorphVectorOperandToMask(node->Op(1), node)); - break; - case 2: - node->ResetHWIntrinsicId(maskVariant, doMorphVectorOperandToMask(node->Op(1), node), - doMorphVectorOperandToMask(node->Op(2), node)); - break; - case 3: - node->ResetHWIntrinsicId(maskVariant, this, doMorphVectorOperandToMask(node->Op(1), node), - doMorphVectorOperandToMask(node->Op(2), node), - doMorphVectorOperandToMask(node->Op(3), node)); - break; - default: - unreached(); - } - - node->gtType = TYP_MASK; - return node; - } - } - } - - return nullptr; -} - -#endif // FEATURE_MASKED_HW_INTRINSICS
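The ARM64 counterpart, fgMorphTryUseAllMaskVariant, is driven by the HasAllMaskVariant/GetMaskVariant tables and by the operand screen in canMorphVectorOperandToMask: every operand must already be a mask, a ConvertMaskToVector wrapper, or a zero vector (which becomes an all-false mask). The following is a minimal standalone model of that screening and id swap; the Id, Operand, and Intrinsic types and the helpers maskVariantOf, maskLike, and tryUseAllMaskVariant are invented stand-ins for illustration, not the JIT's real classes or NamedIntrinsic values.

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Invented stand-ins for the JIT's intrinsic ids and operand shapes.
    enum class Id { ZipLow_Vector, ZipLow_Mask, None };

    struct Operand {
        bool isMask;          // already a mask-typed node
        bool isCvtMaskToVec;  // ConvertMaskToVector wrapper around a mask
        bool isZeroVector;    // all-zero vector constant (becomes an all-false mask)
    };

    struct Intrinsic {
        Id id;
        std::vector<Operand> ops;
        bool isMaskTyped;
    };

    // Model of GetMaskVariant: map a vector intrinsic to its all-mask twin.
    static Id maskVariantOf(Id id) {
        return (id == Id::ZipLow_Vector) ? Id::ZipLow_Mask : Id::None;
    }

    // Model of canMorphVectorOperandToMask: the operand is a mask already or is
    // trivially convertible to one.
    static bool maskLike(const Operand& op) {
        return op.isMask || op.isCvtMaskToVec || op.isZeroVector;
    }

    // Model of fgMorphTryUseAllMaskVariant: rewrite only when a mask variant
    // exists and every operand passes the screen; otherwise leave the node alone.
    static bool tryUseAllMaskVariant(Intrinsic& node) {
        Id variant = maskVariantOf(node.id);
        if (variant == Id::None) {
            return false;
        }
        for (const Operand& op : node.ops) {
            if (!maskLike(op)) {
                return false;
            }
        }
        node.id = variant;       // ResetHWIntrinsicId(maskVariant, ...)
        node.isMaskTyped = true; // node->gtType = TYP_MASK
        return true;
    }

    int main() {
        Intrinsic zip{Id::ZipLow_Vector, {{false, true, false}, {false, false, true}}, false};
        bool changed = tryUseAllMaskVariant(zip);
        assert(changed && zip.id == Id::ZipLow_Mask && zip.isMaskTyped);
        std::puts("ZipLow rewritten to its all-mask variant");
        return 0;
    }

In the real pass the operand-count check against HWIntrinsicInfo::lookupNumArgs guards intrinsics whose managed entry points have several overloads, and the rewrite dispatches to the one-, two-, or three-operand ResetHWIntrinsicId overload before retyping the node to TYP_MASK.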