Skip to content

Commit ed5aadd

Browse files
authored
[IR] Vector extract last active element intrinsic (#113587)
As discussed in #112738, it may be better to have an intrinsic to represent vector element extracts based on mask bits. This intrinsic is for the case of extracting the last active element, if any, or a default value if the mask is all-false. The target-agnostic SelectionDAG lowering is similar to the IR in #106560.
1 parent 593be02 commit ed5aadd

File tree

6 files changed

+505
-0
lines changed

6 files changed

+505
-0
lines changed

llvm/docs/LangRef.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20004,6 +20004,33 @@ the following sequence of operations:
2000420004

2000520005
The ``mask`` operand will apply to at least the gather and scatter operations.
2000620006

20007+
'``llvm.experimental.vector.extract.last.active``' Intrinsic
20008+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
20009+
20010+
This is an overloaded intrinsic.
20011+
20012+
::
20013+
20014+
declare i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %data, <4 x i1> %mask, i32 %passthru)
20015+
declare i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru)
20016+
20017+
Arguments:
20018+
""""""""""
20019+
20020+
The first argument is the data vector to extract a lane from. The second is a
20021+
mask vector controlling the extraction. The third argument is a passthru
20022+
value, which is returned when no lane of the mask is active.
20023+
20024+
The two input vectors must have the same number of elements, and the type of
20025+
the passthru value must match that of the elements of the data vector.
20026+
20027+
Semantics:
20028+
""""""""""
20029+
20030+
The '``llvm.experimental.vector.extract.last.active``' intrinsic will extract an
20031+
element from the data vector at the index matching the highest active lane of
20032+
the mask vector. If no mask lanes are active then the passthru value is
20033+
returned instead.
2000720034

2000820035
.. _int_vector_compress:
2000920036

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1928,6 +1928,12 @@ def int_experimental_vector_match : DefaultAttrsIntrinsic<
19281928
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], // Mask
19291929
[ IntrNoMem, IntrNoSync, IntrWillReturn ]>;
19301930

1931+
// Extract the last active element of a vector: yields the element of the data
// vector (operand 0) at the highest lane whose mask bit (operand 1) is set, or
// the scalar passthru (operand 2) when no mask lane is active. The result and
// the passthru both have the element type of the data vector.
1932+
def int_experimental_vector_extract_last_active:
1933+
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
1934+
[llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1935+
LLVMVectorElementType<0>], [IntrNoMem]>;
1936+
19311937
// Operators
19321938
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
19331939
// Integer arithmetic

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6415,6 +6415,50 @@ void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I,
64156415
DAG.setRoot(Histogram);
64166416
}
64176417

6418+
void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
6419+
unsigned Intrinsic) {
6420+
assert(Intrinsic == Intrinsic::experimental_vector_extract_last_active &&
6421+
"Tried lowering invalid vector extract last");
6422+
SDLoc sdl = getCurSDLoc();
6423+
SDValue Data = getValue(I.getOperand(0));
6424+
SDValue Mask = getValue(I.getOperand(1));
6425+
SDValue PassThru = getValue(I.getOperand(2));
6426+
6427+
EVT DataVT = Data.getValueType();
6428+
EVT ScalarVT = PassThru.getValueType();
6429+
EVT BoolVT = Mask.getValueType().getScalarType();
6430+
6431+
// Find a suitable type for a stepvector.
6432+
ConstantRange VScaleRange(1, /*isFullSet=*/true); // Dummy value.
6433+
if (DataVT.isScalableVector())
6434+
VScaleRange = getVScaleRange(I.getCaller(), 64);
6435+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6436+
unsigned EltWidth = TLI.getBitWidthForCttzElements(
6437+
I.getType(), DataVT.getVectorElementCount(), /*ZeroIsPoison=*/true,
6438+
&VScaleRange);
6439+
MVT StepVT = MVT::getIntegerVT(EltWidth);
6440+
EVT StepVecVT = DataVT.changeVectorElementType(StepVT);
6441+
6442+
// Zero out lanes with inactive elements, then find the highest remaining
6443+
// value from the stepvector.
6444+
SDValue Zeroes = DAG.getConstant(0, sdl, StepVecVT);
6445+
SDValue StepVec = DAG.getStepVector(sdl, StepVecVT);
6446+
SDValue ActiveElts = DAG.getSelect(sdl, StepVecVT, Mask, StepVec, Zeroes);
6447+
SDValue HighestIdx =
6448+
DAG.getNode(ISD::VECREDUCE_UMAX, sdl, StepVT, ActiveElts);
6449+
6450+
// Extract the corresponding lane from the data vector
6451+
EVT ExtVT = TLI.getVectorIdxTy(DAG.getDataLayout());
6452+
SDValue Idx = DAG.getZExtOrTrunc(HighestIdx, sdl, ExtVT);
6453+
SDValue Extract =
6454+
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, ScalarVT, Data, Idx);
6455+
6456+
// If all mask lanes were inactive, choose the passthru value instead.
6457+
SDValue AnyActive = DAG.getNode(ISD::VECREDUCE_OR, sdl, BoolVT, Mask);
6458+
SDValue Result = DAG.getSelect(sdl, ScalarVT, AnyActive, Extract, PassThru);
6459+
setValue(&I, Result);
6460+
}
6461+
64186462
/// Lower the call to the specified intrinsic function.
64196463
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
64206464
unsigned Intrinsic) {
@@ -8236,6 +8280,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
82368280
visitVectorHistogram(I, Intrinsic);
82378281
return;
82388282
}
8283+
case Intrinsic::experimental_vector_extract_last_active: {
8284+
visitVectorExtractLastActive(I, Intrinsic);
8285+
return;
8286+
}
82398287
}
82408288
}
82418289

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,7 @@ class SelectionDAGBuilder {
629629
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
630630
void visitConvergenceControl(const CallInst &I, unsigned Intrinsic);
631631
void visitVectorHistogram(const CallInst &I, unsigned IntrinsicID);
632+
void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic);
632633
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
633634
const SmallVectorImpl<SDValue> &OpValues);
634635
void visitVPStore(const VPIntrinsic &VPIntrin,

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,6 +1119,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
11191119
if (Name.consume_front("experimental.vector.")) {
11201120
Intrinsic::ID ID =
11211121
StringSwitch<Intrinsic::ID>(Name)
1122+
// Skip over extract.last.active, otherwise it will be 'upgraded'
1123+
// to a regular vector extract which is a different operation.
1124+
.StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
11221125
.StartsWith("extract.", Intrinsic::vector_extract)
11231126
.StartsWith("insert.", Intrinsic::vector_insert)
11241127
.StartsWith("splice.", Intrinsic::vector_splice)

0 commit comments

Comments
 (0)