Skip to content

Commit dac2839

Browse files
fixup! do not use CSAState
1 parent 5b11dca commit dac2839

File tree

8 files changed

+220
-301
lines changed

8 files changed

+220
-301
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ class VPBuilder {
237237
}
238238

239239
VPInstruction *createAnyActive(VPValue *Cond, DebugLoc DL,
240-
const Twine &Name) {
240+
const Twine &Name) {
241241
return createInstruction(VPInstruction::AnyActive, {Cond}, DL, Name);
242242
}
243243

@@ -249,7 +249,7 @@ class VPBuilder {
249249
}
250250

251251
VPInstruction *createAnyActiveEVL(VPValue *Cond, VPValue *EVL, DebugLoc DL,
252-
const Twine &Name) {
252+
const Twine &Name) {
253253
return createInstruction(VPInstruction::AnyActiveEVL, {Cond, EVL}, DL,
254254
Name);
255255
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 49 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -2974,7 +2974,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29742974
fixupIVUsers(Entry.first, Entry.second,
29752975
getOrCreateVectorTripCount(nullptr),
29762976
IVEndValues[Entry.first], LoopMiddleBlock, State);
2977-
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
29782977
}
29792978

29802979
for (Instruction *PI : PredicatedInstructions)
@@ -8710,13 +8709,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87108709
// directly, enabling more efficient codegen.
87118710
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
87128711
} else if (Legal->isCSAPhi(Phi)) {
8713-
VPCSAState *State = Plan.getCSAStates().find(Phi)->second;
8714-
VPValue *InitData = State->getVPInitData();
8712+
VPValue *InitScalar = Plan.getOrAddLiveIn(
8713+
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
8714+
8715+
// Don't build full CSA for VF=ElementCount::getFixed(1)
8716+
bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange(
8717+
[&](ElementCount VF) { return VF.isScalar(); }, Range);
8718+
87158719
// When the VF=getFixed(1), InitData is just InitScalar.
8716-
if (!InitData)
8717-
InitData = State->getVPInitScalar();
8720+
VPValue *InitData =
8721+
IsScalarVF ? InitScalar
8722+
: getVPValueOrAddLiveIn(PoisonValue::get(Phi->getType()));
87188723
PhiRecipe = new VPCSAHeaderPHIRecipe(Phi, InitData);
8719-
State->setPhiRecipe(cast<VPCSAHeaderPHIRecipe>(PhiRecipe));
87208724
} else {
87218725
llvm_unreachable(
87228726
"can only widen reductions, fixed-order recurrences, and CSAs here");
@@ -8757,13 +8761,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87578761
return CSADescriptor::isCSASelect(CSA.second, SI);
87588762
});
87598763
if (CSADescIt != Legal->getCSAs().end()) {
8760-
PHINode *CSAPhi = CSADescIt->first;
8761-
VPCSAState *State = Plan.getCSAStates().find(CSAPhi)->second;
8762-
VPValue *VPDataPhi = State->getPhiRecipe();
8763-
auto *R = new VPCSADataUpdateRecipe(
8764-
SI, {VPDataPhi, Operands[0], Operands[1], Operands[2]});
8765-
State->setDataUpdate(R);
8766-
return R;
8764+
for (VPRecipeBase &R :
8765+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
8766+
if (auto PhiR = dyn_cast<VPCSAHeaderPHIRecipe>(&R)) {
8767+
if (PhiR->getUnderlyingInstr() == CSADescIt->first) {
8768+
auto *R = new VPCSADataUpdateRecipe(
8769+
SI, {PhiR, Operands[0], Operands[1], Operands[2]});
8770+
PhiR->setDataUpdate(R);
8771+
return R;
8772+
}
8773+
}
8774+
}
87678775
}
87688776

87698777
return new VPWidenSelectRecipe(
@@ -8778,44 +8786,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87788786
return tryToWiden(Instr, Operands, VPBB);
87798787
}
87808788

8781-
/// Add CSA Recipes that can occur before each instruction in the input IR
8782-
/// is processed and introduced into VPlan.
8783-
static void
8784-
addCSAPreprocessRecipes(const LoopVectorizationLegality::CSAList &CSAs,
8785-
Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
8786-
VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
8787-
VPlan &Plan, VPRecipeBuilder &Builder) {
8788-
8789-
// Don't build full CSA for VF=ElementCount::getFixed(1)
8790-
bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange(
8791-
[&](ElementCount VF) { return VF.isScalar(); }, Range);
8792-
8793-
for (const auto &CSA : CSAs) {
8794-
VPValue *VPInitScalar = Plan.getOrAddLiveIn(
8795-
CSA.first->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
8796-
8797-
// Scalar VF builds the scalar version of the loop. In that case,
8798-
// no maintenence of mask nor extraction in middle block is needed.
8799-
if (IsScalarVF) {
8800-
VPCSAState *S = new VPCSAState(VPInitScalar);
8801-
Plan.addCSAState(CSA.first, S);
8802-
continue;
8803-
}
8804-
8805-
VPBuilder PHB(PreheaderVPBB);
8806-
auto *VPInitMask = Builder.getVPValueOrAddLiveIn(
8807-
ConstantInt::getFalse(Type::getInt1Ty(CSA.first->getContext())));
8808-
auto *VPInitData =
8809-
Builder.getVPValueOrAddLiveIn(PoisonValue::get(CSA.first->getType()));
8810-
8811-
VPBuilder HB(HeaderVPBB);
8812-
auto *VPMaskPhi = HB.createCSAMaskPhi(VPInitMask, DL, "csa.mask.phi");
8813-
8814-
auto *S = new VPCSAState(VPInitScalar, VPInitData, VPMaskPhi);
8815-
Plan.addCSAState(CSA.first, S);
8816-
}
8817-
}
8818-
88198789
/// Add CSA Recipes that must occur after each instruction in the input IR
88208790
/// is processed and introduced into VPlan.
88218791
static void
@@ -8828,60 +8798,57 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
88288798
[&](ElementCount VF) { return VF.isScalar(); }, Range))
88298799
return;
88308800

8801+
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
88318802
for (const auto &CSA : CSAs) {
8832-
VPCSAState *CSAState = Plan.getCSAStates().find(CSA.first)->second;
8833-
VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate();
8803+
// Build the MaskPhi recipe.
8804+
auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn(
8805+
ConstantInt::getFalse(Type::getInt1Ty(CSA.first->getContext())));
8806+
VPBuilder B;
8807+
B.setInsertPoint(Header, Header->getFirstNonPhi());
8808+
auto *VPMaskPhi = B.createCSAMaskPhi(VPInitMask, DL, "csa.mask.phi");
8809+
B.clearInsertionPoint();
88348810

8835-
assert(VPDataUpdate &&
8836-
"VPDataUpdate must have been introduced prior to postprocess");
8837-
assert(CSA.second.getCond() &&
8838-
"CSADescriptor must know how to describe the condition");
88398811
auto GetVPValue = [&](Value *I) {
88408812
return RecipeBuilder.getRecipe(cast<Instruction>(I))->getVPSingleValue();
88418813
};
8842-
VPValue *WidenedCond = GetVPValue(CSA.second.getCond());
8843-
VPValue *VPInitScalar = CSAState->getVPInitScalar();
8814+
VPCSADataUpdateRecipe *VPDataUpdate = cast<VPCSADataUpdateRecipe>(
8815+
cast<VPCSAHeaderPHIRecipe>(GetVPValue(CSA.first))->getVPNewData());
88448816

88458817
// The CSA optimization wants to use a condition such that when it is
88468818
// true, a new value is assigned. However, it is possible that a true lane
88478819
// in WidenedCond corresponds to selection of the initial value instead.
88488820
// In that case, we must use the negation of WidenedCond.
88498821
// i.e. select cond new_val old_val versus select cond.not old_val new_val
8822+
assert(CSA.second.getCond() &&
8823+
"CSADescriptor must know how to describe the condition");
8824+
VPValue *WidenedCond = GetVPValue(CSA.second.getCond());
88508825
VPValue *CondToUse = WidenedCond;
8851-
VPBuilder B;
88528826
if (cast<SelectInst>(CSA.second.getAssignment())->getTrueValue() ==
88538827
CSA.first) {
88548828
auto *VPNotCond = B.createNot(WidenedCond, DL);
8855-
VPNotCond->insertBefore(
8856-
GetVPValue(CSA.second.getAssignment())->getDefiningRecipe());
8829+
VPNotCond->insertBefore(VPDataUpdate);
88578830
CondToUse = VPNotCond;
88588831
}
88598832

8860-
auto *VPAnyActive =
8861-
B.createAnyActive(CondToUse, DL, "csa.cond.anyactive");
8862-
VPAnyActive->insertBefore(
8863-
GetVPValue(CSA.second.getAssignment())->getDefiningRecipe());
8833+
auto *VPAnyActive = B.createAnyActive(CondToUse, DL, "csa.cond.anyactive");
8834+
VPAnyActive->insertBefore(VPDataUpdate);
88648835

8865-
auto *VPMaskSel = B.createCSAMaskSel(CondToUse, CSAState->getVPMaskPhi(),
8866-
VPAnyActive, DL, "csa.mask.sel");
8836+
auto *VPMaskSel = B.createCSAMaskSel(CondToUse, VPMaskPhi, VPAnyActive, DL,
8837+
"csa.mask.sel");
88678838
VPMaskSel->insertAfter(VPAnyActive);
8839+
88688840
VPDataUpdate->setVPNewMaskAndVPAnyActive(VPMaskSel, VPAnyActive);
8841+
VPValue *VPInitScalar = Plan.getOrAddLiveIn(
8842+
CSA.first->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
8843+
SmallVector<PHINode *> PhiToFix;
8844+
for (User *U : VPDataUpdate->getUnderlyingValue()->users())
8845+
if (auto *Phi = dyn_cast<PHINode>(U);
8846+
Phi && Phi->getParent() == OrigLoop->getUniqueExitBlock())
8847+
PhiToFix.emplace_back(Phi);
88698848
VPCSAExtractScalarRecipe *ExtractScalarRecipe =
8870-
new VPCSAExtractScalarRecipe({VPInitScalar, VPMaskSel, VPDataUpdate});
8871-
8849+
new VPCSAExtractScalarRecipe({VPInitScalar, VPMaskSel, VPDataUpdate},
8850+
PhiToFix);
88728851
MiddleVPBB->insert(ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi());
8873-
8874-
// Update CSAState with new recipes
8875-
CSAState->setExtractScalarRecipe(ExtractScalarRecipe);
8876-
CSAState->setVPAnyActive(VPAnyActive);
8877-
8878-
// Add live out for the CSA. We should be in LCSSA, so we are looking for
8879-
// Phi users in the unique exit block of the original updated value.
8880-
BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock();
8881-
assert(OrigExit && "Expected a single exit block");
8882-
for (User *U :VPDataUpdate->getUnderlyingValue()->users())
8883-
if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent() == OrigExit)
8884-
Plan.addLiveOut(Phi, ExtractScalarRecipe);
88858852
}
88868853
}
88878854

@@ -9199,11 +9166,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91999166

92009167
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
92019168

9202-
addCSAPreprocessRecipes(Legal->getCSAs(), OrigLoop, Plan->getPreheader(),
9203-
Plan->getVectorLoopRegion()->getEntryBasicBlock(), DL,
9204-
Range, *Plan, RecipeBuilder);
9205-
9206-
92079169
// ---------------------------------------------------------------------------
92089170
// Pre-construction: record ingredients whose recipes we'll need to further
92099171
// process after constructing the initial VPlan.

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -848,9 +848,6 @@ VPlan::~VPlan() {
848848
delete VPV;
849849
if (BackedgeTakenCount)
850850
delete BackedgeTakenCount;
851-
852-
for (std::pair<PHINode *, VPCSAState *> &S : CSAStates)
853-
delete S.second;
854851
}
855852

856853
VPIRBasicBlock *VPIRBasicBlock::fromBasicBlock(BasicBlock *IRBB) {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 13 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -231,53 +231,6 @@ class VPLane {
231231
}
232232
};
233233

234-
class VPInstruction;
235-
class VPCSAHeaderPHIRecipe;
236-
class VPCSADataUpdateRecipe;
237-
class VPCSAExtractScalarRecipe;
238-
239-
/// VPCSAState holds information required to vectorize a conditional scalar
240-
/// assignment.
241-
class VPCSAState {
242-
VPValue *VPInitScalar = nullptr;
243-
VPValue *VPInitData = nullptr;
244-
VPInstruction *VPMaskPhi = nullptr;
245-
VPInstruction *VPAnyActive = nullptr;
246-
VPCSAHeaderPHIRecipe *VPPhiRecipe = nullptr;
247-
VPCSADataUpdateRecipe *VPDataUpdate = nullptr;
248-
VPCSAExtractScalarRecipe *VPExtractScalar = nullptr;
249-
250-
public:
251-
VPCSAState(VPValue *VPInitScalar, VPValue *InitData,
252-
VPInstruction *MaskPhi)
253-
: VPInitScalar(VPInitScalar), VPInitData(InitData), VPMaskPhi(MaskPhi) {}
254-
255-
VPCSAState(VPValue *VPInitScalar) : VPInitScalar(VPInitScalar) {}
256-
257-
VPValue *getVPInitScalar() const { return VPInitScalar; }
258-
259-
VPValue *getVPInitData() const { return VPInitData; }
260-
261-
VPInstruction *getVPMaskPhi() const { return VPMaskPhi; }
262-
263-
void setVPAnyActive(VPInstruction *AnyActive) { VPAnyActive = AnyActive; }
264-
VPInstruction *getVPAnyActive() { return VPAnyActive; }
265-
266-
VPCSAHeaderPHIRecipe *getPhiRecipe() const { return VPPhiRecipe; }
267-
268-
void setPhiRecipe(VPCSAHeaderPHIRecipe *R) { VPPhiRecipe = R; }
269-
270-
VPCSADataUpdateRecipe *getDataUpdate() const { return VPDataUpdate; }
271-
void setDataUpdate(VPCSADataUpdateRecipe *R) { VPDataUpdate = R; }
272-
273-
void setExtractScalarRecipe(VPCSAExtractScalarRecipe *R) {
274-
VPExtractScalar = R;
275-
}
276-
VPCSAExtractScalarRecipe *getExtractScalarRecipe() const {
277-
return VPExtractScalar;
278-
}
279-
};
280-
281234
/// VPTransformState holds information passed down when "executing" a VPlan,
282235
/// needed for generating the output IR.
283236
struct VPTransformState {
@@ -2899,7 +2852,10 @@ class VPCSAHeaderPHIRecipe final : public VPHeaderPHIRecipe {
28992852
}
29002853

29012854
VPValue *getVPInitData() { return getOperand(0); }
2902-
VPValue *getVPNewData() { return getOperand(1); }
2855+
2856+
VPValue *NewData = nullptr;
2857+
void setDataUpdate(VPValue *V) { NewData = V; }
2858+
VPValue *getVPNewData() { return NewData; }
29032859
};
29042860

29052861
class VPCSADataUpdateRecipe final : public VPSingleDefRecipe {
@@ -2953,15 +2909,19 @@ class VPCSADataUpdateRecipe final : public VPSingleDefRecipe {
29532909
};
29542910

29552911
class VPCSAExtractScalarRecipe final : public VPSingleDefRecipe {
2912+
SmallVector<PHINode *> PhisToFix;
2913+
29562914
public:
2957-
VPCSAExtractScalarRecipe(ArrayRef<VPValue *> Operands)
2958-
: VPSingleDefRecipe(VPDef::VPCSAExtractScalarSC, Operands) {}
2915+
VPCSAExtractScalarRecipe(ArrayRef<VPValue *> Operands,
2916+
SmallVector<PHINode *> PhisToFix)
2917+
: VPSingleDefRecipe(VPDef::VPCSAExtractScalarSC, Operands),
2918+
PhisToFix(PhisToFix) {}
29592919

29602920
~VPCSAExtractScalarRecipe() override = default;
29612921

29622922
VPCSAExtractScalarRecipe *clone() override {
29632923
SmallVector<VPValue *> Ops(operands());
2964-
return new VPCSAExtractScalarRecipe(Ops);
2924+
return new VPCSAExtractScalarRecipe(Ops, PhisToFix);
29652925
}
29662926

29672927
void execute(VPTransformState &State) override;
@@ -2975,6 +2935,8 @@ class VPCSAExtractScalarRecipe final : public VPSingleDefRecipe {
29752935
VPSlotTracker &SlotTracker) const override;
29762936
#endif
29772937

2938+
VP_CLASSOF_IMPL(VPDef::VPCSAExtractScalarSC)
2939+
29782940
VPValue *getVPInitScalar() const { return getOperand(0); }
29792941
VPValue *getVPMaskSel() const { return getOperand(1); }
29802942
VPValue *getVPDataSel() const { return getOperand(2); }
@@ -3954,11 +3916,6 @@ class VPlan {
39543916
/// definitions are VPValues that hold a pointer to their underlying IR.
39553917
SmallVector<VPValue *, 16> VPLiveInsToFree;
39563918

3957-
/// Values used outside the plan. It contains live-outs that need fixing. Any
3958-
/// live-out that is fixed outside VPlan needs to be removed. The remaining
3959-
/// live-outs are fixed via VPLiveOut::fixPhi.
3960-
MapVector<PHINode *, VPLiveOut *> LiveOuts;
3961-
39623919
/// Mapping from SCEVs to the VPValues representing their expansions.
39633920
/// NOTE: This mapping is temporary and will be removed once all users have
39643921
/// been modeled in VPlan directly.
@@ -4009,12 +3966,6 @@ class VPlan {
40093966
bool RequiresScalarEpilogueCheck,
40103967
bool TailFolded, Loop *TheLoop);
40113968

4012-
void addCSAState(PHINode *Phi, VPCSAState *S) { CSAStates.insert({Phi, S}); }
4013-
4014-
MapVector<PHINode *, VPCSAState *> const &getCSAStates() const {
4015-
return CSAStates;
4016-
}
4017-
40183969
/// Prepare the plan for execution, setting up the required live-in values.
40193970
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
40203971
Value *CanonicalIVStartValue, VPTransformState &State);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -672,8 +672,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
672672
ConstantInt::get(WidenedCond->getType()->getScalarType(), 0);
673673
Value *AnyActive = State.Builder.CreateIntrinsic(
674674
WidenedCond->getType()->getScalarType(), Intrinsic::vp_reduce_or,
675-
{StartValue, WidenedCond, AllOnesMask, EVL}, nullptr,
676-
"any.active");
675+
{StartValue, WidenedCond, AllOnesMask, EVL}, nullptr, "any.active");
677676
return AnyActive;
678677
}
679678
case VPInstruction::CSAVLPhi: {
@@ -2504,6 +2503,7 @@ void VPCSADataUpdateRecipe::execute(VPTransformState &State) {
25042503
"csa.data.sel");
25052504

25062505
DataPhi->addIncoming(DataSel, State.CFG.PrevBB);
2506+
25072507
State.set(this, DataSel);
25082508
}
25092509

@@ -2594,6 +2594,10 @@ void VPCSAExtractScalarRecipe::execute(VPTransformState &State) {
25942594
Value *LastIdxGEZero = State.Builder.CreateICmpSGE(LastIdx, Zero);
25952595
Value *ChooseFromVecOrInit =
25962596
State.Builder.CreateSelect(LastIdxGEZero, ExtractFromVec, InitScalar);
2597+
2598+
for (PHINode *Phi : PhisToFix)
2599+
Phi->addIncoming(ChooseFromVecOrInit, State.CFG.ExitBB);
2600+
25972601
State.set(this, ChooseFromVecOrInit, /*IsScalar=*/true);
25982602
}
25992603

0 commit comments

Comments
 (0)