@@ -2973,7 +2973,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
2973
2973
for (const auto &Entry : Legal->getInductionVars ())
2974
2974
fixupIVUsers (Entry.first , Entry.second ,
2975
2975
getOrCreateVectorTripCount (nullptr ), LoopMiddleBlock, State);
2976
- fixCSALiveOuts (State, Plan);
2977
2976
}
2978
2977
2979
2978
for (Instruction *PI : PredicatedInstructions)
@@ -8731,13 +8730,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8731
8730
// directly, enabling more efficient codegen.
8732
8731
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe (Phi, *StartV);
8733
8732
} else if (Legal->isCSAPhi (Phi)) {
8734
- VPCSAState *State = Plan.getCSAStates ().find (Phi)->second ;
8735
- VPValue *InitData = State->getVPInitData ();
8733
+ VPValue *InitScalar = Plan.getOrAddLiveIn (
8734
+ Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8735
+
8736
+ // Don't build full CSA for VF=ElementCount::getFixed(1)
8737
+ bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8738
+ [&](ElementCount VF) { return VF.isScalar (); }, Range);
8739
+
8736
8740
// When the VF=getFixed(1), InitData is just InitScalar.
8737
- if (!InitData)
8738
- InitData = State->getVPInitScalar ();
8741
+ VPValue *InitData =
8742
+ IsScalarVF ? InitScalar
8743
+ : getVPValueOrAddLiveIn (PoisonValue::get (Phi->getType ()));
8739
8744
PhiRecipe = new VPCSAHeaderPHIRecipe (Phi, InitData);
8740
- State->setPhiRecipe (cast<VPCSAHeaderPHIRecipe>(PhiRecipe));
8741
8745
} else {
8742
8746
llvm_unreachable (
8743
8747
" can only widen reductions, fixed-order recurrences, and CSAs here" );
@@ -8778,13 +8782,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8778
8782
return CSADescriptor::isCSASelect (CSA.second , SI);
8779
8783
});
8780
8784
if (CSADescIt != Legal->getCSAs ().end ()) {
8781
- PHINode *CSAPhi = CSADescIt->first ;
8782
- VPCSAState *State = Plan.getCSAStates ().find (CSAPhi)->second ;
8783
- VPValue *VPDataPhi = State->getPhiRecipe ();
8784
- auto *R = new VPCSADataUpdateRecipe (
8785
- SI, {VPDataPhi, Operands[0 ], Operands[1 ], Operands[2 ]});
8786
- State->setDataUpdate (R);
8787
- return R;
8785
+ for (VPRecipeBase &R :
8786
+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
8787
+ if (auto PhiR = dyn_cast<VPCSAHeaderPHIRecipe>(&R)) {
8788
+ if (PhiR->getUnderlyingInstr () == CSADescIt->first ) {
8789
+ auto *R = new VPCSADataUpdateRecipe (
8790
+ SI, {PhiR, Operands[0 ], Operands[1 ], Operands[2 ]});
8791
+ PhiR->setDataUpdate (R);
8792
+ return R;
8793
+ }
8794
+ }
8795
+ }
8788
8796
}
8789
8797
8790
8798
return new VPWidenSelectRecipe (
@@ -8799,44 +8807,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
8799
8807
return tryToWiden (Instr, Operands, VPBB);
8800
8808
}
8801
8809
8802
- // / Add CSA Recipes that can occur before each instruction in the input IR
8803
- // / is processed and introduced into VPlan.
8804
- static void
8805
- addCSAPreprocessRecipes (const LoopVectorizationLegality::CSAList &CSAs,
8806
- Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
8807
- VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
8808
- VPlan &Plan, VPRecipeBuilder &Builder) {
8809
-
8810
- // Don't build full CSA for VF=ElementCount::getFixed(1)
8811
- bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8812
- [&](ElementCount VF) { return VF.isScalar (); }, Range);
8813
-
8814
- for (const auto &CSA : CSAs) {
8815
- VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8816
- CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8817
-
8818
- // Scalar VF builds the scalar version of the loop. In that case,
8819
- // no maintenence of mask nor extraction in middle block is needed.
8820
- if (IsScalarVF) {
8821
- VPCSAState *S = new VPCSAState (VPInitScalar);
8822
- Plan.addCSAState (CSA.first , S);
8823
- continue ;
8824
- }
8825
-
8826
- VPBuilder PHB (PreheaderVPBB);
8827
- auto *VPInitMask = Builder.getVPValueOrAddLiveIn (
8828
- ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8829
- auto *VPInitData =
8830
- Builder.getVPValueOrAddLiveIn (PoisonValue::get (CSA.first ->getType ()));
8831
-
8832
- VPBuilder HB (HeaderVPBB);
8833
- auto *VPMaskPhi = HB.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8834
-
8835
- auto *S = new VPCSAState (VPInitScalar, VPInitData, VPMaskPhi);
8836
- Plan.addCSAState (CSA.first , S);
8837
- }
8838
- }
8839
-
8840
8810
// / Add CSA Recipes that must occur after each instruction in the input IR
8841
8811
// / is processed and introduced into VPlan.
8842
8812
static void
@@ -8849,60 +8819,57 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
8849
8819
[&](ElementCount VF) { return VF.isScalar (); }, Range))
8850
8820
return ;
8851
8821
8822
+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
8852
8823
for (const auto &CSA : CSAs) {
8853
- VPCSAState *CSAState = Plan.getCSAStates ().find (CSA.first )->second ;
8854
- VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate ();
8824
+ // Build the MaskPhi recipe.
8825
+ auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn (
8826
+ ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8827
+ VPBuilder B;
8828
+ B.setInsertPoint (Header, Header->getFirstNonPhi ());
8829
+ auto *VPMaskPhi = B.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8830
+ B.clearInsertionPoint ();
8855
8831
8856
- assert (VPDataUpdate &&
8857
- " VPDataUpdate must have been introduced prior to postprocess" );
8858
- assert (CSA.second .getCond () &&
8859
- " CSADescriptor must know how to describe the condition" );
8860
8832
auto GetVPValue = [&](Value *I) {
8861
8833
return RecipeBuilder.getRecipe (cast<Instruction>(I))->getVPSingleValue ();
8862
8834
};
8863
- VPValue *WidenedCond = GetVPValue (CSA. second . getCond ());
8864
- VPValue *VPInitScalar = CSAState-> getVPInitScalar ( );
8835
+ VPCSADataUpdateRecipe *VPDataUpdate = cast<VPCSADataUpdateRecipe>(
8836
+ cast<VPCSAHeaderPHIRecipe>( GetVPValue (CSA. first ))-> getVPNewData () );
8865
8837
8866
8838
// The CSA optimization wants to use a condition such that when it is
8867
8839
// true, a new value is assigned. However, it is possible that a true lane
8868
8840
// in WidenedCond corresponds to selection of the initial value instead.
8869
8841
// In that case, we must use the negation of WidenedCond.
8870
8842
// i.e. select cond new_val old_val versus select cond.not old_val new_val
8843
+ assert (CSA.second .getCond () &&
8844
+ " CSADescriptor must know how to describe the condition" );
8845
+ VPValue *WidenedCond = GetVPValue (CSA.second .getCond ());
8871
8846
VPValue *CondToUse = WidenedCond;
8872
- VPBuilder B;
8873
8847
if (cast<SelectInst>(CSA.second .getAssignment ())->getTrueValue () ==
8874
8848
CSA.first ) {
8875
8849
auto *VPNotCond = B.createNot (WidenedCond, DL);
8876
- VPNotCond->insertBefore (
8877
- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8850
+ VPNotCond->insertBefore (VPDataUpdate);
8878
8851
CondToUse = VPNotCond;
8879
8852
}
8880
8853
8881
- auto *VPAnyActive =
8882
- B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8883
- VPAnyActive->insertBefore (
8884
- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8854
+ auto *VPAnyActive = B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8855
+ VPAnyActive->insertBefore (VPDataUpdate);
8885
8856
8886
- auto *VPMaskSel = B.createCSAMaskSel (CondToUse, CSAState-> getVPMaskPhi () ,
8887
- VPAnyActive, DL, " csa.mask.sel" );
8857
+ auto *VPMaskSel = B.createCSAMaskSel (CondToUse, VPMaskPhi, VPAnyActive, DL ,
8858
+ " csa.mask.sel" );
8888
8859
VPMaskSel->insertAfter (VPAnyActive);
8860
+
8889
8861
VPDataUpdate->setVPNewMaskAndVPAnyActive (VPMaskSel, VPAnyActive);
8862
+ VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8863
+ CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8864
+ SmallVector<PHINode *> PhiToFix;
8865
+ for (User *U : VPDataUpdate->getUnderlyingValue ()->users ())
8866
+ if (auto *Phi = dyn_cast<PHINode>(U);
8867
+ Phi && Phi->getParent () == OrigLoop->getUniqueExitBlock ())
8868
+ PhiToFix.emplace_back (Phi);
8890
8869
VPCSAExtractScalarRecipe *ExtractScalarRecipe =
8891
- new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate});
8892
-
8870
+ new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate},
8871
+ PhiToFix);
8893
8872
MiddleVPBB->insert (ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi ());
8894
-
8895
- // Update CSAState with new recipes
8896
- CSAState->setExtractScalarRecipe (ExtractScalarRecipe);
8897
- CSAState->setVPAnyActive (VPAnyActive);
8898
-
8899
- // Add live out for the CSA. We should be in LCSSA, so we are looking for
8900
- // Phi users in the unique exit block of the original updated value.
8901
- BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock ();
8902
- assert (OrigExit && " Expected a single exit block" );
8903
- for (User *U :VPDataUpdate->getUnderlyingValue ()->users ())
8904
- if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent () == OrigExit)
8905
- Plan.addLiveOut (Phi, ExtractScalarRecipe);
8906
8873
}
8907
8874
}
8908
8875
@@ -9224,11 +9191,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9224
9191
9225
9192
VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
9226
9193
9227
- addCSAPreprocessRecipes (Legal->getCSAs (), OrigLoop, Plan->getPreheader (),
9228
- Plan->getVectorLoopRegion ()->getEntryBasicBlock (), DL,
9229
- Range, *Plan, RecipeBuilder);
9230
-
9231
-
9232
9194
// ---------------------------------------------------------------------------
9233
9195
// Pre-construction: record ingredients whose recipes we'll need to further
9234
9196
// process after constructing the initial VPlan.
0 commit comments