@@ -2974,7 +2974,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29742974 fixupIVUsers (Entry.first , Entry.second ,
29752975 getOrCreateVectorTripCount (nullptr ),
29762976 IVEndValues[Entry.first ], LoopMiddleBlock, State);
2977- IVEndValues[Entry.first ], LoopMiddleBlock, Plan, State);
29782977 }
29792978
29802979 for (Instruction *PI : PredicatedInstructions)
@@ -8710,13 +8709,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87108709 // directly, enabling more efficient codegen.
87118710 PhiRecipe = new VPFirstOrderRecurrencePHIRecipe (Phi, *StartV);
87128711 } else if (Legal->isCSAPhi (Phi)) {
8713- VPCSAState *State = Plan.getCSAStates ().find (Phi)->second ;
8714- VPValue *InitData = State->getVPInitData ();
8712+ VPValue *InitScalar = Plan.getOrAddLiveIn (
8713+ Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8714+
8715+ // Don't build full CSA for VF=ElementCount::getFixed(1)
8716+ bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8717+ [&](ElementCount VF) { return VF.isScalar (); }, Range);
8718+
87158719 // When the VF=getFixed(1), InitData is just InitScalar.
8716- if (!InitData)
8717- InitData = State->getVPInitScalar ();
8720+ VPValue *InitData =
8721+ IsScalarVF ? InitScalar
8722+ : getVPValueOrAddLiveIn (PoisonValue::get (Phi->getType ()));
87188723 PhiRecipe = new VPCSAHeaderPHIRecipe (Phi, InitData);
8719- State->setPhiRecipe (cast<VPCSAHeaderPHIRecipe>(PhiRecipe));
87208724 } else {
87218725 llvm_unreachable (
87228726 " can only widen reductions, fixed-order recurrences, and CSAs here" );
@@ -8757,13 +8761,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87578761 return CSADescriptor::isCSASelect (CSA.second , SI);
87588762 });
87598763 if (CSADescIt != Legal->getCSAs ().end ()) {
8760- PHINode *CSAPhi = CSADescIt->first ;
8761- VPCSAState *State = Plan.getCSAStates ().find (CSAPhi)->second ;
8762- VPValue *VPDataPhi = State->getPhiRecipe ();
8763- auto *R = new VPCSADataUpdateRecipe (
8764- SI, {VPDataPhi, Operands[0 ], Operands[1 ], Operands[2 ]});
8765- State->setDataUpdate (R);
8766- return R;
8764+ for (VPRecipeBase &R :
8765+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
8766+ if (auto PhiR = dyn_cast<VPCSAHeaderPHIRecipe>(&R)) {
8767+ if (PhiR->getUnderlyingInstr () == CSADescIt->first ) {
8768+ auto *R = new VPCSADataUpdateRecipe (
8769+ SI, {PhiR, Operands[0 ], Operands[1 ], Operands[2 ]});
8770+ PhiR->setDataUpdate (R);
8771+ return R;
8772+ }
8773+ }
8774+ }
87678775 }
87688776
87698777 return new VPWidenSelectRecipe (
@@ -8778,44 +8786,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87788786 return tryToWiden (Instr, Operands, VPBB);
87798787}
87808788
8781- // / Add CSA Recipes that can occur before each instruction in the input IR
8782- // / is processed and introduced into VPlan.
8783- static void
8784- addCSAPreprocessRecipes (const LoopVectorizationLegality::CSAList &CSAs,
8785- Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
8786- VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
8787- VPlan &Plan, VPRecipeBuilder &Builder) {
8788-
8789- // Don't build full CSA for VF=ElementCount::getFixed(1)
8790- bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8791- [&](ElementCount VF) { return VF.isScalar (); }, Range);
8792-
8793- for (const auto &CSA : CSAs) {
8794- VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8795- CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8796-
8797- // Scalar VF builds the scalar version of the loop. In that case,
8798- // no maintenence of mask nor extraction in middle block is needed.
8799- if (IsScalarVF) {
8800- VPCSAState *S = new VPCSAState (VPInitScalar);
8801- Plan.addCSAState (CSA.first , S);
8802- continue ;
8803- }
8804-
8805- VPBuilder PHB (PreheaderVPBB);
8806- auto *VPInitMask = Builder.getVPValueOrAddLiveIn (
8807- ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8808- auto *VPInitData =
8809- Builder.getVPValueOrAddLiveIn (PoisonValue::get (CSA.first ->getType ()));
8810-
8811- VPBuilder HB (HeaderVPBB);
8812- auto *VPMaskPhi = HB.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8813-
8814- auto *S = new VPCSAState (VPInitScalar, VPInitData, VPMaskPhi);
8815- Plan.addCSAState (CSA.first , S);
8816- }
8817- }
8818-
88198789// / Add CSA Recipes that must occur after each instruction in the input IR
88208790// / is processed and introduced into VPlan.
88218791static void
@@ -8828,60 +8798,57 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
88288798 [&](ElementCount VF) { return VF.isScalar (); }, Range))
88298799 return ;
88308800
8801+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
88318802 for (const auto &CSA : CSAs) {
8832- VPCSAState *CSAState = Plan.getCSAStates ().find (CSA.first )->second ;
8833- VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate ();
8803+ // Build the MaskPhi recipe.
8804+ auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn (
8805+ ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8806+ VPBuilder B;
8807+ B.setInsertPoint (Header, Header->getFirstNonPhi ());
8808+ auto *VPMaskPhi = B.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8809+ B.clearInsertionPoint ();
88348810
8835- assert (VPDataUpdate &&
8836- " VPDataUpdate must have been introduced prior to postprocess" );
8837- assert (CSA.second .getCond () &&
8838- " CSADescriptor must know how to describe the condition" );
88398811 auto GetVPValue = [&](Value *I) {
88408812 return RecipeBuilder.getRecipe (cast<Instruction>(I))->getVPSingleValue ();
88418813 };
8842- VPValue *WidenedCond = GetVPValue (CSA. second . getCond ());
8843- VPValue *VPInitScalar = CSAState-> getVPInitScalar ( );
8814+ VPCSADataUpdateRecipe *VPDataUpdate = cast<VPCSADataUpdateRecipe>(
8815+ cast<VPCSAHeaderPHIRecipe>( GetVPValue (CSA. first ))-> getVPNewData () );
88448816
88458817 // The CSA optimization wants to use a condition such that when it is
88468818 // true, a new value is assigned. However, it is possible that a true lane
88478819 // in WidenedCond corresponds to selection of the initial value instead.
88488820 // In that case, we must use the negation of WidenedCond.
88498821 // i.e. select cond new_val old_val versus select cond.not old_val new_val
8822+ assert (CSA.second .getCond () &&
8823+ " CSADescriptor must know how to describe the condition" );
8824+ VPValue *WidenedCond = GetVPValue (CSA.second .getCond ());
88508825 VPValue *CondToUse = WidenedCond;
8851- VPBuilder B;
88528826 if (cast<SelectInst>(CSA.second .getAssignment ())->getTrueValue () ==
88538827 CSA.first ) {
88548828 auto *VPNotCond = B.createNot (WidenedCond, DL);
8855- VPNotCond->insertBefore (
8856- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8829+ VPNotCond->insertBefore (VPDataUpdate);
88578830 CondToUse = VPNotCond;
88588831 }
88598832
8860- auto *VPAnyActive =
8861- B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8862- VPAnyActive->insertBefore (
8863- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8833+ auto *VPAnyActive = B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8834+ VPAnyActive->insertBefore (VPDataUpdate);
88648835
8865- auto *VPMaskSel = B.createCSAMaskSel (CondToUse, CSAState-> getVPMaskPhi () ,
8866- VPAnyActive, DL, " csa.mask.sel" );
8836+ auto *VPMaskSel = B.createCSAMaskSel (CondToUse, VPMaskPhi, VPAnyActive, DL ,
8837+ " csa.mask.sel" );
88678838 VPMaskSel->insertAfter (VPAnyActive);
8839+
88688840 VPDataUpdate->setVPNewMaskAndVPAnyActive (VPMaskSel, VPAnyActive);
8841+ VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8842+ CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8843+ SmallVector<PHINode *> PhiToFix;
8844+ for (User *U : VPDataUpdate->getUnderlyingValue ()->users ())
8845+ if (auto *Phi = dyn_cast<PHINode>(U);
8846+ Phi && Phi->getParent () == OrigLoop->getUniqueExitBlock ())
8847+ PhiToFix.emplace_back (Phi);
88698848 VPCSAExtractScalarRecipe *ExtractScalarRecipe =
8870- new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate});
8871-
8849+ new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate},
8850+ PhiToFix);
88728851 MiddleVPBB->insert (ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi ());
8873-
8874- // Update CSAState with new recipes
8875- CSAState->setExtractScalarRecipe (ExtractScalarRecipe);
8876- CSAState->setVPAnyActive (VPAnyActive);
8877-
8878- // Add live out for the CSA. We should be in LCSSA, so we are looking for
8879- // Phi users in the unique exit block of the original updated value.
8880- BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock ();
8881- assert (OrigExit && " Expected a single exit block" );
8882- for (User *U :VPDataUpdate->getUnderlyingValue ()->users ())
8883- if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent () == OrigExit)
8884- Plan.addLiveOut (Phi, ExtractScalarRecipe);
88858852 }
88868853}
88878854
@@ -9199,11 +9166,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91999166
92009167 VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
92019168
9202- addCSAPreprocessRecipes (Legal->getCSAs (), OrigLoop, Plan->getPreheader (),
9203- Plan->getVectorLoopRegion ()->getEntryBasicBlock (), DL,
9204- Range, *Plan, RecipeBuilder);
9205-
9206-
92079169 // ---------------------------------------------------------------------------
92089170 // Pre-construction: record ingredients whose recipes we'll need to further
92099171 // process after constructing the initial VPlan.
0 commit comments