[VPlan] Optimize resume values of IVs together with other exit values. #174239
Conversation
Simplify updateScalarResumePhis to always extract the final lane for inductions, and move the optimization of induction end values used by resume phis into optimizeInductionExitUsers. This removes the need to pass state between the transforms and addresses a TODO. Note that optimizeInductionExitUsers has been moved directly after updateScalarResumePhis, so that transforms running in between that expect optimized IV resume values keep working as-is. This allows optimizing induction end values in a few more cases.
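In IR terms, the restructuring legalizes every induction resume value to an explicit last-lane extract in the middle block, which optimizeInductionExitUsers then folds to the precomputed end value. A minimal before/after sketch with hypothetical value names, assuming a canonical i64 induction starting at 0 with VF=4 and UF=1:

```llvm
; After updateScalarResumePhis: the resume value is an explicit extract of
; the last lane of the last part of the wide induction (hypothetical names).
middle.block:
  %ind.extract = extractelement <4 x i64> %vec.ind, i32 3
scalar.ph:
  %bc.resume.val = phi i64 [ %ind.extract, %middle.block ], [ 0, %entry ]

; After optimizeInductionExitUsers: the phi operand is rewired to the
; precomputed end value (the vector trip count, for a canonical induction)
; and the now-dead extract is deleted.
scalar.ph:
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ]
```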
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers

Author: Florian Hahn (fhahn)

Changes: (same as the PR description above.)

Patch is 36.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174239.diff

7 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 04e08e0349074..57e186ebf38fb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8532,8 +8532,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// TODO: We can't call runPass on these transforms yet, due to verifier
// failures.
VPlanTransforms::addExitUsersForFirstOrderRecurrences(*Plan, Range);
- DenseMap<VPValue *, VPValue *> IVEndValues;
- VPlanTransforms::updateScalarResumePhis(*Plan, IVEndValues);
+ VPlanTransforms::updateScalarResumePhis(*Plan);
+ VPlanTransforms::optimizeInductionExitUsers(*Plan, PSE);
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -8600,7 +8600,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
WithoutRuntimeCheck);
}
- VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, PSE);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
@@ -8639,8 +8638,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
// values.
// TODO: We can't call runPass on the transform yet, due to verifier
// failures.
- DenseMap<VPValue *, VPValue *> IVEndValues;
- VPlanTransforms::updateScalarResumePhis(*Plan, IVEndValues);
+ VPlanTransforms::updateScalarResumePhis(*Plan);
+ VPlanTransforms::optimizeInductionExitUsers(*Plan, PSE);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8f0cf7d5beeed..6374dfb698b37 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -891,6 +891,41 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
}
}
+/// Compute and return the end value for \p WideIV, unless it is truncated. If
+/// the induction recipe is not canonical, creates a VPDerivedIVRecipe to
+/// compute the end value of the induction.
+static VPValue *tryToComputeEndValueForInduction(VPWidenInductionRecipe *WideIV,
+ VPBuilder &VectorPHBuilder,
+ VPTypeAnalysis &TypeInfo,
+ VPValue *VectorTC) {
+ auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
+ // Truncated wide inductions resume from the last lane of their vector value
+ // in the last vector iteration which is handled elsewhere.
+ if (WideIntOrFp && WideIntOrFp->getTruncInst())
+ return nullptr;
+
+ VPValue *Start = WideIV->getStartValue();
+ VPValue *Step = WideIV->getStepValue();
+ const InductionDescriptor &ID = WideIV->getInductionDescriptor();
+ VPValue *EndValue = VectorTC;
+ if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
+ EndValue = VectorPHBuilder.createDerivedIV(
+ ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
+ Start, VectorTC, Step);
+ }
+
+ // EndValue is derived from the vector trip count (which has the same type as
+ // the widest induction) and thus may be wider than the induction here.
+ Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV);
+ if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {
+ EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,
+ ScalarTypeOfWideIV,
+ WideIV->getDebugLoc());
+ }
+
+ return EndValue;
+}
+
/// Check if \p VPV is an untruncated wide induction, either before or after the
/// increment. If so return the header IV (before the increment), otherwise
/// return null.
@@ -1054,11 +1089,28 @@ static VPValue *optimizeLatchExitInductionUser(
return nullptr;
}
+static void recursivelyDeleteDeadRecipes(VPValue *V);
+
void VPlanTransforms::optimizeInductionExitUsers(
- VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues,
- PredicatedScalarEvolution &PSE) {
+ VPlan &Plan, PredicatedScalarEvolution &PSE) {
+ VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
VPBlockBase *MiddleVPBB = Plan.getMiddleBlock();
VPTypeAnalysis TypeInfo(Plan);
+ VPBuilder VectorPHBuilder(
+ cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
+
+ // Compute end values for all inductions.
+ DenseMap<VPValue *, VPValue *> EndValues;
+ for (auto &Phi : VectorRegion->getEntryBasicBlock()->phis()) {
+ auto *WideIV = dyn_cast<VPWidenInductionRecipe>(&Phi);
+ if (!WideIV)
+ continue;
+ if (VPValue *EndValue = tryToComputeEndValueForInduction(
+ WideIV, VectorPHBuilder, TypeInfo, &Plan.getVectorTripCount()))
+ EndValues[WideIV] = EndValue;
+ }
+
+ // Optimize induction exit users in exit blocks.
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
for (VPRecipeBase &R : ExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
@@ -1077,6 +1129,22 @@ void VPlanTransforms::optimizeInductionExitUsers(
}
}
}
+
+ // Optimize resume phis for inductions in the scalar preheader.
+ for (VPRecipeBase &PhiR : Plan.getScalarPreheader()->phis()) {
+ auto *ResumePhiR = cast<VPPhi>(&PhiR);
+ VPValue *Incoming = ResumePhiR->getOperand(0);
+
+ VPValue *ExtractedFrom;
+ if (!match(Incoming, m_ExtractLastLaneOfLastPart(m_VPValue(ExtractedFrom))))
+ continue;
+ VPValue *EndValue = EndValues.lookup(ExtractedFrom);
+ if (!EndValue)
+ continue;
+ ResumePhiR->setOperand(0, EndValue);
+ recursivelyDeleteDeadRecipes(Incoming);
+ continue;
+ }
}
/// Remove redundant ExpandSCEVRecipes in \p Plan's entry block by replacing
@@ -5124,69 +5192,23 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
}
-/// Compute and return the end value for \p WideIV, unless it is truncated. If
-/// the induction recipe is not canonical, creates a VPDerivedIVRecipe to
-/// compute the end value of the induction.
-static VPValue *tryToComputeEndValueForInduction(VPWidenInductionRecipe *WideIV,
- VPBuilder &VectorPHBuilder,
- VPTypeAnalysis &TypeInfo,
- VPValue *VectorTC) {
- auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
- // Truncated wide inductions resume from the last lane of their vector value
- // in the last vector iteration which is handled elsewhere.
- if (WideIntOrFp && WideIntOrFp->getTruncInst())
- return nullptr;
-
- VPValue *Start = WideIV->getStartValue();
- VPValue *Step = WideIV->getStepValue();
- const InductionDescriptor &ID = WideIV->getInductionDescriptor();
- VPValue *EndValue = VectorTC;
- if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
- EndValue = VectorPHBuilder.createDerivedIV(
- ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
- Start, VectorTC, Step);
- }
-
- // EndValue is derived from the vector trip count (which has the same type as
- // the widest induction) and thus may be wider than the induction here.
- Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV);
- if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {
- EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,
- ScalarTypeOfWideIV,
- WideIV->getDebugLoc());
- }
-
- return EndValue;
-}
-
-void VPlanTransforms::updateScalarResumePhis(
- VPlan &Plan, DenseMap<VPValue *, VPValue *> &IVEndValues) {
- VPTypeAnalysis TypeInfo(Plan);
+void VPlanTransforms::updateScalarResumePhis(VPlan &Plan) {
auto *ScalarPH = Plan.getScalarPreheader();
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
- VPBuilder VectorPHBuilder(
- cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
for (VPRecipeBase &PhiR : Plan.getScalarPreheader()->phis()) {
auto *ResumePhiR = cast<VPPhi>(&PhiR);
- // TODO: Extract final value from induction recipe initially, optimize to
- // pre-computed end value together in optimizeInductionExitUsers.
auto *VectorPhiR = cast<VPHeaderPHIRecipe>(ResumePhiR->getOperand(0));
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
- if (VPValue *EndValue = tryToComputeEndValueForInduction(
- WideIVR, VectorPHBuilder, TypeInfo, &Plan.getVectorTripCount())) {
- IVEndValues[WideIVR] = EndValue;
- ResumePhiR->setOperand(0, EndValue);
- ResumePhiR->setName("bc.resume.val");
- continue;
- }
- // TODO: Also handle truncated inductions here. Computing end-values
- // separately should be done as VPlan-to-VPlan optimization, after
- // legalizing all resume values to use the last lane from the loop.
- assert(cast<VPWidenIntOrFpInductionRecipe>(VectorPhiR)->getTruncInst() &&
- "should only skip truncated wide inductions");
+ auto *ExtractPart =
+ MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart, WideIVR);
+ auto *ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractLastLane, ExtractPart, DebugLoc::getUnknown(),
+ "bc.resume.val");
+ ResumePhiR->setOperand(0, ResumeFromVectorLoop);
+ ResumePhiR->setName("bc.resume.val");
continue;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index f4ecee4e7f04f..dae798dd26874 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -329,11 +329,9 @@ struct VPlanTransforms {
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
- /// one step backwards.
- static void
- optimizeInductionExitUsers(VPlan &Plan,
- DenseMap<VPValue *, VPValue *> &EndValues,
- PredicatedScalarEvolution &PSE);
+ /// one step backwards. Also optimize resume phis in the scalar preheader.
+ static void optimizeInductionExitUsers(VPlan &Plan,
+ PredicatedScalarEvolution &PSE);
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);
@@ -415,11 +413,8 @@ struct VPlanTransforms {
std::optional<unsigned> VScaleForTuning);
/// Update the resume phis in the scalar preheader after creating wide recipes
- /// for first-order recurrences, reductions and inductions. End values for
- /// inductions are added to \p IVEndValues.
- static void
- updateScalarResumePhis(VPlan &Plan,
- DenseMap<VPValue *, VPValue *> &IVEndValues);
+ /// for first-order recurrences, reductions and inductions.
+ static void updateScalarResumePhis(VPlan &Plan);
/// Handle users in the exit block for first order reductions in the original
/// exit block. The penultimate value of recurrences is fed to their LCSSA phi
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll b/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll
index 63f8036b9b6a0..ea06fe2fa93fb 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll
@@ -9,29 +9,14 @@ target triple = "x86_64"
define void @no_use() {
; CHECK-LABEL: define void @no_use() {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
-; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
-; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
-; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[E_0_I]] = phi i32 [ 40, %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[E_0_I]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[INC_I]] = add i32 [[E_0_I]], 1
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
-; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -51,30 +36,15 @@ exit:
define void @dead_use() {
; CHECK-LABEL: define void @dead_use() {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
-; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
-; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
-; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[D_0_I:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[E_0_I]] = phi i32 [ 40, %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[D_0_I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[E_0_I]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[DEAD:%.*]] = add i32 [[D_0_I]], 1
; CHECK-NEXT: [[INC_I]] = add i32 [[E_0_I]], 1
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
-; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
index 2f05435bc75ba..46ef2dca71078 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
@@ -816,17 +816,16 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[FOR_1:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[IV:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[IV]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[FOR_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IV]]
; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[FOR_1]], 10
@@ -880,17 +879,16 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[FOR_1:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT: [[FOR_1:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IV]]
; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[FOR_1]], 10
@@ -944,19 +942,18 @@ define ptr @test_first_order_recurrences_and_pointer_induction1(ptr %ptr, i64 %n
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br ...
[truncated]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
This doesn't look right because previously the resume value was (N_VEC - 1) I think?
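For concreteness, a hypothetical trace of the final vector iteration under the checks above, assuming VF=4, UF=1, and %n.vec = 40 (all values illustrative):

```llvm
; Final vector iteration, assuming VF=4, UF=1, %n.vec = 40:
;   %vec.ind              = <i64 36, i64 37, i64 38, i64 39>
;   %vector.recur.extract = extractelement <4 x i64> %vec.ind, i32 3  ; = 39, i.e. %n.vec - 1
; The updated check resumes the recurrence phi from %n.vec (= 40), one step
; past the old value, which is the discrepancy flagged above.
```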