Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8532,8 +8532,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// TODO: We can't call runPass on these transforms yet, due to verifier
// failures.
VPlanTransforms::addExitUsersForFirstOrderRecurrences(*Plan, Range);
DenseMap<VPValue *, VPValue *> IVEndValues;
VPlanTransforms::updateScalarResumePhis(*Plan, IVEndValues);
VPlanTransforms::updateScalarResumePhis(*Plan);
VPlanTransforms::optimizeInductionExitUsers(*Plan, PSE);

// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
Expand Down Expand Up @@ -8600,7 +8600,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
WithoutRuntimeCheck);
}
VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, PSE);

assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
Expand Down Expand Up @@ -8639,8 +8638,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
// values.
// TODO: We can't call runPass on the transform yet, due to verifier
// failures.
DenseMap<VPValue *, VPValue *> IVEndValues;
VPlanTransforms::updateScalarResumePhis(*Plan, IVEndValues);
VPlanTransforms::updateScalarResumePhis(*Plan);
VPlanTransforms::optimizeInductionExitUsers(*Plan, PSE);

assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
Expand Down
134 changes: 78 additions & 56 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,41 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
}
}

/// Try to build the value \p WideIV holds once the vector loop has finished.
///
/// Truncated int/fp inductions are not handled here and yield nullptr; they
/// resume from the last lane of their vector value in the last vector
/// iteration, which is handled elsewhere.
///
/// \param WideIV          Widened induction to compute the end value for.
/// \param VectorPHBuilder Builder used to emit any recipes that are needed.
/// \param TypeInfo        Type analysis used to compare scalar types.
/// \param VectorTC        Vector trip count the end value is derived from.
/// \returns \p VectorTC for a canonical int/fp induction, otherwise a derived
/// IV computed from start, \p VectorTC and step; either is truncated to the
/// scalar type of \p WideIV if the types differ. Returns nullptr for
/// truncated inductions.
static VPValue *tryToComputeEndValueForInduction(VPWidenInductionRecipe *WideIV,
                                                 VPBuilder &VectorPHBuilder,
                                                 VPTypeAnalysis &TypeInfo,
                                                 VPValue *VectorTC) {
  auto *IntOrFpIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
  const bool IsTruncated = IntOrFpIV && IntOrFpIV->getTruncInst();
  if (IsTruncated)
    return nullptr;

  VPValue *StartV = WideIV->getStartValue();
  VPValue *StepV = WideIV->getStepValue();
  const InductionDescriptor &Desc = WideIV->getInductionDescriptor();

  // Only a canonical int/fp induction ends exactly at the vector trip count;
  // everything else needs start + VectorTC * step materialized as a derived IV.
  const bool IsCanonical = IntOrFpIV && IntOrFpIV->isCanonical();
  VPValue *End = VectorTC;
  if (!IsCanonical) {
    auto *FPBinOp = dyn_cast_or_null<FPMathOperator>(Desc.getInductionBinOp());
    End = VectorPHBuilder.createDerivedIV(Desc.getKind(), FPBinOp, StartV,
                                          VectorTC, StepV);
  }

  // The vector trip count has the type of the widest induction, so the value
  // derived from it can be wider than this induction; narrow it if so.
  Type *IVScalarTy = TypeInfo.inferScalarType(WideIV);
  Type *EndScalarTy = TypeInfo.inferScalarType(End);
  if (IVScalarTy == EndScalarTy)
    return End;

  return VectorPHBuilder.createScalarCast(Instruction::Trunc, End, IVScalarTy,
                                          WideIV->getDebugLoc());
}

/// Check if \p VPV is an untruncated wide induction, either before or after the
/// increment. If so return the header IV (before the increment), otherwise
/// return null.
Expand Down Expand Up @@ -1054,11 +1089,28 @@ static VPValue *optimizeLatchExitInductionUser(
return nullptr;
}

static void recursivelyDeleteDeadRecipes(VPValue *V);

void VPlanTransforms::optimizeInductionExitUsers(
VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues,
PredicatedScalarEvolution &PSE) {
VPlan &Plan, PredicatedScalarEvolution &PSE) {
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
VPBlockBase *MiddleVPBB = Plan.getMiddleBlock();
VPTypeAnalysis TypeInfo(Plan);
VPBuilder VectorPHBuilder(
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));

// Compute end values for all inductions.
DenseMap<VPValue *, VPValue *> EndValues;
for (auto &Phi : VectorRegion->getEntryBasicBlock()->phis()) {
auto *WideIV = dyn_cast<VPWidenInductionRecipe>(&Phi);
if (!WideIV)
continue;
if (VPValue *EndValue = tryToComputeEndValueForInduction(
WideIV, VectorPHBuilder, TypeInfo, &Plan.getVectorTripCount()))
EndValues[WideIV] = EndValue;
}

// Optimize induction exit users in exit blocks.
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
for (VPRecipeBase &R : ExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
Expand All @@ -1077,6 +1129,22 @@ void VPlanTransforms::optimizeInductionExitUsers(
}
}
}

// Optimize resume phis for inductions in the scalar preheader.
for (VPRecipeBase &PhiR : Plan.getScalarPreheader()->phis()) {
auto *ResumePhiR = cast<VPPhi>(&PhiR);
VPValue *Incoming = ResumePhiR->getOperand(0);

VPValue *ExtractedFrom;
if (!match(Incoming, m_ExtractLastLaneOfLastPart(m_VPValue(ExtractedFrom))))
continue;
VPValue *EndValue = EndValues.lookup(ExtractedFrom);
if (!EndValue)
continue;
ResumePhiR->setOperand(0, EndValue);
recursivelyDeleteDeadRecipes(Incoming);
continue;
}
}

/// Remove redundant ExpandSCEVRecipes in \p Plan's entry block by replacing
Expand Down Expand Up @@ -5124,69 +5192,23 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
}

/// Compute and return the end value for \p WideIV, unless it is truncated. If
/// the induction recipe is not canonical, creates a VPDerivedIVRecipe to
/// compute the end value of the induction.
///
/// \param WideIV          Widened induction whose end value is requested.
/// \param VectorPHBuilder Builder used to create the derived IV and the
///                        optional truncating cast.
/// \param TypeInfo        Type analysis used to infer the scalar types of
///                        \p WideIV and of the computed end value.
/// \param VectorTC        Vector trip count; used directly as the end value
///                        for canonical int/fp inductions.
/// \returns The end value, truncated to the scalar type of \p WideIV when the
/// types differ, or nullptr if \p WideIV is a truncated int/fp induction.
static VPValue *tryToComputeEndValueForInduction(VPWidenInductionRecipe *WideIV,
VPBuilder &VectorPHBuilder,
VPTypeAnalysis &TypeInfo,
VPValue *VectorTC) {
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
// Truncated wide inductions resume from the last lane of their vector value
// in the last vector iteration which is handled elsewhere.
if (WideIntOrFp && WideIntOrFp->getTruncInst())
return nullptr;

VPValue *Start = WideIV->getStartValue();
VPValue *Step = WideIV->getStepValue();
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
// Default to the vector trip count; this is kept as-is only for a canonical
// int/fp induction. Any other induction gets a derived IV built from its
// kind, optional FP binary operator, start, the vector trip count and step.
VPValue *EndValue = VectorTC;
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
EndValue = VectorPHBuilder.createDerivedIV(
ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
Start, VectorTC, Step);
}

// EndValue is derived from the vector trip count (which has the same type as
// the widest induction) and thus may be wider than the induction here.
Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV);
if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {
// Narrow the end value to the induction's own scalar type, reusing the
// induction's debug location for the cast.
EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,
ScalarTypeOfWideIV,
WideIV->getDebugLoc());
}

return EndValue;
}

void VPlanTransforms::updateScalarResumePhis(
VPlan &Plan, DenseMap<VPValue *, VPValue *> &IVEndValues) {
VPTypeAnalysis TypeInfo(Plan);
void VPlanTransforms::updateScalarResumePhis(VPlan &Plan) {
auto *ScalarPH = Plan.getScalarPreheader();
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
VPBuilder VectorPHBuilder(
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
for (VPRecipeBase &PhiR : Plan.getScalarPreheader()->phis()) {
auto *ResumePhiR = cast<VPPhi>(&PhiR);

// TODO: Extract final value from induction recipe initially, optimize to
// pre-computed end value together in optimizeInductionExitUsers.
auto *VectorPhiR = cast<VPHeaderPHIRecipe>(ResumePhiR->getOperand(0));
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
if (VPValue *EndValue = tryToComputeEndValueForInduction(
WideIVR, VectorPHBuilder, TypeInfo, &Plan.getVectorTripCount())) {
IVEndValues[WideIVR] = EndValue;
ResumePhiR->setOperand(0, EndValue);
ResumePhiR->setName("bc.resume.val");
continue;
}
// TODO: Also handle truncated inductions here. Computing end-values
// separately should be done as VPlan-to-VPlan optimization, after
// legalizing all resume values to use the last lane from the loop.
assert(cast<VPWidenIntOrFpInductionRecipe>(VectorPhiR)->getTruncInst() &&
"should only skip truncated wide inductions");
auto *ExtractPart =
MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart, WideIVR);
auto *ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
VPInstruction::ExtractLastLane, ExtractPart, DebugLoc::getUnknown(),
"bc.resume.val");
ResumePhiR->setOperand(0, ResumeFromVectorLoop);
ResumePhiR->setName("bc.resume.val");
continue;
}

Expand Down
15 changes: 5 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,11 +329,9 @@ struct VPlanTransforms {

/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
static void
optimizeInductionExitUsers(VPlan &Plan,
DenseMap<VPValue *, VPValue *> &EndValues,
PredicatedScalarEvolution &PSE);
/// one step backwards. Also optimize resume phis in the scalar preheader.
static void optimizeInductionExitUsers(VPlan &Plan,
PredicatedScalarEvolution &PSE);

/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);
Expand Down Expand Up @@ -415,11 +413,8 @@ struct VPlanTransforms {
std::optional<unsigned> VScaleForTuning);

/// Update the resume phis in the scalar preheader after creating wide recipes
/// for first-order recurrences, reductions and inductions. End values for
/// inductions are added to \p IVEndValues.
static void
updateScalarResumePhis(VPlan &Plan,
DenseMap<VPValue *, VPValue *> &IVEndValues);
/// for first-order recurrences, reductions and inductions.
static void updateScalarResumePhis(VPlan &Plan);

/// Handle users in the exit block for first order reductions in the original
/// exit block. The penultimate value of recurrences is fed to their LCSSA phi
Expand Down
46 changes: 8 additions & 38 deletions llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,14 @@ target triple = "x86_64"

define void @no_use() {
; CHECK-LABEL: define void @no_use() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[E_0_I]] = phi i32 [ 40, %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[E_0_I]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[INC_I]] = add i32 [[E_0_I]], 1
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
Expand All @@ -51,30 +36,15 @@ exit:

define void @dead_use() {
; CHECK-LABEL: define void @dead_use() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[D_0_I:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[E_0_I]] = phi i32 [ 40, %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[D_0_I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[E_0_I]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[DEAD:%.*]] = add i32 [[D_0_I]], 1
; CHECK-NEXT: [[INC_I]] = add i32 [[E_0_I]], 1
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
Expand Down
Loading