Skip to content

Commit 1cfb0c2

Browse files
committed
[LAA] Support assumptions in evaluatePtrAddRecAtMaxBTCWillNotWrap
This patch extends the logic added in #128061 to also support dereferenceability information from assumptions. Unfortunately, both the assumption cache and the dominator tree need to be threaded through multiple layers to make them available where needed.
1 parent 1f8e2a4 commit 1cfb0c2

File tree

8 files changed

+109
-43
lines changed

8 files changed

+109
-43
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,12 @@ class MemoryDepChecker {
180180
const SmallVectorImpl<Instruction *> &Instrs) const;
181181
};
182182

183-
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
183+
MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC,
184+
DominatorTree *DT, const Loop *L,
184185
const DenseMap<Value *, const SCEV *> &SymbolicStrides,
185186
unsigned MaxTargetVectorWidthInBits)
186-
: PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
187+
: PSE(PSE), AC(AC), DT(DT), InnermostLoop(L),
188+
SymbolicStrides(SymbolicStrides),
187189
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
188190

189191
/// Register the location (instructions are given increasing numbers)
@@ -288,6 +290,9 @@ class MemoryDepChecker {
288290
return PointerBounds;
289291
}
290292

293+
AssumptionCache *getAC() const { return AC; }
294+
DominatorTree *getDT() const { return DT; }
295+
291296
private:
292297
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
293298
/// applies dynamic knowledge to simplify SCEV expressions and convert them
@@ -296,6 +301,10 @@ class MemoryDepChecker {
296301
/// example we might assume a unit stride for a pointer in order to prove
297302
/// that a memory access is strided and doesn't wrap.
298303
PredicatedScalarEvolution &PSE;
304+
305+
AssumptionCache *AC;
306+
DominatorTree *DT;
307+
299308
const Loop *InnermostLoop;
300309

301310
/// Reference to map of pointer values to
@@ -669,7 +678,7 @@ class LoopAccessInfo {
669678
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE,
670679
const TargetTransformInfo *TTI,
671680
const TargetLibraryInfo *TLI, AAResults *AA,
672-
DominatorTree *DT, LoopInfo *LI,
681+
DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC,
673682
bool AllowPartial = false);
674683

675684
/// Return true if we can analyze the memory accesses in the loop and there are
@@ -921,7 +930,8 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
921930
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
922931
const SCEV *MaxBTC, ScalarEvolution *SE,
923932
DenseMap<std::pair<const SCEV *, Type *>,
924-
std::pair<const SCEV *, const SCEV *>> *PointerBounds);
933+
std::pair<const SCEV *, const SCEV *>> *PointerBounds,
934+
AssumptionCache *AC, DominatorTree *DT);
925935

926936
class LoopAccessInfoManager {
927937
/// The cache.
@@ -934,12 +944,13 @@ class LoopAccessInfoManager {
934944
LoopInfo &LI;
935945
TargetTransformInfo *TTI;
936946
const TargetLibraryInfo *TLI = nullptr;
947+
AssumptionCache *AC;
937948

938949
public:
939950
LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
940951
LoopInfo &LI, TargetTransformInfo *TTI,
941-
const TargetLibraryInfo *TLI)
942-
: SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
952+
const TargetLibraryInfo *TLI, AssumptionCache *AC)
953+
: SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {}
943954

944955
LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false);
945956

llvm/lib/Analysis/Loads.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(
326326
return false;
327327

328328
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
329-
L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr);
329+
L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr, AC, &DT);
330330
if (isa<SCEVCouldNotCompute>(AccessStart) ||
331331
isa<SCEVCouldNotCompute>(AccessEnd))
332332
return false;

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 51 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include "llvm/ADT/SmallVector.h"
2424
#include "llvm/Analysis/AliasAnalysis.h"
2525
#include "llvm/Analysis/AliasSetTracker.h"
26+
#include "llvm/Analysis/AssumeBundleQueries.h"
27+
#include "llvm/Analysis/AssumptionCache.h"
2628
#include "llvm/Analysis/LoopAnalysisManager.h"
2729
#include "llvm/Analysis/LoopInfo.h"
2830
#include "llvm/Analysis/LoopIterator.h"
@@ -208,28 +210,52 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
208210

209211
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
210212
/// \p MaxBTC is guaranteed inbounds of the accessed object.
211-
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
212-
const SCEV *MaxBTC,
213-
const SCEV *EltSize,
214-
ScalarEvolution &SE,
215-
const DataLayout &DL) {
213+
static bool
214+
evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
215+
const SCEV *MaxBTC, const SCEV *EltSize,
216+
ScalarEvolution &SE, const DataLayout &DL,
217+
AssumptionCache *AC, DominatorTree *DT) {
216218
auto *PointerBase = SE.getPointerBase(AR->getStart());
217219
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
218220
if (!StartPtr)
219221
return false;
222+
const Loop *L = AR->getLoop();
220223
bool CheckForNonNull, CheckForFreed;
221-
uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
224+
Value *StartPtrV = StartPtr->getValue();
225+
uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
222226
DL, CheckForNonNull, CheckForFreed);
223227

224-
if (CheckForNonNull || CheckForFreed)
228+
if (DerefBytes && (CheckForNonNull || CheckForFreed))
225229
return false;
226230

227231
const SCEV *Step = AR->getStepRecurrence(SE);
232+
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
233+
const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
234+
235+
// Check if we have a suitable dereferenceable assumption we can use.
236+
RetainedKnowledge DerefRK;
237+
if (!StartPtrV->canBeFreed() &&
238+
getKnowledgeForValue(
239+
StartPtrV, {Attribute::Dereferenceable}, *AC,
240+
[&](RetainedKnowledge RK, Instruction *Assume, auto) {
241+
if (!isValidAssumeForContext(
242+
Assume, L->getLoopPredecessor()->getTerminator(), DT))
243+
return false;
244+
if (RK.AttrKind == Attribute::Dereferenceable) {
245+
DerefRK = std::max(DerefRK, RK);
246+
return true;
247+
}
248+
return false;
249+
}) &&
250+
DerefRK.ArgValue) {
251+
DerefBytesSCEV = SE.getUMaxExpr(DerefBytesSCEV,
252+
SE.getConstant(WiderTy, DerefRK.ArgValue));
253+
}
254+
228255
bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
229256
if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
230257
return false;
231258

232-
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
233259
Step = SE.getNoopOrSignExtend(Step, WiderTy);
234260
MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
235261

@@ -256,24 +282,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
256282
const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
257283
if (!EndBytes)
258284
return false;
259-
return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes,
260-
SE.getConstant(WiderTy, DerefBytes));
285+
return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
261286
}
262287

263288
// For negative steps check if
264289
// * StartOffset >= (MaxBTC * Step + EltSize)
265290
// * StartOffset <= DerefBytes.
266291
assert(SE.isKnownNegative(Step) && "must be known negative");
267292
return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
268-
SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
269-
SE.getConstant(WiderTy, DerefBytes));
293+
SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
270294
}
271295

272296
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
273297
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
274298
const SCEV *MaxBTC, ScalarEvolution *SE,
275299
DenseMap<std::pair<const SCEV *, Type *>,
276-
std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
300+
std::pair<const SCEV *, const SCEV *>> *PointerBounds,
301+
AssumptionCache *AC, DominatorTree *DT) {
277302
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
278303
if (PointerBounds) {
279304
auto [Iter, Ins] = PointerBounds->insert(
@@ -308,8 +333,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
308333
// sets ScEnd to the maximum unsigned value for the type. Note that LAA
309334
// separately checks that accesses cannot wrap, so unsigned max
310335
// represents an upper bound.
311-
if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE,
312-
DL)) {
336+
if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
337+
AC, DT)) {
313338
ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
314339
} else {
315340
ScEnd = SE->getAddExpr(
@@ -356,9 +381,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
356381
bool NeedsFreeze) {
357382
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
358383
const SCEV *BTC = PSE.getBackedgeTakenCount();
359-
const auto &[ScStart, ScEnd] =
360-
getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
361-
PSE.getSE(), &DC.getPointerBounds());
384+
const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
385+
Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
386+
&DC.getPointerBounds(), DC.getAC(), DC.getDT());
362387
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
363388
!isa<SCEVCouldNotCompute>(ScEnd) &&
364389
"must be able to compute both start and end expressions");
@@ -2011,10 +2036,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
20112036
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
20122037
const auto &[SrcStart_, SrcEnd_] =
20132038
getStartAndEndForAccess(InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC,
2014-
PSE.getSE(), &PointerBounds);
2039+
PSE.getSE(), &PointerBounds, AC, DT);
20152040
const auto &[SinkStart_, SinkEnd_] =
20162041
getStartAndEndForAccess(InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC,
2017-
PSE.getSE(), &PointerBounds);
2042+
PSE.getSE(), &PointerBounds, AC, DT);
20182043
if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
20192044
!isa<SCEVCouldNotCompute>(SrcEnd_) &&
20202045
!isa<SCEVCouldNotCompute>(SinkStart_) &&
@@ -3015,7 +3040,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30153040
const TargetTransformInfo *TTI,
30163041
const TargetLibraryInfo *TLI, AAResults *AA,
30173042
DominatorTree *DT, LoopInfo *LI,
3018-
bool AllowPartial)
3043+
AssumptionCache *AC, bool AllowPartial)
30193044
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
30203045
PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
30213046
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
@@ -3025,8 +3050,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30253050
MaxTargetVectorWidthInBits =
30263051
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
30273052

3028-
DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
3029-
MaxTargetVectorWidthInBits);
3053+
DepChecker = std::make_unique<MemoryDepChecker>(
3054+
*PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
30303055
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
30313056
if (canAnalyzeLoop())
30323057
CanVecMem = analyzeLoop(AA, LI, TLI, DT);
@@ -3095,7 +3120,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
30953120
// or if it was created with a different value of AllowPartial.
30963121
if (Inserted || It->second->hasAllowPartial() != AllowPartial)
30973122
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3098-
&LI, AllowPartial);
3123+
&LI, AC, AllowPartial);
30993124

31003125
return *It->second;
31013126
}
@@ -3138,7 +3163,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
31383163
auto &LI = FAM.getResult<LoopAnalysis>(F);
31393164
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
31403165
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
3141-
return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI);
3166+
auto &AC = FAM.getResult<AssumptionAnalysis>(F);
3167+
return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
31423168
}
31433169

31443170
AnalysisKey LoopAccessAnalysis::Key;

llvm/lib/Transforms/Scalar/LoopFlatten.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,8 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
10091009
// in simplified form, and also needs LCSSA. Running
10101010
// this pass will simplify all loops that contain inner loops,
10111011
// regardless of whether anything ends up being flattened.
1012-
LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
1012+
LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr,
1013+
nullptr);
10131014
for (Loop *InnerLoop : LN.getLoops()) {
10141015
auto *OuterLoop = InnerLoop->getParentLoop();
10151016
if (!OuterLoop)

llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,7 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
551551
const Function *F = L.getHeader()->getParent();
552552
OptimizationRemarkEmitter ORE(F);
553553

554-
LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
554+
LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr, nullptr);
555555
if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
556556
return PreservedAnalyses::all();
557557
return getLoopPassPreservedAnalyses();

llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ e.1:
505505
ret i32 1
506506
}
507507

508-
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) {
508+
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) nosync nofree {
509509
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption'
510510
; CHECK-NEXT: loop.header:
511511
; CHECK-NEXT: Memory dependences are safe with run-time checks
@@ -518,10 +518,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_kno
518518
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
519519
; CHECK-NEXT: Grouped accesses:
520520
; CHECK-NEXT: Group GRP0:
521-
; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
521+
; CHECK-NEXT: (Low: %B High: (2000 + %B))
522522
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
523523
; CHECK-NEXT: Group GRP1:
524-
; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
524+
; CHECK-NEXT: (Low: %A High: (2000 + %A))
525525
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
526526
; CHECK-EMPTY:
527527
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -565,7 +565,7 @@ e.2:
565565
ret void
566566
}
567567

568-
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) {
568+
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) nosync nofree {
569569
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small'
570570
; CHECK-NEXT: loop.header:
571571
; CHECK-NEXT: Memory dependences are safe with run-time checks

llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,48 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ]
99
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ]
10+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
11+
; CHECK: vector.ph:
1012
; CHECK-NEXT: br label [[LOOP:%.*]]
13+
; CHECK: vector.body:
14+
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
15+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
16+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0
17+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
18+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]]
19+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
20+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
21+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
22+
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
23+
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
24+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
25+
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
26+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
27+
; CHECK: middle.split:
28+
; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
29+
; CHECK: middle.block:
30+
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
31+
; CHECK: vector.early.exit:
32+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
33+
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]]
34+
; CHECK-NEXT: br label [[LOOP_END]]
35+
; CHECK: scalar.ph:
36+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
37+
; CHECK-NEXT: br label [[LOOP1:%.*]]
1138
; CHECK: loop:
12-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
39+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1340
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
1441
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1542
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
1643
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
1744
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
18-
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
45+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
1946
; CHECK: loop.inc:
2047
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
2148
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024
22-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
49+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
2350
; CHECK: loop.end:
24-
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ -1, [[LOOP_INC]] ]
51+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ -1, [[LOOP_INC]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP9]], [[VECTOR_EARLY_EXIT]] ]
2552
; CHECK-NEXT: ret i64 [[RETVAL]]
2653
;
2754
entry:

llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class VPlanSlpTest : public VPlanTestIRBase {
4141
AARes.reset(new AAResults(*TLI));
4242
AARes->addAAResult(*BasicAA);
4343
PSE.reset(new PredicatedScalarEvolution(*SE, *L));
44-
LAI.reset(new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI));
44+
LAI.reset(new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI,
45+
nullptr));
4546
IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI));
4647
IAI->analyzeInterleaving(false);
4748
return {Plan, *IAI};

0 commit comments

Comments
 (0)