23
23
#include " llvm/ADT/SmallVector.h"
24
24
#include " llvm/Analysis/AliasAnalysis.h"
25
25
#include " llvm/Analysis/AliasSetTracker.h"
26
+ #include " llvm/Analysis/AssumeBundleQueries.h"
27
+ #include " llvm/Analysis/AssumptionCache.h"
26
28
#include " llvm/Analysis/LoopAnalysisManager.h"
27
29
#include " llvm/Analysis/LoopInfo.h"
28
30
#include " llvm/Analysis/LoopIterator.h"
@@ -208,28 +210,50 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
208
210
209
211
/// Return true if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
210
212
/// \p MaxBTC is guaranteed inbounds of the accessed object.
211
- static bool evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR,
212
- const SCEV *MaxBTC ,
213
- const SCEV *EltSize,
214
- ScalarEvolution &SE ,
215
- const DataLayout &DL ) {
213
+ static bool
214
+ evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR ,
215
+ const SCEV *MaxBTC, const SCEV *EltSize,
216
+ ScalarEvolution &SE, const DataLayout &DL ,
217
+ AssumptionCache *AC, DominatorTree *DT ) {
216
218
auto *PointerBase = SE.getPointerBase (AR->getStart ());
217
219
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
218
220
if (!StartPtr)
219
221
return false ;
222
+ const Loop *L = AR->getLoop ();
220
223
bool CheckForNonNull, CheckForFreed;
221
224
uint64_t DerefBytes = StartPtr->getValue ()->getPointerDereferenceableBytes (
222
225
DL, CheckForNonNull, CheckForFreed);
223
226
224
- if (CheckForNonNull || CheckForFreed)
227
+ if (DerefBytes && ( CheckForNonNull || CheckForFreed) )
225
228
return false ;
226
229
227
230
const SCEV *Step = AR->getStepRecurrence (SE);
231
+ Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
232
+ const SCEV *DerefBytesSCEV = SE.getConstant (WiderTy, DerefBytes);
233
+
234
+ // Check if we have a suitable dereferenceable assumption we can use.
235
+ RetainedKnowledge DerefRK;
236
+ if (getKnowledgeForValue (
237
+ StartPtr->getValue (), {Attribute::Dereferenceable}, *AC,
238
+ [&](RetainedKnowledge RK, Instruction *Assume, auto ) {
239
+ if (!isValidAssumeForContext (
240
+ Assume, L->getLoopPredecessor ()->getTerminator (), DT))
241
+ return false ;
242
+ if (RK.AttrKind == Attribute::Dereferenceable) {
243
+ DerefRK = std::max (DerefRK, RK);
244
+ return true ;
245
+ }
246
+ return false ;
247
+ }) &&
248
+ DerefRK.ArgValue ) {
249
+ DerefBytesSCEV = SE.getUMaxExpr (DerefBytesSCEV,
250
+ SE.getConstant (WiderTy, DerefRK.ArgValue ));
251
+ }
252
+
228
253
bool IsKnownNonNegative = SE.isKnownNonNegative (Step);
229
254
if (!IsKnownNonNegative && !SE.isKnownNegative (Step))
230
255
return false ;
231
256
232
- Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
233
257
Step = SE.getNoopOrSignExtend (Step, WiderTy);
234
258
MaxBTC = SE.getNoopOrZeroExtend (MaxBTC, WiderTy);
235
259
@@ -256,24 +280,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
256
280
const SCEV *EndBytes = addSCEVNoOverflow (StartOffset, OffsetEndBytes, SE);
257
281
if (!EndBytes)
258
282
return false ;
259
- return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes,
260
- SE.getConstant (WiderTy, DerefBytes));
283
+ return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
261
284
}
262
285
263
286
// For negative steps check if
264
287
// * StartOffset >= (MaxBTC * Step + EltSize)
265
288
// * StartOffset <= DerefBytes.
266
289
assert (SE.isKnownNegative (Step) && " must be known negative" );
267
290
return SE.isKnownPredicate (CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
268
- SE.isKnownPredicate (CmpInst::ICMP_ULE, StartOffset,
269
- SE.getConstant (WiderTy, DerefBytes));
291
+ SE.isKnownPredicate (CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
270
292
}
271
293
272
294
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess (
273
295
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
274
296
const SCEV *MaxBTC, ScalarEvolution *SE,
275
297
DenseMap<std::pair<const SCEV *, Type *>,
276
- std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
298
+ std::pair<const SCEV *, const SCEV *>> *PointerBounds,
299
+ AssumptionCache *AC, DominatorTree *DT) {
277
300
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
278
301
if (PointerBounds) {
279
302
auto [Iter, Ins] = PointerBounds->insert (
@@ -308,8 +331,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
308
331
// sets ScEnd to the maximum unsigned value for the type. Note that LAA
309
332
// separately checks that accesses cannot wrap, so unsigned max
310
333
// represents an upper bound.
311
- if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE,
312
- DL )) {
334
+ if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE, DL,
335
+ AC, DT )) {
313
336
ScEnd = AR->evaluateAtIteration (MaxBTC, *SE);
314
337
} else {
315
338
ScEnd = SE->getAddExpr (
@@ -356,9 +379,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
356
379
bool NeedsFreeze) {
357
380
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
358
381
const SCEV *BTC = PSE.getBackedgeTakenCount ();
359
- const auto &[ScStart, ScEnd] =
360
- getStartAndEndForAccess ( Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
361
- PSE. getSE (), & DC.getPointerBounds ());
382
+ const auto &[ScStart, ScEnd] = getStartAndEndForAccess (
383
+ Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE. getSE () ,
384
+ &DC. getPointerBounds (), DC. getAC (), DC.getDT ());
362
385
assert (!isa<SCEVCouldNotCompute>(ScStart) &&
363
386
!isa<SCEVCouldNotCompute>(ScEnd) &&
364
387
" must be able to compute both start and end expressions" );
@@ -2011,10 +2034,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
2011
2034
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
2012
2035
const auto &[SrcStart_, SrcEnd_] =
2013
2036
getStartAndEndForAccess (InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC,
2014
- PSE.getSE (), &PointerBounds);
2037
+ PSE.getSE (), &PointerBounds, AC, DT );
2015
2038
const auto &[SinkStart_, SinkEnd_] =
2016
2039
getStartAndEndForAccess (InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC,
2017
- PSE.getSE (), &PointerBounds);
2040
+ PSE.getSE (), &PointerBounds, AC, DT );
2018
2041
if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
2019
2042
!isa<SCEVCouldNotCompute>(SrcEnd_) &&
2020
2043
!isa<SCEVCouldNotCompute>(SinkStart_) &&
@@ -3015,7 +3038,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
3015
3038
const TargetTransformInfo *TTI,
3016
3039
const TargetLibraryInfo *TLI, AAResults *AA,
3017
3040
DominatorTree *DT, LoopInfo *LI,
3018
- bool AllowPartial)
3041
+ AssumptionCache *AC, bool AllowPartial)
3019
3042
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
3020
3043
PtrRtChecking (nullptr ), TheLoop(L), AllowPartial(AllowPartial) {
3021
3044
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned >::max ();
@@ -3025,8 +3048,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
3025
3048
MaxTargetVectorWidthInBits =
3026
3049
TTI->getRegisterBitWidth (TargetTransformInfo::RGK_FixedWidthVector) * 2 ;
3027
3050
3028
- DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
3029
- MaxTargetVectorWidthInBits);
3051
+ DepChecker = std::make_unique<MemoryDepChecker>(
3052
+ *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
3030
3053
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
3031
3054
if (canAnalyzeLoop ())
3032
3055
CanVecMem = analyzeLoop (AA, LI, TLI, DT);
@@ -3095,7 +3118,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
3095
3118
// or if it was created with a different value of AllowPartial.
3096
3119
if (Inserted || It->second ->hasAllowPartial () != AllowPartial)
3097
3120
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3098
- &LI, AllowPartial);
3121
+ &LI, AC, AllowPartial);
3099
3122
3100
3123
return *It->second ;
3101
3124
}
@@ -3138,7 +3161,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
3138
3161
auto &LI = FAM.getResult <LoopAnalysis>(F);
3139
3162
auto &TTI = FAM.getResult <TargetIRAnalysis>(F);
3140
3163
auto &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
3141
- return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, &TLI);
3164
+ auto &AC = FAM.getResult <AssumptionAnalysis>(F);
3165
+ return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, &TLI, &AC);
3142
3166
}
3143
3167
3144
3168
AnalysisKey LoopAccessAnalysis::Key;
0 commit comments