Skip to content

Commit fb845f9

Browse files
authored
[LAA] Hoist setting condition for RT-checks (#128045)
Strip ShouldRetyWithRuntimeCheck from the DepedenceDistanceStrideAndSizeInfo struct, and free isDependent from the responsibility of setting the condition for when runtime-checks are needed, transferring this responsibility to getDependenceDistanceStrideAndSize. We can have multiple DepType::Unknown dependences that, by themselves, do not trigger the retrying with runtime memory checks, and therefore block vectorization. But once a single FoundNonConstantDistanceDependence is found, the analysis seems to switch to the "LAA: Retrying with memory checks" path and allows all these dependences to be handled via runtime checks. There is hence no rationale for predicating FoundNonConstantDependenceDistance on DepType::Unknown, and removing this predication is one of the side-effects of this patch.
1 parent e7bcd3f commit fb845f9

File tree

3 files changed

+29
-51
lines changed

3 files changed

+29
-51
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -396,8 +396,6 @@ class MemoryDepChecker {
396396
uint64_t MaxStride;
397397
std::optional<uint64_t> CommonStride;
398398

399-
bool ShouldRetryWithRuntimeCheck;
400-
401399
/// TypeByteSize is either the common store size of both accesses, or 0 when
402400
/// store sizes mismatch.
403401
uint64_t TypeByteSize;
@@ -407,11 +405,9 @@ class MemoryDepChecker {
407405

408406
DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride,
409407
std::optional<uint64_t> CommonStride,
410-
bool ShouldRetryWithRuntimeCheck,
411408
uint64_t TypeByteSize, bool AIsWrite,
412409
bool BIsWrite)
413410
: Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride),
414-
ShouldRetryWithRuntimeCheck(ShouldRetryWithRuntimeCheck),
415411
TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
416412
};
417413

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 9 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2081,14 +2081,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
20812081
if (StrideAScaled == StrideBScaled)
20822082
CommonStride = StrideAScaled;
20832083

2084-
// TODO: Historically, we don't retry with runtime checks unless the
2085-
// (unscaled) strides are the same. Fix this once the condition for runtime
2086-
// checks in isDependent is fixed.
2087-
bool ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt;
2084+
// TODO: FoundNonConstantDistanceDependence is used as a necessary condition
2085+
// to consider retrying with runtime checks. Historically, we did not set it
2086+
// when (unscaled) strides were different but there is no inherent reason to.
2087+
if (!isa<SCEVConstant>(Dist))
2088+
FoundNonConstantDistanceDependence |= StrideAPtrInt == StrideBPtrInt;
20882089

20892090
return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
2090-
ShouldRetryWithRuntimeCheck, TypeByteSize,
2091-
AIsWrite, BIsWrite);
2091+
TypeByteSize, AIsWrite, BIsWrite);
20922092
}
20932093

20942094
MemoryDepChecker::Dependence::DepType
@@ -2103,15 +2103,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21032103
if (std::holds_alternative<Dependence::DepType>(Res))
21042104
return std::get<Dependence::DepType>(Res);
21052105

2106-
auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck,
2107-
TypeByteSize, AIsWrite, BIsWrite] =
2106+
auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
21082107
std::get<DepDistanceStrideAndSizeInfo>(Res);
21092108
bool HasSameSize = TypeByteSize > 0;
21102109

21112110
if (isa<SCEVCouldNotCompute>(Dist)) {
2112-
// TODO: Relax requirement that there is a common unscaled stride to retry
2113-
// with non-constant distance dependencies.
2114-
FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
21152111
LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
21162112
return Dependence::Unknown;
21172113
}
@@ -2173,14 +2169,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21732169
// forward dependency will allow vectorization using any width.
21742170

21752171
if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2176-
if (!ConstDist) {
2177-
// TODO: FoundNonConstantDistanceDependence is used as a necessary
2178-
// condition to consider retrying with runtime checks. Historically, we
2179-
// did not set it when strides were different but there is no inherent
2180-
// reason to.
2181-
FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2172+
if (!ConstDist)
21822173
return Dependence::Unknown;
2183-
}
21842174
if (!HasSameSize ||
21852175
couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
21862176
LLVM_DEBUG(
@@ -2195,22 +2185,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21952185

21962186
int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
21972187
// Below we only handle strictly positive distances.
2198-
if (MinDistance <= 0) {
2199-
FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2188+
if (MinDistance <= 0)
22002189
return Dependence::Unknown;
2201-
}
2202-
2203-
if (!ConstDist) {
2204-
// Previously this case would be treated as Unknown, possibly setting
2205-
// FoundNonConstantDistanceDependence to force re-trying with runtime
2206-
// checks. Until the TODO below is addressed, set it here to preserve
2207-
// original behavior w.r.t. re-trying with runtime checks.
2208-
// TODO: FoundNonConstantDistanceDependence is used as a necessary
2209-
// condition to consider retrying with runtime checks. Historically, we
2210-
// did not set it when strides were different but there is no inherent
2211-
// reason to.
2212-
FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2213-
}
22142190

22152191
if (!HasSameSize) {
22162192
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "

llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -320,28 +320,34 @@ exit:
320320
define void @retry_after_dep_check_with_unknown_offset(ptr %A, i32 %offset) {
321321
; CHECK-LABEL: 'retry_after_dep_check_with_unknown_offset'
322322
; CHECK-NEXT: loop:
323-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
324-
; CHECK-NEXT: Unknown data dependence.
323+
; CHECK-NEXT: Memory dependences are safe with run-time checks
325324
; CHECK-NEXT: Dependences:
326-
; CHECK-NEXT: Unknown:
327-
; CHECK-NEXT: %l.A = load float, ptr %A, align 4 ->
328-
; CHECK-NEXT: store float 0.000000e+00, ptr %A.100.iv.offset.3, align 4
329-
; CHECK-EMPTY:
330-
; CHECK-NEXT: Unknown:
331-
; CHECK-NEXT: %l.A = load float, ptr %A, align 4 ->
332-
; CHECK-NEXT: store float %l.A, ptr %A.100.iv, align 8
333-
; CHECK-EMPTY:
334325
; CHECK-NEXT: Run-time memory checks:
326+
; CHECK-NEXT: Check 0:
327+
; CHECK-NEXT: Comparing group GRP0:
328+
; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv
329+
; CHECK-NEXT: Against group GRP1:
330+
; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3
331+
; CHECK-NEXT: Check 1:
332+
; CHECK-NEXT: Comparing group GRP0:
333+
; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv
334+
; CHECK-NEXT: Against group GRP2:
335+
; CHECK-NEXT: ptr %A
336+
; CHECK-NEXT: Check 2:
337+
; CHECK-NEXT: Comparing group GRP1:
338+
; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3
339+
; CHECK-NEXT: Against group GRP2:
340+
; CHECK-NEXT: ptr %A
335341
; CHECK-NEXT: Grouped accesses:
336342
; CHECK-NEXT: Group GRP0:
337-
; CHECK-NEXT: (Low: %A High: (4 + %A))
338-
; CHECK-NEXT: Member: %A
343+
; CHECK-NEXT: (Low: (100 + %A) High: (96 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A))
344+
; CHECK-NEXT: Member: {(100 + %A),+,8}<%loop>
339345
; CHECK-NEXT: Group GRP1:
340346
; CHECK-NEXT: (Low: (100 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A) High: (96 + (16 * (zext i32 %offset to i64))<nuw><nsw> + %A))
341347
; CHECK-NEXT: Member: {(100 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A),+,8}<%loop>
342348
; CHECK-NEXT: Group GRP2:
343-
; CHECK-NEXT: (Low: (100 + %A) High: (96 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A))
344-
; CHECK-NEXT: Member: {(100 + %A),+,8}<%loop>
349+
; CHECK-NEXT: (Low: %A High: (4 + %A))
350+
; CHECK-NEXT: Member: %A
345351
; CHECK-EMPTY:
346352
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
347353
; CHECK-NEXT: SCEV assumptions:

0 commit comments

Comments
 (0)