[InstCombine] Add limit for expansion of gep chains #147065
@llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

Changes

When converting gep subtraction / comparison to offset subtraction / comparison, avoid expanding very long multi-use gep chains.

Another improvement we could make is to expand one-use followed by multi-use gep differently, by rewriting the multi-use gep to include the one-use offsets. But I think we want to have some kind of complexity cut-off in any case.

Full diff: https://github.com/llvm/llvm-project/pull/147065.diff

5 Files Affected:
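For orientation, here is a minimal sketch of the kind of fold the new limit guards (hypothetical function, not part of the patch): a subtraction of two pointers sharing a common base is rewritten into arithmetic on the gep offsets.

define i64 @ptr_diff_sketch(ptr %base, i64 %idx1, i64 %idx2) {
  %p1 = getelementptr inbounds i8, ptr %base, i64 %idx1
  %p2 = getelementptr inbounds i8, ptr %p1, i64 %idx2
  %i1 = ptrtoint ptr %p2 to i64
  %i2 = ptrtoint ptr %base to i64
  ; With one-use geps this subtraction folds to %idx1 + %idx2; the patch
  ; bails out when reconstructing the offsets of a long multi-use chain
  ; would duplicate too much arithmetic.
  %d = sub i64 %i1, %i2
  ret i64 %d
}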
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index f727eb0a63e05..5dae81a5bea9c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2144,13 +2144,35 @@ CommonPointerBase CommonPointerBase::compute(Value *LHS, Value *RHS) {
return Base;
}
+bool CommonPointerBase::isExpensive() const {
+ bool SeenConst = false;
+ unsigned NumGEPs = 0;
+ auto ProcessGEPs = [&SeenConst, &NumGEPs](ArrayRef<GEPOperator *> GEPs) {
+ bool SeenMultiUse = false;
+ for (GEPOperator *GEP : GEPs) {
+ // Only count GEPs after the first multi-use GEP. For the first one,
+ // we will directly reuse the offset.
+ if (SeenMultiUse) {
+ bool IsConst = GEP->hasAllConstantIndices();
+ SeenConst |= IsConst;
+ NumGEPs += !IsConst;
+ }
+ SeenMultiUse |= !GEP->hasOneUse();
+ }
+ };
+ ProcessGEPs(LHSGEPs);
+ ProcessGEPs(RHSGEPs);
+ NumGEPs += SeenConst;
+ return NumGEPs > 2;
+}
+
/// Optimize pointer differences into the same array into a size. Consider:
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty, bool IsNUW) {
CommonPointerBase Base = CommonPointerBase::compute(LHS, RHS);
- if (!Base.Ptr)
+ if (!Base.Ptr || Base.isExpensive())
return nullptr;
// To avoid duplicating the offset arithmetic, rewrite the GEP to use the
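To make the cutoff concrete, here is a hedged trace on a hypothetical chain (names invented; @use assumed declared), assuming the GEP list is visited outermost-first, which is consistent with the review discussion below:

%g1 = getelementptr i8, ptr %base, i64 %a  ; visited last: variable index, NumGEPs becomes 1
%g2 = getelementptr i8, ptr %g1, i64 16    ; visited second: constant index, SeenConst = true
%g3 = getelementptr i8, ptr %g2, i64 %b    ; visited first: the first multi-use gep, %b reused for free
call void @use(ptr %g3)
%cmp = icmp eq ptr %g3, %base

The final count is NumGEPs (1) plus one for SeenConst, i.e. 2, which does not exceed the limit of 2, so this chain is still considered cheap to expand.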
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6737b50405ee2..cbe4e3718a75c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -709,7 +709,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
};
CommonPointerBase Base = CommonPointerBase::compute(GEPLHS, RHS);
- if (Base.Ptr == RHS && CanFold(Base.LHSNW)) {
+ if (Base.Ptr == RHS && CanFold(Base.LHSNW) && !Base.isExpensive()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
Type *IdxTy = DL.getIndexType(GEPLHS->getType());
Value *Offset =
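For reference, a minimal before/after of the fold this check now guards (hypothetical snippet; the actual fold also consults the gep's nowrap flags via CanFold):

%gep = getelementptr inbounds i8, ptr %p, i64 %off
%cmp = icmp eq ptr %gep, %p
; When !Base.isExpensive(), this becomes:
%cmp.off = icmp eq i64 %off, 0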
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9adde8094d44d..4ffc11db6856f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -896,6 +896,9 @@ struct CommonPointerBase {
GEPNoWrapFlags RHSNW = GEPNoWrapFlags::all();
static CommonPointerBase compute(Value *LHS, Value *RHS);
+
+ /// Whether expanding the GEP chains is expensive.
+ bool isExpensive() const;
};
} // end namespace llvm
diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll
index 3f104056fb1f2..6c7572089532a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -849,3 +849,91 @@ define i1 @gep_mugtiple_ugt_inbounds_nusw(ptr %base, i64 %idx, i64 %idx2) {
%cmp = icmp ugt ptr %gep2, %base
ret i1 %cmp
}
+
+define i1 @gep_multiple_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
+; CHECK-NEXT: call void @use(ptr [[GEP3]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[GEP3_IDX]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep1 = getelementptr i32, ptr %base, i64 %idx1
+ %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+ %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+ call void @use(ptr %gep3)
+ %cmp = icmp eq ptr %gep3, %base
+ ret i1 %cmp
+}
+
+define i1 @gep_multiple_multi_use_below_limit_extra_one_use_gep(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit_extra_one_use_gep(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
+; CHECK-NEXT: [[GEP4_IDX_NEG:%.*]] = mul i64 [[IDX4:%.*]], -4
+; CHECK-NEXT: call void @use(ptr [[GEP3]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[GEP3_IDX]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP2]], [[GEP4_IDX_NEG]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep1 = getelementptr i32, ptr %base, i64 %idx1
+ %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+ %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+ %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+ call void @use(ptr %gep3)
+ %cmp = icmp eq ptr %gep4, %base
+ ret i1 %cmp
+}
+
+define i1 @gep_multiple_multi_use_below_limit_consts(ptr %base, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit_consts(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 16
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 16
+; CHECK-NEXT: [[GEP4_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr [[GEP3]], i64 [[GEP4_IDX]]
+; CHECK-NEXT: call void @use(ptr [[GEP4]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP2_IDX]], 32
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[GEP4_IDX]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep1 = getelementptr i32, ptr %base, i64 4
+ %gep2 = getelementptr i32, ptr %gep1, i64 %idx1
+ %gep3 = getelementptr i32, ptr %gep2, i64 4
+ %gep4 = getelementptr i32, ptr %gep3, i64 %idx2
+ call void @use(ptr %gep4)
+ %cmp = icmp eq ptr %gep4, %base
+ ret i1 %cmp
+}
+
+define i1 @gep_multiple_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_multiple_multi_use_above_limit(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i32, ptr [[GEP2]], i64 [[IDX3:%.*]]
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i32, ptr [[GEP3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT: call void @use(ptr [[GEP4]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEP4]], [[BASE]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep1 = getelementptr i32, ptr %base, i64 %idx1
+ %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+ %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+ %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+ call void @use(ptr %gep4)
+ %cmp = icmp eq ptr %gep4, %base
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index 993a06ad1780f..c53973c482f39 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -1089,3 +1089,55 @@ define <2 x i64> @splat_geps_multiple(ptr %base, i64 %idx0, <2 x i64> %idx1, <2
%d = sub <2 x i64> %gep2.int, %gep1.int
ret <2 x i64> %d
}
+
+define i64 @multiple_geps_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @multiple_geps_multi_use_below_limit(
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT: call void @use(ptr [[P2]])
+; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IDX3:%.*]]
+; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT: call void @use(ptr [[P4]])
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[IDX1]], [[IDX2]]
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[IDX3]], [[IDX4]]
+; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i64 [[GEPDIFF]]
+;
+ %p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
+ %p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
+ call void @use(ptr %p2)
+ %p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
+ %p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
+ call void @use(ptr %p4)
+ %i1 = ptrtoint ptr %p4 to i64
+ %i2 = ptrtoint ptr %p2 to i64
+ %d = sub i64 %i2, %i1
+ ret i64 %d
+}
+
+define i64 @multiple_geps_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4, i64 %idx5) {
+; CHECK-LABEL: @multiple_geps_multi_use_above_limit(
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX2:%.*]]
+; CHECK-NEXT: call void @use(ptr [[P2]])
+; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IDX3:%.*]]
+; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds nuw i8, ptr [[P4]], i64 [[IDX5:%.*]]
+; CHECK-NEXT: call void @use(ptr [[P5]])
+; CHECK-NEXT: [[I1:%.*]] = ptrtoint ptr [[P5]] to i64
+; CHECK-NEXT: [[I2:%.*]] = ptrtoint ptr [[P2]] to i64
+; CHECK-NEXT: [[D:%.*]] = sub i64 [[I2]], [[I1]]
+; CHECK-NEXT: ret i64 [[D]]
+;
+ %p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
+ %p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
+ call void @use(ptr %p2)
+ %p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
+ %p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
+ %p5 = getelementptr inbounds nuw i8, ptr %p4, i64 %idx5
+ call void @use(ptr %p5)
+ %i1 = ptrtoint ptr %p5 to i64
+ %i2 = ptrtoint ptr %p2 to i64
+ %d = sub i64 %i2, %i1
+ ret i64 %d
+}
Force-pushed from d5821d2 to ffcac69.
bool SeenMultiUse = false;
for (GEPOperator *GEP : GEPs) {
  // Only count GEPs after the first multi-use GEP. For the first one,
  // we will directly reuse the offset.
Reviewer:
I am confused why the offset of the first multi-use GEP can be reused. See the following case (or multiple_geps_multi_use_below_limit):
%gep1 = gep %p, %idx1
%gep2 = gep %gep1, %idx2 (multiuse)
Currently EmitGEPOffsets leaves %gep2 as is, instead of rewriting %gep2 to gep %p, (%idx1 + %idx2).
Author (nikic):
For %gep2 we will just use %idx2, which is free. When adding %gep1 we generate %idx1 + %idx2, which adds cost.

And yes, we could also rewrite the GEP, in which case we would only have to count the multi-use GEPs after the first, and could ignore one-use GEPs entirely.
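Concretely, a hedged sketch of what the expansion emits for the example above (@use assumed declared):

%gep1 = getelementptr i8, ptr %p, i64 %idx1
%gep2 = getelementptr i8, ptr %gep1, i64 %idx2  ; multi-use, kept as-is
call void @use(ptr %gep2)
; Expanding the offset of %gep2 relative to %p: %idx2 is reused directly (free);
; folding in %gep1 emits the add below, which is the cost isExpensive counts.
%off = add i64 %idx1, %idx2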
Reviewer:
Do you think it would be better to start with changing the GEP expansion/rewriting?
Author (nikic):
> Do you think it would be better to start with changing the GEP expansion/rewriting?

Yeah, if the compile-time impact is acceptable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Implemented in #147263.