-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[InstCombine] Merge one-use GEP offsets during expansion #147263
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
When expanding a GEP chain, if there is a chain of one-use GEPs followed by a multi-use GEP, rewrite the multi-use GEP to include the one-use GEPs' offsets. This means the offsets from the one-use GEPs can be reused by the offset expansion without additional cost (from computing them again with a different reassociation).
@llvm/pr-subscribers-llvm-transforms Author: Nikita Popov (nikic) Changes: When expanding a GEP chain, if there is a chain of one-use GEPs followed by a multi-use GEP, rewrite the multi-use GEP to include the one-use GEPs' offsets. This means the offsets from the one-use GEPs can be reused by the offset expansion without additional cost (from computing them again with a different reassociation). Full diff: https://github.com/llvm/llvm-project/pull/147263.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 91a1b61ddc483..eba1d9225f9ba 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -219,18 +219,59 @@ Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
GEPNoWrapFlags NW, Type *IdxTy,
bool RewriteGEPs) {
+ auto Add = [&](Value *Sum, Value *Offset) -> Value * {
+ if (Sum)
+ return Builder.CreateAdd(Sum, Offset, "", NW.hasNoUnsignedWrap(),
+ NW.isInBounds());
+ else
+ return Offset;
+ };
+
Value *Sum = nullptr;
+ Value *OneUseSum = nullptr;
+ Value *OneUseBase = nullptr;
+ GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
for (GEPOperator *GEP : reverse(GEPs)) {
- Value *Offset = EmitGEPOffset(GEP, RewriteGEPs);
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ auto *Inst = dyn_cast<Instruction>(GEP);
+ if (RewriteGEPs && Inst)
+ Builder.SetInsertPoint(Inst);
+
+ Value *Offset = llvm::emitGEPOffset(&Builder, DL, GEP);
if (Offset->getType() != IdxTy)
Offset = Builder.CreateVectorSplat(
cast<VectorType>(IdxTy)->getElementCount(), Offset);
- if (Sum)
- Sum = Builder.CreateAdd(Sum, Offset, "", NW.hasNoUnsignedWrap(),
- NW.isInBounds());
- else
- Sum = Offset;
+ if (GEP->hasOneUse()) {
+ // Offsets of one-use GEPs will be merged into the next multi-use GEP.
+ OneUseSum = Add(OneUseSum, Offset);
+ OneUseFlags = OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags());
+ if (!OneUseBase)
+ OneUseBase = GEP->getPointerOperand();
+ continue;
+ }
+
+ if (OneUseSum)
+ Offset = Add(OneUseSum, Offset);
+
+ // Rewrite the GEP to reuse the computed offset. This also includes offsets
+ // from preceding one-use GEPs.
+ if (RewriteGEPs && Inst &&
+ !(GEP->getSourceElementType()->isIntegerTy(8) &&
+ GEP->getOperand(1) == Offset)) {
+ replaceInstUsesWith(
+ *Inst,
+ Builder.CreatePtrAdd(
+ OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, "",
+ OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags())));
+ eraseInstFromFunction(*Inst);
+ }
+
+ Sum = Add(Sum, Offset);
+ OneUseSum = OneUseBase = nullptr;
+ OneUseFlags = GEPNoWrapFlags::all();
}
+ if (OneUseSum)
+ Sum = Add(Sum, OneUseSum);
if (!Sum)
return Constant::getNullValue(IdxTy);
return Sum;
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 752ff0cae5b78..bb0a94cb01494 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -682,15 +682,15 @@ define i32 @test28() nounwind {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ORIENTATIONS:%.*]] = alloca [1 x [1 x %struct.x]], align 8
; CHECK-NEXT: [[T3:%.*]] = call i32 @puts(ptr noundef nonnull dereferenceable(1) @.str) #[[ATTR0]]
-; CHECK-NEXT: [[T45:%.*]] = getelementptr inbounds nuw i8, ptr [[ORIENTATIONS]], i64 1
; CHECK-NEXT: br label [[BB10:%.*]]
; CHECK: bb10:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[BB10]] ]
; CHECK-NEXT: [[T12_REC:%.*]] = xor i32 [[INDVAR]], -1
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T12_REC]] to i64
-; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds i8, ptr [[T45]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 1
+; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds i8, ptr [[ORIENTATIONS]], i64 [[TMP1]]
; CHECK-NEXT: [[T16:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str1, ptr nonnull [[T12]]) #[[ATTR0]]
-; CHECK-NEXT: [[T84:%.*]] = icmp eq i32 [[INDVAR]], 0
+; CHECK-NEXT: [[T84:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[T84]], label [[BB17:%.*]], label [[BB10]]
; CHECK: bb17:
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index 11af6b4a0197f..fd70df772564c 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -945,19 +945,15 @@ define i64 @multiple_geps_two_chains_gep_base(ptr %base, i64 %base.idx, i64 %idx
define i64 @multiple_geps_two_chains_multi_use(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
; CHECK-LABEL: @multiple_geps_two_chains_multi_use(
-; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX2:%.*]], 2
-; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1:%.*]], i64 [[P2_IDX]]
-; CHECK-NEXT: [[P4_IDX:%.*]] = shl nsw i64 [[IDX4:%.*]], 2
-; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[P4_IDX]]
-; CHECK-NEXT: [[P3_IDX:%.*]] = shl nsw i64 [[IDX3:%.*]], 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[P3_IDX]]
-; CHECK-NEXT: [[P4_IDX1:%.*]] = shl nsw i64 [[IDX5:%.*]], 2
-; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, ptr [[P3]], i64 [[P4_IDX1]]
+; CHECK-NEXT: [[P1_IDX1:%.*]] = add i64 [[IDX1:%.*]], [[IDX2:%.*]]
+; CHECK-NEXT: [[P4_IDX:%.*]] = shl i64 [[P1_IDX1]], 2
+; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 [[P4_IDX]]
+; CHECK-NEXT: [[P3_IDX2:%.*]] = add i64 [[IDX3:%.*]], [[IDX4:%.*]]
+; CHECK-NEXT: [[P4_IDX1:%.*]] = shl i64 [[P3_IDX2]], 2
+; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[P4_IDX1]]
; CHECK-NEXT: call void @use(ptr [[P5]])
; CHECK-NEXT: call void @use(ptr [[P4]])
-; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[P2_IDX]], [[P4_IDX]]
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[P3_IDX]], [[P4_IDX1]]
-; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[P4_IDX]], [[P4_IDX1]]
; CHECK-NEXT: ret i64 [[GEPDIFF]]
;
%p1 = getelementptr inbounds i32, ptr %base, i64 %idx1
@@ -974,23 +970,18 @@ define i64 @multiple_geps_two_chains_multi_use(ptr %base, i64 %idx1, i64 %idx2,
define i64 @multiple_geps_two_chains_partial_multi_use(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4, i64 %idx5, i64 %idx6) {
; CHECK-LABEL: @multiple_geps_two_chains_partial_multi_use(
-; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX2:%.*]], 2
-; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1:%.*]], i64 [[P2_IDX]]
-; CHECK-NEXT: [[P4_IDX:%.*]] = shl nsw i64 [[IDX4:%.*]], 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[P4_IDX]]
-; CHECK-NEXT: [[P3_IDX:%.*]] = shl nsw i64 [[IDX3:%.*]], 2
-; CHECK-NEXT: [[P4_IDX1:%.*]] = shl nsw i64 [[IDX7:%.*]], 2
-; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[P4_IDX1]]
-; CHECK-NEXT: [[P5_IDX:%.*]] = shl nsw i64 [[IDX5:%.*]], 2
-; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, ptr [[P5]], i64 [[P5_IDX]]
-; CHECK-NEXT: [[P6_IDX:%.*]] = shl nsw i64 [[IDX6:%.*]], 2
+; CHECK-NEXT: [[P1_IDX1:%.*]] = add i64 [[IDX1:%.*]], [[IDX2:%.*]]
+; CHECK-NEXT: [[P4_IDX:%.*]] = shl i64 [[P1_IDX1]], 2
+; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 [[P4_IDX]]
+; CHECK-NEXT: [[P4_IDX2:%.*]] = add i64 [[IDX4:%.*]], [[IDX5:%.*]]
+; CHECK-NEXT: [[P5_IDX:%.*]] = shl i64 [[P4_IDX2]], 2
+; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[P5_IDX]]
; CHECK-NEXT: call void @use(ptr [[P3]])
; CHECK-NEXT: call void @use(ptr [[P4]])
-; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[P2_IDX]], [[P4_IDX]]
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], [[P3_IDX]]
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[P4_IDX1]], [[P5_IDX]]
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], [[P6_IDX]]
-; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[P1_IDX1]], [[IDX3:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[P4_IDX2]], [[IDX6:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[GEPDIFF:%.*]] = shl i64 [[TMP5]], 2
; CHECK-NEXT: ret i64 [[GEPDIFF]]
;
%p1 = getelementptr inbounds i32, ptr %base, i64 %idx1
|
; CHECK-NEXT: br label [[BB10:%.*]] | ||
; CHECK: bb10: | ||
; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[BB10]] ] | ||
; CHECK-NEXT: [[T12_REC:%.*]] = xor i32 [[INDVAR]], -1 | ||
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T12_REC]] to i64 | ||
; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds i8, ptr [[T45]], i64 [[TMP0]] | ||
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 1 | ||
; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds i8, ptr [[ORIENTATIONS]], i64 [[TMP1]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This illustrates a regression from this approach: We pull an addition into the loop.
(I think if we allowed folding of multi-use x+c==c2, this particular case would ultimately fold away.)
This comment was marked as outdated.
This comment was marked as outdated.
A downside of this approach is that we can't preserve all GEP flags when round-tripping gep+gep -> gep+add -> gep+gep. So if extra uses get removed and we restore the gep+gep form, we may lose flags. |
When expanding a GEP chain, if there is a chain of one-use GEPs followed by a multi-use GEP, rewrite the multi-use GEP to include the one-use GEPs' offsets.
This means the offsets from the one-use GEPs can be reused by the offset expansion without additional cost (from computing them again with a different reassociation).