From eebf5a8d599b36a90d56bdf408d2fbc7f2ff67c0 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 7 Jul 2025 10:12:02 +0200 Subject: [PATCH 1/2] [InstCombine] Merge one-use GEP offsets during expansion When expanding a GEP chain, if there is a chain of one-use GEPs followed by a multi-use GEP, rewrite the multi-use GEP to include the one-use GEPs offsets. This means the offsets from the one-use GEPs can be reused by the offset expansion without additional cost (from computing them again with a differen reassociation). --- .../InstCombine/InstructionCombining.cpp | 53 ++++++++++++++++--- .../Transforms/InstCombine/getelementptr.ll | 6 +-- llvm/test/Transforms/InstCombine/sub-gep.ll | 43 ++++++--------- 3 files changed, 67 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 91a1b61ddc483..eba1d9225f9ba 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -219,18 +219,59 @@ Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) { Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef GEPs, GEPNoWrapFlags NW, Type *IdxTy, bool RewriteGEPs) { + auto Add = [&](Value *Sum, Value *Offset) -> Value * { + if (Sum) + return Builder.CreateAdd(Sum, Offset, "", NW.hasNoUnsignedWrap(), + NW.isInBounds()); + else + return Offset; + }; + Value *Sum = nullptr; + Value *OneUseSum = nullptr; + Value *OneUseBase = nullptr; + GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all(); for (GEPOperator *GEP : reverse(GEPs)) { - Value *Offset = EmitGEPOffset(GEP, RewriteGEPs); + IRBuilderBase::InsertPointGuard Guard(Builder); + auto *Inst = dyn_cast(GEP); + if (RewriteGEPs && Inst) + Builder.SetInsertPoint(Inst); + + Value *Offset = llvm::emitGEPOffset(&Builder, DL, GEP); if (Offset->getType() != IdxTy) Offset = Builder.CreateVectorSplat( cast(IdxTy)->getElementCount(), Offset); - if (Sum) - Sum = Builder.CreateAdd(Sum, Offset, "", NW.hasNoUnsignedWrap(), - NW.isInBounds()); - else - Sum = Offset; + if (GEP->hasOneUse()) { + // Offsets of one-use GEPs will be merged into the next multi-use GEP. + OneUseSum = Add(OneUseSum, Offset); + OneUseFlags = OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags()); + if (!OneUseBase) + OneUseBase = GEP->getPointerOperand(); + continue; + } + + if (OneUseSum) + Offset = Add(OneUseSum, Offset); + + // Rewrite the GEP to reuse the computed offset. This also includes offsets + // from preceding one-use GEPs. + if (RewriteGEPs && Inst && + !(GEP->getSourceElementType()->isIntegerTy(8) && + GEP->getOperand(1) == Offset)) { + replaceInstUsesWith( + *Inst, + Builder.CreatePtrAdd( + OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, "", + OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags()))); + eraseInstFromFunction(*Inst); + } + + Sum = Add(Sum, Offset); + OneUseSum = OneUseBase = nullptr; + OneUseFlags = GEPNoWrapFlags::all(); } + if (OneUseSum) + Sum = Add(Sum, OneUseSum); if (!Sum) return Constant::getNullValue(IdxTy); return Sum; diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index 752ff0cae5b78..bb0a94cb01494 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -682,15 +682,15 @@ define i32 @test28() nounwind { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ORIENTATIONS:%.*]] = alloca [1 x [1 x %struct.x]], align 8 ; CHECK-NEXT: [[T3:%.*]] = call i32 @puts(ptr noundef nonnull dereferenceable(1) @.str) #[[ATTR0]] -; CHECK-NEXT: [[T45:%.*]] = getelementptr inbounds nuw i8, ptr [[ORIENTATIONS]], i64 1 ; CHECK-NEXT: br label [[BB10:%.*]] ; CHECK: bb10: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[BB10]] ] ; CHECK-NEXT: [[T12_REC:%.*]] = xor i32 [[INDVAR]], -1 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T12_REC]] to i64 -; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds i8, ptr [[T45]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 1 +; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds i8, ptr [[ORIENTATIONS]], i64 [[TMP1]] ; CHECK-NEXT: [[T16:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str1, ptr nonnull [[T12]]) #[[ATTR0]] -; CHECK-NEXT: [[T84:%.*]] = icmp eq i32 [[INDVAR]], 0 +; CHECK-NEXT: [[T84:%.*]] = icmp eq i64 [[TMP1]], 0 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 ; CHECK-NEXT: br i1 [[T84]], label [[BB17:%.*]], label [[BB10]] ; CHECK: bb17: diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index 11af6b4a0197f..fd70df772564c 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -945,19 +945,15 @@ define i64 @multiple_geps_two_chains_gep_base(ptr %base, i64 %base.idx, i64 %idx define i64 @multiple_geps_two_chains_multi_use(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) { ; CHECK-LABEL: @multiple_geps_two_chains_multi_use( -; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX2:%.*]], 2 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1:%.*]], i64 [[P2_IDX]] -; CHECK-NEXT: [[P4_IDX:%.*]] = shl nsw i64 [[IDX4:%.*]], 2 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[P4_IDX]] -; CHECK-NEXT: [[P3_IDX:%.*]] = shl nsw i64 [[IDX3:%.*]], 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[P3_IDX]] -; CHECK-NEXT: [[P4_IDX1:%.*]] = shl nsw i64 [[IDX5:%.*]], 2 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, ptr [[P3]], i64 [[P4_IDX1]] +; CHECK-NEXT: [[P1_IDX1:%.*]] = add i64 [[IDX1:%.*]], [[IDX2:%.*]] +; CHECK-NEXT: [[P4_IDX:%.*]] = shl i64 [[P1_IDX1]], 2 +; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 [[P4_IDX]] +; CHECK-NEXT: [[P3_IDX2:%.*]] = add i64 [[IDX3:%.*]], [[IDX4:%.*]] +; CHECK-NEXT: [[P4_IDX1:%.*]] = shl i64 [[P3_IDX2]], 2 +; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[P4_IDX1]] ; CHECK-NEXT: call void @use(ptr [[P5]]) ; CHECK-NEXT: call void @use(ptr [[P4]]) -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[P2_IDX]], [[P4_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[P3_IDX]], [[P4_IDX1]] -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[P4_IDX]], [[P4_IDX1]] ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %p1 = getelementptr inbounds i32, ptr %base, i64 %idx1 @@ -974,23 +970,18 @@ define i64 @multiple_geps_two_chains_multi_use(ptr %base, i64 %idx1, i64 %idx2, define i64 @multiple_geps_two_chains_partial_multi_use(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4, i64 %idx5, i64 %idx6) { ; CHECK-LABEL: @multiple_geps_two_chains_partial_multi_use( -; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX2:%.*]], 2 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1:%.*]], i64 [[P2_IDX]] -; CHECK-NEXT: [[P4_IDX:%.*]] = shl nsw i64 [[IDX4:%.*]], 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[P4_IDX]] -; CHECK-NEXT: [[P3_IDX:%.*]] = shl nsw i64 [[IDX3:%.*]], 2 -; CHECK-NEXT: [[P4_IDX1:%.*]] = shl nsw i64 [[IDX7:%.*]], 2 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[P4_IDX1]] -; CHECK-NEXT: [[P5_IDX:%.*]] = shl nsw i64 [[IDX5:%.*]], 2 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, ptr [[P5]], i64 [[P5_IDX]] -; CHECK-NEXT: [[P6_IDX:%.*]] = shl nsw i64 [[IDX6:%.*]], 2 +; CHECK-NEXT: [[P1_IDX1:%.*]] = add i64 [[IDX1:%.*]], [[IDX2:%.*]] +; CHECK-NEXT: [[P4_IDX:%.*]] = shl i64 [[P1_IDX1]], 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 [[P4_IDX]] +; CHECK-NEXT: [[P4_IDX2:%.*]] = add i64 [[IDX4:%.*]], [[IDX5:%.*]] +; CHECK-NEXT: [[P5_IDX:%.*]] = shl i64 [[P4_IDX2]], 2 +; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[P5_IDX]] ; CHECK-NEXT: call void @use(ptr [[P3]]) ; CHECK-NEXT: call void @use(ptr [[P4]]) -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[P2_IDX]], [[P4_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], [[P3_IDX]] -; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[P4_IDX1]], [[P5_IDX]] -; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], [[P6_IDX]] -; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[P1_IDX1]], [[IDX3:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[P4_IDX2]], [[IDX6:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = shl i64 [[TMP5]], 2 ; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %p1 = getelementptr inbounds i32, ptr %base, i64 %idx1 From 5530e1f7968f6328a99f0d27f16f33601c54177f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 7 Jul 2025 12:35:16 +0200 Subject: [PATCH 2/2] Fix insert point handling --- .../InstCombine/InstructionCombining.cpp | 67 ++++++++++--------- llvm/test/Transforms/InstCombine/sub-gep.ll | 23 +++++++ 2 files changed, 59 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index eba1d9225f9ba..044b9b215b9b8 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -232,38 +232,43 @@ Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef GEPs, Value *OneUseBase = nullptr; GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all(); for (GEPOperator *GEP : reverse(GEPs)) { - IRBuilderBase::InsertPointGuard Guard(Builder); - auto *Inst = dyn_cast(GEP); - if (RewriteGEPs && Inst) - Builder.SetInsertPoint(Inst); - - Value *Offset = llvm::emitGEPOffset(&Builder, DL, GEP); - if (Offset->getType() != IdxTy) - Offset = Builder.CreateVectorSplat( - cast(IdxTy)->getElementCount(), Offset); - if (GEP->hasOneUse()) { - // Offsets of one-use GEPs will be merged into the next multi-use GEP. - OneUseSum = Add(OneUseSum, Offset); - OneUseFlags = OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags()); - if (!OneUseBase) - OneUseBase = GEP->getPointerOperand(); - continue; - } + Value *Offset; + { + // Expand the offset at the point of the previous GEP to enable rewriting. + // However, use the original insertion point for calculating Sum. + IRBuilderBase::InsertPointGuard Guard(Builder); + auto *Inst = dyn_cast(GEP); + if (RewriteGEPs && Inst) + Builder.SetInsertPoint(Inst); + + Offset = llvm::emitGEPOffset(&Builder, DL, GEP); + if (Offset->getType() != IdxTy) + Offset = Builder.CreateVectorSplat( + cast(IdxTy)->getElementCount(), Offset); + if (GEP->hasOneUse()) { + // Offsets of one-use GEPs will be merged into the next multi-use GEP. + OneUseSum = Add(OneUseSum, Offset); + OneUseFlags = OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags()); + if (!OneUseBase) + OneUseBase = GEP->getPointerOperand(); + continue; + } - if (OneUseSum) - Offset = Add(OneUseSum, Offset); - - // Rewrite the GEP to reuse the computed offset. This also includes offsets - // from preceding one-use GEPs. - if (RewriteGEPs && Inst && - !(GEP->getSourceElementType()->isIntegerTy(8) && - GEP->getOperand(1) == Offset)) { - replaceInstUsesWith( - *Inst, - Builder.CreatePtrAdd( - OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, "", - OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags()))); - eraseInstFromFunction(*Inst); + if (OneUseSum) + Offset = Add(OneUseSum, Offset); + + // Rewrite the GEP to reuse the computed offset. This also includes + // offsets from preceding one-use GEPs. + if (RewriteGEPs && Inst && + !(GEP->getSourceElementType()->isIntegerTy(8) && + GEP->getOperand(1) == Offset)) { + replaceInstUsesWith( + *Inst, + Builder.CreatePtrAdd( + OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, "", + OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags()))); + eraseInstFromFunction(*Inst); + } } Sum = Add(Sum, Offset); diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index fd70df772564c..84e570395e03b 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -998,6 +998,29 @@ define i64 @multiple_geps_two_chains_partial_multi_use(ptr %base, i64 %idx1, i64 ret i64 %d } +define i64 @multiple_geps_two_chains_partial_multi_use_insert_point(ptr %p, i64 %idx1, i64 %idx2, i64 %idx3) { +; CHECK-LABEL: @multiple_geps_two_chains_partial_multi_use_insert_point( +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 8 +; CHECK-NEXT: call void @use(ptr [[GEP2]]) +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[IDX2:%.*]], [[IDX3:%.*]] +; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[TMP1]] +; CHECK-NEXT: call void @use(ptr [[GEP4]]) +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 8 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[IDX1:%.*]], [[TMP2]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] +; + %gep1 = getelementptr i8, ptr %p, i64 %idx1 + %gep2 = getelementptr i8, ptr %p, i64 8 + call void @use(ptr %gep2) + %gep3 = getelementptr i8, ptr %gep2, i64 %idx2 + %gep4 = getelementptr i8, ptr %gep3, i64 %idx3 + call void @use(ptr %gep4) + %gep1.int = ptrtoint ptr %gep1 to i64 + %gep4.int = ptrtoint ptr %gep4 to i64 + %sub = sub i64 %gep1.int, %gep4.int + ret i64 %sub +} + define i64 @multiple_geps_inbounds(ptr %base, i64 %idx, i64 %idx2) { ; CHECK-LABEL: @multiple_geps_inbounds( ; CHECK-NEXT: [[D:%.*]] = add nsw i64 [[IDX:%.*]], [[IDX2:%.*]]