diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 320b79203c0b3..b478c72a14a71 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1048,19 +1048,31 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { if (GEP->getType()->isVectorTy()) return false; + // If the base of this GEP is a ptradd of a constant, let's pass the constant + // along. This ensures that when we have a chain of GEPs the constant + // offset from each is accumulated. + Value *NewBase; + const APInt *BaseOffset; + const bool ExtractBase = + match(GEP->getPointerOperand(), + m_PtrAdd(m_Value(NewBase), m_APInt(BaseOffset))); + + const int64_t BaseByteOffset = ExtractBase ? BaseOffset->getSExtValue() : 0; + // The backend can already nicely handle the case where all indices are // constant. - if (GEP->hasAllConstantIndices()) + if (GEP->hasAllConstantIndices() && !ExtractBase) return false; bool Changed = canonicalizeArrayIndicesToIndexSize(GEP); bool NeedsExtraction; - int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); + int64_t AccumulativeByteOffset = + BaseByteOffset + accumulateByteOffset(GEP, NeedsExtraction); TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); - if (!NeedsExtraction) { + if (!NeedsExtraction && !ExtractBase) { Changed |= reorderGEP(GEP, TTI); return Changed; } @@ -1084,7 +1096,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // Track information for preserving GEP flags. bool AllOffsetsNonNegative = AccumulativeByteOffset >= 0; - bool AllNUWPreserved = true; + bool AllNUWPreserved = GEP->hasNoUnsignedWrap(); + bool NewGEPInBounds = GEP->isInBounds(); + bool NewGEPNUSW = GEP->hasNoUnsignedSignedWrap(); // Remove the constant offset in each sequential index. The resultant GEP // computes the variadic base. 
@@ -1120,6 +1134,16 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { } } } + if (ExtractBase) { + GEPOperator *Base = cast<GEPOperator>(GEP->getPointerOperand()); + AllNUWPreserved &= Base->hasNoUnsignedWrap(); + NewGEPInBounds &= Base->isInBounds(); + NewGEPNUSW &= Base->hasNoUnsignedSignedWrap(); + AllOffsetsNonNegative &= BaseByteOffset >= 0; + + GEP->setOperand(0, NewBase); + RecursivelyDeleteTriviallyDeadInstructions(Base); + } // Clear the inbounds attribute because the new index may be off-bound. // e.g., @@ -1147,7 +1171,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // If the initial GEP was NUW and all operations that we reassociate were NUW // additions, the resulting GEPs are also NUW. - if (GEP->hasNoUnsignedWrap() && AllNUWPreserved) { + if (AllNUWPreserved) { NewGEPFlags |= GEPNoWrapFlags::noUnsignedWrap(); // If the initial GEP additionally had NUSW (or inbounds, which implies // NUSW), we know that the indices in the initial GEP must all have their @@ -1155,13 +1179,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // add-operands therefore also don't have their signbit set. Therefore, all // indices of the resulting GEPs are non-negative -> we can preserve // the inbounds/nusw flag. 
- CanPreserveInBoundsNUSW |= GEP->hasNoUnsignedSignedWrap(); + CanPreserveInBoundsNUSW |= NewGEPNUSW; } if (CanPreserveInBoundsNUSW) { - if (GEP->isInBounds()) + if (NewGEPInBounds) NewGEPFlags |= GEPNoWrapFlags::inBounds(); - else if (GEP->hasNoUnsignedSignedWrap()) + else if (NewGEPNUSW) NewGEPFlags |= GEPNoWrapFlags::noUnsignedSignedWrap(); } @@ -1242,11 +1266,13 @@ bool SeparateConstOffsetFromGEP::run(Function &F) { DL = &F.getDataLayout(); bool Changed = false; - for (BasicBlock &B : F) { - if (!DT->isReachableFromEntry(&B)) + + ReversePostOrderTraversal<Function *> RPOT(&F); + for (BasicBlock *B : RPOT) { + if (!DT->isReachableFromEntry(B)) continue; - for (Instruction &I : llvm::make_early_inc_range(B)) + for (Instruction &I : llvm::make_early_inc_range(*B)) if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) Changed |= splitGEP(GEP); // No need to split GEP ConstantExprs because all its indices are constant diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index 553d7e09390fd..680942fcb4d4b 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -279,11 +279,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v58 ; CHECK-NEXT: s_branch .LBB0_7 -; CHECK-NEXT: .LBB0_16: ; %Flow45 +; CHECK-NEXT: .LBB0_16: ; %Flow43 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s69 ; CHECK-NEXT: v_mov_b32_e32 v57, v0 -; CHECK-NEXT: .LBB0_17: ; %Flow46 +; CHECK-NEXT: .LBB0_17: ; %Flow44 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s68 ; CHECK-NEXT: s_mov_b32 s55, exec_lo @@ -330,11 +330,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: 
v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v57 ; CHECK-NEXT: s_branch .LBB0_19 -; CHECK-NEXT: .LBB0_22: ; %Flow43 +; CHECK-NEXT: .LBB0_22: ; %Flow41 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_inst_prefetch 0x2 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s68 -; CHECK-NEXT: .LBB0_23: ; %Flow44 +; CHECK-NEXT: .LBB0_23: ; %Flow42 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55 ; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1 @@ -347,7 +347,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_or_b32 s53, s4, s53 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s53 ; CHECK-NEXT: s_cbranch_execnz .LBB0_5 -; CHECK-NEXT: .LBB0_25: ; %Flow51 +; CHECK-NEXT: .LBB0_25: ; %Flow49 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll index 9f62477ae01df..af0942e99182d 100644 --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -56,155 +56,153 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: .cfi_offset v29, -240 ; CHECK-NEXT: .cfi_offset v30, -224 ; CHECK-NEXT: .cfi_offset v31, -208 +; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 2, 728(1) +; CHECK-NEXT: ld 14, 688(1) +; CHECK-NEXT: ld 11, 704(1) +; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 21, 5 +; CHECK-NEXT: lwa 5, 0(7) +; CHECK-NEXT: ld 7, 720(1) ; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill ; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 22, 5 -; CHECK-NEXT: ld 5, 848(1) +; CHECK-NEXT: mr 22, 6 +; CHECK-NEXT: ld 6, 848(1) ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: mr 11, 7 -; CHECK-NEXT: ld 23, 688(1) -; CHECK-NEXT: ld 7, 728(1) +; CHECK-NEXT: ld 15, 736(1) ; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill ; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 18, 6 -; CHECK-NEXT: li 6, 9 ; CHECK-NEXT: ld 19, 768(1) -; CHECK-NEXT: ld 2, 760(1) -; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill -; CHECK-NEXT: cmpldi 3, 9 -; CHECK-NEXT: ld 27, 816(1) -; CHECK-NEXT: ld 26, 808(1) -; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 15, 736(1) -; CHECK-NEXT: lxv 39, 0(8) +; CHECK-NEXT: ld 18, 760(1) ; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill ; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 30, 704(1) -; CHECK-NEXT: lxv 38, 0(9) -; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 21, 784(1) +; CHECK-NEXT: ld 12, 696(1) +; CHECK-NEXT: lxv 0, 0(9) +; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 10, 72(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 1, 0(8) +; CHECK-NEXT: cmpldi 3, 9 +; CHECK-NEXT: ld 30, 824(1) +; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill +; 
CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 29, 840(1) +; CHECK-NEXT: ld 28, 832(1) +; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 23, 784(1) ; CHECK-NEXT: ld 20, 776(1) ; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill ; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill -; CHECK-NEXT: iselgt 3, 3, 6 -; CHECK-NEXT: ld 6, 720(1) +; CHECK-NEXT: ld 25, 800(1) ; CHECK-NEXT: ld 24, 792(1) -; CHECK-NEXT: std 10, 72(1) # 8-byte Folded Spill -; CHECK-NEXT: std 7, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 27, 816(1) +; CHECK-NEXT: ld 26, 808(1) +; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 17, 752(1) +; CHECK-NEXT: extswsli 9, 5, 3 +; CHECK-NEXT: lxv 4, 0(14) +; CHECK-NEXT: std 14, 32(1) # 8-byte Folded Spill +; CHECK-NEXT: std 12, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 0, 5, 40 +; CHECK-NEXT: sldi 14, 5, 5 +; CHECK-NEXT: mulli 31, 5, 24 +; CHECK-NEXT: lxv 38, 0(2) +; CHECK-NEXT: lxv 2, 0(11) +; CHECK-NEXT: std 2, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 88(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 2, 5, 48 +; CHECK-NEXT: sldi 5, 5, 4 +; CHECK-NEXT: ld 16, 744(1) +; CHECK-NEXT: lxv 5, 0(10) +; CHECK-NEXT: std 6, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 192(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 6, 712(1) +; CHECK-NEXT: mr 10, 7 +; CHECK-NEXT: add 7, 14, 21 +; CHECK-NEXT: lxv 13, 0(19) +; CHECK-NEXT: std 8, 48(1) # 8-byte Folded Spill +; CHECK-NEXT: std 6, 56(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 8, 11 +; CHECK-NEXT: li 11, 9 +; CHECK-NEXT: iselgt 3, 3, 11 ; CHECK-NEXT: addi 3, 3, -2 -; CHECK-NEXT: lxv 6, 0(19) -; CHECK-NEXT: lxv 11, 0(7) -; CHECK-NEXT: std 5, 200(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: 
std 6, 48(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 840(1) -; CHECK-NEXT: lxv 12, 0(6) -; CHECK-NEXT: rldicl 12, 3, 61, 3 +; CHECK-NEXT: rldicl 11, 3, 61, 3 +; CHECK-NEXT: lxv 3, 0(12) +; CHECK-NEXT: lxv 40, 0(6) +; CHECK-NEXT: std 18, 112(1) # 8-byte Folded Spill ; CHECK-NEXT: std 19, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: add 19, 21, 5 +; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload +; CHECK-NEXT: lxv 39, 0(10) +; CHECK-NEXT: addi 3, 7, 32 +; CHECK-NEXT: add 12, 31, 21 ; CHECK-NEXT: std 20, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 4, 0(21) -; CHECK-NEXT: ld 25, 800(1) -; CHECK-NEXT: lxv 33, 0(10) -; CHECK-NEXT: lxv 32, 0(23) -; CHECK-NEXT: lxv 36, 0(30) -; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 17, 752(1) -; CHECK-NEXT: ld 16, 744(1) -; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 29, 712(1) -; CHECK-NEXT: ld 28, 696(1) -; CHECK-NEXT: std 8, 56(1) # 8-byte Folded Spill -; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 37, 0(28) -; CHECK-NEXT: lxv 13, 0(29) -; CHECK-NEXT: mr 8, 29 -; CHECK-NEXT: mr 9, 30 -; CHECK-NEXT: mr 10, 28 -; CHECK-NEXT: std 25, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 33, 0(15) +; CHECK-NEXT: lxv 32, 0(16) ; CHECK-NEXT: std 26, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 10, 0(15) -; CHECK-NEXT: lxv 9, 0(16) -; CHECK-NEXT: li 28, 1 -; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 8, 0(17) -; CHECK-NEXT: lxv 7, 0(2) +; CHECK-NEXT: std 27, 168(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 37, 0(17) +; CHECK-NEXT: lxv 36, 0(18) +; CHECK-NEXT: std 30, 176(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, 184(1) # 8-byte 
Folded Spill +; CHECK-NEXT: lxv 12, 0(20) +; CHECK-NEXT: lxv 11, 0(23) +; CHECK-NEXT: add 20, 21, 9 ; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 5, 0(20) -; CHECK-NEXT: lxv 3, 0(24) +; CHECK-NEXT: lxv 10, 0(24) +; CHECK-NEXT: lxv 9, 0(25) ; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 2, 0(25) -; CHECK-NEXT: lxv 1, 0(26) +; CHECK-NEXT: lxv 8, 0(26) +; CHECK-NEXT: lxv 7, 0(27) +; CHECK-NEXT: addi 12, 12, 32 +; CHECK-NEXT: li 27, 0 +; CHECK-NEXT: mr 26, 21 ; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill -; CHECK-NEXT: lxv 0, 0(27) +; CHECK-NEXT: lxv 6, 0(30) +; CHECK-NEXT: lxv 41, 0(28) +; CHECK-NEXT: addi 7, 11, 1 +; CHECK-NEXT: add 11, 0, 21 +; CHECK-NEXT: li 28, 1 ; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 43, 0(29) +; CHECK-NEXT: lxv 42, 0(5) ; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill +; CHECK-NEXT: addi 11, 11, 32 ; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill -; CHECK-NEXT: std 5, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 832(1) ; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill -; CHECK-NEXT: std 15, 88(1) # 8-byte Folded Spill ; CHECK-NEXT: std 16, 96(1) # 8-byte Folded Spill ; CHECK-NEXT: std 17, 104(1) # 8-byte Folded Spill -; CHECK-NEXT: std 2, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: std 5, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 824(1) -; CHECK-NEXT: std 5, 176(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: lwa 5, 0(11) -; 
CHECK-NEXT: li 27, 0 -; CHECK-NEXT: ld 7, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: mulli 6, 5, 40 -; CHECK-NEXT: sldi 0, 5, 4 -; CHECK-NEXT: extswsli 14, 5, 3 -; CHECK-NEXT: lxv 40, 0(7) -; CHECK-NEXT: ld 7, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: add 31, 14, 22 -; CHECK-NEXT: add 11, 0, 22 -; CHECK-NEXT: mr 26, 22 -; CHECK-NEXT: addi 3, 11, 32 -; CHECK-NEXT: addi 11, 12, 1 -; CHECK-NEXT: mulli 12, 5, 48 -; CHECK-NEXT: addi 31, 31, 32 -; CHECK-NEXT: add 19, 22, 6 -; CHECK-NEXT: sldi 6, 5, 5 -; CHECK-NEXT: mulli 5, 5, 24 -; CHECK-NEXT: lxv 41, 0(7) -; CHECK-NEXT: add 20, 22, 6 -; CHECK-NEXT: add 21, 22, 5 -; CHECK-NEXT: ld 5, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 43, 0(5) -; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 42, 0(5) +; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 152(1) # 8-byte Folded Spill ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_4 Depth 2 -; CHECK-NEXT: maddld 5, 12, 27, 0 -; CHECK-NEXT: mr 6, 18 -; CHECK-NEXT: mr 29, 21 +; CHECK-NEXT: maddld 5, 2, 27, 0 +; CHECK-NEXT: mr 6, 22 ; CHECK-NEXT: mr 30, 20 -; CHECK-NEXT: mr 2, 19 -; CHECK-NEXT: mtctr 11 -; CHECK-NEXT: add 25, 22, 5 -; CHECK-NEXT: maddld 5, 12, 27, 14 -; CHECK-NEXT: add 24, 22, 5 +; CHECK-NEXT: mr 29, 19 +; CHECK-NEXT: mtctr 7 +; CHECK-NEXT: add 25, 21, 5 +; CHECK-NEXT: maddld 5, 2, 27, 14 +; CHECK-NEXT: add 24, 21, 5 +; CHECK-NEXT: maddld 5, 2, 27, 31 +; CHECK-NEXT: add 23, 21, 5 ; CHECK-NEXT: mr 5, 26 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_4: # %_loop_2_do_ @@ -212,66 +210,66 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lxvp 34, 0(6) ; CHECK-NEXT: lxvp 44, 0(5) -; CHECK-NEXT: xvmaddadp 39, 45, 35 -; CHECK-NEXT: lxvp 46, 0(24) -; CHECK-NEXT: xvmaddadp 38, 47, 35 -; CHECK-NEXT: lxvp 48, 0(25) -; CHECK-NEXT: lxvp 50, 0(29) -; CHECK-NEXT: lxvp 62, 0(30) -; CHECK-NEXT: lxvp 60, 0(2) +; CHECK-NEXT: xvmaddadp 1, 45, 35 +; CHECK-NEXT: lxvp 46, 0(30) +; CHECK-NEXT: xvmaddadp 0, 47, 35 +; CHECK-NEXT: lxvp 48, 0(29) +; CHECK-NEXT: lxvp 50, 0(23) +; CHECK-NEXT: lxvp 62, 0(24) +; CHECK-NEXT: lxvp 60, 0(25) ; CHECK-NEXT: lxvp 58, 32(6) ; CHECK-NEXT: lxvp 56, 32(5) -; CHECK-NEXT: lxvp 54, 32(24) -; CHECK-NEXT: lxvp 52, 32(25) -; CHECK-NEXT: lxvp 30, 32(29) -; CHECK-NEXT: lxvp 28, 32(30) -; CHECK-NEXT: lxvp 26, 32(2) -; CHECK-NEXT: xvmaddadp 33, 49, 35 -; CHECK-NEXT: xvmaddadp 32, 51, 35 -; CHECK-NEXT: xvmaddadp 37, 63, 35 -; CHECK-NEXT: xvmaddadp 36, 61, 35 -; CHECK-NEXT: xvmaddadp 13, 44, 34 -; CHECK-NEXT: xvmaddadp 12, 46, 34 -; CHECK-NEXT: xvmaddadp 11, 48, 34 -; CHECK-NEXT: xvmaddadp 10, 50, 34 -; CHECK-NEXT: xvmaddadp 9, 62, 34 -; CHECK-NEXT: xvmaddadp 8, 60, 34 -; CHECK-NEXT: xvmaddadp 7, 57, 59 -; CHECK-NEXT: xvmaddadp 6, 55, 59 -; CHECK-NEXT: xvmaddadp 5, 53, 59 -; CHECK-NEXT: xvmaddadp 4, 31, 59 -; CHECK-NEXT: xvmaddadp 3, 29, 59 -; CHECK-NEXT: xvmaddadp 2, 27, 59 -; CHECK-NEXT: xvmaddadp 1, 56, 58 -; CHECK-NEXT: xvmaddadp 0, 54, 58 -; CHECK-NEXT: xvmaddadp 40, 52, 58 +; CHECK-NEXT: lxvp 54, 32(30) +; CHECK-NEXT: lxvp 52, 32(29) +; CHECK-NEXT: lxvp 30, 32(23) +; CHECK-NEXT: lxvp 28, 32(24) +; CHECK-NEXT: lxvp 26, 32(25) +; CHECK-NEXT: xvmaddadp 5, 49, 35 +; CHECK-NEXT: xvmaddadp 4, 51, 35 +; CHECK-NEXT: xvmaddadp 3, 63, 35 +; CHECK-NEXT: xvmaddadp 2, 61, 35 +; CHECK-NEXT: xvmaddadp 40, 44, 34 +; CHECK-NEXT: xvmaddadp 39, 46, 34 +; CHECK-NEXT: xvmaddadp 38, 48, 34 +; CHECK-NEXT: xvmaddadp 33, 50, 34 +; CHECK-NEXT: xvmaddadp 32, 62, 34 +; CHECK-NEXT: xvmaddadp 37, 60, 34 +; CHECK-NEXT: xvmaddadp 36, 57, 59 +; CHECK-NEXT: 
xvmaddadp 13, 55, 59 +; CHECK-NEXT: xvmaddadp 12, 53, 59 +; CHECK-NEXT: xvmaddadp 11, 31, 59 +; CHECK-NEXT: xvmaddadp 10, 29, 59 +; CHECK-NEXT: xvmaddadp 9, 27, 59 +; CHECK-NEXT: xvmaddadp 8, 56, 58 +; CHECK-NEXT: xvmaddadp 7, 54, 58 +; CHECK-NEXT: xvmaddadp 6, 52, 58 ; CHECK-NEXT: xvmaddadp 41, 30, 58 ; CHECK-NEXT: xvmaddadp 43, 28, 58 ; CHECK-NEXT: xvmaddadp 42, 26, 58 ; CHECK-NEXT: addi 6, 6, 64 ; CHECK-NEXT: addi 5, 5, 64 +; CHECK-NEXT: addi 30, 30, 64 +; CHECK-NEXT: addi 29, 29, 64 +; CHECK-NEXT: addi 23, 23, 64 ; CHECK-NEXT: addi 24, 24, 64 ; CHECK-NEXT: addi 25, 25, 64 -; CHECK-NEXT: addi 29, 29, 64 -; CHECK-NEXT: addi 30, 30, 64 -; CHECK-NEXT: addi 2, 2, 64 ; CHECK-NEXT: bdnz .LBB0_4 ; CHECK-NEXT: # %bb.5: # %_loop_2_endl_ ; CHECK-NEXT: # ; CHECK-NEXT: addi 28, 28, 6 -; CHECK-NEXT: add 26, 26, 12 -; CHECK-NEXT: add 31, 31, 12 -; CHECK-NEXT: add 19, 19, 12 -; CHECK-NEXT: add 3, 3, 12 -; CHECK-NEXT: add 20, 20, 12 -; CHECK-NEXT: add 21, 21, 12 +; CHECK-NEXT: add 26, 26, 2 +; CHECK-NEXT: add 20, 20, 2 +; CHECK-NEXT: add 11, 11, 2 +; CHECK-NEXT: add 19, 19, 2 +; CHECK-NEXT: add 3, 3, 2 +; CHECK-NEXT: add 12, 12, 2 ; CHECK-NEXT: addi 27, 27, 1 ; CHECK-NEXT: cmpld 28, 4 ; CHECK-NEXT: ble 0, .LBB0_3 ; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit -; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload ; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload -; CHECK-NEXT: stxv 39, 0(3) +; CHECK-NEXT: stxv 1, 0(3) ; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload ; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload @@ -284,7 +282,7 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload -; CHECK-NEXT: stxv 38, 0(3) +; CHECK-NEXT: stxv 0, 0(3) ; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload @@ -297,8 +295,8 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. ; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 33, 0(3) -; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 5, 0(3) +; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload @@ -310,40 +308,41 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 32, 0(3) -; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 37, 0(10) -; CHECK-NEXT: stxv 36, 0(9) -; CHECK-NEXT: stxv 13, 0(8) +; CHECK-NEXT: stxv 4, 0(3) +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 12, 0(3) +; CHECK-NEXT: stxv 3, 0(3) +; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 2, 0(8) +; CHECK-NEXT: stxv 40, 0(3) ; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 11, 0(3) +; CHECK-NEXT: stxv 39, 0(10) +; CHECK-NEXT: stxv 38, 0(3) ; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 10, 0(3) +; CHECK-NEXT: stxv 33, 0(3) ; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 9, 0(3) +; CHECK-NEXT: stxv 32, 0(3) ; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 8, 0(3) +; CHECK-NEXT: stxv 37, 0(3) ; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 7, 0(3) +; CHECK-NEXT: stxv 36, 0(3) ; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 6, 0(3) +; CHECK-NEXT: stxv 13, 0(3) ; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 5, 0(3) +; CHECK-NEXT: stxv 12, 0(3) ; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 4, 0(3) +; CHECK-NEXT: stxv 11, 0(3) ; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 3, 0(3) +; CHECK-NEXT: stxv 10, 0(3) ; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 2, 0(3) +; CHECK-NEXT: stxv 9, 0(3) ; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 1, 0(3) +; CHECK-NEXT: stxv 8, 0(3) ; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 0, 0(3) +; CHECK-NEXT: stxv 7, 0(3) ; CHECK-NEXT: ld 3, 
176(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 40, 0(3) +; CHECK-NEXT: stxv 6, 0(3) ; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 41, 0(3) ; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll index 16e47f057babc..5cb3ee6f72e3b 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -279,8 +279,8 @@ define void @addrspace3(ptr addrspace(3) %in.ptr, i64 %in.idx1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 ; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX1_NNEG]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 1024 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i128, ptr addrspace(3) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDX11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP0]], i32 1024 ; CHECK-NEXT: ret void ; entry: @@ -296,8 +296,8 @@ define void @addrspace7(ptr addrspace(7) %in.ptr, i64 %in.idx1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i64 [[IN_IDX1]], 9223372036854775807 ; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IN_IDX1_NNEG]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr addrspace(7) [[IN_PTR]], i32 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(7) [[TMP0]], i64 1024 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i128, ptr addrspace(7) [[IN_PTR]], i32 [[IDXPROM]] +; CHECK-NEXT: [[IDX11:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 1024 ; CHECK-NEXT: ret void ; entry: diff --git 
a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll index dd12c98af696d..7d8a43f59e367 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll @@ -74,13 +74,13 @@ define void @reorder_i8half(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1 ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] ; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 256 +; CHECK-NEXT: [[IDX13:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 256 ; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 512 +; CHECK-NEXT: [[IDX25:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 512 ; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i64 768 +; CHECK-NEXT: [[IDX37:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 768 ; CHECK-NEXT: ret void ; entry: @@ -169,13 +169,13 @@ define void @bad_index(ptr addrspace(3) %in.ptr, i64 %in.idx0, i64 %in.idx1) { ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM1]] ; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM2]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i64 1 +; CHECK-NEXT: [[IDX13:%.*]] = 
getelementptr i8, ptr addrspace(3) [[TMP4]], i32 1 ; CHECK-NEXT: [[IDXPROM3:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM3]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 2 +; CHECK-NEXT: [[IDX25:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 2 ; CHECK-NEXT: [[IDXPROM4:%.*]] = trunc i64 [[IN_IDX1]] to i32 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr half, ptr addrspace(3) [[BASE]], i32 [[IDXPROM4]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i64 3 +; CHECK-NEXT: [[IDX37:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 3 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/gep-chain.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/gep-chain.ll new file mode 100644 index 0000000000000..2587fd26fdef2 --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/gep-chain.ll @@ -0,0 +1,383 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=nvptx64-nvidia-cuda -S -passes=separate-const-offset-from-gep < %s | FileCheck %s + +%struct.uchar4 = type { i8, i8, i8, i8 } + +define ptr @basic(ptr %ptr, i64 %offset1, i64 %offset2) { +; CHECK-LABEL: define ptr @basic( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[OFFSET1:%.*]], i64 [[OFFSET2:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_UCHAR4:%.*]], ptr [[PTR]], i64 [[OFFSET1]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_UCHAR4]], ptr [[TMP1]], i64 [[OFFSET2]] +; CHECK-NEXT: [[GEP24:%.*]] = getelementptr i8, ptr [[TMP2]], i64 72 +; CHECK-NEXT: ret ptr [[GEP24]] +; + %offset3 = add i64 %offset1, 8 + %gep1 = getelementptr %struct.uchar4, ptr %ptr, i64 %offset3 + %offset4 = add i64 %offset2, 10 + %gep2 = getelementptr %struct.uchar4, ptr %gep1, i64 %offset4 + ret ptr %gep2 +} + +define i32 @more_interesting(ptr %ptr, i32 
%offset1, i32 %offset2) { +; CHECK-LABEL: define i32 @more_interesting( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[OFFSET1:%.*]], i32 [[OFFSET2:%.*]]) { +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[OFFSET1]] to i64 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_UCHAR4:%.*]], ptr [[PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[OFFSET2]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_UCHAR4]], ptr [[GEP1]], i64 [[IDXPROM1]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[OFFSET2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_UCHAR4]], ptr [[TMP1]], i64 [[IDXPROM2]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[R:%.*]] = add i32 [[V1]], [[V2]] +; CHECK-NEXT: ret i32 [[R]] +; + %gep1 = getelementptr inbounds %struct.uchar4, ptr %ptr, i32 %offset1 + %gep2 = getelementptr inbounds nuw i8, ptr %gep1, i32 8 + %gep3 = getelementptr inbounds %struct.uchar4, ptr %gep2, i32 %offset2 + %v1 = load i32, ptr %gep3, align 4 + %gep4 = getelementptr inbounds i8, ptr %gep3, i32 -8 + %gep5 = getelementptr inbounds %struct.uchar4, ptr %gep4, i32 %offset2 + %v2 = load i32, ptr %gep5, align 4 + %r = add i32 %v1, %v2 + ret i32 %r +} + +;; Check nuw/nusw/inbounds flag propagation + +; GEPs with nusw flag. All indices and offsets are non-negative. 
+define ptr @test_0(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_0( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add i64 %i.prom, 1 + %arrayidx2 = getelementptr nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with inbounds flag. All indices and offsets are non-negative. +define ptr @test_1(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_1( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nusw flag. All indices and offsets are non-negative. +define ptr @test_2(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_2( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nusw i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with inbounds flag. All indices and offsets are non-negative. 
+define ptr @test_3(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_3( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nusw flag. All indices and offsets are non-negative. Base ptradd is nuw inbounds with offset 1, the outer GEP is nuw nusw, and the index add is not nuw with constant 1. Both split GEPs are expected to carry nusw only, with a combined constant offset of 2. +define ptr @test_4(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_4( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with inbounds flag. All indices and offsets are non-negative. Base ptradd is nuw inbounds with offset 1, the outer GEP is nuw inbounds, and the index add is not nuw with constant 1. Both split GEPs are expected to carry inbounds only, with a combined constant offset of 2. +define ptr @test_5(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_5( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Negative offsets. Base ptradd is nuw nusw with offset -1, the outer GEP is nuw only, and the index add is nuw with constant -10. Both split GEPs are expected to carry nuw only, with a combined constant offset of -11.
+define ptr @test_6(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_6( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 -11 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 -1 + %idx2 = add nuw i64 %i, -10 + %arrayidx2 = getelementptr nuw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Negative offsets. Base ptradd is nuw inbounds with offset -1, the outer GEP is nuw nusw, and the index add is nuw with constant -10. Both split GEPs are expected to carry nusw nuw, with a combined constant offset of -11. +define ptr @test_7(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_7( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 -11 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 -1 + %idx2 = add nuw i64 %i, -10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Mixed positive/negative offsets. Base ptradd is nuw only with offset 1, the outer GEP is nuw nusw, and the index add is nuw with constant -10. Both split GEPs are expected to carry nuw only, with a combined constant offset of -9. +define ptr @test_8(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_8( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 -9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw i8, ptr %p, i32 1 + %idx2 = add nuw i64 %i, -10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Mixed positive/negative offsets. Base ptradd is nuw inbounds with offset 1, the outer GEP is nuw nusw, and the index add is nuw with constant -10. Both split GEPs are expected to carry nusw nuw, with a combined constant offset of -9.
+define ptr @test_9(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_9( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 -9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %idx2 = add nuw i64 %i, -10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Mixed negative/positive offsets. Base ptradd is nuw only with offset -1, the outer GEP is nuw nusw, and the index add is nuw with constant 10. Both split GEPs are expected to carry nuw only, with a combined constant offset of 9. +define ptr @test_10(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_10( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw i8, ptr %p, i32 -1 + %idx2 = add nuw i64 %i, 10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Mixed negative/positive offsets. Base ptradd is nuw inbounds with offset -1, the outer GEP is nuw nusw, and the index add is nuw with constant 10. Both split GEPs are expected to carry nusw nuw, with a combined constant offset of 9. +define ptr @test_11(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_11( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 -1 + %idx2 = add nuw i64 %i, 10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. All positive offsets. Base ptradd is nuw nusw with offset 1, the outer GEP is nuw only, and the index add is nuw with constant 1. Both split GEPs are expected to carry nuw only, with a combined constant offset of 2.
+define ptr @test_12(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_12( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 1 + %idx2 = add nuw i64 %i, 1 + %arrayidx2 = getelementptr nuw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. All positive offsets. Base ptradd is nuw inbounds with offset 1, the outer GEP is nuw nusw, and the index add is nuw with constant 1. Both split GEPs are expected to carry nusw nuw, with a combined constant offset of 2. +define ptr @test_13(ptr %p, i64 %i) { +; CHECK-LABEL: define ptr @test_13( +; CHECK-SAME: ptr [[P:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %idx2 = add nuw i64 %i, 1 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Zext index with negative offsets. Base ptradd is nuw nusw with offset -1, the outer GEP is nuw only, and the index add is nuw with constant -10. Both split GEPs are expected to carry nuw only, with a combined constant offset of -11. +define ptr @test_14(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_14( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 -11 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 -1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, -10 + %arrayidx2 = getelementptr nuw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Zext index with negative offsets. Base ptradd is nuw nusw with offset -1, the outer GEP is nuw inbounds, and the index add is nuw with constant -10. Both split GEPs are expected to carry nusw nuw, with a combined constant offset of -11.
+define ptr @test_15(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_15( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 -11 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 -1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, -10 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Zext index with mixed positive/negative offsets. Base ptradd is nuw only with offset 1, the outer GEP is nuw nusw, and the index add is nuw with constant -10. Both split GEPs are expected to carry nuw only, with a combined constant offset of -9. +define ptr @test_16(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_16( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 -9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, -10 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Zext index with mixed positive/negative offsets. Base ptradd is nuw nusw with offset 1, the outer GEP is nuw inbounds, and the index add is nuw with constant -10. Both split GEPs are expected to carry nusw nuw, with a combined constant offset of -9. +define ptr @test_17(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_17( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 -9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, -10 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag.
Zext index with mixed negative/positive offsets. +define ptr @test_18(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_18( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; Base ptradd is nuw nusw with offset -1, the outer GEP is nuw only, and the index add is nuw with constant 10. Both split GEPs are expected to carry nuw only, with a combined constant offset of 9. + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 -1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 10 + %arrayidx2 = getelementptr nuw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags. Zext index with mixed negative/positive offsets. Base ptradd is nuw nusw with offset -1, the outer GEP is nuw inbounds, and the index add is nuw with constant 10. Both split GEPs are expected to carry nusw nuw, with a combined constant offset of 9. +define ptr @test_19(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_19( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 9 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw nusw i8, ptr %p, i32 -1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 10 + %arrayidx2 = getelementptr nuw inbounds i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw flag. Zext index with all positive offsets. Base ptradd is nuw only with offset 1, the outer GEP is nuw nusw, and the index add is nuw with constant 1. Both split GEPs are expected to carry nuw only, with a combined constant offset of 2. +define ptr @test_20(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_20( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nuw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; + %ptradd = getelementptr nuw i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +} + +; GEPs with nuw and nusw flags.
Zext index with all positive offsets. +define ptr @test_21(ptr %p, i32 %i) { +; CHECK-LABEL: define ptr @test_21( +; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw nuw i8, ptr [[P]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr nusw nuw i8, ptr [[TMP1]], i64 2 +; CHECK-NEXT: ret ptr [[ARRAYIDX22]] +; Base ptradd is nuw inbounds with offset 1, the outer GEP is nuw nusw, and the index add is nuw with constant 1. Both split GEPs are expected to carry nusw nuw, with a combined constant offset of 2. + %ptradd = getelementptr nuw inbounds i8, ptr %p, i32 1 + %i.prom = zext i32 %i to i64 + %idx2 = add nuw i64 %i.prom, 1 + %arrayidx2 = getelementptr nuw nusw i8, ptr %ptradd, i64 %idx2 + ret ptr %arrayidx2 +}