Skip to content

Optimization fails to reduce a loop when AVX is enabled #134735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
cuviper opened this issue Apr 7, 2025 · 3 comments
Closed

Optimization fails to reduce a loop when AVX is enabled #134735

cuviper opened this issue Apr 7, 2025 · 3 comments

Comments

@cuviper
Copy link
Member

cuviper commented Apr 7, 2025

Rust's issue-101082.rs test case has a simple sum over an array:

/// Sums a fixed six-element array with a by-value `for` loop.
///
/// The two FileCheck directives at the top of the body expect the optimized
/// IR for this function to be a bare `ret` of the constant 165
/// (23 + 16 + 54 + 3 + 60 + 9), typed either `i64` or `i32`.
#[no_mangle]
pub fn test() -> usize {
    // CHECK-LABEL: @test(
    // CHECK: ret {{i64|i32}} 165
    let values = [23, 16, 54, 3, 60, 9];
    let mut acc = 0;
    // The array is consumed by value here, not iterated by reference.
    for item in values {
        acc += item;
    }
    acc
}

I captured the IR (included at the end of this report), and confirmed that opt -S -O2 reduces the function to a single instruction returning the answer, as our CHECK lines expect:

; Expected result: the whole function is folded to a single constant return.
define noundef i64 @test() unnamed_addr #0 personality ptr @rust_eh_personality {
bb1.i.i.preheader:
  ; 165 = 23 + 16 + 54 + 3 + 60 + 9, the sum of the source array.
  ret i64 165
}

However, with -mcpu=x86-64-v3, it produces a vectorized calculation:

; Result with -mcpu=x86-64-v3: the sum is left as a run-time 4-lane vector
; computation over values that were stored as constants a few lines earlier.
define noundef i64 @test() unnamed_addr #0 personality ptr @rust_eh_personality {
bb1.i.i.preheader:
  ; 64-byte stack slot; the six array elements are written at byte offsets 16..63.
  %iter = alloca [64 x i8], align 8
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %iter)
  %_3.sroa.5.0.iter.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 16
  ; Elements 0-3 are written with one vector store, elements 4 and 5 with scalar stores.
  store <4 x i64> <i64 23, i64 16, i64 54, i64 3>, ptr %_3.sroa.5.0.iter.sroa_idx, align 8
  %_3.sroa.5.sroa.7.0._3.sroa.5.0.iter.sroa_idx.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 48
  store i64 60, ptr %_3.sroa.5.sroa.7.0._3.sroa.5.0.iter.sroa_idx.sroa_idx, align 8
  %_3.sroa.5.sroa.8.0._3.sroa.5.0.iter.sroa_idx.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 56
  store i64 9, ptr %_3.sroa.5.sroa.8.0._3.sroa.5.0.iter.sroa_idx.sroa_idx, align 8
  ; Reload of elements 0-3 from the address just stored to.
  %unmaskedload = load <4 x i64>, ptr %_3.sroa.5.0.iter.sroa_idx, align 8, !alias.scope !3
  %0 = getelementptr inbounds nuw i8, ptr %iter, i64 48
  ; Masked load of elements 4 and 5 into lanes 0 and 1; lanes 2 and 3 are
  ; masked off and take the poison passthrough operand.
  %wide.masked.load.1 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr nonnull %0, i32 8, <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i64> poison), !alias.scope !3
  %1 = add <4 x i64> %wide.masked.load.1, %unmaskedload
  ; Keep the summed lanes 0-1 of %1 and take lanes 2-3 from %unmaskedload
  ; (shuffle indices 6 and 7 select elements 2 and 3 of the second operand).
  %2 = shufflevector <4 x i64> %1, <4 x i64> %unmaskedload, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ; Horizontal add of <60+23, 9+16, 54, 3>, which is 165.
  %3 = tail call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %2)
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %iter)
  ret i64 %3
}

It's also vectorized with the default CPU and just enabling -mattr=+avx:

; Result with the default CPU plus -mattr=+avx: the same constants are stored
; and then summed at run time, this time with 2-lane vectors.
define noundef i64 @test() unnamed_addr #0 personality ptr @rust_eh_personality {
bb1.i.i.preheader:
  ; 64-byte stack slot; the six array elements are written at byte offsets 16..63.
  %iter = alloca [64 x i8], align 8
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %iter)
  %_3.sroa.5.0.iter.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 16
  store <4 x i64> <i64 23, i64 16, i64 54, i64 3>, ptr %_3.sroa.5.0.iter.sroa_idx, align 8
  %_3.sroa.5.sroa.7.0._3.sroa.5.0.iter.sroa_idx.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 48
  store i64 60, ptr %_3.sroa.5.sroa.7.0._3.sroa.5.0.iter.sroa_idx.sroa_idx, align 8
  %_3.sroa.5.sroa.8.0._3.sroa.5.0.iter.sroa_idx.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 56
  store i64 9, ptr %_3.sroa.5.sroa.8.0._3.sroa.5.0.iter.sroa_idx.sroa_idx, align 8
  ; Three pairwise loads: elements 0-1 (offset 16), 2-3 (offset 32), 4-5 (offset 48).
  %unmaskedload = load <2 x i64>, ptr %_3.sroa.5.0.iter.sroa_idx, align 8, !alias.scope !3
  %0 = getelementptr inbounds nuw i8, ptr %iter, i64 32
  %unmaskedload16 = load <2 x i64>, ptr %0, align 8, !alias.scope !3
  %1 = add <2 x i64> %unmaskedload16, %unmaskedload
  %2 = getelementptr inbounds nuw i8, ptr %iter, i64 48
  %unmaskedload17 = load <2 x i64>, ptr %2, align 8, !alias.scope !3
  %3 = add <2 x i64> %unmaskedload17, %1
  ; Horizontal add of <23+54+60, 16+3+9>, which is 165.
  %4 = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %3)
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %iter)
  ret i64 %4
}

My context is rust-lang/rust#138380, where this testcase is failing to optimize as expected with Ubuntu's llvm-20, and I have confirmed the reproducer with LLVM main. Oddly, that test has had no problem with x86-64-v3 when using Rust's bundled LLVM 20 in CI, but I suspect other factors are perturbing the input in that case, because the CI-built opt still fails the same way on this exact reproducer IR.

LLVM IR reproducer
; ModuleID = '<stdin>'
source_filename = "issue_101082.775d9ed26ae14e6e-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@alloc_ed8641ebea8e5515740d4eb49a916ff5 = private unnamed_addr constant [218 x i8] c"unsafe precondition(s) violated: ptr::read requires that the pointer argument is aligned and non-null\0A\0AThis indicates a bug in the program. This Undefined Behavior check is optional, and cannot be relied on for safety.", align 1
@alloc_fad0cd83b7d1858a846a172eb260e593 = private unnamed_addr constant [42 x i8] c"is_aligned_to: align is not a power-of-two", align 1
@alloc_e92e94d0ff530782b571cfd99ec66aef = private unnamed_addr constant <{ ptr, [8 x i8] }> <{ ptr @alloc_fad0cd83b7d1858a846a172eb260e593, [8 x i8] c"*\00\00\00\00\00\00\00" }>, align 8
@0 = private unnamed_addr constant <{ [8 x i8], [8 x i8] }> <{ [8 x i8] zeroinitializer, [8 x i8] undef }>, align 8
@alloc_57f39e09f7c9ad4245f74159f6bbb2c2 = private unnamed_addr constant [52 x i8] c"/rustc/FAKE_PREFIX/library/core/src/ptr/const_ptr.rs", align 1
@alloc_55a78c643ac9851d3ad8dc871da622ba = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_57f39e09f7c9ad4245f74159f6bbb2c2, [16 x i8] c"4\00\00\00\00\00\00\00\0D\00\00\00\C3\05\00\00" }>, align 8
@alloc_ea57c52ffecfc6f596231aa6e610f772 = private unnamed_addr constant [39 x i8] c"assertion failed: self.start < self.end", align 1
@alloc_6fbb54c7cb7260f04218cc51e19e04e4 = private unnamed_addr constant [54 x i8] c"/rustc/FAKE_PREFIX/library/core/src/ops/index_range.rs", align 1
@alloc_a35bb76379a3b55fc8cbe20b8dab1d46 = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_6fbb54c7cb7260f04218cc51e19e04e4, [16 x i8] c"6\00\00\00\00\00\00\00\09\00\00\008\00\00\00" }>, align 8

; Precondition check for ptr::read. Returns normally only when %addr is
; aligned to %align and, unless %is_zst is set, is also non-null. Otherwise it
; calls panic_nounwind with the "unsafe precondition(s) violated" message.
; Function Attrs: inlinehint nounwind nonlazybind uwtable
define internal void @_ZN4core3ptr4read18precondition_check17he50e497260f5c5aaE(ptr noundef %addr, i64 noundef %align, i1 noundef zeroext %is_zst) unnamed_addr #0 personality ptr @rust_eh_personality {
start:
  ; Alignment test; if the callee unwinds, control goes to %terminate.
  %_6 = invoke noundef zeroext i1 @"_ZN4core3ptr9const_ptr33_$LT$impl$u20$$BP$const$u20$T$GT$13is_aligned_to17hee3b7393df819147E"(ptr noundef %addr, i64 noundef %align)
          to label %bb3 unwind label %terminate

; Landing pad for an unwind out of is_aligned_to: calls panic_cannot_unwind.
terminate:                                        ; preds = %start
  %0 = landingpad { ptr, i32 }
          filter [0 x ptr] zeroinitializer
  %1 = extractvalue { ptr, i32 } %0, 0
  %2 = extractvalue { ptr, i32 } %0, 1
  call void @_ZN4core9panicking19panic_cannot_unwind17hf77d1d116ce25e3bE() #12
  unreachable

bb3:                                              ; preds = %start
  ; Aligned is the expected outcome; misaligned goes to the panic path via %bb5.
  %3 = call i1 @llvm.expect.i1(i1 %_6, i1 true)
  br i1 %3, label %bb4, label %bb5

bb5:                                              ; preds = %bb3
  br label %bb2

bb4:                                              ; preds = %bb3
  ; When %is_zst is set the null check in %bb7 is skipped.
  br i1 %is_zst, label %bb6, label %bb7

; Failure path shared by the misaligned and null cases.
bb2:                                              ; preds = %bb7, %bb5
  call void @_ZN4core9panicking14panic_nounwind17h7b81d1449a5f4f5aE(ptr noalias noundef nonnull readonly align 1 @alloc_ed8641ebea8e5515740d4eb49a916ff5, i64 noundef 218) #13
  unreachable

bb7:                                              ; preds = %bb4
  ; Null check: %_4 is true when %addr != 0.
  %_8 = ptrtoint ptr %addr to i64
  %_7 = icmp eq i64 %_8, 0
  %_4 = xor i1 %_7, true
  %4 = call i1 @llvm.expect.i1(i1 %_4, i1 true)
  br i1 %4, label %bb1, label %bb2

bb6:                                              ; preds = %bb4
  br label %bb1

bb1:                                              ; preds = %bb6, %bb7
  ret void
}

; drop_in_place for array::IntoIter<usize, 6>: forwards %_1 to the type's
; Drop::drop implementation and does nothing else.
; Function Attrs: nonlazybind uwtable
define internal void @"_ZN4core3ptr71drop_in_place$LT$core..array..iter..IntoIter$LT$usize$C$6_usize$GT$$GT$17hd85b6451ff2beb57E"(ptr noalias noundef align 8 dereferenceable(64) %_1) unnamed_addr #1 {
start:
  call void @"_ZN82_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..ops..drop..Drop$GT$4drop17h3e57fc25945276b8E"(ptr noalias noundef align 8 dereferenceable(64) %_1)
  ret void
}

; Returns true when the address in %self is a multiple of %align.
; If %align does not have exactly one bit set (ctpop != 1) it calls panic_fmt
; instead; the message constant reached from %bb2 reads
; "is_aligned_to: align is not a power-of-two".
; Function Attrs: inlinehint nonlazybind uwtable
define internal noundef zeroext i1 @"_ZN4core3ptr9const_ptr33_$LT$impl$u20$$BP$const$u20$T$GT$13is_aligned_to17hee3b7393df819147E"(ptr noundef %self, i64 noundef %align) unnamed_addr #2 {
start:
  %0 = alloca [4 x i8], align 4
  %_4 = alloca [48 x i8], align 8
  call void @llvm.lifetime.start.p0(i64 4, ptr %0)
  ; Population count of %align, round-tripped through the 4-byte stack slot %0.
  %1 = call i64 @llvm.ctpop.i64(i64 %align)
  %2 = trunc i64 %1 to i32
  store i32 %2, ptr %0, align 4
  %_9 = load i32, ptr %0, align 4, !noundef !3
  call void @llvm.lifetime.end.p0(i64 4, ptr %0)
  %3 = icmp eq i32 %_9, 1
  %4 = call i1 @llvm.expect.i1(i1 %3, i1 true)
  br i1 %4, label %bb1, label %bb2

bb1:                                              ; preds = %start
  ; Result is (address & (align - 1)) == 0.
  %_6 = ptrtoint ptr %self to i64
  %_8.0 = sub i64 %align, 1
  ; %_8.1 is computed but has no use in this function.
  %_8.1 = icmp ult i64 %align, 1
  %_5 = and i64 %_6, %_8.0
  %_0 = icmp eq i64 %_5, 0
  ret i1 %_0

bb2:                                              ; preds = %start
  ; Fills the 48-byte buffer %_4 and hands it to panic_fmt together with a
  ; constant that points at the const_ptr.rs path string.
  ; NOTE(review): the buffer is presumably a core::fmt::Arguments value; confirm.
  call void @llvm.lifetime.start.p0(i64 48, ptr %_4)
  %5 = getelementptr inbounds i8, ptr %_4, i64 32
  store ptr @alloc_e92e94d0ff530782b571cfd99ec66aef, ptr %5, align 8
  %6 = getelementptr inbounds i8, ptr %5, i64 8
  store i64 1, ptr %6, align 8
  %7 = load ptr, ptr @0, align 8, !align !4, !noundef !3
  %8 = load i64, ptr getelementptr inbounds (i8, ptr @0, i64 8), align 8
  %9 = getelementptr inbounds i8, ptr %_4, i64 16
  store ptr %7, ptr %9, align 8
  %10 = getelementptr inbounds i8, ptr %9, i64 8
  store i64 %8, ptr %10, align 8
  store ptr inttoptr (i64 8 to ptr), ptr %_4, align 8
  %11 = getelementptr inbounds i8, ptr %_4, i64 8
  store i64 0, ptr %11, align 8
  call void @_ZN4core9panicking9panic_fmt17hacfb58ec817221c8E(ptr noalias nocapture noundef readonly align 8 dereferenceable(48) %_4, ptr noalias noundef readonly align 8 dereferenceable(24) @alloc_55a78c643ac9851d3ad8dc871da622ba) #14
  unreachable
}

; IntoIter::as_mut_slice: returns a { pointer, length } pair built from the two
; i64 index words at the start of %self and the element storage at offset 16.
; Function Attrs: nonlazybind uwtable
define internal { ptr, i64 } @"_ZN4core5array4iter21IntoIter$LT$T$C$_$GT$12as_mut_slice17h1cd4fd74799a8d07E"(ptr noalias noundef align 8 dereferenceable(64) %self) unnamed_addr #1 {
start:
  %index = alloca [16 x i8], align 8
  ; %_3 is the address of the element storage inside the iterator.
  %_3 = getelementptr inbounds i8, ptr %self, i64 16
  call void @llvm.lifetime.start.p0(i64 16, ptr %index)
  ; Copy the two index words (byte offsets 0 and 8 of %self) into %index.
  %0 = load i64, ptr %self, align 8, !noundef !3
  %1 = getelementptr inbounds i8, ptr %self, i64 8
  %2 = load i64, ptr %1, align 8, !noundef !3
  store i64 %0, ptr %index, align 8
  %3 = getelementptr inbounds i8, ptr %index, i64 8
  store i64 %2, ptr %3, align 8
  br label %bb3

bb3:                                              ; preds = %start
  ; length = word at offset 0 minus word at offset 8; the pointer is the
  ; storage base advanced by the offset-8 word, counted in i64 elements.
  %4 = getelementptr inbounds i8, ptr %index, i64 8
  %offset = load i64, ptr %4, align 8, !noundef !3
  %_16 = load i64, ptr %index, align 8, !noundef !3
  %len = sub nuw i64 %_16, %offset
  %_17 = getelementptr inbounds nuw i64, ptr %_3, i64 %offset
  call void @llvm.lifetime.end.p0(i64 16, ptr %index)
  %5 = insertvalue { ptr, i64 } poison, ptr %_17, 0
  %6 = insertvalue { ptr, i64 } %5, i64 %len, 1
  ret { ptr, i64 } %6

bb1:                                              ; No predecessors!
  unreachable

bb2:                                              ; No predecessors!
  unreachable
}

; <IntoIter as Drop>::drop: calls as_mut_slice on %self and unpacks the
; returned pair, but neither extracted value is used before returning.
; Function Attrs: nonlazybind uwtable
define internal void @"_ZN82_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..ops..drop..Drop$GT$4drop17h3e57fc25945276b8E"(ptr noalias noundef align 8 dereferenceable(64) %self) unnamed_addr #1 {
start:
  %0 = call { ptr, i64 } @"_ZN4core5array4iter21IntoIter$LT$T$C$_$GT$12as_mut_slice17h1cd4fd74799a8d07E"(ptr noalias noundef align 8 dereferenceable(64) %self)
  %_3.0 = extractvalue { ptr, i64 } %0, 0
  %_3.1 = extractvalue { ptr, i64 } %0, 1
  ret void
}

; <IndexRange as Iterator>::next: operates on two i64 words behind the pointer
; argument (byte offsets 0 and 8). Returns { tag, value }: tag 1 plus the old
; offset-8 word when the range is non-empty (that word is then incremented in
; place), or tag 0 when it is empty.
; NOTE(review): judging by the assertion text "self.start < self.end", offset 8
; holds `start` and offset 0 holds `end`; confirm against IndexRange's layout.
; Function Attrs: inlinehint nonlazybind uwtable
define internal { i64, i64 } @"_ZN93_$LT$core..ops..index_range..IndexRange$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h4e21b39aae59a907E"(ptr noalias noundef align 8 dereferenceable(16) %0) unnamed_addr #2 {
start:
  %_0 = alloca [16 x i8], align 8
  %self = alloca [8 x i8], align 8
  ; The pointer argument is spilled to %self and reloaded before every access.
  store ptr %0, ptr %self, align 8
  %1 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %_6 = load i64, ptr %1, align 8, !noundef !3
  %2 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %3 = getelementptr inbounds i8, ptr %2, i64 8
  %_7 = load i64, ptr %3, align 8, !noundef !3
  ; Remaining count = word at offset 0 minus word at offset 8.
  %_3 = sub nuw i64 %_6, %_7
  %_2 = icmp ugt i64 %_3, 0
  br i1 %_2, label %bb1, label %bb2

bb2:                                              ; preds = %start
  ; Empty range: copy the 16-byte constant @0 (zero tag, undef payload) into the result slot.
  %4 = load i64, ptr @0, align 8, !range !5, !noundef !3
  %5 = load i64, ptr getelementptr inbounds (i8, ptr @0, i64 8), align 8
  store i64 %4, ptr %_0, align 8
  %6 = getelementptr inbounds i8, ptr %_0, i64 8
  store i64 %5, ptr %6, align 8
  br label %bb3

bb1:                                              ; preds = %start
  ; Re-check offset-8 word < offset-0 word; failure goes to the panic in %bb5.
  %7 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %8 = getelementptr inbounds i8, ptr %7, i64 8
  %_9 = load i64, ptr %8, align 8, !noundef !3
  %9 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %_10 = load i64, ptr %9, align 8, !noundef !3
  %_8 = icmp ult i64 %_9, %_10
  %10 = call i1 @llvm.expect.i1(i1 %_8, i1 true)
  br i1 %10, label %bb4, label %bb5

bb3:                                              ; preds = %bb7, %bb2
  ; Common exit: load the { tag, value } pair from %_0 and return it.
  %11 = load i64, ptr %_0, align 8, !range !5, !noundef !3
  %12 = getelementptr inbounds i8, ptr %_0, i64 8
  %13 = load i64, ptr %12, align 8
  %14 = insertvalue { i64, i64 } poison, i64 %11, 0
  %15 = insertvalue { i64, i64 } %14, i64 %13, 1
  ret { i64, i64 } %15

bb5:                                              ; preds = %bb1
  call void @_ZN4core9panicking5panic17heec94013e0df98e3E(ptr noalias noundef nonnull readonly align 1 @alloc_ea57c52ffecfc6f596231aa6e610f772, i64 noundef 39, ptr noalias noundef readonly align 8 dereferenceable(24) @alloc_a35bb76379a3b55fc8cbe20b8dab1d46) #14
  unreachable

bb4:                                              ; preds = %bb1
  ; %value is the current offset-8 word; it becomes the returned payload.
  %16 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %17 = getelementptr inbounds i8, ptr %16, i64 8
  %value = load i64, ptr %17, align 8, !noundef !3
  br label %bb7

bb7:                                              ; preds = %bb4
  ; Write value + 1 back to offset 8, then store { 1, value } in the result slot.
  %_12 = add nuw i64 %value, 1
  %18 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %19 = getelementptr inbounds i8, ptr %18, i64 8
  store i64 %_12, ptr %19, align 8
  %20 = getelementptr inbounds i8, ptr %_0, i64 8
  store i64 %value, ptr %20, align 8
  store i64 1, ptr %_0, align 8
  br label %bb3

bb6:                                              ; No predecessors!
  unreachable
}

; <array::IntoIter as Iterator>::next: asks IndexRange::next for the next index
; and, when one is returned, calls the closure helper to read the element at
; that index from the storage at %self + 16. Returns { tag, value } with tag 1
; for an element and tag 0 when the iterator is exhausted.
; Function Attrs: nonlazybind uwtable
define internal { i64, i64 } @"_ZN99_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h8bcf71c7ab2e62d8E"(ptr noalias noundef align 8 dereferenceable(64) %self) unnamed_addr #1 {
start:
  %self1 = alloca [16 x i8], align 8
  %_0 = alloca [16 x i8], align 8
  call void @llvm.lifetime.start.p0(i64 16, ptr %self1)
  ; %self is passed as a 16-byte object, so the index words sit at its start.
  %0 = call { i64, i64 } @"_ZN93_$LT$core..ops..index_range..IndexRange$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h4e21b39aae59a907E"(ptr noalias noundef align 8 dereferenceable(16) %self)
  %1 = extractvalue { i64, i64 } %0, 0
  %2 = extractvalue { i64, i64 } %0, 1
  ; Spill the returned pair to %self1 and reload its tag.
  store i64 %1, ptr %self1, align 8
  %3 = getelementptr inbounds i8, ptr %self1, i64 8
  store i64 %2, ptr %3, align 8
  %_5 = getelementptr inbounds i8, ptr %self, i64 16
  %_6 = load i64, ptr %self1, align 8, !range !5, !noundef !3
  %4 = getelementptr inbounds i8, ptr %self1, i64 8
  %5 = load i64, ptr %4, align 8
  ; The tag is limited to 0 or 1 by !range !5, so truncating to i1 tests it directly.
  %6 = trunc nuw i64 %_6 to i1
  br i1 %6, label %bb5, label %bb4

bb5:                                              ; preds = %start
  ; An index was produced: fetch the element and store { 1, element } in %_0.
  %7 = getelementptr inbounds i8, ptr %self1, i64 8
  %x = load i64, ptr %7, align 8, !noundef !3
  %_8 = call noundef i64 @"_ZN99_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next28_$u7b$$u7b$closure$u7d$$u7d$17ha5bbe4f336906f6aE"(ptr noalias noundef readonly align 8 dereferenceable(48) %_5, i64 noundef %x)
  %8 = getelementptr inbounds i8, ptr %_0, i64 8
  store i64 %_8, ptr %8, align 8
  store i64 1, ptr %_0, align 8
  br label %bb2

bb4:                                              ; preds = %start
  ; No index: only the tag word is written; the payload word is left unset.
  store i64 0, ptr %_0, align 8
  br label %bb2

bb2:                                              ; preds = %bb4, %bb5
  call void @llvm.lifetime.end.p0(i64 16, ptr %self1)
  %9 = load i64, ptr %_0, align 8, !range !5, !noundef !3
  %10 = getelementptr inbounds i8, ptr %_0, i64 8
  %11 = load i64, ptr %10, align 8
  %12 = insertvalue { i64, i64 } poison, i64 %9, 0
  %13 = insertvalue { i64, i64 } %12, i64 %11, 1
  ret { i64, i64 } %13

bb3:                                              ; No predecessors!
  unreachable
}

; Closure used by IntoIter::next: returns the i64 element at index %idx of the
; 48-byte storage %_1, after asserting to the optimizer that %idx < 6.
; Function Attrs: inlinehint nonlazybind uwtable
define internal noundef i64 @"_ZN99_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next28_$u7b$$u7b$closure$u7d$$u7d$17ha5bbe4f336906f6aE"(ptr noalias noundef readonly align 8 dereferenceable(48) %_1, i64 noundef %idx) unnamed_addr #2 {
start:
  %self1 = alloca [8 x i8], align 8
  %self = alloca [8 x i8], align 8
  br label %bb2

bb2:                                              ; preds = %start
  ; The index bound is supplied via llvm.assume rather than a branch.
  %_9 = icmp ult i64 %idx, 6
  call void @llvm.assume(i1 %_9)
  ; The element address is spilled through %self and %self1 before the load in %bb6.
  %0 = getelementptr inbounds nuw i64, ptr %_1, i64 %idx
  store ptr %0, ptr %self, align 8
  call void @llvm.lifetime.start.p0(i64 8, ptr %self1)
  %1 = load ptr, ptr %self, align 8, !noundef !3
  store ptr %1, ptr %self1, align 8
  br label %bb6

bb6:                                              ; preds = %bb4, %bb2
  %2 = load ptr, ptr %self1, align 8, !noundef !3
  %_0 = load i64, ptr %2, align 8, !noundef !3
  call void @llvm.lifetime.end.p0(i64 8, ptr %self1)
  ret i64 %_0

; This block has no predecessors, so the ptr::read precondition check in it is
; never reached.
bb4:                                              ; No predecessors!
  %_14 = load ptr, ptr %self, align 8, !noundef !3
  call void @_ZN4core3ptr4read18precondition_check17he50e497260f5c5aaE(ptr noundef %_14, i64 noundef 8, i1 noundef zeroext false) #15
  br label %bb6

bb1:                                              ; No predecessors!
  unreachable

bb7:                                              ; No predecessors!
  unreachable

bb8:                                              ; No predecessors!
  unreachable
}

; Unoptimized IR for the Rust `test` function: builds the six-element array on
; the stack, wraps it in an array::IntoIter, and loops calling next() while
; adding each returned element to %acc. The returned value is the final %acc.
; Function Attrs: nonlazybind uwtable
define noundef i64 @test() unnamed_addr #1 personality ptr @rust_eh_personality {
start:
  ; Stack slots: %0 saves the landing-pad pair during unwinding, %_9 is a
  ; temporary copy of the array, %_5 receives each next() result, %_3 and %iter
  ; are 64-byte iterator objects, %acc is the running sum, %values is the array.
  %0 = alloca [16 x i8], align 8
  %_9 = alloca [48 x i8], align 8
  %_5 = alloca [16 x i8], align 8
  %iter = alloca [64 x i8], align 8
  %_3 = alloca [64 x i8], align 8
  %acc = alloca [8 x i8], align 8
  %values = alloca [48 x i8], align 8
  ; Initialize the array with its six constant elements.
  %1 = getelementptr inbounds nuw i64, ptr %values, i64 0
  store i64 23, ptr %1, align 8
  %2 = getelementptr inbounds nuw i64, ptr %values, i64 1
  store i64 16, ptr %2, align 8
  %3 = getelementptr inbounds nuw i64, ptr %values, i64 2
  store i64 54, ptr %3, align 8
  %4 = getelementptr inbounds nuw i64, ptr %values, i64 3
  store i64 3, ptr %4, align 8
  %5 = getelementptr inbounds nuw i64, ptr %values, i64 4
  store i64 60, ptr %5, align 8
  %6 = getelementptr inbounds nuw i64, ptr %values, i64 5
  store i64 9, ptr %6, align 8
  call void @llvm.lifetime.start.p0(i64 8, ptr %acc)
  store i64 0, ptr %acc, align 8
  ; Build the iterator in %_3: the array is copied via %_9 to byte offset 16,
  ; the word at offset 0 is set to 6 and the word at offset 8 to 0. The whole
  ; object is then copied into %iter, which is what the loop uses.
  call void @llvm.lifetime.start.p0(i64 64, ptr %_3)
  call void @llvm.lifetime.start.p0(i64 48, ptr %_9)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_9, ptr align 8 %values, i64 48, i1 false)
  %7 = getelementptr inbounds i8, ptr %_3, i64 16
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %7, ptr align 8 %_9, i64 48, i1 false)
  store i64 6, ptr %_3, align 8
  %8 = getelementptr inbounds i8, ptr %_3, i64 8
  store i64 0, ptr %8, align 8
  call void @llvm.lifetime.end.p0(i64 48, ptr %_9)
  call void @llvm.lifetime.start.p0(i64 64, ptr %iter)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %iter, ptr align 8 %_3, i64 64, i1 false)
  br label %bb1

; Loop header: fetch the next element; an unwind from next() goes to %cleanup.
bb1:                                              ; preds = %bb4, %start
  call void @llvm.lifetime.start.p0(i64 16, ptr %_5)
  %9 = invoke { i64, i64 } @"_ZN99_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h8bcf71c7ab2e62d8E"(ptr noalias noundef align 8 dereferenceable(64) %iter)
          to label %bb2 unwind label %cleanup

; Unwind path: drop the iterator; a second unwind from the drop goes to %terminate.
bb7:                                              ; preds = %cleanup
  invoke void @"_ZN4core3ptr71drop_in_place$LT$core..array..iter..IntoIter$LT$usize$C$6_usize$GT$$GT$17hd85b6451ff2beb57E"(ptr noalias noundef align 8 dereferenceable(64) %iter) #16
          to label %bb8 unwind label %terminate

cleanup:                                          ; preds = %bb1
  ; Save the landing-pad { ptr, i32 } pair in %0 so %bb8 can resume with it.
  %10 = landingpad { ptr, i32 }
          cleanup
  %11 = extractvalue { ptr, i32 } %10, 0
  %12 = extractvalue { ptr, i32 } %10, 1
  call void @llvm.lifetime.start.p0(i64 16, ptr %0)
  store ptr %11, ptr %0, align 8
  %13 = getelementptr inbounds i8, ptr %0, i64 8
  store i32 %12, ptr %13, align 8
  br label %bb7

bb2:                                              ; preds = %bb1
  ; Spill the { tag, value } result to %_5 and branch on the tag:
  ; set means an element was returned (%bb4), clear means exhausted (%bb5).
  %14 = extractvalue { i64, i64 } %9, 0
  %15 = extractvalue { i64, i64 } %9, 1
  store i64 %14, ptr %_5, align 8
  %16 = getelementptr inbounds i8, ptr %_5, i64 8
  store i64 %15, ptr %16, align 8
  %_7 = load i64, ptr %_5, align 8, !range !5, !noundef !3
  %17 = getelementptr inbounds i8, ptr %_5, i64 8
  %18 = load i64, ptr %17, align 8
  %19 = trunc nuw i64 %_7 to i1
  br i1 %19, label %bb4, label %bb5

bb4:                                              ; preds = %bb2
  ; Loop body: acc += item, then back to the header.
  %20 = getelementptr inbounds i8, ptr %_5, i64 8
  %item = load i64, ptr %20, align 8, !noundef !3
  %21 = load i64, ptr %acc, align 8, !noundef !3
  %22 = add i64 %21, %item
  store i64 %22, ptr %acc, align 8
  call void @llvm.lifetime.end.p0(i64 16, ptr %_5)
  br label %bb1

bb5:                                              ; preds = %bb2
  ; Normal exit: drop the iterator and return the accumulated sum.
  call void @llvm.lifetime.end.p0(i64 16, ptr %_5)
  call void @"_ZN4core3ptr71drop_in_place$LT$core..array..iter..IntoIter$LT$usize$C$6_usize$GT$$GT$17hd85b6451ff2beb57E"(ptr noalias noundef align 8 dereferenceable(64) %iter)
  call void @llvm.lifetime.end.p0(i64 64, ptr %iter)
  call void @llvm.lifetime.end.p0(i64 64, ptr %_3)
  %_0 = load i64, ptr %acc, align 8, !noundef !3
  call void @llvm.lifetime.end.p0(i64 8, ptr %acc)
  ret i64 %_0

bb3:                                              ; No predecessors!
  unreachable

; Reached when dropping the iterator unwinds during cleanup: calls panic_in_cleanup.
terminate:                                        ; preds = %bb7
  %23 = landingpad { ptr, i32 }
          filter [0 x ptr] zeroinitializer
  %24 = extractvalue { ptr, i32 } %23, 0
  %25 = extractvalue { ptr, i32 } %23, 1
  call void @_ZN4core9panicking16panic_in_cleanup17hcae40d8735d0599eE() #12
  unreachable

bb8:                                              ; preds = %bb7
  ; Rebuild the saved landing-pad pair from %0 and continue unwinding.
  %26 = load ptr, ptr %0, align 8, !noundef !3
  %27 = getelementptr inbounds i8, ptr %0, i64 8
  %28 = load i32, ptr %27, align 8, !noundef !3
  call void @llvm.lifetime.end.p0(i64 16, ptr %0)
  %29 = insertvalue { ptr, i32 } poison, ptr %26, 0
  %30 = insertvalue { ptr, i32 } %29, i32 %28, 1
  resume { ptr, i32 } %30
}

; Function Attrs: nounwind nonlazybind uwtable
declare noundef range(i32 0, 10) i32 @rust_eh_personality(i32 noundef, i32 noundef, i64 noundef, ptr noundef, ptr noundef) unnamed_addr #3

; Function Attrs: cold minsize noinline noreturn nounwind nonlazybind optsize uwtable
declare void @_ZN4core9panicking19panic_cannot_unwind17hf77d1d116ce25e3bE() unnamed_addr #4

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare i1 @llvm.expect.i1(i1, i1) #5

; Function Attrs: cold noinline noreturn nounwind nonlazybind uwtable
declare void @_ZN4core9panicking14panic_nounwind17h7b81d1449a5f4f5aE(ptr noalias noundef nonnull readonly align 1, i64 noundef) unnamed_addr #6

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.ctpop.i64(i64) #7

; Function Attrs: cold noinline noreturn nonlazybind uwtable
declare void @_ZN4core9panicking9panic_fmt17hacfb58ec817221c8E(ptr noalias nocapture noundef readonly align 8 dereferenceable(48), ptr noalias noundef readonly align 8 dereferenceable(24)) unnamed_addr #8

; Function Attrs: cold noinline noreturn nonlazybind uwtable
declare void @_ZN4core9panicking5panic17heec94013e0df98e3E(ptr noalias noundef nonnull readonly align 1, i64 noundef, ptr noalias noundef readonly align 8 dereferenceable(24)) unnamed_addr #8

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #9

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #10

; Function Attrs: cold minsize noinline noreturn nounwind nonlazybind optsize uwtable
declare void @_ZN4core9panicking16panic_in_cleanup17hcae40d8735d0599eE() unnamed_addr #4

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #11

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #11

attributes #0 = { inlinehint nounwind nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #1 = { nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #2 = { inlinehint nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #3 = { nounwind nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #4 = { cold minsize noinline noreturn nounwind nonlazybind optsize uwtable "probe-stack"="inline-asm" }
attributes #5 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #6 = { cold noinline noreturn nounwind nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #7 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #8 = { cold noinline noreturn nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #9 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
attributes #10 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #11 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #12 = { cold noreturn nounwind }
attributes #13 = { noreturn nounwind }
attributes #14 = { noreturn }
attributes #15 = { nounwind }
attributes #16 = { cold }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{!"rustc version 1.88.0-nightly"}
!3 = !{}
!4 = !{i64 8}
!5 = !{i64 0, i64 2}

(The original IR also had explicit target-cpu attributes, which I had to remove to let command-line options take effect.)

@llvmbot
Copy link
Member

llvmbot commented Apr 7, 2025

@llvm/issue-subscribers-backend-x86

Author: Josh Stone (cuviper)

Rust's `issue-101082.rs` test case has a simple sum over an array:
/// Sums a fixed six-element array with a by-value `for` loop.
///
/// The two FileCheck directives at the top of the body expect the optimized
/// IR for this function to be a bare `ret` of the constant 165
/// (23 + 16 + 54 + 3 + 60 + 9), typed either `i64` or `i32`.
#[no_mangle]
pub fn test() -> usize {
    // CHECK-LABEL: @test(
    // CHECK: ret {{i64|i32}} 165
    let values = [23, 16, 54, 3, 60, 9];
    let mut acc = 0;
    // The array is consumed by value here, not iterated by reference.
    for item in values {
        acc += item;
    }
    acc
}

I captured the IR (included at the end of this report), and confirmed that opt -S -O2 reduces the function to a single instruction returning the answer, as our CHECK lines expect:

define noundef i64 @test() unnamed_addr #0 personality ptr @rust_eh_personality {
bb1.i.i.preheader:
  ret i64 165
}

However, with -mcpu=x86-64-v3, it produces a vectorized calculation:

define noundef i64 @test() unnamed_addr #0 personality ptr @rust_eh_personality {
bb1.i.i.preheader:
  %iter = alloca [64 x i8], align 8
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %iter)
  %_3.sroa.5.0.iter.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 16
  store <4 x i64> <i64 23, i64 16, i64 54, i64 3>, ptr %_3.sroa.5.0.iter.sroa_idx, align 8
  %_3.sroa.5.sroa.7.0._3.sroa.5.0.iter.sroa_idx.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 48
  store i64 60, ptr %_3.sroa.5.sroa.7.0._3.sroa.5.0.iter.sroa_idx.sroa_idx, align 8
  %_3.sroa.5.sroa.8.0._3.sroa.5.0.iter.sroa_idx.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 56
  store i64 9, ptr %_3.sroa.5.sroa.8.0._3.sroa.5.0.iter.sroa_idx.sroa_idx, align 8
  %unmaskedload = load <4 x i64>, ptr %_3.sroa.5.0.iter.sroa_idx, align 8, !alias.scope !3
  %0 = getelementptr inbounds nuw i8, ptr %iter, i64 48
  %wide.masked.load.1 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr nonnull %0, i32 8, <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i64> poison), !alias.scope !3
  %1 = add <4 x i64> %wide.masked.load.1, %unmaskedload
  %2 = shufflevector <4 x i64> %1, <4 x i64> %unmaskedload, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  %3 = tail call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %2)
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %iter)
  ret i64 %3
}

It's also vectorized with the default CPU and just enabling -mattr=+avx:

define noundef i64 @test() unnamed_addr #0 personality ptr @rust_eh_personality {
bb1.i.i.preheader:
  %iter = alloca [64 x i8], align 8
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %iter)
  %_3.sroa.5.0.iter.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 16
  store <4 x i64> <i64 23, i64 16, i64 54, i64 3>, ptr %_3.sroa.5.0.iter.sroa_idx, align 8
  %_3.sroa.5.sroa.7.0._3.sroa.5.0.iter.sroa_idx.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 48
  store i64 60, ptr %_3.sroa.5.sroa.7.0._3.sroa.5.0.iter.sroa_idx.sroa_idx, align 8
  %_3.sroa.5.sroa.8.0._3.sroa.5.0.iter.sroa_idx.sroa_idx = getelementptr inbounds nuw i8, ptr %iter, i64 56
  store i64 9, ptr %_3.sroa.5.sroa.8.0._3.sroa.5.0.iter.sroa_idx.sroa_idx, align 8
  %unmaskedload = load <2 x i64>, ptr %_3.sroa.5.0.iter.sroa_idx, align 8, !alias.scope !3
  %0 = getelementptr inbounds nuw i8, ptr %iter, i64 32
  %unmaskedload16 = load <2 x i64>, ptr %0, align 8, !alias.scope !3
  %1 = add <2 x i64> %unmaskedload16, %unmaskedload
  %2 = getelementptr inbounds nuw i8, ptr %iter, i64 48
  %unmaskedload17 = load <2 x i64>, ptr %2, align 8, !alias.scope !3
  %3 = add <2 x i64> %unmaskedload17, %1
  %4 = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %3)
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %iter)
  ret i64 %4
}

My context is rust-lang/rust#138380, where this testcase is failing to optimize as expected with Ubuntu's llvm-20, and I have confirmed the reproducer with LLVM main. Oddly, that test has had no problem with x86-64-v3 when using Rust's bundled LLVM 20 in CI, but I suspect other factors are perturbing the input in that case, because the CI-built opt still fails the same way on this exact reproducer IR.

<details>
<summary>LLVM IR reproducer</summary>

; ModuleID = '<stdin>'
source_filename = "issue_101082.775d9ed26ae14e6e-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@alloc_ed8641ebea8e5515740d4eb49a916ff5 = private unnamed_addr constant [218 x i8] c"unsafe precondition(s) violated: ptr::read requires that the pointer argument is aligned and non-null\0A\0AThis indicates a bug in the program. This Undefined Behavior check is optional, and cannot be relied on for safety.", align 1
@alloc_fad0cd83b7d1858a846a172eb260e593 = private unnamed_addr constant [42 x i8] c"is_aligned_to: align is not a power-of-two", align 1
@alloc_e92e94d0ff530782b571cfd99ec66aef = private unnamed_addr constant <{ ptr, [8 x i8] }> <{ ptr @alloc_fad0cd83b7d1858a846a172eb260e593, [8 x i8] c"*\00\00\00\00\00\00\00" }>, align 8
@0 = private unnamed_addr constant <{ [8 x i8], [8 x i8] }> <{ [8 x i8] zeroinitializer, [8 x i8] undef }>, align 8
@alloc_57f39e09f7c9ad4245f74159f6bbb2c2 = private unnamed_addr constant [52 x i8] c"/rustc/FAKE_PREFIX/library/core/src/ptr/const_ptr.rs", align 1
@alloc_55a78c643ac9851d3ad8dc871da622ba = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_57f39e09f7c9ad4245f74159f6bbb2c2, [16 x i8] c"4\00\00\00\00\00\00\00\0D\00\00\00\C3\05\00\00" }>, align 8
@alloc_ea57c52ffecfc6f596231aa6e610f772 = private unnamed_addr constant [39 x i8] c"assertion failed: self.start < self.end", align 1
@alloc_6fbb54c7cb7260f04218cc51e19e04e4 = private unnamed_addr constant [54 x i8] c"/rustc/FAKE_PREFIX/library/core/src/ops/index_range.rs", align 1
@alloc_a35bb76379a3b55fc8cbe20b8dab1d46 = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_6fbb54c7cb7260f04218cc51e19e04e4, [16 x i8] c"6\00\00\00\00\00\00\00\09\00\00\008\00\00\00" }>, align 8

; Function Attrs: inlinehint nounwind nonlazybind uwtable
define internal void @<!-- -->_ZN4core3ptr4read18precondition_check17he50e497260f5c5aaE(ptr noundef %addr, i64 noundef %align, i1 noundef zeroext %is_zst) unnamed_addr #<!-- -->0 personality ptr @<!-- -->rust_eh_personality {
start:
  %_6 = invoke noundef zeroext i1 @"_ZN4core3ptr9const_ptr33_$LT$impl$u20$$BP$const$u20$T$GT$13is_aligned_to17hee3b7393df819147E"(ptr noundef %addr, i64 noundef %align)
          to label %bb3 unwind label %terminate

terminate:                                        ; preds = %start
  %0 = landingpad { ptr, i32 }
          filter [0 x ptr] zeroinitializer
  %1 = extractvalue { ptr, i32 } %0, 0
  %2 = extractvalue { ptr, i32 } %0, 1
  call void @<!-- -->_ZN4core9panicking19panic_cannot_unwind17hf77d1d116ce25e3bE() #<!-- -->12
  unreachable

bb3:                                              ; preds = %start
  %3 = call i1 @<!-- -->llvm.expect.i1(i1 %_6, i1 true)
  br i1 %3, label %bb4, label %bb5

bb5:                                              ; preds = %bb3
  br label %bb2

bb4:                                              ; preds = %bb3
  br i1 %is_zst, label %bb6, label %bb7

bb2:                                              ; preds = %bb7, %bb5
  call void @<!-- -->_ZN4core9panicking14panic_nounwind17h7b81d1449a5f4f5aE(ptr noalias noundef nonnull readonly align 1 @<!-- -->alloc_ed8641ebea8e5515740d4eb49a916ff5, i64 noundef 218) #<!-- -->13
  unreachable

bb7:                                              ; preds = %bb4
  %_8 = ptrtoint ptr %addr to i64
  %_7 = icmp eq i64 %_8, 0
  %_4 = xor i1 %_7, true
  %4 = call i1 @<!-- -->llvm.expect.i1(i1 %_4, i1 true)
  br i1 %4, label %bb1, label %bb2

bb6:                                              ; preds = %bb4
  br label %bb1

bb1:                                              ; preds = %bb6, %bb7
  ret void
}

; Function Attrs: nonlazybind uwtable
define internal void @"_ZN4core3ptr71drop_in_place$LT$core..array..iter..IntoIter$LT$usize$C$6_usize$GT$$GT$17hd85b6451ff2beb57E"(ptr noalias noundef align 8 dereferenceable(64) %_1) unnamed_addr #<!-- -->1 {
start:
  call void @"_ZN82_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..ops..drop..Drop$GT$4drop17h3e57fc25945276b8E"(ptr noalias noundef align 8 dereferenceable(64) %_1)
  ret void
}

; Function Attrs: inlinehint nonlazybind uwtable
define internal noundef zeroext i1 @"_ZN4core3ptr9const_ptr33_$LT$impl$u20$$BP$const$u20$T$GT$13is_aligned_to17hee3b7393df819147E"(ptr noundef %self, i64 noundef %align) unnamed_addr #<!-- -->2 {
start:
  %0 = alloca [4 x i8], align 4
  %_4 = alloca [48 x i8], align 8
  call void @<!-- -->llvm.lifetime.start.p0(i64 4, ptr %0)
  %1 = call i64 @<!-- -->llvm.ctpop.i64(i64 %align)
  %2 = trunc i64 %1 to i32
  store i32 %2, ptr %0, align 4
  %_9 = load i32, ptr %0, align 4, !noundef !3
  call void @<!-- -->llvm.lifetime.end.p0(i64 4, ptr %0)
  %3 = icmp eq i32 %_9, 1
  %4 = call i1 @<!-- -->llvm.expect.i1(i1 %3, i1 true)
  br i1 %4, label %bb1, label %bb2

bb1:                                              ; preds = %start
  %_6 = ptrtoint ptr %self to i64
  %_8.0 = sub i64 %align, 1
  %_8.1 = icmp ult i64 %align, 1
  %_5 = and i64 %_6, %_8.0
  %_0 = icmp eq i64 %_5, 0
  ret i1 %_0

bb2:                                              ; preds = %start
  call void @<!-- -->llvm.lifetime.start.p0(i64 48, ptr %_4)
  %5 = getelementptr inbounds i8, ptr %_4, i64 32
  store ptr @<!-- -->alloc_e92e94d0ff530782b571cfd99ec66aef, ptr %5, align 8
  %6 = getelementptr inbounds i8, ptr %5, i64 8
  store i64 1, ptr %6, align 8
  %7 = load ptr, ptr @<!-- -->0, align 8, !align !4, !noundef !3
  %8 = load i64, ptr getelementptr inbounds (i8, ptr @<!-- -->0, i64 8), align 8
  %9 = getelementptr inbounds i8, ptr %_4, i64 16
  store ptr %7, ptr %9, align 8
  %10 = getelementptr inbounds i8, ptr %9, i64 8
  store i64 %8, ptr %10, align 8
  store ptr inttoptr (i64 8 to ptr), ptr %_4, align 8
  %11 = getelementptr inbounds i8, ptr %_4, i64 8
  store i64 0, ptr %11, align 8
  call void @<!-- -->_ZN4core9panicking9panic_fmt17hacfb58ec817221c8E(ptr noalias nocapture noundef readonly align 8 dereferenceable(48) %_4, ptr noalias noundef readonly align 8 dereferenceable(24) @<!-- -->alloc_55a78c643ac9851d3ad8dc871da622ba) #<!-- -->14
  unreachable
}

; Function Attrs: nonlazybind uwtable
define internal { ptr, i64 } @"_ZN4core5array4iter21IntoIter$LT$T$C$_$GT$12as_mut_slice17h1cd4fd74799a8d07E"(ptr noalias noundef align 8 dereferenceable(64) %self) unnamed_addr #<!-- -->1 {
start:
  %index = alloca [16 x i8], align 8
  %_3 = getelementptr inbounds i8, ptr %self, i64 16
  call void @<!-- -->llvm.lifetime.start.p0(i64 16, ptr %index)
  %0 = load i64, ptr %self, align 8, !noundef !3
  %1 = getelementptr inbounds i8, ptr %self, i64 8
  %2 = load i64, ptr %1, align 8, !noundef !3
  store i64 %0, ptr %index, align 8
  %3 = getelementptr inbounds i8, ptr %index, i64 8
  store i64 %2, ptr %3, align 8
  br label %bb3

bb3:                                              ; preds = %start
  %4 = getelementptr inbounds i8, ptr %index, i64 8
  %offset = load i64, ptr %4, align 8, !noundef !3
  %_16 = load i64, ptr %index, align 8, !noundef !3
  %len = sub nuw i64 %_16, %offset
  %_17 = getelementptr inbounds nuw i64, ptr %_3, i64 %offset
  call void @<!-- -->llvm.lifetime.end.p0(i64 16, ptr %index)
  %5 = insertvalue { ptr, i64 } poison, ptr %_17, 0
  %6 = insertvalue { ptr, i64 } %5, i64 %len, 1
  ret { ptr, i64 } %6

bb1:                                              ; No predecessors!
  unreachable

bb2:                                              ; No predecessors!
  unreachable
}

; Function Attrs: nonlazybind uwtable
define internal void @"_ZN82_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..ops..drop..Drop$GT$4drop17h3e57fc25945276b8E"(ptr noalias noundef align 8 dereferenceable(64) %self) unnamed_addr #<!-- -->1 {
start:
  %0 = call { ptr, i64 } @"_ZN4core5array4iter21IntoIter$LT$T$C$_$GT$12as_mut_slice17h1cd4fd74799a8d07E"(ptr noalias noundef align 8 dereferenceable(64) %self)
  %_3.0 = extractvalue { ptr, i64 } %0, 0
  %_3.1 = extractvalue { ptr, i64 } %0, 1
  ret void
}

; Function Attrs: inlinehint nonlazybind uwtable
define internal { i64, i64 } @"_ZN93_$LT$core..ops..index_range..IndexRange$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h4e21b39aae59a907E"(ptr noalias noundef align 8 dereferenceable(16) %0) unnamed_addr #<!-- -->2 {
start:
  %_0 = alloca [16 x i8], align 8
  %self = alloca [8 x i8], align 8
  store ptr %0, ptr %self, align 8
  %1 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %_6 = load i64, ptr %1, align 8, !noundef !3
  %2 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %3 = getelementptr inbounds i8, ptr %2, i64 8
  %_7 = load i64, ptr %3, align 8, !noundef !3
  %_3 = sub nuw i64 %_6, %_7
  %_2 = icmp ugt i64 %_3, 0
  br i1 %_2, label %bb1, label %bb2

bb2:                                              ; preds = %start
  %4 = load i64, ptr @<!-- -->0, align 8, !range !5, !noundef !3
  %5 = load i64, ptr getelementptr inbounds (i8, ptr @<!-- -->0, i64 8), align 8
  store i64 %4, ptr %_0, align 8
  %6 = getelementptr inbounds i8, ptr %_0, i64 8
  store i64 %5, ptr %6, align 8
  br label %bb3

bb1:                                              ; preds = %start
  %7 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %8 = getelementptr inbounds i8, ptr %7, i64 8
  %_9 = load i64, ptr %8, align 8, !noundef !3
  %9 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %_10 = load i64, ptr %9, align 8, !noundef !3
  %_8 = icmp ult i64 %_9, %_10
  %10 = call i1 @<!-- -->llvm.expect.i1(i1 %_8, i1 true)
  br i1 %10, label %bb4, label %bb5

bb3:                                              ; preds = %bb7, %bb2
  %11 = load i64, ptr %_0, align 8, !range !5, !noundef !3
  %12 = getelementptr inbounds i8, ptr %_0, i64 8
  %13 = load i64, ptr %12, align 8
  %14 = insertvalue { i64, i64 } poison, i64 %11, 0
  %15 = insertvalue { i64, i64 } %14, i64 %13, 1
  ret { i64, i64 } %15

bb5:                                              ; preds = %bb1
  call void @<!-- -->_ZN4core9panicking5panic17heec94013e0df98e3E(ptr noalias noundef nonnull readonly align 1 @<!-- -->alloc_ea57c52ffecfc6f596231aa6e610f772, i64 noundef 39, ptr noalias noundef readonly align 8 dereferenceable(24) @<!-- -->alloc_a35bb76379a3b55fc8cbe20b8dab1d46) #<!-- -->14
  unreachable

bb4:                                              ; preds = %bb1
  %16 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %17 = getelementptr inbounds i8, ptr %16, i64 8
  %value = load i64, ptr %17, align 8, !noundef !3
  br label %bb7

bb7:                                              ; preds = %bb4
  %_12 = add nuw i64 %value, 1
  %18 = load ptr, ptr %self, align 8, !nonnull !3, !align !4, !noundef !3
  %19 = getelementptr inbounds i8, ptr %18, i64 8
  store i64 %_12, ptr %19, align 8
  %20 = getelementptr inbounds i8, ptr %_0, i64 8
  store i64 %value, ptr %20, align 8
  store i64 1, ptr %_0, align 8
  br label %bb3

bb6:                                              ; No predecessors!
  unreachable
}

; Function Attrs: nonlazybind uwtable
define internal { i64, i64 } @"_ZN99_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h8bcf71c7ab2e62d8E"(ptr noalias noundef align 8 dereferenceable(64) %self) unnamed_addr #<!-- -->1 {
start:
  %self1 = alloca [16 x i8], align 8
  %_0 = alloca [16 x i8], align 8
  call void @<!-- -->llvm.lifetime.start.p0(i64 16, ptr %self1)
  %0 = call { i64, i64 } @"_ZN93_$LT$core..ops..index_range..IndexRange$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h4e21b39aae59a907E"(ptr noalias noundef align 8 dereferenceable(16) %self)
  %1 = extractvalue { i64, i64 } %0, 0
  %2 = extractvalue { i64, i64 } %0, 1
  store i64 %1, ptr %self1, align 8
  %3 = getelementptr inbounds i8, ptr %self1, i64 8
  store i64 %2, ptr %3, align 8
  %_5 = getelementptr inbounds i8, ptr %self, i64 16
  %_6 = load i64, ptr %self1, align 8, !range !5, !noundef !3
  %4 = getelementptr inbounds i8, ptr %self1, i64 8
  %5 = load i64, ptr %4, align 8
  %6 = trunc nuw i64 %_6 to i1
  br i1 %6, label %bb5, label %bb4

bb5:                                              ; preds = %start
  %7 = getelementptr inbounds i8, ptr %self1, i64 8
  %x = load i64, ptr %7, align 8, !noundef !3
  %_8 = call noundef i64 @"_ZN99_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next28_$u7b$$u7b$closure$u7d$$u7d$17ha5bbe4f336906f6aE"(ptr noalias noundef readonly align 8 dereferenceable(48) %_5, i64 noundef %x)
  %8 = getelementptr inbounds i8, ptr %_0, i64 8
  store i64 %_8, ptr %8, align 8
  store i64 1, ptr %_0, align 8
  br label %bb2

bb4:                                              ; preds = %start
  store i64 0, ptr %_0, align 8
  br label %bb2

bb2:                                              ; preds = %bb4, %bb5
  call void @<!-- -->llvm.lifetime.end.p0(i64 16, ptr %self1)
  %9 = load i64, ptr %_0, align 8, !range !5, !noundef !3
  %10 = getelementptr inbounds i8, ptr %_0, i64 8
  %11 = load i64, ptr %10, align 8
  %12 = insertvalue { i64, i64 } poison, i64 %9, 0
  %13 = insertvalue { i64, i64 } %12, i64 %11, 1
  ret { i64, i64 } %13

bb3:                                              ; No predecessors!
  unreachable
}

; Function Attrs: inlinehint nonlazybind uwtable
define internal noundef i64 @"_ZN99_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next28_$u7b$$u7b$closure$u7d$$u7d$17ha5bbe4f336906f6aE"(ptr noalias noundef readonly align 8 dereferenceable(48) %_1, i64 noundef %idx) unnamed_addr #<!-- -->2 {
start:
  %self1 = alloca [8 x i8], align 8
  %self = alloca [8 x i8], align 8
  br label %bb2

bb2:                                              ; preds = %start
  %_9 = icmp ult i64 %idx, 6
  call void @<!-- -->llvm.assume(i1 %_9)
  %0 = getelementptr inbounds nuw i64, ptr %_1, i64 %idx
  store ptr %0, ptr %self, align 8
  call void @<!-- -->llvm.lifetime.start.p0(i64 8, ptr %self1)
  %1 = load ptr, ptr %self, align 8, !noundef !3
  store ptr %1, ptr %self1, align 8
  br label %bb6

bb6:                                              ; preds = %bb4, %bb2
  %2 = load ptr, ptr %self1, align 8, !noundef !3
  %_0 = load i64, ptr %2, align 8, !noundef !3
  call void @<!-- -->llvm.lifetime.end.p0(i64 8, ptr %self1)
  ret i64 %_0

bb4:                                              ; No predecessors!
  %_14 = load ptr, ptr %self, align 8, !noundef !3
  call void @<!-- -->_ZN4core3ptr4read18precondition_check17he50e497260f5c5aaE(ptr noundef %_14, i64 noundef 8, i1 noundef zeroext false) #<!-- -->15
  br label %bb6

bb1:                                              ; No predecessors!
  unreachable

bb7:                                              ; No predecessors!
  unreachable

bb8:                                              ; No predecessors!
  unreachable
}

; Function Attrs: nonlazybind uwtable
define noundef i64 @<!-- -->test() unnamed_addr #<!-- -->1 personality ptr @<!-- -->rust_eh_personality {
start:
  %0 = alloca [16 x i8], align 8
  %_9 = alloca [48 x i8], align 8
  %_5 = alloca [16 x i8], align 8
  %iter = alloca [64 x i8], align 8
  %_3 = alloca [64 x i8], align 8
  %acc = alloca [8 x i8], align 8
  %values = alloca [48 x i8], align 8
  %1 = getelementptr inbounds nuw i64, ptr %values, i64 0
  store i64 23, ptr %1, align 8
  %2 = getelementptr inbounds nuw i64, ptr %values, i64 1
  store i64 16, ptr %2, align 8
  %3 = getelementptr inbounds nuw i64, ptr %values, i64 2
  store i64 54, ptr %3, align 8
  %4 = getelementptr inbounds nuw i64, ptr %values, i64 3
  store i64 3, ptr %4, align 8
  %5 = getelementptr inbounds nuw i64, ptr %values, i64 4
  store i64 60, ptr %5, align 8
  %6 = getelementptr inbounds nuw i64, ptr %values, i64 5
  store i64 9, ptr %6, align 8
  call void @<!-- -->llvm.lifetime.start.p0(i64 8, ptr %acc)
  store i64 0, ptr %acc, align 8
  call void @<!-- -->llvm.lifetime.start.p0(i64 64, ptr %_3)
  call void @<!-- -->llvm.lifetime.start.p0(i64 48, ptr %_9)
  call void @<!-- -->llvm.memcpy.p0.p0.i64(ptr align 8 %_9, ptr align 8 %values, i64 48, i1 false)
  %7 = getelementptr inbounds i8, ptr %_3, i64 16
  call void @<!-- -->llvm.memcpy.p0.p0.i64(ptr align 8 %7, ptr align 8 %_9, i64 48, i1 false)
  store i64 6, ptr %_3, align 8
  %8 = getelementptr inbounds i8, ptr %_3, i64 8
  store i64 0, ptr %8, align 8
  call void @<!-- -->llvm.lifetime.end.p0(i64 48, ptr %_9)
  call void @<!-- -->llvm.lifetime.start.p0(i64 64, ptr %iter)
  call void @<!-- -->llvm.memcpy.p0.p0.i64(ptr align 8 %iter, ptr align 8 %_3, i64 64, i1 false)
  br label %bb1

bb1:                                              ; preds = %bb4, %start
  call void @<!-- -->llvm.lifetime.start.p0(i64 16, ptr %_5)
  %9 = invoke { i64, i64 } @"_ZN99_$LT$core..array..iter..IntoIter$LT$T$C$_$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h8bcf71c7ab2e62d8E"(ptr noalias noundef align 8 dereferenceable(64) %iter)
          to label %bb2 unwind label %cleanup

bb7:                                              ; preds = %cleanup
  invoke void @"_ZN4core3ptr71drop_in_place$LT$core..array..iter..IntoIter$LT$usize$C$6_usize$GT$$GT$17hd85b6451ff2beb57E"(ptr noalias noundef align 8 dereferenceable(64) %iter) #<!-- -->16
          to label %bb8 unwind label %terminate

cleanup:                                          ; preds = %bb1
  %10 = landingpad { ptr, i32 }
          cleanup
  %11 = extractvalue { ptr, i32 } %10, 0
  %12 = extractvalue { ptr, i32 } %10, 1
  call void @<!-- -->llvm.lifetime.start.p0(i64 16, ptr %0)
  store ptr %11, ptr %0, align 8
  %13 = getelementptr inbounds i8, ptr %0, i64 8
  store i32 %12, ptr %13, align 8
  br label %bb7

bb2:                                              ; preds = %bb1
  %14 = extractvalue { i64, i64 } %9, 0
  %15 = extractvalue { i64, i64 } %9, 1
  store i64 %14, ptr %_5, align 8
  %16 = getelementptr inbounds i8, ptr %_5, i64 8
  store i64 %15, ptr %16, align 8
  %_7 = load i64, ptr %_5, align 8, !range !5, !noundef !3
  %17 = getelementptr inbounds i8, ptr %_5, i64 8
  %18 = load i64, ptr %17, align 8
  %19 = trunc nuw i64 %_7 to i1
  br i1 %19, label %bb4, label %bb5

bb4:                                              ; preds = %bb2
  %20 = getelementptr inbounds i8, ptr %_5, i64 8
  %item = load i64, ptr %20, align 8, !noundef !3
  %21 = load i64, ptr %acc, align 8, !noundef !3
  %22 = add i64 %21, %item
  store i64 %22, ptr %acc, align 8
  call void @<!-- -->llvm.lifetime.end.p0(i64 16, ptr %_5)
  br label %bb1

bb5:                                              ; preds = %bb2
  call void @<!-- -->llvm.lifetime.end.p0(i64 16, ptr %_5)
  call void @"_ZN4core3ptr71drop_in_place$LT$core..array..iter..IntoIter$LT$usize$C$6_usize$GT$$GT$17hd85b6451ff2beb57E"(ptr noalias noundef align 8 dereferenceable(64) %iter)
  call void @<!-- -->llvm.lifetime.end.p0(i64 64, ptr %iter)
  call void @<!-- -->llvm.lifetime.end.p0(i64 64, ptr %_3)
  %_0 = load i64, ptr %acc, align 8, !noundef !3
  call void @<!-- -->llvm.lifetime.end.p0(i64 8, ptr %acc)
  ret i64 %_0

bb3:                                              ; No predecessors!
  unreachable

terminate:                                        ; preds = %bb7
  %23 = landingpad { ptr, i32 }
          filter [0 x ptr] zeroinitializer
  %24 = extractvalue { ptr, i32 } %23, 0
  %25 = extractvalue { ptr, i32 } %23, 1
  call void @<!-- -->_ZN4core9panicking16panic_in_cleanup17hcae40d8735d0599eE() #<!-- -->12
  unreachable

bb8:                                              ; preds = %bb7
  %26 = load ptr, ptr %0, align 8, !noundef !3
  %27 = getelementptr inbounds i8, ptr %0, i64 8
  %28 = load i32, ptr %27, align 8, !noundef !3
  call void @<!-- -->llvm.lifetime.end.p0(i64 16, ptr %0)
  %29 = insertvalue { ptr, i32 } poison, ptr %26, 0
  %30 = insertvalue { ptr, i32 } %29, i32 %28, 1
  resume { ptr, i32 } %30
}

; Function Attrs: nounwind nonlazybind uwtable
declare noundef range(i32 0, 10) i32 @<!-- -->rust_eh_personality(i32 noundef, i32 noundef, i64 noundef, ptr noundef, ptr noundef) unnamed_addr #<!-- -->3

; Function Attrs: cold minsize noinline noreturn nounwind nonlazybind optsize uwtable
declare void @<!-- -->_ZN4core9panicking19panic_cannot_unwind17hf77d1d116ce25e3bE() unnamed_addr #<!-- -->4

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare i1 @<!-- -->llvm.expect.i1(i1, i1) #<!-- -->5

; Function Attrs: cold noinline noreturn nounwind nonlazybind uwtable
declare void @<!-- -->_ZN4core9panicking14panic_nounwind17h7b81d1449a5f4f5aE(ptr noalias noundef nonnull readonly align 1, i64 noundef) unnamed_addr #<!-- -->6

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @<!-- -->llvm.ctpop.i64(i64) #<!-- -->7

; Function Attrs: cold noinline noreturn nonlazybind uwtable
declare void @<!-- -->_ZN4core9panicking9panic_fmt17hacfb58ec817221c8E(ptr noalias nocapture noundef readonly align 8 dereferenceable(48), ptr noalias noundef readonly align 8 dereferenceable(24)) unnamed_addr #<!-- -->8

; Function Attrs: cold noinline noreturn nonlazybind uwtable
declare void @<!-- -->_ZN4core9panicking5panic17heec94013e0df98e3E(ptr noalias noundef nonnull readonly align 1, i64 noundef, ptr noalias noundef readonly align 8 dereferenceable(24)) unnamed_addr #<!-- -->8

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @<!-- -->llvm.assume(i1 noundef) #<!-- -->9

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @<!-- -->llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #<!-- -->10

; Function Attrs: cold minsize noinline noreturn nounwind nonlazybind optsize uwtable
declare void @<!-- -->_ZN4core9panicking16panic_in_cleanup17hcae40d8735d0599eE() unnamed_addr #<!-- -->4

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @<!-- -->llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #<!-- -->11

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @<!-- -->llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #<!-- -->11

attributes #<!-- -->0 = { inlinehint nounwind nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #<!-- -->1 = { nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #<!-- -->2 = { inlinehint nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #<!-- -->3 = { nounwind nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #<!-- -->4 = { cold minsize noinline noreturn nounwind nonlazybind optsize uwtable "probe-stack"="inline-asm" }
attributes #<!-- -->5 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #<!-- -->6 = { cold noinline noreturn nounwind nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #<!-- -->7 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #<!-- -->8 = { cold noinline noreturn nonlazybind uwtable "probe-stack"="inline-asm" }
attributes #<!-- -->9 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
attributes #<!-- -->10 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #<!-- -->11 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #<!-- -->12 = { cold noreturn nounwind }
attributes #<!-- -->13 = { noreturn nounwind }
attributes #<!-- -->14 = { noreturn }
attributes #<!-- -->15 = { nounwind }
attributes #<!-- -->16 = { cold }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{!"rustc version 1.88.0-nightly"}
!3 = !{}
!4 = !{i64 8}
!5 = !{i64 0, i64 2}

</details>

(The original IR also had explicit target-cpu attributes, which I had to remove to let command-line options take effect.)

@nikic
Copy link
Contributor

nikic commented Apr 7, 2025

Duplicate of #134513, though this does answer my question about what the unoptimized IR is.

@nikic
Copy link
Contributor

nikic commented Apr 17, 2025

Confirmed that the fix for #134513 also works for the original IR here.

@nikic nikic closed this as completed Apr 17, 2025
@EugeneZelenko EugeneZelenko added the duplicate Resolved as duplicate label Apr 17, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

5 participants