Skip to content

vec![[[[0i32; 16]; 16]; 16]; 0] is not optimized #143637

Open
@theemathas

Description

@theemathas

Sorry if this is unrealistic code. I stumbled upon this, and thought it was weird that the code wasn't optimized properly.

/// Minimal reproduction: builds a `Vec` with the repeat form of `vec!`
/// using a repeat count of 0, so the returned vector has no elements.
///
/// The element expression is a 16 x 16 x 16 array of `0i32`
/// (16 * 16 * 16 * 4 = 16384 bytes), even though it is never stored.
pub fn foo() -> Vec<[[[i32; 16]; 16]; 16]> {
    vec![[[[0i32; 16]; 16]; 16]; 0]
}

Godbolt link

Assembly code
example::foo::hf7aa743c8384f4c6:
        # Prologue: rbx is saved because it is used below to hold the
        # destination address that arrives in rdi.
        push    rbx
        # Step 1: reserve 4 * 4096 = 16384 bytes of stack. The first three
        # 4096-byte steps are each followed by a store to the new top of stack
        # (NOTE(review): looks like stack probing - confirm).
        sub     rsp, 4096
        mov     qword ptr [rsp], 0
        sub     rsp, 4096
        mov     qword ptr [rsp], 0
        sub     rsp, 4096
        mov     qword ptr [rsp], 0
        sub     rsp, 4096
        # Keep the destination address; the stores at .LBB0_18 write through it.
        mov     rbx, rdi
        # Step 2: memset(rsp, 0, 16384) - zero the whole temporary array.
        mov     rdi, rsp
        mov     edx, 16384
        xor     esi, esi
        call    qword ptr [rip + memset@GOTPCREL]
        # Step 3: scan the buffer for a non-zero value.
        # rax is the running byte offset (starts at 960); xmm0 is the all-zero
        # vector that every group is compared against.
        mov     eax, 960
        pxor    xmm0, xmm0
.LBB0_1:
        # One loop iteration = 16 unrolled groups covering offsets
        # [rax - 960, rax + 64), i.e. 1024 bytes. Each group loads four
        # 16-byte vectors (64 bytes), ORs them together, compares the result
        # lane-wise with zero, and leaves via jne if any 32-bit lane is
        # non-zero (mask != 15).
        movdqu  xmm1, xmmword ptr [rsp + rax - 960]
        movdqu  xmm2, xmmword ptr [rsp + rax - 944]
        movdqu  xmm3, xmmword ptr [rsp + rax - 928]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 912]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 896]
        movdqu  xmm2, xmmword ptr [rsp + rax - 880]
        movdqu  xmm3, xmmword ptr [rsp + rax - 864]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 848]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 832]
        movdqu  xmm2, xmmword ptr [rsp + rax - 816]
        movdqu  xmm3, xmmword ptr [rsp + rax - 800]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 784]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 768]
        movdqu  xmm2, xmmword ptr [rsp + rax - 752]
        movdqu  xmm3, xmmword ptr [rsp + rax - 736]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 720]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 704]
        movdqu  xmm2, xmmword ptr [rsp + rax - 688]
        movdqu  xmm3, xmmword ptr [rsp + rax - 672]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 656]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 640]
        movdqu  xmm2, xmmword ptr [rsp + rax - 624]
        movdqu  xmm3, xmmword ptr [rsp + rax - 608]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 592]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 576]
        movdqu  xmm2, xmmword ptr [rsp + rax - 560]
        movdqu  xmm3, xmmword ptr [rsp + rax - 544]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 528]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 512]
        movdqu  xmm2, xmmword ptr [rsp + rax - 496]
        movdqu  xmm3, xmmword ptr [rsp + rax - 480]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 464]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 448]
        movdqu  xmm2, xmmword ptr [rsp + rax - 432]
        movdqu  xmm3, xmmword ptr [rsp + rax - 416]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 400]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 384]
        movdqu  xmm2, xmmword ptr [rsp + rax - 368]
        movdqu  xmm3, xmmword ptr [rsp + rax - 352]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 336]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 320]
        movdqu  xmm2, xmmword ptr [rsp + rax - 304]
        movdqu  xmm3, xmmword ptr [rsp + rax - 288]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 272]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 256]
        movdqu  xmm2, xmmword ptr [rsp + rax - 240]
        movdqu  xmm3, xmmword ptr [rsp + rax - 224]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 208]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 192]
        movdqu  xmm2, xmmword ptr [rsp + rax - 176]
        movdqu  xmm3, xmmword ptr [rsp + rax - 160]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 144]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 128]
        movdqu  xmm2, xmmword ptr [rsp + rax - 112]
        movdqu  xmm3, xmmword ptr [rsp + rax - 96]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 80]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax - 64]
        movdqu  xmm2, xmmword ptr [rsp + rax - 48]
        movdqu  xmm3, xmmword ptr [rsp + rax - 32]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax - 16]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        movdqu  xmm1, xmmword ptr [rsp + rax]
        movdqu  xmm2, xmmword ptr [rsp + rax + 16]
        movdqu  xmm3, xmmword ptr [rsp + rax + 32]
        por     xmm3, xmm1
        movdqu  xmm1, xmmword ptr [rsp + rax + 48]
        por     xmm1, xmm2
        por     xmm1, xmm3
        pcmpeqd xmm1, xmm0
        movmskps        ecx, xmm1
        xor     ecx, 15
        jne     .LBB0_18
        # Advance by 1024 bytes. The last iteration runs with rax == 16320
        # (960 + 15 * 1024), so the loop makes 16 passes over 16384 bytes.
        lea     rcx, [rax + 1024]
        cmp     rax, 16320
        mov     rax, rcx
        jne     .LBB0_1
.LBB0_18:
        # Step 4: both the early exits (non-zero found) and the loop
        # fall-through (all zero) arrive here, so the scan result is unused.
        # The three stores write the returned value; presumably these are the
        # capacity / pointer / length words of an empty Vec, with 4 as a
        # non-null placeholder pointer - TODO confirm the field order.
        mov     qword ptr [rbx], 0
        mov     qword ptr [rbx + 8], 4
        mov     qword ptr [rbx + 16], 0
        # Return the destination address in rax, then release the 16384-byte
        # stack buffer and restore rbx.
        mov     rax, rbx
        add     rsp, 16384
        pop     rbx
        ret

I expected the code to get optimized into just returning an empty vec (a few instructions). Instead, the assembly does the following:

  1. Allocates some stack space.
  2. Memsets that stack space to zeroes.
  3. Checks whether the entire array is zeroes, then discards that knowledge.
  4. Unconditionally returns an empty vec.

This seems rather bad.

Note that Rust's vec![] macro checks whether the element value is all zeroes, but only for arrays of length 16 or less. That check is meant to decide whether to allocate a pre-zeroed chunk of memory; in this case, however, its result is discarded.

Meta

Reproducible on godbolt with:

rustc 1.88.0 (6b00bc388 2025-06-23)
binary: rustc
commit-hash: 6b00bc3880198600130e1cf62b8f8a93494488cc
commit-date: 2025-06-23
host: x86_64-unknown-linux-gnu
release: 1.88.0
LLVM version: 20.1.5
Internal compiler ID: r1880

@rustbot labels +C-optimization

Metadata

Metadata

Assignees

No one assigned

    Labels

    C-bug — Category: This is a bug.
    C-optimization — Category: An issue highlighting optimization opportunities or PRs implementing such.
    needs-triage — This issue may need triage. Remove it if it has been sufficiently triaged.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions