From 913162bae0b2c19cd8d88ca9d2aade7dc747a594 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Wed, 25 Jun 2025 18:21:10 +0000 Subject: [PATCH 1/2] [AArch64] Ensure the LR is preserved if we must call __arm_get_current_vg Fixes #145635 --- .../Target/AArch64/AArch64FrameLowering.cpp | 4 ++ .../AArch64/sme-must-save-lr-for-vg.ll | 51 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index a71668e71c235..791f77956462c 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3955,6 +3955,10 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // Save number of saved regs, so we can easily update CSStackSize later. unsigned NumSavedRegs = SavedRegs.count(); + // If we must call __arm_get_current_vg in the prologue preserve the LR. + if (requiresSaveVG(MF) && !Subtarget.hasSVE()) + SavedRegs.set(AArch64::LR); + // The frame record needs to be created by saving the appropriate registers uint64_t EstimatedStackSize = MFI.estimateStackSize(MF); if (hasFP(MF) || diff --git a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll new file mode 100644 index 0000000000000..487d87a84549b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -O0 < %s | FileCheck %s + +; Example of locally streaming function that (at -O0) must preserve the LR (X30) +; before calling __arm_get_current_vg. +define void @foo() "aarch64_pstate_sm_body" { +; CHECK-LABEL: foo: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: lsr x9, x9, #3 +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: bl __arm_get_current_vg +; CHECK-NEXT: str x0, [sp, #88] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset vg, -8 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: smstart sm +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: ret + ret void +} From e6fb783d171144d582d14f4fc82b767f6c55a768 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Wed, 25 Jun 2025 20:30:44 +0000 Subject: [PATCH 2/2] Fixups --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 8 +++++--- .../CodeGen/AArch64/sme-must-save-lr-for-vg.ll | 14 ++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 791f77956462c..6f1ce5bdbe286 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3934,6 +3934,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, CSStackSize += SpillSize; } + // Save number of saved regs, so we can easily update CSStackSize later to + // account for any additional 64-bit GPR saves. Note: After this point + // only 64-bit GPRs can be added to SavedRegs. + unsigned NumSavedRegs = SavedRegs.count(); + // Increase the callee-saved stack size if the function has streaming mode // changes, as we will need to spill the value of the VG register. // For locally streaming functions, we spill both the streaming and @@ -3952,9 +3957,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (AFI->hasStackHazardSlotIndex()) CSStackSize += getStackHazardSize(MF); - // Save number of saved regs, so we can easily update CSStackSize later. - unsigned NumSavedRegs = SavedRegs.count(); - // If we must call __arm_get_current_vg in the prologue preserve the LR. if (requiresSaveVG(MF) && !Subtarget.hasSVE()) SavedRegs.set(AArch64::LR); diff --git a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll index 487d87a84549b..69f603458670c 100644 --- a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll +++ b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll @@ -11,15 +11,14 @@ define void @foo() "aarch64_pstate_sm_body" { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: rdsvl x9, #1 ; CHECK-NEXT: lsr x9, x9, #3 -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #72] // 8-byte Folded Spill ; CHECK-NEXT: bl __arm_get_current_vg -; CHECK-NEXT: str x0, [sp, #88] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset vg, -8 -; CHECK-NEXT: .cfi_offset w30, -24 -; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: .cfi_offset b10, -56 @@ -30,14 +29,13 @@ define void @foo() "aarch64_pstate_sm_body" { ; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10