Skip to content

Commit 452efb3

Browse files
authored
[AArch64] Fix LDR/STR folding causing memtag failures (#118821)
When generating code with sanitize_memtag, we make use of the fact that the sp+imm forms of many load and store instructions are not tag-checked, so we can use SP directly instead of needing a register holding the tagged pointer. However, this isn't true for the writeback versions of the instructions, so we can't fold ADDs and SUBs into them in AArch64LoadStoreOptimizer. This would be possible in cases where the loads/stores only access untagged stack slots, but that information isn't easily available after frame index elimination.
1 parent 7c52360 commit 452efb3

File tree

2 files changed

+154
-2
lines changed

2 files changed

+154
-2
lines changed

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

+12-2
Original file line number | Diff line number | Diff line change
@@ -733,7 +733,7 @@ static bool isPromotableLoadFromStore(MachineInstr &MI) {
733733
}
734734
}
735735

736-
static bool isMergeableLdStUpdate(MachineInstr &MI) {
736+
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
737737
unsigned Opc = MI.getOpcode();
738738
switch (Opc) {
739739
default:
@@ -785,6 +785,15 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
785785
if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
786786
return false;
787787

788+
// When using stack tagging, simple sp+imm loads and stores are not
789+
// tag-checked, but pre- and post-indexed versions of them are, so we can't
790+
// replace the former with the latter. This transformation would be valid
791+
// if the load/store accesses an untagged stack slot, but we don't have
792+
// that information available after frame indices have been eliminated.
793+
if (AFI.isMTETagged() &&
794+
AArch64InstrInfo::getLdStBaseOp(MI).getReg() == AArch64::SP)
795+
return false;
796+
788797
return true;
789798
}
790799
}
@@ -2772,6 +2781,7 @@ bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
27722781

27732782
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
27742783
bool EnableNarrowZeroStOpt) {
2784+
AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
27752785

27762786
bool Modified = false;
27772787
// Four transformations to do here:
@@ -2842,7 +2852,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
28422852
// ldr x0, [x2], #4
28432853
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
28442854
MBBI != E;) {
2845-
if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
2855+
if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
28462856
Modified = true;
28472857
else
28482858
++MBBI;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,142 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple aarch64-none-elf -mattr=+mte --run-pass=aarch64-ldst-opt %s -o - | FileCheck %s
3+
4+
## When generating code with sanitize_memtag, we make use of the fact that the
5+
## sp+imm forms of many load and store instructions are not tag-checked, so we
6+
## can use SP directly instead of needing a register holding the tagged
7+
## pointer. However, this isn't true for the writeback versions of the
8+
## instructions, so we can't fold ADDs and SUBs into them in
9+
## AArch64LoadStoreOptimizer. This would be possible in cases where the
10+
## loads/stores only access untagged stack slots, but that information isn't
11+
## easily available after frame index elimination.
12+
13+
--- |
14+
define void @pre_index() {
15+
entry:
16+
ret void
17+
}
18+
define void @pre_index_memtag() sanitize_memtag {
19+
entry:
20+
ret void
21+
}
22+
define void @pre_index_memtag_not_sp() sanitize_memtag {
23+
entry:
24+
ret void
25+
}
26+
define void @post_index() {
27+
entry:
28+
ret void
29+
}
30+
define void @post_index_memtag() sanitize_memtag {
31+
entry:
32+
ret void
33+
}
34+
define void @post_index_memtag_not_sp() sanitize_memtag {
35+
entry:
36+
ret void
37+
}
38+
...
39+
---
40+
name: pre_index
41+
body: |
42+
bb.0.entry:
43+
liveins: $x0
44+
45+
; CHECK-LABEL: name: pre_index
46+
; CHECK: liveins: $x0
47+
; CHECK-NEXT: {{ $}}
48+
; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
49+
; CHECK-NEXT: early-clobber $sp = STRXpre killed renamable $x0, $sp, 16
50+
; CHECK-NEXT: RET undef $lr
51+
$sp = frame-setup SUBXri $sp, 16, 0
52+
STRXui killed renamable $x0, $sp, 2
53+
$sp = ADDXri $sp, 16, 0
54+
RET undef $lr
55+
...
56+
---
57+
name: pre_index_memtag
58+
body: |
59+
bb.0.entry:
60+
liveins: $x0
61+
62+
; CHECK-LABEL: name: pre_index_memtag
63+
; CHECK: liveins: $x0
64+
; CHECK-NEXT: {{ $}}
65+
; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
66+
; CHECK-NEXT: STRXui killed renamable $x0, $sp, 2
67+
; CHECK-NEXT: $sp = ADDXri $sp, 16, 0
68+
; CHECK-NEXT: RET undef $lr
69+
$sp = frame-setup SUBXri $sp, 16, 0
70+
STRXui killed renamable $x0, $sp, 2
71+
$sp = ADDXri $sp, 16, 0
72+
RET undef $lr
73+
...
74+
---
75+
name: pre_index_memtag_not_sp
76+
body: |
77+
bb.0.entry:
78+
liveins: $x0, $x1
79+
80+
; CHECK-LABEL: name: pre_index_memtag_not_sp
81+
; CHECK: liveins: $x0, $x1
82+
; CHECK-NEXT: {{ $}}
83+
; CHECK-NEXT: $x1 = frame-setup SUBXri $x1, 16, 0
84+
; CHECK-NEXT: early-clobber $x1 = STRXpre killed renamable $x0, $x1, 16
85+
; CHECK-NEXT: RET undef $lr, implicit $x1
86+
$x1 = frame-setup SUBXri $x1, 16, 0
87+
STRXui killed renamable $x0, $x1, 2
88+
$x1 = ADDXri $x1, 16, 0
89+
RET undef $lr, implicit $x1
90+
...
91+
---
92+
name: post_index
93+
body: |
94+
bb.0.entry:
95+
liveins: $x0
96+
97+
; CHECK-LABEL: name: post_index
98+
; CHECK: liveins: $x0
99+
; CHECK-NEXT: {{ $}}
100+
; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
101+
; CHECK-NEXT: early-clobber $sp = STRXpost killed renamable $x0, $sp, 16
102+
; CHECK-NEXT: RET undef $lr
103+
$sp = frame-setup SUBXri $sp, 16, 0
104+
STRXui killed renamable $x0, $sp, 0
105+
$sp = ADDXri $sp, 16, 0
106+
RET undef $lr
107+
...
108+
---
109+
name: post_index_memtag
110+
body: |
111+
bb.0.entry:
112+
liveins: $x0
113+
114+
; CHECK-LABEL: name: post_index_memtag
115+
; CHECK: liveins: $x0
116+
; CHECK-NEXT: {{ $}}
117+
; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
118+
; CHECK-NEXT: STRXui killed renamable $x0, $sp, 0
119+
; CHECK-NEXT: $sp = ADDXri $sp, 16, 0
120+
; CHECK-NEXT: RET undef $lr
121+
$sp = frame-setup SUBXri $sp, 16, 0
122+
STRXui killed renamable $x0, $sp, 0
123+
$sp = ADDXri $sp, 16, 0
124+
RET undef $lr
125+
...
126+
---
127+
name: post_index_memtag_not_sp
128+
body: |
129+
bb.0.entry:
130+
liveins: $x0, $x1
131+
132+
; CHECK-LABEL: name: post_index_memtag_not_sp
133+
; CHECK: liveins: $x0, $x1
134+
; CHECK-NEXT: {{ $}}
135+
; CHECK-NEXT: $x1 = frame-setup SUBXri $x1, 16, 0
136+
; CHECK-NEXT: early-clobber $x1 = STRXpost killed renamable $x0, $x1, 16
137+
; CHECK-NEXT: RET undef $lr, implicit $x1
138+
$x1 = frame-setup SUBXri $x1, 16, 0
139+
STRXui killed renamable $x0, $x1, 0
140+
$x1 = ADDXri $x1, 16, 0
141+
RET undef $lr, implicit $x1
142+
...

0 commit comments

Comments
 (0)