Skip to content

Commit af297c3

Browse files
committed
cmd/internal/obj/arm64: factor out splitting of 24 bit unsigned scaled immediates
Rather than duplicating this code, factor it out into a function and add test coverage. Change-Id: I37ce568ded4659d98a4ff1361520c5fb2207e947 Reviewed-on: https://go-review.googlesource.com/c/go/+/512537 Run-TryBot: Joel Sing <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Matthew Dempsky <[email protected]> Reviewed-by: Cherry Mui <[email protected]>
1 parent 2a1ba6e commit af297c3

File tree

3 files changed

+231
-52
lines changed

3 files changed

+231
-52
lines changed

src/cmd/asm/internal/asm/testdata/arm64.s

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -558,36 +558,60 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
558558
FMOVQ 64(RSP), F11 // eb13c03d
559559

560560
// large aligned offset, use two instructions (add+ldr/str).
561-
MOVB R1, 0x1001(R2) // MOVB R1, 4097(R2) // 5b04409161070039
562-
MOVH R1, 0x2002(R2) // MOVH R1, 8194(R2) // 5b08409161070079
563-
MOVW R1, 0x4004(R2) // MOVW R1, 16388(R2) // 5b104091610700b9
564-
MOVD R1, 0x8008(R2) // MOVD R1, 32776(R2) // 5b204091610700f9
565-
FMOVS F1, 0x4004(R2) // FMOVS F1, 16388(R2) // 5b104091610700bd
566-
FMOVD F1, 0x8008(R2) // FMOVD F1, 32776(R2) // 5b204091610700fd
561+
MOVB R1, 0x1001(R2) // MOVB R1, 4097(R2) // 5b04409161070039
562+
MOVB R1, 0xffffff(R2) // MOVB R1, 16777215(R2) // 5bfc7f9161ff3f39
563+
MOVH R1, 0x2002(R2) // MOVH R1, 8194(R2) // 5b08409161070079
564+
MOVH R1, 0xfffffe(R2) // MOVH R1, 16777214(R2) // 5bf87f9161ff3f79
565+
MOVW R1, 0x4004(R2) // MOVW R1, 16388(R2) // 5b104091610700b9
566+
MOVW R1, 0xfffffc(R2) // MOVW R1, 16777212(R2) // 5bf07f9161ff3fb9
567+
MOVD R1, 0x8008(R2) // MOVD R1, 32776(R2) // 5b204091610700f9
568+
MOVD R1, 0xfffff8(R2) // MOVD R1, 16777208(R2) // 5be07f9161ff3ff9
569+
FMOVS F1, 0x4004(R2) // FMOVS F1, 16388(R2) // 5b104091610700bd
570+
FMOVS F1, 0xfffffc(R2) // FMOVS F1, 16777212(R2) // 5bf07f9161ff3fbd
571+
FMOVD F1, 0x8008(R2) // FMOVD F1, 32776(R2) // 5b204091610700fd
572+
FMOVD F1, 0xfffff8(R2) // FMOVD F1, 16777208(R2) // 5be07f9161ff3ffd
567573

568-
MOVB 0x1001(R1), R2 // MOVB 4097(R1), R2 // 3b04409162078039
569-
MOVH 0x2002(R1), R2 // MOVH 8194(R1), R2 // 3b08409162078079
570-
MOVW 0x4004(R1), R2 // MOVW 16388(R1), R2 // 3b104091620780b9
571-
MOVD 0x8008(R1), R2 // MOVD 32776(R1), R2 // 3b204091620740f9
572-
FMOVS 0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd
573-
FMOVD 0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd
574+
MOVB 0x1001(R1), R2 // MOVB 4097(R1), R2 // 3b04409162078039
575+
MOVB 0xffffff(R1), R2 // MOVB 16777215(R1), R2 // 3bfc7f9162ffbf39
576+
MOVH 0x2002(R1), R2 // MOVH 8194(R1), R2 // 3b08409162078079
577+
MOVH 0xfffffe(R1), R2 // MOVH 16777214(R1), R2 // 3bf87f9162ffbf79
578+
MOVW 0x4004(R1), R2 // MOVW 16388(R1), R2 // 3b104091620780b9
579+
MOVW 0xfffffc(R1), R2 // MOVW 16777212(R1), R2 // 3bf07f9162ffbfb9
580+
MOVD 0x8008(R1), R2 // MOVD 32776(R1), R2 // 3b204091620740f9
581+
MOVD 0xfffff8(R1), R2 // MOVD 16777208(R1), R2 // 3be07f9162ff7ff9
582+
FMOVS 0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd
583+
FMOVS 0xfffffc(R1), F2 // FMOVS 16777212(R1), F2 // 3bf07f9162ff7fbd
584+
FMOVD 0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd
585+
FMOVD 0xfffff8(R1), F2 // FMOVD 16777208(R1), F2 // 3be07f9162ff7ffd
574586

575587
// very large or unaligned offset uses constant pool.
576588
// the encoding cannot be checked as the address of the constant pool is unknown.
577589
// here we only test that they can be assembled.
578-
MOVB R1, 0x44332211(R2) // MOVB R1, 1144201745(R2)
579-
MOVH R1, 0x44332211(R2) // MOVH R1, 1144201745(R2)
580-
MOVW R1, 0x44332211(R2) // MOVW R1, 1144201745(R2)
581-
MOVD R1, 0x44332211(R2) // MOVD R1, 1144201745(R2)
582-
FMOVS F1, 0x44332211(R2) // FMOVS F1, 1144201745(R2)
583-
FMOVD F1, 0x44332211(R2) // FMOVD F1, 1144201745(R2)
590+
MOVB R1, 0x1000000(R2) // MOVB R1, 16777216(R2)
591+
MOVB R1, 0x44332211(R2) // MOVB R1, 1144201745(R2)
592+
MOVH R1, 0x1000000(R2) // MOVH R1, 16777216(R2)
593+
MOVH R1, 0x44332211(R2) // MOVH R1, 1144201745(R2)
594+
MOVW R1, 0x1000000(R2) // MOVW R1, 16777216(R2)
595+
MOVW R1, 0x44332211(R2) // MOVW R1, 1144201745(R2)
596+
MOVD R1, 0x1000000(R2) // MOVD R1, 16777216(R2)
597+
MOVD R1, 0x44332211(R2) // MOVD R1, 1144201745(R2)
598+
FMOVS F1, 0x1000000(R2) // FMOVS F1, 16777216(R2)
599+
FMOVS F1, 0x44332211(R2) // FMOVS F1, 1144201745(R2)
600+
FMOVD F1, 0x1000000(R2) // FMOVD F1, 16777216(R2)
601+
FMOVD F1, 0x44332211(R2) // FMOVD F1, 1144201745(R2)
584602

585-
MOVB 0x44332211(R1), R2 // MOVB 1144201745(R1), R2
586-
MOVH 0x44332211(R1), R2 // MOVH 1144201745(R1), R2
587-
MOVW 0x44332211(R1), R2 // MOVW 1144201745(R1), R2
588-
MOVD 0x44332211(R1), R2 // MOVD 1144201745(R1), R2
589-
FMOVS 0x44332211(R1), F2 // FMOVS 1144201745(R1), F2
590-
FMOVD 0x44332211(R1), F2 // FMOVD 1144201745(R1), F2
603+
MOVB 0x1000000(R1), R2 // MOVB 16777216(R1), R2
604+
MOVB 0x44332211(R1), R2 // MOVB 1144201745(R1), R2
605+
MOVH 0x1000000(R1), R2 // MOVH 16777216(R1), R2
606+
MOVH 0x44332211(R1), R2 // MOVH 1144201745(R1), R2
607+
MOVW 0x1000000(R1), R2 // MOVW 16777216(R1), R2
608+
MOVW 0x44332211(R1), R2 // MOVW 1144201745(R1), R2
609+
MOVD 0x1000000(R1), R2 // MOVD 16777216(R1), R2
610+
MOVD 0x44332211(R1), R2 // MOVD 1144201745(R1), R2
611+
FMOVS 0x1000000(R1), F2 // FMOVS 16777216(R1), F2
612+
FMOVS 0x44332211(R1), F2 // FMOVS 1144201745(R1), F2
613+
FMOVD 0x1000000(R1), F2 // FMOVD 16777216(R1), F2
614+
FMOVD 0x44332211(R1), F2 // FMOVD 1144201745(R1), F2
591615

592616
// shifted or extended register offset.
593617
MOVD (R2)(R6.SXTW), R4 // 44c866f8

src/cmd/internal/obj/arm64/asm7.go

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1384,6 +1384,25 @@ func roundUp(x, to uint32) uint32 {
13841384
return (x + to - 1) &^ (to - 1)
13851385
}
13861386

1387+
// splitImm24uScaled splits an immediate into a scaled 12 bit unsigned lo value
// and an unscaled shifted 12 bit unsigned hi value. These are typically used
// by adding or subtracting the hi value and using the lo value as the offset
// for a load or store.
//
// The lo value is expressed in units of 1<<shift, so on success
// hi + lo<<shift == v, hi only has bits set within 0xfff000 and lo is in the
// range [0, 0xfff]. The results are returned in the order (hi, lo).
//
// An error is returned, with both values zero, if v is negative, if shift is
// negative, if v is not a multiple of 1<<shift, or if the part of v that does
// not fit in lo cannot be represented by hi.
func splitImm24uScaled(v int32, shift int) (int32, int32, error) {
	if v < 0 {
		return 0, 0, fmt.Errorf("%d is not a 24 bit unsigned immediate", v)
	}
	if shift < 0 {
		// Shifting by a negative count panics at run time; report the bad
		// argument through the error return like every other invalid input.
		return 0, 0, fmt.Errorf("%d is not a valid shift", shift)
	}
	if v&((1<<shift)-1) != 0 {
		return 0, 0, fmt.Errorf("%d is not a multiple of %d", v, 1<<shift)
	}
	// Take the low 12 bits of the scaled value as lo; whatever remains of v
	// has to be carried by hi.
	lo := (v >> shift) & 0xfff
	hi := v - (lo << shift)
	if hi&^0xfff000 != 0 {
		return 0, 0, fmt.Errorf("%d is too large for a scaled 24 bit unsigned immediate %x %x", v, lo, hi)
	}
	return hi, lo, nil
}
1405+
13871406
func (c *ctxt7) regoff(a *obj.Addr) int32 {
13881407
c.instoffset = 0
13891408
c.aclass(a)
@@ -3908,23 +3927,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
39083927
}
39093928

39103929
v := c.regoff(&p.To)
3911-
var hi int32
3912-
if v < 0 || (v&((1<<uint(s))-1)) != 0 {
3913-
// negative or unaligned offset, use constant pool
3914-
goto storeusepool
3915-
}
3916-
3917-
hi = v - (v & (0xFFF << uint(s)))
3918-
if hi&0xFFF != 0 {
3919-
c.ctxt.Diag("internal: miscalculated offset %d [%d]\n%v", v, s, p)
3920-
}
3921-
if hi&^0xFFF000 != 0 {
3922-
// hi doesn't fit into an ADD instruction
3930+
hi, lo, err := splitImm24uScaled(v, s)
3931+
if err != nil {
39233932
goto storeusepool
39243933
}
3925-
39263934
o1 = c.oaddi(p, AADD, hi, REGTMP, r)
3927-
o2 = c.olsr12u(p, c.opstr(p, p.As), ((v-hi)>>uint(s))&0xFFF, REGTMP, p.From.Reg)
3935+
o2 = c.olsr12u(p, c.opstr(p, p.As), lo, REGTMP, p.From.Reg)
39283936
break
39293937

39303938
storeusepool:
@@ -3952,23 +3960,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
39523960
}
39533961

39543962
v := c.regoff(&p.From)
3955-
var hi int32
3956-
if v < 0 || (v&((1<<uint(s))-1)) != 0 {
3957-
// negative or unaligned offset, use constant pool
3958-
goto loadusepool
3959-
}
3960-
3961-
hi = v - (v & (0xFFF << uint(s)))
3962-
if (hi & 0xFFF) != 0 {
3963-
c.ctxt.Diag("internal: miscalculated offset %d [%d]\n%v", v, s, p)
3964-
}
3965-
if hi&^0xFFF000 != 0 {
3966-
// hi doesn't fit into an ADD instruction
3963+
hi, lo, err := splitImm24uScaled(v, s)
3964+
if err != nil {
39673965
goto loadusepool
39683966
}
3969-
39703967
o1 = c.oaddi(p, AADD, hi, REGTMP, r)
3971-
o2 = c.olsr12u(p, c.opldr(p, p.As), ((v-hi)>>uint(s))&0xFFF, REGTMP, p.To.Reg)
3968+
o2 = c.olsr12u(p, c.opldr(p, p.As), lo, REGTMP, p.To.Reg)
39723969
break
39733970

39743971
loadusepool:

src/cmd/internal/obj/arm64/asm_arm64_test.go

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,164 @@ import (
1414
"testing"
1515
)
1616

17+
func TestSplitImm24uScaled(t *testing.T) {
18+
tests := []struct {
19+
v int32
20+
shift int
21+
wantErr bool
22+
wantHi int32
23+
wantLo int32
24+
}{
25+
{
26+
v: 0,
27+
shift: 0,
28+
wantHi: 0,
29+
wantLo: 0,
30+
},
31+
{
32+
v: 0x1001,
33+
shift: 0,
34+
wantHi: 0x1000,
35+
wantLo: 0x1,
36+
},
37+
{
38+
v: 0xffffff,
39+
shift: 0,
40+
wantHi: 0xfff000,
41+
wantLo: 0xfff,
42+
},
43+
{
44+
v: 0xffffff,
45+
shift: 1,
46+
wantErr: true,
47+
},
48+
{
49+
v: 0xfe,
50+
shift: 1,
51+
wantHi: 0x0,
52+
wantLo: 0x7f,
53+
},
54+
{
55+
v: 0x10fe,
56+
shift: 1,
57+
wantHi: 0x0,
58+
wantLo: 0x87f,
59+
},
60+
{
61+
v: 0x2002,
62+
shift: 1,
63+
wantHi: 0x2000,
64+
wantLo: 0x1,
65+
},
66+
{
67+
v: 0xfffffe,
68+
shift: 1,
69+
wantHi: 0xffe000,
70+
wantLo: 0xfff,
71+
},
72+
{
73+
// TODO(jsing): Fix splitting to make this fit.
74+
v: 0x1000ffe,
75+
shift: 1,
76+
wantErr: true,
77+
wantHi: 0xfff000,
78+
wantLo: 0xfff,
79+
},
80+
{
81+
v: 0x1001000,
82+
shift: 1,
83+
wantErr: true,
84+
},
85+
{
86+
v: 0xfffffe,
87+
shift: 2,
88+
wantErr: true,
89+
},
90+
{
91+
v: 0x4004,
92+
shift: 2,
93+
wantHi: 0x4000,
94+
wantLo: 0x1,
95+
},
96+
{
97+
v: 0xfffffc,
98+
shift: 2,
99+
wantHi: 0xffc000,
100+
wantLo: 0xfff,
101+
},
102+
{
103+
// TODO(jsing): Fix splitting to make this fit.
104+
v: 0x1002ffc,
105+
shift: 2,
106+
wantErr: true,
107+
wantHi: 0xfff000,
108+
wantLo: 0xfff,
109+
},
110+
{
111+
v: 0x1003000,
112+
shift: 2,
113+
wantErr: true,
114+
},
115+
{
116+
v: 0xfffffe,
117+
shift: 3,
118+
wantErr: true,
119+
},
120+
{
121+
v: 0x8008,
122+
shift: 3,
123+
wantHi: 0x8000,
124+
wantLo: 0x1,
125+
},
126+
{
127+
v: 0xfffff8,
128+
shift: 3,
129+
wantHi: 0xff8000,
130+
wantLo: 0xfff,
131+
},
132+
{
133+
// TODO(jsing): Fix splitting to make this fit.
134+
v: 0x1006ff8,
135+
shift: 3,
136+
wantErr: true,
137+
wantHi: 0xfff000,
138+
wantLo: 0xfff,
139+
},
140+
{
141+
v: 0x1007000,
142+
shift: 3,
143+
wantErr: true,
144+
},
145+
}
146+
for _, test := range tests {
147+
hi, lo, err := splitImm24uScaled(test.v, test.shift)
148+
switch {
149+
case err == nil && test.wantErr:
150+
t.Errorf("splitImm24uScaled(%v, %v) succeeded, want error", test.v, test.shift)
151+
case err != nil && !test.wantErr:
152+
t.Errorf("splitImm24uScaled(%v, %v) failed: %v", test.v, test.shift, err)
153+
case !test.wantErr:
154+
if got, want := hi, test.wantHi; got != want {
155+
t.Errorf("splitImm24uScaled(%x, %x) - got hi %x, want %x", test.v, test.shift, got, want)
156+
}
157+
if got, want := lo, test.wantLo; got != want {
158+
t.Errorf("splitImm24uScaled(%x, %x) - got lo %x, want %x", test.v, test.shift, got, want)
159+
}
160+
}
161+
}
162+
for shift := 0; shift <= 3; shift++ {
163+
for v := int32(0); v < 0xfff000|0xfff<<shift; v = v + 1<<shift {
164+
hi, lo, err := splitImm24uScaled(v, shift)
165+
if err != nil {
166+
t.Fatalf("splitImm24uScaled(%x, %x) failed: %v", v, shift, err)
167+
}
168+
if hi+lo<<shift != v {
169+
t.Fatalf("splitImm24uScaled(%x, %x) = (%x, %x) is incorrect", v, shift, hi, lo)
170+
}
171+
}
172+
}
173+
}
174+
17175
// TestLarge generates a very large file to verify that large
18176
// program builds successfully, in particular, too-far
19177
// conditional branches are fixed, and also verify that the

0 commit comments

Comments
 (0)