Skip to content

Commit 53d44e8

Browse files
committed
AArch64 LSE atomic_rmw support
Rename the existing AtomicRMW to AtomicRMWLoop and lower atomic_rmw operations directly, without a loop, when LSE support is available. Copyright (c) 2021, Arm Limited
1 parent bd19f43 commit 53d44e8

File tree

5 files changed

+605
-22
lines changed

5 files changed

+605
-22
lines changed

cranelift/codegen/src/isa/aarch64/inst/emit.rs

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,33 @@ fn enc_dmb_ish() -> u32 {
504504
0xD5033BBF
505505
}
506506

507+
/// Build the 32-bit encoding of a single-instruction atomic read-modify-write.
///
/// The result is a fixed base pattern OR-ed with these fields:
/// - `ty` selects a 2-bit size code shifted to bit 30
///   (`I8` = 0b00, `I16` = 0b01, `I32` = 0b10, `I64` = 0b11).
/// - `op` selects a 3-bit operation code shifted to bit 12.
/// - `rs` is placed at bit 16, `rn` at bit 5 and `rt` at bit 0, each as the
///   value returned by `machreg_to_gpr`.
///
/// Panics if `rt` maps to GPR number 31, or (via `unreachable!()`) if `ty` is
/// not one of the four integer types listed above.
///
/// NOTE(review): judging by the name and the commit message this is the LSE
/// `LD<op>AL` family (acquire + release); the three bits set at 23:21 in the
/// base constant presumably encode that ordering. Confirm against the Arm ARM.
fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
508+
// Register number 31 is rejected as the destination up front.
// NOTE(review): presumably because 31 would denote the zero register here and
// discard the loaded value; verify against the instruction definition.
assert!(machreg_to_gpr(rt.to_reg()) != 31);
509+
let sz = match ty {
510+
I64 => 0b11,
511+
I32 => 0b10,
512+
I16 => 0b01,
513+
I8 => 0b00,
514+
_ => unreachable!(),
515+
};
516+
// Shadow `op` with its 3-bit opcode field value.
let op = match op {
517+
AtomicRMWOp::Add => 0b000,
518+
AtomicRMWOp::Clr => 0b001,
519+
AtomicRMWOp::Eor => 0b010,
520+
AtomicRMWOp::Set => 0b011,
521+
AtomicRMWOp::Smax => 0b100,
522+
AtomicRMWOp::Smin => 0b101,
523+
AtomicRMWOp::Umax => 0b110,
524+
AtomicRMWOp::Umin => 0b111,
525+
};
526+
// Fixed bits of the instruction word; the variable fields are OR-ed in below.
0b00_111_000_111_00000_0_000_00_00000_00000
527+
| (sz << 30)
528+
| (machreg_to_gpr(rs) << 16)
529+
| (op << 12)
530+
| (machreg_to_gpr(rn) << 5)
531+
| machreg_to_gpr(rt.to_reg())
532+
}
533+
507534
fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
508535
let sz = match ty {
509536
I64 => 0b11,
@@ -1318,7 +1345,10 @@ impl MachInstEmit for Inst {
13181345
} => {
13191346
sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
13201347
}
1321-
&Inst::AtomicRMW { ty, op } => {
1348+
&Inst::AtomicRMW { ty, op, rs, rt, rn } => {
1349+
sink.put4(enc_ldal(ty, op, rs, rt, rn));
1350+
}
1351+
&Inst::AtomicRMWLoop { ty, op } => {
13221352
/* Emit this:
13231353
again:
13241354
ldaxr{,b,h} x/w27, [x25]
@@ -1340,7 +1370,7 @@ impl MachInstEmit for Inst {
13401370
so that we simply write in the destination, the "2nd arg for op".
13411371
*/
13421372
// TODO: We should not hardcode registers here, a better idea would be to
1343-
// pass some scratch registers in the AtomicRMW pseudo-instruction, and use those
1373+
// pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those
13441374
let xzr = zero_reg();
13451375
let x24 = xreg(24);
13461376
let x25 = xreg(25);

0 commit comments

Comments
 (0)