Skip to content

Commit 2a76fe4

Browse files
committed
cpu: Use relaxed ordering on AArch64.
As expected, this changes the codegen at the places where we call `cpu::features()`: the load-acquire (`ldar`) of `FEATURES` becomes a plain load (`ldr`). Examples:

```
  adrp x8, :got:_ZN4ring3cpu3arm12featureflags8FEATURES17habb4a2487137fb15E
  mov x19, x0
  ldr x8, [x8, :got_lo12:_ZN4ring3cpu3arm12featureflags8FEATURES17habb4a2487137fb15E]
- ldar x8, [x8]
+ ldr x8, [x8]
  cbz x8, .LBB86_2
```

```
  adrp x8, :got:_ZN4ring3cpu3arm12featureflags8FEATURES17habb4a2487137fb15E
  mov x19, x4
  mov x20, x3
  ldr x8, [x8, :got_lo12:_ZN4ring3cpu3arm12featureflags8FEATURES17habb4a2487137fb15E]
  mov x21, x2
  mov x22, x0
  sub x23, x3, #4
- ldar x8, [x8]
+ ldr x8, [x8]
  cbz x8, .LBB84_8
```
1 parent ec9408b commit 2a76fe4

File tree

3 files changed

+52
-9
lines changed

3 files changed

+52
-9
lines changed

src/cpu/arm.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ pub(super) mod featureflags {
9494
cpu,
9595
polyfill::{once_cell::race, usize_from_u32},
9696
};
97+
use cfg_if::cfg_if;
9798
use core::num::NonZeroUsize;
9899
#[cfg(all(target_arch = "arm", target_endian = "little"))]
99100
use core::sync::atomic::{AtomicU32, Ordering};
@@ -158,7 +159,17 @@ pub(super) mod featureflags {
158159
features
159160
}
160161

161-
static FEATURES: race::OnceNonZeroUsize = race::OnceNonZeroUsize::new();
162+
cfg_if! {
163+
if #[cfg(all(target_arch = "aarch64", target_endian = "little"))] {
164+
// On AArch64, we store all feature flags in `FEATURES` itself, so there is no other memory whose writes a load of `FEATURES` needs to be ordered after.
165+
type FeaturesOrdering = race::Relaxed;
166+
} else if #[cfg(all(target_arch = "arm", target_endian = "little"))] {
167+
// On 32-bit ARM, we also have a separate flag for NEON.
168+
type FeaturesOrdering = race::AcquireRelease;
169+
}
170+
}
171+
172+
static FEATURES: race::OnceNonZeroUsize<FeaturesOrdering> = race::OnceNonZeroUsize::new();
162173

163174
// TODO(MSRV): There is no "pmull" feature listed from
164175
// `rustc --print cfg --target=aarch64-apple-darwin`. Originally ARMv8 tied

src/cpu/intel.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ pub(super) mod featureflags {
8282
features
8383
}
8484

85-
static FEATURES: race::OnceNonZeroUsize = race::OnceNonZeroUsize::new();
85+
static FEATURES: race::OnceNonZeroUsize<race::AcquireRelease> = race::OnceNonZeroUsize::new();
8686

8787
#[cfg(target_arch = "x86")]
8888
#[rustfmt::skip]

src/polyfill/once_cell/race.rs

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,29 +27,61 @@
2727
// on another thread) using `Ordering::Release`, so we must use
2828
// `Ordering::Acquire` to ensure that store "happens-before" this load.
2929

30-
use core::sync::atomic;
31-
32-
use atomic::{AtomicUsize, Ordering};
30+
use cfg_if::cfg_if;
31+
use core::marker::PhantomData;
3332
use core::num::NonZeroUsize;
33+
use core::sync::atomic::{self, AtomicUsize};
34+
35+
pub trait Ordering {
36+
const ACQUIRE: atomic::Ordering;
37+
const RELEASE: atomic::Ordering;
38+
}
39+
40+
cfg_if! {
41+
if #[cfg(any(all(target_arch = "arm", target_endian = "little"),
42+
target_arch = "x86",
43+
target_arch = "x86_64"))]
44+
{
45+
pub struct AcquireRelease(());
46+
47+
impl Ordering for AcquireRelease {
48+
const ACQUIRE: atomic::Ordering = atomic::Ordering::Acquire;
49+
const RELEASE: atomic::Ordering = atomic::Ordering::Release;
50+
}
51+
}
52+
}
53+
54+
cfg_if! {
55+
if #[cfg(all(target_arch = "aarch64", target_endian = "little"))] {
56+
pub struct Relaxed(());
57+
58+
impl Ordering for Relaxed {
59+
const ACQUIRE: atomic::Ordering = atomic::Ordering::Relaxed;
60+
const RELEASE: atomic::Ordering = atomic::Ordering::Relaxed;
61+
}
62+
}
63+
}
3464

3565
/// A thread-safe cell which can be written to only once.
36-
pub struct OnceNonZeroUsize {
66+
pub struct OnceNonZeroUsize<O> {
3767
inner: AtomicUsize,
68+
ordering: PhantomData<O>,
3869
}
3970

40-
impl OnceNonZeroUsize {
71+
impl<O: Ordering> OnceNonZeroUsize<O> {
4172
/// Creates a new empty cell.
4273
#[inline]
4374
pub const fn new() -> Self {
4475
Self {
4576
inner: AtomicUsize::new(0),
77+
ordering: PhantomData,
4678
}
4779
}
4880

4981
/// Gets the underlying value.
5082
#[inline]
5183
pub fn get(&self) -> Option<NonZeroUsize> {
52-
let val = self.inner.load(Ordering::Acquire);
84+
let val = self.inner.load(O::ACQUIRE);
5385
NonZeroUsize::new(val)
5486
}
5587

@@ -124,6 +156,6 @@ impl OnceNonZeroUsize {
124156
#[inline(always)]
125157
fn compare_exchange(&self, val: NonZeroUsize) -> Result<usize, usize> {
126158
self.inner
127-
.compare_exchange(0, val.get(), Ordering::Release, Ordering::Acquire)
159+
.compare_exchange(0, val.get(), O::RELEASE, O::ACQUIRE)
128160
}
129161
}

0 commit comments

Comments
 (0)