Skip to content

Commit 37a4091

Browse files
committed
Auto merge of #50319 - nagisa:align_to, r=alexcrichton
Implement [T]::align_to Note that this PR deviates from what is accepted by RFC slightly by making `align_offset` to return an offset in elements, rather than bytes. This is necessary to sanely support `[T]::align_to` and also simply makes more sense™. The caveat is that trying to align a pointer of ZST is now an equivalent to `is_aligned` check, rather than anything else (as no number of ZST elements will align a misaligned ZST pointer). It also implements the `align_to` slightly differently than proposed in the RFC to properly handle cases where size of T and U aren’t co-prime. Furthermore, a promise is made that the slice containing `U`s will be as large as possible (contrary to the RFC) – otherwise the function is quite useless. The implementation uses quite a few underhanded tricks and takes advantage of the fact that alignment is a power-of-two quite heavily to optimise the machine code down to something that results in as few known-expensive instructions as possible. Currently calling `ptr.align_offset` with an unknown-at-compile-time `align` results in code that has just a single "expensive" modulo operation; the rest is "cheap" arithmetic and bitwise ops. cc #44488 @oli-obk As mentioned in the commit message for align_offset, many thanks go to Chris McDonald.
2 parents 952f344 + 59bb0fe commit 37a4091

File tree

10 files changed

+521
-111
lines changed

10 files changed

+521
-111
lines changed

src/libcore/intrinsics.rs

+2-32
Original file line numberDiff line numberDiff line change
@@ -1364,38 +1364,8 @@ extern "rust-intrinsic" {
13641364
/// source as well as std's catch implementation.
13651365
pub fn try(f: fn(*mut u8), data: *mut u8, local_ptr: *mut u8) -> i32;
13661366

1367-
/// Computes the byte offset that needs to be applied to `ptr` in order to
1368-
/// make it aligned to `align`.
1369-
/// If it is not possible to align `ptr`, the implementation returns
1370-
/// `usize::max_value()`.
1371-
///
1372-
/// There are no guarantees whatsoever that offsetting the pointer will not
1373-
/// overflow or go beyond the allocation that `ptr` points into.
1374-
/// It is up to the caller to ensure that the returned offset is correct
1375-
/// in all terms other than alignment.
1376-
///
1377-
/// # Examples
1378-
///
1379-
/// Accessing adjacent `u8` as `u16`
1380-
///
1381-
/// ```
1382-
/// # #![feature(core_intrinsics)]
1383-
/// # fn foo(n: usize) {
1384-
/// # use std::intrinsics::align_offset;
1385-
/// # use std::mem::align_of;
1386-
/// # unsafe {
1387-
/// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
1388-
/// let ptr = &x[n] as *const u8;
1389-
/// let offset = align_offset(ptr as *const (), align_of::<u16>());
1390-
/// if offset < x.len() - n - 1 {
1391-
/// let u16_ptr = ptr.offset(offset as isize) as *const u16;
1392-
/// assert_ne!(*u16_ptr, 500);
1393-
/// } else {
1394-
/// // while the pointer can be aligned via `offset`, it would point
1395-
/// // outside the allocation
1396-
/// }
1397-
/// # } }
1398-
/// ```
1367+
#[cfg(stage0)]
1368+
/// docs my friends, it's Friday!
13991369
pub fn align_offset(ptr: *const (), align: usize) -> usize;
14001370

14011371
/// Emits a `!nontemporal` store according to LLVM (see their docs).

src/libcore/ptr.rs

+224-46
Original file line numberDiff line numberDiff line change
@@ -1203,15 +1203,22 @@ impl<T: ?Sized> *const T {
12031203
copy_nonoverlapping(self, dest, count)
12041204
}
12051205

1206-
/// Computes the byte offset that needs to be applied in order to
1207-
/// make the pointer aligned to `align`.
1206+
/// Computes the offset that needs to be applied to the pointer in order to make it aligned to
1207+
/// `align`.
1208+
///
12081209
/// If it is not possible to align the pointer, the implementation returns
12091210
/// `usize::max_value()`.
12101211
///
1211-
/// There are no guarantees whatsoever that offsetting the pointer will not
1212-
/// overflow or go beyond the allocation that the pointer points into.
1213-
/// It is up to the caller to ensure that the returned offset is correct
1214-
/// in all terms other than alignment.
1212+
/// The offset is expressed in number of `T` elements, and not bytes. The value returned can be
1213+
/// used with the `offset` or `offset_to` methods.
1214+
///
1215+
/// There are no guarantees whatsoever that offsetting the pointer will not overflow or go
1216+
/// beyond the allocation that the pointer points into. It is up to the caller to ensure that
1217+
/// the returned offset is correct in all terms other than alignment.
1218+
///
1219+
/// # Panics
1220+
///
1221+
/// The function panics if `align` is not a power-of-two.
12151222
///
12161223
/// # Examples
12171224
///
@@ -1235,13 +1242,30 @@ impl<T: ?Sized> *const T {
12351242
/// # } }
12361243
/// ```
12371244
#[unstable(feature = "align_offset", issue = "44488")]
1238-
pub fn align_offset(self, align: usize) -> usize {
1245+
#[cfg(not(stage0))]
1246+
pub fn align_offset(self, align: usize) -> usize where T: Sized {
1247+
if !align.is_power_of_two() {
1248+
panic!("align_offset: align is not a power-of-two");
1249+
}
1250+
unsafe {
1251+
align_offset(self, align)
1252+
}
1253+
}
1254+
1255+
/// definitely docs.
1256+
#[unstable(feature = "align_offset", issue = "44488")]
1257+
#[cfg(stage0)]
1258+
pub fn align_offset(self, align: usize) -> usize where T: Sized {
1259+
if !align.is_power_of_two() {
1260+
panic!("align_offset: align is not a power-of-two");
1261+
}
12391262
unsafe {
1240-
intrinsics::align_offset(self as *const _, align)
1263+
intrinsics::align_offset(self as *const (), align)
12411264
}
12421265
}
12431266
}
12441267

1268+
12451269
#[lang = "mut_ptr"]
12461270
impl<T: ?Sized> *mut T {
12471271
/// Returns `true` if the pointer is null.
@@ -1574,44 +1598,6 @@ impl<T: ?Sized> *mut T {
15741598
(self as *const T).wrapping_offset_from(origin)
15751599
}
15761600

1577-
/// Computes the byte offset that needs to be applied in order to
1578-
/// make the pointer aligned to `align`.
1579-
/// If it is not possible to align the pointer, the implementation returns
1580-
/// `usize::max_value()`.
1581-
///
1582-
/// There are no guarantees whatsoever that offsetting the pointer will not
1583-
/// overflow or go beyond the allocation that the pointer points into.
1584-
/// It is up to the caller to ensure that the returned offset is correct
1585-
/// in all terms other than alignment.
1586-
///
1587-
/// # Examples
1588-
///
1589-
/// Accessing adjacent `u8` as `u16`
1590-
///
1591-
/// ```
1592-
/// # #![feature(align_offset)]
1593-
/// # fn foo(n: usize) {
1594-
/// # use std::mem::align_of;
1595-
/// # unsafe {
1596-
/// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
1597-
/// let ptr = &x[n] as *const u8;
1598-
/// let offset = ptr.align_offset(align_of::<u16>());
1599-
/// if offset < x.len() - n - 1 {
1600-
/// let u16_ptr = ptr.offset(offset as isize) as *const u16;
1601-
/// assert_ne!(*u16_ptr, 500);
1602-
/// } else {
1603-
/// // while the pointer can be aligned via `offset`, it would point
1604-
/// // outside the allocation
1605-
/// }
1606-
/// # } }
1607-
/// ```
1608-
#[unstable(feature = "align_offset", issue = "44488")]
1609-
pub fn align_offset(self, align: usize) -> usize {
1610-
unsafe {
1611-
intrinsics::align_offset(self as *const _, align)
1612-
}
1613-
}
1614-
16151601
/// Calculates the offset from a pointer (convenience for `.offset(count as isize)`).
16161602
///
16171603
/// `count` is in units of T; e.g. a `count` of 3 represents a pointer
@@ -2281,8 +2267,200 @@ impl<T: ?Sized> *mut T {
22812267
{
22822268
swap(self, with)
22832269
}
2270+
2271+
/// Computes the offset that needs to be applied to the pointer in order to make it aligned to
2272+
/// `align`.
2273+
///
2274+
/// If it is not possible to align the pointer, the implementation returns
2275+
/// `usize::max_value()`.
2276+
///
2277+
/// The offset is expressed in number of `T` elements, and not bytes. The value returned can be
2278+
/// used with the `offset` or `offset_to` methods.
2279+
///
2280+
/// There are no guarantees whatsoever that offsetting the pointer will not overflow or go
2281+
/// beyond the allocation that the pointer points into. It is up to the caller to ensure that
2282+
/// the returned offset is correct in all terms other than alignment.
2283+
///
2284+
/// # Panics
2285+
///
2286+
/// The function panics if `align` is not a power-of-two.
2287+
///
2288+
/// # Examples
2289+
///
2290+
/// Accessing adjacent `u8` as `u16`
2291+
///
2292+
/// ```
2293+
/// # #![feature(align_offset)]
2294+
/// # fn foo(n: usize) {
2295+
/// # use std::mem::align_of;
2296+
/// # unsafe {
2297+
/// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
2298+
/// let ptr = &x[n] as *const u8;
2299+
/// let offset = ptr.align_offset(align_of::<u16>());
2300+
/// if offset < x.len() - n - 1 {
2301+
/// let u16_ptr = ptr.offset(offset as isize) as *const u16;
2302+
/// assert_ne!(*u16_ptr, 500);
2303+
/// } else {
2304+
/// // while the pointer can be aligned via `offset`, it would point
2305+
/// // outside the allocation
2306+
/// }
2307+
/// # } }
2308+
/// ```
2309+
#[unstable(feature = "align_offset", issue = "44488")]
2310+
#[cfg(not(stage0))]
2311+
pub fn align_offset(self, align: usize) -> usize where T: Sized {
2312+
if !align.is_power_of_two() {
2313+
panic!("align_offset: align is not a power-of-two");
2314+
}
2315+
unsafe {
2316+
align_offset(self, align)
2317+
}
2318+
}
2319+
2320+
/// definitely docs.
2321+
#[unstable(feature = "align_offset", issue = "44488")]
2322+
#[cfg(stage0)]
2323+
pub fn align_offset(self, align: usize) -> usize where T: Sized {
2324+
if !align.is_power_of_two() {
2325+
panic!("align_offset: align is not a power-of-two");
2326+
}
2327+
unsafe {
2328+
intrinsics::align_offset(self as *const (), align)
2329+
}
2330+
}
2331+
}
2332+
2333+
/// Align pointer `p`.
2334+
///
2335+
/// Calculate offset (in terms of elements of `stride` stride) that has to be applied
2336+
/// to pointer `p` so that pointer `p` would get aligned to `a`.
2337+
///
2338+
/// Note: This implementation has been carefully tailored to not panic. It is UB for this to panic.
2339+
/// The only real change that can be made here is change of `INV_TABLE_MOD_16` and associated
2340+
/// constants.
2341+
///
2342+
/// If we ever decide to make it possible to call the intrinsic with `a` that is not a
2343+
/// power-of-two, it will probably be more prudent to just change to a naive implementation rather
2344+
/// than trying to adapt this to accommodate that change.
2345+
///
2346+
/// Any questions go to @nagisa.
2347+
#[lang="align_offset"]
2348+
#[cfg(not(stage0))]
2349+
pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
2350+
/// Calculate multiplicative modular inverse of `x` modulo `m`.
2351+
///
2352+
/// This implementation is tailored for align_offset and has following preconditions:
2353+
///
2354+
/// * `m` is a power-of-two;
2355+
/// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
2356+
///
2357+
/// Implementation of this function shall not panic. Ever.
2358+
#[inline]
2359+
fn mod_inv(x: usize, m: usize) -> usize {
2360+
/// Multiplicative modular inverse table modulo 2⁴ = 16.
2361+
///
2362+
/// Note, that this table does not contain values where inverse does not exist (i.e. for
2363+
/// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.)
2364+
const INV_TABLE_MOD_16: [usize; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
2365+
/// Modulo for which the `INV_TABLE_MOD_16` is intended.
2366+
const INV_TABLE_MOD: usize = 16;
2367+
/// INV_TABLE_MOD²
2368+
const INV_TABLE_MOD_SQUARED: usize = INV_TABLE_MOD * INV_TABLE_MOD;
2369+
2370+
let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1];
2371+
if m <= INV_TABLE_MOD {
2372+
return table_inverse & (m - 1);
2373+
} else {
2374+
// We iterate "up" using the following formula:
2375+
//
2376+
// $$ xy ≡ 1 (mod 2ⁿ) → xy (2 - xy) ≡ 1 (mod 2²ⁿ) $$
2377+
//
2378+
// until 2²ⁿ ≥ m. Then we can reduce to our desired `m` by taking the result `mod m`.
2379+
let mut inverse = table_inverse;
2380+
let mut going_mod = INV_TABLE_MOD_SQUARED;
2381+
loop {
2382+
// y = y * (2 - xy) mod n
2383+
//
2384+
// Note, that we use wrapping operations here intentionally – the original formula
2385+
// uses e.g. subtraction `mod n`. It is entirely fine to do them `mod
2386+
// usize::max_value()` instead, because we take the result `mod n` at the end
2387+
// anyway.
2388+
inverse = inverse.wrapping_mul(
2389+
2usize.wrapping_sub(x.wrapping_mul(inverse))
2390+
) & (going_mod - 1);
2391+
if going_mod > m {
2392+
return inverse & (m - 1);
2393+
}
2394+
going_mod = going_mod.wrapping_mul(going_mod);
2395+
}
2396+
}
2397+
}
2398+
2399+
let stride = ::mem::size_of::<T>();
2400+
let a_minus_one = a.wrapping_sub(1);
2401+
let pmoda = p as usize & a_minus_one;
2402+
2403+
if pmoda == 0 {
2404+
// Already aligned. Yay!
2405+
return 0;
2406+
}
2407+
2408+
if stride <= 1 {
2409+
return if stride == 0 {
2410+
// If the pointer is not aligned, and the element is zero-sized, then no amount of
2411+
// elements will ever align the pointer.
2412+
!0
2413+
} else {
2414+
a.wrapping_sub(pmoda)
2415+
};
2416+
}
2417+
2418+
let smoda = stride & a_minus_one;
2419+
// a is power-of-two so cannot be 0. stride = 0 is handled above.
2420+
let gcdpow = intrinsics::cttz_nonzero(stride).min(intrinsics::cttz_nonzero(a));
2421+
let gcd = 1usize << gcdpow;
2422+
2423+
if gcd == 1 {
2424+
// This branch solves for the variable $o$ in following linear congruence equation:
2425+
//
2426+
// ⎰ p + o ≡ 0 (mod a) # $p + o$ must be aligned to specified alignment $a$
2427+
// ⎱ o ≡ 0 (mod s) # offset $o$ must be a multiple of stride $s$
2428+
//
2429+
// where
2430+
//
2431+
// * a, s are co-prime
2432+
//
2433+
// This gives us the formula below:
2434+
//
2435+
// o = (a - (p mod a)) * (s⁻¹ mod a) * s
2436+
//
2437+
// The first term is “the relative alignment of p to a”, the second term is “how does
2438+
// incrementing p by one s change the relative alignment of p”, the third term is
2439+
// translating change in units of s to a byte count.
2440+
//
2441+
// Furthermore, the result produced by this solution is not “minimal”, so it is necessary
2442+
// to take the result $o mod lcm(s, a)$. Since $s$ and $a$ are co-prime (i.e. $gcd(s, a) =
2443+
// 1$) and $lcm(s, a) = s * a / gcd(s, a)$, we can replace $lcm(s, a)$ with just a $s * a$.
2444+
//
2445+
// (Author note: we decided later on to express the offset in "elements" rather than bytes,
2446+
// which drops the multiplication by `s` on both sides of the modulo.)
2447+
return intrinsics::unchecked_rem(a.wrapping_sub(pmoda).wrapping_mul(mod_inv(smoda, a)), a);
2448+
}
2449+
2450+
if p as usize & (gcd - 1) == 0 {
2451+
// This can be aligned, but `a` and `stride` are not co-prime, so a somewhat adapted
2452+
// formula is used.
2453+
let j = a.wrapping_sub(pmoda) >> gcdpow;
2454+
let k = smoda >> gcdpow;
2455+
return intrinsics::unchecked_rem(j.wrapping_mul(mod_inv(k, a)), a >> gcdpow);
2456+
}
2457+
2458+
// Cannot be aligned at all.
2459+
return usize::max_value();
22842460
}
22852461

2462+
2463+
22862464
// Equality for pointers
22872465
#[stable(feature = "rust1", since = "1.0.0")]
22882466
impl<T: ?Sized> PartialEq for *const T {

0 commit comments

Comments
 (0)