diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 543f9da33e..bef66fab9d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ env: # `ZC_NIGHTLY_XXX` are flags that we add to `XXX` only on the nightly # toolchain. ZC_NIGHTLY_RUSTFLAGS: -Zrandomize-layout - ZC_NIGHTLY_MIRIFLAGS: "-Zmiri-symbolic-alignment-check -Zmiri-strict-provenance -Zmiri-backtrace=full" + ZC_NIGHTLY_MIRIFLAGS: "-Zmiri-strict-provenance -Zmiri-backtrace=full" jobs: build_test: diff --git a/README.md b/README.md index 1c8285d75f..a9e4cfd049 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,14 @@ so you don't have to. ## Overview -Zerocopy provides four core marker traits, each of which can be derived +Zerocopy provides five core marker traits, each of which can be derived (e.g., `#[derive(FromZeroes)]`): - `FromZeroes` indicates that a sequence of zero bytes represents a valid instance of a type - `FromBytes` indicates that a type may safely be converted from an arbitrary byte sequence +- `TryFromBytes` supports non-`FromBytes` types by providing the ability + to check the validity of a conversion at runtime - `AsBytes` indicates that a type may safely be converted *to* a byte sequence - `Unaligned` indicates that a type's alignment requirement is 1 diff --git a/src/lib.rs b/src/lib.rs index fbd094843b..6df32379c0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,12 +18,14 @@ //! //! # Overview //! -//! Zerocopy provides four core marker traits, each of which can be derived +//! Zerocopy provides five core marker traits, each of which can be derived //! (e.g., `#[derive(FromZeroes)]`): //! - [`FromZeroes`] indicates that a sequence of zero bytes represents a valid //! instance of a type //! - [`FromBytes`] indicates that a type may safely be converted from an //! arbitrary byte sequence +//! - [`TryFromBytes`] supports non-`FromBytes` types by providing the ability +//! to check the validity of a conversion at runtime //! - [`AsBytes`] indicates that a type may safely be converted *to* a byte //! sequence //! - [`Unaligned`] indicates that a type's alignment requirement is 1 @@ -247,7 +249,8 @@ use core::{ NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, Wrapping, }, ops::{Deref, DerefMut}, - ptr, slice, + ptr::{self, NonNull}, + slice, }; #[cfg(feature = "alloc")] @@ -255,9 +258,20 @@ extern crate alloc; #[cfg(feature = "alloc")] use { alloc::{boxed::Box, vec::Vec}, - core::{alloc::Layout, ptr::NonNull}, + core::alloc::Layout, }; +use crate::util::Ptr; + +// For each polyfill, as soon as the corresponding feature is stable, the +// polyfill import will be unused because method/function resolution will prefer +// the inherent method/function over a trait method/function. Thus, we suppress +// the `unused_imports` warning. +// +// See the documentation on `util::polyfills` for more information. +#[allow(unused_imports)] +use crate::util::polyfills::{NonNullExt as _, NonNullSliceExt as _}; + // This is a hack to allow zerocopy-derive derives to work in this crate. They // assume that zerocopy is linked as an extern crate, so they access items from // it as `zerocopy::Xxx`. This makes that still work. 
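The overview above distinguishes infallible conversions (`FromBytes`) from runtime-checked ones (`TryFromBytes`). A minimal sketch of that distinction, assuming the built-in `char` impl and the `try_transmute!` macro introduced later in this change:

```rust
use zerocopy::{transmute, try_transmute};

fn main() {
    // `u32: FromBytes` - any four initialized bytes are a valid `u32`,
    // so the conversion cannot fail.
    let n: u32 = transmute!([0xFFu8; 4]);
    assert_eq!(n, u32::MAX);

    // `char: TryFromBytes` but not `FromBytes` - surrogate code points
    // are rejected by the runtime validity check.
    let ok: Option<char> = try_transmute!(108u32);
    assert_eq!(ok, Some('l'));
    let bad: Option<char> = try_transmute!(0xD800u32);
    assert_eq!(bad, None);
}
```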
@@ -352,9 +366,12 @@ impl SizeInfo { } } -#[cfg_attr(test, derive(Copy, Clone, Debug))] -enum _CastType { - _Prefix, +#[doc(hidden)] +#[derive(Copy, Clone)] +#[cfg_attr(test, derive(Debug))] +#[allow(missing_debug_implementations)] +pub enum CastType { + Prefix, _Suffix, } @@ -458,17 +475,20 @@ impl DstLayout { /// /// # Panics /// + /// `validate_cast_and_convert_metadata` will panic if `self` describes a + /// DST whose trailing slice element is zero-sized. + /// /// If `addr + bytes_len` overflows `usize`, /// `validate_cast_and_convert_metadata` may panic, or it may return /// incorrect results. No guarantees are made about when /// `validate_cast_and_convert_metadata` will panic. The caller should not /// rely on `validate_cast_and_convert_metadata` panicking in any particular /// condition, even if `debug_assertions` are enabled. - const fn _validate_cast_and_convert_metadata( + const fn validate_cast_and_convert_metadata( &self, addr: usize, bytes_len: usize, - cast_type: _CastType, + cast_type: CastType, ) -> Option<(usize, usize)> { // `debug_assert!`, but with `#[allow(clippy::arithmetic_side_effects)]`. macro_rules! __debug_assert { @@ -514,8 +534,8 @@ impl DstLayout { // address for a suffix cast (`addr + bytes_len`) is not aligned, // then no valid start address will be aligned either. let offset = match cast_type { - _CastType::_Prefix => 0, - _CastType::_Suffix => bytes_len, + CastType::Prefix => 0, + CastType::_Suffix => bytes_len, }; // Addition is guaranteed not to overflow because `offset <= @@ -541,7 +561,7 @@ impl DstLayout { // multiple of the alignment, or will be larger than // `bytes_len`. let max_total_bytes = - util::_round_down_to_next_multiple_of_alignment(bytes_len, self._align); + util::round_down_to_next_multiple_of_alignment(bytes_len, self._align); // Calculate the maximum number of bytes that could be consumed // by the trailing slice. // @@ -595,14 +615,14 @@ impl DstLayout { __debug_assert!(self_bytes <= bytes_len); let split_at = match cast_type { - _CastType::_Prefix => self_bytes, + CastType::Prefix => self_bytes, // Guaranteed not to underflow: // - In the `Sized` branch, only returns `size` if `size <= // bytes_len`. // - In the `SliceDst` branch, calculates `self_bytes <= // max_toatl_bytes`, which is upper-bounded by `bytes_len`. #[allow(clippy::arithmetic_side_effects)] - _CastType::_Suffix => bytes_len - self_bytes, + CastType::_Suffix => bytes_len - self_bytes, }; Some((elems, split_at)) @@ -625,12 +645,27 @@ impl DstLayout { pub unsafe trait KnownLayout: sealed::KnownLayoutSealed { #[doc(hidden)] const LAYOUT: DstLayout; + + /// SAFETY: The returned pointer has the same address and provenance as + /// `bytes`. If `Self` is a DST, the returned pointer's referent has `elems` + /// elements in its trailing slice. If `Self` is sized, `elems` is ignored. + #[doc(hidden)] + fn raw_from_ptr_len(bytes: NonNull, elems: usize) -> NonNull; } impl sealed::KnownLayoutSealed for [T] {} // SAFETY: Delegates safety to `DstLayout::for_slice`. unsafe impl KnownLayout for [T] { const LAYOUT: DstLayout = DstLayout::for_slice::(); + + // SAFETY: `.cast` preserves address and provenance. The returned pointer + // refers to an object with `elems` elements by construction. + #[inline(always)] + fn raw_from_ptr_len(data: NonNull, elems: usize) -> NonNull { + // TODO(#67): Remove this allow. See NonNullExt for more details. 
+ #[allow(unstable_name_collisions)] + NonNull::slice_from_raw_parts(data.cast::<T>(), elems) + } } #[rustfmt::skip] @@ -1041,6 +1076,296 @@ pub unsafe trait FromBytes: FromZeroes { } } +/// Types whose validity can be checked at runtime, allowing them to be +/// conditionally converted from byte slices. +/// +/// WARNING: Do not implement this trait yourself! Instead, use +/// `#[derive(TryFromBytes)]`. +/// +/// `TryFromBytes` types can safely be deserialized from an untrusted sequence +/// of bytes by performing a runtime check that the byte sequence contains a +/// valid instance of `Self`. +/// +/// `TryFromBytes` is ignorant of byte order. For byte order-aware types, see +/// the [`byteorder`] module. +/// +/// # What is a "valid instance"? +/// +/// In Rust, each type has *bit validity*, which refers to the set of bit +/// patterns which may appear in an instance of that type. It is impossible for +/// safe Rust code to produce values which violate bit validity (i.e., values +/// outside of the "valid" set of bit patterns). If `unsafe` code produces an +/// invalid value, this is considered [undefined behavior]. +/// +/// Rust's bit validity rules are currently being decided, which means that some +/// types have three classes of bit patterns: those which are definitely valid, +/// and whose validity is documented in the language; those which may or may not +/// be considered valid at some point in the future; and those which are +/// definitely invalid. +/// +/// Zerocopy takes a conservative approach, and only considers a bit pattern to +/// be valid if its validity is a documented guarantee provided by the +/// language. +/// +/// For most use cases, Rust's current guarantees align with programmers' +/// intuitions about what ought to be valid. As a result, zerocopy's +/// conservatism should not affect most users. One notable exception is unions, +/// whose bit validity is very up in the air; zerocopy does not permit +/// implementing `TryFromBytes` for any union type. +/// +/// If you are negatively affected by lack of support for a particular type, +/// we encourage you to let us know by [filing an issue][github-repo]. +/// +/// # Safety +/// +/// On its own, `T: TryFromBytes` does not make any guarantees about the layout +/// or representation of `T`. It merely provides the ability to perform a +/// validity check at runtime via methods like [`try_from_ref`]. +/// +/// Currently, it is not possible to stably implement `TryFromBytes` other than +/// by using `#[derive(TryFromBytes)]`. While there are `#[doc(hidden)]` items +/// on this trait that provide well-defined safety invariants, no stability +/// guarantees are made with respect to these items. In particular, future +/// releases of zerocopy may make backwards-breaking changes to these items, +/// including changes that only affect soundness, which may cause code which +/// uses those items to silently become unsound. +/// +/// [undefined behavior]: https://raphlinus.github.io/programming/rust/2018/08/17/undefined-behavior.html +/// [github-repo]: https://github.com/google/zerocopy +/// [`try_from_ref`]: TryFromBytes::try_from_ref +pub unsafe trait TryFromBytes: KnownLayout { + /// Does a given memory range contain a valid instance of `Self`? + /// + /// # Safety + /// + /// ## Preconditions + /// + /// The memory referenced by `candidate` may only be accessed via reads for + /// the duration of this method call. This prohibits writes through mutable + /// references and through [`UnsafeCell`]s.
There may exist immutable + /// references to the same memory which contain `UnsafeCell`s so long as: + /// - Those `UnsafeCell`s exist at the same byte ranges as `UnsafeCell`s in + /// `Self`. This is a bidirectional property: `Self` may not contain + /// `UnsafeCell`s where other references to the same memory do not, and + /// vice-versa. + /// - Those `UnsafeCell`s are never used to perform mutation for the + /// duration of this method call. + /// + /// `candidate` is not required to refer to a valid `Self`. However, it must + /// satisfy the requirement that uninitialized bytes may only be present + /// where it is possible for them to be present in `Self`. This is a dynamic + /// property: if, at a particular byte offset, a valid enum discriminant is + /// set, the subsequent bytes may only have uninitialized bytes as + /// specified by the corresponding enum. + /// + /// Formally, given `len = size_of_val_raw(candidate)`, at every byte + /// offset, `b`, in the range `[0, len)`: + /// - If, in all instances `s: Self` of length `len`, the byte at offset `b` + /// in `s` is initialized, then the byte at offset `b` within `*candidate` + /// must be initialized. + /// - Let `c` be the contents of the byte range `[0, b)` in `*candidate`. + /// Let `S` be the subset of valid instances of `Self` of length `len` + /// which contain `c` in the offset range `[0, b)`. If, for all instances + /// of `s: Self` in `S`, the byte at offset `b` in `s` is initialized, + /// then the byte at offset `b` in `*candidate` must be initialized. + /// + /// Pragmatically, this means that if `*candidate` is guaranteed to + /// contain an enum type at a particular offset, and the enum discriminant + /// stored in `*candidate` corresponds to a valid variant of that enum + /// type, then it is guaranteed that the appropriate bytes of `*candidate` + /// are initialized as defined by that variant's bit validity (although + /// note that the variant may contain another enum type, in which case the + /// same rules apply depending on the state of its discriminant, and so on + /// recursively). + /// + /// ## Postconditions + /// + /// Unsafe code may assume that, if `is_bit_valid(candidate)` returns true, + /// `*candidate` contains a valid `Self`. + /// + /// # Panics + /// + /// `is_bit_valid` may panic. Callers are responsible for ensuring that any + /// `unsafe` code remains sound even in the face of `is_bit_valid` + /// panicking. (We support user-defined validation routines; so long as + /// these routines are not required to be `unsafe`, there is no way to + /// ensure that these do not generate panics.) + /// + /// [`UnsafeCell`]: core::cell::UnsafeCell + #[doc(hidden)] + unsafe fn is_bit_valid(candidate: Ptr<'_, Self>) -> bool; + + /// Attempts to interpret a byte slice as a `Self`. + /// + /// `try_from_ref` validates that `bytes` contains a valid `Self`, and that + /// it satisfies `Self`'s alignment requirement. If it does, then `bytes` is + /// reinterpreted as a `Self`. + /// + /// Note that Rust's bit validity rules are still being decided. As such, + /// there exist types whose bit validity is ambiguous. See the + /// `TryFromBytes` docs for a discussion of how these cases are handled. + // TODO(#251): In a future in which we distinguish between `FromBytes` and + // `RefFromBytes`, this requires `where Self: RefFromBytes` to disallow + // interior mutability. + #[inline] + #[doc(hidden)] // TODO(#5): Finalize name before removing this attribute.
+ fn try_from_ref(bytes: &[u8]) -> Option<&Self> { + let maybe_self = Ptr::from(bytes).try_cast_into_no_leftover::<Self>()?; + + // SAFETY: + // - Since `bytes` is an immutable reference, we know that no mutable + // references exist to this memory region. + // - Since `[u8]` contains no `UnsafeCell`s, we know there are no + // `&UnsafeCell` references to this memory region. + // - Since we don't permit implementing `TryFromBytes` for types which + // contain `UnsafeCell`s, there are no `UnsafeCell`s in `Self`, and so + // the requirement that all references contain `UnsafeCell`s at the + // same offsets is trivially satisfied. + // - All bytes of `bytes` are initialized. + // + // This call may panic. If that happens, it doesn't cause any soundness + // issues, as we have not generated any invalid state which we need to + // fix before returning. + if unsafe { !Self::is_bit_valid(maybe_self) } { + return None; + } + + // SAFETY: TODO + // + // Old safety comment: + // + // SAFETY: + // - `is_bit_valid` guaranteed that `*maybe_self` contains a valid + // `Self` + // - Since `Self` is not allowed to contain any `UnsafeCell`s: + // - The caller cannot use the `&Self` to perform interior mutation on + // a byte range that `bytes` views as not containing `UnsafeCell`s + // - The caller cannot use the `&Self` to write invalid values to + // `bytes` (namely, uninitialized bytes, as `[u8]` has no other bit + // validity constraints) + // - Since `[u8]` does not contain any `UnsafeCell`s, we are guaranteed + // that, having verified that `maybe_self` currently contains a valid + // `Self`, code with access to `bytes` cannot cause it to no longer + // contain a valid `Self` in the future + Some(unsafe { maybe_self.as_ref() }) + } + + /// Attempts to interpret a mutable byte slice as a `Self`. + /// + /// `try_from_mut` validates that `bytes` contains a valid `Self`, and that + /// it satisfies `Self`'s alignment requirement. If it does, then `bytes` is + /// reinterpreted as a `Self`. + /// + /// Note that Rust's bit validity rules are still being decided. As such, + /// there exist types whose bit validity is ambiguous. See the + /// `TryFromBytes` docs for a discussion of how these cases are handled. + // TODO(#251): In a future in which we distinguish between `FromBytes` and + // `RefFromBytes`, this requires `where Self: RefFromBytes` to disallow + // interior mutability. + #[inline] + #[doc(hidden)] // TODO(#5): Finalize name before removing this attribute. + fn try_from_mut(bytes: &mut [u8]) -> Option<&mut Self> + where + Self: AsBytes, + { + let mut maybe_self = Ptr::from(bytes).try_cast_into_no_leftover::<Self>()?; + + // SAFETY: TODO + // + // Old safety comment: + // + // SAFETY: + // - `bytes` is a `&[u8]`, which guarantees that its length doesn't + // overflow `isize` and that it comes from a single allocation + // - `validate_exact_cast` checked alignment + // - all bytes of `bytes` are initialized + // + // This call may panic. If that happens, it doesn't cause any soundness + // issues, as we have not generated any invalid state which we need to + // fix before returning.
+ if unsafe { !Self::is_bit_valid(maybe_self) } { + return None; + } + + // SAFETY: TODO + // + // Old safety comment: + // + // SAFETY: + // - `is_bit_valid` guaranteed that `*maybe_self` contains a valid + // `Self` + // - Since `Self: AsBytes`, any values written to the returned `&mut + // Self` will be valid for `[u8]` once it is accessible again + // - Since the returned `&mut Self` has the same lifetime as the input + // `&mut [u8]`, that input cannot be directly mutated so long as the + // returned reference exists. Thus, having verified that `maybe_self` + // currently contains a valid `Self`, code with access to `bytes` + // cannot cause it to no longer contain a valid `Self` in the future. + Some(unsafe { maybe_self.as_mut() }) + } + + /// Attempts to read a `Self` from a byte slice. + /// + /// `try_read_from` validates that `bytes` contains a valid `Self`. If it + /// does, then the contents of `bytes` are copied and reinterpreted as a + /// `Self`. + /// + /// Note that Rust's bit validity rules are still being decided. As such, + /// there exist types whose bit validity is ambiguous. See the + /// `TryFromBytes` docs for a discussion of how these cases are handled. + #[inline] + #[doc(hidden)] // TODO(#5): Finalize name before removing this attribute. + fn try_read_from(bytes: &[u8]) -> Option<Self> + where + Self: Sized, + { + // A note on performance: We unconditionally read `size_of::<T>()` + // bytes into the local stack frame before validation. This has + // advantages and disadvantages: + // - It allows `MaybeUninit<T>` to be aligned to `T`, and thus allows + // `is_bit_valid` to operate on an aligned value. + // - It requires us to perform the copy even if validation fails. + // + // The authors believe that this is a worthwhile tradeoff. Allowing + // `is_bit_valid` to operate on an aligned value can make the generated + // machine code significantly smaller and faster. On the other hand, we + // expect the vast majority of calls to `try_read_from` to succeed, and + // in these cases, the copy will not be wasted. + let maybe_uninit = MaybeUninit::<Self>::read_from(bytes)?; + let candidate = Ptr::from(&maybe_uninit); + // SAFETY: TODO + let candidate = unsafe { candidate.cast::<Self>() }; + + // SAFETY: TODO + // + // Old safety comment: + // + // SAFETY: + // - `MaybeUninit<Self>` has the same alignment as `Self`, so this is + // aligned + // - `maybe_uninit` was initialized from `bytes`, so all of its bytes + // are initialized + // + // This call may panic. If that happens, it doesn't cause any soundness + // issues, as we have not generated any invalid state which we need to + // fix before returning. The `MaybeUninit<Self>` will be dropped, but it does + // not have any validity requirements, so it may soundly be dropped in + // any state. + if unsafe { !Self::is_bit_valid(candidate) } { + return None; + } + + // SAFETY: TODO + // + // Old safety comment: + // + // SAFETY: `is_bit_valid` promises that it only returns true if its + // argument contains a valid `Self`. This is exactly the safety + // precondition of `assume_init`. + Some(unsafe { maybe_uninit.assume_init() }) + } +} + /// Types which are safe to treat as an immutable byte slice. /// /// WARNING: Do not implement this trait yourself! Instead, use @@ -1157,7 +1482,8 @@ pub unsafe trait AsBytes { // reference, the only other references to this memory region that // could exist are other immutable references, and those don't allow // mutation.
`AsBytes` prohibits types which contain `UnsafeCell`s, - // which are the only types for which this rule wouldn't be sufficient. + // which are the only types for which this rule wouldn't be + // sufficient. // - The total size of the resulting slice is no larger than // `isize::MAX` because no allocation produced by safe code can be // larger than `isize::MAX`. @@ -1271,19 +1597,20 @@ safety_comment! { /// SAFETY: /// Per the reference [1], "the unit tuple (`()`) ... is guaranteed as a /// zero-sized type to have a size of 0 and an alignment of 1." - /// - `FromZeroes`, `FromBytes`: There is only one possible sequence of 0 - /// bytes, and `()` is inhabited. + /// - `TryFromBytes` (with no validator), `FromZeroes`, `FromBytes`: There + /// is only one possible sequence of 0 bytes, and `()` is inhabited. /// - `AsBytes`: Since `()` has size 0, it contains no padding bytes. /// - `Unaligned`: `()` has alignment 1. /// /// [1] https://doc.rust-lang.org/reference/type-layout.html#tuple-layout - unsafe_impl!((): FromZeroes, FromBytes, AsBytes, Unaligned); + unsafe_impl!((): TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); assert_unaligned!(()); } safety_comment! { /// SAFETY: - /// - `FromZeroes`, `FromBytes`: all bit patterns are valid for integers [1] + /// - `TryFromBytes` (with no validator), `FromZeroes`, `FromBytes`: all bit + /// patterns are valid for integers [1] /// - `AsBytes`: integers have no padding bytes [1] /// - `Unaligned` (`u8` and `i8` only): The reference [2] specifies the size /// of `u8` and `i8` as 1 byte. We also know that: @@ -1295,30 +1622,31 @@ safety_comment! { /// [1] TODO(https://github.com/rust-lang/reference/issues/1291): Once the /// reference explicitly guarantees these properties, cite it. /// [2] https://doc.rust-lang.org/reference/type-layout.html#primitive-data-layout - unsafe_impl!(u8: FromZeroes, FromBytes, AsBytes, Unaligned); - unsafe_impl!(i8: FromZeroes, FromBytes, AsBytes, Unaligned); + unsafe_impl!(u8: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + unsafe_impl!(i8: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); assert_unaligned!(u8, i8); - unsafe_impl!(u16: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(i16: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(u32: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(i32: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(u64: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(i64: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(u128: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(i128: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(usize: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(isize: FromZeroes, FromBytes, AsBytes); + unsafe_impl!(u16: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(i16: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(u32: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(i32: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(u64: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(i64: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(u128: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(i128: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(usize: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(isize: TryFromBytes, FromZeroes, FromBytes, AsBytes); } safety_comment! 
{ /// SAFETY: - /// - `FromZeroes`, `FromBytes`: the `{f32,f64}::from_bits` constructors' - /// documentation [1,2] states that they are currently equivalent to - /// `transmute`. [3] - /// - `AsBytes`: the `{f32,f64}::to_bits` methods' documentation [4,5] + /// - `TryFromBytes` (with no validator), `FromZeroes`, `FromBytes`: the + /// `{f32,f64}::from_bits` constructors' documentation [1] [2] states that + /// they are currently equivalent to `transmute`. [3] + /// - `AsBytes`: the `{f32,f64}::to_bits` methods' documentation [4] [5] /// states that they are currently equivalent to `transmute`. [3] /// - /// TODO: Make these arguments more precisely in terms of the documentation. + /// TODO(#61): Make these arguments more precisely in terms of the + /// documentation. /// /// [1] https://doc.rust-lang.org/nightly/std/primitive.f32.html#method.from_bits /// [2] https://doc.rust-lang.org/nightly/std/primitive.f64.html#method.from_bits @@ -1326,8 +1654,8 @@ safety_comment! { /// reference explicitly guarantees these properties, cite it. /// [4] https://doc.rust-lang.org/nightly/std/primitive.f32.html#method.to_bits /// [5] https://doc.rust-lang.org/nightly/std/primitive.f64.html#method.to_bits - unsafe_impl!(f32: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(f64: FromZeroes, FromBytes, AsBytes); + unsafe_impl!(f32: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(f64: TryFromBytes, FromZeroes, FromBytes, AsBytes); } safety_comment! { @@ -1343,6 +1671,21 @@ safety_comment! { /// [1] https://doc.rust-lang.org/reference/types/boolean.html unsafe_impl!(bool: FromZeroes, AsBytes, Unaligned); assert_unaligned!(bool); + /// SAFETY: + /// + /// TODO; old safety comment: + /// + /// - Since `bool`'s single byte is always initialized, the `is_bit_valid` + /// caller is required to ensure that the referent of the `NonNull` + /// argument is initialized. Since `u8` has no alignment requirement, this + /// means that, after converting from `NonNull` to `&u8`, the + /// resulting reference is properly aligned and points to a + /// properly-initialized `u8`. + /// - All values less than 2 are valid instances of `bool` [1], and so this + /// is a sound implementation of `TryFromBytes::is_bit_valid`. + /// + /// [1] https://doc.rust-lang.org/reference/types/boolean.html + unsafe_impl!(bool: TryFromBytes; |byte: &u8| *byte < 2); } safety_comment! { /// SAFETY: @@ -1354,6 +1697,40 @@ safety_comment! { /// /// [1] https://doc.rust-lang.org/reference/types/textual.html unsafe_impl!(char: FromZeroes, AsBytes); + /// SAFETY: + /// + /// TODO; old safety comment: + /// + /// - `MaybeUninit` has no bit validity requirements, and has the same + /// alignment as `char`. Since the `is_bit_valid` caller must promise that + /// the provided `NonNull` is properly-aligned and points a region + /// which is valid for reads, that's all we need to ensure that converting + /// it to a `&MaybeUninit` is sound. + /// - Since we transmute `c` from the bytes passed to `is_bit_valid` without + /// modifying them, and since `char::from_u32` guarantees that it returns + /// `None` if its input is not a valid `char` [1], this function only + /// returns `true` if its argument is a valid `char`, and so this is a + /// sound implementation of `TryFromBytes::is_bit_valid`. + /// + /// Note that it might be slightly simpler to treat `candidate` as a `[u8; + /// 4]` - it would make the code and the safety argument a bit simpler. 
+ /// However, using `&MaybeUninit<char>` ensures that the compiler preserves + /// alignment information, which it may be able to use to produce smaller or + /// better-performing assembly. + /// + /// [1] https://doc.rust-lang.org/std/primitive.char.html#method.from_u32 + /// + /// TODO(https://github.com/rust-lang/reference/pull/1401): Use `&u32` + /// instead once it's guaranteed that `align_of::<u32>() == + /// align_of::<char>()`. + unsafe_impl!(char: TryFromBytes; |candidate: &MaybeUninit<char>| { + // SAFETY: `MaybeUninit<u32>` has no bit validity constraints. + let c: MaybeUninit<u32> = unsafe { mem::transmute(*candidate) }; + // SAFETY: Since all bytes of a `char` must be initialized, the bytes + // passed to this function must all have been initialized. + let c = unsafe { c.assume_init() }; + char::from_u32(c).is_some() + }); } safety_comment! { /// SAFETY: @@ -1366,6 +1743,31 @@ safety_comment! { /// /// [1] https://doc.rust-lang.org/reference/type-layout.html#str-layout unsafe_impl!(str: FromZeroes, AsBytes, Unaligned); + /// SAFETY: + /// + /// TODO; old safety comment: + /// + /// - Since `str`'s bytes are all always initialized, `is_bit_valid`'s + /// caller must ensure that the bytes they pass are all initialized. Since + /// `&[u8]` has no alignment requirement and no bit validity requirement + /// beyond that its bytes be initialized, it is sound to convert the + /// caller's `NonNull<str>` (which they promise is valid for reads) to a + /// `&[u8]`. + /// - `str`'s bit validity requirement is that it is valid UTF-8. [1] Thus, + /// if `from_utf8` can successfully convert `bytes` to a `str`, then the + /// `str` is valid [2], and so this is a sound implementation of + /// `TryFromBytes::is_bit_valid`. + /// + /// [1] https://doc.rust-lang.org/reference/types/textual.html + /// [2] https://doc.rust-lang.org/core/str/fn.from_utf8.html + unsafe_impl!(str: TryFromBytes; |bytes: &[u8]| { + // Note that, while this function has no documented panic conditions, it + // would still be sound even if it panicked. `is_bit_valid` does not + // promise that it will not panic (in fact, it explicitly warns that + // it's a possibility), and we have not violated any safety invariants + // that we must fix before returning. + core::str::from_utf8(bytes).is_ok() + }); } safety_comment! { /// SAFETY: @@ -1403,12 +1805,46 @@ safety_comment! { unsafe_impl!(NonZeroI128: AsBytes); unsafe_impl!(NonZeroUsize: AsBytes); unsafe_impl!(NonZeroIsize: AsBytes); + + /// SAFETY: + /// + /// TODO; old safety comment: + /// + /// - The caller is required to provide a `NonNull<NonZeroXxx>` which is + /// properly aligned and is valid for reads. Since every byte of a + /// `NonZeroXxx` must be initialized, the pointer's referent must have all + /// its bytes initialized. + /// + /// Since `Xxx` has the same size and alignment as `NonZeroXxx`, the + /// provided `NonNull<NonZeroXxx>` is also aligned to `Xxx` and is valid + /// for reads of size `Xxx`. Since `Xxx` has no bit validity requirements + /// other than that its bytes are initialized, this means that the + /// provided `NonNull<NonZeroXxx>` may soundly be converted to a `&Xxx`. + /// - `NonZeroXxx`'s only validity constraint is that it is non-zero, which + /// all of these closures ensure. Thus, these closures are sound + /// implementations of `TryFromBytes::is_bit_valid`.
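The `unsafe_impl!` invocations just below supply these non-zero validators; as a rough sketch of their observable behavior through the checked conversion APIs (using the `try_transmute!` macro defined later in this change):

```rust
use core::num::NonZeroU32;
use zerocopy::try_transmute;

fn main() {
    // The `|n: &u32| *n != 0` closure below rejects exactly the
    // all-zeroes bit pattern and accepts every other `u32` value.
    let nonzero: Option<NonZeroU32> = try_transmute!(5u32);
    assert_eq!(nonzero, NonZeroU32::new(5));

    let zero: Option<NonZeroU32> = try_transmute!(0u32);
    assert_eq!(zero, None);
}
```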
+ unsafe_impl!(NonZeroU8: TryFromBytes; |n: &u8| *n != 0); + unsafe_impl!(NonZeroI8: TryFromBytes; |n: &i8| *n != 0); + unsafe_impl!(NonZeroU16: TryFromBytes; |n: &u16| *n != 0); + unsafe_impl!(NonZeroI16: TryFromBytes; |n: &i16| *n != 0); + unsafe_impl!(NonZeroU32: TryFromBytes; |n: &u32| *n != 0); + unsafe_impl!(NonZeroI32: TryFromBytes; |n: &i32| *n != 0); + unsafe_impl!(NonZeroU64: TryFromBytes; |n: &u64| *n != 0); + unsafe_impl!(NonZeroI64: TryFromBytes; |n: &i64| *n != 0); + unsafe_impl!(NonZeroU128: TryFromBytes; |n: &u128| *n != 0); + unsafe_impl!(NonZeroI128: TryFromBytes; |n: &i128| *n != 0); + unsafe_impl!(NonZeroUsize: TryFromBytes; |n: &usize| *n != 0); + unsafe_impl!(NonZeroIsize: TryFromBytes; |n: &isize| *n != 0); } safety_comment! { /// SAFETY: - /// - `FromZeroes`, `FromBytes`, `AsBytes`: The Rust compiler reuses `0` - /// value to represent `None`, so `size_of::<Option<NonZeroXxx>>() == - /// size_of::<NonZeroXxx>()`; see `NonZeroXxx` documentation. + /// + /// TODO; old safety comment: + /// + /// - `TryFromBytes` (with no validator), `FromZeroes`, `FromBytes`, + /// `AsBytes`: The Rust compiler reuses `0` value to represent `None`, so + /// `size_of::<Option<NonZeroXxx>>() == size_of::<NonZeroXxx>()`; see + /// `NonZeroXxx` documentation. /// - `Unaligned`: `NonZeroU8` and `NonZeroI8` document that /// `Option<NonZeroU8>` and `Option<NonZeroI8>` both have size 1. [1] [2] /// This is worded in a way that makes it unclear whether it's meant as a @@ -1421,32 +1857,37 @@ safety_comment! { /// /// TODO(https://github.com/rust-lang/rust/pull/104082): Cite documentation /// for layout guarantees. - unsafe_impl!(Option<NonZeroU8>: FromZeroes, FromBytes, AsBytes, Unaligned); - unsafe_impl!(Option<NonZeroI8>: FromZeroes, FromBytes, AsBytes, Unaligned); + unsafe_impl!(Option<NonZeroU8>: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + unsafe_impl!(Option<NonZeroI8>: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); assert_unaligned!(Option<NonZeroU8>, Option<NonZeroI8>); - unsafe_impl!(Option<NonZeroU16>: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(Option<NonZeroI16>: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(Option<NonZeroU32>: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(Option<NonZeroI32>: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(Option<NonZeroU64>: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(Option<NonZeroI64>: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(Option<NonZeroU128>: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(Option<NonZeroI128>: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(Option<NonZeroUsize>: FromZeroes, FromBytes, AsBytes); - unsafe_impl!(Option<NonZeroIsize>: FromZeroes, FromBytes, AsBytes); + unsafe_impl!(Option<NonZeroU16>: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(Option<NonZeroI16>: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(Option<NonZeroU32>: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(Option<NonZeroI32>: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(Option<NonZeroU64>: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(Option<NonZeroI64>: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(Option<NonZeroU128>: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(Option<NonZeroI128>: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(Option<NonZeroUsize>: TryFromBytes, FromZeroes, FromBytes, AsBytes); + unsafe_impl!(Option<NonZeroIsize>: TryFromBytes, FromZeroes, FromBytes, AsBytes); } safety_comment! { /// SAFETY: + /// + /// TODO; old safety comment: + /// /// For all `T`, `PhantomData<T>` has size 0 and alignment 1. [1] - /// - `FromZeroes`, `FromBytes`: There is only one possible sequence of 0 - /// bytes, and `PhantomData<T>` is inhabited.
+ /// - `TryFromBytes` (with no validator), `FromZeroes`, `FromBytes`: There + /// is only one possible sequence of 0 bytes, and `PhantomData` is + /// inhabited. /// - `AsBytes`: Since `PhantomData` has size 0, it contains no padding /// bytes. /// - `Unaligned`: Per the preceding reference, `PhantomData` has alignment /// 1. /// /// [1] https://doc.rust-lang.org/std/marker/struct.PhantomData.html#layout-1 + unsafe_impl!(T: ?Sized => TryFromBytes for PhantomData); unsafe_impl!(T: ?Sized => FromZeroes for PhantomData); unsafe_impl!(T: ?Sized => FromBytes for PhantomData); unsafe_impl!(T: ?Sized => AsBytes for PhantomData); @@ -1455,13 +1896,40 @@ safety_comment! { } safety_comment! { /// SAFETY: + /// + /// TODO; old safety comment: + /// /// `Wrapping` is guaranteed by its docs [1] to have the same layout as /// `T`. Also, `Wrapping` is `#[repr(transparent)]`, and has a single - /// field, which is `pub`. Per the reference [2], this means that the + /// field, which is `pub`. Per the nomicon [2], this means that the /// `#[repr(transparent)]` attribute is "considered part of the public ABI". + /// - `TryFromBytes`: `Wrapping` has the same layout and bit validity as + /// `T`, so if the provided `NonNull>` satisfies the + /// preconditions for `TryFromBytes::>::is_bit_valid`, then it + /// is sound to convert it to a `NonNull`, and that pointer satisfies + /// the preconditions for `TryFromBytes::::is_bit_valid`. For DSTs, + /// `Wrapping` and `T` have the same pointer metadata, so pointer + /// casting preserves length. + /// + /// Since their bit validity requirements are the same, + /// `TryFromBytes::::is_bit_valid` is a sound implementation of + /// `TryFromBytes::>::is_bit_valid`. + /// - `FromZeroes`, `FromBytes`: Since it has the same layout as `T`, any + /// valid `T` is a valid `Wrapping`. If `T: FromZeroes`, a sequence of + /// zero bytes is a valid `T`, and thus a valid `Wrapping`. If `T: + /// FromBytes`, any sequence of bytes is a valid `T`, and thus a valid + /// `Wrapping`. /// /// [1] https://doc.rust-lang.org/nightly/core/num/struct.Wrapping.html#layout-1 /// [2] https://doc.rust-lang.org/nomicon/other-reprs.html#reprtransparent + // TODO(#5): Implement `TryFromBytes` for `Wrapping`. + unsafe_impl!(T: TryFromBytes => TryFromBytes for Wrapping; |c: Ptr| { + // Note that this call may panic, but it would still be sound even if it + // did. `is_bit_valid` does not promise that it will not panic (in fact, + // it explicitly warns that it's a possibility), and we have not + // violated any safety invariants that we must fix before returning. + unsafe { T::is_bit_valid(c) } + }); unsafe_impl!(T: FromZeroes => FromZeroes for Wrapping); unsafe_impl!(T: FromBytes => FromBytes for Wrapping); unsafe_impl!(T: AsBytes => AsBytes for Wrapping); @@ -1473,12 +1941,16 @@ safety_comment! { // since it may contain uninitialized bytes. // /// SAFETY: - /// - `FromZeroes`, `FromBytes`: `MaybeUninit` has no restrictions on its - /// contents. Unfortunately, in addition to bit validity, `FromZeroes` and - /// `FromBytes` also require that implementers contain no `UnsafeCell`s. - /// Thus, we require `T: FromZeroes` and `T: FromBytes` in order to ensure - /// that `T` - and thus `MaybeUninit` - contains to `UnsafeCell`s. - /// Thus, requiring that `T` implement each of these traits is sufficient + /// + /// TODO; old safety comment: + /// + /// - `TryFromBytes` (with no validator), `FromZeroes`, `FromBytes`: + /// `MaybeUninit` has no restrictions on its contents. 
Unfortunately, + /// in addition to bit validity, these traits also require that + /// implementers contain no `UnsafeCell`s. Thus, we require a trait bound + /// for `T` in order to ensure that `T` - and thus `MaybeUninit` - + /// contains to `UnsafeCell`s. Thus, requiring that `T` implement each of + /// these traits is sufficient. /// - `Unaligned`: `MaybeUninit` is guaranteed by its documentation [1] /// to have the same alignment as `T`. /// @@ -1489,16 +1961,35 @@ safety_comment! { /// `FromBytes` and `RefFromBytes`, or if we introduce a separate /// `NoCell`/`Freeze` trait, we can relax the trait bounds for `FromZeroes` /// and `FromBytes`. - unsafe_impl!(T: FromZeroes => FromZeroes for MaybeUninit); - unsafe_impl!(T: FromBytes => FromBytes for MaybeUninit); + /// + /// TODO(#251): Replace these bounds with `NoCell` once we support it. + unsafe_impl!(T: TryFromBytes => TryFromBytes for MaybeUninit); + unsafe_impl!(T: TryFromBytes => FromZeroes for MaybeUninit); + unsafe_impl!(T: TryFromBytes => FromBytes for MaybeUninit); unsafe_impl!(T: Unaligned => Unaligned for MaybeUninit); assert_unaligned!(MaybeUninit<()>, MaybeUninit); } safety_comment! { /// SAFETY: + /// + /// TODO; old safety comment: + /// /// `ManuallyDrop` has the same layout as `T`, and accessing the inner value /// is safe (meaning that it's unsound to leave the inner value - /// uninitialized while exposing the `ManuallyDrop` to safe code). + /// uninitialized while exposing the `ManuallyDrop` to safe code). It's + /// nearly certain that `ManuallyDrop` has the same bit validity as `T`, but + /// this is technically not yet documented. [1] + /// - `TryFromBytes`: `ManuallyDrop` has the same layout and bit validity + /// as `T`, so if the provided `NonNull>` satisfies the + /// preconditions for `TryFromBytes::>::is_bit_valid`, + /// then it is sound to convert it to a `NonNull`, and that pointer + /// satisfies the preconditions for `TryFromBytes::::is_bit_valid`. + /// For DSTs, `ManuallyDrop` and `T` have the same pointer metadata, + /// so pointer casting preserves length. + /// + /// Since their bit validity requirements are the same, + /// `TryFromBytes::::is_bit_valid` is a sound implementation of + /// `TryFromBytes::>::is_bit_valid`. /// - `FromZeroes`, `FromBytes`: Since it has the same layout as `T`, any /// valid `T` is a valid `ManuallyDrop`. If `T: FromZeroes`, a sequence /// of zero bytes is a valid `T`, and thus a valid `ManuallyDrop`. If @@ -1511,6 +2002,19 @@ safety_comment! { /// code can only ever access a `ManuallyDrop` with all initialized bytes. /// - `Unaligned`: `ManuallyDrop` has the same layout (and thus alignment) /// as `T`, and `T: Unaligned` guarantees that that alignment is 1. + /// + /// [1] https://github.com/rust-lang/rust/pull/115522 + /// + /// TODO(https://github.com/rust-lang/rust/pull/115522): Update these docs + /// once ManuallyDrop bit validity is guaranteed. + unsafe_impl!( + T: ?Sized TryFromBytes => TryFromBytes for ManuallyDrop; + // Note that this call may panic, but it would still be sound even if it + // did. `is_bit_valid` does not promise that it will not panic (in fact, + // it explicitly warns that it's a possibility), and we have not + // violated any safety invariants that we must fix before returning. 
+ |c: Ptr| unsafe { T::is_bit_valid(c) } + ); unsafe_impl!(T: ?Sized + FromZeroes => FromZeroes for ManuallyDrop); unsafe_impl!(T: ?Sized + FromBytes => FromBytes for ManuallyDrop); unsafe_impl!(T: ?Sized + AsBytes => AsBytes for ManuallyDrop); @@ -1519,6 +2023,9 @@ safety_comment! { } safety_comment! { /// SAFETY: + /// + /// TODO; old safety comment: + /// /// Per the reference [1]: /// /// An array of `[T; N]` has a size of `size_of::() * N` and the same @@ -1536,15 +2043,56 @@ safety_comment! { /// (respectively). Furthermore, since an array/slice has "the same /// alignment of `T`", `[T]` and `[T; N]` are `Unaligned` if `T` is. /// + /// Finally, because of this layout equivalence, an instance of `[T]` or + /// `[T; N]` is valid if each `T` is valid. Thus, it is sound to implement + /// `TryFromBytes::is_bit_valid` by calling `is_bit_valid` on each element. + /// /// Note that we don't `assert_unaligned!` for slice types because /// `assert_unaligned!` uses `align_of`, which only works for `Sized` types. /// /// [1] https://doc.rust-lang.org/reference/type-layout.html#array-layout + unsafe_impl!(const N: usize, T: TryFromBytes => TryFromBytes for [T; N]; |c: Ptr<[T; N]>| { + let slice = c.as_slice(); + // SAFETY: The preconditions of `is_bit_valid` are identical for `[T; + // N]` and for `[T]` if the passed pointer encodes a slice of `N` + // elements. Thus, if the caller has upheld their preconditions, then we + // uphold the preconditions for this call. + // + // Note that this call may panic, but it would still be sound even if it + // did. `is_bit_valid` does not promise that it will not panic (in fact, + // it explicitly warns that it's a possibility), and we have not + // violated any safety invariants that we must fix before returning. + unsafe { <[T] as TryFromBytes>::is_bit_valid(slice) } + }); unsafe_impl!(const N: usize, T: FromZeroes => FromZeroes for [T; N]); unsafe_impl!(const N: usize, T: FromBytes => FromBytes for [T; N]); unsafe_impl!(const N: usize, T: AsBytes => AsBytes for [T; N]); unsafe_impl!(const N: usize, T: Unaligned => Unaligned for [T; N]); assert_unaligned!([(); 0], [(); 1], [u8; 0], [u8; 1]); + unsafe_impl!(T: TryFromBytes => TryFromBytes for [T]; |c: Ptr<[T]>| { + // SAFETY: TODO + // + // Here's the old safety comment: + // + // SAFETY: The caller promises that `c` is aligned and is valid for + // reads. They also promise that any byte which is always initialized in + // a valid `[T]` is initialized in `c`'s referent. While the exact, + // formal property is slightly more complicated (see the safety docs on + // `is_bit_valid`), what's important is that, for types which are merely + // the concatenation of other types (structs, tuples, arrays, slices), + // the property is also compositional - if it holds for the larger type, + // then by definition it holds for each element of the type. Thus, since + // the caller has promised that it holds of the entire `[T]`, it must + // also hold for each individual `T`. + // + // Thus, the preconditions for this call are satisfied. + // + // Note that this call may panic, but it would still be sound even if it + // did. `is_bit_valid` does not promise that it will not panic (in fact, + // it explicitly warns that it's a possibility), and we have not + // violated any safety invariants that we must fix before returning. 
+ c.iter().all(|elem| unsafe { ::is_bit_valid(elem) }) + }); unsafe_impl!(T: FromZeroes => FromZeroes for [T]); unsafe_impl!(T: FromBytes => FromBytes for [T]); unsafe_impl!(T: AsBytes => AsBytes for [T]); @@ -1596,8 +2144,8 @@ safety_comment! { // Given this background, we can observe that: // - The size and bit pattern requirements of a SIMD type are equivalent to the // equivalent array type. Thus, for any SIMD type whose primitive `T` is -// `FromZeroes`, `FromBytes`, or `AsBytes`, that SIMD type is also -// `FromZeroes`, `FromBytes`, or `AsBytes` respectively. +// `FromZeroes`, `FromBytes`, `TryFromBytes`, or `AsBytes`, that SIMD type is +// also `FromZeroes`, `FromBytes`, `TryFromBytes`, or `AsBytes` respectively. // - Since no upper bound is placed on the alignment, no SIMD type can be // guaranteed to be `Unaligned`. // @@ -1608,21 +2156,23 @@ safety_comment! { // // See issue #38 [2]. While this behavior is not technically guaranteed, the // likelihood that the behavior will change such that SIMD types are no longer -// `FromZeroes`, `FromBytes`, or `AsBytes` is next to zero, as that would defeat -// the entire purpose of SIMD types. Nonetheless, we put this behavior behind -// the `simd` Cargo feature, which requires consumers to opt into this stability -// hazard. +// `FromZeroes`, `FromBytes`, `TryFromBytes`, or `AsBytes` is next to zero, as +// that would defeat the entire purpose of SIMD types. Nonetheless, we put this +// behavior behind the `simd` Cargo feature, which requires consumers to opt +// into this stability hazard. // // [1] https://rust-lang.github.io/unsafe-code-guidelines/layout/packed-simd-vectors.html // [2] https://github.com/rust-lang/unsafe-code-guidelines/issues/38 #[cfg(feature = "simd")] mod simd { - /// Defines a module which implements `FromZeroes`, `FromBytes`, and - /// `AsBytes` for a set of types from a module in `core::arch`. + /// Defines a module which implements `FromZeroes`, `FromBytes`, + /// `TryFromBytes`, and `AsBytes` for a set of types from a module in + /// `core::arch`. /// /// `$arch` is both the name of the defined module and the name of the /// module in `core::arch`, and `$typ` is the list of items from that module - /// to implement `FromZeroes`, `FromBytes`, and `AsBytes` for. + /// for which to implement `FromZeroes`, `FromBytes`, `TryFromBytes`, and + /// `AsBytes`. #[allow(unused_macros)] // `allow(unused_macros)` is needed because some // target/feature combinations don't emit any impls // and thus don't use this macro. @@ -1636,7 +2186,7 @@ mod simd { safety_comment! { /// SAFETY: /// See comment on module definition for justification. - $( unsafe_impl!($typ: FromZeroes, FromBytes, AsBytes); )* + $( unsafe_impl!($typ: TryFromBytes, FromZeroes, FromBytes, AsBytes); )* } } }; @@ -1683,14 +2233,34 @@ mod simd { /// Safely transmutes a value of one type to a value of another type of the same /// size. /// -/// The expression `$e` must have a concrete type, `T`, which implements -/// `AsBytes`. The `transmute!` expression must also have a concrete type, `U` +/// The expression, `$e`, must have a concrete type, `T`, which implements +/// [`AsBytes`]. The `transmute!` expression must also have a concrete type, `U` /// (`U` is inferred from the calling context), and `U` must implement -/// `FromBytes`. +/// [`FromBytes`]. `T` and `U` must have the same size. /// /// Note that the `T` produced by the expression `$e` will *not* be dropped. 
/// Semantically, its bits will be copied into a new value of type `U`, the /// original `T` will be forgotten, and the value of type `U` will be returned. +/// +/// # Examples +/// +/// ```rust +/// # use zerocopy::transmute; +/// use core::num::NonZeroU64; +/// +/// // Why would you want to do this? Who knows ¯\_(ツ)_/¯ +/// let opt: Option = transmute!(0.0f64); +/// assert_eq!(opt, None); +/// ``` +/// +/// ```rust,compile_fail +/// # use zerocopy::try_transmute; +/// // Fails to compile: `bool` does not implement `FromBytes` +/// assert_eq!(transmute!(1u8), true); +/// +/// // Fails to compile: can't transmute between sizes of different types +/// let _: u8 = try_transmute!(0u16); +/// ``` #[macro_export] macro_rules! transmute { ($e:expr) => {{ @@ -1874,6 +2444,178 @@ macro_rules! transmute_ref { }} } +/// Safely attempts to transmute a value of one type to a value of another type +/// of the same size, failing if the transmute would be unsound. +/// +/// The expression, `$e`, must have a concrete type, `T`, which implements +/// [`AsBytes`]. The `try_transmute!` expression must also have a concrete type, +/// `Option` (`U` is inferred from the calling context), and `U` must +/// implement [`TryFromBytes`]. `T` and `U` must have the same size. +/// +/// [`TryFromBytes::try_read_from`] is used to attempt to convert `$e` to the +/// output type `U`. This will fail if the bytes of `$e` do not correspond to a +/// valid instance of `U`. +/// +/// Note that the `T` produced by the expression `$e` will *not* be dropped. +/// Semantically, its bits will be copied into a new value of type `U`, the +/// original `T` will be forgotten, and the value of type `U` will be returned. +/// +/// # Examples +/// +/// ```rust +/// # use zerocopy::try_transmute; +/// assert_eq!(try_transmute!(1u8), Some(true)); +/// assert_eq!(try_transmute!(2u8), None::); +/// +/// assert_eq!(try_transmute!(108u32), Some('l')); +/// assert_eq!(try_transmute!(0xD800u32), None::); +/// ``` +/// +/// ```rust,compile_fail +/// # use zerocopy::try_transmute; +/// // Attempting to transmute from 2 to 1 bytes will fail to compile +/// let _: Option = try_transmute!(0u16); +/// ``` +#[macro_export] +macro_rules! try_transmute { + ($e:expr) => {{ + // NOTE: This must be a macro (rather than a function with trait bounds) + // because there's no way, in a generic context, to enforce that two + // types have the same size. `core::mem::transmute` uses compiler magic + // to enforce this so long as the types are concrete. + + let e = $e; + if false { + // This branch, though never taken, ensures that the type of `e` is + // `AsBytes` and that the type of this macro invocation expression + // is `TryFromBytes`. + const fn transmute(_t: T) -> U { + unreachable!() + } + Some(transmute(e)) + } else if false { + // Though never executed, this ensures that the source and + // destination types have the same size. This isn't strictly + // necessary for soundness, but it turns what would otherwise be + // runtime errors into compile-time errors. + // + // SAFETY: This branch never executes. + Some(unsafe { $crate::macro_util::core_reexport::mem::transmute(e) }) + } else { + // TODO: What's the correct drop behavior on `None`? Does this just + // behave like `mem::forget` in that case? 
+ let m = $crate::macro_util::core_reexport::mem::ManuallyDrop::new(e); + $crate::TryFromBytes::try_read_from($crate::AsBytes::as_bytes(&m)) + } + }} +} + +/// Safely attempts to transmute a reference of one type to a reference of +/// another type, failing if the transmute would be unsound. +/// +/// The expression, `$e`, must have a concrete type, `&T`, where [`T: AsBytes`]. +/// The `try_transmute_ref!` expression must also have a concrete type, +/// `Option<&U>` (`U` is inferred from the calling context), and `U` must +/// implement [`TryFromBytes`]. +/// +/// [`TryFromBytes::try_from_ref`] is used to attempt to convert `$e` to the +/// output reference type `&U`. This will fail if `$e` is not the right size, is +/// not properly aligned, or if the bytes of `$e` do not correspond to a valid +/// instance of `U`. +/// +/// Note that, if `U` is an unsized type, there will be multiple sizes for `$e` +/// which correspond to valid values of `U`. +/// +/// [`T: AsBytes`]: AsBytes +/// +/// # Examples +/// +/// ```rust +/// # use zerocopy::try_transmute_ref; +/// # use zerocopy::AsBytes as _; +/// let s: Option<&str> = try_transmute_ref!(&[104u8, 101, 108, 108, 111]); +/// assert_eq!(s, Some("hello")); +/// +/// // Invalid UTF-8 +/// assert_eq!(try_transmute_ref!(&0xFFFFFFFFu32), None::<&str>); +/// +/// // Not enough bytes for a `u8` +/// assert_eq!(try_transmute_ref!(&()), None::<&u8>); +/// +/// // Valid `&[[u8; 2]]` slices could be 2 or 4 bytes long, +/// // but not 3. +/// assert_eq!(try_transmute_ref!(&[0u8, 1, 2]), None::<&[[u8; 2]]>); +/// +/// // Guaranteed to be invalidly-aligned so long as +/// // `align_of::<u16>() == 2` and `align_of::<u32>() >= 2` +/// // (this is true on most targets, but it isn't guaranteed). +/// assert_eq!(try_transmute_ref!(&0u32.as_bytes()[1..]), None::<&u16>); +/// ``` +#[macro_export] +macro_rules! try_transmute_ref { + ($e:expr) => { + $crate::TryFromBytes::try_from_ref($crate::AsBytes::as_bytes($e)) + }; +} + +/// Safely attempts to transmute a mutable reference of one type to a mutable +/// reference of another type, failing if the transmute would be unsound. +/// +/// The expression, `$e`, must have a concrete type, `&mut T`, where `T: +/// FromBytes + AsBytes`. The `try_transmute_mut!` expression must also have a +/// concrete type, `Option<&mut U>` (`U` is inferred from the calling context), +/// and `U` must implement [`TryFromBytes`]. +/// +/// [`TryFromBytes::try_from_mut`] is used to attempt to convert `$e` to the +/// output reference type, `&mut U`. This will fail if `$e` is not the right +/// size, is not properly aligned, or if the bytes of `$e` do not correspond to +/// a valid instance of `U`. +/// +/// Note that, if `U` is an unsized type, there will be multiple sizes for `$e` +/// which correspond to valid values of `U`. +/// +/// [`TryFromBytes`]: TryFromBytes +/// +/// # Examples +/// +/// ```rust +/// # use zerocopy::try_transmute_mut; +/// # use zerocopy::AsBytes as _; +/// let bytes = &mut [104u8, 101, 108, 108, 111]; +/// let mut s = try_transmute_mut!(bytes); +/// assert_eq!(s, Some(String::from("hello").as_mut_str())); +/// +/// // Mutations to the transmuted reference are reflected +/// // in the original reference.
+/// s.as_mut().unwrap().make_ascii_uppercase(); +/// assert_eq!(bytes, &[72, 69, 76, 76, 79]); +/// +/// // Invalid UTF-8 +/// let mut u = 0xFFFFFFFFu32; +/// assert_eq!(try_transmute_mut!(&mut u), None::<&mut str>); +/// +/// // Not enough bytes for a `u8` +/// let mut tuple = (); +/// assert_eq!(try_transmute_mut!(&mut tuple), None::<&mut u8>); +/// +/// // Valid `&mut [[u8; 2]]` slices could be 2 or 4 bytes +/// // long, but not 3. +/// let bytes = &mut [0u8, 1, 2]; +/// assert_eq!(try_transmute_mut!(bytes), None::<&mut [[u8; 2]]>); +/// +/// // Guaranteed to be invalidly-aligned so long as +/// // `align_of::() == 2` and `align_of::() >= 2` +/// // (this is true on most targets, but it isn't guaranteed). +/// let mut u = 0u32; +/// assert_eq!(try_transmute_mut!(&mut u.as_bytes_mut()[1..]), None::<&mut u16>); +/// ``` +#[macro_export] +macro_rules! try_transmute_mut { + ($e:expr) => { + $crate::TryFromBytes::try_from_mut($crate::AsBytes::as_bytes_mut($e)) + }; +} + /// A typed reference derived from a byte slice. /// /// A `Ref` is a reference to a `T` which is stored in a byte slice, `B`. @@ -3336,7 +4078,7 @@ mod tests { // addition to the previous line. std::panic::set_hook(Box::new(|_| {})); let actual = std::panic::catch_unwind(|| { - layout(size_info, align)._validate_cast_and_convert_metadata(addr, bytes_len, cast_type) + layout(size_info, align).validate_cast_and_convert_metadata(addr, bytes_len, cast_type) }).map_err(|d| { *d.downcast::<&'static str>().expect("expected string panic message").as_ref() }); @@ -3373,8 +4115,8 @@ mod tests { (@generate_elem_size _) => { 1..8 }; (@generate_align _) => { [1, 2, 4, 8, 16] }; (@generate_opt_usize _) => { [None].into_iter().chain((0..8).map(Some).into_iter()) }; - (@generate_cast_type _) => { [_CastType::_Prefix, _CastType::_Suffix] }; - (@generate_cast_type $variant:ident) => { [_CastType::$variant] }; + (@generate_cast_type _) => { [CastType::Prefix, CastType::_Suffix] }; + (@generate_cast_type $variant:ident) => { [CastType::$variant] }; // Some expressions need to be wrapped in parentheses in order to be // valid `tt`s (required by the top match pattern). See the comment // below for more details. This arm removes these parentheses to @@ -3391,8 +4133,8 @@ mod tests { test!(layout(((2..8) | ((2..8), (2..8))), _).validate(_, [1], _), Ok(None)); // addr is unaligned for prefix cast - test!(layout(_, [2]).validate(ODDS, _, _Prefix), Ok(None)); - test!(layout(_, [2]).validate(ODDS, _, _Prefix), Ok(None)); + test!(layout(_, [2]).validate(ODDS, _, Prefix), Ok(None)); + test!(layout(_, [2]).validate(ODDS, _, Prefix), Ok(None)); // addr is aligned, but end of buffer is unaligned for suffix cast test!(layout(_, [2]).validate(EVENS, ODDS, _Suffix), Ok(None)); @@ -3432,10 +4174,10 @@ mod tests { // documented safety postconditions, and also a few other properties // that aren't documented but we want to guarantee anyway. fn validate_behavior( - (layout, addr, bytes_len, cast_type): (DstLayout, usize, usize, _CastType), + (layout, addr, bytes_len, cast_type): (DstLayout, usize, usize, CastType), ) { if let Some((elems, split_at)) = - layout._validate_cast_and_convert_metadata(addr, bytes_len, cast_type) + layout.validate_cast_and_convert_metadata(addr, bytes_len, cast_type) { let (size_info, align) = (layout._size_info, layout._align); let debug_str = format!( @@ -3474,11 +4216,11 @@ mod tests { // `validate_cast_and_convert_metadata`. 
assert!(resulting_size <= bytes_len, "{}", debug_str); match cast_type { - _CastType::_Prefix => { + CastType::Prefix => { assert_eq!(addr % align, 0, "{}", debug_str); assert_eq!(resulting_size, split_at, "{}", debug_str); } - _CastType::_Suffix => { + CastType::_Suffix => { assert_eq!(split_at, bytes_len - resulting_size, "{}", debug_str); assert_eq!((addr + split_at) % align, 0, "{}", debug_str); } @@ -3496,8 +4238,8 @@ mod tests { let insufficient_bytes = bytes_len < min_size; // 2. performing the cast would misalign type: let base = match cast_type { - _CastType::_Prefix => 0, - _CastType::_Suffix => bytes_len, + CastType::Prefix => 0, + CastType::_Suffix => bytes_len, }; let misaligned = (base + addr) % layout._align != 0; @@ -3514,7 +4256,7 @@ mod tests { let layouts = itertools::iproduct!(size_infos, [1, 2, 4, 8, 16, 32]) .filter(|(size_info, align)| !matches!(size_info, SizeInfo::Sized { _size } if _size % align != 0)) .map(|(size_info, align)| layout(size_info, align)); - itertools::iproduct!(layouts, 0..8, 0..8, [_CastType::_Prefix, _CastType::_Suffix]) + itertools::iproduct!(layouts, 0..8, 0..8, [CastType::Prefix, CastType::_Suffix]) .for_each(validate_behavior); } @@ -3628,7 +4370,7 @@ mod tests { if args.elem_size.map(|elem_size| elem_size > 0).unwrap_or(true) { let addr = ptr.addr().get(); let (got_elems, got_split_at) = layout - ._validate_cast_and_convert_metadata(addr, size, _CastType::_Prefix) + .validate_cast_and_convert_metadata(addr, size, CastType::Prefix) .unwrap(); // Avoid expensive allocation when running under Miri. let assert_msg = if !cfg!(miri) { @@ -3986,10 +4728,16 @@ mod tests { // Test that memory is transmuted as expected. let array_of_u8s = [0u8, 1, 2, 3, 4, 5, 6, 7]; let array_of_arrays = [[0, 1], [2, 3], [4, 5], [6, 7]]; + let x: [[u8; 2]; 4] = transmute!(array_of_u8s); assert_eq!(x, array_of_arrays); + let x: Option<[[u8; 2]; 4]> = try_transmute!(array_of_u8s); + assert_eq!(x, Some(array_of_arrays)); + let x: [u8; 8] = transmute!(array_of_arrays); assert_eq!(x, array_of_u8s); + let x: Option<[u8; 8]> = try_transmute!(array_of_arrays); + assert_eq!(x, Some(array_of_u8s)); // Test that the source expression's value is forgotten rather than // dropped. @@ -4002,12 +4750,37 @@ mod tests { } } let _: () = transmute!(PanicOnDrop(())); + let _: Option<()> = try_transmute!(PanicOnDrop(())); // Test that `transmute!` is legal in a const context. const ARRAY_OF_U8S: [u8; 8] = [0u8, 1, 2, 3, 4, 5, 6, 7]; const ARRAY_OF_ARRAYS: [[u8; 2]; 4] = [[0, 1], [2, 3], [4, 5], [6, 7]]; const X: [[u8; 2]; 4] = transmute!(ARRAY_OF_U8S); assert_eq!(X, ARRAY_OF_ARRAYS); + + // Test fallible transmutations with `try_transmute!`. + let mut b: Option = try_transmute!(0u8); + assert_eq!(b, Some(false)); + b = try_transmute!(1u8); + assert_eq!(b, Some(true)); + b = try_transmute!(2u8); + assert_eq!(b, None); + } + + #[test] + fn test_try_transmute_ref_mut() { + // These macros are dead-simple thin wrappers which delegate to other + // traits. We only have this test to ensure that the macros are uesd + // somewhere so our tests will break if the paths to various items + // break. 
+ let x: Option<&[u8; 2]> = try_transmute_ref!(&0xFFFFu16); + assert_eq!(x, Some(&[255, 255])); + + let mut u = 0xFFFFu16; + let x: Option<&mut [u8; 2]> = try_transmute_mut!(&mut u); + assert_eq!(x, Some(&mut [255, 255])); + *x.unwrap() = [0, 0]; + assert_eq!(u, 0); } #[test] @@ -4741,9 +5514,175 @@ mod tests { #[test] fn test_impls() { + use core::borrow::Borrow; + + // A type that can supply test cases for testing + // `TryFromBytes::is_bit_valid`. All types passed to `assert_impls!` + // must implement this trait; that macro uses it to generate runtime + // tests for `TryFromBytes` impls. + // + // All `T: FromBytes` types are provided with a blanket impl. Other + // types must implement `TryFromBytesTestable` directly (ie using + // `impl_try_from_bytes_testable!`). + trait TryFromBytesTestable { + fn with_passing_test_cases(f: F); + fn with_failing_test_cases(f: F); + } + + impl TryFromBytesTestable for T { + fn with_passing_test_cases(f: F) { + // Test with a zeroed value. + f(&Self::new_zeroed()); + + let ffs = { + let mut t = Self::new_zeroed(); + let ptr: *mut T = &mut t; + // SAFETY: `T: FromBytes` + unsafe { ptr::write_bytes(ptr.cast::(), 0xFF, mem::size_of::()) }; + t + }; + + // Test with a value initialized with 0xFF. + f(&ffs); + } + + fn with_failing_test_cases(_f: F) {} + } + + // Implements `TryFromBytesTestable`. + macro_rules! impl_try_from_bytes_testable { + // Base case for recursion (when the list of types has run out). + (=> @success $($success_case:expr),* $(, @failure $($failure_case:expr),*)?) => {}; + // Implements for type(s) with no type parameters. + ($ty:ty $(,$tys:ty)* => @success $($success_case:expr),* $(, @failure $($failure_case:expr),*)?) => { + impl TryFromBytesTestable for $ty { + impl_try_from_bytes_testable!( + @methods @success $($success_case),* + $(, @failure $($failure_case),*)? + ); + } + impl_try_from_bytes_testable!($($tys),* => @success $($success_case),* $(, @failure $($failure_case),*)?); + }; + // Implements for multiple types with no type parameters. + ($($($ty:ty),* => @success $($success_case:expr), * $(, @failure $($failure_case:expr),*)?;)*) => { + $( + impl_try_from_bytes_testable!($($ty),* => @success $($success_case),* $(, @failure $($failure_case),*)*); + )* + }; + // Implements only the methods; caller must invoke this from inside + // an impl block. + (@methods @success $($success_case:expr),* $(, @failure $($failure_case:expr),*)?) => { + fn with_passing_test_cases(_f: F) { + $( + _f($success_case.borrow()); + )* + } + + fn with_failing_test_cases(_f: F) { + $($( + // `unused_qualifications` is spuriously triggered on + // `Option::::None`. + #[allow(unused_qualifications)] + let case = $failure_case.as_bytes(); + _f(case.as_bytes()); + )*)? + } + }; + } + + // Note that these impls are only for types which are not `FromBytes`. + // `FromBytes` types are covered by a preceding blanket impl. 
+        impl_try_from_bytes_testable!(
+            bool => @success true, false,
+                    @failure 2u8, 3u8, 0xFFu8;
+            char => @success '\u{0}', '\u{D7FF}', '\u{E000}', '\u{10FFFF}',
+                    @failure 0xD800u32, 0xDFFFu32, 0x110000u32;
+            str  => @success "", "hello", "❤️🧡💛💚💙💜",
+                    @failure [0, 159, 146, 150];
+            [u8] => @success [], [0, 1, 2];
+            NonZeroU8, NonZeroI8, NonZeroU16, NonZeroI16, NonZeroU32,
+            NonZeroI32, NonZeroU64, NonZeroI64, NonZeroU128, NonZeroI128,
+            NonZeroUsize, NonZeroIsize
+                => @success Self::new(1).unwrap(),
+                   // Doing this instead of `0` ensures that we always satisfy
+                   // the size and alignment requirements of `Self` (whereas
+                   // `0` may be any integer type with a different size or
+                   // alignment than some `NonZeroXxx` types).
+                   @failure Option::<Self>::None;
+            ManuallyDrop<bool>
+                => @success ManuallyDrop::new(true), ManuallyDrop::new(false),
+                   @failure 3u8, 4u8, 0xFFu8;
+            Wrapping<bool>
+                => @success Wrapping(true), Wrapping(false),
+                   @failure 3u8, 4u8, 0xFFu8;
+            // TODO(#321): Add success test cases for `ManuallyDrop<[u8]>` and
+            // `ManuallyDrop<[bool]>` once we have an easy way of converting
+            // `[ManuallyDrop<T>] -> ManuallyDrop<[T]>`. Here are some suggested
+            // test cases:
+            //
+            //   @success [ManuallyDrop::new(true), ManuallyDrop::new(false)][..],
+            //            [ManuallyDrop::new(false), ManuallyDrop::new(true)][..],
+            ManuallyDrop<[u8]> => @success;
+            ManuallyDrop<[bool]>
+                => @success,
+                   @failure [2u8], [3u8], [0xFFu8], [0u8, 1u8, 2u8];
+            [bool; 0] => @success [];
+            [bool; 1]
+                => @success [true], [false],
+                   @failure [2u8], [3u8], [0xFFu8];
+            [bool]
+                => @success [true, false], [false, true],
+                   @failure [2u8], [3u8], [0xFFu8], [0u8, 1u8, 2u8];
+        );
+
         // Asserts that `$ty` implements any `$trait` and doesn't implement any
         // `!$trait`. Note that all `$trait`s must come before any `!$trait`s.
+        //
+        // For `T: TryFromBytes`, uses `TryFromBytesTestable` to test success
+        // and failure cases for `TryFromBytes::is_bit_valid`.
         macro_rules! assert_impls {
+            ($ty:ty: TryFromBytes) => {
+                <$ty as TryFromBytesTestable>::with_passing_test_cases(|val| {
+                    let c = Ptr::from(val);
+                    // SAFETY:
+                    // - Since `val` is a normal reference, `c` is guaranteed to
+                    //   be aligned, to point to a single allocation, and to
+                    //   have a size which doesn't overflow `isize`.
+                    // - Since `val` is a valid `$ty`, `c`'s referent satisfies
+                    //   the bit validity constraints of `is_bit_valid`, which
+                    //   are a superset of the bit validity constraints of
+                    //   `$ty`.
+                    let res = unsafe { <$ty as TryFromBytes>::is_bit_valid(c) };
+                    assert!(res, "{}::is_bit_valid({:?}): got false, expected true", stringify!($ty), val);
+
+                    // TODO(#5): In addition to testing `is_bit_valid`, test the
+                    // methods built on top of it. This would both allow us to
+                    // test their implementations and actually convert the bytes
+                    // to `$ty`, giving Miri a chance to catch if this is
+                    // unsound (ie, if our `is_bit_valid` impl is buggy).
+                    //
+                    // The following code was tried, but it doesn't work because
+                    // a) some types are not `AsBytes` and, b) some types are
+                    // not `Sized`.
+ // + // let r = <$ty as TryFromBytes>::try_from_ref(val.as_bytes()).unwrap(); + // assert_eq!(r, &val); + // let r = <$ty as TryFromBytes>::try_from_mut(val.as_bytes_mut()).unwrap(); + // assert_eq!(r, &mut val); + // let v = <$ty as TryFromBytes>::try_read_from(val.as_bytes()).unwrap(); + // assert_eq!(v, val); + }); + #[allow(clippy::as_conversions)] + <$ty as TryFromBytesTestable>::with_failing_test_cases(|c| { + let res = <$ty as TryFromBytes>::try_from_ref(c); + assert!(res.is_none(), "{}::is_bit_valid({:?}): got true, expected false", stringify!($ty), c); + }); + + #[allow(dead_code)] + const _: () = { static_assertions::assert_impl_all!($ty: TryFromBytes); }; + }; ($ty:ty: $trait:ident) => { #[allow(dead_code)] const _: () = { static_assertions::assert_impl_all!($ty: $trait); }; @@ -4763,77 +5702,87 @@ mod tests { }; } - assert_impls!((): FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!(u8: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!(i8: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!(u16: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(i16: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(u32: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(i32: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(u64: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(i64: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(u128: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(i128: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(usize: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(isize: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(f32: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(f64: FromZeroes, FromBytes, AsBytes, !Unaligned); - - assert_impls!(bool: FromZeroes, AsBytes, Unaligned, !FromBytes); - assert_impls!(char: FromZeroes, AsBytes, !FromBytes, !Unaligned); - assert_impls!(str: FromZeroes, AsBytes, Unaligned, !FromBytes); - - assert_impls!(NonZeroU8: AsBytes, Unaligned, !FromZeroes, !FromBytes); - assert_impls!(NonZeroI8: AsBytes, Unaligned, !FromZeroes, !FromBytes); - assert_impls!(NonZeroU16: AsBytes, !FromZeroes, !FromBytes, !Unaligned); - assert_impls!(NonZeroI16: AsBytes, !FromZeroes, !FromBytes, !Unaligned); - assert_impls!(NonZeroU32: AsBytes, !FromZeroes, !FromBytes, !Unaligned); - assert_impls!(NonZeroI32: AsBytes, !FromZeroes, !FromBytes, !Unaligned); - assert_impls!(NonZeroU64: AsBytes, !FromZeroes, !FromBytes, !Unaligned); - assert_impls!(NonZeroI64: AsBytes, !FromZeroes, !FromBytes, !Unaligned); - assert_impls!(NonZeroU128: AsBytes, !FromZeroes, !FromBytes, !Unaligned); - assert_impls!(NonZeroI128: AsBytes, !FromZeroes, !FromBytes, !Unaligned); - assert_impls!(NonZeroUsize: AsBytes, !FromZeroes, !FromBytes, !Unaligned); - assert_impls!(NonZeroIsize: AsBytes, !FromZeroes, !FromBytes, !Unaligned); - - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, 
!Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, !Unaligned); - assert_impls!(Option: FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!((): TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!(u8: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!(i8: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!(u16: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(i16: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(u32: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(i32: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(u64: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(i64: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(u128: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(i128: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(usize: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(isize: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(f32: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(f64: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + + assert_impls!(bool: TryFromBytes, FromZeroes, AsBytes, Unaligned, !FromBytes); + assert_impls!(char: TryFromBytes, FromZeroes, AsBytes, !FromBytes, !Unaligned); + assert_impls!(str: TryFromBytes, FromZeroes, AsBytes, Unaligned, !FromBytes); + + assert_impls!(NonZeroU8: TryFromBytes, AsBytes, Unaligned, !FromZeroes, !FromBytes); + assert_impls!(NonZeroI8: TryFromBytes, AsBytes, Unaligned, !FromZeroes, !FromBytes); + assert_impls!(NonZeroU16: TryFromBytes, AsBytes, !FromZeroes, !FromBytes, !Unaligned); + assert_impls!(NonZeroI16: TryFromBytes, AsBytes, !FromZeroes, !FromBytes, !Unaligned); + assert_impls!(NonZeroU32: TryFromBytes, AsBytes, !FromZeroes, !FromBytes, !Unaligned); + assert_impls!(NonZeroI32: TryFromBytes, AsBytes, !FromZeroes, !FromBytes, !Unaligned); + assert_impls!(NonZeroU64: TryFromBytes, AsBytes, !FromZeroes, !FromBytes, !Unaligned); + assert_impls!(NonZeroI64: TryFromBytes, AsBytes, !FromZeroes, !FromBytes, !Unaligned); + assert_impls!(NonZeroU128: TryFromBytes, AsBytes, !FromZeroes, !FromBytes, !Unaligned); + assert_impls!(NonZeroI128: TryFromBytes, AsBytes, !FromZeroes, !FromBytes, !Unaligned); + assert_impls!(NonZeroUsize: TryFromBytes, AsBytes, !FromZeroes, !FromBytes, !Unaligned); + assert_impls!(NonZeroIsize: TryFromBytes, AsBytes, !FromZeroes, !FromBytes, !Unaligned); + + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, 
!Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); + assert_impls!(Option: TryFromBytes, FromZeroes, FromBytes, AsBytes, !Unaligned); // Implements none of the ZC traits. struct NotZerocopy; - assert_impls!(PhantomData: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!(PhantomData<[u8]>: FromZeroes, FromBytes, AsBytes, Unaligned); - - assert_impls!(ManuallyDrop: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!(ManuallyDrop<[u8]>: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!(ManuallyDrop: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); - assert_impls!(ManuallyDrop<[NotZerocopy]>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); - - assert_impls!(MaybeUninit: FromZeroes, FromBytes, Unaligned, !AsBytes); - assert_impls!(MaybeUninit: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); - - assert_impls!(Wrapping: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!(Wrapping: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); - - assert_impls!(Unalign: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!(Unalign: Unaligned, !FromZeroes, !FromBytes, !AsBytes); - - assert_impls!([u8]: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!([NotZerocopy]: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); - assert_impls!([u8; 0]: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!([NotZerocopy; 0]: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); - assert_impls!([u8; 1]: FromZeroes, FromBytes, AsBytes, Unaligned); - assert_impls!([NotZerocopy; 1]: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + assert_impls!(PhantomData: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!(PhantomData<[u8]>: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + + assert_impls!(ManuallyDrop: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!(ManuallyDrop<[u8]>: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!(ManuallyDrop: TryFromBytes, FromZeroes, AsBytes, Unaligned, !FromBytes); + assert_impls!(ManuallyDrop<[bool]>: TryFromBytes, FromZeroes, AsBytes, Unaligned, !FromBytes); + assert_impls!(ManuallyDrop: !TryFromBytes, !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + assert_impls!(ManuallyDrop<[NotZerocopy]>: !TryFromBytes, !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + + assert_impls!(MaybeUninit: TryFromBytes, FromZeroes, FromBytes, Unaligned, !AsBytes); + assert_impls!(MaybeUninit: TryFromBytes, FromZeroes, FromBytes, Unaligned, !AsBytes); + assert_impls!(MaybeUninit>: TryFromBytes, FromZeroes, FromBytes, Unaligned, !AsBytes); + assert_impls!(MaybeUninit>: TryFromBytes, FromZeroes, FromBytes, Unaligned, !AsBytes); + assert_impls!(MaybeUninit: !TryFromBytes, !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + assert_impls!(MaybeUninit>: !TryFromBytes, !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + + assert_impls!(Wrapping: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!(Wrapping: TryFromBytes, FromZeroes, AsBytes, Unaligned, !FromBytes); + assert_impls!(Wrapping: !TryFromBytes, !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + + assert_impls!(Unalign: FromZeroes, FromBytes, AsBytes, Unaligned, !TryFromBytes); + assert_impls!(Unalign: Unaligned, !TryFromBytes, !FromZeroes, !FromBytes, !AsBytes); + + assert_impls!([u8]: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!([bool]: TryFromBytes, FromZeroes, AsBytes, Unaligned, !FromBytes); + assert_impls!([NotZerocopy]: !TryFromBytes, !FromZeroes, 
!FromBytes, !AsBytes, !Unaligned); + assert_impls!([u8; 0]: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!([bool; 0]: TryFromBytes, FromZeroes, AsBytes, Unaligned, !FromBytes); + assert_impls!([NotZerocopy; 0]: !TryFromBytes, !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + assert_impls!([u8; 1]: TryFromBytes, FromZeroes, FromBytes, AsBytes, Unaligned); + assert_impls!([bool; 1]: TryFromBytes, FromZeroes, AsBytes, Unaligned, !FromBytes); + assert_impls!([NotZerocopy; 1]: !TryFromBytes, !FromZeroes, !FromBytes, !AsBytes, !Unaligned); } } diff --git a/src/macro_util.rs b/src/macro_util.rs index f67812fad2..08089c531f 100644 --- a/src/macro_util.rs +++ b/src/macro_util.rs @@ -99,7 +99,7 @@ macro_rules! union_has_padding { pub mod core_reexport { pub mod mem { - pub use core::mem::transmute; + pub use core::mem::{transmute, ManuallyDrop}; } } diff --git a/src/macros.rs b/src/macros.rs index aebc8d69cc..d58f1d9c58 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -29,13 +29,42 @@ macro_rules! safety_comment { } /// Unsafely implements trait(s) for a type. +/// +/// # Safety +/// +/// The trait impl must be sound. +/// +/// When implementing `TryFromBytes`: +/// - If no `is_bit_valid` impl is provided, then it must be valid for +/// `is_bit_valid` to unconditionally return `true`. In other words, it must +/// be the case that any initialized sequence of bytes constitutes a valid +/// instance of `$ty`. +/// - If an `is_bit_valid` impl is provided, then: +/// - Regardless of whether the provided closure takes a `Ptr<$repr>` or +/// `&$repr` argument, it must be the case that, given `t: *mut $ty`, `let r +/// = t as *mut $repr` is valid, and `r` refers to an object of equal or +/// lesser size than the object referred to by `t`. +/// - If the provided closure takes a `Ptr<$repr>` argument, then given a +/// `Ptr<$ty>` which satisfies the preconditions of +/// `TryFromBytes::<$ty>::is_bit_valid`, it must be guaranteed that a +/// `Ptr<$repr>` with the same address, provenance, and pointer metadata +/// satisfies the preconditions of `TryFromBytes::<$repr>::is_bit_valid`. +/// - If the provided closure takes a `&$repr` argument, then given a +/// `Ptr<'a, $ty>` which satisfies the preconditions of +/// `TryFromBytes::<$ty>::is_bit_valid`, it must be sound to convert it to a +/// `$repr` pointer with the same address, provenance, and pointer metadata, +/// and to subsequently dereference that pointer as a `&'a $repr`. +/// - The impl of `is_bit_valid` must only return `true` for its argument +/// `Ptr<$repr>` if the original `Ptr<$ty>` refers to a valid `$ty`. macro_rules! unsafe_impl { // Implement `$trait` for `$ty` with no bounds. - ($ty:ty: $trait:ty) => { - unsafe impl $trait for $ty { #[allow(clippy::missing_inline_in_public_items)] fn only_derive_is_allowed_to_implement_this_trait() {} } + ($ty:ty: $trait:ident $(; |$candidate:ident: &$repr:ty| $is_bit_valid:expr)?) => { + unsafe impl $trait for $ty { + unsafe_impl!(@method $trait $(; |$candidate: &$repr| $is_bit_valid)?); + } }; // Implement all `$traits` for `$ty` with no bounds. - ($ty:ty: $($traits:ty),*) => { + ($ty:ty: $($traits:ident),*) => { $( unsafe_impl!($ty: $traits); )* }; // This arm is identical to the following one, except it contains a @@ -66,36 +95,74 @@ macro_rules! unsafe_impl { ( const $constname:ident : $constty:ident $(,)? $($tyvar:ident $(: $(? 
$optbound:ident $(+)?)* $($bound:ident $(+)?)* )?),* - => $trait:ident for $ty:ty + => $trait:ident for $ty:ty $(; |$candidate:ident $(: &$ref_repr:ty)? $(: Ptr<$ptr_repr:ty>)?| $is_bit_valid:expr)? ) => { unsafe_impl!( @inner @const $constname: $constty, $($tyvar $(: $(? $optbound +)* + $($bound +)*)?,)* - => $trait for $ty + => $trait for $ty $(; |$candidate $(: &$ref_repr)? $(: Ptr<$ptr_repr>)?| $is_bit_valid)? ); }; ( $($tyvar:ident $(: $(? $optbound:ident $(+)?)* $($bound:ident $(+)?)* )?),* - => $trait:ident for $ty:ty + => $trait:ident for $ty:ty $(; |$candidate:ident $(: &$ref_repr:ty)? $(: Ptr<$ptr_repr:ty>)?| $is_bit_valid:expr)? ) => { unsafe_impl!( @inner $($tyvar $(: $(? $optbound +)* + $($bound +)*)?,)* - => $trait for $ty + => $trait for $ty $(; |$candidate $(: &$ref_repr)? $(: Ptr<$ptr_repr>)?| $is_bit_valid)? ); }; ( @inner $(@const $constname:ident : $constty:ident,)* $($tyvar:ident $(: $(? $optbound:ident +)* + $($bound:ident +)* )?,)* - => $trait:ident for $ty:ty + => $trait:ident for $ty:ty $(; |$candidate:ident $(: &$ref_repr:ty)? $(: Ptr<$ptr_repr:ty>)?| $is_bit_valid:expr)? ) => { unsafe impl<$(const $constname: $constty,)* $($tyvar $(: $(? $optbound +)* $($bound +)*)?),*> $trait for $ty { - #[allow(clippy::missing_inline_in_public_items)] - fn only_derive_is_allowed_to_implement_this_trait() {} + unsafe_impl!(@method $trait $(; |$candidate: $(&$ref_repr)? $(Ptr<$ptr_repr>)?| $is_bit_valid)?); } }; + + (@method TryFromBytes ; |$candidate:ident: &$repr:ty| $is_bit_valid:expr) => { + #[inline] + unsafe fn is_bit_valid(candidate: Ptr<'_, Self>) -> bool { + // SAFETY: + // - The argument to `cast_unsized` is `|p| p as *mut _` as required + // by that method's safety precondition. + // - The caller has promised that the cast results in an object of + // equal or lesser size. + #[allow(clippy::as_conversions)] + let candidate = unsafe { candidate.cast_unsized::<$repr>(|p| p as *mut _) }; + // SAFETY: The caller has promised that, so long as `candidate` + // satisfies the preconditions for `is_bit_valid`, it is valid to + // convert it to a reference with the same lifetime as `candidate`. + let $candidate: &$repr = unsafe { candidate.as_ref() }; + $is_bit_valid + } + }; + (@method TryFromBytes ; |$candidate:ident: Ptr<$repr:ty>| $is_bit_valid:expr) => { + #[inline] + unsafe fn is_bit_valid(candidate: Ptr<'_, Self>) -> bool { + // SAFETY: + // - The argument to `cast_unsized` is `|p| p as *mut _` as required + // by that method's safety precondition. + // - The caller has promised that the cast results in an object of + // equal or lesser size. + #[allow(clippy::as_conversions)] + let $candidate = unsafe { candidate.cast_unsized::<$repr>(|p| p as *mut _) }; + $is_bit_valid + } + }; + (@method TryFromBytes) => { #[inline(always)] unsafe fn is_bit_valid(_: Ptr<'_, Self>) -> bool { true } }; + (@method $trait:ident) => { + #[allow(clippy::missing_inline_in_public_items)] + fn only_derive_is_allowed_to_implement_this_trait() {} + }; + (@method $trait:ident; |$_candidate:ident $(: &$_ref_repr:ty)? $(: NonNull<$_ptr_repr:ty>)?| $_is_bit_valid:expr) => { + compile_error!("Can't provide `is_bit_valid` impl for trait other than `TryFromBytes`"); + }; } /// Implements trait(s) for a type or verifies the given implementation by @@ -204,11 +271,24 @@ macro_rules! impl_known_layout { }; ($($ty:ty),*) => { $(impl_known_layout!(@inner , => $ty);)* }; (@inner $(const $constvar:ident : $constty:ty)? , $($tyvar:ident $(: ?$optbound:ident)?)? 
=> $ty:ty) => { - impl<$(const $constvar : $constty,)? $($tyvar $(: ?$optbound)?)?> sealed::KnownLayoutSealed for $ty {} - // SAFETY: Delegates safety to `DstLayout::for_type`. - unsafe impl<$(const $constvar : $constty,)? $($tyvar $(: ?$optbound)?)?> KnownLayout for $ty { - const LAYOUT: DstLayout = DstLayout::for_type::<$ty>(); - } + const _: () = { + use core::ptr::NonNull; + + impl<$(const $constvar : $constty,)? $($tyvar $(: ?$optbound)?)?> sealed::KnownLayoutSealed for $ty {} + // SAFETY: Delegates safety to `DstLayout::for_type`. + unsafe impl<$(const $constvar : $constty,)? $($tyvar $(: ?$optbound)?)?> KnownLayout for $ty { + const LAYOUT: DstLayout = DstLayout::for_type::<$ty>(); + + // SAFETY: `.cast` preserves address and provenance. + // + // TODO(#429): Add documentation to `.cast` that promises that + // it preserves provenance. + #[inline(always)] + fn raw_from_ptr_len(bytes: NonNull, _elems: usize) -> NonNull { + bytes.cast::() + } + } + }; }; } @@ -225,10 +305,28 @@ macro_rules! impl_known_layout { /// and this operation must preserve referent size (ie, `size_of_val_raw`). macro_rules! unsafe_impl_known_layout { ($($tyvar:ident: ?Sized + KnownLayout =>)? #[repr($repr:ty)] $ty:ty) => { - impl<$($tyvar: ?Sized + KnownLayout)?> sealed::KnownLayoutSealed for $ty {} - unsafe impl<$($tyvar: ?Sized + KnownLayout)?> KnownLayout for $ty { - const LAYOUT: DstLayout = <$repr as KnownLayout>::LAYOUT; - } + const _: () = { + use core::ptr::NonNull; + + impl<$($tyvar: ?Sized + KnownLayout)?> sealed::KnownLayoutSealed for $ty {} + unsafe impl<$($tyvar: ?Sized + KnownLayout)?> KnownLayout for $ty { + const LAYOUT: DstLayout = <$repr as KnownLayout>::LAYOUT; + + // SAFETY: All operations preserve address and provenance. + // Caller has promised that the `as` cast preserves size. + // + // TODO(#429): Add documentation to `NonNull::new_unchecked` + // that it preserves provenance. + #[inline(always)] + #[allow(unused_qualifications)] // for `core::ptr::NonNull` + fn raw_from_ptr_len(bytes: NonNull, elems: usize) -> NonNull { + #[allow(clippy::as_conversions)] + let ptr = <$repr>::raw_from_ptr_len(bytes, elems).as_ptr() as *mut Self; + // SAFETY: `ptr` was converted from `bytes`, which is non-null. + unsafe { NonNull::new_unchecked(ptr) } + } + } + }; }; } diff --git a/src/util.rs b/src/util.rs index 38e4887144..a2a41df2dd 100644 --- a/src/util.rs +++ b/src/util.rs @@ -5,7 +5,387 @@ #[path = "third_party/rust/layout.rs"] pub(crate) mod core_layout; -use core::{mem, num::NonZeroUsize}; +use core::{ + fmt::{Debug, Formatter}, + marker::PhantomData, + mem, + num::NonZeroUsize, + ptr::NonNull, +}; + +use crate::{CastType, KnownLayout}; + +// For each polyfill, as soon as the corresponding feature is stable, the +// polyfill import will be unused because method/function resolution will prefer +// the inherent method/function over a trait method/function. Thus, we suppress +// the `unused_imports` warning. +// +// See the documentation on `util::polyfills` for more information. +#[allow(unused_imports)] +use crate::util::polyfills::NonNullSliceExt as _; + +/// A raw pointer with more restrictions. 
+/// +/// `Ptr` is similar to `NonNull`, but it is more restrictive in the +/// following ways: +/// - It must derive from a valid allocation +/// - It must reference a byte range which is contained inside the allocation +/// from which it derives +/// - As a consequence, the byte range it references must have a size which +/// does not overflow `isize` +/// - It must satisfy `T`'s alignment requirement +/// +/// Thanks to these restrictions, it is easier to prove the soundness of some +/// operations using `Ptr`s. +/// +/// `Ptr<'a, T>` is [covariant] in `'a` and `T`. +/// +/// [covariant]: https://doc.rust-lang.org/reference/subtyping.html +pub struct Ptr<'a, T: 'a + ?Sized> { + // INVARIANTS: + // - `ptr` is derived from some valid Rust allocation, `A` + // - `ptr` has the same provenance as `A` + // - `ptr` addresses a byte range which is entirely contained in `A` + // - `ptr` addresses a byte range which is not longer than `isize::MAX` + // - `ptr` addresses a byte range which does not wrap around the address + // space + // - `ptr` is validly-aligned for `T` + // - `A` is guaranteed to live for at least `'a` + // - `T: 'a` + ptr: NonNull, + _lifetime: PhantomData<&'a ()>, +} + +impl<'a, T: ?Sized> Copy for Ptr<'a, T> {} +impl<'a, T: ?Sized> Clone for Ptr<'a, T> { + fn clone(&self) -> Self { + *self + } +} + +impl<'a, T: ?Sized> Ptr<'a, T> { + /// Returns a shared reference to the value. + /// + /// # Safety + /// + /// TODO(#29), TODO(#429): What is the right way to articulate the safety + /// invariant here? I can see two possible approaches: + /// - Mimic the invariants on [`NonNull::as_ref`] so that it's easy to write + /// the safety comment on the inner call to `self.ptr.as_ref()`. + /// - Simply say that it's the caller's responsibility to ensure that the + /// resulting reference is valid. + /// + /// These two approaches should in principle be equivalent, but since our + /// memory model is undefined, there are some subtleties here. See, e.g.: + /// + /// + /// # Old draft of Safety section + /// + /// - The referenced memory must contain a validly-initialized `T` for the + /// duration of `'a`. Note that this requires that any interior mutation + /// (i.e. via [`UnsafeCell`]) performed after this method call leave the + /// memory region always containing a valid `T`. + /// - The referenced memory must not also by referenced by any mutable + /// references during the lifetime `'a`. + /// - There must not exist any references to the same memory region which + /// contain `UnsafeCell`s at byte ranges which are not identical to the + /// byte ranges at which `T` contains `UnsafeCell`s. + /// + /// TODO: What about reads/mutation via raw pointers? Presumably these can + /// happen under the following conditions: + /// - Mutation only occurs inside `UnsafeCell`s + /// - Reads only happen using `UnsafeCell`s in places where there are + /// `UnsafeCell`s in `T` (otherwise, those reads could be unsound due to + /// assuming no concurrent mutation) + /// + /// [`UnsafeCell`]: core::cell::UnsafeCell + pub(crate) unsafe fn as_ref(&self) -> &'a T { + // TODO(#429): Add a safety comment. This will depend on how we resolve + // the question about how to define the safety invariants on this + // method. + // + // Old draft of safety comment: + // - By invariant, `self.ptr` is properly-aligned for `T`. 
+        // - By invariant, `self.ptr` is "dereferenceable" in that it points to
+        //   a single allocation
+        // - By invariant, the allocation is live for `'a`
+        // - The caller promises that no mutable references exist to this region
+        //   during `'a`
+        // - The caller promises that `UnsafeCell`s match exactly
+        // - The caller promises that the memory region contains a
+        //   validly-initialized `T`
+        #[allow(clippy::undocumented_unsafe_blocks)]
+        unsafe {
+            self.ptr.as_ref()
+        }
+    }
+
+    /// Returns a unique reference to the value.
+    ///
+    /// # Safety
+    ///
+    /// - The referenced memory must contain a validly-initialized `T`.
+    /// - The referenced memory must not also be referenced by any other
+    ///   references during the lifetime `'a`.
+    ///
+    /// [`UnsafeCell`]: core::cell::UnsafeCell
+    pub(crate) unsafe fn as_mut(&mut self) -> &'a mut T {
+        // SAFETY:
+        // - By invariant, `self.ptr` is properly-aligned for `T`.
+        // - By invariant, `self.ptr` is "dereferenceable" in that it points to
+        //   a single allocation
+        // - By invariant, the allocation is live for `'a`
+        // - The caller promises that no other references exist to this memory
+        //   region for `'a`
+        // - The caller promises that the memory region contains a
+        //   validly-initialized `T`
+        unsafe { self.ptr.as_mut() }
+    }
+}
+
+impl<'a, T: 'a + ?Sized> Ptr<'a, T> {
+    /// TODO
+    ///
+    /// # Safety
+    ///
+    /// The caller promises that, given `t: *mut T` and `u: *mut U` with the
+    /// same pointer metadata, `u` references an object whose size is less than
+    /// or equal to the size of the object referenced by `t`.
+    pub(crate) unsafe fn cast<U: 'a>(self) -> Ptr<'a, U> {
+        // SAFETY: We pass a vanilla `as` cast for the `cast` argument as
+        // required. The caller is responsible for guaranteeing the size
+        // relationship.
+        unsafe { self.cast_unsized(|p| p as *mut _) }
+    }
+
+    /// TODO
+    ///
+    /// # Safety
+    ///
+    /// The caller promises that
+    /// - `cast(p)` is implemented exactly as follows: `|p: *mut T| p as *mut U`
+    /// - The size of the object referenced by the resulting pointer is less
+    ///   than or equal to the size of the object referenced by `self`
+    pub(crate) unsafe fn cast_unsized<U: 'a + ?Sized>(
+        self,
+        cast: impl FnOnce(*mut T) -> *mut U,
+    ) -> Ptr<'a, U> {
+        let ptr = cast(self.ptr.as_ptr());
+        // SAFETY: Caller promises that `cast` is just an `as` cast. We call
+        // `cast` on `self.ptr.as_ptr()`, which is non-null by construction.
+        let ptr = unsafe { NonNull::new_unchecked(ptr) };
+        // SAFETY: TODO
+        Ptr { ptr, _lifetime: PhantomData }
+    }
+}
+
+impl<'a, T: 'a> Ptr<'a, [T]> {
+    pub(crate) fn len(&self) -> usize {
+        // TODO(#67): Remove this allow. See NonNullSliceExt for more details.
+        #[allow(unstable_name_collisions)]
+        self.ptr.len()
+    }
+
+    pub(crate) fn iter(&self) -> impl Iterator<Item = Ptr<'a, T>> {
+        let base = self.ptr.cast::<T>().as_ptr();
+        (0..self.len()).map(move |i| {
+            // TODO(https://github.com/rust-lang/rust/issues/74265): Use
+            // `NonNull::get_unchecked_mut`.
+
+            // SAFETY: TODO
+            //
+            // Old safety comment:
+            //
+            // SAFETY:
+            // - `i` is in bounds of `c.len()` by construction, and so the
+            //   result of this addition cannot overflow past the end of the
+            //   allocation referred to by `c`.
+            // - It is a precondition of `is_bit_valid` that the total length
+            //   encoded by `c` doesn't overflow `isize`.
+            // - Since `c` must point to a valid allocation, and valid
+            //   allocations cannot wrap around the address space, we know that
+            //   this addition will not wrap around either.
+ let elem = unsafe { base.add(i) }; + // SAFETY: TODO + // + // Old safety comment: + // + // SAFETY: `base` is constructed from a `NonNull` pointer, and the + // addition that produces `elem` is guaranteed not to wrap + // around/overflow, so `elem >= base > 0`. + let elem = unsafe { NonNull::new_unchecked(elem) }; + // SAFETY: TODO + Ptr { ptr: elem, _lifetime: PhantomData } + }) + } +} + +impl<'a, const N: usize, T: 'a> Ptr<'a, [T; N]> { + pub(crate) fn as_slice(&self) -> Ptr<'a, [T]> { + let ptr = NonNull::slice_from_raw_parts(self.ptr.cast::(), N); + // SAFETY: TODO + Ptr { ptr, _lifetime: PhantomData } + } +} + +impl<'a> Ptr<'a, [u8]> { + /// Attempts to cast `self` to a `U` using the given cast type. + /// + /// Returns `None` if the resulting `U` would be invalidly-aligned or if no + /// `U` can fit in `self`. On success, returns a pointer to the + /// largest-possible `U` which fits in `self`. + /// + /// # Safety + /// + /// The caller may assume that this implementation is correct, and may rely + /// on that assumption for the soundness of their code. In particular, the + /// caller may assume that, if `try_cast_into` returns `Some((ptr, + /// split_at))`, then: + /// - If this is a prefix cast, `ptr` refers to the byte range `[0, + /// split_at)` in `self`. + /// - If this is a suffix cast, `ptr` refers to the byte range `[split_at, + /// self.len())` in `self`. + /// + /// # Panics + /// + /// Panics if `U` is a DST whose trailing slice element is zero-sized. + pub(crate) fn try_cast_into( + &self, + cast_type: CastType, + ) -> Option<(Ptr<'a, U>, usize)> { + // PANICS: By invariant, the byte range addressed by `self.ptr` does not + // wrap around the address space. This implies that the sum of the + // address (represented as a `usize`) and length do not overflow + // `usize`, as required by `validate_cast_and_convert_metadata`. Thus, + // this call to `validate_cast_and_convert_metadata` won't panic. + let (elems, split_at) = U::LAYOUT.validate_cast_and_convert_metadata( + AsAddress::addr(self.ptr.as_ptr()), + self.len(), + cast_type, + )?; + let offset = match cast_type { + CastType::Prefix => 0, + CastType::_Suffix => split_at, + }; + + let ptr = self.ptr.cast::().as_ptr(); + // SAFETY: `offset` is either `0` or `split_at`. + // `validate_cast_and_convert_metadata` promises that `split_at` is in + // the range `[0, bytes_len)`, where `bytes_len` is the length argument + // to `validate_cast_and_convert_metadata`. Thus, in both cases, + // `offset` is in `[0, bytes_len)`. For `bytes_len`, we pass the length + // of `self`. Thus: + // - The resulting pointer is in or one byte past the end of the same + // byte range as `self.ptr`. Since, by invariant, `self.ptr` addresses + // a byte range entirely contained within a single allocation, the + // pointer resulting from this operation is within or one byte past + // the end of that same allocation. + // - By invariant, `bytes_len <= isize::MAX`. Since `offset <= + // bytes_len`, `offset <= isize::MAX`. + // - By invariant, `self.ptr` addresses a byte range which does not wrap + // around the address space. This means that the base pointer plus the + // `bytes_len` does not overflow `usize`. Since `offset <= bytes_len`, + // this addition does not overflow `usize`. + let base = unsafe { ptr.add(offset) }; + // SAFETY: Since `add` is not allowed to wrap around, the preceding line + // produces a pointer whose address is greater than or equal to that of + // `ptr`. Since `ptr` is a `NonNull`, `base` is also non-null. 
+ let base = unsafe { NonNull::new_unchecked(base) }; + let ptr = U::raw_from_ptr_len(base, elems); + // SAFETY: + // - By invariant, `self.ptr` is derived from some valid Rust + // allocation, `A`, and has the same provenance as `A`. All operations + // performed on `self.ptr` and values derived from it in this method + // preserve provenance, so: + // - `ptr` is derived from a valid Rust allocation, `A`. + // - `ptr` has the same provenance as `A`. + // - `validate_cast_and_convert_metadata` promises that the object + // described by `elems` and `split_at` lives at a byte range which is + // a subset of the input byte range. Thus: + // - Since, by invariant, `self.ptr` addresses a byte range entirely + // contained in `A`, so does `ptr`. + // - Since, by invariant, `self.ptr` addresses a range not longer than + // `isize::MAX` bytes, so does `ptr`. + // - Since, by invariant, `self.ptr` addresses a range which does not + // wrap around the address space, so does `ptr`. + // - `validate_cast_and_convert_metadata` promises that the object + // described by `split_at` is validly-aligned for `U`. + // - By invariant on `self`, `A` is guaranteed to live for at least + // `'a`. + // - `U: 'a` by trait bound. + Some((Ptr { ptr, _lifetime: PhantomData }, split_at)) + } + + /// Attempts to cast `self` into a `U`, failing if all of the bytes of + /// `self` cannot be treated as a `U`. + /// + /// In particular, this method fails if `self` is not validly-aligned for + /// `U` or if `self`'s size is not a valid size for `U`. + /// + /// # Safety + /// + /// On success, the caller may assume that the returned pointer references + /// the same byte range as `self`. + #[doc(hidden)] + #[inline(always)] + pub(crate) fn try_cast_into_no_leftover( + &self, + ) -> Option> { + // TODO(#67): Remove this allow. See NonNulSlicelExt for more details. + #[allow(unstable_name_collisions)] + match self.try_cast_into(CastType::Prefix) { + Some((slf, split_at)) if split_at == self.len() => Some(slf), + Some(_) | None => None, + } + } +} + +impl<'a, T: 'a + ?Sized> From<&'a T> for Ptr<'a, T> { + #[inline(always)] + fn from(t: &'a T) -> Ptr<'a, T> { + // SAFETY: `t` points to a valid Rust allocation, `A`, by construction. + // Thus: + // - `ptr` is derived from `A` + // - Since we use `NonNull::from`, which preserves provenance, `ptr` has + // the same provenance as `A` + // - Since `NonNull::from` creates a pointer which addresses the same + // bytes as `t`, `ptr` addresses a byte range entirely contained in + // (in this case, identical to) `A` + // - Since `t: &T`, it addresses no more than `isize::MAX` bytes. [1] + // - Since `t: &T`, it addresses a byte range which does not wrap around + // the address space. [2] + // - Since it is constructed from a valid `&T`, `ptr` is validly-aligned + // for `T` + // - Since `t: &'a T`, the allocation `A` is guaranteed to live for at + // least `'a` + // - `T: 'a` by trait bound + // + // TODO(#429), TODO(https://github.com/rust-lang/rust/issues/116181): + // Once it's documented, reference the guarantee that `NonNull::from` + // preserves provenance. + // + // TODO(#429), + // TODO(https://github.com/rust-lang/unsafe-code-guidelines/issues/465): + // - [1] Where does the reference document that allocations fit in + // `isize`? + // - [2] Where does the reference document that allocations don't wrap + // around the address space? 
+ Ptr { ptr: NonNull::from(t), _lifetime: PhantomData } + } +} + +impl<'a, T: 'a + ?Sized> From<&'a mut T> for Ptr<'a, T> { + #[inline(always)] + fn from(t: &'a mut T) -> Ptr<'a, T> { + Ptr::from(&*t) + } +} + +impl<'a, T: 'a + ?Sized> Debug for Ptr<'a, T> { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + self.ptr.fmt(f) + } +} pub(crate) trait AsAddress { fn addr(self) -> usize; @@ -64,7 +444,7 @@ pub(crate) fn aligned_to(t: T) -> bool { /// May panic if `align` is not a power of two. Even if it doesn't panic in this /// case, it will produce nonsense results. #[inline(always)] -pub(crate) const fn _round_down_to_next_multiple_of_alignment( +pub(crate) const fn round_down_to_next_multiple_of_alignment( n: usize, align: NonZeroUsize, ) -> usize { @@ -77,6 +457,61 @@ pub(crate) const fn _round_down_to_next_multiple_of_alignment( n & mask } +/// Since we support multiple versions of Rust, there are often features which +/// have been stabilized in the most recent stable release which do not yet +/// exist (stably) on our MSRV. This module provides polyfills for those +/// features so that we can write more "modern" code, and just remove the +/// polyfill once our MSRV supports the corresponding feature. Without this, +/// we'd have to write worse/more verbose code and leave TODO comments sprinkled +/// throughout the codebase to update to the new pattern once it's stabilized. +/// +/// Each trait is imported as `_` at the crate root; each polyfill should "just +/// work" at usage sites. +pub(crate) mod polyfills { + use core::ptr::{self, NonNull}; + + // A polyfill for `NonNull::slice_from_raw_parts` that we can use before our + // MSRV is 1.70, when that function was stabilized. + // + // TODO(#67): Once our MSRV is 1.70, remove this. + pub(crate) trait NonNullExt { + fn slice_from_raw_parts(data: Self, len: usize) -> NonNull<[T]>; + } + + impl NonNullExt for NonNull { + #[inline(always)] + fn slice_from_raw_parts(data: Self, len: usize) -> NonNull<[T]> { + let ptr = ptr::slice_from_raw_parts_mut(data.as_ptr(), len); + // SAFETY: `ptr` is converted from `data`, which is non-null. + unsafe { NonNull::new_unchecked(ptr) } + } + } + + // A polyfill for `NonNull::len` that we can use before our MSRV is 1.63, + // when that function was stabilized. + // + // TODO(#67): Once our MSRV is 1.63, remove this. + pub(crate) trait NonNullSliceExt { + fn len(&self) -> usize; + } + + impl NonNullSliceExt for NonNull<[T]> { + #[inline(always)] + fn len(&self) -> usize { + #[allow(clippy::as_conversions)] + let slc = self.as_ptr() as *const [()]; + // SAFETY: + // - `()` has alignment 1, so `slc` is trivially aligned + // - `slc` was derived from a non-null pointer + // - the size is 0 regardless of the length, so it is sound to + // materialize a reference regardless of location + // - pointer provenance may be an issue, but we never dereference + let slc = unsafe { &*slc }; + slc.len() + } + } +} + #[cfg(test)] pub(crate) mod testutil { use core::fmt::{self, Display, Formatter}; @@ -130,7 +565,142 @@ pub(crate) mod testutil { #[cfg(test)] mod tests { + use core::mem::MaybeUninit; + use super::*; + use crate::{util::testutil::AU64, FromBytes}; + + #[test] + fn test_ptrtry_cast_into_soundness() { + // This test is designed so that if `Ptr::try_cast_into_xxx` are buggy, + // it will manifest as unsoundness that Miri can detect. 
+ + // - If `size_of::() == 0`, `N == 4` + // - Else, `N == 4 * size_of::()` + fn test() { + let mut bytes = [MaybeUninit::::uninit(); N]; + let initialized = [MaybeUninit::new(0u8); N]; + for start in 0..=bytes.len() { + for end in start..=bytes.len() { + // Set all bytes to uninitialized other than those in the + // range we're going to pass to `try_cast_from`. This allows + // Miri to detect out-of-bounds reads because they read + // uninitialized memory. Without this, some out-of-bounds + // reads would still be in-bounds of `bytes`, and so might + // spuriously be accepted. + bytes = [MaybeUninit::::uninit(); N]; + let bytes = &mut bytes[start..end]; + // Initialize only the byte range we're going to pass to + // `try_cast_from`. + bytes.copy_from_slice(&initialized[start..end]); + + let bytes = { + let bytes: *const [MaybeUninit] = bytes; + #[allow(clippy::as_conversions)] + let bytes = bytes as *const [u8]; + // SAFETY: We just initialized these bytes to valid + // `u8`s. + unsafe { &*bytes } + }; + + /// # Safety + /// + /// - `slf` must reference a byte range which is entirely + /// initialized. + /// - `slf` must reference a byte range which is only + /// referenced by shared references which do not contain + /// `UnsafeCell`s during its lifetime. + unsafe fn validate_and_get_len( + slf: Ptr<'_, T>, + ) -> usize { + // TODO(#429): Update this safety comment once + // `as_ref`'s safety invariants are well-defined. + // + // Old draft safety comment: + // - The caller has promised that all bytes referenced + // by `slf` are initialized. Since `T: FromBytes`, + // those bytes constitute a valid `T`. + // - The caller has promised that no mutable references + // exist to the same memory during the duration of + // this function call. + // - The caller has promised that no `UnsafeCell` + // references exist to the same memory during the + // duration of this function call. + #[allow(clippy::undocumented_unsafe_blocks)] + let t = unsafe { slf.as_ref() }; + + let bytes = { + let len = mem::size_of_val(t); + let t: *const T = t; + // SAFETY: + // - We know `t`'s bytes are all initialized + // because we just read it from `slf`, which + // points to an initialized range of bytes. If + // there's a bug and this doesn't hold, then + // that's exactly what we're hoping Miri will + // catch! + // - Since `T: FromBytes`, `T` doesn't contain + // any `UnsafeCell`s, so it's okay for `t: T` + // and a `&[u8]` to the same memory to be + // alive concurrently. + unsafe { core::slice::from_raw_parts(t.cast::(), len) } + }; + + // This assertion ensures that `t`'s bytes are read + // and compared to another value, which in turn + // ensures that Miri gets a chance to notice if any + // of `t`'s bytes are uninitialized, which they + // shouldn't be (see the comment above). + assert_eq!(bytes, vec![0u8; bytes.len()]); + + mem::size_of_val(t) + } + + for cast_type in [CastType::Prefix, CastType::_Suffix] { + if let Some((slf, split_at)) = + Ptr::from(bytes).try_cast_into::(cast_type) + { + // SAFETY: All bytes in `bytes` have been + // initialized. + let len = unsafe { validate_and_get_len(slf) }; + match cast_type { + CastType::Prefix => assert_eq!(split_at, len), + CastType::_Suffix => assert_eq!(split_at, bytes.len() - len), + } + } + } + + if let Some(slf) = Ptr::from(bytes).try_cast_into_no_leftover::() { + // SAFETY: All bytes in `bytes` have been initialized. + let len = unsafe { validate_and_get_len(slf) }; + assert_eq!(len, bytes.len()); + } + } + } + } + + macro_rules! 
test { + ($($ty:ty),*) => { + $({ + const S: usize = core::mem::size_of::<$ty>(); + const N: usize = if S == 0 { 4 } else { S * 4 }; + test::(); + // We don't support casting into DSTs whose trailing slice + // element is a ZST. + if S > 0 { + test::(); + } + // TODO: Test with a slice DST once we have any that + // implement `KnownLayout + FromBytes`. + })* + }; + } + + test!(()); + test!(u8, u16, u32, u64, u128, usize, AU64); + test!(i8, i16, i32, i64, i128, isize); + test!(f32, f64); + } #[test] fn test_round_down_to_next_multiple_of_alignment() { @@ -143,7 +713,7 @@ mod tests { for n in 0..256 { let align = NonZeroUsize::new(align).unwrap(); let want = alt_impl(n, align); - let got = _round_down_to_next_multiple_of_alignment(n, align); + let got = round_down_to_next_multiple_of_alignment(n, align); assert_eq!(got, want, "round_down_to_next_multiple_of_alignment({n}, {align})"); } } @@ -167,7 +737,7 @@ mod proofs { let n: usize = kani::any(); let expected = model_impl(n, align); - let actual = _round_down_to_next_multiple_of_alignment(n, align); + let actual = round_down_to_next_multiple_of_alignment(n, align); assert_eq!(expected, actual, "round_down_to_next_multiple_of_alignment({n}, {align})"); }
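As a quick illustration of the fallible conversions this patch introduces: `try_transmute!` validates the destination type's bit-validity at runtime rather than assuming it. The sketch below checks a `u32` against `char`'s validity rules; it assumes that `try_transmute!` is exported like the existing `transmute!` and that the `char: TryFromBytes` impl lands as written in this patch.

```rust
use zerocopy::try_transmute;

fn main() {
    // `u32` is `AsBytes`, `char` is `TryFromBytes` (per this patch), and both
    // are four bytes, so `try_transmute!` can validate the bit pattern.
    let c: Option<char> = try_transmute!(0x1F600u32);
    assert_eq!(c, Some('😀'));

    // Surrogate code points are not valid `char`s, so validation fails.
    let bad: Option<char> = try_transmute!(0xD800u32);
    assert_eq!(bad, None);
}
```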
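The `is_bit_valid` closures accepted by `unsafe_impl!` ultimately reduce to simple predicates over a `FromBytes` representation type. The standalone functions below sketch the kind of checks those closures perform for `bool` (represented as `u8`) and `char` (represented as `u32`); the function names are illustrative and are not part of the crate.

```rust
// A byte is a valid `bool` only if it is 0 or 1.
fn bool_bytes_valid(byte: u8) -> bool {
    byte < 2
}

// A `u32` is a valid `char` only if it is a Unicode scalar value.
fn char_bits_valid(bits: u32) -> bool {
    char::from_u32(bits).is_some()
}

fn main() {
    assert!(bool_bytes_valid(0) && bool_bytes_valid(1));
    assert!(!bool_bytes_valid(2));

    assert!(char_bits_valid(0x10FFFF));
    assert!(!char_bits_valid(0xD800)); // surrogate code points are invalid
}
```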
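To make the `(elems, split_at)` contract of `validate_cast_and_convert_metadata` and the `CastType::Prefix`/`CastType::_Suffix` distinction easier to follow, here is a simplified model restricted to `Sized` types. It is not the crate's implementation; the function name, the `Suffix` spelling, and the sized-only restriction are assumptions made purely for illustration, but the returned `split_at` matches the properties asserted in the tests above (prefix: `split_at == size`; suffix: `split_at == bytes_len - size` with the object starting at `addr + split_at`).

```rust
#[derive(Copy, Clone)]
enum CastType {
    Prefix,
    Suffix,
}

// Returns `Some(split_at)` if a `T` with the given size and alignment fits at
// the start (prefix) or end (suffix) of the byte range `[addr, addr + bytes_len)`.
fn validate_sized(size: usize, align: usize, addr: usize, bytes_len: usize, cast: CastType) -> Option<usize> {
    // Reject casts that would not fit in the buffer.
    if bytes_len < size {
        return None;
    }
    // The candidate `T` lives at the start (prefix) or end (suffix) of the buffer.
    let split_at = match cast {
        CastType::Prefix => size,
        CastType::Suffix => bytes_len - size,
    };
    let object_start = match cast {
        CastType::Prefix => addr,
        CastType::Suffix => addr + split_at,
    };
    if object_start % align != 0 {
        return None;
    }
    Some(split_at)
}

fn main() {
    // A 4-byte, 4-aligned type in a 10-byte buffer at an aligned address:
    assert_eq!(validate_sized(4, 4, 0x1000, 10, CastType::Prefix), Some(4));
    // The suffix cast fails: the last 4 bytes start at 0x1006, which is not 4-aligned.
    assert_eq!(validate_sized(4, 4, 0x1000, 10, CastType::Suffix), None);
    // A misaligned base address fails a prefix cast.
    assert_eq!(validate_sized(4, 4, 0x1001, 10, CastType::Prefix), None);
}
```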
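`round_down_to_next_multiple_of_alignment` (renamed in this patch) is a plain power-of-two rounding helper. The sketch below is consistent with the visible `n & mask` return; the exact mask computation is an assumption, shown here only to make the arithmetic concrete.

```rust
use core::num::NonZeroUsize;

// Rounds `n` down to the nearest multiple of `align`, which must be a power of two.
const fn round_down_to_next_multiple_of_alignment(n: usize, align: NonZeroUsize) -> usize {
    let mask = !(align.get() - 1);
    n & mask
}

fn main() {
    let align = NonZeroUsize::new(8).unwrap();
    assert_eq!(round_down_to_next_multiple_of_alignment(27, align), 24);
    assert_eq!(round_down_to_next_multiple_of_alignment(32, align), 32);
    assert_eq!(round_down_to_next_multiple_of_alignment(7, align), 0);
}
```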
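The `util::polyfills` module relies on the fact that method resolution prefers inherent methods over trait methods, so each extension trait silently stops being used once the corresponding inherent API is stable on the MSRV. A minimal standalone sketch of that pattern follows; the trait name here is illustrative and is not the crate's.

```rust
use core::ptr::{self, NonNull};

// Extension trait standing in for a not-yet-stable inherent constructor.
trait SliceFromRawPartsPolyfill<T> {
    fn slice_from_raw_parts(data: Self, len: usize) -> NonNull<[T]>;
}

impl<T> SliceFromRawPartsPolyfill<T> for NonNull<T> {
    fn slice_from_raw_parts(data: Self, len: usize) -> NonNull<[T]> {
        let ptr = ptr::slice_from_raw_parts_mut(data.as_ptr(), len);
        // SAFETY: `ptr` is derived from `data`, which is non-null.
        unsafe { NonNull::new_unchecked(ptr) }
    }
}

fn main() {
    let mut bytes = [1u8, 2, 3, 4];
    let len = bytes.len();
    let data = NonNull::new(bytes.as_mut_ptr()).unwrap();
    // Call through the trait; once an inherent `slice_from_raw_parts` exists,
    // an identical call site would resolve to it instead.
    let slice: NonNull<[u8]> = SliceFromRawPartsPolyfill::slice_from_raw_parts(data, len);
    // SAFETY: `slice` covers exactly the live, initialized array `bytes`.
    assert_eq!(unsafe { slice.as_ref() }, &[1u8, 2, 3, 4][..]);
}
```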
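The Miri-oriented soundness test above depends on backing the buffer with `MaybeUninit<u8>` and initializing only the window under test, so any out-of-bounds read touches uninitialized memory that Miri reports instead of silently succeeding. A self-contained sketch of that trick, under the simplifying assumption of a fixed window:

```rust
use core::mem::MaybeUninit;

fn main() {
    const N: usize = 16;
    let mut storage = [MaybeUninit::<u8>::uninit(); N];
    let (start, end) = (4, 12);

    // Initialize only `storage[start..end]`; everything else stays uninitialized.
    for slot in &mut storage[start..end] {
        *slot = MaybeUninit::new(0);
    }

    // View just the initialized window as `&[u8]`.
    let window: &[u8] = {
        let ptr: *const [MaybeUninit<u8>] = &storage[start..end];
        // SAFETY: every byte in `storage[start..end]` was initialized above.
        unsafe { &*(ptr as *const [u8]) }
    };

    // Reads confined to `window` are fine; a read past it would hit
    // uninitialized memory and be flagged when run under Miri.
    assert_eq!(window.len(), end - start);
    assert!(window.iter().all(|&b| b == 0));
}
```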