Skip to content

Commit 3057641

Browse files
authored
Remove zerocopy from rand (#1579)
Replace `zerocopy` dependency with `unsafe` code. Add benchmarks for some SIMD / wide types. Document all `unsafe` code.
1 parent f812984 commit 3057641

File tree

9 files changed

+183
-49
lines changed

9 files changed

+183
-49
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ A [separate changelog is kept for rand_core](rand_core/CHANGELOG.md).
99
You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.html) useful.
1010

1111
## [Unreleased]
12+
- Remove `zerocopy` dependency (#1579)
1213
- Fix feature `simd_support` for recent nightly rust (#1586)
1314
- Add `Alphabetic` distribution. (#1587)
1415
- Re-export `rand_core` (#1602)

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ alloc = []
4343
os_rng = ["rand_core/os_rng"]
4444

4545
# Option (requires nightly Rust): experimental SIMD support
46-
simd_support = ["zerocopy/simd-nightly"]
46+
simd_support = []
4747

4848
# Option (enabled by default): enable StdRng
4949
std_rng = ["dep:rand_chacha"]
@@ -75,7 +75,6 @@ rand_core = { path = "rand_core", version = "0.9.0", default-features = false }
7575
log = { version = "0.4.4", optional = true }
7676
serde = { version = "1.0.103", features = ["derive"], optional = true }
7777
rand_chacha = { path = "rand_chacha", version = "0.9.0", default-features = false, optional = true }
78-
zerocopy = { version = "0.8.0", default-features = false, features = ["simd"] }
7978

8079
[dev-dependencies]
8180
rand_pcg = { path = "rand_pcg", version = "0.9.0" }

benches/Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ publish = false
88
# Option (requires nightly Rust): experimental SIMD support
99
simd_support = ["rand/simd_support"]
1010

11-
1211
[dependencies]
1312

1413
[dev-dependencies]
@@ -38,6 +37,10 @@ harness = false
3837
name = "shuffle"
3938
harness = false
4039

40+
[[bench]]
41+
name = "simd"
42+
harness = false
43+
4144
[[bench]]
4245
name = "standard"
4346
harness = false

benches/benches/simd.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// Copyright 2018-2023 Developers of the Rand project.
2+
//
3+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4+
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5+
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6+
// option. This file may not be copied, modified, or distributed
7+
// except according to those terms.
8+
9+
//! Generating SIMD / wide types
10+
11+
#![cfg_attr(feature = "simd_support", feature(portable_simd))]
12+
13+
use criterion::{criterion_group, criterion_main, Criterion};
14+
15+
criterion_group!(
16+
name = benches;
17+
config = Criterion::default();
18+
targets = simd
19+
);
20+
criterion_main!(benches);
21+
22+
/// No-op stand-in used when the `simd_support` feature is disabled, so that
/// `criterion_group!` always has a `simd` target to reference.
#[cfg(not(feature = "simd_support"))]
23+
pub fn simd(_: &mut Criterion) {}
24+
25+
/// Benchmarks sampling of wide / SIMD values through `Rng::random`.
///
/// Every case is registered in the `random_simd` group and uses its own
/// `Pcg64Mcg` generator seeded from `rand::rng()`. The label of each case
/// names the exact type being sampled.
#[cfg(feature = "simd_support")]
26+
pub fn simd(c: &mut Criterion) {
27+
    use rand::prelude::*;
28+
    use rand_pcg::Pcg64Mcg;
29+
30+
    let mut g = c.benchmark_group("random_simd");
31+
32+
    // Plain 128-bit integer, as a baseline for the wide types below.
    g.bench_function("u128", |b| {
33+
        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
34+
        b.iter(|| rng.random::<u128>());
35+
    });
36+
37+
    // x86_64 vendor intrinsic types.
    g.bench_function("m128i", |b| {
38+
        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
39+
        b.iter(|| rng.random::<core::arch::x86_64::__m128i>());
40+
    });
41+
42+
    g.bench_function("m256i", |b| {
43+
        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
44+
        b.iter(|| rng.random::<core::arch::x86_64::__m256i>());
45+
    });
46+
47+
    g.bench_function("m512i", |b| {
48+
        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
49+
        b.iter(|| rng.random::<core::arch::x86_64::__m512i>());
50+
    });
51+
52+
    // Portable SIMD (`core::simd`) vector types.
    g.bench_function("u64x2", |b| {
53+
        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
54+
        b.iter(|| rng.random::<core::simd::u64x2>());
55+
    });
56+
57+
    g.bench_function("u32x4", |b| {
58+
        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
59+
        b.iter(|| rng.random::<core::simd::u32x4>());
60+
    });
61+
62+
    g.bench_function("u32x8", |b| {
63+
        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
64+
        b.iter(|| rng.random::<core::simd::u32x8>());
65+
    });
66+
67+
    g.bench_function("u16x8", |b| {
68+
        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
69+
        b.iter(|| rng.random::<core::simd::u16x8>());
70+
    });
71+
72+
    g.bench_function("u8x16", |b| {
73+
        let mut rng = Pcg64Mcg::from_rng(&mut rand::rng());
74+
        b.iter(|| rng.random::<core::simd::u8x16>());
75+
    });
76+
}

rand_core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
)]
3232
#![deny(missing_docs)]
3333
#![deny(missing_debug_implementations)]
34+
#![deny(clippy::undocumented_unsafe_blocks)]
3435
#![doc(test(attr(allow(unused_variables), deny(warnings))))]
3536
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
3637
#![no_std]

src/distr/integer.rs

Lines changed: 44 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -107,21 +107,50 @@ impl_nzint!(NonZeroI64, NonZeroI64::new);
107107
impl_nzint!(NonZeroI128, NonZeroI128::new);
108108

109109
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
110-
macro_rules! x86_intrinsic_impl {
111-
($meta:meta, $($intrinsic:ident),+) => {$(
112-
#[cfg($meta)]
113-
impl Distribution<$intrinsic> for StandardUniform {
114-
#[inline]
115-
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $intrinsic {
116-
// On proper hardware, this should compile to SIMD instructions
117-
// Verified on x86 Haswell with __m128i, __m256i
118-
let mut buf = [0_u8; core::mem::size_of::<$intrinsic>()];
119-
rng.fill_bytes(&mut buf);
120-
// x86 is little endian so no need for conversion
121-
zerocopy::transmute!(buf)
122-
}
123-
}
124-
)+};
110+
/// Samples a `__m128i` by filling a byte buffer of the same size from the RNG
/// and reinterpreting those bytes as the vector type.
impl Distribution<__m128i> for StandardUniform {
111+
#[inline]
112+
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> __m128i {
113+
// NOTE: It's tempting to use the u128 impl here, but confusingly this
114+
// results in different code (return via rdx, r10 instead of rax, rdx
115+
// with u128 impl) and is much slower (+130 time). This version calls
116+
// impls::fill_bytes_via_next but performs well.
117+
118+
// Byte buffer with exactly the size of the output type.
let mut buf = [0_u8; core::mem::size_of::<__m128i>()];
119+
rng.fill_bytes(&mut buf);
120+
// x86 is little endian so no need for conversion
121+
122+
// SAFETY: All byte sequences of `buf` represent values of the output type.
123+
unsafe { core::mem::transmute(buf) }
124+
}
125+
}
126+
127+
/// Samples a `__m256i` by filling a byte buffer of the same size from the RNG
/// and reinterpreting those bytes as the vector type.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
128+
impl Distribution<__m256i> for StandardUniform {
129+
#[inline]
130+
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> __m256i {
131+
// Byte buffer with exactly the size of the output type.
let mut buf = [0_u8; core::mem::size_of::<__m256i>()];
132+
rng.fill_bytes(&mut buf);
133+
// x86 is little endian so no need for conversion
134+
135+
// SAFETY: All byte sequences of `buf` represent values of the output type.
136+
unsafe { core::mem::transmute(buf) }
137+
}
138+
}
139+
140+
/// Samples a `__m512i` by filling a byte buffer of the same size from the RNG
/// and reinterpreting those bytes as the vector type.
///
/// Only compiled on x86 / x86_64 targets with the `simd_support` feature enabled.
#[cfg(all(
141+
any(target_arch = "x86", target_arch = "x86_64"),
142+
feature = "simd_support"
143+
))]
144+
impl Distribution<__m512i> for StandardUniform {
145+
#[inline]
146+
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> __m512i {
147+
// Byte buffer with exactly the size of the output type.
let mut buf = [0_u8; core::mem::size_of::<__m512i>()];
148+
rng.fill_bytes(&mut buf);
149+
// x86 is little endian so no need for conversion
150+
151+
// SAFETY: All byte sequences of `buf` represent values of the output type.
152+
unsafe { core::mem::transmute(buf) }
153+
}
125154
}
126155

127156
#[cfg(feature = "simd_support")]
@@ -148,24 +177,6 @@ macro_rules! simd_impl {
148177
#[cfg(feature = "simd_support")]
149178
simd_impl!(u8, i8, u16, i16, u32, i32, u64, i64);
150179

151-
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
152-
x86_intrinsic_impl!(
153-
any(target_arch = "x86", target_arch = "x86_64"),
154-
__m128i,
155-
__m256i
156-
);
157-
#[cfg(all(
158-
any(target_arch = "x86", target_arch = "x86_64"),
159-
feature = "simd_support"
160-
))]
161-
x86_intrinsic_impl!(
162-
all(
163-
any(target_arch = "x86", target_arch = "x86_64"),
164-
feature = "simd_support"
165-
),
166-
__m512i
167-
);
168-
169180
#[cfg(test)]
170181
mod tests {
171182
use super::*;

src/distr/other.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ impl Distribution<char> for StandardUniform {
118118
if n <= 0xDFFF {
119119
n -= GAP_SIZE;
120120
}
121+
// SAFETY: We ensure above that `n` represents a `char`.
121122
unsafe { char::from_u32_unchecked(n) }
122123
}
123124
}
@@ -166,9 +167,14 @@ impl Distribution<u8> for Alphabetic {
166167
#[cfg(feature = "alloc")]
167168
impl SampleString for Alphanumeric {
168169
fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, string: &mut String, len: usize) {
170+
// SAFETY: `self` only samples alphanumeric characters, which are valid UTF-8.
169171
unsafe {
170172
let v = string.as_mut_vec();
171-
v.extend(self.sample_iter(rng).take(len));
173+
v.extend(
174+
self.sample_iter(rng)
175+
.take(len)
176+
.inspect(|b| debug_assert!(b.is_ascii_alphanumeric())),
177+
);
172178
}
173179
}
174180
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
clippy::neg_cmp_op_on_partial_ord,
6060
clippy::nonminimal_bool
6161
)]
62+
#![deny(clippy::undocumented_unsafe_blocks)]
6263

6364
#[cfg(feature = "alloc")]
6465
extern crate alloc;

src/rng.rs

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
use crate::distr::uniform::{SampleRange, SampleUniform};
1313
use crate::distr::{self, Distribution, StandardUniform};
1414
use core::num::Wrapping;
15+
use core::{mem, slice};
1516
use rand_core::RngCore;
16-
use zerocopy::IntoBytes;
1717

1818
/// User-level interface for RNGs
1919
///
@@ -393,14 +393,36 @@ impl Fill for [u8] {
393393
}
394394
}
395395

396+
/// Call target for unsafe macros
397+
const unsafe fn __unsafe() {}
398+
399+
/// Implement `Fill` for given type `$t`.
400+
///
401+
/// # Safety
402+
/// All bit patterns of `[u8; size_of::<$t>()]` must represent values of `$t`.
396403
macro_rules! impl_fill {
397404
() => {};
398-
($t:ty) => {
405+
($t:ty) => {{
406+
// Force caller to wrap with an `unsafe` block
407+
__unsafe();
408+
399409
impl Fill for [$t] {
400-
#[inline(never)] // in micro benchmarks, this improves performance
401410
fn fill<R: Rng + ?Sized>(&mut self, rng: &mut R) {
402411
if self.len() > 0 {
403-
rng.fill_bytes(self.as_mut_bytes());
412+
let size = mem::size_of_val(self);
413+
rng.fill_bytes(
414+
// SAFETY: `self` is non-null and valid for reads and writes within its `size`
415+
// bytes. `self` meets the alignment requirements of `&mut [u8]`.
416+
// The contents of `self` are initialized. Both `[u8]` and `[$t]` are valid
417+
// for all bit-patterns of their contents (note the SAFETY requirement
418+
// on callers of this macro). `self` is not borrowed.
419+
unsafe {
420+
slice::from_raw_parts_mut(self.as_mut_ptr()
421+
as *mut u8,
422+
size
423+
)
424+
}
425+
);
404426
for x in self {
405427
*x = x.to_le();
406428
}
@@ -409,27 +431,41 @@ macro_rules! impl_fill {
409431
}
410432

411433
impl Fill for [Wrapping<$t>] {
412-
#[inline(never)]
413434
fn fill<R: Rng + ?Sized>(&mut self, rng: &mut R) {
414435
if self.len() > 0 {
415-
rng.fill_bytes(self.as_mut_bytes());
436+
let size = self.len() * mem::size_of::<$t>();
437+
rng.fill_bytes(
438+
// SAFETY: `self` is non-null and valid for reads and writes within its `size`
439+
// bytes. `self` meets the alignment requirements of `&mut [u8]`.
440+
// The contents of `self` are initialized. Both `[u8]` and `[Wrapping<$t>]` are valid
441+
// for all bit-patterns of their contents (note the SAFETY requirement
442+
// on callers of this macro). `self` is not borrowed.
443+
unsafe {
444+
slice::from_raw_parts_mut(self.as_mut_ptr()
445+
as *mut u8,
446+
size
447+
)
448+
}
449+
);
416450
for x in self {
417-
*x = Wrapping(x.0.to_le());
451+
*x = Wrapping(x.0.to_le());
418452
}
419453
}
420454
}
421-
}
455+
}}
422456
};
423-
($t:ty, $($tt:ty,)*) => {
457+
($t:ty, $($tt:ty,)*) => {{
424458
impl_fill!($t);
425459
// TODO: this could replace above impl once Rust #32463 is fixed
426460
// impl_fill!(Wrapping<$t>);
427461
impl_fill!($($tt,)*);
428-
}
462+
}}
429463
}
430464

431-
impl_fill!(u16, u32, u64, u128,);
432-
impl_fill!(i8, i16, i32, i64, i128,);
465+
// SAFETY: All bit patterns of `[u8; size_of::<$t>()]` represent values of `u*`.
466+
const _: () = unsafe { impl_fill!(u16, u32, u64, u128,) };
467+
// SAFETY: All bit patterns of `[u8; size_of::<$t>()]` represent values of `i*`.
468+
const _: () = unsafe { impl_fill!(i8, i16, i32, i64, i128,) };
433469

434470
impl<T, const N: usize> Fill for [T; N]
435471
where

0 commit comments

Comments
 (0)