Commit 64b7e57

fill_via_chunks: make a generic function
1 parent 9684ebf commit 64b7e57

File tree

1 file changed: +54 -30 lines


rand_core/src/impls.rs

Lines changed: 54 additions & 30 deletions
```diff
@@ -52,36 +52,60 @@ pub fn fill_bytes_via_next<R: RngCore + ?Sized>(rng: &mut R, dest: &mut [u8]) {
     }
 }
 
-macro_rules! fill_via_chunks {
-    ($src:expr, $dst:expr, $ty:ty) => {{
-        const SIZE: usize = core::mem::size_of::<$ty>();
-        let chunk_size_u8 = min($src.len() * SIZE, $dst.len());
-        let chunk_size = (chunk_size_u8 + SIZE - 1) / SIZE;
-
-        if cfg!(target_endian = "little") {
-            // On LE we can do a simple copy, which is 25-50% faster:
-            unsafe {
-                core::ptr::copy_nonoverlapping(
-                    $src.as_ptr() as *const u8,
-                    $dst.as_mut_ptr(),
-                    chunk_size_u8);
-            }
-        } else {
-            // This code is valid on all arches, but slower than the above:
-            let mut i = 0;
-            let mut iter = $dst[..chunk_size_u8].chunks_exact_mut(SIZE);
-            while let Some(chunk) = iter.next() {
-                chunk.copy_from_slice(&$src[i].to_le_bytes());
-                i += 1;
-            }
-            let chunk = iter.into_remainder();
-            if !chunk.is_empty() {
-                chunk.copy_from_slice(&$src[i].to_le_bytes()[..chunk.len()]);
-            }
+trait ToLe: Copy {
+    type Bytes: AsRef<[u8]>;
+    fn to_le_bytes(self) -> Self::Bytes;
+}
+impl ToLe for u32 {
+    type Bytes = [u8; 4];
+    fn to_le_bytes(self) -> Self::Bytes {
+        self.to_le_bytes()
+    }
+}
+impl ToLe for u64 {
+    type Bytes = [u8; 8];
+    fn to_le_bytes(self) -> Self::Bytes {
+        self.to_le_bytes()
+    }
+}
+
+fn fill_via_chunks<T: ToLe>(src: &[T], dest: &mut [u8]) -> (usize, usize) {
+    let size = core::mem::size_of::<T>();
+    let chunk_size_u8 = min(src.len() * size, dest.len());
+    let chunk_size = (chunk_size_u8 + size - 1) / size;
+
+    if cfg!(target_endian = "little") {
+        // On LE we can do a simple copy, which is 25-50% faster:
+        unsafe {
+            core::ptr::copy_nonoverlapping(
+                src.as_ptr() as *const u8,
+                dest.as_mut_ptr(),
+                chunk_size_u8,
+            );
         }
+    } else {
+        // This code is valid on all arches, but slower than the above:
+        let mut i = 0;
+        let mut iter = dest[..chunk_size_u8].chunks_exact_mut(size);
+        while let Some(chunk) = iter.next() {
+            chunk.copy_from_slice(src[i].to_le_bytes().as_ref());
+            i += 1;
+        }
+        let chunk = iter.into_remainder();
+        if !chunk.is_empty() {
+            chunk.copy_from_slice(&src[i].to_le_bytes().as_ref()[..chunk.len()]);
+        }
+    }
+
+    unsafe {
+        core::ptr::copy_nonoverlapping(
+            src.as_ptr() as *const u8,
+            dest.as_mut_ptr(),
+            chunk_size_u8,
+        );
+    }
 
-        (chunk_size, chunk_size_u8)
-    }};
+    (chunk_size, chunk_size_u8)
 }
 
 /// Implement `fill_bytes` by reading chunks from the output buffer of a block
```
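The point of the new `ToLe` trait: `to_le_bytes` is an inherent method on each integer type and `core` offers no shared trait for it, which is why the old code had to be a macro. Below is a minimal standalone sketch of the same pattern, assuming nothing beyond `core`; the `first_le_byte` helper and the test value are illustrative, not part of the commit:

```rust
/// Minimal local trait unifying `to_le_bytes` across word types,
/// mirroring the pattern introduced in this commit.
trait ToLe: Copy {
    type Bytes: AsRef<[u8]>;
    fn to_le_bytes(self) -> Self::Bytes;
}

impl ToLe for u32 {
    type Bytes = [u8; 4];
    fn to_le_bytes(self) -> Self::Bytes {
        // Resolves to the inherent `u32::to_le_bytes`, not a recursive
        // call: inherent methods take precedence over trait methods.
        self.to_le_bytes()
    }
}

/// Illustrative generic consumer: no macro needed to accept any word type.
fn first_le_byte<T: ToLe>(words: &[T]) -> Option<u8> {
    words.first().map(|w| w.to_le_bytes().as_ref()[0])
}

fn main() {
    // 0x0403_0201u32 serializes to [0x01, 0x02, 0x03, 0x04] on every arch.
    assert_eq!(first_le_byte(&[0x0403_0201u32]), Some(0x01));
}
```

Because inherent methods win during method resolution, `self.to_le_bytes()` inside each impl calls the standard-library method rather than recursing, exactly as in the committed code.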
```diff
@@ -115,7 +139,7 @@ macro_rules! fill_via_chunks {
 /// }
 /// ```
 pub fn fill_via_u32_chunks(src: &[u32], dest: &mut [u8]) -> (usize, usize) {
-    fill_via_chunks!(src, dest, u32)
+    fill_via_chunks(src, dest)
 }
 
 /// Implement `fill_bytes` by reading chunks from the output buffer of a block
```
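`fill_via_u32_chunks` keeps its public signature and still returns `(consumed_u32, filled_u8)`: the number of `u32` words read from `src` and the number of bytes written to `dest`. A hedged sketch of the calling pattern from a block-style generator; `MyRng`, its buffer layout, and the placeholder mixing step are invented for illustration (a real block RNG would normally go through `rand_core`'s `BlockRng` instead):

```rust
use rand_core::impls::fill_via_u32_chunks;

// Hypothetical block generator: refills a fixed buffer of u32 words.
struct MyRng {
    buffer: [u32; 16],
    index: usize, // next unconsumed word
}

impl MyRng {
    fn generate(&mut self) {
        // Placeholder mixing step; a real RNG round function goes here.
        for (i, w) in self.buffer.iter_mut().enumerate() {
            *w = w.wrapping_add(0x9E37_79B9).rotate_left(i as u32);
        }
        self.index = 0;
    }

    fn fill_bytes(&mut self, dest: &mut [u8]) {
        let mut written = 0;
        while written < dest.len() {
            if self.index >= self.buffer.len() {
                self.generate();
            }
            // (consumed_u32, filled_u8): words consumed from the buffer,
            // bytes written into `dest`.
            let (consumed_u32, filled_u8) =
                fill_via_u32_chunks(&self.buffer[self.index..], &mut dest[written..]);
            self.index += consumed_u32;
            written += filled_u8;
        }
    }
}

fn main() {
    // Start with the buffer marked exhausted so the first call refills it.
    let mut rng = MyRng { buffer: [0; 16], index: 16 };
    let mut out = [0u8; 10];
    rng.fill_bytes(&mut out);
    println!("{:02x?}", out);
}
```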
```diff
@@ -129,7 +153,7 @@ pub fn fill_via_u32_chunks(src: &[u32], dest: &mut [u8]) -> (usize, usize) {
 ///
 /// See `fill_via_u32_chunks` for an example.
 pub fn fill_via_u64_chunks(src: &[u64], dest: &mut [u8]) -> (usize, usize) {
-    fill_via_chunks!(src, dest, u64)
+    fill_via_chunks(src, dest)
 }
 
 /// Implement `next_u32` via `fill_bytes`, little-endian order.
```
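`fill_via_u64_chunks` now shares the same generic body, including the rounding-up word count `chunk_size = (chunk_size_u8 + size - 1) / size`: a word that contributes even one byte still counts as consumed. A small worked check of the return values, derived from the code above (assumes only `rand_core` with these two public helpers):

```rust
use rand_core::impls::{fill_via_u32_chunks, fill_via_u64_chunks};

fn main() {
    // 20-byte dest, u64 source: 20 bytes are copied and ceil(20 / 8) = 3
    // words count as consumed; the third word contributes only 4 bytes.
    let src64 = [0x0807_0605_0403_0201u64; 4];
    let mut dest = [0u8; 20];
    let (consumed, filled) = fill_via_u64_chunks(&src64, &mut dest);
    assert_eq!((consumed, filled), (3, 20));

    // When dest is larger than src, the copy is capped by src:
    // 4 words * 4 bytes = 16 bytes into a 32-byte dest.
    let src32 = [0xDEAD_BEEFu32; 4];
    let mut big = [0u8; 32];
    let (consumed, filled) = fill_via_u32_chunks(&src32, &mut big);
    assert_eq!((consumed, filled), (4, 16));

    // Output is in little-endian byte order on every platform.
    assert_eq!(&dest[..4], &[0x01, 0x02, 0x03, 0x04]);
}
```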
