Skip to content

Commit 50b9a44

Browse files
authored
fill_via_chunks: mutate src on BE (small optimisation) (#1182)
* fill_via_chunks: mutate src on BE (small optimisation)
* Add doc to fill_via_chunks
1 parent 19169cb commit 50b9a44

File tree

2 files changed

+46
-35
lines changed

2 files changed

+46
-35
lines changed

rand_core/src/block.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ impl<R: BlockRngCore<Item = u32>> RngCore for BlockRng<R> {
223223
self.generate_and_set(0);
224224
}
225225
let (consumed_u32, filled_u8) =
226-
fill_via_u32_chunks(&self.results.as_ref()[self.index..], &mut dest[read_len..]);
226+
fill_via_u32_chunks(&mut self.results.as_mut()[self.index..], &mut dest[read_len..]);
227227

228228
self.index += consumed_u32;
229229
read_len += filled_u8;
@@ -387,7 +387,7 @@ impl<R: BlockRngCore<Item = u64>> RngCore for BlockRng64<R> {
387387
}
388388

389389
let (consumed_u64, filled_u8) = fill_via_u64_chunks(
390-
&self.results.as_ref()[self.index..],
390+
&mut self.results.as_mut()[self.index..],
391391
&mut dest[read_len..],
392392
);
393393

rand_core/src/impls.rs

Lines changed: 44 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,14 @@ pub fn fill_bytes_via_next<R: RngCore + ?Sized>(rng: &mut R, dest: &mut [u8]) {
5353
}
5454

5555
trait Observable: Copy {
56-
type Bytes: AsRef<[u8]>;
57-
fn to_le_bytes(self) -> Self::Bytes;
56+
fn to_le(self) -> Self;
5857

5958
// Contract: observing self is memory-safe (implies no uninitialised padding)
6059
fn as_byte_slice(x: &[Self]) -> &[u8];
6160
}
6261
impl Observable for u32 {
63-
type Bytes = [u8; 4];
64-
fn to_le_bytes(self) -> Self::Bytes {
65-
self.to_le_bytes()
62+
fn to_le(self) -> Self {
63+
self.to_le()
6664
}
6765
fn as_byte_slice(x: &[Self]) -> &[u8] {
6866
let ptr = x.as_ptr() as *const u8;
@@ -71,9 +69,8 @@ impl Observable for u32 {
7169
}
7270
}
7371
impl Observable for u64 {
74-
type Bytes = [u8; 8];
75-
fn to_le_bytes(self) -> Self::Bytes {
76-
self.to_le_bytes()
72+
fn to_le(self) -> Self {
73+
self.to_le()
7774
}
7875
fn as_byte_slice(x: &[Self]) -> &[u8] {
7976
let ptr = x.as_ptr() as *const u8;
@@ -82,28 +79,27 @@ impl Observable for u64 {
8279
}
8380
}
8481

85-
fn fill_via_chunks<T: Observable>(src: &[T], dest: &mut [u8]) -> (usize, usize) {
82+
/// Fill dest from src
83+
///
84+
/// Returns `(n, byte_len)`. `src[..n]` is consumed (and possibly mutated),
85+
/// `dest[..byte_len]` is filled. `src[n..]` and `dest[byte_len..]` are left
86+
/// unaltered.
87+
fn fill_via_chunks<T: Observable>(src: &mut [T], dest: &mut [u8]) -> (usize, usize) {
8688
let size = core::mem::size_of::<T>();
8789
let byte_len = min(src.len() * size, dest.len());
8890
let num_chunks = (byte_len + size - 1) / size;
8991

90-
if cfg!(target_endian = "little") {
91-
// On LE we can do a simple copy, which is 25-50% faster:
92-
dest[..byte_len].copy_from_slice(&T::as_byte_slice(&src[..num_chunks])[..byte_len]);
93-
} else {
94-
// This code is valid on all arches, but slower than the above:
95-
let mut i = 0;
96-
let mut iter = dest[..byte_len].chunks_exact_mut(size);
97-
for chunk in &mut iter {
98-
chunk.copy_from_slice(src[i].to_le_bytes().as_ref());
99-
i += 1;
100-
}
101-
let chunk = iter.into_remainder();
102-
if !chunk.is_empty() {
103-
chunk.copy_from_slice(&src[i].to_le_bytes().as_ref()[..chunk.len()]);
92+
// Byte-swap for portability of results. This must happen before copying
93+
// since the size of dest is not guaranteed to be a multiple of T or to be
94+
// sufficiently aligned.
95+
if cfg!(target_endian = "big") {
96+
for x in &mut src[..num_chunks] {
97+
*x = x.to_le();
10498
}
10599
}
106100

101+
dest[..byte_len].copy_from_slice(&T::as_byte_slice(&src[..num_chunks])[..byte_len]);
102+
107103
(num_chunks, byte_len)
108104
}
109105

@@ -112,6 +108,9 @@ fn fill_via_chunks<T: Observable>(src: &[T], dest: &mut [u8]) -> (usize, usize)
112108
///
113109
/// The return values are `(consumed_u32, filled_u8)`.
114110
///
111+
/// On big-endian systems, endianness of `src[..consumed_u32]` values is
112+
/// swapped. No other adjustments to `src` are made.
113+
///
115114
/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
116115
/// the length of `dest`.
117116
/// `consumed_u32` is the number of words consumed from `src`, which is the same
@@ -137,21 +136,25 @@ fn fill_via_chunks<T: Observable>(src: &[T], dest: &mut [u8]) -> (usize, usize)
137136
/// }
138137
/// }
139138
/// ```
140-
pub fn fill_via_u32_chunks(src: &[u32], dest: &mut [u8]) -> (usize, usize) {
139+
pub fn fill_via_u32_chunks(src: &mut [u32], dest: &mut [u8]) -> (usize, usize) {
141140
fill_via_chunks(src, dest)
142141
}
143142

144143
/// Implement `fill_bytes` by reading chunks from the output buffer of a block
145144
/// based RNG.
146145
///
147146
/// The return values are `(consumed_u64, filled_u8)`.
147+
///
148+
/// On big-endian systems, endianness of `src[..consumed_u64]` values is
149+
/// swapped. No other adjustments to `src` are made.
150+
///
148151
/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
149152
/// the length of `dest`.
150153
/// `consumed_u64` is the number of words consumed from `src`, which is the same
151154
/// as `filled_u8 / 8` rounded up.
152155
///
153156
/// See `fill_via_u32_chunks` for an example.
154-
pub fn fill_via_u64_chunks(src: &[u64], dest: &mut [u8]) -> (usize, usize) {
157+
pub fn fill_via_u64_chunks(src: &mut [u64], dest: &mut [u8]) -> (usize, usize) {
155158
fill_via_chunks(src, dest)
156159
}
157160

@@ -175,33 +178,41 @@ mod test {
175178

176179
#[test]
177180
fn test_fill_via_u32_chunks() {
178-
let src = [1, 2, 3];
181+
let src_orig = [1, 2, 3];
182+
183+
let mut src = src_orig;
179184
let mut dst = [0u8; 11];
180-
assert_eq!(fill_via_u32_chunks(&src, &mut dst), (3, 11));
185+
assert_eq!(fill_via_u32_chunks(&mut src, &mut dst), (3, 11));
181186
assert_eq!(dst, [1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0]);
182187

188+
let mut src = src_orig;
183189
let mut dst = [0u8; 13];
184-
assert_eq!(fill_via_u32_chunks(&src, &mut dst), (3, 12));
190+
assert_eq!(fill_via_u32_chunks(&mut src, &mut dst), (3, 12));
185191
assert_eq!(dst, [1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0]);
186192

193+
let mut src = src_orig;
187194
let mut dst = [0u8; 5];
188-
assert_eq!(fill_via_u32_chunks(&src, &mut dst), (2, 5));
195+
assert_eq!(fill_via_u32_chunks(&mut src, &mut dst), (2, 5));
189196
assert_eq!(dst, [1, 0, 0, 0, 2]);
190197
}
191198

192199
#[test]
193200
fn test_fill_via_u64_chunks() {
194-
let src = [1, 2];
201+
let src_orig = [1, 2];
202+
203+
let mut src = src_orig;
195204
let mut dst = [0u8; 11];
196-
assert_eq!(fill_via_u64_chunks(&src, &mut dst), (2, 11));
205+
assert_eq!(fill_via_u64_chunks(&mut src, &mut dst), (2, 11));
197206
assert_eq!(dst, [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0]);
198207

208+
let mut src = src_orig;
199209
let mut dst = [0u8; 17];
200-
assert_eq!(fill_via_u64_chunks(&src, &mut dst), (2, 16));
210+
assert_eq!(fill_via_u64_chunks(&mut src, &mut dst), (2, 16));
201211
assert_eq!(dst, [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0]);
202212

213+
let mut src = src_orig;
203214
let mut dst = [0u8; 5];
204-
assert_eq!(fill_via_u64_chunks(&src, &mut dst), (1, 5));
215+
assert_eq!(fill_via_u64_chunks(&mut src, &mut dst), (1, 5));
205216
assert_eq!(dst, [1, 0, 0, 0, 0]);
206217
}
207218
}

0 commit comments

Comments
 (0)