diff --git a/Cargo.toml b/Cargo.toml index 6f9eb64e319..24bf159771a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rand" -version = "0.3.18" +version = "0.3.19" authors = ["The Rust Project Developers"] license = "MIT/Apache-2.0" readme = "README.md" diff --git a/src/lib.rs b/src/lib.rs index 3c7d2cc5149..be3922fc8c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -184,7 +184,7 @@ //! // where the car is. The game host will never open the door with the car. //! fn game_host_open(car: u32, choice: u32, rng: &mut R) -> u32 { //! let choices = free_doors(&[car, choice]); -//! rand::sample(rng, choices.into_iter(), 1)[0] +//! rand::seq::sample_slice(rng, &choices, 1)[0] //! } //! //! // Returns the door we switch to, given our current choice and @@ -276,6 +276,7 @@ pub mod reseeding; mod rand_impls; pub mod os; pub mod read; +pub mod seq; #[allow(bad_style)] type w64 = w; @@ -1016,6 +1017,10 @@ pub fn random() -> T { thread_rng().gen() } +#[inline(always)] +#[deprecated(since="0.3.18", note="renamed to seq::sample_iter")] +/// DEPRECATED: use `seq::sample_iter` instead. +/// /// Randomly sample up to `amount` elements from a finite iterator. /// The order of elements in the sample is not random. /// @@ -1032,24 +1037,14 @@ pub fn sample(rng: &mut R, iterable: I, amount: usize) -> Vec where I: IntoIterator, R: Rng, { - let mut iter = iterable.into_iter(); - let mut reservoir: Vec = iter.by_ref().take(amount).collect(); - // continue unless the iterator was exhausted - if reservoir.len() == amount { - for (i, elem) in iter.enumerate() { - let k = rng.gen_range(0, i + 1 + amount); - if let Some(spot) = reservoir.get_mut(k) { - *spot = elem; - } - } - } - reservoir + // the legacy sample didn't care whether amount was met + seq::sample_iter(rng, iterable, amount) + .unwrap_or_else(|e| e) } #[cfg(test)] mod test { - use super::{Rng, thread_rng, random, SeedableRng, StdRng, sample, - weak_rng}; + use super::{Rng, thread_rng, random, SeedableRng, StdRng, weak_rng}; use std::iter::repeat; pub struct MyRng { inner: R } @@ -1255,24 +1250,6 @@ mod test { (f32, (f64, (f64,)))) = random(); } - #[test] - fn test_sample() { - let min_val = 1; - let max_val = 100; - - let mut r = thread_rng(); - let vals = (min_val..max_val).collect::>(); - let small_sample = sample(&mut r, vals.iter(), 5); - let large_sample = sample(&mut r, vals.iter(), vals.len() + 5); - - assert_eq!(small_sample.len(), 5); - assert_eq!(large_sample.len(), vals.len()); - - assert!(small_sample.iter().all(|e| { - **e >= min_val && **e <= max_val - })); - } - #[test] fn test_std_rng_seeded() { let s = thread_rng().gen_iter::().take(256).collect::>(); diff --git a/src/seq.rs b/src/seq.rs new file mode 100644 index 00000000000..195d5ad62a0 --- /dev/null +++ b/src/seq.rs @@ -0,0 +1,328 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Functions for randomly accessing and sampling sequences. + +use super::Rng; +use std::collections::hash_map::HashMap; + +/// Randomly sample `amount` elements from a finite iterator. +/// +/// The following can be returned: +/// - `Ok`: `Vec` of `amount` non-repeating randomly sampled elements. The order is not random. +/// - `Err`: `Vec` of all the elements from `iterable` in sequential order. This happens when the +/// length of `iterable` was less than `amount`. This is considered an error since exactly +/// `amount` elements is typically expected. +/// +/// This implementation uses `O(len(iterable))` time and `O(amount)` memory. +/// +/// # Example +/// +/// ```rust +/// use rand::{thread_rng, seq}; +/// +/// let mut rng = thread_rng(); +/// let sample = seq::sample_iter(&mut rng, 1..100, 5).unwrap(); +/// println!("{:?}", sample); +/// ``` +pub fn sample_iter(rng: &mut R, iterable: I, amount: usize) -> Result, Vec> + where I: IntoIterator, + R: Rng, +{ + let mut iter = iterable.into_iter(); + let mut reservoir = Vec::with_capacity(amount); + reservoir.extend(iter.by_ref().take(amount)); + + // Continue unless the iterator was exhausted + // + // note: this prevents iterators that "restart" from causing problems. + // If the iterator stops once, then so do we. + if reservoir.len() == amount { + for (i, elem) in iter.enumerate() { + let k = rng.gen_range(0, i + 1 + amount); + if let Some(spot) = reservoir.get_mut(k) { + *spot = elem; + } + } + Ok(reservoir) + } else { + // Don't hang onto extra memory. There is a corner case where + // `amount` was much less than `len(iterable)`. + reservoir.shrink_to_fit(); + Err(reservoir) + } +} + +/// Randomly sample exactly `amount` values from `slice`. +/// +/// The values are non-repeating and in random order. +/// +/// This implementation uses `O(amount)` time and memory. +/// +/// Panics if `amount > slice.len()` +/// +/// # Example +/// +/// ```rust +/// use rand::{thread_rng, seq}; +/// +/// let mut rng = thread_rng(); +/// let values = vec![5, 6, 1, 3, 4, 6, 7]; +/// println!("{:?}", seq::sample_slice(&mut rng, &values, 3)); +/// ``` +pub fn sample_slice(rng: &mut R, slice: &[T], amount: usize) -> Vec + where R: Rng, + T: Clone +{ + let indices = sample_indices(rng, slice.len(), amount); + + let mut out = Vec::with_capacity(amount); + out.extend(indices.iter().map(|i| slice[*i].clone())); + out +} + +/// Randomly sample exactly `amount` references from `slice`. +/// +/// The references are non-repeating and in random order. +/// +/// This implementation uses `O(amount)` time and memory. +/// +/// Panics if `amount > slice.len()` +/// +/// # Example +/// +/// ```rust +/// use rand::{thread_rng, seq}; +/// +/// let mut rng = thread_rng(); +/// let values = vec![5, 6, 1, 3, 4, 6, 7]; +/// println!("{:?}", seq::sample_slice_ref(&mut rng, &values, 3)); +/// ``` +pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T> + where R: Rng +{ + let indices = sample_indices(rng, slice.len(), amount); + + let mut out = Vec::with_capacity(amount); + out.extend(indices.iter().map(|i| &slice[*i])); + out +} + +/// Randomly sample exactly `amount` indices from `0..length`. +/// +/// The values are non-repeating and in random order. +/// +/// This implementation uses `O(amount)` time and memory. +/// +/// This method is used internally by the slice sampling methods, but it can sometimes be useful to +/// have the indices themselves so this is provided as an alternative. +/// +/// Panics if `amount > length` +pub fn sample_indices(rng: &mut R, length: usize, amount: usize) -> Vec + where R: Rng, +{ + if amount > length { + panic!("`amount` must be less than or equal to `slice.len()`"); + } + + // We are going to have to allocate at least `amount` for the output no matter what. However, + // if we use the `cached` version we will have to allocate `amount` as a HashMap as well since + // it inserts an element for every loop. + // + // Therefore, if `amount >= length / 2` then inplace will be both faster and use less memory. + // + // TODO: there is probably even more fine-tuning that can be done here since + // `HashMap::with_capacity(amount)` probably allocates more than `amount` in practice, + // and a trade off could probably be made between memory/cpu, since hashmap operations + // are slower than array index swapping. + if amount >= length / 2 { + sample_indices_inplace(rng, length, amount) + } else { + sample_indices_cache(rng, length, amount) + } +} + +/// Sample an amount of indices using an inplace partial fisher yates method. +/// +/// This allocates the entire `length` of indices and randomizes only the first `amount`. +/// It then truncates to `amount` and returns. +/// +/// This is better than using a HashMap "cache" when `amount >= length / 2` since it does not +/// require allocating an extra cache and is much faster. +fn sample_indices_inplace(rng: &mut R, length: usize, amount: usize) -> Vec + where R: Rng, +{ + debug_assert!(amount <= length); + let mut indices: Vec = Vec::with_capacity(length); + indices.extend(0..length); + for i in 0..amount { + let j: usize = rng.gen_range(i, length); + let tmp = indices[i]; + indices[i] = indices[j]; + indices[j] = tmp; + } + indices.truncate(amount); + debug_assert_eq!(indices.len(), amount); + indices +} + + +/// This method performs a partial fisher-yates on a range of indices using a HashMap +/// as a cache to record potential collisions. +/// +/// The cache avoids allocating the entire `length` of values. This is especially useful when +/// `amount <<< length`, i.e. select 3 non-repeating from 1_000_000 +fn sample_indices_cache( + rng: &mut R, + length: usize, + amount: usize, +) -> Vec + where R: Rng, +{ + debug_assert!(amount <= length); + let mut cache = HashMap::with_capacity(amount); + let mut out = Vec::with_capacity(amount); + for i in 0..amount { + let j: usize = rng.gen_range(i, length); + + // equiv: let tmp = slice[i]; + let tmp = match cache.get(&i) { + Some(e) => *e, + None => i, + }; + + // equiv: slice[i] = slice[j]; + let x = match cache.get(&j) { + Some(x) => *x, + None => j, + }; + + // equiv: slice[j] = tmp; + cache.insert(j, tmp); + + // note that in the inplace version, slice[i] is automatically "returned" value + out.push(x); + } + debug_assert_eq!(out.len(), amount); + out +} + +#[cfg(test)] +mod test { + use super::*; + use {thread_rng, XorShiftRng, SeedableRng}; + + #[test] + fn test_sample_iter() { + let min_val = 1; + let max_val = 100; + + let mut r = thread_rng(); + let vals = (min_val..max_val).collect::>(); + let small_sample = sample_iter(&mut r, vals.iter(), 5).unwrap(); + let large_sample = sample_iter(&mut r, vals.iter(), vals.len() + 5).unwrap_err(); + + assert_eq!(small_sample.len(), 5); + assert_eq!(large_sample.len(), vals.len()); + // no randomization happens when amount >= len + assert_eq!(large_sample, vals.iter().collect::>()); + + assert!(small_sample.iter().all(|e| { + **e >= min_val && **e <= max_val + })); + } + #[test] + fn test_sample_slice_boundaries() { + let empty: &[u8] = &[]; + + let mut r = thread_rng(); + + // sample 0 items + assert_eq!(sample_slice(&mut r, empty, 0), vec![]); + assert_eq!(sample_slice(&mut r, &[42, 2, 42], 0), vec![]); + + // sample 1 item + assert_eq!(sample_slice(&mut r, &[42], 1), vec![42]); + let v = sample_slice(&mut r, &[1, 42], 1)[0]; + assert!(v == 1 || v == 42); + + // sample "all" the items + let v = sample_slice(&mut r, &[42, 133], 2); + assert!(v == vec![42, 133] || v == vec![133, 42]); + + assert_eq!(sample_indices_inplace(&mut r, 0, 0), vec![]); + assert_eq!(sample_indices_inplace(&mut r, 1, 0), vec![]); + assert_eq!(sample_indices_inplace(&mut r, 1, 1), vec![0]); + + assert_eq!(sample_indices_cache(&mut r, 0, 0), vec![]); + assert_eq!(sample_indices_cache(&mut r, 1, 0), vec![]); + assert_eq!(sample_indices_cache(&mut r, 1, 1), vec![0]); + + // Make sure lucky 777's aren't lucky + let slice = &[42, 777]; + let mut num_42 = 0; + let total = 1000; + for _ in 0..total { + let v = sample_slice(&mut r, slice, 1); + assert_eq!(v.len(), 1); + let v = v[0]; + assert!(v == 42 || v == 777); + if v == 42 { + num_42 += 1; + } + } + let ratio_42 = num_42 as f64 / 1000 as f64; + assert!(0.4 <= ratio_42 || ratio_42 <= 0.6, "{}", ratio_42); + } + + #[test] + fn test_sample_slice() { + let xor_rng = XorShiftRng::from_seed; + + let max_range = 100; + let mut r = thread_rng(); + + for length in 1usize..max_range { + let amount = r.gen_range(0, length); + let seed: [u32; 4] = [ + r.next_u32(), r.next_u32(), r.next_u32(), r.next_u32() + ]; + + println!("Selecting indices: len={}, amount={}, seed={:?}", length, amount, seed); + + // assert that the two index methods give exactly the same result + let inplace = sample_indices_inplace( + &mut xor_rng(seed), length, amount); + let cache = sample_indices_cache( + &mut xor_rng(seed), length, amount); + assert_eq!(inplace, cache); + + // assert the basics work + let regular = sample_indices( + &mut xor_rng(seed), length, amount); + assert_eq!(regular.len(), amount); + assert!(regular.iter().all(|e| *e < length)); + assert_eq!(regular, inplace); + + // also test that sampling the slice works + let vec: Vec = (0..length).collect(); + { + let result = sample_slice(&mut xor_rng(seed), &vec, amount); + assert_eq!(result, regular); + } + + { + let result = sample_slice_ref(&mut xor_rng(seed), &vec, amount); + let expected = regular.iter().map(|v| v).collect::>(); + assert_eq!(result, expected); + } + } + } +}