1- // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
1+ // Copyright 2017 The Rust Project Developers. See the COPYRIGHT
22// file at the top-level directory of this distribution and at
33// http://rust-lang.org/COPYRIGHT.
44//
88// option. This file may not be copied, modified, or distributed
99// except according to those terms.
1010
11- //! Functions for sampling data
11+ //! Functions for randomly accessing and sampling sequences.
1212
1313use super :: Rng ;
1414use std:: collections:: hash_map:: HashMap ;
1515
16- /// The `Sample` trait provides the `sample` method .
16+ /// Randomly sample *up to* `amount` elements from a finite iterator .
1717///
18- /// This is intended to be implemented for containers that:
19- /// - Can be sampled in `O(amount)` time.
20- /// - Whos items can be `cloned`.
18+ /// The values are non-repeating but the order of elements returned is *not* random.
2119///
22- /// If cloning is impossible or expensive, use `sample_ref` instead.
23- pub trait Sample {
24- /// The returned sampled data. Typically the either a `Vec<T>` or a new instance of the
25- /// container's own type.
26- type Sampled ;
27-
28- /// Return exactly `amount` randomly sampled values.
29- ///
30- /// Any type which implements `sample` should guarantee that:
31- /// - Both the order and values of `Sampled` is random.
32- /// - The implementation uses `O(amount)` speed and memory
33- /// - The returned values are not references (if so, implement `SampleRef` instead).
34- ///
35- /// Panics if `amount > self.len()`
36- ///
37- /// # Example
38- ///
39- /// ```rust
40- /// use rand::{thread_rng, Sample};
41- ///
42- /// let mut rng = thread_rng();
43- /// let values = vec![5, 6, 1, 3, 4, 6, 7];
44- /// println!("{:?}", values.sample(&mut rng, 3))
45- /// ```
46- fn sample < R : Rng > ( & self , rng : & mut R , amount : usize ) -> Self :: Sampled ;
47- }
48-
49- /// The `SampleRef` trait provides the `sample_ref` method.
50- ///
51- /// This is intended to be implemented for containers that which can be sampled in `O(amount)` time
52- /// and want a fast way to give references to a sample of their items.
53- pub trait SampleRef {
54- /// The returned sampled data. Typically the either a `Vec<&T>` or a new instance of the
55- /// container's own type containing references to the underlying data.
56- type SampledRef ;
57-
58- /// Return exactly `amount` references to randomly sampled values.
59- ///
60- /// Any type which implements `sample_ref` should guarantee that:
61- /// - Both the order and values of `SampledRef` is random.
62- /// - The implementation uses `O(amount)` speed and memory.
63- /// - The returned values are not copies/clones (if so, implement `Sample` instead).
64- ///
65- /// Panics if `amount > self.len()`
66- ///
67- /// # Example
68- ///
69- /// ```rust
70- /// use rand::{thread_rng, SampleRef};
71- ///
72- /// let mut rng = thread_rng();
73- /// let values = vec![5, 6, 1, 3, 4, 6, 7];
74- /// println!("{:?}", values.as_slice().sample_ref(&mut rng, 3))
75- /// ```
76- fn sample_ref < R : Rng > ( & self , rng : & mut R , amount : usize ) -> Self :: SampledRef ;
77- }
78-
79- /// Randomly sample *up to* `amount` elements from a finite iterator using a reservoir.
20+ /// This implementation uses `O(len(iterable))` time and `O(amount)` memory.
8021///
81- /// The order of elements in the sample is not random. In fact, if `len(iterable) <= amount` then
82- /// the output will be in the exact order they were collected.
83- ///
84- /// The reservoir method used allocates only an `Vec` of size `amount`. The size of the iterable
85- /// does not affect the amount of memory used.
22+ /// > If `len(iterable) <= amount` then the values will be in sequential order. In all other
23+ /// > cases the order of the elements is neither random nor guaranteed.
8624///
8725/// # Example
8826///
8927/// ```rust
90- /// use rand::{thread_rng, sample_reservoir };
28+ /// use rand::{thread_rng, seq };
9129///
9230/// let mut rng = thread_rng();
93- /// let sample = sample_reservoir(&mut rng, 1..100, 5);
31+ /// let sample = seq:: sample_reservoir(&mut rng, 1..100, 5);
9432/// println!("{:?}", sample);
9533/// ```
9634pub fn sample_reservoir < T , I , R > ( rng : & mut R , iterable : I , amount : usize ) -> Vec < T >
@@ -110,18 +48,73 @@ pub fn sample_reservoir<T, I, R>(rng: &mut R, iterable: I, amount: usize) -> Vec
11048 }
11149 }
11250 }
51+ // In the rare corner case where `size(iterable) <<< amount`
52+ // we don't want to be hanging onto the extra memory.
53+ reservoir. shrink_to_fit ( ) ;
11354 reservoir
11455}
11556
116- /// Sample (non-repeating) exactly `amount` of indices from a sequence of the given `length`.
57+ /// Randomly sample exactly `amount` values from `slice`.
58+ ///
59+ /// The values are non-repeating and in random order.
60+ ///
61+ /// This implementation uses `O(amount)` time and memory.
62+ ///
63+ /// Panics if `amount > slice.len()`
64+ ///
65+ /// # Example
66+ ///
67+ /// ```rust
68+ /// use rand::{thread_rng, seq};
69+ ///
70+ /// let mut rng = thread_rng();
71+ /// let values = vec![5, 6, 1, 3, 4, 6, 7];
72+ /// println!("{:?}", seq::sample_slice(&mut rng, &values, 3));
73+ /// ```
74+ pub fn sample_slice < R : Rng , T : Clone > ( rng : & mut R , slice : & [ T ] , amount : usize ) -> Vec < T > {
75+ let indices = sample_indices ( rng, slice. len ( ) , amount) ;
76+
77+ let mut out = Vec :: with_capacity ( amount) ;
78+ out. extend ( indices. iter ( ) . map ( |i| slice[ * i] . clone ( ) ) ) ;
79+ out
80+ }
81+
82+ /// Randomly sample exactly `amount` references from `slice`.
83+ ///
84+ /// The references are non-repeating and in random order.
11785///
118- /// The returned elements and their order are random .
86+ /// This implementation uses `O(amount)` time and memory .
11987///
120- /// Panics if `amount > length`
88+ /// Panics if `amount > slice.len()`
89+ ///
90+ /// # Example
91+ ///
92+ /// ```rust
93+ /// use rand::{thread_rng, seq};
94+ ///
95+ /// let mut rng = thread_rng();
96+ /// let values = vec![5, 6, 1, 3, 4, 6, 7];
97+ /// println!("{:?}", seq::sample_slice_ref(&mut rng, &values, 3));
98+ /// ```
99+ pub fn sample_slice_ref < ' a , R : Rng , T > ( rng : & mut R , slice : & ' a [ T ] , amount : usize ) -> Vec < & ' a T > {
100+ let indices = sample_indices ( rng, slice. len ( ) , amount) ;
101+
102+ let mut out = Vec :: with_capacity ( amount) ;
103+ out. extend ( indices. iter ( ) . map ( |i| & slice[ * i] ) ) ;
104+ out
105+ }
106+
107+ /// Randomly sample exactly `amount` indices from `0..length`.
121108///
122- /// TODO: IMO this should be made public since it can be generally useful, although
123- /// there might be a way to make the output type more generic/compact.
124- fn sample_indices < R > ( rng : & mut R , length : usize , amount : usize ) -> Vec < usize >
109+ /// The values are non-repeating and in random order.
110+ ///
111+ /// This implementation uses `O(amount)` time and memory.
112+ ///
113+ /// This function is used internally by the slice sampling functions, but it can sometimes be
114+ /// useful to have the indices themselves, so it is provided as a public alternative.
115+ ///
116+ /// Panics if `amount > length`
117+ pub fn sample_indices < R > ( rng : & mut R , length : usize , amount : usize ) -> Vec < usize >
125118 where R : Rng ,
126119{
127120 if amount > length {
@@ -132,7 +125,7 @@ fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize>
132125 // if we use the `cached` version we will have to allocate `amount` as a HashMap as well since
133126 // it inserts an element for every loop.
134127 //
135- // Therefore, if amount >= length / 2, inplace will be both faster and use less memory.
128+ // Therefore, if ` amount >= length / 2` then inplace will be both faster and use less memory.
136129 //
137130 // TODO: there is probably even more fine-tuning that can be done here since
138131 // `HashMap::with_capacity(amount)` probably allocates more than `amount` in practice,
@@ -156,23 +149,25 @@ fn sample_indices_inplace<R>(rng: &mut R, length: usize, amount: usize) -> Vec<u
156149 where R : Rng ,
157150{
158151 debug_assert ! ( amount <= length) ;
159- let amount = if amount == length {
152+ let mut indices: Vec < usize > = Vec :: with_capacity ( length) ;
153+ indices. extend ( 0 ..length) ;
154+ let end_i = if length != 0 && amount == length {
160155 // It isn't necessary to shuffle the final element if we are shuffling
161156 // the whole array... it would just be shuffled with itself
157+ //
158+ // Also, `rng.gen_range(i, i)` panics.
162159 amount - 1
163160 } else {
164161 amount
165162 } ;
166-
167- let mut indices: Vec < usize > = Vec :: with_capacity ( length) ;
168- indices. extend ( 0 ..length) ;
169- for i in 0 ..amount {
163+ for i in 0 ..end_i {
170164 let j: usize = rng. gen_range ( i, length) ;
171165 let tmp = indices[ i] ;
172166 indices[ i] = indices[ j] ;
173167 indices[ j] = tmp;
174168 }
175169 indices. truncate ( amount) ;
170+ debug_assert_eq ! ( indices. len( ) , amount) ;
176171 indices
177172}
178173
@@ -213,52 +208,10 @@ fn sample_indices_cache<R>(
213208 // note that in the inplace version, slice[i] is automatically "returned" value
214209 out. push ( x) ;
215210 }
211+ debug_assert_eq ! ( out. len( ) , amount) ;
216212 out
217213}
218214
219- impl < ' a , T : Clone > Sample for & ' a [ T ] {
220- type Sampled = Vec < T > ;
221-
222- fn sample < R : Rng > ( & self , rng : & mut R , amount : usize ) -> Vec < T > {
223- let indices = sample_indices ( rng, self . len ( ) , amount) ;
224-
225- let mut out = Vec :: with_capacity ( amount) ;
226- out. extend ( indices. iter ( ) . map ( |i| self [ * i] . clone ( ) ) ) ;
227- out
228- }
229- }
230-
231- impl < ' a , T : Clone > Sample for Vec < T > {
232- type Sampled = Vec < T > ;
233-
234- fn sample < R : Rng > ( & self , rng : & mut R , amount : usize ) -> Vec < T > {
235- self . as_slice ( ) . sample ( rng, amount)
236- }
237- }
238-
239- impl < ' a , T > SampleRef for & ' a [ T ] {
240- type SampledRef = Vec < & ' a T > ;
241-
242- fn sample_ref < R : Rng > ( & self , rng : & mut R , amount : usize ) -> Vec < & ' a T > {
243- let indices = sample_indices ( rng, self . len ( ) , amount) ;
244-
245- let mut out = Vec :: with_capacity ( amount) ;
246- out. extend ( indices. iter ( ) . map ( |i| & self [ * i] ) ) ;
247- out
248- }
249- }
250-
251- // TODO: It looks like implementing this depends on RFC 1598 being implemented.
252- // See this: https://github.com/rust-lang/rfcs/issues/1965
253- //
254- // impl<'a, T> SampleRef for Vec<&'a T>{
255- // type SampledRef = Vec<&'a T>;
256- //
257- // fn sample_ref<R: Rng>(&'a self, rng: &mut R, amount: usize) -> Vec<&'a T> {
258- // self.as_slice().sample_ref(rng, amount)
259- // }
260- // }
261-
262215#[ cfg( test) ]
263216mod test {
264217 use super :: * ;
@@ -281,11 +234,28 @@ mod test {
281234 * * e >= min_val && * * e <= max_val
282235 } ) ) ;
283236 }
237+ #[ test]
238+ fn test_sample_slice_boundaries ( ) {
239+ let empty: & [ u8 ] = & [ ] ;
240+
241+ let mut r = thread_rng ( ) ;
242+
243+ // sample 0 items
244+ assert_eq ! ( sample_slice( & mut r, empty, 0 ) , vec![ ] ) ;
245+ assert_eq ! ( sample_slice( & mut r, & [ 42 , 2 , 42 ] , 0 ) , vec![ ] ) ;
246+
247+ // sample 1 item
248+ assert_eq ! ( sample_slice( & mut r, & [ 42 ] , 1 ) , vec![ 42 ] ) ;
249+ let v = sample_slice ( & mut r, & [ 1 , 42 ] , 1 ) [ 0 ] ;
250+ assert ! ( v == 1 || v == 42 ) ;
251+
252+ // sample "all" the items
253+ let v = sample_slice ( & mut r, & [ 42 , 133 ] , 2 ) ;
254+ assert ! ( v == vec![ 42 , 133 ] || v == vec![ 133 , 42 ] ) ;
255+ }
284256
285257 #[ test]
286- /// This test mainly works by asserting that the two cases are equivalent,
287- /// as well as equivalent to the exported function.
288- fn test_sample_indices ( ) {
258+ fn test_sample_slice ( ) {
289259 let xor_rng = XorShiftRng :: from_seed;
290260
291261 let max_range = 100 ;
@@ -299,7 +269,7 @@ mod test {
299269
300270 println ! ( "Selecting indices: len={}, amount={}, seed={:?}" , length, amount, seed) ;
301271
302- // assert that the two methods give exactly the same result
272+ // assert that the two index methods give exactly the same result
303273 let inplace = sample_indices_inplace (
304274 & mut xor_rng ( seed) , length, amount) ;
305275 let cache = sample_indices_cache (
@@ -313,15 +283,15 @@ mod test {
313283 assert ! ( regular. iter( ) . all( |e| * e < length) ) ;
314284 assert_eq ! ( regular, inplace) ;
315285
316- // just for fun, also test sampling from a vector
286+ // also test that sampling the slice works
317287 let vec: Vec < usize > = ( 0 ..length) . collect ( ) ;
318288 {
319- let result = vec . sample ( & mut xor_rng ( seed) , amount) ;
289+ let result = sample_slice ( & mut xor_rng ( seed) , & vec , amount) ;
320290 assert_eq ! ( result, regular) ;
321291 }
322292
323293 {
324- let result = vec . as_slice ( ) . sample_ref ( & mut xor_rng ( seed) , amount) ;
294+ let result = sample_slice_ref ( & mut xor_rng ( seed) , & vec , amount) ;
325295 let expected = regular. iter ( ) . map ( |v| v) . collect :: < Vec < _ > > ( ) ;
326296 assert_eq ! ( result, expected) ;
327297 }
0 commit comments