Skip to content

Commit 7cab90c

Browse files
Merge #340
340: feat: add combinations_with_replacement r=jswrenn a=tommilligan As part of a personal project, I wanted to generate combinations of a set `n` of length `k`, but where elements may be repeated (i.e. may be present more than once in the output). I was not able to find an implementation of this in Rust anywhere - please correct me if I'm wrong! The `CombinationsWithReplacement` iterator consumes a given `Iterator` of `Clone`-able items, and yields combinations of type `Vec<I::Item>` (the same api as `Combinations`). Suggestions for a more concise name welcome. The algorithm is based on [this Stackoverflow question](https://stackoverflow.com/questions/127704/algorithm-to-return-all-combinations-of-k-elements-from-n) and [sample C code](https://stackoverflow.com/questions/561/how-to-use-combinations-of-sets-as-test-data#794) Co-authored-by: Tom Milligan <[email protected]>
2 parents 8761fbe + a2f8e9f commit 7cab90c

6 files changed

+285
-67
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#![feature(test)]
2+
3+
extern crate itertools;
4+
extern crate test;
5+
6+
use itertools::Itertools;
7+
use test::{black_box, Bencher};
8+
9+
#[bench]
10+
fn comb_replacement_n10_k5(b: &mut Bencher) {
11+
b.iter(|| {
12+
for i in (0..10).combinations_with_replacement(5) {
13+
black_box(i);
14+
}
15+
});
16+
}
17+
18+
#[bench]
19+
fn comb_replacement_n5_k10(b: &mut Bencher) {
20+
b.iter(|| {
21+
for i in (0..5).combinations_with_replacement(10) {
22+
black_box(i);
23+
}
24+
});
25+
}
26+
27+
#[bench]
28+
fn comb_replacement_n10_k10(b: &mut Bencher) {
29+
b.iter(|| {
30+
for i in (0..10).combinations_with_replacement(10) {
31+
black_box(i);
32+
}
33+
});
34+
}

src/combinations.rs

Lines changed: 2 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
2-
use std::ops::Index;
31
use std::fmt;
42

3+
use super::lazy_buffer::LazyBuffer;
4+
55
/// An iterator to iterate through all the `n`-length combinations in an iterator.
66
///
77
/// See [`.combinations()`](../trait.Itertools.html#method.combinations) for more information.
@@ -98,68 +98,3 @@ impl<I> Iterator for Combinations<I>
9898
Some(result)
9999
}
100100
}
101-
102-
#[derive(Debug)]
103-
struct LazyBuffer<I: Iterator> {
104-
it: I,
105-
done: bool,
106-
buffer: Vec<I::Item>,
107-
}
108-
109-
impl<I> LazyBuffer<I>
110-
where I: Iterator
111-
{
112-
pub fn new(it: I) -> LazyBuffer<I> {
113-
let mut it = it;
114-
let mut buffer = Vec::new();
115-
let done;
116-
if let Some(first) = it.next() {
117-
buffer.push(first);
118-
done = false;
119-
} else {
120-
done = true;
121-
}
122-
LazyBuffer {
123-
it: it,
124-
done: done,
125-
buffer: buffer,
126-
}
127-
}
128-
129-
pub fn len(&self) -> usize {
130-
self.buffer.len()
131-
}
132-
133-
pub fn is_done(&self) -> bool {
134-
self.done
135-
}
136-
137-
pub fn get_next(&mut self) -> bool {
138-
if self.done {
139-
return false;
140-
}
141-
let next_item = self.it.next();
142-
match next_item {
143-
Some(x) => {
144-
self.buffer.push(x);
145-
true
146-
}
147-
None => {
148-
self.done = true;
149-
false
150-
}
151-
}
152-
}
153-
}
154-
155-
impl<I> Index<usize> for LazyBuffer<I>
156-
where I: Iterator,
157-
I::Item: Sized
158-
{
159-
type Output = I::Item;
160-
161-
fn index<'b>(&'b self, _index: usize) -> &'b I::Item {
162-
self.buffer.index(_index)
163-
}
164-
}
165-

src/combinations_with_replacement.rs

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
use std::fmt;
2+
3+
use super::lazy_buffer::LazyBuffer;
4+
5+
/// An iterator to iterate through all the `n`-length combinations in an iterator, with replacement.
6+
///
7+
/// See [`.combinations_with_replacement()`](../trait.Itertools.html#method.combinations_with_replacement) for more information.
8+
#[derive(Clone)]
9+
pub struct CombinationsWithReplacement<I>
10+
where
11+
I: Iterator,
12+
I::Item: Clone,
13+
{
14+
n: usize,
15+
indices: Vec<usize>,
16+
// The current known max index value. This increases as pool grows.
17+
max_index: usize,
18+
pool: LazyBuffer<I>,
19+
first: bool,
20+
}
21+
22+
impl<I> fmt::Debug for CombinationsWithReplacement<I>
23+
where
24+
I: Iterator + fmt::Debug,
25+
I::Item: fmt::Debug + Clone,
26+
{
27+
// Report this type's own name in `Debug` output rather than the sibling `Combinations`.
debug_fmt_fields!(CombinationsWithReplacement, n, indices, max_index, pool, first);
28+
}
29+
30+
impl<I> CombinationsWithReplacement<I>
31+
where
32+
I: Iterator,
33+
I::Item: Clone,
34+
{
35+
/// Map the current mask over the pool to get an output combination
36+
fn current(&self) -> Vec<I::Item> {
37+
self.indices.iter().map(|i| self.pool[*i].clone()).collect()
38+
}
39+
}
40+
41+
/// Create a new `CombinationsWithReplacement` from an iterator whose items are clonable.
42+
pub fn combinations_with_replacement<I>(iter: I, n: usize) -> CombinationsWithReplacement<I>
43+
where
44+
I: Iterator,
45+
I::Item: Clone,
46+
{
47+
let indices: Vec<usize> = vec![0; n];
48+
let pool: LazyBuffer<I> = LazyBuffer::new(iter);
49+
50+
CombinationsWithReplacement {
51+
n,
52+
indices,
53+
max_index: 0,
54+
pool: pool,
55+
first: true,
56+
}
57+
}
58+
59+
impl<I> Iterator for CombinationsWithReplacement<I>
60+
where
61+
I: Iterator,
62+
I::Item: Clone,
63+
{
64+
type Item = Vec<I::Item>;
65+
fn next(&mut self) -> Option<Self::Item> {
66+
// If this is the first iteration, return early
67+
if self.first {
68+
// In empty edge cases, stop iterating immediately
69+
return if self.n == 0 || self.pool.is_done() {
70+
None
71+
// Otherwise, yield the initial state
72+
} else {
73+
self.first = false;
74+
Some(self.current())
75+
};
76+
}
77+
78+
// Check if we need to consume more from the iterator
79+
// This will run while we increment our first index digit
80+
if !self.pool.is_done() {
81+
self.pool.get_next();
82+
self.max_index = self.pool.len() - 1;
83+
}
84+
85+
// Work out where we need to update our indices
86+
let mut increment: Option<(usize, usize)> = None;
87+
for (i, indices_int) in self.indices.iter().enumerate().rev() {
88+
if indices_int < &self.max_index {
89+
increment = Some((i, indices_int + 1));
90+
break;
91+
}
92+
}
93+
94+
match increment {
95+
// If we can update the indices further
96+
Some((increment_from, increment_value)) => {
97+
// We need to update the rightmost non-max value
98+
// and all those to the right
99+
for indices_index in increment_from..self.indices.len() {
100+
self.indices[indices_index] = increment_value
101+
}
102+
Some(self.current())
103+
}
104+
// Otherwise, we're done
105+
None => None,
106+
}
107+
}
108+
}

src/lazy_buffer.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
use std::ops::Index;
2+
3+
/// A buffer holding the items pulled so far from a wrapped iterator.
///
/// Items are appended one at a time by `get_next` and can be read back by
/// position through the `Index<usize>` implementation.
#[derive(Debug, Clone)]
4+
pub struct LazyBuffer<I: Iterator> {
5+
/// The underlying iterator that items are pulled from.
it: I,
6+
/// Set to `true` once `it` has returned `None`.
done: bool,
7+
/// Items pulled from `it` so far, in iteration order.
buffer: Vec<I::Item>,
8+
}
9+
10+
impl<I> LazyBuffer<I>
11+
where
12+
I: Iterator,
13+
{
14+
/// Wrap `it`, immediately pulling its first item into the buffer.
///
/// If `it` yields nothing, the buffer starts empty and already marked as done.
pub fn new(it: I) -> LazyBuffer<I> {
15+
let mut it = it;
16+
let mut buffer = Vec::new();
17+
let done;
18+
// Pull one item up front so emptiness of the source is known right away.
if let Some(first) = it.next() {
19+
buffer.push(first);
20+
done = false;
21+
} else {
22+
done = true;
23+
}
24+
LazyBuffer {
25+
it: it,
26+
done: done,
27+
buffer: buffer,
28+
}
29+
}
30+
31+
/// Number of items buffered so far (not the total length of the source iterator).
pub fn len(&self) -> usize {
32+
self.buffer.len()
33+
}
34+
35+
/// Whether the underlying iterator has been observed to be exhausted.
pub fn is_done(&self) -> bool {
36+
self.done
37+
}
38+
39+
/// Try to pull one more item from the underlying iterator into the buffer.
///
/// Returns `true` if an item was appended. Returns `false` if the iterator
/// is exhausted; the buffer is then marked as done and the iterator is not
/// polled again on later calls.
pub fn get_next(&mut self) -> bool {
40+
if self.done {
41+
return false;
42+
}
43+
let next_item = self.it.next();
44+
match next_item {
45+
Some(x) => {
46+
self.buffer.push(x);
47+
true
48+
}
49+
None => {
50+
self.done = true;
51+
false
52+
}
53+
}
54+
}
55+
}
56+
57+
/// Positional access to the items buffered so far.
impl<I> Index<usize> for LazyBuffer<I>
58+
where
59+
I: Iterator,
60+
I::Item: Sized,
61+
{
62+
type Output = I::Item;
63+
64+
/// Borrow the buffered item at `_index`.
///
/// Delegates to the inner `Vec`'s indexing, so an index at or beyond `len()` panics;
/// items not yet pulled via `get_next` are not reachable.
fn index<'b>(&'b self, _index: usize) -> &'b I::Item {
65+
self.buffer.index(_index)
66+
}
67+
}

src/lib.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ pub mod structs {
102102
pub use adaptors::MultiProduct;
103103
#[cfg(feature = "use_std")]
104104
pub use combinations::Combinations;
105+
#[cfg(feature = "use_std")]
106+
pub use combinations_with_replacement::CombinationsWithReplacement;
105107
pub use cons_tuples_impl::ConsTuples;
106108
pub use exactly_one_err::ExactlyOneError;
107109
pub use format::{Format, FormatWith};
@@ -160,6 +162,8 @@ mod concat_impl;
160162
mod cons_tuples_impl;
161163
#[cfg(feature = "use_std")]
162164
mod combinations;
165+
#[cfg(feature = "use_std")]
166+
mod combinations_with_replacement;
163167
mod exactly_one_err;
164168
mod diff;
165169
mod format;
@@ -170,6 +174,8 @@ mod groupbylazy;
170174
mod intersperse;
171175
#[cfg(feature = "use_std")]
172176
mod kmerge_impl;
177+
#[cfg(feature = "use_std")]
178+
mod lazy_buffer;
173179
mod merge_join;
174180
mod minmax;
175181
#[cfg(feature = "use_std")]
@@ -1167,6 +1173,34 @@ pub trait Itertools : Iterator {
11671173
combinations::combinations(self, n)
11681174
}
11691175

1176+
/// Return an iterator that iterates over the `n`-length combinations of
1177+
/// the elements from an iterator, with replacement.
1178+
///
1179+
/// Iterator element type is `Vec<Self::Item>`. The iterator produces a new Vec per iteration,
1180+
/// and clones the iterator elements.
1181+
///
1182+
/// ```
1183+
/// use itertools::Itertools;
1184+
///
1185+
/// let it = (1..4).combinations_with_replacement(2);
1186+
/// itertools::assert_equal(it, vec![
1187+
/// vec![1, 1],
1188+
/// vec![1, 2],
1189+
/// vec![1, 3],
1190+
/// vec![2, 2],
1191+
/// vec![2, 3],
1192+
/// vec![3, 3],
1193+
/// ]);
1194+
/// ```
1195+
#[cfg(feature = "use_std")]
1196+
fn combinations_with_replacement(self, n: usize) -> CombinationsWithReplacement<Self>
1197+
where
1198+
Self: Sized,
1199+
Self::Item: Clone,
1200+
{
1201+
combinations_with_replacement::combinations_with_replacement(self, n)
1202+
}
1203+
11701204
/// Return an iterator adaptor that pads the sequence to a minimum length of
11711205
/// `min` by filling missing elements using a closure `f`.
11721206
///

0 commit comments

Comments
 (0)