Skip to content

Commit c7f36fa

Browse files
committed
Add map and set extract_if
1 parent a34d7ae commit c7f36fa

File tree

7 files changed

+323
-5
lines changed

7 files changed

+323
-5
lines changed

src/map.rs

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ mod tests;
1616
pub use self::core::raw_entry_v1::{self, RawEntryApiV1};
1717
pub use self::core::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
1818
pub use self::iter::{
19-
Drain, IntoIter, IntoKeys, IntoValues, Iter, IterMut, IterMut2, Keys, Splice, Values, ValuesMut,
19+
Drain, ExtractIf, IntoIter, IntoKeys, IntoValues, Iter, IterMut, IterMut2, Keys, Splice,
20+
Values, ValuesMut,
2021
};
2122
pub use self::mutable::MutableEntryKey;
2223
pub use self::mutable::MutableKeys;
@@ -36,7 +37,7 @@ use alloc::vec::Vec;
3637
#[cfg(feature = "std")]
3738
use std::collections::hash_map::RandomState;
3839

39-
use self::core::IndexMapCore;
40+
pub(crate) use self::core::{ExtractCore, IndexMapCore};
4041
use crate::util::{third, try_simplify_range};
4142
use crate::{Bucket, Entries, Equivalent, HashValue, TryReserveError};
4243

@@ -307,6 +308,44 @@ impl<K, V, S> IndexMap<K, V, S> {
307308
Drain::new(self.core.drain(range))
308309
}
309310

311+
/// Creates an iterator which uses a closure to determine if an element should be removed.
312+
///
313+
/// If the closure returns true, the element is removed from the map and yielded.
314+
/// If the closure returns false, or panics, the element remains in the map and will not be
315+
/// yielded.
316+
///
317+
/// Note that `extract_if` lets you mutate every value in the filter closure, regardless of
318+
/// whether you choose to keep or remove it.
319+
///
320+
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
321+
/// or the iteration short-circuits, then the remaining elements will be retained.
322+
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
323+
///
324+
/// [`retain`]: IndexMap::retain
325+
///
326+
/// # Examples
327+
///
328+
/// Splitting a map into even and odd keys, reusing the original map:
329+
///
330+
/// ```
331+
/// use indexmap::IndexMap;
332+
///
333+
/// let mut map: IndexMap<i32, i32> = (0..8).map(|x| (x, x)).collect();
334+
/// let extracted: IndexMap<i32, i32> = map.extract_if(|k, _v| k % 2 == 0).collect();
335+
///
336+
/// let evens = extracted.keys().copied().collect::<Vec<_>>();
337+
/// let odds = map.keys().copied().collect::<Vec<_>>();
338+
///
339+
/// assert_eq!(evens, vec![0, 2, 4, 6]);
340+
/// assert_eq!(odds, vec![1, 3, 5, 7]);
341+
/// ```
342+
pub fn extract_if<F>(&mut self, pred: F) -> ExtractIf<'_, K, V, F>
343+
where
344+
F: FnMut(&K, &mut V) -> bool,
345+
{
346+
ExtractIf::new(&mut self.core, pred)
347+
}
348+
310349
/// Splits the collection into two at the given index.
311350
///
312351
/// Returns a newly allocated map containing the elements in the range

src/map/core.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
//! However, we should probably not let this show in the public API or docs.
99
1010
mod entry;
11+
mod extract;
1112

1213
pub mod raw_entry_v1;
1314

@@ -25,6 +26,7 @@ type Indices = hash_table::HashTable<usize>;
2526
type Entries<K, V> = Vec<Bucket<K, V>>;
2627

2728
pub use entry::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
29+
pub(crate) use extract::ExtractCore;
2830

2931
/// Core of the map that does not depend on S
3032
#[derive(Debug)]
@@ -163,6 +165,7 @@ impl<K, V> IndexMapCore<K, V> {
163165

164166
#[inline]
165167
pub(crate) fn len(&self) -> usize {
168+
debug_assert_eq!(self.entries.len(), self.indices.len());
166169
self.indices.len()
167170
}
168171

src/map/core/extract.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#![allow(unsafe_code)]
2+
3+
use super::{Bucket, IndexMapCore};
4+
5+
impl<K, V> IndexMapCore<K, V> {
6+
pub(crate) fn extract(&mut self) -> ExtractCore<'_, K, V> {
7+
// SAFETY: We must have consistent lengths to start, so that's a hard assertion.
8+
// Then the worst `set_len(0)` can do is leak items if `ExtractCore` doesn't drop.
9+
assert_eq!(self.entries.len(), self.indices.len());
10+
unsafe {
11+
self.entries.set_len(0);
12+
}
13+
ExtractCore {
14+
map: self,
15+
current: 0,
16+
new_len: 0,
17+
}
18+
}
19+
}
20+
21+
pub(crate) struct ExtractCore<'a, K, V> {
22+
map: &'a mut IndexMapCore<K, V>,
23+
current: usize,
24+
new_len: usize,
25+
}
26+
27+
impl<K, V> Drop for ExtractCore<'_, K, V> {
28+
fn drop(&mut self) {
29+
let old_len = self.map.indices.len();
30+
let mut new_len = self.new_len;
31+
debug_assert!(new_len <= self.current);
32+
debug_assert!(self.current <= old_len);
33+
debug_assert!(old_len <= self.map.entries.capacity());
34+
35+
// SAFETY: We assume `new_len` and `current` were correctly maintained by the iterator.
36+
// So `entries[new_len..current]` were extracted, but the rest before and after are valid.
37+
unsafe {
38+
if new_len == self.current {
39+
// Nothing was extracted, so any remaining items can be left in place.
40+
new_len = old_len;
41+
} else if self.current < old_len {
42+
// Need to shift the remaining items down.
43+
let tail_len = old_len - self.current;
44+
let base = self.map.entries.as_mut_ptr();
45+
let src = base.add(self.current);
46+
let dest = base.add(new_len);
47+
src.copy_to(dest, tail_len);
48+
new_len += tail_len;
49+
}
50+
self.map.entries.set_len(new_len);
51+
}
52+
53+
if new_len != old_len {
54+
// We don't keep track of *which* items were extracted, so reindex everything.
55+
self.map.rebuild_hash_table();
56+
}
57+
}
58+
}
59+
60+
impl<K, V> ExtractCore<'_, K, V> {
61+
pub(crate) fn extract_if<F>(&mut self, mut pred: F) -> Option<Bucket<K, V>>
62+
where
63+
F: FnMut(&mut Bucket<K, V>) -> bool,
64+
{
65+
let old_len = self.map.indices.len();
66+
debug_assert!(old_len <= self.map.entries.capacity());
67+
68+
let base = self.map.entries.as_mut_ptr();
69+
while self.current < old_len {
70+
// SAFETY: We're maintaining both indices within bounds of the original entries, so
71+
// 0..new_len and current..old_len are always valid items for our Drop to keep.
72+
unsafe {
73+
let item = base.add(self.current);
74+
if pred(&mut *item) {
75+
// Extract it!
76+
self.current += 1;
77+
return Some(item.read());
78+
} else {
79+
// Keep it, shifting it down if needed.
80+
if self.new_len != self.current {
81+
debug_assert!(self.new_len < self.current);
82+
let dest = base.add(self.new_len);
83+
item.copy_to_nonoverlapping(dest, 1);
84+
}
85+
self.current += 1;
86+
self.new_len += 1;
87+
}
88+
}
89+
}
90+
None
91+
}
92+
93+
pub(crate) fn remaining(&self) -> usize {
94+
self.map.indices.len() - self.current
95+
}
96+
}

src/map/iter.rs

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use super::core::IndexMapCore;
2-
use super::{Bucket, Entries, IndexMap, Slice};
1+
use super::{Bucket, Entries, ExtractCore, IndexMap, IndexMapCore, Slice};
32

43
use alloc::vec::{self, Vec};
54
use core::fmt;
@@ -774,3 +773,56 @@ where
774773
.finish()
775774
}
776775
}
776+
777+
/// An extracting iterator for `IndexMap`.
778+
///
779+
/// This `struct` is created by [`IndexMap::extract_if()`].
780+
/// See its documentation for more.
781+
pub struct ExtractIf<'a, K, V, F>
782+
where
783+
F: FnMut(&K, &mut V) -> bool,
784+
{
785+
inner: ExtractCore<'a, K, V>,
786+
pred: F,
787+
}
788+
789+
impl<K, V, F> ExtractIf<'_, K, V, F>
790+
where
791+
F: FnMut(&K, &mut V) -> bool,
792+
{
793+
pub(super) fn new(core: &mut IndexMapCore<K, V>, pred: F) -> ExtractIf<'_, K, V, F> {
794+
ExtractIf {
795+
inner: core.extract(),
796+
pred,
797+
}
798+
}
799+
}
800+
801+
impl<K, V, F> Iterator for ExtractIf<'_, K, V, F>
802+
where
803+
F: FnMut(&K, &mut V) -> bool,
804+
{
805+
type Item = (K, V);
806+
807+
fn next(&mut self) -> Option<Self::Item> {
808+
self.inner
809+
.extract_if(|bucket| {
810+
let (key, value) = bucket.ref_mut();
811+
(self.pred)(key, value)
812+
})
813+
.map(Bucket::key_value)
814+
}
815+
816+
fn size_hint(&self) -> (usize, Option<usize>) {
817+
(0, Some(self.inner.remaining()))
818+
}
819+
}
820+
821+
impl<'a, K, V, F> fmt::Debug for ExtractIf<'a, K, V, F>
822+
where
823+
F: FnMut(&K, &mut V) -> bool,
824+
{
825+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
826+
f.debug_struct("ExtractIf").finish_non_exhaustive()
827+
}
828+
}

src/set.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ mod slice;
88
mod tests;
99

1010
pub use self::iter::{
11-
Difference, Drain, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
11+
Difference, Drain, ExtractIf, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
1212
};
1313
pub use self::mutable::MutableValues;
1414
pub use self::slice::Slice;
@@ -258,6 +258,41 @@ impl<T, S> IndexSet<T, S> {
258258
Drain::new(self.map.core.drain(range))
259259
}
260260

261+
/// Creates an iterator which uses a closure to determine if a value should be removed.
262+
///
263+
/// If the closure returns true, then the value is removed and yielded.
264+
/// If the closure returns false, the value will remain in the set and will not be yielded
265+
/// by the iterator.
266+
///
267+
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
268+
/// or the iteration short-circuits, then the remaining elements will be retained.
269+
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
270+
///
271+
/// [`retain`]: IndexSet::retain
272+
///
273+
/// # Examples
274+
///
275+
/// Splitting a set into even and odd values, reusing the original set:
276+
///
277+
/// ```
278+
/// use indexmap::IndexSet;
279+
///
280+
/// let mut set: IndexSet<i32> = (0..8).collect();
281+
/// let extracted: IndexSet<i32> = set.extract_if(|v| v % 2 == 0).collect();
282+
///
283+
/// let evens = extracted.into_iter().collect::<Vec<_>>();
284+
/// let odds = set.into_iter().collect::<Vec<_>>();
285+
///
286+
/// assert_eq!(evens, vec![0, 2, 4, 6]);
287+
/// assert_eq!(odds, vec![1, 3, 5, 7]);
288+
/// ```
289+
pub fn extract_if<F>(&mut self, pred: F) -> ExtractIf<'_, T, F>
290+
where
291+
F: FnMut(&T) -> bool,
292+
{
293+
ExtractIf::new(&mut self.map.core, pred)
294+
}
295+
261296
/// Splits the collection into two at the given index.
262297
///
263298
/// Returns a newly allocated set containing the elements in the range

src/set/iter.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use crate::map::{ExtractCore, IndexMapCore};
2+
13
use super::{Bucket, Entries, IndexSet, Slice};
24

35
use alloc::vec::{self, Vec};
@@ -626,3 +628,53 @@ impl<I: fmt::Debug> fmt::Debug for UnitValue<I> {
626628
fmt::Debug::fmt(&self.0, f)
627629
}
628630
}
631+
632+
/// An extracting iterator for `IndexSet`.
633+
///
634+
/// This `struct` is created by [`IndexSet::extract_if()`].
635+
/// See its documentation for more.
636+
pub struct ExtractIf<'a, T, F>
637+
where
638+
F: FnMut(&T) -> bool,
639+
{
640+
inner: ExtractCore<'a, T, ()>,
641+
pred: F,
642+
}
643+
644+
impl<T, F> ExtractIf<'_, T, F>
645+
where
646+
F: FnMut(&T) -> bool,
647+
{
648+
pub(super) fn new(core: &mut IndexMapCore<T, ()>, pred: F) -> ExtractIf<'_, T, F> {
649+
ExtractIf {
650+
inner: core.extract(),
651+
pred,
652+
}
653+
}
654+
}
655+
656+
impl<T, F> Iterator for ExtractIf<'_, T, F>
657+
where
658+
F: FnMut(&T) -> bool,
659+
{
660+
type Item = T;
661+
662+
fn next(&mut self) -> Option<Self::Item> {
663+
self.inner
664+
.extract_if(|bucket| (self.pred)(bucket.key_ref()))
665+
.map(Bucket::key)
666+
}
667+
668+
fn size_hint(&self) -> (usize, Option<usize>) {
669+
(0, Some(self.inner.remaining()))
670+
}
671+
}
672+
673+
impl<'a, T, F> fmt::Debug for ExtractIf<'a, T, F>
674+
where
675+
F: FnMut(&T) -> bool,
676+
{
677+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
678+
f.debug_struct("ExtractIf").finish_non_exhaustive()
679+
}
680+
}

0 commit comments

Comments
 (0)