Skip to content

Commit ffaf857

Browse files
committed
Auto merge of #88448 - xu-cheng:btree-blk-build, r=Mark-Simulacrum
BTreeMap/BTreeSet::from_iter: use bulk building to improve the performance Bulk building is a common technique to increase the performance of building a fresh B-tree map. Instead of inserting items one-by-one, we sort all the items beforehand and then create the BTreeMap in bulk. Benchmark ``` ./x.py bench library/alloc --test-args btree::map::from_iter ``` * Before ``` test btree::map::from_iter_rand_100 ... bench: 3,694 ns/iter (+/- 840) test btree::map::from_iter_rand_10_000 ... bench: 1,033,446 ns/iter (+/- 192,950) test btree::map::from_iter_seq_100 ... bench: 5,689 ns/iter (+/- 1,259) test btree::map::from_iter_seq_10_000 ... bench: 861,033 ns/iter (+/- 118,815) ``` * After ``` test btree::map::from_iter_rand_100 ... bench: 3,033 ns/iter (+/- 707) test btree::map::from_iter_rand_10_000 ... bench: 775,958 ns/iter (+/- 105,152) test btree::map::from_iter_seq_100 ... bench: 2,969 ns/iter (+/- 336) test btree::map::from_iter_seq_10_000 ... bench: 258,292 ns/iter (+/- 29,364) ```
2 parents 11bbb52 + a03287b commit ffaf857

File tree

5 files changed

+151
-10
lines changed

5 files changed

+151
-10
lines changed

library/alloc/benches/btree/map.rs

+50
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,50 @@ macro_rules! map_insert_seq_bench {
5454
};
5555
}
5656

57+
macro_rules! map_from_iter_rand_bench {
58+
($name: ident, $n: expr, $map: ident) => {
59+
#[bench]
60+
pub fn $name(b: &mut Bencher) {
61+
let n: usize = $n;
62+
// setup
63+
let mut rng = thread_rng();
64+
let mut vec = Vec::with_capacity(n);
65+
66+
for _ in 0..n {
67+
let i = rng.gen::<usize>() % n;
68+
vec.push((i, i));
69+
}
70+
71+
// measure
72+
b.iter(|| {
73+
let map: $map<_, _> = vec.iter().copied().collect();
74+
black_box(map);
75+
});
76+
}
77+
};
78+
}
79+
80+
macro_rules! map_from_iter_seq_bench {
81+
($name: ident, $n: expr, $map: ident) => {
82+
#[bench]
83+
pub fn $name(b: &mut Bencher) {
84+
let n: usize = $n;
85+
// setup
86+
let mut vec = Vec::with_capacity(n);
87+
88+
for i in 0..n {
89+
vec.push((i, i));
90+
}
91+
92+
// measure
93+
b.iter(|| {
94+
let map: $map<_, _> = vec.iter().copied().collect();
95+
black_box(map);
96+
});
97+
}
98+
};
99+
}
100+
57101
macro_rules! map_find_rand_bench {
58102
($name: ident, $n: expr, $map: ident) => {
59103
#[bench]
@@ -111,6 +155,12 @@ map_insert_rand_bench! {insert_rand_10_000, 10_000, BTreeMap}
111155
map_insert_seq_bench! {insert_seq_100, 100, BTreeMap}
112156
map_insert_seq_bench! {insert_seq_10_000, 10_000, BTreeMap}
113157

158+
map_from_iter_rand_bench! {from_iter_rand_100, 100, BTreeMap}
159+
map_from_iter_rand_bench! {from_iter_rand_10_000, 10_000, BTreeMap}
160+
161+
map_from_iter_seq_bench! {from_iter_seq_100, 100, BTreeMap}
162+
map_from_iter_seq_bench! {from_iter_seq_10_000, 10_000, BTreeMap}
163+
114164
map_find_rand_bench! {find_rand_100, 100, BTreeMap}
115165
map_find_rand_bench! {find_rand_10_000, 10_000, BTreeMap}
116166

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
use core::iter::Peekable;
2+
3+
/// An iterator for deduping the keys of a sorted iterator.
4+
/// When encountering duplicated keys, only the last key-value pair is yielded.
5+
///
6+
/// Used by [`BTreeMap::bulk_build_from_sorted_iter`].
7+
pub struct DedupSortedIter<K, V, I>
8+
where
9+
I: Iterator<Item = (K, V)>,
10+
{
11+
iter: Peekable<I>,
12+
}
13+
14+
impl<K, V, I> DedupSortedIter<K, V, I>
15+
where
16+
I: Iterator<Item = (K, V)>,
17+
{
18+
pub fn new(iter: I) -> Self {
19+
Self { iter: iter.peekable() }
20+
}
21+
}
22+
23+
impl<K, V, I> Iterator for DedupSortedIter<K, V, I>
24+
where
25+
K: Eq,
26+
I: Iterator<Item = (K, V)>,
27+
{
28+
type Item = (K, V);
29+
30+
fn next(&mut self) -> Option<(K, V)> {
31+
loop {
32+
let next = match self.iter.next() {
33+
Some(next) => next,
34+
None => return None,
35+
};
36+
37+
let peeked = match self.iter.peek() {
38+
Some(peeked) => peeked,
39+
None => return Some(next),
40+
};
41+
42+
if next.0 != peeked.0 {
43+
return Some(next);
44+
}
45+
}
46+
}
47+
}

library/alloc/src/collections/btree/map.rs

+31-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::vec::Vec;
12
use core::borrow::Borrow;
23
use core::cmp::Ordering;
34
use core::fmt::{self, Debug};
@@ -9,6 +10,7 @@ use core::ops::{Index, RangeBounds};
910
use core::ptr;
1011

1112
use super::borrow::DormantMutRef;
13+
use super::dedup_sorted_iter::DedupSortedIter;
1214
use super::navigate::{LazyLeafRange, LeafRange};
1315
use super::node::{self, marker, ForceResult::*, Handle, NodeRef, Root};
1416
use super::search::SearchResult::*;
@@ -1285,6 +1287,18 @@ impl<K, V> BTreeMap<K, V> {
12851287
pub fn into_values(self) -> IntoValues<K, V> {
12861288
IntoValues { inner: self.into_iter() }
12871289
}
1290+
1291+
/// Makes a `BTreeMap` from a sorted iterator.
1292+
pub(crate) fn bulk_build_from_sorted_iter<I>(iter: I) -> Self
1293+
where
1294+
K: Ord,
1295+
I: Iterator<Item = (K, V)>,
1296+
{
1297+
let mut root = Root::new();
1298+
let mut length = 0;
1299+
root.bulk_push(DedupSortedIter::new(iter), &mut length);
1300+
BTreeMap { root: Some(root), length }
1301+
}
12881302
}
12891303

12901304
#[stable(feature = "rust1", since = "1.0.0")]
@@ -1909,9 +1923,15 @@ impl<K, V> FusedIterator for RangeMut<'_, K, V> {}
19091923
#[stable(feature = "rust1", since = "1.0.0")]
19101924
impl<K: Ord, V> FromIterator<(K, V)> for BTreeMap<K, V> {
19111925
fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> BTreeMap<K, V> {
1912-
let mut map = BTreeMap::new();
1913-
map.extend(iter);
1914-
map
1926+
let mut inputs: Vec<_> = iter.into_iter().collect();
1927+
1928+
if inputs.is_empty() {
1929+
return BTreeMap::new();
1930+
}
1931+
1932+
// use stable sort to preserve the insertion order.
1933+
inputs.sort_by(|a, b| a.0.cmp(&b.0));
1934+
BTreeMap::bulk_build_from_sorted_iter(inputs.into_iter())
19151935
}
19161936
}
19171937

@@ -2020,8 +2040,14 @@ impl<K: Ord, V, const N: usize> From<[(K, V); N]> for BTreeMap<K, V> {
20202040
/// let map2: BTreeMap<_, _> = [(1, 2), (3, 4)].into();
20212041
/// assert_eq!(map1, map2);
20222042
/// ```
2023-
fn from(arr: [(K, V); N]) -> Self {
2024-
core::array::IntoIter::new(arr).collect()
2043+
fn from(mut arr: [(K, V); N]) -> Self {
2044+
if N == 0 {
2045+
return BTreeMap::new();
2046+
}
2047+
2048+
// use stable sort to preserve the insertion order.
2049+
arr.sort_by(|a, b| a.0.cmp(&b.0));
2050+
BTreeMap::bulk_build_from_sorted_iter(core::array::IntoIter::new(arr))
20252051
}
20262052
}
20272053

library/alloc/src/collections/btree/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
mod append;
22
mod borrow;
3+
mod dedup_sorted_iter;
34
mod fix;
45
pub mod map;
56
mod mem;

library/alloc/src/collections/btree/set.rs

+22-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// This is pretty much entirely stolen from TreeSet, since BTreeMap has an identical interface
22
// to TreeMap
33

4+
use crate::vec::Vec;
45
use core::borrow::Borrow;
56
use core::cmp::Ordering::{Equal, Greater, Less};
67
use core::cmp::{max, min};
@@ -1056,9 +1057,17 @@ impl<T> BTreeSet<T> {
10561057
#[stable(feature = "rust1", since = "1.0.0")]
10571058
impl<T: Ord> FromIterator<T> for BTreeSet<T> {
10581059
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> BTreeSet<T> {
1059-
let mut set = BTreeSet::new();
1060-
set.extend(iter);
1061-
set
1060+
let mut inputs: Vec<_> = iter.into_iter().collect();
1061+
1062+
if inputs.is_empty() {
1063+
return BTreeSet::new();
1064+
}
1065+
1066+
// use stable sort to preserve the insertion order.
1067+
inputs.sort();
1068+
let iter = inputs.into_iter().map(|k| (k, ()));
1069+
let map = BTreeMap::bulk_build_from_sorted_iter(iter);
1070+
BTreeSet { map }
10621071
}
10631072
}
10641073

@@ -1071,8 +1080,16 @@ impl<T: Ord, const N: usize> From<[T; N]> for BTreeSet<T> {
10711080
/// let set2: BTreeSet<_> = [1, 2, 3, 4].into();
10721081
/// assert_eq!(set1, set2);
10731082
/// ```
1074-
fn from(arr: [T; N]) -> Self {
1075-
core::array::IntoIter::new(arr).collect()
1083+
fn from(mut arr: [T; N]) -> Self {
1084+
if N == 0 {
1085+
return BTreeSet::new();
1086+
}
1087+
1088+
// use stable sort to preserve the insertion order.
1089+
arr.sort();
1090+
let iter = core::array::IntoIter::new(arr).map(|k| (k, ()));
1091+
let map = BTreeMap::bulk_build_from_sorted_iter(iter);
1092+
BTreeSet { map }
10761093
}
10771094
}
10781095

0 commit comments

Comments
 (0)