Skip to content

Commit 6ecde4e

Browse files
authored
Merge pull request #615 from morrisonlevi/min-buckets
perf: increase min buckets on very small types
2 parents 24d0480 + 465720d commit 6ecde4e

File tree

1 file changed

+64
-15
lines changed

1 file changed

+64
-15
lines changed

src/raw/mod.rs

Lines changed: 64 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,51 @@ impl ProbeSeq {
100100
// Workaround for emscripten bug emscripten-core/emscripten-fastcomp#258
101101
#[cfg_attr(target_os = "emscripten", inline(never))]
102102
#[cfg_attr(not(target_os = "emscripten"), inline)]
103-
fn capacity_to_buckets(cap: usize) -> Option<usize> {
103+
fn capacity_to_buckets(cap: usize, table_layout: TableLayout) -> Option<usize> {
104104
debug_assert_ne!(cap, 0);
105105

106106
// For small tables we require at least 1 empty bucket so that lookups are
107107
// guaranteed to terminate if an element doesn't exist in the table.
108-
if cap < 8 {
108+
if cap < 15 {
109+
// Consider a small TableLayout like { size: 1, ctrl_align: 16 } on a
110+
// platform with Group::WIDTH of 16 (like x86_64 with SSE2). For small
111+
// bucket sizes, this ends up wasting quite a few bytes just to pad to
112+
// the relatively larger ctrl_align:
113+
//
114+
// | capacity | buckets | bytes allocated | bytes per item |
115+
// | -------- | ------- | --------------- | -------------- |
116+
// | 3 | 4 | 36 | (Yikes!) 12.0 |
117+
// | 7 | 8 | 40 | (Poor) 5.7 |
118+
// | 14 | 16 | 48 | 3.4 |
119+
// | 28 | 32 | 80 | 2.9 |
120+
//
121+
// In general, buckets * table_layout.size >= table_layout.ctrl_align
122+
// must be true to avoid these edges. This is implemented by adjusting
123+
// the minimum capacity upwards for small items. This code only needs
124+
// to handle ctrl_align which are less than or equal to Group::WIDTH,
125+
// because valid layout sizes are always a multiple of the alignment,
126+
// so anything with alignment over the Group::WIDTH won't hit this edge
127+
// case.
128+
129+
// This is brittle, e.g. if we ever add 32 byte groups, it will select
130+
// 3 regardless of the table_layout.size.
131+
let min_cap = match (Group::WIDTH, table_layout.size) {
132+
(16, 0..=1) => 14,
133+
(16, 2..=3) => 7,
134+
(8, 0..=1) => 7,
135+
_ => 3,
136+
};
137+
let cap = min_cap.max(cap);
109138
// We don't bother with a table size of 2 buckets since that can only
110-
// hold a single element. Instead we skip directly to a 4 bucket table
139+
// hold a single element. Instead, we skip directly to a 4 bucket table
111140
// which can hold 3 elements.
112-
return Some(if cap < 4 { 4 } else { 8 });
141+
return Some(if cap < 4 {
142+
4
143+
} else if cap < 8 {
144+
8
145+
} else {
146+
16
147+
});
113148
}
114149

115150
// Otherwise require 1/8 buckets to be empty (87.5% load)
@@ -851,7 +886,7 @@ impl<T, A: Allocator> RawTable<T, A> {
851886
// elements. If the calculation overflows then the requested bucket
852887
// count must be larger than what we have right now and nothing needs to be
853888
// done.
854-
let min_buckets = match capacity_to_buckets(min_size) {
889+
let min_buckets = match capacity_to_buckets(min_size, Self::TABLE_LAYOUT) {
855890
Some(buckets) => buckets,
856891
None => return,
857892
};
@@ -982,14 +1017,8 @@ impl<T, A: Allocator> RawTable<T, A> {
9821017
/// * If `self.table.items != 0`, calling of this function with `capacity`
9831018
/// equal to 0 (`capacity == 0`) results in [`undefined behavior`].
9841019
///
985-
/// * If `capacity_to_buckets(capacity) < Group::WIDTH` and
986-
/// `self.table.items > capacity_to_buckets(capacity)`
987-
/// calling this function results in [`undefined behavior`].
988-
///
989-
/// * If `capacity_to_buckets(capacity) >= Group::WIDTH` and
990-
/// `self.table.items > capacity_to_buckets(capacity)`
991-
/// calling this function are never return (will go into an
992-
/// infinite loop).
1020+
/// * If `self.table.items > capacity_to_buckets(capacity, Self::TABLE_LAYOUT)`
1021+
/// calling this function will never return (it will loop infinitely).
9931022
///
9941023
/// See [`RawTableInner::find_insert_slot`] for more information.
9951024
///
@@ -1479,8 +1508,8 @@ impl RawTableInner {
14791508
// SAFETY: We checked that we could successfully allocate the new table, and then
14801509
// initialized all control bytes with the constant `Tag::EMPTY` byte.
14811510
unsafe {
1482-
let buckets =
1483-
capacity_to_buckets(capacity).ok_or_else(|| fallibility.capacity_overflow())?;
1511+
let buckets = capacity_to_buckets(capacity, table_layout)
1512+
.ok_or_else(|| fallibility.capacity_overflow())?;
14841513

14851514
let mut result =
14861515
Self::new_uninitialized(alloc, table_layout, buckets, fallibility)?;
@@ -4137,6 +4166,26 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> {
41374166
mod test_map {
41384167
use super::*;
41394168

4169+
#[test]
4170+
fn test_minimum_capacity_for_small_types() {
4171+
#[track_caller]
4172+
fn test_t<T>() {
4173+
let raw_table: RawTable<T> = RawTable::with_capacity(1);
4174+
let actual_buckets = raw_table.buckets();
4175+
let min_buckets = Group::WIDTH / core::mem::size_of::<T>();
4176+
assert!(
4177+
actual_buckets >= min_buckets,
4178+
"expected at least {min_buckets} buckets, got {actual_buckets} buckets"
4179+
);
4180+
}
4181+
4182+
test_t::<u8>();
4183+
4184+
// This is only "small" for some platforms, like x86_64 with SSE2, but
4185+
// there's no harm in running it on other platforms.
4186+
test_t::<u16>();
4187+
}
4188+
41404189
fn rehash_in_place<T>(table: &mut RawTable<T>, hasher: impl Fn(&T) -> u64) {
41414190
unsafe {
41424191
table.table.rehash_in_place(

0 commit comments

Comments
 (0)