@@ -100,16 +100,51 @@ impl ProbeSeq {
100
100
// Workaround for emscripten bug emscripten-core/emscripten-fastcomp#258
101
101
#[ cfg_attr( target_os = "emscripten" , inline( never) ) ]
102
102
#[ cfg_attr( not( target_os = "emscripten" ) , inline) ]
103
- fn capacity_to_buckets ( cap : usize ) -> Option < usize > {
103
+ fn capacity_to_buckets ( cap : usize , table_layout : TableLayout ) -> Option < usize > {
104
104
debug_assert_ne ! ( cap, 0 ) ;
105
105
106
106
// For small tables we require at least 1 empty bucket so that lookups are
107
107
// guaranteed to terminate if an element doesn't exist in the table.
108
- if cap < 8 {
108
+ if cap < 15 {
109
+ // Consider a small TableLayout like { size: 1, ctrl_align: 16 } on a
110
+ // platform with Group::WIDTH of 16 (like x86_64 with SSE2). For small
111
+ // bucket sizes, this ends up wasting quite a few bytes just to pad to
112
+ // the relatively larger ctrl_align:
113
+ //
114
+ // | capacity | buckets | bytes allocated | bytes per item |
115
+ // | -------- | ------- | --------------- | -------------- |
116
+ // | 3 | 4 | 36 | (Yikes!) 12.0 |
117
+ // | 7 | 8 | 40 | (Poor) 5.7 |
118
+ // | 14 | 16 | 48 | 3.4 |
119
+ // | 28 | 32 | 80 | 3.3 |
120
+ //
121
+ // In general, buckets * table_layout.size >= table_layout.ctrl_align
122
+ // must be true to avoid these edges. This is implemented by adjusting
123
+ // the minimum capacity upwards for small items. This code only needs
124
+ // to handle ctrl_align which are less than or equal to Group::WIDTH,
125
+ // because valid layout sizes are always a multiple of the alignment,
126
+ // so anything with alignment over the Group::WIDTH won't hit this edge
127
+ // case.
128
+
129
+ // This is brittle, e.g. if we ever add 32 byte groups, it will select
130
+ // 3 regardless of the table_layout.size.
131
+ let min_cap = match ( Group :: WIDTH , table_layout. size ) {
132
+ ( 16 , 0 ..=1 ) => 14 ,
133
+ ( 16 , 2 ..=3 ) => 7 ,
134
+ ( 8 , 0 ..=1 ) => 7 ,
135
+ _ => 3 ,
136
+ } ;
137
+ let cap = min_cap. max ( cap) ;
109
138
// We don't bother with a table size of 2 buckets since that can only
110
- // hold a single element. Instead we skip directly to a 4 bucket table
139
+ // hold a single element. Instead, we skip directly to a 4 bucket table
111
140
// which can hold 3 elements.
112
- return Some ( if cap < 4 { 4 } else { 8 } ) ;
141
+ return Some ( if cap < 4 {
142
+ 4
143
+ } else if cap < 8 {
144
+ 8
145
+ } else {
146
+ 16
147
+ } ) ;
113
148
}
114
149
115
150
// Otherwise require 1/8 buckets to be empty (87.5% load)
@@ -851,7 +886,7 @@ impl<T, A: Allocator> RawTable<T, A> {
851
886
// elements. If the calculation overflows then the requested bucket
852
887
// count must be larger than what we have right now and nothing needs to be
853
888
// done.
854
- let min_buckets = match capacity_to_buckets ( min_size) {
889
+ let min_buckets = match capacity_to_buckets ( min_size, Self :: TABLE_LAYOUT ) {
855
890
Some ( buckets) => buckets,
856
891
None => return ,
857
892
} ;
@@ -982,14 +1017,8 @@ impl<T, A: Allocator> RawTable<T, A> {
982
1017
/// * If `self.table.items != 0`, calling of this function with `capacity`
983
1018
/// equal to 0 (`capacity == 0`) results in [`undefined behavior`].
984
1019
///
985
- /// * If `capacity_to_buckets(capacity) < Group::WIDTH` and
986
- /// `self.table.items > capacity_to_buckets(capacity)`
987
- /// calling this function results in [`undefined behavior`].
988
- ///
989
- /// * If `capacity_to_buckets(capacity) >= Group::WIDTH` and
990
- /// `self.table.items > capacity_to_buckets(capacity)`
991
- /// calling this function are never return (will go into an
992
- /// infinite loop).
1020
+ /// * If `self.table.items > capacity_to_buckets(capacity, Self::TABLE_LAYOUT)`
1021
+ /// calling this function will never return (will loop infinitely).
993
1022
///
994
1023
/// See [`RawTableInner::find_insert_slot`] for more information.
995
1024
///
@@ -1479,8 +1508,8 @@ impl RawTableInner {
1479
1508
// SAFETY: We checked that we could successfully allocate the new table, and then
1480
1509
// initialized all control bytes with the constant `Tag::EMPTY` byte.
1481
1510
unsafe {
1482
- let buckets =
1483
- capacity_to_buckets ( capacity ) . ok_or_else ( || fallibility. capacity_overflow ( ) ) ?;
1511
+ let buckets = capacity_to_buckets ( capacity , table_layout )
1512
+ . ok_or_else ( || fallibility. capacity_overflow ( ) ) ?;
1484
1513
1485
1514
let mut result =
1486
1515
Self :: new_uninitialized ( alloc, table_layout, buckets, fallibility) ?;
@@ -4137,6 +4166,26 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> {
4137
4166
mod test_map {
4138
4167
use super :: * ;
4139
4168
4169
+ #[ test]
4170
+ fn test_minimum_capacity_for_small_types ( ) {
4171
+ #[ track_caller]
4172
+ fn test_t < T > ( ) {
4173
+ let raw_table: RawTable < T > = RawTable :: with_capacity ( 1 ) ;
4174
+ let actual_buckets = raw_table. buckets ( ) ;
4175
+ let min_buckets = Group :: WIDTH / core:: mem:: size_of :: < T > ( ) ;
4176
+ assert ! (
4177
+ actual_buckets >= min_buckets,
4178
+ "expected at least {min_buckets} buckets, got {actual_buckets} buckets"
4179
+ ) ;
4180
+ }
4181
+
4182
+ test_t :: < u8 > ( ) ;
4183
+
4184
+ // This is only "small" for some platforms, like x86_64 with SSE2, but
4185
+ // there's no harm in running it on other platforms.
4186
+ test_t :: < u16 > ( ) ;
4187
+ }
4188
+
4140
4189
fn rehash_in_place < T > ( table : & mut RawTable < T > , hasher : impl Fn ( & T ) -> u64 ) {
4141
4190
unsafe {
4142
4191
table. table . rehash_in_place (
0 commit comments