
Commit 4aca8e4

Auto merge of #282 - Zoxc:size-opt, r=Amanieu
Make rehashing and resizing less generic

This makes the code in `rehash_in_place`, `resize` and `reserve_rehash` less generic over `T`. It also improves the performance of rustc. That performance increase is partially attributed to the use of `#[inline(always)]`.

This is the effect on rustc runtime:

```
clap:check               1.9523s  1.9327s  -1.00%
hashmap-instances:check  0.0628s  0.0624s  -0.57%
helloworld:check         0.0438s  0.0436s  -0.50%
hyper:check              0.2987s  0.2970s  -0.59%
regex:check              1.1497s  1.1402s  -0.82%
syn:check                1.7004s  1.6851s  -0.90%
syntex_syntax:check      6.9232s  6.8546s  -0.99%
winapi:check             8.3220s  8.2857s  -0.44%

Total                   20.4528s 20.3014s  -0.74%
Summary                  4.0000s  3.9709s  -0.73%
```

`rustc_driver`'s code size is increased by 0.02%.

This is the effect this has on compile time for my [HashMap compile time benchmark](#277 (comment)):

```
hashmap-instances:check     0.0636s   0.0632s  -0.61%
hashmap-instances:release  33.0166s  32.2487s  -2.33%
hashmap-instances:debug     7.8677s   7.2012s  -8.47%

Total                      40.9479s  39.5131s  -3.50%
Summary                     1.5000s   1.4430s  -3.80%
```

The `hashmap-instances:debug` compile time could be further improved if there were a way to apply `#[inline(always)]` only in release builds.
2 parents a036b25 + bf8635d · commit 4aca8e4
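The core pattern this commit introduces can be sketched in isolation: a thin generic wrapper forwards to a non-generic inner function through a `&dyn Fn` callback, and `#[inline(always)]` lets LLVM devirtualize that callback wherever the wrapper is inlined. The `Inner`/`Table` types below are hypothetical stand-ins for hashbrown's `RawTableInner`/`RawTable`, a minimal sketch rather than the real implementation:

```
/// Hypothetical type-erased core: compiled once, shared by every `T`.
struct Inner {
    hashes: Vec<u64>,
}

impl Inner {
    /// Non-generic heavy path. The `dyn Fn` indirection keeps this body out
    /// of per-`T` monomorphization; `#[inline(always)]` lets LLVM remove the
    /// indirect call once this is inlined into a monomorphized caller.
    #[allow(clippy::inline_always)]
    #[inline(always)]
    fn rehash_inner(&mut self, hasher: &dyn Fn(&mut Self, usize) -> u64) {
        for index in 0..self.hashes.len() {
            let hash = hasher(self, index);
            self.hashes[index] = hash;
        }
    }
}

/// Hypothetical generic wrapper, analogous to `RawTable<T>`.
struct Table<T> {
    inner: Inner,
    keys: Vec<T>,
}

impl<T> Table<T> {
    /// Thin generic shim: only this function and the small hashing closure
    /// are instantiated per `T`.
    fn rehash(&mut self, hasher: impl Fn(&T) -> u64) {
        let keys = &self.keys;
        self.inner.rehash_inner(&|_, index| hasher(&keys[index]));
    }
}

fn main() {
    let mut table = Table {
        inner: Inner { hashes: vec![0; 3] },
        keys: vec![10u64, 20, 30],
    };
    // Recompute the stored hashes with a trivial stand-in "hash" function.
    table.rehash(|key| key.wrapping_mul(31));
    assert_eq!(table.inner.hashes, vec![310, 620, 930]);
}
```

The trade-off is the one the commit message measures: less LLVM IR per instantiation (faster debug builds, little code-size growth), at the cost of relying on `#[inline(always)]` to recover direct calls in optimized builds.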

File tree

1 file changed: +213, -120 lines


src/raw/mod.rs

@@ -3,14 +3,14 @@ use crate::scopeguard::guard;
 use crate::TryReserveError;
 #[cfg(feature = "nightly")]
 use crate::UnavailableMutError;
-use core::hint;
 use core::iter::FusedIterator;
 use core::marker::PhantomData;
 use core::mem;
 use core::mem::ManuallyDrop;
 #[cfg(feature = "nightly")]
 use core::mem::MaybeUninit;
 use core::ptr::NonNull;
+use core::{hint, ptr};

 cfg_if! {
     // Use the SSE2 implementation if possible: it allows us to scan 16 buckets
@@ -359,6 +359,7 @@ impl<T> Bucket<T> {
     pub unsafe fn as_mut<'a>(&self) -> &'a mut T {
         &mut *self.as_ptr()
     }
+    #[cfg(feature = "raw")]
     #[cfg_attr(feature = "inline-more", inline)]
     pub unsafe fn copy_from_nonoverlapping(&self, other: &Self) {
         self.as_ptr().copy_from_nonoverlapping(other.as_ptr(), 1);
@@ -682,102 +683,18 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
         hasher: impl Fn(&T) -> u64,
         fallibility: Fallibility,
     ) -> Result<(), TryReserveError> {
-        // Avoid `Option::ok_or_else` because it bloats LLVM IR.
-        let new_items = match self.table.items.checked_add(additional) {
-            Some(new_items) => new_items,
-            None => return Err(fallibility.capacity_overflow()),
-        };
-        let full_capacity = bucket_mask_to_capacity(self.table.bucket_mask);
-        if new_items <= full_capacity / 2 {
-            // Rehash in-place without re-allocating if we have plenty of spare
-            // capacity that is locked up due to DELETED entries.
-            self.rehash_in_place(hasher);
-            Ok(())
-        } else {
-            // Otherwise, conservatively resize to at least the next size up
-            // to avoid churning deletes into frequent rehashes.
-            self.resize(
-                usize::max(new_items, full_capacity + 1),
-                hasher,
-                fallibility,
-            )
-        }
-    }
-
-    /// Rehashes the contents of the table in place (i.e. without changing the
-    /// allocation).
-    ///
-    /// If `hasher` panics then some the table's contents may be lost.
-    fn rehash_in_place(&mut self, hasher: impl Fn(&T) -> u64) {
         unsafe {
-            // If the hash function panics then properly clean up any elements
-            // that we haven't rehashed yet. We unfortunately can't preserve the
-            // element since we lost their hash and have no way of recovering it
-            // without risking another panic.
-            self.table.prepare_rehash_in_place();
-
-            let mut guard = guard(&mut self.table, move |self_| {
+            self.table.reserve_rehash_inner(
+                additional,
+                &|table, index| hasher(table.bucket::<T>(index).as_ref()),
+                fallibility,
+                TableLayout::new::<T>(),
                 if mem::needs_drop::<T>() {
-                    for i in 0..self_.buckets() {
-                        if *self_.ctrl(i) == DELETED {
-                            self_.set_ctrl(i, EMPTY);
-                            self_.bucket::<T>(i).drop();
-                            self_.items -= 1;
-                        }
-                    }
-                }
-                self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items;
-            });
-
-            // At this point, DELETED elements are elements that we haven't
-            // rehashed yet. Find them and re-insert them at their ideal
-            // position.
-            'outer: for i in 0..guard.buckets() {
-                if *guard.ctrl(i) != DELETED {
-                    continue;
-                }
-
-                'inner: loop {
-                    // Hash the current item
-                    let item = guard.bucket(i);
-                    let hash = hasher(item.as_ref());
-
-                    // Search for a suitable place to put it
-                    let new_i = guard.find_insert_slot(hash);
-
-                    // Probing works by scanning through all of the control
-                    // bytes in groups, which may not be aligned to the group
-                    // size. If both the new and old position fall within the
-                    // same unaligned group, then there is no benefit in moving
-                    // it and we can just continue to the next item.
-                    if likely(guard.is_in_same_group(i, new_i, hash)) {
-                        guard.set_ctrl_h2(i, hash);
-                        continue 'outer;
-                    }
-
-                    // We are moving the current item to a new position. Write
-                    // our H2 to the control byte of the new position.
-                    let prev_ctrl = guard.replace_ctrl_h2(new_i, hash);
-                    if prev_ctrl == EMPTY {
-                        guard.set_ctrl(i, EMPTY);
-                        // If the target slot is empty, simply move the current
-                        // element into the new slot and clear the old control
-                        // byte.
-                        guard.bucket(new_i).copy_from_nonoverlapping(&item);
-                        continue 'outer;
-                    } else {
-                        // If the target slot is occupied, swap the two elements
-                        // and then continue processing the element that we just
-                        // swapped into the old slot.
-                        debug_assert_eq!(prev_ctrl, DELETED);
-                        mem::swap(guard.bucket(new_i).as_mut(), item.as_mut());
-                        continue 'inner;
-                    }
-                }
-            }
-
-            guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items;
-            mem::forget(guard);
+                    Some(mem::transmute(ptr::drop_in_place::<T> as unsafe fn(*mut T)))
+                } else {
+                    None
+                },
+            )
         }
     }

@@ -790,30 +707,12 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
         fallibility: Fallibility,
     ) -> Result<(), TryReserveError> {
         unsafe {
-            let mut new_table =
-                self.table
-                    .prepare_resize(TableLayout::new::<T>(), capacity, fallibility)?;
-
-            // Copy all elements to the new table.
-            for item in self.iter() {
-                // This may panic.
-                let hash = hasher(item.as_ref());
-
-                // We can use a simpler version of insert() here since:
-                // - there are no DELETED entries.
-                // - we know there is enough space in the table.
-                // - all elements are unique.
-                let (index, _) = new_table.prepare_insert_slot(hash);
-                new_table.bucket(index).copy_from_nonoverlapping(&item);
-            }
-
-            // We successfully copied all elements without panicking. Now replace
-            // self with the new table. The old table will have its memory freed but
-            // the items will not be dropped (since they have been moved into the
-            // new table).
-            mem::swap(&mut self.table, &mut new_table);
-
-            Ok(())
+            self.table.resize_inner(
+                capacity,
+                &|table, index| hasher(table.bucket::<T>(index).as_ref()),
+                fallibility,
+                TableLayout::new::<T>(),
+            )
         }
     }

@@ -1312,6 +1211,14 @@ impl<A: Allocator + Clone> RawTableInner<A> {
         Bucket::from_base_index(self.data_end(), index)
     }

+    #[cfg_attr(feature = "inline-more", inline)]
+    unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 {
+        debug_assert_ne!(self.bucket_mask, 0);
+        debug_assert!(index < self.buckets());
+        let base: *mut u8 = self.data_end().as_ptr();
+        base.sub((index + 1) * size_of)
+    }
+
     #[cfg_attr(feature = "inline-more", inline)]
     unsafe fn data_end<T>(&self) -> NonNull<T> {
         NonNull::new_unchecked(self.ctrl.as_ptr().cast())
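Aside: the new `bucket_ptr` helper above encodes the table layout in which bucket data sits below the control bytes and is indexed downward from `data_end`, so bucket `index` begins `(index + 1) * size_of` bytes before that end pointer. A standalone toy sketch of the same address arithmetic, using a plain array in place of hashbrown's real allocation:

```
fn main() {
    const SIZE_OF: usize = core::mem::size_of::<u64>();
    const BUCKETS: usize = 4;

    // Layout sketch (data grows downward, control bytes sit above it):
    //   [ bucket 3 | bucket 2 | bucket 1 | bucket 0 ][ ctrl bytes ... ]
    //                                                ^ data_end
    let mut storage = [0u64; BUCKETS];
    let data_end: *mut u8 = unsafe { storage.as_mut_ptr().add(BUCKETS) }.cast();

    // Same arithmetic as `bucket_ptr`: bucket `index` starts
    // `(index + 1) * size_of` bytes before `data_end`.
    let bucket_ptr = |index: usize| -> *mut u64 {
        debug_assert!(index < BUCKETS);
        unsafe { data_end.sub((index + 1) * SIZE_OF) }.cast()
    };

    unsafe {
        *bucket_ptr(0) = 10; // lowest index lives closest to `data_end`
        *bucket_ptr(3) = 40; // highest index lives at the start of the block
    }
    assert_eq!(storage, [40, 0, 0, 10]);
}
```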
@@ -1457,6 +1364,178 @@ impl<A: Allocator + Clone> RawTableInner<A> {
         }))
     }

+    /// Reserves or rehashes to make room for `additional` more elements.
+    ///
+    /// This uses dynamic dispatch to reduce the amount of
+    /// code generated, but it is eliminated by LLVM optimizations when inlined.
+    #[allow(clippy::inline_always)]
+    #[inline(always)]
+    unsafe fn reserve_rehash_inner(
+        &mut self,
+        additional: usize,
+        hasher: &dyn Fn(&mut Self, usize) -> u64,
+        fallibility: Fallibility,
+        layout: TableLayout,
+        drop: Option<fn(*mut u8)>,
+    ) -> Result<(), TryReserveError> {
+        // Avoid `Option::ok_or_else` because it bloats LLVM IR.
+        let new_items = match self.items.checked_add(additional) {
+            Some(new_items) => new_items,
+            None => return Err(fallibility.capacity_overflow()),
+        };
+        let full_capacity = bucket_mask_to_capacity(self.bucket_mask);
+        if new_items <= full_capacity / 2 {
+            // Rehash in-place without re-allocating if we have plenty of spare
+            // capacity that is locked up due to DELETED entries.
+            self.rehash_in_place(hasher, layout.size, drop);
+            Ok(())
+        } else {
+            // Otherwise, conservatively resize to at least the next size up
+            // to avoid churning deletes into frequent rehashes.
+            self.resize_inner(
+                usize::max(new_items, full_capacity + 1),
+                hasher,
+                fallibility,
+                layout,
+            )
+        }
+    }
+
+    /// Allocates a new table of a different size and moves the contents of the
+    /// current table into it.
+    ///
+    /// This uses dynamic dispatch to reduce the amount of
+    /// code generated, but it is eliminated by LLVM optimizations when inlined.
+    #[allow(clippy::inline_always)]
+    #[inline(always)]
+    unsafe fn resize_inner(
+        &mut self,
+        capacity: usize,
+        hasher: &dyn Fn(&mut Self, usize) -> u64,
+        fallibility: Fallibility,
+        layout: TableLayout,
+    ) -> Result<(), TryReserveError> {
+        let mut new_table = self.prepare_resize(layout, capacity, fallibility)?;
+
+        // Copy all elements to the new table.
+        for i in 0..self.buckets() {
+            if !is_full(*self.ctrl(i)) {
+                continue;
+            }
+
+            // This may panic.
+            let hash = hasher(self, i);
+
+            // We can use a simpler version of insert() here since:
+            // - there are no DELETED entries.
+            // - we know there is enough space in the table.
+            // - all elements are unique.
+            let (index, _) = new_table.prepare_insert_slot(hash);
+
+            ptr::copy_nonoverlapping(
+                self.bucket_ptr(i, layout.size),
+                new_table.bucket_ptr(index, layout.size),
+                layout.size,
+            );
+        }
+
+        // We successfully copied all elements without panicking. Now replace
+        // self with the new table. The old table will have its memory freed but
+        // the items will not be dropped (since they have been moved into the
+        // new table).
+        mem::swap(self, &mut new_table);
+
+        Ok(())
+    }
+
+    /// Rehashes the contents of the table in place (i.e. without changing the
+    /// allocation).
+    ///
+    /// If `hasher` panics then some the table's contents may be lost.
+    ///
+    /// This uses dynamic dispatch to reduce the amount of
+    /// code generated, but it is eliminated by LLVM optimizations when inlined.
+    #[allow(clippy::inline_always)]
+    #[inline(always)]
+    unsafe fn rehash_in_place(
+        &mut self,
+        hasher: &dyn Fn(&mut Self, usize) -> u64,
+        size_of: usize,
+        drop: Option<fn(*mut u8)>,
+    ) {
+        // If the hash function panics then properly clean up any elements
+        // that we haven't rehashed yet. We unfortunately can't preserve the
+        // element since we lost their hash and have no way of recovering it
+        // without risking another panic.
+        self.prepare_rehash_in_place();
+
+        let mut guard = guard(self, move |self_| {
+            if let Some(drop) = drop {
+                for i in 0..self_.buckets() {
+                    if *self_.ctrl(i) == DELETED {
+                        self_.set_ctrl(i, EMPTY);
+                        drop(self_.bucket_ptr(i, size_of));
+                        self_.items -= 1;
+                    }
+                }
+            }
+            self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items;
+        });
+
+        // At this point, DELETED elements are elements that we haven't
+        // rehashed yet. Find them and re-insert them at their ideal
+        // position.
+        'outer: for i in 0..guard.buckets() {
+            if *guard.ctrl(i) != DELETED {
+                continue;
+            }
+
+            let i_p = guard.bucket_ptr(i, size_of);
+
+            'inner: loop {
+                // Hash the current item
+                let hash = hasher(*guard, i);
+
+                // Search for a suitable place to put it
+                let new_i = guard.find_insert_slot(hash);
+                let new_i_p = guard.bucket_ptr(new_i, size_of);
+
+                // Probing works by scanning through all of the control
+                // bytes in groups, which may not be aligned to the group
+                // size. If both the new and old position fall within the
+                // same unaligned group, then there is no benefit in moving
+                // it and we can just continue to the next item.
+                if likely(guard.is_in_same_group(i, new_i, hash)) {
+                    guard.set_ctrl_h2(i, hash);
+                    continue 'outer;
+                }
+
+                // We are moving the current item to a new position. Write
+                // our H2 to the control byte of the new position.
+                let prev_ctrl = guard.replace_ctrl_h2(new_i, hash);
+                if prev_ctrl == EMPTY {
+                    guard.set_ctrl(i, EMPTY);
+                    // If the target slot is empty, simply move the current
+                    // element into the new slot and clear the old control
+                    // byte.
+                    ptr::copy_nonoverlapping(i_p, new_i_p, size_of);
+                    continue 'outer;
+                } else {
+                    // If the target slot is occupied, swap the two elements
+                    // and then continue processing the element that we just
+                    // swapped into the old slot.
+                    debug_assert_eq!(prev_ctrl, DELETED);
+                    ptr::swap_nonoverlapping(i_p, new_i_p, size_of);
+                    continue 'inner;
+                }
+            }
+        }
+
+        guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items;
+
+        mem::forget(guard);
+    }
+
     #[inline]
     unsafe fn free_buckets(&mut self, table_layout: TableLayout) {
         // Avoid `Option::unwrap_or_else` because it bloats LLVM IR.
@@ -2281,6 +2360,20 @@ impl<'a, A: Allocator + Clone> Iterator for RawIterHashInner<'a, A> {
 mod test_map {
     use super::*;

+    fn rehash_in_place<T>(table: &mut RawTable<T>, hasher: impl Fn(&T) -> u64) {
+        unsafe {
+            table.table.rehash_in_place(
+                &|table, index| hasher(table.bucket::<T>(index).as_ref()),
+                mem::size_of::<T>(),
+                if mem::needs_drop::<T>() {
+                    Some(mem::transmute(ptr::drop_in_place::<T> as unsafe fn(*mut T)))
+                } else {
+                    None
+                },
+            );
+        }
+    }
+
     #[test]
     fn rehash() {
         let mut table = RawTable::new();
@@ -2296,7 +2389,7 @@ mod test_map {
             assert!(table.find(i + 100, |x| *x == i + 100).is_none());
         }

-        table.rehash_in_place(hasher);
+        rehash_in_place(&mut table, hasher);

         for i in 0..100 {
             unsafe {