From d17b4aa60068a6b0f082ec8e483d90d6e8a216b5 Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sat, 11 May 2024 12:10:26 +0200 Subject: [PATCH 1/3] Added unit test test_hash_function_default_integer against bad hash function on uint64_t. It constructs a set of distinct keys with zero lower half, then computes hash function for all of them, and checks that the hashes are mostly different in lower bits. --- tests/robin_set_tests.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/robin_set_tests.cpp b/tests/robin_set_tests.cpp index e68b514..68ac68f 100644 --- a/tests/robin_set_tests.cpp +++ b/tests/robin_set_tests.cpp @@ -171,4 +171,23 @@ BOOST_AUTO_TEST_CASE(test_erase_fast) { BOOST_CHECK(set.size() == 0); } +BOOST_AUTO_TEST_CASE(test_hash_function_default_integer) { + // Brief quality check for default integer hash function. + // Most importantly, make sure it is not identity function like std::hash on GCC. + // See: https://github.com/Tessil/robin-map/issues/73 + const std::size_t nb_values = 1 << 18; + const std::size_t shift = 32; + + using Set = tsl::robin_set; + Set set; + + for (uint64_t i = 0; i < nb_values; i++) { + uint64_t hash = set.hash_function()(uint64_t(i) << shift); + set.insert(hash & 0xFFFFFF); + } + + double imageRatio = double(set.size()) / nb_values; + BOOST_CHECK(imageRatio >= 0.9); +} + BOOST_AUTO_TEST_SUITE_END() From ff5adb9bbe1bc1d3df0e7c66eaa1f94d0d1f521a Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sat, 11 May 2024 16:06:07 +0200 Subject: [PATCH 2/3] Added tsl::hash wrapper which adds MurmurHash2 finalizer in case libstd-c++ or libc++ is used. --- include/tsl/robin_growth_policy.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/include/tsl/robin_growth_policy.h b/include/tsl/robin_growth_policy.h index b100bfc..6cb6b81 100644 --- a/include/tsl/robin_growth_policy.h +++ b/include/tsl/robin_growth_policy.h @@ -413,4 +413,35 @@ class prime_growth_policy { } // namespace rh } // namespace tsl +namespace tsl { + +/** + * Wrapper around std::hash which adds hash finalizer missing in libstd-c++ and + * libc++. It should be used with power_of_two_growth_policy. + */ +template +struct hash : public std::hash { + std::size_t operator()(const Key& key) const { + std::size_t h = std::hash::operator()(key); + +#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION) +#if SIZE_MAX >= ULLONG_MAX + // 64-bit finalizer from MurmurHash2 + h ^= h >> 47; + h *= 0xc6a4a7935bd1e995ull; + h ^= h >> 47; +#else + // 32-bit finalizer from MurmurHash2 + h ^= h >> 13; + h *= 0x5bd1e995u; + h ^= h >> 15; +#endif +#endif + + return h; + } +}; + +} // namespace tsl + #endif From cd34e3e8af2afd1f41da49d3ac03d0d968ff5dfc Mon Sep 17 00:00:00 2001 From: stgatilov Date: Sat, 11 May 2024 16:06:39 +0200 Subject: [PATCH 3/3] Replaced default hash function from std::hash to tsl::hash everywhere. --- include/tsl/robin_hash.h | 2 +- include/tsl/robin_map.h | 4 ++-- include/tsl/robin_set.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/tsl/robin_hash.h b/include/tsl/robin_hash.h index 03d23a9..d60b21a 100644 --- a/include/tsl/robin_hash.h +++ b/include/tsl/robin_hash.h @@ -403,7 +403,7 @@ class robin_hash : private Hash, private KeyEqual, private GrowthPolicy { is_power_of_two_policy::value) && // Don't store the hash for primitive types with default hash. (!std::is_arithmetic::value || - !std::is_same>::value)); + !std::is_same>::value)); /** * Only use the stored hash on lookup if we are explicitly asked. We are not diff --git a/include/tsl/robin_map.h b/include/tsl/robin_map.h index b594810..1ce02f8 100644 --- a/include/tsl/robin_map.h +++ b/include/tsl/robin_map.h @@ -83,7 +83,7 @@ namespace tsl { * insert, invalidate the iterators. * - erase: always invalidate the iterators. */ -template , +template , class KeyEqual = std::equal_to, class Allocator = std::allocator>, bool StoreHash = false, @@ -803,7 +803,7 @@ class robin_map { * Same as `tsl::robin_map`. */ -template , +template , class KeyEqual = std::equal_to, class Allocator = std::allocator>, bool StoreHash = false> diff --git a/include/tsl/robin_set.h b/include/tsl/robin_set.h index e115007..03c9c78 100644 --- a/include/tsl/robin_set.h +++ b/include/tsl/robin_set.h @@ -83,7 +83,7 @@ namespace tsl { * insert, invalidate the iterators. * - erase: always invalidate the iterators. */ -template , +template , class KeyEqual = std::equal_to, class Allocator = std::allocator, bool StoreHash = false, class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>> @@ -657,7 +657,7 @@ class robin_set { * Same as `tsl::robin_set`. */ -template , +template , class KeyEqual = std::equal_to, class Allocator = std::allocator, bool StoreHash = false> using robin_pg_set = robin_set