diff --git a/benches/native.rs b/benches/native.rs
index 36db8f1..dcd3f5d 100644
--- a/benches/native.rs
+++ b/benches/native.rs
@@ -8,7 +8,7 @@ extern crate im;
extern crate rand;
extern crate test;
-use rand::{rngs::SmallRng, SeedableRng, Rng};
+use rand::{rngs::SmallRng, Rng, SeedableRng};
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::iter::FromIterator;
use test::Bencher;
diff --git a/src/hash/set.rs b/src/hash/set.rs
index 4f1dfee..34ea4b4 100644
--- a/src/hash/set.rs
+++ b/src/hash/set.rs
@@ -31,10 +31,13 @@ use std::iter::FusedIterator;
use std::iter::{FromIterator, IntoIterator, Sum};
use std::ops::{Add, Deref, Mul};
-use crate::nodes::hamt::{hash_key, Drain as NodeDrain, HashValue, Iter as NodeIter, Node};
+use crate::nodes::hamt::{
+ hash_key, Drain as NodeDrain, HashValue, Iter as NodeIter, Node,
+ OrderedDrain as NodeOrderedDrain, OrderedIter as NodeOrderedIter,
+};
use crate::ordset::OrdSet;
-use crate::Vector;
use crate::util::{Pool, PoolRef, Ref};
+use crate::Vector;
/// Construct a set from a sequence of values.
///
@@ -313,15 +316,44 @@ impl HashSet {
/// Get an iterator over the values in a hash set.
///
+ /// No ordering guarantees are provided.
+ #[must_use]
+ pub fn iter(&self) -> Iter<'_, A> {
+ Iter {
+ it: NodeIter::new(&self.root, self.size),
+ }
+ }
+
+ /// Get an ordered iterator over the values in a hash set.
+ ///
/// Please note that the order is consistent between sets using
/// the same hasher, but no other ordering guarantee is offered.
/// Items will not come out in insertion order or sort order.
/// They will, however, come out in the same order every time for
/// the same set.
#[must_use]
- pub fn iter(&self) -> Iter<'_, A> {
- Iter {
- it: NodeIter::new(&self.root, self.size),
+ pub fn ordered_iter(&self) -> OrderedIter<'_, A> {
+ OrderedIter {
+ it: NodeOrderedIter::new(&self.root, self.size),
+ }
+ }
+}
+
+impl HashSet
+where
+A: Hash + Clone + Ord
+{
+ /// Get an ordered iterator over the values in a hash set.
+ ///
+ /// Please note that the order is consistent between sets using
+ /// the same hasher, but no other ordering guarantee is offered.
+ /// Items will not come out in insertion order or sort order.
+ /// They will, however, come out in the same order every time for
+ /// the same set.
+ #[must_use]
+ pub fn into_ordered_iter(self) -> OrderedConsumingIter {
+ OrderedConsumingIter {
+ it: NodeOrderedDrain::new(&self.pool.0, self.root, self.size),
}
}
}
@@ -667,14 +699,14 @@ where
impl PartialOrd for HashSet
where
- A: Hash + Eq + Clone + PartialOrd,
+ A: Hash + Eq + Clone + PartialOrd + Ord,
S: BuildHasher + Default,
{
fn partial_cmp(&self, other: &Self) -> Option {
if Ref::ptr_eq(&self.hasher, &other.hasher) {
- return self.iter().partial_cmp(other.iter());
+ return self.ordered_iter().partial_cmp(other.ordered_iter());
}
- self.iter().partial_cmp(other.iter())
+ self.ordered_iter().partial_cmp(other.ordered_iter())
}
}
@@ -685,22 +717,22 @@ where
{
fn cmp(&self, other: &Self) -> Ordering {
if Ref::ptr_eq(&self.hasher, &other.hasher) {
- return self.iter().cmp(other.iter());
+ return self.ordered_iter().cmp(other.ordered_iter());
}
- self.iter().cmp(other.iter())
+ self.ordered_iter().cmp(other.ordered_iter())
}
}
impl Hash for HashSet
where
- A: Hash + Eq,
+ A: Hash + Eq + Ord,
S: BuildHasher + Default,
{
fn hash(&self, state: &mut H)
where
H: Hasher,
{
- for i in self.iter() {
+ for i in self.ordered_iter() {
i.hash(state);
}
}
@@ -857,6 +889,35 @@ impl<'a, A> ExactSizeIterator for Iter<'a, A> {}
impl<'a, A> FusedIterator for Iter<'a, A> {}
+/// An ordered iterator over the elements of a set.
+/// Given a deterministic hasher, this iterator yields values in a deterministic
+/// order: two sets with the same elements are enumerated in the same order
+/// regardless of the order in which elements were inserted in the sets. Items
+/// are returned in the order of their hash values. Items with identical hash
+/// values are sorted based on the `Ord` trait.
+pub struct OrderedIter<'a, A> {
+ it: NodeOrderedIter<'a, Value>,
+}
+
+impl<'a, A> Iterator for OrderedIter<'a, A>
+where
+ A: 'a + Ord,
+{
+ type Item = &'a A;
+
+ fn next(&mut self) -> Option {
+ self.it.next().map(|(v, _)| &v.0)
+ }
+
+ fn size_hint(&self) -> (usize, Option) {
+ self.it.size_hint()
+ }
+}
+
+impl<'a, A: Ord> ExactSizeIterator for OrderedIter<'a, A> {}
+
+impl<'a, A: Ord> FusedIterator for OrderedIter<'a, A> {}
+
/// A consuming iterator over the elements of a set.
pub struct ConsumingIter
where
@@ -884,6 +945,32 @@ impl ExactSizeIterator for ConsumingIter where A: Hash + Eq + Clone {}
impl FusedIterator for ConsumingIter where A: Hash + Eq + Clone {}
+/// An ordered consuming iterator over the elements of a set.
+pub struct OrderedConsumingIter
+ where A: Hash + Eq + Clone + Ord,
+{
+ it: NodeOrderedDrain>,
+}
+
+impl Iterator for OrderedConsumingIter
+where
+ A: Hash + Eq + Clone + Ord,
+{
+ type Item = A;
+
+ fn next(&mut self) -> Option {
+ self.it.next().map(|(v, _)| v.0)
+ }
+
+ fn size_hint(&self) -> (usize, Option) {
+ self.it.size_hint()
+ }
+}
+
+impl ExactSizeIterator for OrderedConsumingIter where A: Hash + Eq + Clone + Ord {}
+
+impl FusedIterator for OrderedConsumingIter where A: Hash + Eq + Clone + Ord {}
+
// Iterator conversions
impl FromIterator for HashSet
@@ -1063,6 +1150,7 @@ mod test {
use crate::test::LolHasher;
use ::proptest::num::i16;
use ::proptest::proptest;
+ use std::collections::hash_map::DefaultHasher;
use std::hash::BuildHasherDefault;
#[test]
@@ -1119,6 +1207,39 @@ mod test {
}
}
+ #[test]
+ fn consistent_ord() {
+ let mut set1 = HashSet::with_hasher(>::default());
+ let mut set2 = HashSet::with_hasher(>::default());
+
+ // Create two sets with identical elements but different insertion order.
+ // The sets are big enough to trigger hash collisions.
+ for i in 0..50_001 {
+ set1.insert(i);
+ set2.insert(50_000 - i);
+ }
+
+ // The sets are the same according to Eq, Ord, and Hash.
+ assert_eq!(set1, set2);
+ assert_eq!(set1.cmp(&set2), Ordering::Equal);
+
+ let mut s = DefaultHasher::new();
+ set1.hash(&mut s);
+ let hash1 = s.finish();
+
+ let mut s = DefaultHasher::new();
+ set2.hash(&mut s);
+ let hash2 = s.finish();
+ assert_eq!(hash1, hash2);
+
+ // Ordered iterators must yield elements in the same order.
+ let v1: Vec<_> = set1.into_ordered_iter().collect();
+ let v2: Vec<_> = set2.into_ordered_iter().collect();
+ // Don't use `assert_eq!`, as on error it allocates a vector
+ // of size `v1.len() * v2.len()` and runs out of memory.
+ assert!(v1 == v2);
+ }
+
proptest! {
#[test]
fn proptest_a_set(ref s in hash_set(".*", 10..100)) {
diff --git a/src/nodes/hamt.rs b/src/nodes/hamt.rs
index 8ee6157..0194739 100644
--- a/src/nodes/hamt.rs
+++ b/src/nodes/hamt.rs
@@ -549,6 +549,92 @@ impl<'a, A> ExactSizeIterator for Iter<'a, A> where A: 'a {}
impl<'a, A> FusedIterator for Iter<'a, A> where A: 'a {}
+// Ordered Ref iterator.
+// Given a deterministic hasher, this iterator yields values in a deterministic
+// order: two sets with the same elements are enumerated in the same order
+// regardless of the order in which elements were inserted in the sets. Items
+// are returned in the order of their hash values. Items with identical hash
+// values are sorted based on the `Ord` trait.
+
+pub(crate) struct OrderedIter<'a, A> {
+ count: usize,
+ stack: Vec, HashWidth>>,
+ current: ChunkIter<'a, Entry, HashWidth>,
+ collision: Option<(HashBits, std::vec::IntoIter<&'a A>)>,
+}
+
+impl<'a, A> OrderedIter<'a, A>
+where
+ A: 'a,
+{
+ pub(crate) fn new(root: &'a Node, size: usize) -> Self {
+ OrderedIter {
+ count: size,
+ stack: Vec::with_capacity((HASH_WIDTH / HASH_SHIFT) + 1),
+ current: root.data.iter(),
+ collision: None,
+ }
+ }
+}
+
+impl<'a, A> Iterator for OrderedIter<'a, A>
+where
+ A: 'a + Ord,
+{
+ type Item = (&'a A, HashBits);
+
+ fn next(&mut self) -> Option {
+ if self.count == 0 {
+ return None;
+ }
+ if self.collision.is_some() {
+ if let Some((hash, ref mut coll)) = self.collision {
+ match coll.next() {
+ None => {}
+ Some(value) => {
+ self.count -= 1;
+ return Some((value, hash));
+ }
+ }
+ }
+ self.collision = None;
+ return self.next();
+ }
+ match self.current.next() {
+ Some(Entry::Value(value, hash)) => {
+ self.count -= 1;
+ Some((value, *hash))
+ }
+ Some(Entry::Node(child)) => {
+ let current = mem::replace(&mut self.current, child.data.iter());
+ self.stack.push(current);
+ self.next()
+ }
+ Some(Entry::Collision(coll)) => {
+ let mut refs: Vec<&'a A> = coll.data.iter().collect();
+ refs.sort();
+ self.collision = Some((coll.hash, refs.into_iter()));
+ self.next()
+ }
+ None => match self.stack.pop() {
+ None => None,
+ Some(iter) => {
+ self.current = iter;
+ self.next()
+ }
+ },
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option) {
+ (self.count, Some(self.count))
+ }
+}
+
+impl<'a, A> ExactSizeIterator for OrderedIter<'a, A> where A: 'a + Ord {}
+
+impl<'a, A> FusedIterator for OrderedIter<'a, A> where A: 'a + Ord {}
+
// Mut ref iterator
pub(crate) struct IterMut<'a, A> {
@@ -713,6 +799,91 @@ impl ExactSizeIterator for Drain where A: Clone {}
impl FusedIterator for Drain where A: Clone {}
+// Ordered consuming iterator
+
+pub(crate) struct OrderedDrain
+where
+ A: HashValue,
+{
+ count: usize,
+ pool: Pool>,
+ stack: Vec>>,
+ current: PoolRef>,
+ collision: Option>,
+}
+
+impl OrderedDrain
+where
+ A: HashValue,
+{
+ pub(crate) fn new(pool: &Pool>, root: PoolRef>, size: usize) -> Self {
+ OrderedDrain {
+ count: size,
+ pool: pool.clone(),
+ stack: vec![],
+ current: root,
+ collision: None,
+ }
+ }
+}
+
+impl Iterator for OrderedDrain
+where
+ A: HashValue + Clone + Ord,
+{
+ type Item = (A, HashBits);
+
+ fn next(&mut self) -> Option {
+ if self.count == 0 {
+ return None;
+ }
+ if self.collision.is_some() {
+ if let Some(ref mut coll) = self.collision {
+ if let Some(value) = coll.data.pop() {
+ self.count -= 1;
+ return Some((value, coll.hash));
+ }
+ }
+ self.collision = None;
+ return self.next();
+ }
+ match PoolRef::make_mut(&self.pool, &mut self.current).data.pop() {
+ Some(Entry::Value(value, hash)) => {
+ self.count -= 1;
+ Some((value, hash))
+ }
+ Some(Entry::Collision(coll_ref)) => {
+ let mut coll = clone_ref(coll_ref);
+ // Sort the vector to ensure that elements with the same hash value
+ // come out in the same order regardless of the insertion order.
+ coll.data.sort();
+ self.collision = Some(coll);
+ self.next()
+ }
+ Some(Entry::Node(child)) => {
+ let parent = mem::replace(&mut self.current, child);
+ self.stack.push(parent);
+ self.next()
+ }
+ None => match self.stack.pop() {
+ None => None,
+ Some(parent) => {
+ self.current = parent;
+ self.next()
+ }
+ },
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option) {
+ (self.count, Some(self.count))
+ }
+}
+
+impl ExactSizeIterator for OrderedDrain where A: Clone + Ord {}
+
+impl FusedIterator for OrderedDrain where A: Clone + Ord {}
+
impl fmt::Debug for Node {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
write!(f, "Node[ ")?;