Skip to content

Commit

Permalink
Ordered iterators.
Browse files Browse the repository at this point in the history
Add ordered iterators over HAMT and `HashSet`.  Given two sets with the
same elements and the same hashers, the new iterators enumerate these
sets in the same order.  This is in contrast to existing iterators that
do not guarantee consistent ordering for elements with the same hash
values.  Consistent ordering is achieved by sorting collision nodes on
the fly during iteration.

We use the new iterators to fix the implementation of `Ord`, `PartialOrd`,
and `Hash` for `HashSet` (bodil#175).
  • Loading branch information
ryzhyk committed Feb 21, 2021
1 parent 3f4e01a commit 46f13d8
Show file tree
Hide file tree
Showing 3 changed files with 305 additions and 13 deletions.
2 changes: 1 addition & 1 deletion benches/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ extern crate im;
extern crate rand;
extern crate test;

use rand::{rngs::SmallRng, SeedableRng, Rng};
use rand::{rngs::SmallRng, Rng, SeedableRng};
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::iter::FromIterator;
use test::Bencher;
Expand Down
145 changes: 133 additions & 12 deletions src/hash/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,13 @@ use std::iter::FusedIterator;
use std::iter::{FromIterator, IntoIterator, Sum};
use std::ops::{Add, Deref, Mul};

use crate::nodes::hamt::{hash_key, Drain as NodeDrain, HashValue, Iter as NodeIter, Node};
use crate::nodes::hamt::{
hash_key, Drain as NodeDrain, HashValue, Iter as NodeIter, Node,
OrderedDrain as NodeOrderedDrain, OrderedIter as NodeOrderedIter,
};
use crate::ordset::OrdSet;
use crate::Vector;
use crate::util::{Pool, PoolRef, Ref};
use crate::Vector;

/// Construct a set from a sequence of values.
///
Expand Down Expand Up @@ -313,15 +316,44 @@ impl<A, S> HashSet<A, S> {

/// Iterate over the values in a hash set.
///
/// The iteration order is unspecified; no ordering guarantees are provided.
#[must_use]
pub fn iter(&self) -> Iter<'_, A> {
    // Build the node-level iterator first, then wrap it in the public type.
    let it = NodeIter::new(&self.root, self.size);
    Iter { it }
}

/// Get an ordered iterator over the values in a hash set.
///
/// Please note that the order is consistent between sets using
/// the same hasher, but no other ordering guarantee is offered.
/// Items will not come out in insertion order or sort order.
/// They will, however, come out in the same order every time for
/// the same set.
#[must_use]
pub fn iter(&self) -> Iter<'_, A> {
Iter {
it: NodeIter::new(&self.root, self.size),
pub fn ordered_iter(&self) -> OrderedIter<'_, A> {
OrderedIter {
it: NodeOrderedIter::new(&self.root, self.size),
}
}
}

impl<A, S> HashSet<A, S>
where
    A: Hash + Clone + Ord,
{
    /// Consume the set and get an ordered iterator over its values.
    ///
    /// Please note that the order is consistent between sets using
    /// the same hasher, but no other ordering guarantee is offered.
    /// Items will not come out in insertion order or sort order.
    /// They will, however, come out in the same order every time for
    /// the same set.
    #[must_use]
    pub fn into_ordered_iter(self) -> OrderedConsumingIter<A> {
        // The node-level drain takes ownership of the root; the public
        // iterator is a thin wrapper around it.
        let it = NodeOrderedDrain::new(&self.pool.0, self.root, self.size);
        OrderedConsumingIter { it }
    }
}
Expand Down Expand Up @@ -667,14 +699,14 @@ where

impl<A, S> PartialOrd for HashSet<A, S>
where
A: Hash + Eq + Clone + PartialOrd,
A: Hash + Eq + Clone + PartialOrd + Ord,
S: BuildHasher + Default,
{
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
if Ref::ptr_eq(&self.hasher, &other.hasher) {
return self.iter().partial_cmp(other.iter());
return self.ordered_iter().partial_cmp(other.ordered_iter());
}
self.iter().partial_cmp(other.iter())
self.ordered_iter().partial_cmp(other.ordered_iter())
}
}

Expand All @@ -685,22 +717,22 @@ where
{
fn cmp(&self, other: &Self) -> Ordering {
if Ref::ptr_eq(&self.hasher, &other.hasher) {
return self.iter().cmp(other.iter());
return self.ordered_iter().cmp(other.ordered_iter());
}
self.iter().cmp(other.iter())
self.ordered_iter().cmp(other.ordered_iter())
}
}

impl<A, S> Hash for HashSet<A, S>
where
A: Hash + Eq,
A: Hash + Eq + Ord,
S: BuildHasher + Default,
{
fn hash<H>(&self, state: &mut H)
where
H: Hasher,
{
for i in self.iter() {
for i in self.ordered_iter() {
i.hash(state);
}
}
Expand Down Expand Up @@ -857,6 +889,35 @@ impl<'a, A> ExactSizeIterator for Iter<'a, A> {}

impl<'a, A> FusedIterator for Iter<'a, A> {}

/// An ordered iterator over the elements of a set.
///
/// Given a deterministic hasher, this iterator yields values in a deterministic
/// order: two sets with the same elements are enumerated in the same order
/// regardless of the order in which elements were inserted in the sets. Items
/// are returned in the order of their hash values. Items with identical hash
/// values are sorted based on the `Ord` trait.
pub struct OrderedIter<'a, A> {
// Node-level ordered iterator over the set's `Value<A>` entries that this
// type wraps.
it: NodeOrderedIter<'a, Value<A>>,
}

impl<'a, A> Iterator for OrderedIter<'a, A>
where
    A: Ord + 'a,
{
    type Item = &'a A;

    /// Pull the next entry from the wrapped node iterator and hand out a
    /// reference to the element it stores; the second tuple component is
    /// discarded.
    fn next(&mut self) -> Option<Self::Item> {
        let (entry, _) = self.it.next()?;
        Some(&entry.0)
    }

    /// Forwarded unchanged from the wrapped node iterator.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a, A> ExactSizeIterator for OrderedIter<'a, A> where A: Ord {}

impl<'a, A> FusedIterator for OrderedIter<'a, A> where A: Ord {}

/// A consuming iterator over the elements of a set.
pub struct ConsumingIter<A>
where
Expand Down Expand Up @@ -884,6 +945,32 @@ impl<A> ExactSizeIterator for ConsumingIter<A> where A: Hash + Eq + Clone {}

impl<A> FusedIterator for ConsumingIter<A> where A: Hash + Eq + Clone {}

/// An ordered consuming iterator over the elements of a set.
///
/// Returned by `HashSet::into_ordered_iter`; yields the set's values by
/// value (`Item = A`).
pub struct OrderedConsumingIter<A>
where A: Hash + Eq + Clone + Ord,
{
// Node-level ordered drain over the set's `Value<A>` entries that this
// type wraps.
it: NodeOrderedDrain<Value<A>>,
}

impl<A> Iterator for OrderedConsumingIter<A>
where
    A: Hash + Eq + Clone + Ord,
{
    type Item = A;

    /// Pull the next entry from the wrapped node drain and move the stored
    /// element out of it; the second tuple component is discarded.
    fn next(&mut self) -> Option<Self::Item> {
        let (entry, _) = self.it.next()?;
        Some(entry.0)
    }

    /// Forwarded unchanged from the wrapped node drain.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<A> ExactSizeIterator for OrderedConsumingIter<A>
where
    A: Hash + Eq + Clone + Ord,
{
}

impl<A> FusedIterator for OrderedConsumingIter<A>
where
    A: Hash + Eq + Clone + Ord,
{
}

// Iterator conversions

impl<A, RA, S> FromIterator<RA> for HashSet<A, S>
Expand Down Expand Up @@ -1063,6 +1150,7 @@ mod test {
use crate::test::LolHasher;
use ::proptest::num::i16;
use ::proptest::proptest;
use std::collections::hash_map::DefaultHasher;
use std::hash::BuildHasherDefault;

#[test]
Expand Down Expand Up @@ -1119,6 +1207,39 @@ mod test {
}
}

#[test]
fn consistent_ord() {
    /// Feed `value` into a fresh `DefaultHasher` and return the digest.
    fn digest<T: Hash>(value: &T) -> u64 {
        let mut state = DefaultHasher::new();
        value.hash(&mut state);
        state.finish()
    }

    // Largest element inserted; the sets hold `0..=LAST`.
    const LAST: i32 = 50_000;

    let mut ascending = HashSet::with_hasher(BuildHasherDefault::<DefaultHasher>::default());
    let mut descending = HashSet::with_hasher(BuildHasherDefault::<DefaultHasher>::default());

    // Fill both sets with identical elements but in opposite insertion order.
    // The sets are big enough to trigger hash collisions.
    for i in 0..=LAST {
        ascending.insert(i);
        descending.insert(LAST - i);
    }

    // The sets are the same according to Eq, Ord, and Hash.
    assert_eq!(ascending, descending);
    assert_eq!(ascending.cmp(&descending), Ordering::Equal);

    let ascending_digest = digest(&ascending);
    let descending_digest = digest(&descending);
    assert_eq!(ascending_digest, descending_digest);

    // Ordered iterators must yield elements in the same order.
    let from_ascending: Vec<_> = ascending.into_ordered_iter().collect();
    let from_descending: Vec<_> = descending.into_ordered_iter().collect();
    // Don't use `assert_eq!`, as on error it allocates a vector
    // of size `v1.len() * v2.len()` and runs out of memory.
    assert!(from_ascending == from_descending);
}

proptest! {
#[test]
fn proptest_a_set(ref s in hash_set(".*", 10..100)) {
Expand Down
Loading

0 comments on commit 46f13d8

Please sign in to comment.