Add nonunique() to cytoolz.itertoolz

nonunique(seq, key=None) is the complement of unique(): it lazily yields
every element whose value (or key(value)) has already been seen earlier
in the sequence.  Two cdef iterator classes back it (_nonunique_key and
_nonunique_identity), and isdistinct() now reuses _nonunique_identity for
one-shot iterators instead of keeping its own "seen" set.

Review notes:
  * Line structure was rebuilt from the hunk headers after the patch had
    been collapsed onto single lines.  Blank context lines and the
    continuation indent of the import in test_itertoolz.py are
    reconstructions; confirm against the target tree (or apply with
    --ignore-whitespace) before relying on them.
  * The unused `_iter_seq_key` class from the original .pyx hunk was
    dropped: it defined only __iter__, had no .pxd declaration and was
    referenced nowhere.  Later .pyx hunk offsets were renumbered.
  * The post-image blob id on the .pyx "index" line predates these edits.

diff --git a/cytoolz/itertoolz.pxd b/cytoolz/itertoolz.pxd
index 57e0609..8793a94 100644
--- a/cytoolz/itertoolz.pxd
+++ b/cytoolz/itertoolz.pxd
@@ -47,14 +47,28 @@ cdef class _unique_key:
     cdef object seen
 
 
+cdef class _nonunique_key:
+    cdef object key
+    cdef object iter_seq
+    cdef object seen
+
+
 cdef class _unique_identity:
     cdef object iter_seq
     cdef object seen
+
+
+cdef class _nonunique_identity:
+    cdef object iter_seq
+    cdef object seen
 
 
 cpdef object unique(object seq, object key=*)
 
 
+cpdef object nonunique(object seq, object key=*)
+
+
 cpdef object isiterable(object x)
 
 
diff --git a/cytoolz/itertoolz.pyx b/cytoolz/itertoolz.pyx
index e138f71..17e924b 100644
--- a/cytoolz/itertoolz.pyx
+++ b/cytoolz/itertoolz.pyx
@@ -401,6 +401,26 @@ cdef class _unique_key:
         PySet_Add(self.seen, tag)
         return item
 
+cdef class _nonunique_key:
+    def __cinit__(self, object seq, object key):
+        self.iter_seq = iter(seq)
+        self.key = key
+        self.seen = set()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        cdef object item, tag
+        item = next(self.iter_seq)
+        tag = self.key(item)
+        # Record first-time tags and keep going until one repeats.
+        while not PySet_Contains(self.seen, tag):
+            PySet_Add(self.seen, tag)
+            item = next(self.iter_seq)
+            tag = self.key(item)
+        return item
+
 
 cdef class _unique_identity:
     def __cinit__(self, object seq):
@@ -419,6 +439,23 @@ cdef class _unique_identity:
         return item
 
 
+cdef class _nonunique_identity:
+    def __cinit__(self, object seq):
+        self.iter_seq = iter(seq)
+        self.seen = set()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        cdef object item
+        item = next(self.iter_seq)
+        # Record first-time items and keep going until one repeats.
+        while not PySet_Contains(self.seen, item):
+            PySet_Add(self.seen, item)
+            item = next(self.iter_seq)
+        return item
+
 cpdef object unique(object seq, object key=None):
     """
     Return only unique elements of a sequence
@@ -432,6 +469,9 @@ cpdef object unique(object seq, object key=None):
 
     >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len))
     ('cat', 'mouse')
+
+    See also:
+        nonunique
     """
     if key is None:
         return _unique_identity(seq)
@@ -439,6 +479,32 @@ cpdef object unique(object seq, object key=None):
         return _unique_key(seq, key)
 
 
+cpdef object nonunique(object seq, object key=None):
+    """Return only the nonunique/duplicated elements of a sequence.
+
+    Every occurrence after the first is yielded, lazily and in order.
+
+    >>> tuple(nonunique((1, 2, 3, 1)))
+    (1,)
+    >>> tuple(nonunique((1, 2, 3)))
+    ()
+    >>> tuple(nonunique((1, 2, 1, 3, 1)))
+    (1, 1)
+
+    Duplication can be defined by key keyword
+
+    >>> tuple(nonunique(['cat', 'mouse', 'dog', 'hen'], key=len))
+    ('dog', 'hen')
+
+    See also:
+        unique
+    """
+    if key is None:
+        return _nonunique_identity(seq)
+    else:
+        return _nonunique_key(seq, key)
+
+
 cpdef object isiterable(object x):
     """
     Is x iterable?
@@ -473,11 +539,8 @@ cpdef object isdistinct(object seq):
     True
     """
     if iter(seq) is seq:
-        seen = set()
-        for item in seq:
-            if PySet_Contains(seen, item):
-                return False
-            seen.add(item)
+        for item in _nonunique_identity(seq):
+            return False
         return True
     else:
         return len(seq) == len(set(seq))
diff --git a/cytoolz/tests/test_itertoolz.py b/cytoolz/tests/test_itertoolz.py
index 1e77a26..69383bd 100644
--- a/cytoolz/tests/test_itertoolz.py
+++ b/cytoolz/tests/test_itertoolz.py
@@ -6,7 +6,7 @@ from pickle import dumps, loads
 
 from cytoolz.itertoolz import (remove, groupby, merge_sorted,
                                concat, concatv, interleave, unique,
-                               isiterable, getter,
+                               nonunique, isiterable, getter,
                                mapcat, isdistinct, first, second,
                                nth, take, tail, drop, interpose, get,
                                rest, last, cons, frequencies,
@@ -105,6 +105,12 @@ def test_unique():
     assert tuple(unique((1, 2, 3), key=iseven)) == (1, 2)
 
 
+def test_nonunique():
+    assert tuple(nonunique((1, 2, 3))) == ()
+    assert tuple(nonunique((1, 2, 1, 3, 1))) == (1, 1)
+    assert tuple(nonunique((1, 2, 3, 4), key=iseven)) == (3, 4)
+
+
 def test_isiterable():
     assert isiterable([1, 2, 3]) is True
     assert isiterable('abc') is True