Skip to content

Add nonunique #175

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions cytoolz/itertoolz.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,28 @@ cdef class _unique_key:
cdef object seen


# Attribute declarations for the iterator that nonunique() returns when a
# key function is supplied (implementation lives in itertoolz.pyx).
cdef class _nonunique_key:
# Callable applied to every item to compute the tag used for comparison.
cdef object key
# Iterator obtained from the input sequence via iter(seq).
cdef object iter_seq
# Set of tags that have already been encountered.
cdef object seen


# Attribute declarations for the _unique_identity iterator class
# (implementation lives in itertoolz.pyx).
cdef class _unique_identity:
# NOTE(review): presumably the iterator over the input sequence -- confirm
# against __cinit__ in itertoolz.pyx.
cdef object iter_seq
# NOTE(review): presumably the set of items already yielded -- confirm
# against __next__ in itertoolz.pyx.
cdef object seen


# Attribute declarations for the iterator that nonunique() returns when no
# key function is supplied (implementation lives in itertoolz.pyx).
cdef class _nonunique_identity:
# Iterator obtained from the input sequence via iter(seq).
cdef object iter_seq
# Set of items that have already been encountered.
cdef object seen

# Forward declarations of cpdef functions implemented in itertoolz.pyx.
# ``key=*`` marks an optional argument whose default value is given in the
# implementation (``key=None`` there).

# Return only unique elements of a sequence.
cpdef object unique(object seq, object key=*)


# Return only the nonunique/duplicated elements of a sequence.
cpdef object nonunique(object seq, object key=*)


# Is x iterable?
cpdef object isiterable(object x)


Expand Down
66 changes: 61 additions & 5 deletions cytoolz/itertoolz.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,10 @@ cdef class interleave:
return val


# NOTE(review): this class only defines __iter__ (returning itself). No
# __next__ is defined here, nothing visible in this diff uses the class, and
# no matching declaration appears among the visible itertoolz.pxd changes.
# It looks like an unfinished shared base for the *_key iterators -- confirm
# whether it should be completed or dropped before merging.
cdef class _iter_seq_key:
def __iter__(self):
return self

cdef class _unique_key:
def __cinit__(self, object seq, object key):
self.iter_seq = iter(seq)
Expand All @@ -401,6 +405,25 @@ cdef class _unique_key:
PySet_Add(self.seen, tag)
return item

cdef class _nonunique_key:
    """
    Iterator over the items of ``seq`` whose ``key(item)`` was seen before

    The first item that produces a given tag is consumed silently and its tag
    is remembered; every later item that produces the same tag is yielded.
    """
    def __cinit__(self, object seq, object key):
        self.iter_seq = iter(seq)
        self.key = key
        self.seen = set()

    def __iter__(self):
        return self

    def __next__(self):
        cdef object elem, tag
        while True:
            # StopIteration raised by the underlying iterator propagates and
            # terminates this iterator as well.
            elem = next(self.iter_seq)
            tag = self.key(elem)
            if PySet_Contains(self.seen, tag):
                return elem
            # First occurrence of this tag: remember it and keep scanning.
            PySet_Add(self.seen, tag)


cdef class _unique_identity:
def __cinit__(self, object seq):
Expand All @@ -419,6 +442,22 @@ cdef class _unique_identity:
return item


cdef class _nonunique_identity:
    """
    Iterator over the items of ``seq`` that repeat an earlier item

    The first occurrence of each item is consumed silently and remembered;
    every later occurrence is yielded.
    """
    def __cinit__(self, object seq):
        self.iter_seq = iter(seq)
        self.seen = set()

    def __iter__(self):
        return self

    def __next__(self):
        cdef object elem
        while True:
            # StopIteration raised by the underlying iterator propagates and
            # terminates this iterator as well.
            elem = next(self.iter_seq)
            if PySet_Contains(self.seen, elem):
                return elem
            # First occurrence: remember it and keep scanning.
            PySet_Add(self.seen, elem)

cpdef object unique(object seq, object key=None):
"""
Return only unique elements of a sequence
Expand All @@ -432,13 +471,33 @@ cpdef object unique(object seq, object key=None):

>>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len))
('cat', 'mouse')

See also:
nonunique
"""
if key is None:
return _unique_identity(seq)
else:
return _unique_key(seq, key)


cpdef object nonunique(object seq, object key=None):
    """
    Return only the nonunique/duplicated elements of a sequence

    The first occurrence of an element is dropped; each later occurrence is
    yielded, so an element appearing three times is yielded twice.

    >>> tuple(nonunique((1, 2, 3, 1)))
    (1,)
    >>> tuple(nonunique((1, 2, 3)))
    ()
    >>> tuple(nonunique((1, 2, 1, 3, 1)))
    (1, 1)

    Elements are compared through ``key`` when it is given

    >>> tuple(nonunique(['cat', 'mouse', 'dog', 'hen'], key=len))
    ('dog', 'hen')

    See also:
        unique
    """
    if key is not None:
        return _nonunique_key(seq, key)
    return _nonunique_identity(seq)


cpdef object isiterable(object x):
"""
Is x iterable?
Expand Down Expand Up @@ -473,11 +532,8 @@ cpdef object isdistinct(object seq):
True
"""
if iter(seq) is seq:
seen = set()
for item in seq:
if PySet_Contains(seen, item):
return False
seen.add(item)
for item in _nonunique_identity(seq):
return False
return True
else:
return len(seq) == len(set(seq))
Expand Down
8 changes: 7 additions & 1 deletion cytoolz/tests/test_itertoolz.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pickle import dumps, loads
from cytoolz.itertoolz import (remove, groupby, merge_sorted,
concat, concatv, interleave, unique,
isiterable, getter,
nonunique, isiterable, getter,
mapcat, isdistinct, first, second,
nth, take, tail, drop, interpose, get,
rest, last, cons, frequencies,
Expand Down Expand Up @@ -105,6 +105,12 @@ def test_unique():
assert tuple(unique((1, 2, 3), key=iseven)) == (1, 2)


def test_nonunique():
    """nonunique yields only repeated items, optionally compared via key."""
    # Nothing repeats, so nothing is produced.
    distinct = tuple(nonunique((1, 2, 3)))
    assert distinct == ()

    # Every occurrence after the first is produced.
    repeated = tuple(nonunique((1, 2, 1, 3, 1)))
    assert repeated == (1, 1)

    # With a key, items repeat when their key value was seen before.
    by_parity = tuple(nonunique((1, 2, 3, 4), key=iseven))
    assert by_parity == (3, 4)


def test_isiterable():
assert isiterable([1, 2, 3]) is True
assert isiterable('abc') is True
Expand Down