Skip to content

Commit c13dc7d

Browse files
committed
Merge pull request #11252 from jreback/isin
COMPAT/PERF: lib.ismember_int64 on older numpies/cython not comparing correctly #11232
2 parents ef9a79d + 7725766 commit c13dc7d

File tree

14 files changed

+146
-58
lines changed

14 files changed

+146
-58
lines changed

asv_bench/benchmarks/series_methods.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,21 @@ class series_isin_int64(object):
55
goal_time = 0.2
66

77
def setup(self):
8-
self.s1 = Series(np.random.randn(10000))
9-
self.s2 = Series(np.random.randint(1, 10, 10000))
108
self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64')
9+
self.s4 = Series(np.random.randint(1, 100, 10000000)).astype('int64')
1110
self.values = [1, 2]
12-
self.s4 = self.s3.astype('object')
1311

1412
def time_series_isin_int64(self):
1513
self.s3.isin(self.values)
1614

15+
def time_series_isin_int64_large(self):
16+
self.s4.isin(self.values)
17+
1718

1819
class series_isin_object(object):
1920
goal_time = 0.2
2021

2122
def setup(self):
22-
self.s1 = Series(np.random.randn(10000))
23-
self.s2 = Series(np.random.randint(1, 10, 10000))
2423
self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64')
2524
self.values = [1, 2]
2625
self.s4 = self.s3.astype('object')
@@ -71,4 +70,4 @@ def setup(self):
7170

7271
def time_series_nsmallest2(self):
7372
self.s2.nsmallest(3, take_last=True)
74-
self.s2.nsmallest(3, take_last=False)
73+
self.s2.nsmallest(3, take_last=False)

ci/requirements-2.6.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
numpy=1.7.0
1+
numpy=1.7.1
22
cython=0.19.1
33
dateutil=1.5
44
pytz=2013b

ci/requirements-2.6.run

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
1-
numpy=1.7.0
1+
numpy=1.7.1
22
dateutil=1.5
33
pytz=2013b
44
scipy=0.11.0
55
xlwt=0.7.5
66
xlrd=0.9.2
77
statsmodels=0.4.3
8+
bottleneck=0.8.0
9+
numexpr=2.2.2
10+
pytables=3.0.0
811
html5lib=1.0b2
912
beautiful-soup=4.2.0
1013
psycopg2=2.5.1
11-
numexpr=1.4.2
1214
pymysql=0.6.0
1315
sqlalchemy=0.7.8
1416
xlsxwriter=0.4.6

ci/requirements-2.7.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
dateutil=2.1
22
pytz=2013b
3-
numpy=1.7.1
3+
numpy
44
cython=0.19.1

ci/requirements-2.7.run

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
dateutil=2.1
22
pytz=2013b
3-
numpy=1.7.1
3+
numpy
44
xlwt=0.7.5
5-
numexpr=2.2.2
6-
pytables=3.0.0
7-
matplotlib=1.3.1
5+
numexpr
6+
pytables
7+
matplotlib
88
openpyxl=1.6.2
99
xlrd=0.9.2
1010
sqlalchemy=0.9.6
1111
lxml=3.2.1
1212
scipy
1313
xlsxwriter=0.4.6
1414
boto=2.36.0
15-
bottleneck=0.8.0
15+
bottleneck
1616
psycopg2=2.5.2
1717
patsy
1818
pymysql=0.6.3

ci/requirements-2.7_SLOW.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
python-dateutil
22
pytz
3-
numpy
3+
numpy=1.8.2
44
cython

ci/requirements-2.7_SLOW.run

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
python-dateutil
22
pytz
3-
numpy
4-
matplotlib
3+
numpy=1.8.2
4+
matplotlib=1.3.1
55
scipy
66
patsy
77
statsmodels

doc/source/install.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ Dependencies
214214
------------
215215

216216
* `setuptools <http://pythonhosted.org/setuptools>`__
217-
* `NumPy <http://www.numpy.org>`__: 1.7.0 or higher
217+
* `NumPy <http://www.numpy.org>`__: 1.7.1 or higher
218218
* `python-dateutil <http://labix.org/python-dateutil>`__ 1.5 or higher
219219
* `pytz <http://pytz.sourceforge.net/>`__
220220
* Needed for time zone support

doc/source/whatsnew/v0.17.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,7 @@ Bug Fixes
10341034
~~~~~~~~~
10351035

10361036
- Bug in incorrect computation of ``.mean()`` on ``timedelta64[ns]`` because of overflow (:issue:`9442`)
1037+
- Bug in ``.isin`` on older numpies (:issue:`11232`)
10371038
- Bug in ``DataFrame.to_html(index=False)`` renders unnecessary ``name`` row (:issue:`10344`)
10381039
- Bug in ``DataFrame.to_latex()`` the ``column_format`` argument could not be passed (:issue:`9402`)
10391040
- Bug in ``DatetimeIndex`` when localizing with ``NaT`` (:issue:`10477`)

pandas/core/algorithms.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from warnings import warn
77
import numpy as np
88

9+
from pandas import compat, lib, _np_version_under1p8
910
import pandas.core.common as com
1011
import pandas.algos as algos
1112
import pandas.hashtable as htable
@@ -66,6 +67,54 @@ def unique(values):
6667
return _hashtable_algo(f, values.dtype)
6768

6869

70+
def isin(comps, values):
    """
    Test each element of ``comps`` for membership in ``values``.

    Parameters
    ----------
    comps: array-like
        elements to test; converted with ``np.asarray``
    values: array-like
        candidate members

    Returns
    -------
    boolean array same length as comps

    Raises
    ------
    TypeError
        if ``comps`` or ``values`` is not list-like
    """

    if not com.is_list_like(comps):
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(type(comps).__name__))
    comps = np.asarray(comps)
    if not com.is_list_like(values):
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(type(values).__name__))

    # GH11232
    # np.in1d is the work-around for numpy < 1.8 comparisons on py3,
    # and it is also the faster choice for large inputs
    use_in1d = ((_np_version_under1p8 and compat.PY3) or
                len(comps) > 1000000)

    # datetime-like data is compared through its i8 view so that the
    # int64 membership routines below can be used
    if com.is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view('i8')
        comps = comps.view('i8')
    elif com.is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view('i8')
        comps = comps.view('i8')
    elif not com.is_int64_dtype(comps):
        # any other dtype always goes through the generic routine,
        # regardless of numpy version or input size
        return lib.ismember(comps, set(values))

    if use_in1d:
        return np.in1d(comps, np.asarray(list(values)))
    return lib.ismember_int64(comps, set(values))
117+
69118
def _hashtable_algo(f, dtype, return_dtype=None):
70119
"""
71120
f(HashTable, type_caster) -> result

0 commit comments

Comments
 (0)