Skip to content

Commit 13858f6

Browse files
jbrockmendel authored and TomAugspurger committed
BUG: validate Index data is 1D + deprecate multi-dim indexing (#30588)
* BUG: validate Index data is 1D
1 parent 2bdb355 commit 13858f6

File tree

16 files changed

+121
-58
lines changed

16 files changed

+121
-58
lines changed

doc/source/whatsnew/v1.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,7 @@ Deprecations
706706
- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
707707
- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`)
708708
- The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
709+
- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on an :class:`Index` is deprecated and will be removed in a future version. Convert to a NumPy array before indexing instead (:issue:`30588`)
709710

710711
**Selecting Columns from a Grouped DataFrame**
711712

@@ -1168,6 +1169,7 @@ Other
11681169
- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
11691170
- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)
11701171
- Handle nested NumPy ``object`` arrays in :func:`testing.assert_series_equal` for ExtensionArray implementations (:issue:`30841`)
1172+
- Bug in :class:`Index` constructor incorrectly allowing 2-dimensional input arrays (:issue:`13601`, :issue:`27125`)
11711173

11721174
.. ---------------------------------------------------------------------------
11731175

pandas/core/arrays/categorical.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2007,9 +2007,10 @@ def __getitem__(self, key):
20072007
if com.is_bool_indexer(key):
20082008
key = check_bool_array_indexer(self, key)
20092009

2010-
return self._constructor(
2011-
values=self._codes[key], dtype=self.dtype, fastpath=True
2012-
)
2010+
result = self._codes[key]
2011+
if result.ndim > 1:
2012+
return result
2013+
return self._constructor(result, dtype=self.dtype, fastpath=True)
20132014

20142015
def __setitem__(self, key, value):
20152016
"""

pandas/core/arrays/datetimelike.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -543,8 +543,6 @@ def __getitem__(self, key):
543543
if result.ndim > 1:
544544
# To support MPL which performs slicing with 2 dim
545545
# even though it only has 1 dim by definition
546-
if is_period:
547-
return self._simple_new(result, dtype=self.dtype, freq=freq)
548546
return result
549547

550548
return self._simple_new(result, dtype=self.dtype, freq=freq)

pandas/core/arrays/interval.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,8 +500,11 @@ def __getitem__(self, value):
500500

501501
# scalar
502502
if not isinstance(left, ABCIndexClass):
503-
if isna(left):
503+
if is_scalar(left) and isna(left):
504504
return self._fill_value
505+
if np.ndim(left) > 1:
506+
# GH#30588 multi-dimensional indexer disallowed
507+
raise ValueError("multi-dimensional indexing not allowed")
505508
return Interval(left, right, self.closed)
506509

507510
return self._shallow_copy(left, right)

pandas/core/indexes/base.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,9 @@ def __new__(
393393

394394
if kwargs:
395395
raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
396+
if subarr.ndim > 1:
397+
# GH#13601, GH#20285, GH#27125
398+
raise ValueError("Index data must be 1-dimensional")
396399
return cls._simple_new(subarr, name, **kwargs)
397400

398401
elif hasattr(data, "__array__"):
@@ -608,7 +611,7 @@ def __array_wrap__(self, result, context=None):
608611
Gets called after a ufunc.
609612
"""
610613
result = lib.item_from_zerodim(result)
611-
if is_bool_dtype(result) or lib.is_scalar(result):
614+
if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
612615
return result
613616

614617
attrs = self._get_attributes_dict()
@@ -687,11 +690,10 @@ def astype(self, dtype, copy=True):
687690
return Index(np.asarray(self), dtype=dtype, copy=copy)
688691

689692
try:
690-
return Index(
691-
self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype
692-
)
693+
casted = self.values.astype(dtype, copy=copy)
693694
except (TypeError, ValueError):
694695
raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}")
696+
return Index(casted, name=self.name, dtype=dtype)
695697

696698
_index_shared_docs[
697699
"take"
@@ -3902,6 +3904,9 @@ def __getitem__(self, key):
39023904
key = com.values_from_object(key)
39033905
result = getitem(key)
39043906
if not is_scalar(result):
3907+
if np.ndim(result) > 1:
3908+
deprecate_ndim_indexing(result)
3909+
return result
39053910
return promote(result)
39063911
else:
39073912
return result
@@ -5533,3 +5538,17 @@ def _try_convert_to_int_array(
55335538
pass
55345539

55355540
raise ValueError
5541+
5542+
5543+
def deprecate_ndim_indexing(result):
5544+
if np.ndim(result) > 1:
5545+
# GH#27125 indexer like idx[:, None] expands dim, but we
5546+
# cannot do that and keep an index, so return ndarray
5547+
# Deprecation GH#30588
5548+
warnings.warn(
5549+
"Support for multi-dimensional indexing (e.g. `index[:, None]`) "
5550+
"on an Index is deprecated and will be removed in a future "
5551+
"version. Convert to a numpy array before indexing instead.",
5552+
DeprecationWarning,
5553+
stacklevel=3,
5554+
)

pandas/core/indexes/extension.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from pandas.core.dtypes.generic import ABCSeries
1313

1414
from pandas.core.arrays import ExtensionArray
15-
from pandas.core.indexes.base import Index
15+
from pandas.core.indexes.base import Index, deprecate_ndim_indexing
1616
from pandas.core.ops import get_op_result_name
1717

1818

@@ -178,6 +178,7 @@ def __getitem__(self, key):
178178
return type(self)(result, name=self.name)
179179

180180
# Includes cases where we get a 2D ndarray back for MPL compat
181+
deprecate_ndim_indexing(result)
181182
return result
182183

183184
def __iter__(self):

pandas/core/indexes/numeric.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None):
7373
else:
7474
subarr = data
7575

76+
if subarr.ndim > 1:
77+
# GH#13601, GH#20285, GH#27125
78+
raise ValueError("Index data must be 1-dimensional")
79+
7680
name = maybe_extract_name(name, data, cls)
7781
return cls._simple_new(subarr, name=name)
7882

pandas/tests/indexes/categorical/test_category.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,3 +975,9 @@ def test_engine_type(self, dtype, engine_type):
975975
ci.values._codes = ci.values._codes.astype("int64")
976976
assert np.issubdtype(ci.codes.dtype, dtype)
977977
assert isinstance(ci._engine, engine_type)
978+
979+
def test_getitem_2d_deprecated(self):
980+
# GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
981+
idx = self.create_index()
982+
with pytest.raises(ValueError, match="cannot mask with array containing NA"):
983+
idx[:, None]

pandas/tests/indexes/common.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -875,3 +875,11 @@ def test_engine_reference_cycle(self):
875875
nrefs_pre = len(gc.get_referrers(index))
876876
index._engine
877877
assert len(gc.get_referrers(index)) == nrefs_pre
878+
879+
def test_getitem_2d_deprecated(self):
880+
# GH#30588
881+
idx = self.create_index()
882+
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
883+
res = idx[:, None]
884+
885+
assert isinstance(res, np.ndarray), type(res)

pandas/tests/indexes/datetimes/test_indexing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@ def test_dti_business_getitem(self):
8686

8787
def test_dti_business_getitem_matplotlib_hackaround(self):
8888
rng = pd.bdate_range(START, END)
89-
values = rng[:, None]
89+
with tm.assert_produces_warning(DeprecationWarning):
90+
# GH#30588 multi-dimensional indexing deprecated
91+
values = rng[:, None]
9092
expected = rng.values[:, None]
9193
tm.assert_numpy_array_equal(values, expected)
9294

@@ -110,7 +112,9 @@ def test_dti_custom_getitem(self):
110112

111113
def test_dti_custom_getitem_matplotlib_hackaround(self):
112114
rng = pd.bdate_range(START, END, freq="C")
113-
values = rng[:, None]
115+
with tm.assert_produces_warning(DeprecationWarning):
116+
# GH#30588 multi-dimensional indexing deprecated
117+
values = rng[:, None]
114118
expected = rng.values[:, None]
115119
tm.assert_numpy_array_equal(values, expected)
116120

0 commit comments

Comments
 (0)