Skip to content

Commit be2cb8c

Browse files
authored
CLN: remove and update for outdated _item_cache (#61789)
* CLN: remove and update for outdated _item_cache * CLN: remove outdated _item_cache in comment * CLN: rollback unittest unrelated to _item_cache
1 parent b9d5732 commit be2cb8c

File tree

11 files changed

+0
-168
lines changed

11 files changed

+0
-168
lines changed

pandas/core/generic.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
236236

237237
_internal_names: list[str] = [
238238
"_mgr",
239-
"_item_cache",
240239
"_cache",
241240
"_name",
242241
"_metadata",

pandas/core/internals/managers.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1898,10 +1898,6 @@ def _consolidate_check(self) -> None:
18981898
self._known_consolidated = True
18991899

19001900
def _consolidate_inplace(self) -> None:
1901-
# In general, _consolidate_inplace should only be called via
1902-
# DataFrame._consolidate_inplace, otherwise we will fail to invalidate
1903-
# the DataFrame's _item_cache. The exception is for newly-created
1904-
# BlockManager objects not yet attached to a DataFrame.
19051901
if not self.is_consolidated():
19061902
self.blocks = _consolidate(self.blocks)
19071903
self._is_consolidated = True

pandas/tests/frame/indexing/test_insert.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
import numpy as np
88
import pytest
99

10-
from pandas.errors import PerformanceWarning
11-
1210
from pandas import (
1311
DataFrame,
1412
Index,
@@ -72,19 +70,6 @@ def test_insert_with_columns_dups(self):
7270
)
7371
tm.assert_frame_equal(df, exp)
7472

75-
def test_insert_item_cache(self, performance_warning):
76-
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
77-
ser = df[0]
78-
expected_warning = PerformanceWarning if performance_warning else None
79-
80-
with tm.assert_produces_warning(expected_warning):
81-
for n in range(100):
82-
df[n + 3] = df[1] * n
83-
84-
ser.iloc[0] = 99
85-
assert df.iloc[0, 0] == df[0][0]
86-
assert df.iloc[0, 0] != 99
87-
8873
def test_insert_EA_no_warning(self):
8974
# PerformanceWarning about fragmented frame should not be raised when
9075
# using EAs (https://github.com/pandas-dev/pandas/issues/44098)

pandas/tests/frame/methods/test_cov_corr.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -207,20 +207,6 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method):
207207
expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
208208
tm.assert_frame_equal(result, expected)
209209

210-
def test_corr_item_cache(self):
211-
# Check that corr does not lead to incorrect entries in item_cache
212-
213-
df = DataFrame({"A": range(10)})
214-
df["B"] = range(10)[::-1]
215-
216-
ser = df["A"] # populate item_cache
217-
assert len(df._mgr.blocks) == 2
218-
219-
_ = df.corr(numeric_only=True)
220-
221-
ser.iloc[0] = 99
222-
assert df.loc[0, "A"] == 0
223-
224210
@pytest.mark.parametrize("length", [2, 20, 200, 2000])
225211
def test_corr_for_constant_columns(self, length):
226212
# GH: 37448

pandas/tests/frame/methods/test_quantile.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -721,22 +721,6 @@ def test_quantile_empty_no_columns(self, interp_method):
721721
expected.columns.name = "captain tightpants"
722722
tm.assert_frame_equal(result, expected)
723723

724-
def test_quantile_item_cache(self, interp_method):
725-
# previous behavior incorrect retained an invalid _item_cache entry
726-
interpolation, method = interp_method
727-
df = DataFrame(
728-
np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"]
729-
)
730-
df["D"] = df["A"] * 2
731-
ser = df["A"]
732-
assert len(df._mgr.blocks) == 2
733-
734-
df.quantile(numeric_only=False, interpolation=interpolation, method=method)
735-
736-
ser.iloc[0] = 99
737-
assert df.iloc[0, 0] == df["A"][0]
738-
assert df.iloc[0, 0] != 99
739-
740724
def test_invalid_method(self):
741725
with pytest.raises(ValueError, match="Invalid method: foo"):
742726
DataFrame(range(1)).quantile(0.5, method="foo")

pandas/tests/frame/methods/test_sort_values.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -592,21 +592,6 @@ def test_sort_values_nat_na_position_default(self):
592592
result = expected.sort_values(["A", "date"])
593593
tm.assert_frame_equal(result, expected)
594594

595-
def test_sort_values_item_cache(self):
596-
# previous behavior incorrect retained an invalid _item_cache entry
597-
df = DataFrame(
598-
np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"]
599-
)
600-
df["D"] = df["A"] * 2
601-
ser = df["A"]
602-
assert len(df._mgr.blocks) == 2
603-
604-
df.sort_values(by="A")
605-
606-
ser.iloc[0] = 99
607-
assert df.iloc[0, 0] == df["A"][0]
608-
assert df.iloc[0, 0] != 99
609-
610595
def test_sort_values_reshaping(self):
611596
# GH 39426
612597
values = list(range(21))

pandas/tests/frame/methods/test_to_dict_of_blocks.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,8 @@
1-
import numpy as np
2-
import pytest
3-
4-
from pandas._config import using_string_dtype
5-
61
from pandas import (
72
DataFrame,
83
MultiIndex,
94
)
105
import pandas._testing as tm
11-
from pandas.core.arrays import NumpyExtensionArray
126

137

148
class TestToDictOfBlocks:
@@ -27,22 +21,6 @@ def test_no_copy_blocks(self, float_frame):
2721
assert _last_df is not None and not _last_df[column].equals(df[column])
2822

2923

30-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
31-
def test_to_dict_of_blocks_item_cache():
32-
# Calling to_dict_of_blocks should not poison item_cache
33-
df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})
34-
df["c"] = NumpyExtensionArray(np.array([1, 2, None, 3], dtype=object))
35-
mgr = df._mgr
36-
assert len(mgr.blocks) == 3 # i.e. not consolidated
37-
38-
ser = df["b"] # populations item_cache["b"]
39-
40-
df._to_dict_of_blocks()
41-
42-
with pytest.raises(ValueError, match="read-only"):
43-
ser.values[0] = "foo"
44-
45-
4624
def test_set_change_dtype_slice():
4725
# GH#8850
4826
cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")])

pandas/tests/frame/test_block_internals.py

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -381,30 +381,3 @@ def test_update_inplace_sets_valid_block_values():
381381

382382
# check we haven't put a Series into any block.values
383383
assert isinstance(df._mgr.blocks[0].values, Categorical)
384-
385-
386-
def test_nonconsolidated_item_cache_take():
387-
# https://github.com/pandas-dev/pandas/issues/35521
388-
389-
# create non-consolidated dataframe with object dtype columns
390-
df = DataFrame(
391-
{
392-
"col1": Series(["a"], dtype=object),
393-
}
394-
)
395-
df["col2"] = Series([0], dtype=object)
396-
assert not df._mgr.is_consolidated()
397-
398-
# access column (item cache)
399-
df["col1"] == "A"
400-
# take operation
401-
# (regression was that this consolidated but didn't reset item cache,
402-
# resulting in an invalid cache and the .at operation not working properly)
403-
df[df["col2"] == 0]
404-
405-
# now setting value should update actual dataframe
406-
df.at[0, "col1"] = "A"
407-
408-
expected = DataFrame({"col1": ["A"], "col2": [0]}, dtype=object)
409-
tm.assert_frame_equal(df, expected)
410-
assert df.at[0, "col1"] == "A"

pandas/tests/indexing/test_at.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -49,29 +49,6 @@ def test_selection_methods_of_assigned_col():
4949

5050

5151
class TestAtSetItem:
52-
def test_at_setitem_item_cache_cleared(self):
53-
# GH#22372 Note the multi-step construction is necessary to trigger
54-
# the original bug. pandas/issues/22372#issuecomment-413345309
55-
df = DataFrame(index=[0])
56-
df["x"] = 1
57-
df["cost"] = 2
58-
59-
# accessing df["cost"] adds "cost" to the _item_cache
60-
df["cost"]
61-
62-
# This loc[[0]] lookup used to call _consolidate_inplace at the
63-
# BlockManager level, which failed to clear the _item_cache
64-
df.loc[[0]]
65-
66-
df.at[0, "x"] = 4
67-
df.at[0, "cost"] = 789
68-
69-
expected = DataFrame({"x": [4], "cost": 789}, index=[0])
70-
tm.assert_frame_equal(df, expected)
71-
72-
# And in particular, check that the _item_cache has updated correctly.
73-
tm.assert_series_equal(df["cost"], expected["cost"])
74-
7552
def test_at_setitem_mixed_index_assignment(self):
7653
# GH#19860
7754
ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])

pandas/tests/indexing/test_chaining_and_caching.py

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,6 @@
1818

1919

2020
class TestCaching:
21-
def test_slice_consolidate_invalidate_item_cache(self):
22-
# this is chained assignment, but will 'work'
23-
with option_context("chained_assignment", None):
24-
# #3970
25-
df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5})
26-
27-
# Creates a second float block
28-
df["cc"] = 0.0
29-
30-
# caches a reference to the 'bb' series
31-
df["bb"]
32-
33-
# Assignment to wrong series
34-
with tm.raises_chained_assignment_error():
35-
df["bb"].iloc[0] = 0.17
36-
tm.assert_almost_equal(df["bb"][0], 2.2)
37-
3821
@pytest.mark.parametrize("do_ref", [True, False])
3922
def test_setitem_cache_updating(self, do_ref):
4023
# GH 5424
@@ -89,18 +72,6 @@ def test_setitem_cache_updating_slices(self):
8972
tm.assert_frame_equal(out, expected)
9073
tm.assert_series_equal(out["A"], expected["A"])
9174

92-
def test_altering_series_clears_parent_cache(self):
93-
# GH #33675
94-
df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
95-
ser = df["A"]
96-
97-
# Adding a new entry to ser swaps in a new array, so "A" needs to
98-
# be removed from df._item_cache
99-
ser["c"] = 5
100-
assert len(ser) == 3
101-
assert df["A"] is not ser
102-
assert len(df["A"]) == 2
103-
10475

10576
class TestChaining:
10677
def test_setitem_chained_setfault(self):

0 commit comments

Comments
 (0)