Skip to content

Commit 16dc69e

Browse files
authored
adding fillna (#335)
* adding fillna * update nested logic for value matching * minor type change
1 parent e21f357 commit 16dc69e

File tree

3 files changed

+134
-1
lines changed

3 files changed

+134
-1
lines changed

docs/reference/nestedframe.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ Extended Pandas.DataFrame Interface
4343
NestedFrame.max
4444
NestedFrame.describe
4545
NestedFrame.explode
46+
NestedFrame.fillna
4647

4748
I/O
4849
~~~~~~~~~

src/nested_pandas/nestedframe/core.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pyarrow as pa
1111
import pyarrow.parquet as pq
1212
from pandas._libs import lib
13-
from pandas._typing import Any, AnyAll, Axis, IndexLabel
13+
from pandas._typing import Any, AnyAll, Axis, Hashable, IndexLabel, Mapping
1414
from pandas.api.extensions import no_default
1515
from pandas.core.computation.eval import Expr, ensure_scope
1616
from pandas.core.dtypes.inference import is_list_like
@@ -1114,6 +1114,91 @@ def explode(self, column: IndexLabel, ignore_index: bool = False):
11141114
result.index.name = None
11151115
return result
11161116

1117+
def fillna(
    self,
    value: Hashable | Mapping | pd.Series | pd.DataFrame | None = None,
    *,
    axis: Axis | None = None,
    inplace: bool = False,
    limit: int | None = None,
) -> NestedFrame | None:
    """
    Fill NA/NaN values in both base and nested columns with the given value.

    Parameters
    ----------
    value : scalar, dict, Series, or DataFrame
        Value to use to fill holes (e.g. 0), alternately a
        dict/Series/DataFrame of values specifying which value to use for
        each column. Values not in the dict/Series/DataFrame will not be filled.
        For a dict or Series, sub-columns of a nested column are addressed
        as ``"<nested column>.<sub-column>"`` (e.g. ``"nested.c"``); keys
        that are not strings can only refer to base columns.
        This value cannot be a list.
    axis : {0 or 'index', 1 or 'columns'}, default None
        Axis along which to fill missing values.
    inplace : bool, default False
        If True, fill in-place. Note: this will modify any
        other views on this object (e.g., a no-copy slice for a column in a
        NestedFrame).
    limit : int, default None
        The maximum number of entries along the entire axis where NaNs will be
        filled. Must be greater than 0 if not None. Currently, limit on nested
        columns is not supported, meaning that all NaNs will be filled (if there
        is a value specified) regardless of the input.

    Returns
    -------
    NestedFrame or None
        NestedFrame with missing values filled or None if ``inplace=True``.

    See Also
    --------
    :meth:`pandas.DataFrame.fillna`

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> import nested_pandas as npd
    >>> nf = npd.NestedFrame(
    ...     data={"a": [np.nan, 20, np.nan], "b": [np.nan, np.nan, 30], "c": [10, np.nan, np.nan]},
    ...     index=[0, 1, 2]
    ... )
    >>> nested = pd.DataFrame(
    ...     data={"d": [np.nan, np.nan, np.nan], "e": [np.nan, 1, np.nan]},
    ...     index=[0, 1, 2]
    ... )
    >>> nf = nf.add_nested(nested, "nested")

    >>> nf.fillna(0)
          a     b     c              nested
    0   0.0   0.0  10.0  [{d: 0.0, e: 0.0}]
    1  20.0   0.0   0.0  [{d: 0.0, e: 1.0}]
    2   0.0  30.0   0.0  [{d: 0.0, e: 0.0}]

    """

    # Without nested columns this is an ordinary DataFrame fill.
    if not self.nested_columns:
        return super().fillna(value=value, axis=axis, inplace=inplace, limit=limit)

    # Fill the base columns first; the nested columns are re-attached below.
    base_cols = [col for col in self.columns if col not in self.nested_columns]
    filled_df = super().__getitem__(base_cols).fillna(value=value, axis=axis, inplace=False, limit=limit)

    for nest_col in self.nested_columns:
        nested_df = self[nest_col].nest.to_flat()
        nested_value: Any
        if isinstance(value, (Mapping, pd.Series)):
            # Keep only the entries addressed to this nested column and strip
            # the "<nest_col>." prefix so the keys match the flat frame's columns.
            # Non-string keys cannot carry the prefix, so they are skipped
            # instead of failing on ``startswith``. Slicing by the prefix length
            # (rather than splitting on the first ".") keeps this correct when
            # the nested column's own name contains a dot.
            prefix = f"{nest_col}."
            nested_value = {
                key[len(prefix) :]: fill
                for key, fill in value.items()
                if isinstance(key, str) and key.startswith(prefix)
            }
        else:
            nested_value = value
        # ``limit`` is deliberately not forwarded: it is unsupported for nested columns.
        nested_df = nested_df.fillna(value=nested_value, axis=axis, inplace=False, limit=None)
        filled_df = filled_df.add_nested(nested_df, nest_col)

    if inplace:
        self._update_inplace(filled_df)
        return None
    return filled_df
1201+
11171202
def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
11181203
"""Evaluate a string describing operations on NestedFrame columns.
11191204

tests/nested_pandas/nestedframe/test_nestedframe.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,6 +1787,53 @@ def test_explode_non_unique_index():
17871787
nf.explode(["nested", "XXX", "AAA"])
17881788

17891789

1790+
def test_fillna():
    """Check that NestedFrame.fillna fills NA/NaN in base and nested columns."""

    # Row labels shared by the flat nested inputs and the expected series.
    nine_rows = [0, 0, 0, 1, 1, 1, 2, 2, 2]
    eight_rows = [0, 0, 0, 1, 1, 2, 2, 2]

    frame = NestedFrame(data={"a": [np.nan, np.nan, 3], "b": [2, np.nan, 6]}, index=[0, 1, 2])
    first_flat = pd.DataFrame(
        data={
            "c": [0, 2, np.nan, 1, np.nan, 3, 1, 4, 1],
            "d": [np.nan, 4, np.nan, 5, 3, 1, np.nan, 3, 4],
        },
        index=nine_rows,
    )
    second_flat = pd.DataFrame(
        data={
            "e": [np.nan, np.nan, np.nan, 1, 4, np.nan, 4, 1],
            "f": [5, 4, 7, 5, 1, 9, 3, 4],
        },
        index=eight_rows,
    )

    # Frame with base columns only.
    base_only = frame.fillna(0)
    assert (base_only["a"] == pd.Series([0, 0, 3])).all()
    assert (base_only["b"] == pd.Series([2, 0, 6])).all()

    # Frame with a single nested column, scalar fill value.
    one_nest = frame.add_nested(first_flat, "nested")
    scalar_filled = one_nest.fillna(0)
    want_d = pd.Series([0, 4, 0, 5, 3, 1, 0, 3, 4], index=nine_rows)
    assert (scalar_filled["nested.d"] == want_d).all()

    # Per-column fill values, with nested sub-columns addressed as "nested.<name>".
    dict_filled = one_nest.fillna({"a": 0, "b": 1, "nested.c": 2, "nested.d": 3})
    want_c = pd.Series([0, 2, 2, 1, 2, 3, 1, 4, 1], index=nine_rows)
    assert (dict_filled["nested.c"] == want_c).all()
    assert (dict_filled["a"] == pd.Series([0, 0, 3])).all()

    # With limit=1 the second NaN of base column "a" must stay unfilled.
    limited = one_nest.fillna(0, limit=1)
    assert np.isnan(limited["a"][1])

    # Frame with two nested columns.
    two_nests = one_nest.add_nested(second_flat, "nested2")
    both_filled = two_nests.fillna(0)
    want_e = pd.Series([0, 0, 0, 1, 4, 0, 4, 1], index=eight_rows)
    assert (both_filled["nested2.e"] == want_e).all()

    # In-place fill: the earlier non-inplace calls must not have changed the frame ...
    untouched_c = pd.Series([0, 2, np.nan, 1, np.nan, 3, 1, 4, 1], index=nine_rows)
    assert (two_nests["nested.c"] == untouched_c).all()

    # ... while the in-place call updates it directly.
    two_nests.fillna({"nested.c": 0}, inplace=True)
    inplace_c = pd.Series([0, 2, 0, 1, 0, 3, 1, 4, 1], index=nine_rows)
    assert (two_nests["nested.c"] == inplace_c).all()
1835+
1836+
17901837
def test_eval():
17911838
"""
17921839
Test basic behavior of NestedFrame.eval, and that it can handle nested references

0 commit comments

Comments
 (0)