Skip to content

Commit 8f74510

Browse files
authored
Fix output_units="cm" failing when cutoff is not inf (#15). Use pandas nullable Int16 instead of numpy int16 to handle NaN values that occur when points exceed the extrapolation cutoff distance. (#144)
1 parent ff22153 commit 8f74510

2 files changed

Lines changed: 61 additions & 3 deletions

File tree

eo_tides/model.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,29 @@ def _parallel_splits(
8080
return int(max(1, optimal_splits))
8181

8282

83+
def _to_nullable_int16(series: pd.Series) -> pd.arrays.IntegerArray:
    """Convert a numeric Series to nullable Int16, mapping missing values to pd.NA.

    This allows integer output units ("cm", "mm") to handle NaN values
    that occur when points are beyond the tide model extrapolation cutoff.

    Parameters
    ----------
    series : pd.Series
        A pandas Series of numeric values. It may be backed by a plain
        NumPy float dtype or by a nullable extension dtype (e.g.
        ``Float64``), and may contain NaN, pd.NA or infinite entries.

    Returns
    -------
    pd.arrays.IntegerArray
        A nullable Int16 array of the same length as ``series``. Every
        non-finite input (NaN, pd.NA, +inf, -inf) becomes pd.NA; finite
        values are truncated toward zero, exactly as a plain
        ``astype(np.int16)`` cast would do.

    Notes
    -----
    Finite values are not range-checked. Inputs outside the int16 range
    (-32768 to 32767) are passed straight to the NumPy cast, whose result
    for out-of-range floats is not well defined.

    """
    # Normalise to a plain float64 ndarray first. For nullable extension
    # dtypes this turns pd.NA into NaN; calling ``to_numpy()`` directly would
    # give an object array on which ``np.isnan`` raises TypeError.
    values = series.astype(np.float64).to_numpy()

    # Mask everything that has no integer representation: NaN and +/-inf.
    # Casting any of these to int16 is undefined, so they must never reach
    # the ``astype`` call below.
    mask = ~np.isfinite(values)

    # Masked slots need *some* valid integer underneath; 0 is a harmless
    # placeholder that is never exposed because the mask hides it.
    int_values = np.where(mask, 0, values).astype(np.int16)

    return pd.arrays.IntegerArray(int_values, mask=mask)
104+
105+
83106
def _model_tides(
84107
model,
85108
x,
@@ -250,9 +273,9 @@ def _model_tides(
250273
if output_units == "m":
251274
tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
252275
elif output_units == "cm":
253-
tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
276+
tide_df["tide_height"] = _to_nullable_int16(tide_df.tide_height * 100)
254277
elif output_units == "mm":
255-
tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
278+
tide_df["tide_height"] = _to_nullable_int16(tide_df.tide_height * 1000)
256279

257280
return tide_df
258281

tests/test_model.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ def test_model_tides_multiplemodels(measured_tides_ds, models, output_format):
212212
# Run tests for each unit, providing expected outputs
213213
@pytest.mark.parametrize(
214214
"units, expected_range, expected_dtype",
215-
[("m", 10, "float32"), ("cm", 1000, "int16"), ("mm", 10000, "int16")],
215+
[("m", 10, "float32"), ("cm", 1000, "Int16"), ("mm", 10000, "Int16")],
216216
ids=["metres", "centimetres", "millimetres"],
217217
)
218218
def test_model_tides_units(measured_tides_ds, units, expected_range, expected_dtype):
@@ -232,6 +232,41 @@ def test_model_tides_units(measured_tides_ds, units, expected_range, expected_dt
232232
assert modelled_tides_df.tide_height.dtype == expected_dtype
233233

234234

235+
# Test for GitHub Issue #15: integer output units with NaN values
236+
# https://github.com/GeoscienceAustralia/eo-tides/issues/15
237+
@pytest.mark.parametrize("units", ["cm", "mm"], ids=["centimetres", "millimetres"])
def test_model_tides_units_with_nan(units):
    """Check that integer output units cope with missing tide heights.

    With a finite ``cutoff``, points further than that distance from
    valid tide model data are given NaN heights. For "cm" and "mm"
    output these must come back as pd.NA in a nullable Int16 column
    instead of raising an IntCastingNaNError during the integer cast.
    """
    # Three timesteps are enough to exercise the conversion
    timesteps = pd.date_range("2020-01-01", periods=3, freq="6h")

    # A single inland location, chosen to lie further than the 10 km
    # cutoff from any valid tide model data
    inland_lon = [122.0]
    inland_lat = [-22.0]

    # Before the fix this call raised while casting NaN to int16
    tides_df = model_tides(
        x=inland_lon,
        y=inland_lat,
        time=timesteps,
        model="EOT20",
        output_units=units,
        cutoff=10,
    )
    heights = tides_df.tide_height

    # Column must use the nullable integer dtype
    assert heights.dtype == "Int16"

    # Every height is expected to be missing for this location
    assert heights.isna().all()

    # Aggregating an all-missing column should give a missing result
    # rather than an error
    assert pd.isna(heights.mean())
268+
269+
235270
# Run test for each combination of mode, output format, and one or
236271
# multiple tide models
237272
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)