Skip to content

Commit e6c84d8

Browse files
committed
NANS for HHS:
* add missing columns
1 parent 773fe08 commit e6c84d8

File tree

2 files changed

+48
-23
lines changed

2 files changed

+48
-23
lines changed

hhs_hosp/delphi_hhs/run.py

+19-9
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@
88

99
import time
1010
from delphi_epidata import Epidata
11-
from delphi_utils.export import create_export_csv
12-
from delphi_utils.geomap import GeoMapper
13-
from delphi_utils import get_structured_logger
11+
from delphi_utils import create_export_csv, get_structured_logger, Nans, GeoMapper
1412
import numpy as np
1513
import pandas as pd
1614

@@ -63,6 +61,17 @@ def generate_date_ranges(start, end):
6361
output.append(Epidata.range(_date_to_int(start), _date_to_int(end)))
6462
return output
6563

64+
def add_nancodes(df):
    """Attach missingness-code columns to a signal dataframe.

    Adds the ``missing_val``, ``missing_se`` and ``missing_sample_size``
    columns to ``df`` (the frame is modified in place) and returns it.

    Parameters
    ----------
    df: pd.DataFrame
        Signal frame; must contain a ``val`` column.

    Returns
    -------
    pd.DataFrame
        The same ``df`` object with the three missingness columns set.
    """
    # Rows whose value is null get flagged as unknown below.
    val_is_null = df["val"].isnull()

    # Baseline codes applied to every row, in output column order.
    default_codes = {
        "missing_val": Nans.NOT_MISSING,
        "missing_se": Nans.NOT_APPLICABLE,
        "missing_sample_size": Nans.NOT_APPLICABLE,
    }
    for column, code in default_codes.items():
        df[column] = code

    # Override the baseline wherever the value itself is absent.
    df.loc[val_is_null, "missing_val"] = Nans.UNKNOWN
    return df
6675

6776
def run_module(params):
6877
"""
@@ -99,12 +108,15 @@ def run_module(params):
99108
geo_mapper = GeoMapper()
100109

101110
for sig in SIGNALS:
102-
state = geo_mapper.add_geocode(make_signal(all_columns, sig),
103-
"state_id", "state_code",
104-
from_col="state")
111+
state = make_signal(all_columns, sig)
112+
state = geo_mapper.add_geocode(state, "state_id", "state_code", from_col="state")
105113
for geo in GEOS:
114+
df = make_geo(state, geo, geo_mapper)
115+
df["se"] = np.nan
116+
df["sample_size"] = np.nan
117+
df = add_nancodes(df)
106118
create_export_csv(
107-
make_geo(state, geo, geo_mapper),
119+
df,
108120
params["common"]["export_dir"],
109121
geo,
110122
sig
@@ -123,8 +135,6 @@ def make_geo(state, geo, geo_mapper):
123135
state, "state_code", geo,
124136
new_col="geo_id",
125137
date_col="timestamp")
126-
exported["se"] = np.nan
127-
exported["sample_size"] = np.nan
128138
return exported
129139

130140
def make_signal(all_columns, sig):

hhs_hosp/tests/test_run.py

+29-14
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from datetime import datetime, date
22
from unittest.mock import patch
33

4-
from delphi_hhs.run import _date_to_int, int_date_to_previous_day_datetime, generate_date_ranges, \
4+
from delphi_hhs.run import _date_to_int, add_nancodes, int_date_to_previous_day_datetime, generate_date_ranges, \
55
make_signal, make_geo, run_module
66
from delphi_hhs.constants import CONFIRMED, SUM_CONF_SUSP
7-
from delphi_utils.geomap import GeoMapper
7+
from delphi_utils import GeoMapper, Nans
88
from freezegun import freeze_time
99
import numpy as np
1010
import pandas as pd
@@ -72,38 +72,31 @@ def test_make_geo():
7272
"""Check that geographies transform correctly."""
7373
test_timestamp = datetime(year=2020, month=1, day=1)
7474
geo_mapper = GeoMapper()
75-
75+
7676
data = pd.DataFrame({
7777
'state': ['PA','WV','OH'],
7878
'state_code': [42, 54, 39],
7979
'timestamp': [test_timestamp]*3,
8080
'val': [1, 2, 4],
8181
})
8282

83-
template = {
84-
'se': np.nan,
85-
'sample_size': np.nan,
86-
}
8783
expecteds = {
8884
"state": pd.DataFrame(
89-
dict(template,
90-
geo_id=data.state,
85+
dict(geo_id=data.state,
9186
timestamp=data.timestamp,
9287
val=data.val)),
9388
"hhs": pd.DataFrame(
94-
dict(template,
95-
geo_id=['3', '5'],
89+
dict(geo_id=['3', '5'],
9690
timestamp=[test_timestamp]*2,
9791
val=[3, 4])),
9892
"nation": pd.DataFrame(
99-
dict(template,
100-
geo_id=['us'],
93+
dict(geo_id=['us'],
10194
timestamp=[test_timestamp],
10295
val=[7]))
10396
}
10497
for geo, expected in expecteds.items():
10598
result = make_geo(data, geo, geo_mapper)
106-
for series in ["geo_id", "timestamp", "val", "se", "sample_size"]:
99+
for series in ["geo_id", "timestamp", "val"]:
107100
pd.testing.assert_series_equal(expected[series], result[series], obj=f"{geo}:{series}")
108101

109102

@@ -131,3 +124,25 @@ def test_ignore_last_range_no_results(mock_covid_hosp, mock_export):
131124
}
132125
}
133126
assert not run_module(params) # function should not raise value error and has no return value
127+
128+
def test_add_nancode():
    """Check that add_nancodes flags null values as unknown and leaves the rest at defaults."""
    stamp = pd.to_datetime("20200601")
    # Columns shared by the input frame and the expected output; the last
    # row has a null value so it should be coded as UNKNOWN.
    base_columns = {
        'state': ['PA', 'WV', 'OH'],
        'state_code': [42, 54, 39],
        'timestamp': [stamp] * 3,
        'val': [1, 2, np.nan],
        'se': [np.nan] * 3,
        'sample_size': [np.nan] * 3,
    }
    data = pd.DataFrame(base_columns)
    expected = pd.DataFrame({
        **base_columns,
        'missing_val': [Nans.NOT_MISSING] * 2 + [Nans.UNKNOWN],
        'missing_se': [Nans.NOT_APPLICABLE] * 3,
        'missing_sample_size': [Nans.NOT_APPLICABLE] * 3,
    })
    result = add_nancodes(data)
    pd.testing.assert_frame_equal(expected, result)

0 commit comments

Comments
 (0)