
Commit d566a59

Merge pull request #1932 from cmu-delphi/release/indicators_v0.3.50_utils_v0.3.22
Release covidcast-indicators 0.3.50
2 parents a20b51e + 624b73b commit d566a59


40 files changed, +1574 -95 lines changed


.bumpversion.cfg

+1-1
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.49
+current_version = 0.3.50
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False

.github/workflows/python-ci.yml

+34-22
@@ -5,37 +5,49 @@ name: Python package

 on:
   push:
-    branches: [ main, prod ]
+    branches: [main, prod]
   pull_request:
-    types: [ opened, synchronize, reopened, ready_for_review ]
-    branches: [ main, prod ]
+    types: [opened, synchronize, reopened, ready_for_review]
+    branches: [main, prod]

 jobs:
   build:
     runs-on: ubuntu-20.04
     if: github.event.pull_request.draft == false
     strategy:
       matrix:
-        packages: [_delphi_utils_python, changehc, claims_hosp, doctor_visits, google_symptoms, hhs_hosp, nchs_mortality, quidel_covidtest, sir_complainsalot]
+        packages:
+          [
+            _delphi_utils_python,
+            changehc,
+            claims_hosp,
+            doctor_visits,
+            google_symptoms,
+            hhs_hosp,
+            nchs_mortality,
+            nwss_wastewater,
+            quidel_covidtest,
+            sir_complainsalot,
+          ]
     defaults:
       run:
         working-directory: ${{ matrix.packages }}
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python 3.8
-      uses: actions/setup-python@v2
-      with:
-        python-version: 3.8
-    - name: Install testing dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install pylint pytest pydocstyle wheel
-    - name: Install
-      run: |
-        make install-ci
-    - name: Lint
-      run: |
-        make lint
-    - name: Test
-      run: |
-        make test
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+      - name: Install testing dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pylint pytest pydocstyle wheel
+      - name: Install
+        run: |
+          make install-ci
+      - name: Lint
+        run: |
+          make lint
+      - name: Test
+        run: |
+          make test

Jenkinsfile

+1-1
@@ -10,7 +10,7 @@
 - TODO: #527 Get this list automatically from python-ci.yml at runtime.
 */

-def indicator_list = ["backfill_corrections", "changehc", "claims_hosp", "google_symptoms", "hhs_hosp", "nchs_mortality", "quidel_covidtest", "sir_complainsalot", "doctor_visits"]
+def indicator_list = ["backfill_corrections", "changehc", "claims_hosp", "google_symptoms", "hhs_hosp", "nchs_mortality", "quidel_covidtest", "sir_complainsalot", "doctor_visits", "nwss_wastewater"]
 def build_package_main = [:]
 def build_package_prod = [:]
 def deploy_staging = [:]

_delphi_utils_python/.bumpversion.cfg

+1-1
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.21
+current_version = 0.3.22
 commit = True
 message = chore: bump delphi_utils to {new_version}
 tag = False

_delphi_utils_python/delphi_utils/__init__.py

+1-1
@@ -15,4 +15,4 @@
 from .nancodes import Nans
 from .weekday import Weekday

-__version__ = "0.3.21"
+__version__ = "0.3.22"

_delphi_utils_python/delphi_utils/nancodes.py

+29-1
@@ -1,13 +1,41 @@
 """Unified not-a-number codes for CMU Delphi codebase."""

 from enum import IntEnum
+import pandas as pd
+

 class Nans(IntEnum):
-    """An enum of not-a-number codes for the indicators."""
+    """An enum of not-a-number codes for the indicators.
+
+    See the descriptions here: https://cmu-delphi.github.io/delphi-epidata/api/missing_codes.html
+    """

     NOT_MISSING = 0
     NOT_APPLICABLE = 1
     REGION_EXCEPTION = 2
     CENSORED = 3
     DELETED = 4
     OTHER = 5
+
+
+def add_default_nancodes(df: pd.DataFrame):
+    """Add some default nancodes to the dataframe.
+
+    This method sets the `"missing_val"` column to NOT_MISSING whenever the
+    `"val"` column has `isnull()` as `False`; if `isnull()` is `True`, then it
+    sets `"missing_val"` to `OTHER`. It also sets both the `"missing_se"` and
+    `"missing_sample_size"` columns to `NOT_APPLICABLE`.
+
+    Returns
+    -------
+    pd.DataFrame
+    """
+    # Default missingness codes
+    df["missing_val"] = Nans.NOT_MISSING
+    df["missing_se"] = Nans.NOT_APPLICABLE
+    df["missing_sample_size"] = Nans.NOT_APPLICABLE
+
+    # Mark any remaining nans with unknown
+    remaining_nans_mask = df["val"].isnull()
+    df.loc[remaining_nans_mask, "missing_val"] = Nans.OTHER
+    return df
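
A quick illustration of the new helper (not part of this commit; the toy frame is hypothetical, and the import path assumes the module location reconstructed above):

import numpy as np
import pandas as pd
from delphi_utils.nancodes import Nans, add_default_nancodes

# Toy frame: one reported value and one missing value.
df = pd.DataFrame({"geo_id": ["pa", "ny"], "val": [1.5, np.nan]})
df = add_default_nancodes(df)

# The reported row is NOT_MISSING; the NaN row is marked OTHER.
assert df["missing_val"].tolist() == [Nans.NOT_MISSING, Nans.OTHER]
assert (df["missing_se"] == Nans.NOT_APPLICABLE).all()
assert (df["missing_sample_size"] == Nans.NOT_APPLICABLE).all()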

_delphi_utils_python/setup.py

+1-1
@@ -27,7 +27,7 @@

 setup(
     name="delphi_utils",
-    version="0.3.21",
+    version="0.3.22",
     description="Shared Utility Functions for Indicators",
     long_description=long_description,
     long_description_content_type="text/markdown",

new file (nwss_wastewater params template)

@@ -0,0 +1,13 @@
+{
+  "common": {
+    "export_dir": "./receiving",
+    "log_filename": "./nwss_wastewater.log",
+    "log_exceptions": false
+  },
+  "indicator": {
+    "wip_signal": true,
+    "export_start_date": "2020-02-01",
+    "static_file_dir": "./static",
+    "token": ""
+  }
+}

changehc/version.cfg

+1-1
@@ -1 +1 @@
-current_version = 0.3.49
+current_version = 0.3.50

claims_hosp/version.cfg

+1-1
@@ -1 +1 @@
-current_version = 0.3.49
+current_version = 0.3.50

doctor_visits/version.cfg

+1-1
@@ -1 +1 @@
-current_version = 0.3.49
+current_version = 0.3.50

google_symptoms/version.cfg

+1-1
@@ -1 +1 @@
-current_version = 0.3.49
+current_version = 0.3.50

hhs_hosp/version.cfg

+1-1
@@ -1 +1 @@
-current_version = 0.3.49
+current_version = 0.3.50

nchs_mortality/.pylintrc

+2
@@ -4,6 +4,8 @@
 disable=logging-format-interpolation,
         too-many-locals,
         too-many-arguments,
+        too-many-branches,
+        too-many-statements,
         # Allow pytest functions to be part of a class.
         no-self-use,
         # Allow pytest classes to have one test.

nchs_mortality/README.md

+2-2
@@ -8,9 +8,9 @@ the state-level data as-is. For detailed information see the files
 `MyAppToken` is required when fetching data from SODA Consumer API
 (https://dev.socrata.com/foundry/data.cdc.gov/r8kw-7aab). Follow the
 steps below to create a MyAppToken.
-- Click the `Sign up for an app toekn` buttom in the linked website
+- Click the `Sign up for an app token` button in the linked website
 - Sign In or Sign Up with Socrata ID
-- Clck the `Create New App Token` button
+- Click the `Create New App Token` button
 - Fill in `Application Name` and `Description` (You can just use NCHS_Mortality
   for both) and click `Save`
 - Copy the `App Token`
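
Not part of this diff, but for context: a hedged sketch of where that MyAppToken ends up. The indicator pulls the same Socrata dataset (r8kw-7aab); the sodapy client and the limit value below are assumptions, not code from this repo.

import pandas as pd
from sodapy import Socrata

token = "YOUR_MYAPPTOKEN"  # placeholder for the token created above
client = Socrata("data.cdc.gov", token)
records = client.get("r8kw-7aab", limit=50000)  # limit chosen arbitrarily
df = pd.DataFrame.from_records(records)
print(df.head())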

nchs_mortality/delphi_nchs_mortality/constants.py

-1
@@ -25,7 +25,6 @@
     "prop"
 ]
 INCIDENCE_BASE = 100000
-GEO_RES = "state"

 # this is necessary as a delimiter in the f-string expressions we use to
 # construct detailed error reports

nchs_mortality/delphi_nchs_mortality/pull.py

+7-4
@@ -96,8 +96,6 @@ def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):
 {NEWLINE.join(df.columns)}
 """) from exc

-    # Drop rows for locations outside US
-    df = df[df["state"] != "United States"]
     df = df[keep_columns + ["timestamp", "state"]].set_index("timestamp")

     # NCHS considers NYC as an individual state, however, we want it included
@@ -124,6 +122,11 @@ def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):
     # Add population info
     keep_columns.extend(["timestamp", "geo_id", "population"])
     gmpr = GeoMapper()
-    df = gmpr.add_population_column(df, "state_name", geocode_col="state")
-    df = gmpr.add_geocode(df, "state_name", "state_id", from_col="state", new_col="geo_id")
+    # Map state to geo_id, but set dropna=False as we also have national data
+    df = gmpr.add_population_column(df, "state_name",
+                                    geocode_col="state", dropna=False)
+    df = gmpr.add_geocode(df, "state_name", "state_id",
+                          from_col="state", new_col="geo_id", dropna=False)
+    # Manually set geo_id for national data
+    df.loc[df["state"] == "United States", "geo_id"] = "us"
     return df[keep_columns]
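
A toy sketch (not the indicator's actual code) of why dropna=False matters here: a plain dict stands in for GeoMapper, and the national row is kept and then labeled by hand, mirroring the change above.

import pandas as pd

state_to_geo = {"Pennsylvania": "pa", "New York": "ny"}  # illustrative mapping

df = pd.DataFrame({
    "state": ["Pennsylvania", "New York", "United States"],
    "covid_deaths": [10, 20, 500],  # made-up numbers
})

# A dropna=True-style mapping would drop the "United States" row because it
# has no state-level geocode; keeping the NaN mimics dropna=False.
df["geo_id"] = df["state"].map(state_to_geo)

# Manually label the national row, as the new pull.py code does.
df.loc[df["state"] == "United States", "geo_id"] = "us"
print(df)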

nchs_mortality/delphi_nchs_mortality/run.py

+37-34
@@ -13,7 +13,7 @@

 from .archive_diffs import arch_diffs
 from .constants import (METRICS, SENSOR_NAME_MAP,
-                        SENSORS, INCIDENCE_BASE, GEO_RES)
+                        SENSORS, INCIDENCE_BASE)
 from .pull import pull_nchs_mortality_data


@@ -72,51 +72,54 @@ def run_module(params: Dict[str, Any]):
     stats = []
     df_pull = pull_nchs_mortality_data(token, test_file)
     for metric in METRICS:
-        if metric == 'percent_of_expected_deaths':
-            logger.info("Generating signal and exporting to CSV",
-                        metric = metric)
-            df = df_pull.copy()
-            df["val"] = df[metric]
-            df["se"] = np.nan
-            df["sample_size"] = np.nan
-            df = add_nancodes(df)
-            # df = df[~df["val"].isnull()]
-            sensor_name = "_".join([SENSOR_NAME_MAP[metric]])
-            dates = create_export_csv(
-                df,
-                geo_res=GEO_RES,
-                export_dir=daily_export_dir,
-                start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
-                sensor=sensor_name,
-                weekly_dates=True
-            )
-            if len(dates) > 0:
-                stats.append((max(dates), len(dates)))
-        else:
-            for sensor in SENSORS:
+        for geo in ["state", "nation"]:
+            if metric == 'percent_of_expected_deaths':
                 logger.info("Generating signal and exporting to CSV",
-                            metric = metric,
-                            sensor = sensor)
+                            metric=metric, geo_level=geo)
                 df = df_pull.copy()
-                if sensor == "num":
-                    df["val"] = df[metric]
+                if geo == "nation":
+                    df = df[df["geo_id"] == "us"]
                 else:
-                    df["val"] = df[metric] / df["population"] * INCIDENCE_BASE
+                    df = df[df["geo_id"] != "us"]
+                df["val"] = df[metric]
                 df["se"] = np.nan
                 df["sample_size"] = np.nan
                 df = add_nancodes(df)
-                # df = df[~df["val"].isnull()]
-                sensor_name = "_".join([SENSOR_NAME_MAP[metric], sensor])
                 dates = create_export_csv(
                     df,
-                    geo_res=GEO_RES,
+                    geo_res=geo,
                     export_dir=daily_export_dir,
                     start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
-                    sensor=sensor_name,
+                    sensor=SENSOR_NAME_MAP[metric],
                     weekly_dates=True
                 )
-                if len(dates) > 0:
-                    stats.append((max(dates), len(dates)))
+            else:
+                for sensor in SENSORS:
+                    logger.info("Generating signal and exporting to CSV",
+                                metric=metric, sensor=sensor, geo_level=geo)
+                    df = df_pull.copy()
+                    if geo == "nation":
+                        df = df[df["geo_id"] == "us"]
+                    else:
+                        df = df[df["geo_id"] != "us"]
+                    if sensor == "num":
+                        df["val"] = df[metric]
+                    else:
+                        df["val"] = df[metric] / df["population"] * INCIDENCE_BASE
+                    df["se"] = np.nan
+                    df["sample_size"] = np.nan
+                    df = add_nancodes(df)
+                    sensor_name = "_".join([SENSOR_NAME_MAP[metric], sensor])
+                    dates = create_export_csv(
+                        df,
+                        geo_res=geo,
+                        export_dir=daily_export_dir,
+                        start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
+                        sensor=sensor_name,
+                        weekly_dates=True
+                    )
+                    if len(dates) > 0:
+                        stats.append((max(dates), len(dates)))

     # Weekly run of archive utility on Monday
     # - Does not upload to S3, that is handled by daily run of archive utility

nchs_mortality/tests/test_run.py

+20-18
@@ -19,6 +19,7 @@ def test_output_files_exist(self, run_as_module, date):
         for output_folder in folders:
             csv_files = listdir(output_folder)

+        geos = ["nation", "state"]
         dates = [
             "202030",
             "202031",
@@ -38,15 +39,14 @@ def test_output_files_exist(self, run_as_module, date):
         sensors = ["num", "prop"]

         expected_files = []
-        for d in dates:
-            for metric in metrics:
-                if metric == "deaths_percent_of_expected":
-                    expected_files += ["weekly_" + d + "_state_" \
-                        + metric + ".csv"]
-                else:
-                    for sensor in sensors:
-                        expected_files += ["weekly_" + d + "_state_" \
-                            + metric + "_" + sensor + ".csv"]
+        for geo in geos:
+            for d in dates:
+                for metric in metrics:
+                    if metric == "deaths_percent_of_expected":
+                        expected_files += [f"weekly_{d}_{geo}_{metric}.csv"]
+                    else:
+                        for sensor in sensors:
+                            expected_files += [f"weekly_{d}_{geo}_{metric}_{sensor}.csv"]
         assert set(expected_files).issubset(set(csv_files))

         # the 14th was a Monday
@@ -58,12 +58,14 @@ def test_output_file_format(self, run_as_module, date):
         if is_mon_or_thurs:
             folders.append("receiving")

-        for output_folder in folders:
-            df = pd.read_csv(
-                join(output_folder, "weekly_202026_state_deaths_covid_incidence_prop.csv")
-            )
-            expected_columns = [
-                "geo_id", "val", "se", "sample_size",
-                "missing_val", "missing_se", "missing_sample_size"
-            ]
-            assert (df.columns.values == expected_columns).all()
+        geos = ["nation", "state"]
+        for geo in geos:
+            for output_folder in folders:
+                df = pd.read_csv(
+                    join(output_folder, f"weekly_202026_{geo}_deaths_covid_incidence_prop.csv")
+                )
+                expected_columns = [
+                    "geo_id", "val", "se", "sample_size",
+                    "missing_val", "missing_se", "missing_sample_size"
+                ]
+                assert (df.columns.values == expected_columns).all()

nchs_mortality/version.cfg

+1-1
@@ -1 +1 @@
-current_version = 0.3.49
+current_version = 0.3.50
