From e6452b233faa3493aca8059a389b002c66a1bca4 Mon Sep 17 00:00:00 2001 From: Bryan Wilder Date: Mon, 17 Jan 2022 18:00:41 -0500 Subject: [PATCH 1/7] first draft of code for inpatient flu chc --- changehc/delphi_changehc/config.py | 8 ++++- changehc/delphi_changehc/constants.py | 5 ++- changehc/delphi_changehc/load_data.py | 40 +++++++++++++++++++++-- changehc/delphi_changehc/run.py | 17 +++++++++- changehc/delphi_changehc/update_sensor.py | 12 +++++-- 5 files changed, 73 insertions(+), 9 deletions(-) diff --git a/changehc/delphi_changehc/config.py b/changehc/delphi_changehc/config.py index a7232ddf7..374d278ad 100644 --- a/changehc/delphi_changehc/config.py +++ b/changehc/delphi_changehc/config.py @@ -28,25 +28,31 @@ class Config: MIXED_COL = "Mixed" FLU_LIKE_COL = "Flu-like" COVID_LIKE_COL = "Covid-like" - COUNT_COLS = [COVID_COL,DENOM_COL,FLU_COL,MIXED_COL,FLU_LIKE_COL,COVID_LIKE_COL] + FLU_INPATIENT_COL = 'Flu-Inpatient' + COUNT_COLS = [COVID_COL,DENOM_COL,FLU_COL,MIXED_COL,FLU_LIKE_COL,COVID_LIKE_COL, FLU_INPATIENT_COL] DATE_COL = "timestamp" GEO_COL = "fips" + GEO_COL_STATE = 'state_code' ID_COLS = [DATE_COL] + [GEO_COL] FILT_COLS = ID_COLS + COUNT_COLS DENOM_COLS = [DATE_COL, GEO_COL, DENOM_COL] + DENOM_COLS_STATE = [DATE_COL, GEO_COL_STATE, DENOM_COL] COVID_COLS = [DATE_COL, GEO_COL, COVID_COL] FLU_COLS = [DATE_COL, GEO_COL, FLU_COL] MIXED_COLS = [DATE_COL, GEO_COL, MIXED_COL] FLU_LIKE_COLS = [DATE_COL, GEO_COL, FLU_LIKE_COL] COVID_LIKE_COLS = [DATE_COL, GEO_COL, COVID_LIKE_COL] + FLU_INPATIENT_COLS = [GEO_COL_STATE, DATE_COL, FLU_INPATIENT_COL] DENOM_DTYPES = {DATE_COL: str, DENOM_COL: str, GEO_COL: str} + DENOM_DTYPES_STATE = {DATE_COL: str, DENOM_COL: str, GEO_COL_STATE: str} COVID_DTYPES = {DATE_COL: str, COVID_COL: str, GEO_COL: str} FLU_DTYPES = {DATE_COL: str, FLU_COL: str, GEO_COL: str} MIXED_DTYPES = {DATE_COL: str, MIXED_COL: str, GEO_COL: str} FLU_LIKE_DTYPES = {DATE_COL: str, FLU_LIKE_COL: str, GEO_COL: str} COVID_LIKE_DTYPES = {DATE_COL: str, COVID_LIKE_COL: str, GEO_COL: str} + FLU_INPATIENT_DTYPES = {DATE_COL: str, FLU_INPATIENT_COL: str, GEO_COL_STATE: str} SMOOTHER_BANDWIDTH = 100 # bandwidth for the linear left Gaussian filter MIN_DEN = 100 # number of total visits needed to produce a sensor diff --git a/changehc/delphi_changehc/constants.py b/changehc/delphi_changehc/constants.py index a458f8819..6654124cc 100644 --- a/changehc/delphi_changehc/constants.py +++ b/changehc/delphi_changehc/constants.py @@ -5,7 +5,10 @@ SMOOTHED_ADJ_CLI = "smoothed_adj_outpatient_cli" SMOOTHED_FLU = "smoothed_outpatient_flu" SMOOTHED_ADJ_FLU = "smoothed_adj_outpatient_flu" -SIGNALS = [SMOOTHED, SMOOTHED_ADJ, SMOOTHED_CLI, SMOOTHED_ADJ_CLI, SMOOTHED_FLU, SMOOTHED_ADJ_FLU] +SMOOTHED_FLU_INPATIENT = "smoothed_inpatient_flu" +SMOOTHED_ADJ_FLU_INPATIENT = "smoothed_adj_inpatient_flu" + +SIGNALS = [SMOOTHED, SMOOTHED_ADJ, SMOOTHED_CLI, SMOOTHED_ADJ_CLI, SMOOTHED_FLU, SMOOTHED_ADJ_FLU, SMOOTHED_FLU_INPATIENT, SMOOTHED_ADJ_FLU_INPATIENT] NA = "NA" HRR = "hrr" FIPS = "fips" diff --git a/changehc/delphi_changehc/load_data.py b/changehc/delphi_changehc/load_data.py index 1f9a43117..c1f5aaaba 100644 --- a/changehc/delphi_changehc/load_data.py +++ b/changehc/delphi_changehc/load_data.py @@ -27,7 +27,8 @@ def load_chng_data(filepath, dropdate, base_geo, Returns: cleaned dataframe """ - assert base_geo == "fips", "base unit must be 'fips'" + assert base_geo == "fips" or (counts_col == Config.FLU_INPATIENT_COL and + base_geo == "state_code"), "base unit must be 'fips', or state_code for Flu-Inpatient" count_flag = False date_flag = False geo_flag = False @@ -36,11 +37,11 @@ def load_chng_data(filepath, dropdate, base_geo, count_flag = True elif n == Config.DATE_COL: date_flag = True - elif n == "fips": + elif n == base_geo: geo_flag = True assert count_flag, "counts_col must be present in col_names" assert date_flag, "'%s' must be present in col_names"%(Config.DATE_COL) - assert geo_flag, "'fips' must be present in col_names" + assert geo_flag, "'base_geo (%s) must be present in col_names"%(base_geo) data = pd.read_csv( filepath, @@ -186,3 +187,36 @@ def load_flu_data(denom_filepath, flu_filepath, dropdate, base_geo): data = data[["num", "den"]] return data + + +def load_flu_inpatient_data(denom_filepath, flu_filepath, dropdate, base_geo): + """Load in denominator and flu inpatient data, and combine them. + + Args: + denom_filepath: path to the aggregated denominator data + flu_filepath: path to the aggregated flu inpatient data + dropdate: data drop date (datetime object) + base_geo: base geographic unit before aggregation ('state_code') + + Returns: + combined multiindexed dataframe, index 0 is geo_base, index 1 is date + """ + assert base_geo == "state_code", "base unit must be 'state_code'" + + # load each data stream + denom_data = load_chng_data(denom_filepath, dropdate, base_geo, + Config.DENOM_COLS_STATE, Config.DENOM_DTYPES_STATE, Config.DENOM_COL) + flu_data = load_chng_data(flu_filepath, dropdate, base_geo, + Config.FLU_INPATIENT_COLS, Config.FLU_INPATIENT_DTYPES, Config.FLU_INPATIENT_COL) + + # merge data + data = denom_data.merge(flu_data, how="outer", left_index=True, right_index=True) + assert data.isna().all(axis=1).sum() == 0, "entire row is NA after merge" + + # calculate combined numerator and denominator + data.fillna(0, inplace=True) + data["num"] = data[Config.FLU_INPATIENT_COL] + data["den"] = data[Config.DENOM_COL] + data = data[["num", "den"]] + + return data diff --git a/changehc/delphi_changehc/run.py b/changehc/delphi_changehc/run.py index 145f3267a..abdc97222 100644 --- a/changehc/delphi_changehc/run.py +++ b/changehc/delphi_changehc/run.py @@ -15,7 +15,7 @@ # first party from .download_ftp_files import download_counts -from .load_data import load_combined_data, load_cli_data, load_flu_data +from .load_data import load_combined_data, load_cli_data, load_flu_data, load_flu_inpatient_data from .update_sensor import CHCSensorUpdater @@ -34,6 +34,8 @@ def retrieve_files(params, filedate, logger): mixed_file = "%s/%s_Counts_Products_Mixed.dat.gz" % (params["indicator"]["input_cache_dir"],filedate) flu_like_file = "%s/%s_Counts_Products_Flu_Like.dat.gz" % (params["indicator"]["input_cache_dir"],filedate) covid_like_file = "%s/%s_Counts_Products_Covid_Like.dat.gz" % (params["indicator"]["input_cache_dir"],filedate) + flu_inpatient_file = "%s/%s_Counts_Products_Flu_Inpatient.dat.gz" % (params["indicator"]["input_cache_dir"],filedate) + denom_inpatient_state_file = "%s/%s_Counts_Products_Denom_Inpatient_By_State.dat.gz" % (params["indicator"]["input_cache_dir"],filedate) else: denom_file = files["denom"] covid_file = files["covid"] @@ -41,6 +43,8 @@ def retrieve_files(params, filedate, logger): mixed_file = files["mixed"] flu_like_file = files["flu_like"] covid_like_file = files["covid_like"] + flu_inpatient_file = files["flu_inpatient"] + denom_inpatient_state_file = files["denom_inpatient_state"] file_dict = {"denom": denom_file} if "covid" in params["indicator"]["types"]: @@ -52,6 +56,10 @@ def retrieve_files(params, filedate, logger): file_dict["covid_like"] = covid_like_file if "flu" in params["indicator"]["types"]: file_dict["flu"] = flu_file + if "flu_inpatient" in params["indicator"]["types"]: + file_dict["flu_inpatient"] = flu_inpatient_file + file_dict["denom_inpatient_state"] = denom_inpatient_state_file + return file_dict @@ -77,6 +85,9 @@ def make_asserts(params): if "flu" in params["indicator"]["types"]: assert (files["denom"] is None) == (files["flu"] is None), \ "exactly one of denom and flu files are provided" + if "flu_inpatient" in params["indicator"]["types"]: + assert (files["denom_inpatient_state"] is None) == (files["flu_inpatient"] is None), \ + "exactly one of denom_inpatient_state and flu_inpatient files are provided" def run_module(params: Dict[str, Dict[str, Any]]): @@ -187,6 +198,10 @@ def run_module(params: Dict[str, Dict[str, Any]]): elif numtype == "flu": data = load_flu_data(file_dict["denom"],file_dict["flu"], dropdate_dt,"fips") + elif numtype == "flu_inpatient": + data = load_flu_inpatient_data(file_dict["denom"],file_dict["flu_inpatient"], + dropdate_dt,"fips") + more_stats = su_inst.update_sensor( data, params["common"]["export_dir"], diff --git a/changehc/delphi_changehc/update_sensor.py b/changehc/delphi_changehc/update_sensor.py index 52a1af47f..117b57cbe 100644 --- a/changehc/delphi_changehc/update_sensor.py +++ b/changehc/delphi_changehc/update_sensor.py @@ -16,7 +16,8 @@ # first party from .config import Config from .constants import SMOOTHED, SMOOTHED_ADJ, SMOOTHED_CLI, SMOOTHED_ADJ_CLI,\ - SMOOTHED_FLU, SMOOTHED_ADJ_FLU, NA + SMOOTHED_FLU, SMOOTHED_ADJ_FLU, SMOOTHED_FLU_INPATIENT,\ + SMOOTHED_ADJ_FLU_INPATIENT, NA from .sensor import CHCSensor @@ -117,6 +118,8 @@ def __init__(self, signal_name = SMOOTHED_ADJ_CLI if self.weekday else SMOOTHED_CLI elif self.numtype == "flu": signal_name = SMOOTHED_ADJ_FLU if self.weekday else SMOOTHED_FLU + elif self.numtype == 'flu_inpatient': + signal_name = SMOOTHED_ADJ_FLU_INPATIENT if self.weekday else SMOOTHED_FLU_INPATIENT else: raise ValueError(f'Unsupported numtype received "{numtype}",' f' must be one of ["covid", "cli", "flu"]') @@ -154,6 +157,7 @@ def geo_reindex(self, data): "'state', 'msa', 'hrr', 'hss','nation'".format(geo)) return False if geo == "county": + assert data.index.names[0] == "fips", "can only convert fits to county, not %s"%(data.index.names[0]) data_frame = gmpr.fips_to_megacounty(data, Config.MIN_DEN, Config.MAX_BACKFILL_WINDOW, @@ -161,10 +165,12 @@ def geo_reindex(self, data): mega_col=geo, date_col=Config.DATE_COL) elif geo == "state": - data_frame = gmpr.replace_geocode(data, "fips", "state_id", new_col="state", + data_frame = gmpr.replace_geocode(data, data.index.names[0], "state_id", new_col="state", date_col=Config.DATE_COL) else: - data_frame = gmpr.replace_geocode(data, "fips", geo, date_col=Config.DATE_COL) + data_frame = gmpr.replace_geocode(data, data.index.names[0], geo, date_col=Config.DATE_COL) + + unique_geo_ids = pd.unique(data_frame[geo]) data_frame.set_index([geo, Config.DATE_COL],inplace=True) From 5214dcc1dd0e6bf34c3194b89bb9c52c2d054c45 Mon Sep 17 00:00:00 2001 From: Bryan Wilder Date: Mon, 17 Jan 2022 22:43:52 -0500 Subject: [PATCH 2/7] fix enough bugs for flu inpatient to run without errors --- changehc/delphi_changehc/config.py | 5 +++-- changehc/delphi_changehc/load_data.py | 6 +++--- changehc/delphi_changehc/run.py | 4 ++-- changehc/delphi_changehc/update_sensor.py | 4 ++-- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/changehc/delphi_changehc/config.py b/changehc/delphi_changehc/config.py index 374d278ad..81fed44dc 100644 --- a/changehc/delphi_changehc/config.py +++ b/changehc/delphi_changehc/config.py @@ -24,6 +24,7 @@ class Config: ## data columns COVID_COL = "COVID" DENOM_COL = "Denominator" + DENOM_INPATIENT_STATE_COL = "Denominator-Inpatient-State" FLU_COL = "Flu" MIXED_COL = "Mixed" FLU_LIKE_COL = "Flu-like" @@ -37,7 +38,7 @@ class Config: FILT_COLS = ID_COLS + COUNT_COLS DENOM_COLS = [DATE_COL, GEO_COL, DENOM_COL] - DENOM_COLS_STATE = [DATE_COL, GEO_COL_STATE, DENOM_COL] + DENOM_COLS_STATE = [DATE_COL, GEO_COL_STATE, DENOM_INPATIENT_STATE_COL] COVID_COLS = [DATE_COL, GEO_COL, COVID_COL] FLU_COLS = [DATE_COL, GEO_COL, FLU_COL] MIXED_COLS = [DATE_COL, GEO_COL, MIXED_COL] @@ -46,7 +47,7 @@ class Config: FLU_INPATIENT_COLS = [GEO_COL_STATE, DATE_COL, FLU_INPATIENT_COL] DENOM_DTYPES = {DATE_COL: str, DENOM_COL: str, GEO_COL: str} - DENOM_DTYPES_STATE = {DATE_COL: str, DENOM_COL: str, GEO_COL_STATE: str} + DENOM_DTYPES_STATE = {DATE_COL: str, DENOM_INPATIENT_STATE_COL: str, GEO_COL_STATE: str} COVID_DTYPES = {DATE_COL: str, COVID_COL: str, GEO_COL: str} FLU_DTYPES = {DATE_COL: str, FLU_COL: str, GEO_COL: str} MIXED_DTYPES = {DATE_COL: str, MIXED_COL: str, GEO_COL: str} diff --git a/changehc/delphi_changehc/load_data.py b/changehc/delphi_changehc/load_data.py index c1f5aaaba..edb0e8542 100644 --- a/changehc/delphi_changehc/load_data.py +++ b/changehc/delphi_changehc/load_data.py @@ -27,7 +27,7 @@ def load_chng_data(filepath, dropdate, base_geo, Returns: cleaned dataframe """ - assert base_geo == "fips" or (counts_col == Config.FLU_INPATIENT_COL and + assert base_geo == "fips" or ((counts_col == Config.FLU_INPATIENT_COL or counts_col == Config.DENOM_INPATIENT_STATE_COL) and base_geo == "state_code"), "base unit must be 'fips', or state_code for Flu-Inpatient" count_flag = False date_flag = False @@ -205,7 +205,7 @@ def load_flu_inpatient_data(denom_filepath, flu_filepath, dropdate, base_geo): # load each data stream denom_data = load_chng_data(denom_filepath, dropdate, base_geo, - Config.DENOM_COLS_STATE, Config.DENOM_DTYPES_STATE, Config.DENOM_COL) + Config.DENOM_COLS_STATE, Config.DENOM_DTYPES_STATE, Config.DENOM_INPATIENT_STATE_COL) flu_data = load_chng_data(flu_filepath, dropdate, base_geo, Config.FLU_INPATIENT_COLS, Config.FLU_INPATIENT_DTYPES, Config.FLU_INPATIENT_COL) @@ -216,7 +216,7 @@ def load_flu_inpatient_data(denom_filepath, flu_filepath, dropdate, base_geo): # calculate combined numerator and denominator data.fillna(0, inplace=True) data["num"] = data[Config.FLU_INPATIENT_COL] - data["den"] = data[Config.DENOM_COL] + data["den"] = data[Config.DENOM_INPATIENT_STATE_COL] data = data[["num", "den"]] return data diff --git a/changehc/delphi_changehc/run.py b/changehc/delphi_changehc/run.py index abdc97222..9fec64e41 100644 --- a/changehc/delphi_changehc/run.py +++ b/changehc/delphi_changehc/run.py @@ -199,8 +199,8 @@ def run_module(params: Dict[str, Dict[str, Any]]): data = load_flu_data(file_dict["denom"],file_dict["flu"], dropdate_dt,"fips") elif numtype == "flu_inpatient": - data = load_flu_inpatient_data(file_dict["denom"],file_dict["flu_inpatient"], - dropdate_dt,"fips") + data = load_flu_inpatient_data(file_dict["denom_inpatient_state"],file_dict["flu_inpatient"], + dropdate_dt,"state_code") more_stats = su_inst.update_sensor( data, diff --git a/changehc/delphi_changehc/update_sensor.py b/changehc/delphi_changehc/update_sensor.py index 117b57cbe..f931f6197 100644 --- a/changehc/delphi_changehc/update_sensor.py +++ b/changehc/delphi_changehc/update_sensor.py @@ -165,10 +165,10 @@ def geo_reindex(self, data): mega_col=geo, date_col=Config.DATE_COL) elif geo == "state": - data_frame = gmpr.replace_geocode(data, data.index.names[0], "state_id", new_col="state", + data_frame = gmpr.replace_geocode(data, data.columns[0], "state_id", new_col="state", date_col=Config.DATE_COL) else: - data_frame = gmpr.replace_geocode(data, data.index.names[0], geo, date_col=Config.DATE_COL) + data_frame = gmpr.replace_geocode(data, data.columns[0], geo, date_col=Config.DATE_COL) From c8861a39fb59ac075df68b2d4cd1a2ffe5c05dcd Mon Sep 17 00:00:00 2001 From: Bryan Wilder Date: Tue, 18 Jan 2022 21:20:01 -0500 Subject: [PATCH 3/7] debug --- .../delphi_changehc/test_flu_inpatient.py | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 changehc/delphi_changehc/test_flu_inpatient.py diff --git a/changehc/delphi_changehc/test_flu_inpatient.py b/changehc/delphi_changehc/test_flu_inpatient.py new file mode 100644 index 000000000..20c036059 --- /dev/null +++ b/changehc/delphi_changehc/test_flu_inpatient.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Jan 18 08:40:49 2022 + +@author: bwilder +""" + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import datetime + +start_date = datetime.date(2020, 1, 8) +end_date = datetime.date(2022, 1, 8) +dfs = [] +for i in range((end_date - start_date).days): + print(i) + date = start_date + datetime.timedelta(days=i) + try: + data_date = pd.read_csv("../receiving/%s%s%s_state_smoothed_inpatient_flu.csv"%(date.year, str(date.month).zfill(2), str(date.day).zfill(2))) + except: + print('missing: {}'.format(date)) + data_date['date'] = pd.to_datetime(date) + dfs.append(data_date) + +data = pd.concat(dfs) +data = data.reset_index() + +plt.plot(data.date[data.geo_id == 'vt'], data.val[data.geo_id == 'vt']/100) + + +raw_flu = pd.read_csv('../cache/20220116_Counts_Products_Flu_Inpatient.dat.gz', header=None) +raw_flu = raw_flu.rename(columns={0: 'state', 1: 'date', 2 : 'total'}) +raw_denom = pd.read_csv('../cache/20220116_Counts_Products_Denom_Inpatient_By_State.dat.gz', header=None) +raw_denom = raw_denom.rename(columns={0: 'date', 1: 'state', 2 : 'total'}) +raw_flu.total.replace('3 or less', 1, inplace=True) +raw_denom.total.replace('3 or less', 1, inplace=True) +raw_flu['total'] = raw_flu['total'] .astype(int) +raw_denom['total'] = raw_denom['total'] .astype(int) +raw_flu['date'] = raw_flu['date'].astype(str) +raw_denom['date'] = raw_denom['date'].astype(str) +raw_flu = raw_flu[raw_flu.date <= '20220108'] +raw_denom = raw_denom[raw_denom.date <= '20220108'] +raw_flu['date'] = pd.to_datetime(raw_flu['date'], errors='coerce') +raw_denom['date'] = pd.to_datetime(raw_denom['date'], errors='coerce') +raw_denom = raw_denom.rename(columns={'total' : 'denom'}) +raw_flu['state'] = raw_flu.state.astype(float) +raw_flu = raw_flu[~raw_flu.state.isna()] +raw_flu['state'] = raw_flu['state'].astype(int).astype(str).str.zfill(2) +merged = raw_denom.merge(raw_flu, on=['state', 'date'], how='left') +merged = merged.fillna(0) +merged = merged.sort_values('date') + +subset = merged[merged.state == '50'] +plt.plot(subset.date, subset.total/subset.denom) \ No newline at end of file From 65157c88a3fb36baaa69cc67dd909fe9cbf4a972 Mon Sep 17 00:00:00 2001 From: Bryan Wilder Date: Wed, 19 Jan 2022 18:42:47 -0500 Subject: [PATCH 4/7] update tests --- changehc/delphi_changehc/run.py | 16 +++-- .../delphi_changehc/test_flu_inpatient.py | 61 ++++++++++++++++-- changehc/delphi_changehc/update_sensor.py | 14 ++-- ...s_Products_Denom_Inpatient_By_State.dat.gz | Bin 0 -> 10640 bytes ...00601_Counts_Products_Flu_Inpatient.dat.gz | Bin 0 -> 682 bytes changehc/tests/test_load_data.py | 25 +++++++ changehc/tests/test_update_sensor.py | 6 +- 7 files changed, 105 insertions(+), 17 deletions(-) create mode 100644 changehc/tests/test_data/20200601_Counts_Products_Denom_Inpatient_By_State.dat.gz create mode 100644 changehc/tests/test_data/20200601_Counts_Products_Flu_Inpatient.dat.gz diff --git a/changehc/delphi_changehc/run.py b/changehc/delphi_changehc/run.py index 9fec64e41..fc26c5867 100644 --- a/changehc/delphi_changehc/run.py +++ b/changehc/delphi_changehc/run.py @@ -172,6 +172,9 @@ def run_module(params: Dict[str, Dict[str, Any]]): stats = [] for geo in params["indicator"]["geos"]: for numtype in params["indicator"]["types"]: + if numtype == "flu_inpatient" and geo not in ("state", "nation", "hhs"): + logger.info("Skipping because flu_inpatient is not available at this geo", geo = geo) + continue for weekday in params["indicator"]["weekday"]: if weekday: logger.info("starting weekday adj", geo = geo, numtype = numtype) @@ -190,21 +193,26 @@ def run_module(params: Dict[str, Dict[str, Any]]): logger ) if numtype == "covid": + base_geo = "fips" data = load_combined_data(file_dict["denom"], - file_dict["covid"],dropdate_dt,"fips") + file_dict["covid"],dropdate_dt, base_geo) elif numtype == "cli": + base_geo = "fips" data = load_cli_data(file_dict["denom"],file_dict["flu"],file_dict["mixed"], - file_dict["flu_like"],file_dict["covid_like"],dropdate_dt,"fips") + file_dict["flu_like"],file_dict["covid_like"],dropdate_dt,base_geo) elif numtype == "flu": + base_geo = "fips" data = load_flu_data(file_dict["denom"],file_dict["flu"], - dropdate_dt,"fips") + dropdate_dt,base_geo) elif numtype == "flu_inpatient": + base_geo = "state_code" data = load_flu_inpatient_data(file_dict["denom_inpatient_state"],file_dict["flu_inpatient"], - dropdate_dt,"state_code") + dropdate_dt,base_geo) more_stats = su_inst.update_sensor( data, params["common"]["export_dir"], + base_geo ) stats.extend(more_stats) diff --git a/changehc/delphi_changehc/test_flu_inpatient.py b/changehc/delphi_changehc/test_flu_inpatient.py index 20c036059..8c299b46e 100644 --- a/changehc/delphi_changehc/test_flu_inpatient.py +++ b/changehc/delphi_changehc/test_flu_inpatient.py @@ -19,15 +19,16 @@ date = start_date + datetime.timedelta(days=i) try: data_date = pd.read_csv("../receiving/%s%s%s_state_smoothed_inpatient_flu.csv"%(date.year, str(date.month).zfill(2), str(date.day).zfill(2))) + data_date['date'] = pd.to_datetime(date) + dfs.append(data_date) + except: print('missing: {}'.format(date)) - data_date['date'] = pd.to_datetime(date) - dfs.append(data_date) data = pd.concat(dfs) data = data.reset_index() -plt.plot(data.date[data.geo_id == 'vt'], data.val[data.geo_id == 'vt']/100) +plt.plot(data.date[data.geo_id == 'ny'], data.val[data.geo_id == 'ny']/100) raw_flu = pd.read_csv('../cache/20220116_Counts_Products_Flu_Inpatient.dat.gz', header=None) @@ -53,4 +54,56 @@ merged = merged.sort_values('date') subset = merged[merged.state == '50'] -plt.plot(subset.date, subset.total/subset.denom) \ No newline at end of file +plt.plot(subset.date, subset.total/subset.denom) + + + +geo = 'county' +signal = 'smoothed_adj_outpatient_covid' +start_date = datetime.date(2020, 1, 8) +end_date = datetime.date(2022, 1, 8) +dfs = [] +for i in range((end_date - start_date).days): + print(i) + date = start_date + datetime.timedelta(days=i) + try: + data_date = pd.read_csv("../receiving/%s%s%s_%s_%s.csv"%(date.year, str(date.month).zfill(2), str(date.day).zfill(2), geo, signal)) + data_date['date'] = pd.to_datetime(date) + dfs.append(data_date) + + except: + print('missing: {}'.format(date)) + +data = pd.concat(dfs) +data = data.reset_index() + +dfs = [] +for i in range((end_date - start_date).days): + print(i) + date = start_date + datetime.timedelta(days=i) + try: + data_date = pd.read_csv("../../../test_chc/covidcast-indicators/changehc/receiving/%s%s%s_%s_%s.csv"%(date.year, str(date.month).zfill(2), str(date.day).zfill(2), geo, signal)) + data_date['date'] = pd.to_datetime(date) + dfs.append(data_date) + + except: + print('missing: {}'.format(date)) + +data_old = pd.concat(dfs) +data_old = data_old.reset_index() + + +place = 72127 +plt.plot(data.date[data.geo_id == place], data.val[data.geo_id == place]/100) + +plt.plot(data_old.date[data_old.geo_id == place], data_old.val[data_old.geo_id == place]/100) + + + +raw_flu = pd.read_csv('../cache/20220116_Counts_Products_Flu_Inpatient.dat.gz', header=None) +raw_denom = pd.read_csv('../cache/20220116_Counts_Products_Denom_Inpatient_By_State.dat.gz', header=None) + +filtered_flu = raw_flu[(raw_flu[1] > 20200501) & (raw_flu[1] < 20200601)] +filtered_flu.to_csv('test_data_flu.dat.gz', header=False) +filtered_denom = raw_denom[(raw_denom[0] > 20200501) & (raw_denom[0] < 20200601)] +filtered_denom.to_csv('test_data_denom.dat.gz', header=False) \ No newline at end of file diff --git a/changehc/delphi_changehc/update_sensor.py b/changehc/delphi_changehc/update_sensor.py index f931f6197..9ee0fdd93 100644 --- a/changehc/delphi_changehc/update_sensor.py +++ b/changehc/delphi_changehc/update_sensor.py @@ -141,7 +141,7 @@ def shift_dates(self): self.sensor_dates = drange(self.startdate, self.enddate) return True - def geo_reindex(self, data): + def geo_reindex(self, data, base_geo): """Reindex based on geography, include all date, geo pairs. Args: @@ -157,7 +157,7 @@ def geo_reindex(self, data): "'state', 'msa', 'hrr', 'hss','nation'".format(geo)) return False if geo == "county": - assert data.index.names[0] == "fips", "can only convert fits to county, not %s"%(data.index.names[0]) + assert base_geo == "fips", "can only convert fips to county, not %s"%(base_geo) data_frame = gmpr.fips_to_megacounty(data, Config.MIN_DEN, Config.MAX_BACKFILL_WINDOW, @@ -165,10 +165,10 @@ def geo_reindex(self, data): mega_col=geo, date_col=Config.DATE_COL) elif geo == "state": - data_frame = gmpr.replace_geocode(data, data.columns[0], "state_id", new_col="state", + data_frame = gmpr.replace_geocode(data, base_geo, "state_id", new_col="state", date_col=Config.DATE_COL) else: - data_frame = gmpr.replace_geocode(data, data.columns[0], geo, date_col=Config.DATE_COL) + data_frame = gmpr.replace_geocode(data, base_geo, geo, date_col=Config.DATE_COL) @@ -187,12 +187,14 @@ def geo_reindex(self, data): def update_sensor(self, data, - output_path): + output_path, + base_geo): """Generate sensor values, and write to csv format. Args: data: pd.DataFrame with columns num and den output_path: output path for the csv results + base_geo: base geographic unit of data before aggregation """ self.shift_dates() final_sensor_idxs = (self.burn_in_dates >= self.startdate) &\ @@ -200,7 +202,7 @@ def update_sensor(self, # load data data.reset_index(inplace=True) - data_frame = self.geo_reindex(data) + data_frame = self.geo_reindex(data, base_geo) # handle if we need to adjust by weekday wd_params = Weekday.get_params( data_frame, diff --git a/changehc/tests/test_data/20200601_Counts_Products_Denom_Inpatient_By_State.dat.gz b/changehc/tests/test_data/20200601_Counts_Products_Denom_Inpatient_By_State.dat.gz new file mode 100644 index 0000000000000000000000000000000000000000..7370de22d26b6e8fbd1a92060a254310cb634fce GIT binary patch literal 10640 zcmX9@dmz*A_wV8(v{j1QCW;WE%r?p=X=vF@=58dRjZG#+p->-L#awdBB^h&>`#oBu zTvqPvqH_Jjr`Uv)O9;QW@9&@Ic|Yfz=RD7OKj(eU>pAZu(Na$2J zUdFhz#%Fe{EEQ>`KLoQ|6;-0HW>LSjKUY16mb&Vp3@w!?O8Mr4RlB5 zcQCFFbM5x>!`_Z@?ba$gyzz@A>}>RBuHB@ULwRr`>P8&3*4JMRY$N{8OT@3nRD`gepB+fmpLF zU8&~l0dG7J%q!=Lr%iT>7pwEjkj$PLVeI?@{|^1u!JXpCjolgZ&v(^tOm6;}3*T4{ zUmx3?x1t9s^%!qnyMdU6rM7+Q;S(U~KTV4hR8!iBgXFWW{c#1KgEV9yq%?VK5u^@k z^^A3cAJi41TBsLk-4&!Nec`z*B}r8@DP&wsm^I3P5}jaGy7`EGcM-eY@w-C$)^ye7 zGzA>x?IRu@sQjHHPD$0U{e98&BIS1__C&BqcUkftN>vUEwl|FmSqySGX4rLCFHrb+ z&rhlNB4uTBODh~oT|#7i5iC0YtWAbSe|VH~J_BG-3!ye&72iEZniiE@A23CzO7C~2 z0^Cgd2x00zRq!o@xbq})R%xmR$!!YAHuEau8KOkau`?jY^yrG`K zYqYXC!`lFm-#G{t{i`#=d%S1lvbbQoV!ffN`N-ovuAdifL3rqhA} zWEtzs1Vvng{vru(3`#*O_G^ka^|X@of3NGYS%$Uj;!bv~_XLAXqF~qdDuSYLfWs0EaUTxsao7xDNLPN%pN8kN{to>zT>3sW zb~Y(U-yy=Vxy80p$*|skWB@1eOPkwjI}*N_?#-l9oK z`?2nMF)*uguj>d?YZetM=04npwdcU$MuUoFG<;Xww_Jq-JCO604uEq`l||7AUjNWT zsQCTNi;gfs8nX-w1*R7kw=h$)&Y=}CYTI)P-R-3M1eRc#?alKpEzEt)Vwo1emJk~{ zI`dV1G!-IDi_Xq~B}zyg^0x#`63u5}BDZ~sE~gNr`udUqbs`f_!%T8|zU=zQqf&|! zPnAxB$)~rbUn=*ww=wEIX>491(~=q67GJ080#_)De9;-8O>f0jC55%sk0wa!D@M=% zN^vkeI1yr@{UAV_D}REntQf6a>2a|k#lPJpol}Tbc5vMB60KOtv#djB+lLJKdYTmP z@FV#~m=vF0Bo3waY3UUE;=S#yEw~RIu*P-pb|Q~6DjDfOsr2 z3&Z5<*AO}mQJpa>)Sng3}QvH&%fBYZ&bOz$1IdX3DX ztA~IkIWz(T>)eK@iAI0XerLxJO={105O`WB5}-v%a8gpObdcK4?$dhFK+zg5nGqdvY+ms1Iv_- ze-x%}7+SoBr9G4$re2-Xrxm!t-PYZu_~ofLQx>La`U|_f+yPp7b?dbP7kK$b1$qdVc;g34-ys=KP{+M| z_If5%oBe7@RXv|Ba>wsKs?*_4IBih{J+Nb8cn?Hw$3wS@0$@v!DGH&4bGsCy=Jf=f zw&ifzvlPPqqlPFj?lsD2nh&_BjgSItm@{ZlRQ;CYtfwxaabs~=Z_B{9dRh|Z5gZWU z(-EILnWH0vSGWst+jNetO(SPWLEed1oI9uX$Q>5BXy;O@MWZ5T?M&BSLjl?YBV5_n zpGD3kSza~#S|`V}AA9a_4q9~N67e5BuxUSb-2UHaF`=xD^WrIk&p2E)jjE5q0s@~T z7hfFfo`h>9iOwss&=duOfI{nbR5}EV>#&2v{s+@xM_FUjrq07Pt~_yZge4_mf##+EZWcNlAII1dDwL)}bAEeB7N)Pp`nFu7(48&e zNz;#95xE#1a)G1Jxqg*?scv!S4G*nx1*t6oCtrF{BgYDERm$6D)Fkz^y@8{k;ONxB zps6qe^~jY&Gj}3X*!d|S92&JMh{2+3+i!{_FQjh1^us?~iTFIrHA`*kD{#k42=f_S zGlFlYH$EgCvhHrq<`Em6z*uV(GzROw@$MEJ6vcLq2!I^0Mp3$T0o5d=dM54HFADyw zWqJbz(6$TDPbK92V`u4UV`rJ87lXA*q*3u^!VnU(051E@5C03PopePI+u@e1Be5cv9 zcV1t=24}R9lj*fNr+9m#rC%#{guD`b5F%add4fC<=&x_`9!9n|hC3i`&fYlt#nYGU z`n^^6Hlk`=hWiw=;ljC@UU}nmz;%rM2Yrz^QCW7wtM-|Pd1UqJ>vYw;@fU5630f)- z-c!vVZpseYFr@OdIEAwF<4umRr_Gzxc7Ur?YXidK)L62z8y|z{vTb1fBP?ZQyX0qq z(w{&aSw)>b#f#FF~A>C}@ z$XtP)>3CjKq#al^CHPlaOBk=U2cmz>X5mOC@H_}!R$?h<+bVB0FL!TaF1zlxTDyI5sWETf zya2)>pKo|8A{e}AGPLKoTV_ftW>_ogc*OiU`{dW}_4UxsaKe>sJzguX4meG)tXF~$ zdJP2ERD&$9l0kQ@U$75Vem@d~BVnc>ArtACFgeN@ym4mY`y zXv2x^e|uU?3+Z@d9eQAAtdL!kz0(JBFiQEpyPYAGSENM0;BnsC7Sl$*gwO@!6b#d| zB-^a{jXb8@v%5~Fe74*(Rvv|L$MkqHx-#3=ssKGw|J?AZ-5hZl4oe!p34D6~13D9W z8fFn6?+_*PTDQ%<0*Sr311eXzx!}w)qg( z@xx!AG7;BqtqTQ5vD8bS_!11MMOCom9*#($ucaSgYJk#Elbo8AY>O?c%+?-ae(JYu zD6axXRnHE+u^y=(X1q@g7%)*Q-0 z1o#qCT&e_l1dYDA9P6JxFGV;)9RmV;kg7EZwixw) zLU|0feux&8WL$~_(9#7=giS?^W8gz;gcSj$`1qP!zWamaLaJMMVMl z=_a2_kcv072jjqi!D`z*eixzrbC5y~TvVcvPsPi;6iF9xbGuX}IY2f-B9C_iNvb}e z9~4zhGZZqg?_OSA%|JJ%Vc&Zba3I2(WZr?it1AX9llx`;Y}oD@YsVOI%**a6OkT_E zrCl6r?1LLeSqjNHzBNMd?!OYcnYrheCozHY<_<05YhLZ%b{c&i8?UWH<=F~+V;Lj^ z->*Ll2QeIvRAh+v$I*&5(uzWiiKwZC}H>6pkJ6j#fL z5SA}s1E3s8OES5EjUX}S{ACUUcXA)17KD?@^$LO}*(5rlJDIa-`s;3(Jb~(J69{)Q z9zY21<}>M3XmYc!9t>#kLj59x(QqI!N$5bqVkK$>2P`Aay9M~%7kzA>KD#JLzJ(JL z6=V!BaQkx&*7Vs~621{O^)oegMx~jOW zgrdD%ob7p?+;F5o(bItS=3&m7JHat{iV8^@{{!4ZQdJeSkV=U9Ra_uszsV?f28&nEaUP%}N2?k3D6KQb_S@)rBZm(3F>t(= z``?q))wwOH_x^G&p?_?uyU zErZ~qlYa^0`k8W8o>p$Jtu=+_&tUSr>T5_Xtd*lJJue8hvkzRnPR>(ES#skDD$15B zcD#1iJY}PY09g#SyE&_xaFPB*HeVe0u)3U_o{C-t`asM8Mx1Yxv1ijv-Sa zA^Q!ExbUC8Xw8W0RkSB}8Os>eSJ#X)yRC5G++LT@9I#eW zowQJ2OFHf(N`nLeK+VU=Y{wzC_-_4`I_uCn*FXjN4i*aVv@@}fnv5jXvL1c3z6%m7 zeDI4yYaV(1fl62&_5uP)PpFoDgh^0<$tNYC%O5n*2;Enu3*a8a8r*92mIf zmgpvVmP&$EK@=K$=f^8&I2&{HvZZ{4N<#T|ZA@hEflmzS5lUPbjljz!>|%)T&hn$s z*?-qh>VU)}fwj81#eeHCrYt$ z#34|69!zc%wLYA>kp1SM8A?OtcX}Tg-`W;Lc}d2v60#mpxpIe30tnno%Wp0N4o`+n zlFg(3JvLbi-G4-EfbzpjaDO+8oxo928;!;A&||+#6%NS$(PY6j%cWLn_^5yR1Os)I z->pYn4AlQ)w|fH=pL8H(nuexw;Ii+ses^t6xYv#~P(LS_e9;oGr70DHc?6e*CZ>Wt zRrKfzV{~BP(hxMLtMMjUn9Mbk2rp7LrOvgSWlVLY>VNfx?34Yo#9qd_=5EcJ#?gYA>Ox5sS|dGwoPc%J3N7t>Jb735Kys0E*imAilV`R?m6|JG4%6xi0? zqTg=Xo2D}&o#)=`qfG{VCDkmb(+)1VYKbm&W!Sdz}W3wF~M=kt4H1*AxT}5cLA2Jx>=9)Lk`76jh z4JnQG#cL!u^y#aQYC8*tQXdO^ICXH@LNC9MFX5r`g~s&Uy^Fgs$5irwdyn}l;Yiwa z92^_3#GtF9dC5Ri1Oa28u0H6Gmn(GqMK{Y06X8s4aI#zvbE*A-lH^ldRrGj%8*rXs zd@$n)!DvW$h4Caw<#e>NW}dmJGvkRtr;P=7A&ec);N~XfT-1Te{nMK4j>lz2?K10gjAKl8XFunS#0=u7ZPu>*yRt2-=ZiL8&mu< zWY@o`ededt7k->VTX~J=Uzspi2vDbqQGRJJdRi?T2TXjpZLz*ZKrE~A-0kW^7?kPo zw)?3ku#MnV=d|${I7L2xEC!byvN{+OWu$QP(`)Ms!3HLc3MqSv)Z|9$11`6g1hnuj z{%J_KYfuu@YX-E0KUSfrlls2T7nW&GGRg2AE$kY~#9zaY+$%3FUIn*!@blYYn7oOc zn11HyUVdypb3xvC?#($6?p?@;fY~FXxYwl+7-hU1vS)elxt0ZJ3ZK5ZOd%MuQ-I)F z@^KC-*Mevc_x76ZK-m+>U^qERT=384g!>*+Bgq8 z9y9+nEG4#N@otMr#OdFVHg@09lph+BYG7V37$8N$^tYPb&MTwBiABEtaP~l0o5)lWqyrn*8yop z90pP<+QL9r3n|qUV%IYM-{YlX^hn|{Vhj!@Sh(A=O6_A~@)D2Hi(vLmqHz|KzZi!> zsDuV%hPv<1I##}4YI`zC-Mj1f#H3^EFC%4`ak&a@bb4jC^)0MMmc;id}33vNh09ygOZn z$vXgLa?sejeXsgsWM-rT-!cQpI)GJ1&d0WrsjCAkVY6lJL?>Bme)ZejHDOm!*=B@e zJPBd?nLo%Q3Gu$W+TXf6JaWh06(m__8`U{t0&6ce8E8}XHf7Q6JB`LWi_+$Y%X@#X zAT|CH%O+6>9rDeEomFY`muIqV`OLl-F^;K$h>2Gly$&vzMI~pikuJiHz~Va1h=irS zO%h?un>f&IC;n@&#q}AmfGOa$H5Yq3^t32@ja)Hmn->jVF=S?fKWTZ1x`Jr(vaVF8 zxn(cn8N>?%n_lNJOrEVzN@v4JfPU36NYE0jF5oSc!2oXGHWtgUc=%g17Hl}eBTs9Tn%1qBlvn*T(|oHeL+ibhi=4nksZkGM#C0Ja}FT~9PY(4tUr zmMMLV43C&wPf^Il^rnIjL{orViz$K#bV4QU;aaco;y36K-o;@6l)3%7^--7o+IQM1 zM&`51{WN>zM84Bl1}uqM4qycK0Kq~skDxHJH^PQ(iSgvjg)z9QaZm2tb?66{Fak;f zg(;G?utdA)I^w_<$Mhx?YijPol7bP@3~UuEMiqr0JP~ z`br`e(Q%5I1gzu)-o}#E}kX-j~(M0atL5^j6vdq;O$o-NyEoR-m>KeZEPjg(mn%_RTTl+(ADtm zcDmQXT+jk)yf_MD_!cb5(Big_1D}y$-tD6uP2pPR&IaWWEfF*o)aRWC}!$xMTLh>BaLqVp9jGAq+ngmByQUYjpr7wDK7Tnw zh&)E?kB{n4B|t~@wsu~uN5mmEx>-F@>9S4Nx!P>#d^fo(B;CK$`PYLh1AI`PwZS4a z&C>pLz*l{rCdvomB?OJ=6Z$TF5jl}ktE5MVCQm)6%(r%9bbRV^n!N-0e}S)LuQT2C z6v5uRJ=z6O9BY7ZD8go&u`Vnz$D%77&`Ywu1I9sYUj-U_Iq;J9&Rc6;OX1l`d_Q>f z11`OY9p04s!6rRhm8zyfR_m_E3Y@2ajndqwhZ^9rr!ul%z~tujQm83BkecK?|t(RS7qY=Xj@s0~VEAbO>f@1ID_&6x*2EWQVr?y_-Va zb+-X(I0$-Ts{AQKRx>Q_C`qxKAX&ShG40JQ^(jr}gGAgpXaN~Ncj!0q0wU?WQO)xV zW$TaWZw!v9gt-M~Z`Ta#jW#Iurt=EEx{>9(39B_>qhR*4jLEDENY?17AAxz>=Qo!p zxu*5ki~V=1T+Vla^%__4!O=$>XMODE9G8e@h$K_X?WJjed}H{SCt13?UQ>|m1$TY2 zw43Jz?)Ek?9~0UbOlRmPp^&VRqm5z4e0F(n+G>CrO=gjG$rtfc-$Mc5P2>|!ge9Yp zUlI3^EIxCzX%1ffkX+;PJHoV3I|n-BVi`_`Zke|~zWRQ?*;nnG01h5sKYH|pH(igt z_&MC6mvK8{0o1OS3(OfiWonc3A_XYk`I=kmM3@BrfL4W&HNu_UX1x}>yvMe*hmadc zr9w^5qPgR3L?c9!aVbZccoyip-&v;gFqR}mB$R659@{#ww2+eU-q-vZ_zTx-zKgI_ zGti>1vK}0_cFUBPwS4tQopRyqqcA`7qI_dv0zBLBeM%mJHDWy14~OyAj6x6gfJm2m zOLv1GA8mZ&-_w!SPaBD&_s5@>38{eYL$BuT2AiW-qx$dvei7E`eU`n)=gc&j&daxc zq@59b{o*-%VAXB%A6szo5m< zL0|1VrF+KHvKz!j$q7P)X|OON)YoB)K3$;^?Ik`IIsF%DrfaGxH@hSg+EIM2W9yF( z57J*&@&Xugr8@`H`pcEzaCdx1gL`>q8-f+SIoj7H-clLWk{*Zhb1P^uOZcE zj?LvsE(S(a-6Rh;P}?p8{esIA^GrPrdlhG(_#|^rH&n=cJMi5gBFg3NJ@VJCdQ#c5 zGNgtz;$Am%pQ_Fh8DBdZMI;&^Ru>~am65-6)l-CrQP+U9=e+p2w8)nGP=#_S1D<#CE`%~Q=y7RUKkr&P6brZ92QymAeoC&O=mmF^n{||XCz@4 z!KSu|u+?7*oG52?X=gw?3LOUAQ@R-&12LII6e7vCQMlr6wi<1!2a`Sg9a-(2AJuk` zLl1-O^Wv9exS!pe>d`H!)0^mOPU~-S<9U7Vs<$uR`N#k`COtOFMuZ&(Q8x~DbXBFn zY4Y$eLi}tvm!}Z5LoK2X@c36VjXIgUVS>KB+~qV>kt;mhOzQ8d&nteUep?}R&x86I{j#GL^hYEBVn5)_QZE}y z*z96hXqs%h-ZEYReWo5w=9b@H>`G6AkA^6HHFh}I&8WM*_|}_{D>Z-N-;SI#e;-HN z&YN4WPJJ;rv{XACI$F%4 z4x=&wdh6187RyCM+#`=GD}{JlL%BNYV$OPi#b_E`Od5>#PbN+MveM)h2;N4+kF%c z&clr=huFE7YMOv+hAOq4S!M_?8Db}Kg8shhWOKaM&(83BLW3_W;Z|$0#x$ARodx{i zJmCfOqk6EE4XMG1{)+g5W+~t+3i?$LSm@t=W)K3l2n24yMF!#Cq_10KQyr-NOi@%l z5SmI1!leWrgoNW}$D_UIKMR6ms{_alEByM)TZU}}D~L z!}VHOGh6OhVm~|ncr6Vd{#7&Do9oh)K3)8v@n@16a>8m|oJ>Mas5@A!RmYQuTdCjF zM;Hc`A;p#M0~x9{Rk7snjxVmclJgjlTZuHTWy!!!pgUC=LIOa=6JdHJ`Lb3Uaseo# z8h!~SX?6!4C=PUF@eJ4u2ZY9jIh1=hJ87700mfZ;40%H)_-V!`3z4Qzgc&*}0Pr;^ zcg_@J@-h{y+zr?+kDlgp&~0lo%N-`^{s)SO25RY%r@#E?rPlpv?8kZsF1<3mJ?6v- z3U`A(=Xj9E$Z#^+4xwN8edU)i12^8v2C+CmyN8uhDmxEl3llFu){FDk{<}{q0&z+ zzVHe(>CMc)mCrGO)6CKm7S!)bEsX=hDb#5eTWeZ8R?v>{n8?QxB?C9beZZXgZ}#OS zIncF?`ueIU^7p$}8>=3YLuc9F8g~5x;H?qg@E4eZ2=rIslTT244XIgW_}}TZcZunEAP5<+FaCya$1{<)El- zu!SL}+)HC_36tn&M2#{mD`6H+I46g=63;!%tp3hi)1+)>GK3)SZ zaUokJAc?2T*ByEVN@CVxS{q_o$GmixYNwY&o?YURY5=H&&IT{GdgDtD-2O;BTRr-Z zz{S`={mPvNPIVC39)k*Rq><*kLJ&p8oNM^!j-A~f#?uzbc5Xlgd2Uujfh3MEU50T3 zPE3|-XA=D`P&j^?z*m9U_u zsvBhclc-sff$oytv3-Lbz@D;@)5&Vc>Fzx4$8MSQ#SDwasxY#S0ekOUZwC8oF_w6) zdXynrothjeS?{UlGN6`N2M=Fphd9k%nm^d)wTKT5zBGdY zN@#P(x6N=Ls{7Z(Mjsi`Td-ClWT2}D! z@kvik-gzJ}m*A4G{NsknWxk;YLxH_jquO)Q?9+JJSqidw=W7n9$jYLJ0&Xch-ALUv zGA#K}<%=V!y~}x2Wd%q^13$}Ed|Os8K7IKv^VP>4qjdEnqRs8wwry_i+$KI$rGDY@ Hwr&3hz|Db9 literal 0 HcmV?d00001 diff --git a/changehc/tests/test_data/20200601_Counts_Products_Flu_Inpatient.dat.gz b/changehc/tests/test_data/20200601_Counts_Products_Flu_Inpatient.dat.gz new file mode 100644 index 0000000000000000000000000000000000000000..020d25e14b9e4478753da521feedb92fdbe3bb46 GIT binary patch literal 682 zcmV;b0#*GViwFp#pXgx%|8!+@bYEm)bYWj+Y;`VVVRQg=*G-BYF$_l0c~+4Hh+xbA z3(71INbet_9zER;H0(Z9*;Yxi-W=mv@4uRP^VU{#8$bX4`}ybh*Vm6Z#%0rPUqR13?>WZB=$~(47 zy9l1(L-2sJwVct4jE|t>AcDh=mpJoD25qI@ueNbY!DC*(6-`<(nskYg!FkO|?B=6H z&$otR_d==bBOV0jTcQo5VyPF$iq*+G+bSZxVjVvVj#%ABgnoZh{Sm`CXV1m@B|qjVocAc zd@`oz&wjkoJhyvA#78mW<0Y*Iy+BH%{c^um1Z+GpH1E>TcoZGm5ybBr-&jZX2qF*J1OQ1eXs{yTwIDsAH|C$1z*lB;C}!B Q0RR630N= Date: Wed, 19 Jan 2022 19:11:15 -0500 Subject: [PATCH 5/7] remove messy testing code --- .../delphi_changehc/test_flu_inpatient.py | 109 ------------------ 1 file changed, 109 deletions(-) delete mode 100644 changehc/delphi_changehc/test_flu_inpatient.py diff --git a/changehc/delphi_changehc/test_flu_inpatient.py b/changehc/delphi_changehc/test_flu_inpatient.py deleted file mode 100644 index 8c299b46e..000000000 --- a/changehc/delphi_changehc/test_flu_inpatient.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Tue Jan 18 08:40:49 2022 - -@author: bwilder -""" - -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -import datetime - -start_date = datetime.date(2020, 1, 8) -end_date = datetime.date(2022, 1, 8) -dfs = [] -for i in range((end_date - start_date).days): - print(i) - date = start_date + datetime.timedelta(days=i) - try: - data_date = pd.read_csv("../receiving/%s%s%s_state_smoothed_inpatient_flu.csv"%(date.year, str(date.month).zfill(2), str(date.day).zfill(2))) - data_date['date'] = pd.to_datetime(date) - dfs.append(data_date) - - except: - print('missing: {}'.format(date)) - -data = pd.concat(dfs) -data = data.reset_index() - -plt.plot(data.date[data.geo_id == 'ny'], data.val[data.geo_id == 'ny']/100) - - -raw_flu = pd.read_csv('../cache/20220116_Counts_Products_Flu_Inpatient.dat.gz', header=None) -raw_flu = raw_flu.rename(columns={0: 'state', 1: 'date', 2 : 'total'}) -raw_denom = pd.read_csv('../cache/20220116_Counts_Products_Denom_Inpatient_By_State.dat.gz', header=None) -raw_denom = raw_denom.rename(columns={0: 'date', 1: 'state', 2 : 'total'}) -raw_flu.total.replace('3 or less', 1, inplace=True) -raw_denom.total.replace('3 or less', 1, inplace=True) -raw_flu['total'] = raw_flu['total'] .astype(int) -raw_denom['total'] = raw_denom['total'] .astype(int) -raw_flu['date'] = raw_flu['date'].astype(str) -raw_denom['date'] = raw_denom['date'].astype(str) -raw_flu = raw_flu[raw_flu.date <= '20220108'] -raw_denom = raw_denom[raw_denom.date <= '20220108'] -raw_flu['date'] = pd.to_datetime(raw_flu['date'], errors='coerce') -raw_denom['date'] = pd.to_datetime(raw_denom['date'], errors='coerce') -raw_denom = raw_denom.rename(columns={'total' : 'denom'}) -raw_flu['state'] = raw_flu.state.astype(float) -raw_flu = raw_flu[~raw_flu.state.isna()] -raw_flu['state'] = raw_flu['state'].astype(int).astype(str).str.zfill(2) -merged = raw_denom.merge(raw_flu, on=['state', 'date'], how='left') -merged = merged.fillna(0) -merged = merged.sort_values('date') - -subset = merged[merged.state == '50'] -plt.plot(subset.date, subset.total/subset.denom) - - - -geo = 'county' -signal = 'smoothed_adj_outpatient_covid' -start_date = datetime.date(2020, 1, 8) -end_date = datetime.date(2022, 1, 8) -dfs = [] -for i in range((end_date - start_date).days): - print(i) - date = start_date + datetime.timedelta(days=i) - try: - data_date = pd.read_csv("../receiving/%s%s%s_%s_%s.csv"%(date.year, str(date.month).zfill(2), str(date.day).zfill(2), geo, signal)) - data_date['date'] = pd.to_datetime(date) - dfs.append(data_date) - - except: - print('missing: {}'.format(date)) - -data = pd.concat(dfs) -data = data.reset_index() - -dfs = [] -for i in range((end_date - start_date).days): - print(i) - date = start_date + datetime.timedelta(days=i) - try: - data_date = pd.read_csv("../../../test_chc/covidcast-indicators/changehc/receiving/%s%s%s_%s_%s.csv"%(date.year, str(date.month).zfill(2), str(date.day).zfill(2), geo, signal)) - data_date['date'] = pd.to_datetime(date) - dfs.append(data_date) - - except: - print('missing: {}'.format(date)) - -data_old = pd.concat(dfs) -data_old = data_old.reset_index() - - -place = 72127 -plt.plot(data.date[data.geo_id == place], data.val[data.geo_id == place]/100) - -plt.plot(data_old.date[data_old.geo_id == place], data_old.val[data_old.geo_id == place]/100) - - - -raw_flu = pd.read_csv('../cache/20220116_Counts_Products_Flu_Inpatient.dat.gz', header=None) -raw_denom = pd.read_csv('../cache/20220116_Counts_Products_Denom_Inpatient_By_State.dat.gz', header=None) - -filtered_flu = raw_flu[(raw_flu[1] > 20200501) & (raw_flu[1] < 20200601)] -filtered_flu.to_csv('test_data_flu.dat.gz', header=False) -filtered_denom = raw_denom[(raw_denom[0] > 20200501) & (raw_denom[0] < 20200601)] -filtered_denom.to_csv('test_data_denom.dat.gz', header=False) \ No newline at end of file From c4e2faf77904d44f39e0bfd0af29c5a7332d6478 Mon Sep 17 00:00:00 2001 From: minhkhul <118945681+minhkhul@users.noreply.github.com> Date: Tue, 23 May 2023 21:25:57 -0400 Subject: [PATCH 6/7] hard-coded fips to base_geo in run.py --- changehc/delphi_changehc/run.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/changehc/delphi_changehc/run.py b/changehc/delphi_changehc/run.py index 30c3da5e9..99b08f7a9 100644 --- a/changehc/delphi_changehc/run.py +++ b/changehc/delphi_changehc/run.py @@ -198,19 +198,19 @@ def run_module(params: Dict[str, Dict[str, Any]]): if numtype == "covid": base_geo = "fips" data = load_combined_data(file_dict["denom"], - file_dict["covid"], "fips", + file_dict["covid"], base_geo, backfill_dir, geo, weekday, numtype, generate_backfill_files, backfill_merge_day) elif numtype == "cli": base_geo = "fips" data = load_cli_data(file_dict["denom"],file_dict["flu"],file_dict["mixed"], - file_dict["flu_like"],file_dict["covid_like"], "fips", + file_dict["flu_like"],file_dict["covid_like"], base_geo, backfill_dir, geo, weekday, numtype, generate_backfill_files, backfill_merge_day) elif numtype == "flu": base_geo = "fips" data = load_flu_data(file_dict["denom"],file_dict["flu"], - "fips",backfill_dir, geo, weekday, + base_geo,backfill_dir, geo, weekday, numtype, generate_backfill_files, backfill_merge_day) elif numtype == "flu_inpatient": base_geo = "state_code" From 277c71a161be3504ef817c8d590f02861cb94488 Mon Sep 17 00:00:00 2001 From: minhkhul Date: Wed, 24 May 2023 16:50:24 -0400 Subject: [PATCH 7/7] linter change --- changehc/Makefile | 4 ++-- changehc/delphi_changehc/load_data.py | 2 +- changehc/delphi_changehc/run.py | 1 - changehc/delphi_changehc/update_sensor.py | 3 --- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/changehc/Makefile b/changehc/Makefile index bc88f1fec..31d406fc4 100644 --- a/changehc/Makefile +++ b/changehc/Makefile @@ -17,8 +17,8 @@ install-ci: venv pip install . lint: - . env/bin/activate; pylint $(dir) - . env/bin/activate; pydocstyle $(dir) + . env/bin/activate; pylint $(dir);\ + pydocstyle $(dir) test: . env/bin/activate ;\ diff --git a/changehc/delphi_changehc/load_data.py b/changehc/delphi_changehc/load_data.py index 1b421dd81..f55f1f2f1 100644 --- a/changehc/delphi_changehc/load_data.py +++ b/changehc/delphi_changehc/load_data.py @@ -30,7 +30,7 @@ def load_chng_data(filepath, dropdate, base_geo, Returns: cleaned dataframe """ - assert base_geo == "fips" or ((counts_col == Config.FLU_INPATIENT_COL or counts_col == Config.DENOM_INPATIENT_STATE_COL) and + assert base_geo == "fips" or (counts_col in {Config.FLU_INPATIENT_COL,Config.DENOM_INPATIENT_STATE_COL} and base_geo == "state_code"), "base unit must be 'fips', or state_code for Flu-Inpatient" count_flag = False date_flag = False diff --git a/changehc/delphi_changehc/run.py b/changehc/delphi_changehc/run.py index 30c3da5e9..3b2905d86 100644 --- a/changehc/delphi_changehc/run.py +++ b/changehc/delphi_changehc/run.py @@ -59,7 +59,6 @@ def retrieve_files(params, filedate, logger): if "flu_inpatient" in params["indicator"]["types"]: file_dict["flu_inpatient"] = flu_inpatient_file file_dict["denom_inpatient_state"] = denom_inpatient_state_file - return file_dict diff --git a/changehc/delphi_changehc/update_sensor.py b/changehc/delphi_changehc/update_sensor.py index 9ee0fdd93..8bdd77337 100644 --- a/changehc/delphi_changehc/update_sensor.py +++ b/changehc/delphi_changehc/update_sensor.py @@ -169,9 +169,6 @@ def geo_reindex(self, data, base_geo): date_col=Config.DATE_COL) else: data_frame = gmpr.replace_geocode(data, base_geo, geo, date_col=Config.DATE_COL) - - - unique_geo_ids = pd.unique(data_frame[geo]) data_frame.set_index([geo, Config.DATE_COL],inplace=True) # for each location, fill in all missing dates with 0 values