# export_start_date = PARAMS["indicator"]["export_start_date"]

# Directory the tests expect generated export CSVs to be written into.
EXPORT_DIR = "./receiving"

# Socrata app token handed to pull_nchs_mortality_data in the tests below.
# NOTE(review): left empty — presumably the test-data file path short-circuits
# any live Socrata call, or unauthenticated access suffices here; confirm.
SOCRATA_TOKEN = ""
class TestPullNCHS:
    """Tests for pulling and standardizing NCHS mortality data."""

    def test_standardize_columns(self):
        """Raw NCHS column names are renamed onto the standardized schema."""
        raw = pd.DataFrame(
            {
                "start_week": [1],
                "covid_deaths": [2],
                "pneumonia_and_covid_deaths": [4],
                "pneumonia_influenza_or_covid_19_deaths": [8],
            }
        )
        result = standardize_columns(raw)
        expected = pd.DataFrame(
            {
                "timestamp": [1],
                "covid_19_deaths": [2],
                "pneumonia_and_covid_19_deaths": [4],
                "pneumonia_influenza_or_covid_19_deaths": [8],
            }
        )
        pd.testing.assert_frame_equal(expected, result)

    def test_good_file(self):
        """A well-formed file yields the expected columns and NaN placement."""
        pulled = pull_nchs_mortality_data(SOCRATA_TOKEN, "test_data.csv")

        # Test columns
        expected_cols = [
            "covid_19_deaths",
            "total_deaths",
            "percent_of_expected_deaths",
            "pneumonia_deaths",
            "pneumonia_and_covid_19_deaths",
            "influenza_deaths",
            "pneumonia_influenza_or_covid_19_deaths",
            "timestamp",
            "geo_id",
            "population",
        ]
        assert (pulled.columns.values == expected_cols).all()

        def _null_weeks(frame, key_col, key, metric):
            # Timestamps at which `metric` is missing for the given geography.
            return frame.loc[
                (frame[key_col] == key) & (frame[metric].isnull()), "timestamp"
            ].values

        # Test aggregation for NYC and NY: the merged "ny" row is missing a
        # metric exactly when both the NY and NYC source rows are missing it.
        raw = pd.read_csv("./test_data/test_data.csv", parse_dates=["start_week"])
        raw = standardize_columns(raw)
        for metric in METRICS:
            ny_weeks = set(_null_weeks(raw, "state", "New York", metric))
            nyc_weeks = set(_null_weeks(raw, "state", "New York City", metric))
            merged_weeks = set(_null_weeks(pulled, "geo_id", "ny", metric))
            assert merged_weeks == ny_weeks & nyc_weeks

        # Test missing value: every other state's missing weeks carry through
        # from the raw file to the pulled frame unchanged.
        gmpr = GeoMapper()
        state_ids = pd.DataFrame(list(gmpr.get_geo_values("state_id")))
        state_names = gmpr.replace_geocode(
            state_ids, "state_id", "state_name", from_col=0, date_col=None
        )
        # NOTE(review): zip over two DataFrames iterates their column labels,
        # not their rows — presumably intentional given replace_geocode's
        # return shape here; confirm against GeoMapper.
        for state, geo_id in zip(state_names, state_ids):
            if state in {"New York", "New York City"}:
                continue
            for metric in METRICS:
                expected_weeks = set(_null_weeks(raw, "state", state, metric))
                actual_weeks = set(_null_weeks(pulled, "geo_id", geo_id, metric))
                assert actual_weeks == expected_weeks

    def test_bad_file_with_inconsistent_time_col(self):
        """An inconsistent time column in the input raises ValueError."""
        with pytest.raises(ValueError):
            pull_nchs_mortality_data(
                SOCRATA_TOKEN, "bad_data_with_inconsistent_time_col.csv"
            )

    def test_bad_file_with_missing_cols(self):
        """Missing required columns in the input raise ValueError."""
        with pytest.raises(ValueError):
            pull_nchs_mortality_data(SOCRATA_TOKEN, "bad_data_with_missing_cols.csv")
0 commit comments