Skip to content

Commit 8b6a0e6

Browse files
committed
finished converting raw time diff to using api
1 parent 2f2f63a commit 8b6a0e6

File tree

5 files changed

+661
-26
lines changed

5 files changed

+661
-26
lines changed

nhsn/delphi_nhsn/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,4 +65,4 @@
6565
f"{NUM_HOSP_REPORTING_RSV}_prelim": float,
6666
}
6767

68-
RECENTLY_UPDATED_DIFF = timedelta(days=2)
68+
RECENTLY_UPDATED_DIFF = timedelta(days=1)

nhsn/delphi_nhsn/pull.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,15 @@
44
import logging
55
import random
66
import time
7-
from datetime import datetime, timezone
7+
from datetime import datetime, timedelta, timezone
88
from pathlib import Path
99
from typing import Optional
1010
from urllib.error import HTTPError
1111

1212
import pandas as pd
13+
from delphi_epidata import Epidata
1314
from delphi_utils import create_backup_csv
1415
from sodapy import Socrata
15-
from delphi_epidata import Epidata
16-
from epiweeks import Week
1716

1817
from .constants import (
1918
MAIN_DATASET_ID,
@@ -48,25 +47,31 @@ def check_last_updated(socrata_token, dataset_id, logger):
4847
client = Socrata("data.cdc.gov", socrata_token)
4948
response = client.get_metadata(dataset_id)
5049

51-
updated_timestamp = datetime.utcfromtimestamp(int(response["rowsUpdatedAt"]))
52-
cdc_updated_epiweek = Week.fromdate(updated_timestamp)
50+
updated_timestamp = datetime.fromtimestamp(int(response["rowsUpdatedAt"]), tz=timezone.utc)
5351

52+
# pull the last-updated timestamp from the Epidata API's covidcast metadata
5453
meta_df = pd.DataFrame(Epidata.covidcast_meta()["epidata"])
55-
signal_suffix = 'prelim' if dataset_id == PRELIM_DATASET_ID else "ew"
54+
signal_suffix = "prelim" if dataset_id == PRELIM_DATASET_ID else "ew"
5655
nhsn_meta_df = meta_df[(meta_df["data_source"] == "nhsn") & (meta_df["signal"].str.endswith(signal_suffix))]
57-
last_updated = datetime.utcfromtimestamp(nhsn_meta_df["last_update"].min())
58-
covidcast_updated_epiweek = Week.fromdate(last_updated)
56+
est = timezone(timedelta(hours=-5))
57+
last_updated = datetime.fromtimestamp(nhsn_meta_df["last_update"].min(), tz=est)
5958

6059
# This is currently scheduled to run twice a week; RECENTLY_UPDATED_DIFF may need adjusting if that cadence changes.
61-
recently_updated_source = (last_updated - updated_timestamp) > RECENTLY_UPDATED_DIFF
62-
print("non")
60+
recently_updated_source = (updated_timestamp - last_updated) > RECENTLY_UPDATED_DIFF
61+
6362
prelim_prefix = "Preliminary " if dataset_id == PRELIM_DATASET_ID else ""
6463
if recently_updated_source:
6564
logger.info(
66-
f"{prelim_prefix}NHSN data was recently updated; Pulling data", updated_timestamp=updated_timestamp
65+
f"{prelim_prefix}NHSN data was recently updated; Pulling data",
66+
updated_timestamp=updated_timestamp,
67+
metadata_timestamp=last_updated,
6768
)
6869
else:
69-
logger.info(f"{prelim_prefix}NHSN data is stale; Skipping", updated_timestamp=updated_timestamp)
70+
logger.info(
71+
f"{prelim_prefix}NHSN data is stale; Skipping",
72+
updated_timestamp=updated_timestamp,
73+
metadata_timestamp=last_updated,
74+
)
7075
# pylint: disable=W0703
7176
except Exception as e:
7277
logger.info("error while processing socrata metadata; treating data as stale", error=str(e))

nhsn/tests/conftest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
with open(f"{TEST_DIR}/test_data/prelim_page.json", "r") as f:
2424
PRELIM_TEST_DATA = json.load(f)
2525

26-
27-
covidcast_metadata = pd.read_csv(f"{TEST_DIR}/test_data/covid_metadata.csv",
28-
parse_dates=["max_time", "min_time", "max_issue", "last_update"])
26+
# filtered covidcast metadata (contains only the NHSN entries)
27+
with open(f"{TEST_DIR}/test_data/covidcast_meta.json", "r") as f:
28+
COVID_META_DATA = json.load(f)
2929

3030

3131
@pytest.fixture(scope="session")

0 commit comments

Comments
 (0)