 import logging
 import random
 import time
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from typing import Optional
 from urllib.error import HTTPError

 import pandas as pd
+from delphi_epidata import Epidata
 from delphi_utils import create_backup_csv
 from sodapy import Socrata
-from delphi_epidata import Epidata
-from epiweeks import Week

 from .constants import (
     MAIN_DATASET_ID,
@@ -48,25 +47,31 @@ def check_last_updated(socrata_token, dataset_id, logger):
         client = Socrata("data.cdc.gov", socrata_token)
         response = client.get_metadata(dataset_id)

-        updated_timestamp = datetime.utcfromtimestamp(int(response["rowsUpdatedAt"]))
-        cdc_updated_epiweek = Week.fromdate(updated_timestamp)
+        updated_timestamp = datetime.fromtimestamp(int(response["rowsUpdatedAt"]), tz=timezone.utc)

+        # pulling last updated from the api
         meta_df = pd.DataFrame(Epidata.covidcast_meta()["epidata"])
-        signal_suffix = 'prelim' if dataset_id == PRELIM_DATASET_ID else "ew"
+        signal_suffix = "prelim" if dataset_id == PRELIM_DATASET_ID else "ew"
         nhsn_meta_df = meta_df[(meta_df["data_source"] == "nhsn") & (meta_df["signal"].str.endswith(signal_suffix))]
-        last_updated = datetime.utcfromtimestamp(nhsn_meta_df["last_update"].min())
-        covidcast_updated_epiweek = Week.fromdate(last_updated)
+        est = timezone(timedelta(hours=-5))
+        last_updated = datetime.fromtimestamp(nhsn_meta_df["last_update"].min(), tz=est)

         # currently set to run twice a week, RECENTLY_UPDATED_DIFF may need adjusting based on the cadence
-        recently_updated_source = (last_updated - updated_timestamp) > RECENTLY_UPDATED_DIFF
-        print("non")
+        recently_updated_source = (updated_timestamp - last_updated) > RECENTLY_UPDATED_DIFF
+
         prelim_prefix = "Preliminary " if dataset_id == PRELIM_DATASET_ID else ""
         if recently_updated_source:
             logger.info(
-                f"{prelim_prefix}NHSN data was recently updated; Pulling data", updated_timestamp=updated_timestamp
+                f"{prelim_prefix}NHSN data was recently updated; Pulling data",
+                updated_timestamp=updated_timestamp,
+                metadata_timestamp=last_updated,
             )
         else:
-            logger.info(f"{prelim_prefix}NHSN data is stale; Skipping", updated_timestamp=updated_timestamp)
+            logger.info(
+                f"{prelim_prefix}NHSN data is stale; Skipping",
+                updated_timestamp=updated_timestamp,
+                metadata_timestamp=last_updated,
+            )
     # pylint: disable=W0703
     except Exception as e:
         logger.info("error while processing socrata metadata; treating data as stale", error=str(e))
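For reference, a minimal standalone sketch of the staleness check this diff introduces: Socrata's rowsUpdatedAt (a Unix timestamp, taken as UTC) is compared against the covidcast metadata's last_update (interpreted as UTC-5, as in the diff), and the source counts as recently updated when it is newer by more than RECENTLY_UPDATED_DIFF. The function name and the threshold value below are illustrative, not the package's API; the real threshold lives in .constants.

# Sketch only: names and the placeholder threshold are assumptions for illustration.
from datetime import datetime, timedelta, timezone

RECENTLY_UPDATED_DIFF = timedelta(days=1)  # assumed placeholder; real value comes from .constants

def source_recently_updated(rows_updated_at: int, covidcast_last_update: int) -> bool:
    """Return True when the Socrata dataset is newer than the covidcast copy by more than the threshold."""
    # Socrata's rowsUpdatedAt is a Unix timestamp; treat it as UTC.
    updated_timestamp = datetime.fromtimestamp(rows_updated_at, tz=timezone.utc)
    # The covidcast metadata's last_update is read as US Eastern (UTC-5), matching the diff.
    est = timezone(timedelta(hours=-5))
    last_updated = datetime.fromtimestamp(covidcast_last_update, tz=est)
    # Both datetimes are timezone-aware, so the subtraction compares true instants.
    return (updated_timestamp - last_updated) > RECENTLY_UPDATED_DIFF

# Example: source updated 26 hours after the last covidcast ingestion -> True
print(source_recently_updated(1_700_093_600, 1_700_000_000))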
|