
Commit e38f4da

Merge pull request #1108 from cmu-delphi/acquisition_cronicle_change

Acquisition change for run in cronicle

2 parents: 61e545f + ccad57f

File tree

3 files changed (+16, -15 lines)


integrations/acquisition/covidcast/test_csv_uploading.py

Lines changed: 5 additions & 9 deletions

@@ -4,7 +4,7 @@
 from datetime import date
 import os
 import unittest
-from unittest.mock import MagicMock
+import argparse

 # third party
 import mysql.connector
@@ -14,7 +14,7 @@
 # first party
 from delphi_utils import Nans
 from delphi.epidata.client.delphi_epidata import Epidata
-from delphi.epidata.acquisition.covidcast.csv_to_database import main
+from delphi.epidata.acquisition.covidcast.csv_to_database import main, get_argument_parser
 import delphi.operations.secrets as secrets

 # py3tester coverage target (equivalent to `import *`)
@@ -92,16 +92,12 @@ def test_uploading(self):

     # make some fake data files
     data_dir = 'covid/data'
-    source_receiving_dir = data_dir + '/receiving/src-name'
+    indicator_dir = 'src-name'
+    source_receiving_dir = data_dir + '/receiving/' + indicator_dir
     log_file_directory = "/var/log/"
     os.makedirs(source_receiving_dir, exist_ok=True)
     os.makedirs(log_file_directory, exist_ok=True)
-    # TODO: use an actual argparse object for the args instead of a MagicMock
-    args = MagicMock(
-      log_file=log_file_directory +
-      "output.log",
-      data_dir=data_dir,
-      specific_issue_date=False)
+    args = get_argument_parser().parse_args(["--log_file", log_file_directory + "output.log", "--data_dir", data_dir, "--indicator_name", indicator_dir])
     uploader_column_rename = {"geo_id": "geo_value", "val": "value", "se": "stderr", "missing_val": "missing_value", "missing_se": "missing_stderr"}
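Note: switching from MagicMock to a real parser means the test now exercises the same argument-handling path as production code. A minimal, self-contained sketch of the idea follows; the parser here is a hypothetical stand-in mirroring get_argument_parser() (which actually lives in csv_to_database.py), and --specific_issue_date is assumed to be a boolean store_true flag, as its False default in the replaced MagicMock suggests.

    import argparse

    # Hypothetical mirror of get_argument_parser(), for illustration only.
    # Unlike a MagicMock, parse_args returns a genuine argparse.Namespace,
    # so defaults are exercised for real and unknown options raise an error
    # instead of silently returning another mock.
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_file')
    parser.add_argument('--data_dir')
    parser.add_argument('--specific_issue_date', action='store_true')  # assumed flag shape
    parser.add_argument('--indicator_name', nargs='?', default='*')

    args = parser.parse_args(['--log_file', '/var/log/output.log',
                              '--data_dir', 'covid/data',
                              '--indicator_name', 'src-name'])
    print(args.indicator_name)       # src-name
    print(args.specific_issue_date)  # False -- a real default, not a mocked attribute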

src/acquisition/covidcast/csv_importer.py

Lines changed: 3 additions & 2 deletions

@@ -133,10 +133,11 @@ def find_issue_specific_csv_files(scan_dir):


   @staticmethod
-  def find_csv_files(scan_dir, issue=(date.today(), epi.Week.fromdate(date.today()))):
+  def find_csv_files(scan_dir, issue=(date.today(), epi.Week.fromdate(date.today())), indicator_dir="*"):
     """Recursively search for and yield covidcast-format CSV files.

     scan_dir: the directory to scan (recursively)
+    indicator_dir: name of a single indicator directory containing .csv files

     The return value is a tuple of (path, details), where, if the path was
     valid, details is a tuple of (source, signal, time_type, geo_type,
@@ -149,7 +150,7 @@ def find_csv_files(scan_dir, issue=(date.today(), epi.Week.fromdate(date.today()
     issue_value=-1
     lag_value=-1

-    for path in sorted(glob(os.path.join(scan_dir, '*', '*'))):
+    for path in sorted(glob(os.path.join(scan_dir, indicator_dir, '*'))):
       # safe to ignore this file
       if not path.lower().endswith('.csv'):
         continue
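Note: the only behavioral change here is the glob pattern. The wildcard that used to match every source subdirectory is now a parameter, so the default indicator_dir='*' preserves the old scan exactly, while passing a name restricts it to one directory. A toy illustration (paths invented for the example):

    import os
    from glob import glob

    scan_dir = 'covid/data/receiving'

    # Default: same pattern as before, every indicator subdirectory is scanned.
    print(os.path.join(scan_dir, '*', '*'))         # covid/data/receiving/*/*

    # With an indicator name: only that one directory is scanned.
    print(os.path.join(scan_dir, 'src-name', '*'))  # covid/data/receiving/src-name/*

    for path in sorted(glob(os.path.join(scan_dir, 'src-name', '*'))):
        if not path.lower().endswith('.csv'):
            continue  # same skip as in find_csv_files
        print(path)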

src/acquisition/covidcast/csv_to_database.py

Lines changed: 8 additions & 4 deletions

@@ -28,16 +28,21 @@ def get_argument_parser():
   parser.add_argument(
     '--log_file',
     help="filename for log output (defaults to stdout)")
+  parser.add_argument(
+    '--indicator_name',
+    nargs='?',
+    default='*',
+    help='Name of one indicator directory to run acquisition on')
   return parser


-def collect_files(data_dir: str, specific_issue_date: bool):
+def collect_files(data_dir: str, specific_issue_date: bool, indicator_name="*"):
   """Fetch path and data profile details for each file to upload."""
   logger = get_structured_logger('collect_files')
   if specific_issue_date:
     results = list(CsvImporter.find_issue_specific_csv_files(data_dir))
   else:
-    results = list(CsvImporter.find_csv_files(os.path.join(data_dir, 'receiving')))
+    results = list(CsvImporter.find_csv_files(os.path.join(data_dir, 'receiving'), indicator_dir=indicator_name))
   logger.info(f'found {len(results)} files')
   return results

@@ -146,9 +151,8 @@ def main(args):

   logger = get_structured_logger("csv_ingestion", filename=args.log_file)
   start_time = time.time()
-
   # shortcut escape without hitting db if nothing to do
-  path_details = collect_files(args.data_dir, args.specific_issue_date)
+  path_details = collect_files(args.data_dir, args.specific_issue_date, indicator_name=args.indicator_name)
   if not path_details:
     logger.info('nothing to do; exiting...')
     return
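Note: taken together, the three changes let acquisition be scoped to a single indicator from the command line, which is what a per-indicator Cronicle job needs. A sketch of the end-to-end invocation; the module path is taken from the test's imports and the entry point is assumed, and an empty receiving directory simply triggers the "nothing to do" early exit.

    # Assumed shell equivalent (entry point not confirmed by this diff):
    #
    #   python -m delphi.epidata.acquisition.covidcast.csv_to_database \
    #       --data_dir covid/data --indicator_name src-name
    #
    from delphi.epidata.acquisition.covidcast.csv_to_database import (
        get_argument_parser, main)

    args = get_argument_parser().parse_args(
        ['--data_dir', 'covid/data', '--indicator_name', 'src-name'])
    main(args)  # scans only covid/data/receiving/src-name; logs and exits if empty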
