Skip to content

Commit 767df60

Browse files
authored
Merge pull request #751 from cmu-delphi/krivard/robust-covidhosp-acquisition
covid_hosp: Don't open a database connection until you've downloaded the files
2 parents: 934334b + 941b11d · commit 767df60

1 file changed

Lines changed: 24 additions & 15 deletions

File tree

  • src/acquisition/covid_hosp/common

src/acquisition/covid_hosp/common/utils.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -148,25 +148,34 @@ def update_dataset(database, network, newer_than=None, older_than=None):
148148
Whether a new dataset was acquired.
149149
"""
150150
metadata = network.fetch_metadata()
151+
datasets = []
151152
with database.connect() as db:
152153
max_issue = db.get_max_issue()
153-
older_than = datetime.datetime.today().date() if newer_than is None else older_than
154-
newer_than = max_issue if newer_than is None else newer_than
155-
daily_issues = Utils.issues_to_fetch(metadata, newer_than, older_than)
156-
if not daily_issues:
157-
print("no new issues, nothing to do")
158-
return False
159-
for issue, revisions in daily_issues.items():
160-
issue_int = int(issue.strftime("%Y%m%d"))
161-
# download the dataset and add it to the database
162-
dataset = Utils.merge_by_key_cols([network.fetch_dataset(url) for url, _ in revisions],
163-
db.KEY_COLS)
154+
155+
older_than = datetime.datetime.today().date() if newer_than is None else older_than
156+
newer_than = max_issue if newer_than is None else newer_than
157+
daily_issues = Utils.issues_to_fetch(metadata, newer_than, older_than)
158+
if not daily_issues:
159+
print("no new issues, nothing to do")
160+
return False
161+
for issue, revisions in daily_issues.items():
162+
issue_int = int(issue.strftime("%Y%m%d"))
163+
# download the dataset and add it to the database
164+
dataset = Utils.merge_by_key_cols([network.fetch_dataset(url) for url, _ in revisions],
165+
db.KEY_COLS)
166+
# add metadata to the database using the last revision seen.
167+
last_url, last_index = revisions[-1]
168+
metadata_json = metadata.loc[last_index].reset_index().to_json()
169+
datasets.append((
170+
issue_int,
171+
dataset,
172+
last_url,
173+
metadata_json
174+
))
175+
with database.connect() as db:
176+
for issue_int, dataset, last_url, metadata_json in datasets:
164177
db.insert_dataset(issue_int, dataset)
165-
# add metadata to the database using the last revision seen.
166-
last_url, last_index = revisions[-1]
167-
metadata_json = metadata.loc[last_index].reset_index().to_json()
168178
db.insert_metadata(issue_int, last_url, metadata_json)
169-
170179
print(f'successfully acquired {len(dataset)} rows')
171180

172181
# note that the transaction is committed by exiting the `with` block

0 commit comments

Comments (0)