
Commit c54719c
covid_hosp_facility lookup tests passing
1 parent: 14aa8fa

4 files changed (+15, -8 lines)

integrations/acquisition/covid_hosp/facility/test_scenarios.py (+4, -4)
@@ -76,7 +76,7 @@ def test_acquire_dataset(self):
     response = Epidata.covid_hosp_facility(
       '450822', Epidata.range(20200101, 20210101))
     self.assertEqual(response['result'], 1)
-    self.assertEqual(len(response['epidata']), 1)
+    self.assertEqual(len(response['epidata']), 2)
     row = response['epidata'][0]
     for k,v in expected_spotchecks.items():
       self.assertTrue(
@@ -101,9 +101,9 @@ def test_acquire_dataset(self):
     response = Epidata.covid_hosp_facility(
       '450822', Epidata.range(20200101, 20210101))
     self.assertEqual(response['result'], 1)
-    self.assertEqual(len(response['epidata']), 1)
+    self.assertEqual(len(response['epidata']), 2)

-  @freeze_time("2021-03-16")
+  @freeze_time("2021-03-17")
   def test_facility_lookup(self):
     """Lookup facilities using various filters."""

@@ -188,7 +188,7 @@ def test_facility_lookup(self):
     self.test_utils.load_sample_dataset('dataset_update_facility.csv')

     # acquire sample data into local database
-    with self.subTest(name='first acquisition'):
+    with self.subTest(name='second acquisition'):
       acquired = Update.run(network=mock_network)
       self.assertTrue(acquired)
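The freeze_time bump from 2021-03-16 to 2021-03-17 is worth noting: the acquisition path presumably uses the current date as the upper bound (older_than) on which issues to fetch, so moving the frozen clock forward one day lets an issue stamped 2021-03-16 fall inside the window (compare the older_than handling in utils.py below). freezegun pins datetime.now() for everything executed inside the decorated test; a minimal sketch of that effect:

from datetime import datetime
from freezegun import freeze_time

@freeze_time("2021-03-17")
def frozen_today():
  # any code that computes "today" while this runs sees the frozen date
  return datetime.now().date().isoformat()

print(frozen_today())  # prints '2021-03-17'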

src/acquisition/covid_hosp/common/database.py (+5, -3)
@@ -177,7 +177,9 @@ def insert_dataset(self, publication_date, dataframe):
     ak_cols = self.AGGREGATE_KEY_COLS

     # restrict data to just the key columns and remove duplicate rows
-    ak_data = dataframe[ak_cols].drop_duplicates()
+    ak_data = (dataframe[set(ak_cols + self.KEY_COLS)]
+               .sort_values(self.KEY_COLS)[ak_cols]
+               .drop_duplicates())
     # cast types
     dataframe_typemap = {
       name: dtype
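As I read this hunk, selecting the key columns and sorting on self.KEY_COLS before drop_duplicates() makes the order of the surviving aggregate-key rows deterministic (chronological by key), so when the same primary key appears with changed attributes, the most recent values are inserted last and win the upsert assembled further down. A rough standalone sketch of the effect, with hypothetical column names standing in for the real KEY_COLS / AGGREGATE_KEY_COLS:

import pandas as pd

# hypothetical stand-ins for self.KEY_COLS and self.AGGREGATE_KEY_COLS
KEY_COLS = ['collection_week', 'hospital_pk']
AGGREGATE_KEY_COLS = ['hospital_pk', 'hospital_name']

df = pd.DataFrame({
  'collection_week': [20201211, 20201204],  # newest row happens to come first
  'hospital_pk': ['450822', '450822'],
  'hospital_name': ['NEW NAME', 'OLD NAME'],
})

# old behavior: dedup order simply follows input order
old = df[AGGREGATE_KEY_COLS].drop_duplicates()

# new behavior: sort by the key columns first, so the deduplicated rows
# come out in chronological order and the most recent values are the
# last ones handed to the ON DUPLICATE KEY UPDATE statement below
# (the committed code indexes with the set directly; list() used here)
new = (df[list(set(AGGREGATE_KEY_COLS + KEY_COLS))]
       .sort_values(KEY_COLS)[AGGREGATE_KEY_COLS]
       .drop_duplicates())

print(old.to_dict('records'))  # NEW NAME first, OLD NAME last
print(new.to_dict('records'))  # OLD NAME first, NEW NAME last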
@@ -196,13 +198,13 @@ def cast_but_sidestep_nans(i):
     # create string of tick-quoted and comma-seperated column list
     ak_cols_str = ','.join(f'`{col}`' for col in ak_cols)
     # ...and ticked and comma-sep'd "column=column" list for ON UPDATE (to keep only the most recent values for each pk)
-    ak_updates_str = ','.join(f'`{col}`=`{col}`' for col in ak_cols)
+    ak_updates_str = ','.join(f'`{col}`=v.{col}' for col in ak_cols)
     # ...and string of VALUES placeholders
     values_str = ','.join( ['%s'] * len(ak_cols) )
     # use aggregate key table alias
     ak_table = self.table_name + '_key'
     # assemble full SQL statement
-    ak_insert_sql = f'INSERT INTO `{ak_table}` ({ak_cols_str}) VALUES ({values_str}) ON DUPLICATE KEY UPDATE {ak_updates_str}'
+    ak_insert_sql = f'INSERT INTO `{ak_table}` ({ak_cols_str}) VALUES ({values_str}) as v ON DUPLICATE KEY UPDATE {ak_updates_str}'

     # commit the data
     with self.new_cursor() as cur:
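The two SQL changes go together. The old ON DUPLICATE KEY UPDATE `col`=`col` assigns each column to itself, so re-inserting an existing key left the stored values untouched; aliasing the inserted row as v (the MySQL 8.0.19+ row alias that replaces the deprecated VALUES() function) and updating to v.col refreshes the row with the newly supplied values instead, which is what the "keep only the most recent values for each pk" comment intends. A small sketch of the generated statement, using a hypothetical two-column key table:

# Hypothetical table and columns, just to show the shape of the
# generated statement before and after this change.
ak_table = 'covid_hosp_facility_key'
ak_cols = ['hospital_pk', 'hospital_name']

cols_str = ','.join(f'`{col}`' for col in ak_cols)
values_str = ','.join(['%s'] * len(ak_cols))

# before: `col`=`col` assigns each column to itself, so a duplicate key
# kept whatever values were already stored
old_updates = ','.join(f'`{col}`=`{col}`' for col in ak_cols)
old_sql = f'INSERT INTO `{ak_table}` ({cols_str}) VALUES ({values_str}) ON DUPLICATE KEY UPDATE {old_updates}'

# after: the inserted row is aliased as `v`, so a duplicate key is
# refreshed with the newly supplied values
new_updates = ','.join(f'`{col}`=v.{col}' for col in ak_cols)
new_sql = f'INSERT INTO `{ak_table}` ({cols_str}) VALUES ({values_str}) as v ON DUPLICATE KEY UPDATE {new_updates}'

print(old_sql)
print(new_sql)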

src/acquisition/covid_hosp/common/utils.py (+5)
@@ -86,6 +86,7 @@ def issues_to_fetch(metadata, newer_than, older_than):
     for issues after newer_than and before older_than
     """
     daily_issues = {}
+    n_beyond = 0
     for index in sorted(set(metadata.index)):
       day = index.date()
       if day > newer_than and day < older_than:
@@ -95,6 +96,10 @@ def issues_to_fetch(metadata, newer_than, older_than):
           daily_issues[day] = urls_list
         else:
           daily_issues[day] += urls_list
+      elif day >= older_than:
+        n_beyond += 1
+    if n_beyond > 0:
+      print(f"{n_beyond} issues available on {older_than} or newer")
     return daily_issues

   @staticmethod
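The new n_beyond counter does not change what gets fetched: days at or past older_than are skipped exactly as before, but the log line now tells the operator that newer issues exist beyond the requested window. A toy re-implementation of just that branching, with made-up dates rather than the real metadata frame:

from datetime import date

# stand-in for the metadata index: one day per available issue
issue_days = [date(2021, 3, 13), date(2021, 3, 14), date(2021, 3, 16), date(2021, 3, 17)]
newer_than, older_than = date(2021, 3, 13), date(2021, 3, 16)

fetched, n_beyond = [], 0
for day in sorted(set(issue_days)):
  if newer_than < day < older_than:
    fetched.append(day)   # would be added to daily_issues
  elif day >= older_than:
    n_beyond += 1         # available, but beyond the requested window

if n_beyond > 0:
  print(f"{n_beyond} issues available on {older_than} or newer")
# -> "2 issues available on 2021-03-16 or newer"; fetched == [date(2021, 3, 14)]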

testdata/acquisition/covid_hosp/facility/metadata_update_facility.csv (+1, -1)

(Large diffs are not rendered by default.)
