Skip to content

Commit 3d37c59

Browse files
rzats and melange396 authored
API server code health improvements (#1041)
* Acquisition
* Server
* fix test
* no return
* Update src/server/endpoints/covidcast_utils/model.py

  Co-authored-by: melange396 <[email protected]>
* Fix

Co-authored-by: melange396 <[email protected]>
1 parent d98705f commit 3d37c59

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed, with 213 additions (+213) and 260 deletions (-260).

src/acquisition/afhsb/afhsb_csv.py

+61-41
Original file line numberDiff line numberDiff line change
@@ -14,60 +14,73 @@
1414
import csv
1515
import os
1616

17-
import sas7bdat
1817
import pickle
18+
import sas7bdat
1919
import epiweeks as epi
2020

2121

2222
DATAPATH = '/home/automation/afhsb_data'
23-
SOURCE_DIR = DATAPATH
24-
TARGET_DIR = DATAPATH
23+
SOURCE_DIR = DATAPATH
24+
TARGET_DIR = DATAPATH
2525

2626
INVALID_DMISIDS = set()
2727

2828
def get_flu_cat(dx):
2929
# flu1 (influenza)
30-
if (len(dx) == 0): return None
30+
if len(dx) == 0:
31+
return None
3132
dx = dx.capitalize()
32-
if (dx.isnumeric()):
33+
if dx.isnumeric():
3334
for prefix in ["487", "488"]:
34-
if (dx.startswith(prefix)): return 1
35+
if dx.startswith(prefix):
36+
return 1
3537
for i in range(0, 7):
3638
prefix = str(480 + i)
37-
if (dx.startswith(prefix)): return 2
39+
if dx.startswith(prefix):
40+
return 2
3841
for i in range(0, 7):
3942
prefix = str(460 + i)
40-
if (dx.startswith(prefix)): return 3
43+
if dx.startswith(prefix):
44+
return 3
4145
for prefix in ["07999", "3829", "7806", "7862"]:
42-
if (dx.startswith(prefix)): return 3
46+
if dx.startswith(prefix):
47+
return 3
4348
elif (dx[0].isalpha() and dx[1:].isnumeric()):
4449
for prefix in ["J09", "J10", "J11"]:
45-
if (dx.startswith(prefix)): return 1
50+
if dx.startswith(prefix):
51+
return 1
4652
for i in range(12, 19):
4753
prefix = "J{}".format(i)
48-
if (dx.startswith(prefix)): return 2
54+
if dx.startswith(prefix):
55+
return 2
4956
for i in range(0, 7):
5057
prefix = "J0{}".format(i)
51-
if (dx.startswith(prefix)): return 3
58+
if dx.startswith(prefix):
59+
return 3
5260
for i in range(20, 23):
5361
prefix = "J{}".format(i)
54-
if (dx.startswith(prefix)): return 3
62+
if dx.startswith(prefix):
63+
return 3
5564
for prefix in ["J40", "R05", "H669", "R509", "B9789"]:
56-
if (dx.startswith(prefix)): return 3
65+
if dx.startswith(prefix):
66+
return 3
5767
else:
5868
return None
5969

6070
def aggregate_data(sourcefile, targetfile):
6171
reader = sas7bdat.SAS7BDAT(os.path.join(SOURCE_DIR, sourcefile), skip_header=True)
6272
# map column names to column indices
63-
COL2IDX = {column.name.decode('utf-8'): column.col_id for column in reader.columns}
64-
def get_field(row, column): return row[COL2IDX[column]]
73+
col_2_idx = {column.name.decode('utf-8'): column.col_id for column in reader.columns}
74+
75+
def get_field(row, column):
76+
return row[col_2_idx[column]]
6577

6678
def row2flu(row):
6779
for i in range(1, 9):
6880
dx = get_field(row, "dx{}".format(i))
6981
flu_cat = get_flu_cat(dx)
70-
if (flu_cat != None): return flu_cat
82+
if flu_cat is not None:
83+
return flu_cat
7184
return 0
7285

7386
def row2epiweek(row):
@@ -77,28 +90,30 @@ def row2epiweek(row):
7790
year, week_num = week_tuple[0], week_tuple[1]
7891
return year, week_num
7992

80-
results_dict = dict()
81-
for r, row in enumerate(reader):
93+
results_dict = {}
94+
for _, row in enumerate(reader):
8295
# if (r >= 1000000): break
83-
if (get_field(row, 'type') != "Outpt"): continue
96+
if get_field(row, 'type') != "Outpt":
97+
continue
8498
year, week_num = row2epiweek(row)
8599
dmisid = get_field(row, 'DMISID')
86100
flu_cat = row2flu(row)
87101

88102
key_list = [year, week_num, dmisid, flu_cat]
89103
curr_dict = results_dict
90104
for i, key in enumerate(key_list):
91-
if (i == len(key_list) - 1):
92-
if (not key in curr_dict): curr_dict[key] = 0
105+
if i == len(key_list) - 1:
106+
if key not in curr_dict:
107+
curr_dict[key] = 0
93108
curr_dict[key] += 1
94109
else:
95-
if (not key in curr_dict): curr_dict[key] = dict()
110+
if key not in curr_dict:
111+
curr_dict[key] = {}
96112
curr_dict = curr_dict[key]
97113

98114
results_path = os.path.join(TARGET_DIR, targetfile)
99115
with open(results_path, 'wb') as f:
100116
pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL)
101-
return
102117

103118

104119
################# Functions for geographical information ####################
@@ -122,7 +137,7 @@ def format_dmisid_csv(filename, target_name):
122137

123138
src_csv = open(src_path, "r", encoding='utf-8-sig')
124139
reader = csv.DictReader(src_csv)
125-
140+
126141
dst_csv = open(dst_path, "w")
127142
fieldnames = ['dmisid', 'country', 'state', 'zip5']
128143
writer = csv.DictWriter(dst_csv, fieldnames=fieldnames)
@@ -132,9 +147,11 @@ def format_dmisid_csv(filename, target_name):
132147

133148
for row in reader:
134149
country2 = row['Facility ISO Country Code']
135-
if (country2 == ""): country3 = ""
136-
elif (not country2 in country_mapping):
137-
for key in row.keys(): print(key, row[key])
150+
if country2 == "":
151+
country3 = ""
152+
elif country2 not in country_mapping:
153+
for key in row.keys():
154+
print(key, row[key])
138155
continue
139156
else:
140157
country3 = country_mapping[country2]
@@ -149,6 +166,7 @@ def dmisid():
149166
target_name = "simple_DMISID_FY2018.csv"
150167
format_dmisid_csv(filename, target_name)
151168

169+
152170
cen2states = {'cen1': {'CT', 'ME', 'MA', 'NH', 'RI', 'VT'},
153171
'cen2': {'NJ', 'NY', 'PA'},
154172
'cen3': {'IL', 'IN', 'MI', 'OH', 'WI'},
@@ -175,7 +193,7 @@ def state2region(D):
175193
for region in D.keys():
176194
states = D[region]
177195
for state in states:
178-
assert(not state in results)
196+
assert state not in results
179197
results[state] = region
180198
return results
181199

@@ -204,7 +222,7 @@ def write_afhsb_csv(period):
204222
with open(os.path.join(TARGET_DIR, "{}.csv".format(period)), 'w') as csvfile:
205223
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
206224
writer.writeheader()
207-
225+
208226
i = 0
209227
for year in sorted(results_dict.keys()):
210228
year_dict = results_dict[year]
@@ -217,11 +235,12 @@ def write_afhsb_csv(period):
217235
i += 1
218236
epiweek = int("{}{:02d}".format(year, week))
219237
flu_type = flu_mapping[flu]
220-
238+
221239
row = {"epiweek": epiweek, "dmisid": None if (not dmisid.isnumeric()) else dmisid,
222240
"flu_type": flu_type, "visit_sum": visit_sum, "id": i}
223241
writer.writerow(row)
224-
if (i % 100000 == 0): print(row)
242+
if i % 100000 == 0:
243+
print(row)
225244

226245
def dmisid_start_time_from_file(filename):
227246
starttime_record = dict()
@@ -230,7 +249,7 @@ def dmisid_start_time_from_file(filename):
230249
for row in reader:
231250
dmisid = row['dmisid']
232251
epiweek = int(row['epiweek'])
233-
if (not dmisid in starttime_record):
252+
if dmisid not in starttime_record:
234253
starttime_record[dmisid] = epiweek
235254
else:
236255
starttime_record[dmisid] = min(epiweek, starttime_record[dmisid])
@@ -241,7 +260,7 @@ def dmisid_start_time():
241260
record2 = dmisid_start_time_from_file(os.path.join(TARGET_DIR, "13to17.csv"))
242261
record = record1
243262
for dmisid, epiweek in record2.items():
244-
if (dmisid in record):
263+
if dmisid in record:
245264
record[dmisid] = min(record[dmisid], epiweek)
246265
else:
247266
record[dmisid] = epiweek
@@ -261,10 +280,10 @@ def fillin_zero_to_csv(period, dmisid_start_record):
261280
dmisid = row['dmisid']
262281
flu_type = row['flu_type']
263282
visit_sum = row['visit_sum']
264-
if (not epiweek in results_dict):
283+
if epiweek not in results_dict:
265284
results_dict[epiweek] = dict()
266285
week_dict = results_dict[epiweek]
267-
if (not dmisid in week_dict):
286+
if dmisid not in week_dict:
268287
week_dict[dmisid] = dict()
269288
dmisid_dict = week_dict[dmisid]
270289
dmisid_dict[flu_type] = visit_sum
@@ -277,14 +296,15 @@ def fillin_zero_to_csv(period, dmisid_start_record):
277296
week_dict = results_dict[epiweek]
278297
for dmisid in dmisid_group:
279298
start_week = dmisid_start_record[dmisid]
280-
if (start_week > epiweek): continue
299+
if start_week > epiweek:
300+
continue
281301

282-
if (not dmisid in week_dict):
302+
if dmisid not in week_dict:
283303
week_dict[dmisid] = dict()
284304

285305
dmisid_dict = week_dict[dmisid]
286306
for flutype in flutype_group:
287-
if (not flutype in dmisid_dict):
307+
if flutype not in dmisid_dict:
288308
dmisid_dict[flutype] = 0
289309

290310
# Write to csv files
@@ -301,7 +321,7 @@ def fillin_zero_to_csv(period, dmisid_start_record):
301321
row = {"id": i, "epiweek": epiweek, "dmisid": dmisid,
302322
"flu_type": flutype, "visit_sum": visit_sum}
303323
writer.writerow(row)
304-
if (i % 100000 == 0):
324+
if i % 100000 == 0:
305325
print(row)
306326
i += 1
307327
print("Wrote {} rows".format(i))
@@ -328,4 +348,4 @@ def main():
328348

329349

330350
if __name__ == '__main__':
331-
main()
351+
main()

src/acquisition/afhsb/afhsb_sql.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def init_all_tables(datapath):
159159
raw_table_name = 'afhsb_{}_raw'.format(period)
160160
state_table_name = 'afhsb_{}_state'.format(period)
161161
region_table_name = 'afhsb_{}_region'.format(period)
162-
162+
163163
init_raw_data(raw_table_name, os.path.join(datapath, "filled_{}.csv".format(period)))
164164
agg_by_state(raw_table_name, state_table_name)
165165
agg_by_region(state_table_name, region_table_name)

src/acquisition/afhsb/afhsb_update.py

+1
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,6 @@ def main():
3434
afhsb_sql.init_all_tables(tmp_datapath)
3535
# (Temporary parent directory should be deleted automatically.)
3636

37+
3738
if __name__ == '__main__':
3839
main()

src/acquisition/cdcp/cdc_dropbox_receiver.py

-5
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
"""
1818

1919
# standard library
20-
import argparse
2120
import datetime
2221
from zipfile import ZIP_DEFLATED, ZipFile
2322

@@ -149,10 +148,6 @@ def fetch_data():
149148

150149

151150
def main():
152-
# args and usage
153-
parser = argparse.ArgumentParser()
154-
args = parser.parse_args()
155-
156151
# fetch new data
157152
fetch_data()
158153

src/acquisition/cdcp/cdc_extract.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,11 @@
6767

6868
# third party
6969
import mysql.connector
70-
import numpy as np
7170

7271
# first party
73-
from . import cdc_upload
7472
import delphi.operations.secrets as secrets
75-
from delphi.utils.epidate import EpiDate
7673
import delphi.utils.epiweek as flu
74+
from . import cdc_upload
7775

7876

7977
def get_num_hits(cur, epiweek, state, page):
@@ -95,8 +93,7 @@ def get_num_hits(cur, epiweek, state, page):
9593
pass
9694
if num is None:
9795
return 0
98-
else:
99-
return num
96+
return num
10097

10198

10299
def get_total_hits(cur, epiweek, state):
@@ -114,8 +111,7 @@ def get_total_hits(cur, epiweek, state):
114111
pass
115112
if total is None:
116113
raise Exception('missing data for %d-%s' % (epiweek, state))
117-
else:
118-
return total
114+
return total
119115

120116

121117
def store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total):

src/acquisition/cdcp/cdc_upload.py

-2
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,13 @@
7777
import io
7878
import os
7979
import shutil
80-
import sys
8180
from zipfile import ZipFile
8281

8382
# third party
8483
import mysql.connector
8584

8685
# first party
8786
import delphi.operations.secrets as secrets
88-
import delphi.utils.epiweek as flu
8987

9088

9189
STATES = {

src/acquisition/covid_hosp/common/network.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# third party
22
import pandas
3-
import requests
43

54

65
class Network:

src/acquisition/covid_hosp/common/test_utils.py

-2
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@
99
"""
1010

1111
# standard library
12-
import json
1312
from pathlib import Path
14-
from unittest.mock import Mock
1513

1614
# third party
1715
import pandas

src/acquisition/covid_hosp/state_daily/database.py

-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
from delphi.epidata.acquisition.covid_hosp.common.database import Columndef
44
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
55

6-
import pandas as pd
7-
86

97
class Database(BaseDatabase):
108

src/acquisition/covid_hosp/state_daily/network.py

-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
# third party
2-
3-
import requests
4-
51
# first party
62
from delphi.epidata.acquisition.covid_hosp.common.network import Network as BaseNetwork
73

src/acquisition/covid_hosp/state_daily/update.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,6 @@
33
dataset provided by the US Department of Health & Human Services
44
via healthdata.gov.
55
"""
6-
# standard library
7-
import json
8-
9-
# third party
10-
import pandas as pd
11-
126
# first party
137
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
148
from delphi.epidata.acquisition.covid_hosp.state_daily.database import Database
@@ -29,5 +23,6 @@ def run(network=Network):
2923

3024
return Utils.update_dataset(Database, network)
3125

26+
3227
# main entry point
3328
Utils.launch_if_main(Update.run, __name__)

0 commit comments

Comments
 (0)