
Commit b9ceb40

style(black): format quidel acquisition

1 parent 7f60fbb
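Note: the changes below match what the black code formatter produces. A command along these lines would regenerate them, though the exact invocation and configured line length are assumptions (several reformatted lines exceed black's default 88-character limit, which suggests a project setting near 100): `black --line-length 100 src/acquisition/quidel/quidel.py`.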

File tree

2 files changed: +245 −189 lines changed


src/acquisition/quidel/quidel.py

Lines changed: 137 additions & 95 deletions
@@ -1,4 +1,4 @@
-'''
+"""
 ===============
 === Purpose ===
 ===============
@@ -15,7 +15,7 @@
   * add end date, end week check
 2017-12-02:
   * original version
-'''
+"""
 
 # standard library
 from collections import defaultdict
@@ -35,148 +35,187 @@
 import delphi.utils.epidate as ED
 from delphi.utils.geo.locations import Locations
 
-def word_map(row,terms):
-    for (k,v) in terms.items():
-        row = row.replace(k,v)
+
+def word_map(row, terms):
+    for (k, v) in terms.items():
+        row = row.replace(k, v)
     return row
 
-def date_less_than(d1,d2):
-    y1,m1,d1 = [int(x) for x in d1.split('-')]
-    y2,m2,d2 = [int(x) for x in d2.split('-')]
+
+def date_less_than(d1, d2):
+    y1, m1, d1 = (int(x) for x in d1.split("-"))
+    y2, m2, d2 = (int(x) for x in d2.split("-"))
 
-    if y1*10000+m1*100+d1<y2*10000+m2*100+d2:
+    if y1 * 10000 + m1 * 100 + d1 < y2 * 10000 + m2 * 100 + d2:
         return 1
-    elif y1*10000+m1*100+d1==y2*10000+m2*100+d2:
+    elif y1 * 10000 + m1 * 100 + d1 == y2 * 10000 + m2 * 100 + d2:
         return 0
     else:
         return -1
 
+
 # shift>0: shifted to future
 def date_to_epiweek(date, shift=0):
-    y,m,d = [int(x) for x in date.split('-')]
+    y, m, d = (int(x) for x in date.split("-"))
 
-    epidate = ED.EpiDate(y,m,d)
+    epidate = ED.EpiDate(y, m, d)
     epidate = epidate.add_days(shift)
     ew = epidate.get_ew()
     return ew
 
+
 # convert measurment to time series format
 # startweek and endweek are inclusive
-def measurement_to_ts(m,index,startweek=None,endweek=None):
+def measurement_to_ts(m, index, startweek=None, endweek=None):
     if startweek is None:
         startweek = 0
     if endweek is None:
         endweek = 999999
     res = {}
-    for r,rdict in m.items():
-        res[r]={}
-        for t,vals in rdict.items():
-            if index>=len(vals):
+    for r, rdict in m.items():
+        res[r] = {}
+        for t, vals in rdict.items():
+            if index >= len(vals):
                 raise Exception("Index is invalid")
-            if t>=startweek and t<=endweek:
+            if t >= startweek and t <= endweek:
                 res[r][t] = vals[index]
     return res
 
+
 class QuidelData:
     def __init__(self, raw_path, load_email=True):
         self.data_path = raw_path
-        self.excel_uptodate_path = join(raw_path,'excel/uptodate')
-        self.excel_history_path = join(raw_path,'excel/history')
-        self.csv_path = join(raw_path,'csv')
+        self.excel_uptodate_path = join(raw_path, "excel/uptodate")
+        self.excel_history_path = join(raw_path, "excel/history")
+        self.csv_path = join(raw_path, "csv")
         self.xlsx_uptodate_list = [
-            f[:-5] for f in listdir(self.excel_uptodate_path) if isfile(join(self.excel_uptodate_path, f)) and f[-5:]=='.xlsx'
+            f[:-5]
+            for f in listdir(self.excel_uptodate_path)
+            if isfile(join(self.excel_uptodate_path, f)) and f[-5:] == ".xlsx"
         ]
         self.xlsx_history_list = [
-            f[:-5] for f in listdir(self.excel_history_path) if isfile(join(self.excel_history_path, f)) and f[-5:]=='.xlsx'
+            f[:-5]
+            for f in listdir(self.excel_history_path)
+            if isfile(join(self.excel_history_path, f)) and f[-5:] == ".xlsx"
+        ]
+        self.csv_list = [
+            f[:-4]
+            for f in listdir(self.csv_path)
+            if isfile(join(self.csv_path, f)) and f[-4:] == ".csv"
         ]
-        self.csv_list = [f[:-4] for f in listdir(self.csv_path) if isfile(join(self.csv_path, f)) and f[-4:]=='.csv']
         self.map_terms = {
-            ' FL 34637"':'FL',
+            ' FL 34637"': "FL",
         }
         # hardcoded parameters
         self.date_dim = 1
         self.state_dim = 4
         self.fields = [
-            'sofia_ser','date','fac_id','city','state','zip','age',
-            'fluA','fluB','fluAll','county','fac_type'
+            "sofia_ser",
+            "date",
+            "fac_id",
+            "city",
+            "state",
+            "zip",
+            "age",
+            "fluA",
+            "fluB",
+            "fluAll",
+            "county",
+            "fac_type",
         ]
-        self.fields_to_keep = ['fac_id','fluA','fluB','fluAll']
+        self.fields_to_keep = ["fac_id", "fluA", "fluB", "fluAll"]
         self.dims_to_keep = [self.fields.index(x) for x in self.fields_to_keep]
         if load_email:
             self.retrieve_excels()
         self.prepare_csv()
 
     def retrieve_excels(self):
-        detach_dir = self.excel_uptodate_path # directory where to save attachments (default: current)
+        detach_dir = (
+            self.excel_uptodate_path
+        )  # directory where to save attachments (default: current)
 
         # connecting to the gmail imap server
         m = imaplib.IMAP4_SSL("imap.gmail.com")
-        m.login(secrets.quidel.email_addr,secrets.quidel.email_pwd)
-        m.select("INBOX") # here you a can choose a mail box like INBOX instead
+        m.login(secrets.quidel.email_addr, secrets.quidel.email_pwd)
+        m.select("INBOX")  # here you a can choose a mail box like INBOX instead
         # use m.list() to get all the mailboxes
-        _, items = m.search(None, "ALL") # you could filter using the IMAP rules here (check http://www.example-code.com/csharp/imap-search-critera.asp)
-        items = items[0].split() # getting the mails id
+        # you could filter using the IMAP rules here (check https://www.example-code.com/csharp/imap-search-critera.asp)
+        _, items = m.search(None, "ALL")
+        items = items[0].split()  # getting the mails id
 
         # The emailids are ordered from past to now
         for emailid in items:
-            _, data = m.fetch(emailid, "(RFC822)") # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc
-            email_body = data[0][1].decode('utf-8') # getting the mail content
-            mail = email.message_from_string(email_body) # parsing the mail content to get a mail object
-
-            #Check if any attachments at all
-            if mail.get_content_maintype() != 'multipart':
+            _, data = m.fetch(
+                emailid, "(RFC822)"
+            )  # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc
+            email_body = data[0][1].decode("utf-8")  # getting the mail content
+            mail = email.message_from_string(
+                email_body
+            )  # parsing the mail content to get a mail object
+
+            # Check if any attachments at all
+            if mail.get_content_maintype() != "multipart":
                 continue
 
             # we use walk to create a generator so we can iterate on the parts and forget about the recursive headach
             for part in mail.walk():
                 # multipart are just containers, so we skip them
-                if part.get_content_maintype() == 'multipart':
+                if part.get_content_maintype() == "multipart":
                     continue
 
                 # is this part an attachment ?
-                if part.get('Content-Disposition') is None:
+                if part.get("Content-Disposition") is None:
                     continue
 
                 filename = part.get_filename()
                 # check duplicates
-                if filename[-5:]!='.xlsx' or filename[:-5] in self.xlsx_uptodate_list+self.xlsx_history_list:
+                if (
+                    filename[-5:] != ".xlsx"
+                    or filename[:-5] in self.xlsx_uptodate_list + self.xlsx_history_list
+                ):
                     continue
 
                 self.xlsx_uptodate_list.append(filename[:-5])
                 att_path = os.path.join(detach_dir, filename)
 
-                #Check if its already there
-                if not os.path.isfile(att_path) :
+                # Check if its already there
+                if not os.path.isfile(att_path):
                     # finally write the stuff
-                    fp = open(att_path, 'wb')
+                    fp = open(att_path, "wb")
                     fp.write(part.get_payload(decode=True))
                     fp.close()
 
     def prepare_csv(self):
-        need_update=False
+        need_update = False
         for f in self.xlsx_uptodate_list:
             if f in self.csv_list:
                 continue
             else:
-                need_update=True
+                need_update = True
 
-            date_regex = '\d{2}-\d{2}-\d{4}'
-            date_items = re.findall(date_regex,f)
+            date_regex = r"\d{2}-\d{2}-\d{4}"
+            date_items = re.findall(date_regex, f)
             if date_items:
-                end_date = '-'.join(date_items[-1].split('-')[x] for x in [2,0,1])
+                end_date = "-".join(date_items[-1].split("-")[x] for x in [2, 0, 1])
             else:
-                print("End date not found in file name:"+f)
+                print("End date not found in file name:" + f)
                 end_date = None
 
-            df_dict = pd.read_excel(join(self.excel_uptodate_path, f+'.xlsx'), sheet_name=None)
-            for (_,df) in df_dict.items():
-                df = df.dropna(axis=0, how='all')
-                df['TestDate'] = df['TestDate'].apply(lambda x: x.strftime('%Y-%m-%d'))
-                df_filtered = df[df['TestDate']!='']
+            df_dict = pd.read_excel(join(self.excel_uptodate_path, f + ".xlsx"), sheet_name=None)
+            for (_, df) in df_dict.items():
+                df = df.dropna(axis=0, how="all")
+                df["TestDate"] = df["TestDate"].apply(lambda x: x.strftime("%Y-%m-%d"))
+                df_filtered = df[df["TestDate"] != ""]
                 if end_date is not None:
-                    df_filtered = df_filtered[df.apply(lambda x: date_less_than(end_date,x['TestDate'])!=1, axis=1)]
-                df_filtered.to_csv(join(self.csv_path, f+'.csv'), index=False, encoding='utf-8')
-        self.csv_list = [f[:-4] for f in listdir(self.csv_path) if isfile(join(self.csv_path, f)) and f[-4:]=='.csv']
+                    df_filtered = df_filtered[
+                        df.apply(lambda x: date_less_than(end_date, x["TestDate"]) != 1, axis=1)
+                    ]
+                df_filtered.to_csv(join(self.csv_path, f + ".csv"), index=False, encoding="utf-8")
+        self.csv_list = [
+            f[:-4]
+            for f in listdir(self.csv_path)
+            if isfile(join(self.csv_path, f)) and f[-4:] == ".csv"
+        ]
         self.need_update = need_update
 
     def load_csv(self, dims=None):
@@ -186,12 +225,12 @@ def load_csv(self, dims=None):
         for f in self.csv_list:
             if f in self.xlsx_history_list:
                 continue
-            rf = open(join(self.csv_path,f+'.csv'))
+            rf = open(join(self.csv_path, f + ".csv"))
 
             lines = rf.readlines()
             for l in lines[1:]:
-                l = word_map(l,self.map_terms)
-                row = l.strip().split(',')
+                l = word_map(l, self.map_terms)
+                row = l.strip().split(",")
                 date = row[self.date_dim]
                 state = row[self.state_dim]
                 if state not in parsed_dict[date]:
@@ -202,42 +241,43 @@ def load_csv(self, dims=None):
 
     # hardcoded aggregation function
     # output: [#unique_device,fluA,fluB,fluAll,total]
-    def prepare_measurements(self,data_dict,use_hhs=True,start_weekday=6):
+    def prepare_measurements(self, data_dict, use_hhs=True, start_weekday=6):
         buffer_dict = {}
         if use_hhs:
             region_list = Locations.hhs_list
         else:
             region_list = Locations.atom_list
 
         def get_hhs_region(atom):
-          for region in Locations.hhs_list:
-            if atom.lower() in Locations.hhs_map[region]:
-              return region
-          if atom.lower() == 'ny':
-            return 'hhs2'
-          return atom
+            for region in Locations.hhs_list:
+                if atom.lower() in Locations.hhs_map[region]:
+                    return region
+            if atom.lower() == "ny":
+                return "hhs2"
+            return atom
 
         day_shift = 6 - start_weekday
-        time_map = lambda x:date_to_epiweek(x,shift=day_shift)
-        region_map = lambda x:get_hhs_region(x) \
-          if use_hhs and x not in Locations.hhs_list else x # a bit hacky
+        time_map = lambda x: date_to_epiweek(x, shift=day_shift)
+        region_map = (
+            lambda x: get_hhs_region(x) if use_hhs and x not in Locations.hhs_list else x
+        )  # a bit hacky
 
         end_date = sorted(data_dict.keys())[-1]
         # count the latest week in only if Thurs data is included
-        end_epiweek = date_to_epiweek(end_date,shift=-4)
+        end_epiweek = date_to_epiweek(end_date, shift=-4)
         # first pass: prepare device_id set
         device_dict = {}
-        for (date,daily_dict) in data_dict.items():
+        for (date, daily_dict) in data_dict.items():
             if not date:
                 continue
             ew = time_map(date)
-            if ew == -1 or ew>end_epiweek:
+            if ew == -1 or ew > end_epiweek:
                 continue
             if ew not in device_dict:
-                device_dict[ew]={}
+                device_dict[ew] = {}
                 for r in region_list:
                     device_dict[ew][r] = set()
-            for (state,rec_list) in daily_dict.items():
+            for (state, rec_list) in daily_dict.items():
                 region = region_map(state)
                 # get rid of non-US regions
                 if region not in region_list:
@@ -247,38 +287,40 @@ def get_hhs_region(atom):
                     device_dict[ew][region].add(fac)
 
         # second pass: prepare all measurements
-        for (date,daily_dict) in data_dict.items():
+        for (date, daily_dict) in data_dict.items():
             ew = time_map(date)
-            if ew == -1 or ew>end_epiweek:
+            if ew == -1 or ew > end_epiweek:
                 continue
             if ew not in buffer_dict:
-                buffer_dict[ew]={}
+                buffer_dict[ew] = {}
                 for r in region_list:
-                    buffer_dict[ew][r] = [0.0]*8
+                    buffer_dict[ew][r] = [0.0] * 8
 
-            for (state,rec_list) in daily_dict.items():
+            for (state, rec_list) in daily_dict.items():
                 region = region_map(state)
                 # get rid of non-US regions
                 if region not in region_list:
                     continue
                 for rec in rec_list:
                     fac_num = float(len(device_dict[ew][region]))
-                    buffer_dict[ew][region]= np.add(
-                        buffer_dict[ew][region],[
-                            rec[1]=='positive',
-                            rec[2]=='positive',
-                            rec[3]=='positive',
+                    buffer_dict[ew][region] = np.add(
+                        buffer_dict[ew][region],
+                        [
+                            rec[1] == "positive",
+                            rec[2] == "positive",
+                            rec[3] == "positive",
                             1.0,
-                            float(rec[1]=='positive')/fac_num,
-                            float(rec[2]=='positive')/fac_num,
-                            float(rec[3]=='positive')/fac_num,
-                            1.0/fac_num,
-                        ]).tolist()
+                            float(rec[1] == "positive") / fac_num,
+                            float(rec[2] == "positive") / fac_num,
+                            float(rec[3] == "positive") / fac_num,
+                            1.0 / fac_num,
+                        ],
+                    ).tolist()
         # switch two dims of dict
         result_dict = {}
         for r in region_list:
-            result_dict[r]={}
-            for (k,v) in buffer_dict.items():
-                result_dict[r][k]=v[r]
+            result_dict[r] = {}
+            for (k, v) in buffer_dict.items():
+                result_dict[r][k] = v[r]
 
         return result_dict
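For readers new to this module, a minimal usage sketch of the pipeline touched by this commit follows. It restates only what the diff above already shows; the import path, raw-data directory, and epiweek bounds are illustrative assumptions, not part of the commit.

# Hypothetical driver for quidel.py (import path and inputs assumed).
from delphi.epidata.acquisition.quidel.quidel import QuidelData, measurement_to_ts

# load_email=False skips the Gmail IMAP fetch in retrieve_excels().
qd = QuidelData("/path/to/quidel/raw", load_email=False)
data = qd.load_csv()  # {date: {state: [records, ...]}}

# Each region/epiweek maps to an 8-slot vector:
# [fluA, fluB, fluAll, total, fluA/device, fluB/device, fluAll/device, total/device]
m = qd.prepare_measurements(data, use_hhs=True)

# Slot 3 is the raw test total; e.g. date_to_epiweek("2017-12-02") == 201748.
ts = measurement_to_ts(m, index=3, startweek=201740, endweek=201752)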
