@@ -122,23 +122,41 @@ def fetch_json(path, payload, call_count=1, requests_impl=requests):
     return resp.json()
 
 
-def fetch_flusurv_location(location_code):
-    """Return decoded FluSurv JSON object for the given location."""
-    return fetch_json(
+def fetch_flusurv_location(location, seasonids):
+    """Return FluSurv JSON object for the given location."""
+    location_code = location_to_code[location]
+
+    result = fetch_json(
         "PostPhase03DataTool",
         {
             "appversion": "Public",
             "key": "getdata",
-            "injson": [{
-                "networkid": location_code[0],
-                "cacthmentid": location_code[1],
-                "seasonid": seasonid
-            }],
+            "injson": [
+                {
+                    "networkid": location_code[0],
+                    "catchmentid": location_code[1],
+                    "seasonid": elem,
+                } for elem in seasonids],
         },
     )
 
-def fetch_flusurv_object():
-    """Return raw FluSurv JSON object for all locations."""
+    # If no data is returned (a given seasonid is not reported,
+    # location codes are invalid, etc), the API returns a JSON like:
+    #     {
+    #         'default_data': {
+    #             'response': 'No Data'
+    #         }
+    #     }
+    #
+    # If data is returned, then data["default_data"] is a list
+    # and data["default_data"]["response"] doesn't exist.
+    assert isinstance(result["default_data"], list) and len(result["default_data"]) > 0, \
+        f"Data was not correctly returned from the API for {location}"
+    return result
+
+
+def fetch_flusurv_metadata():
+    """Return FluSurv JSON metadata object."""
     return fetch_json(
         "PostPhase03DataTool",
         {"appversion": "Public", "key": "", "injson": []}
@@ -155,12 +173,13 @@ def mmwrid_to_epiweek(mmwrid):
     return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew()
 
 
-def reformat_to_nested(data):
+def group_by_epiweek(data):
     """
-    Convert the default data object into a dictionary grouped by location and epiweek
+    Convert default data for a single location into an epiweek-grouped dictionary
 
     Args:
-        A GRASP API response object, as fetched with 'fetch_flusurv_object()'
+        data: The "default_data" element of a GRASP API response object,
+            as fetched with 'fetch_flusurv_location' or `fetch_flusurv_metadata`
 
     Returns a dictionary of the format
         {
@@ -176,21 +195,22 @@ def reformat_to_nested(data):
             ...
         }
     """
+    data = data["default_data"]
+
     # Sanity check the input. We expect to see some epiweeks
-    if len(data["default_data"]) == 0:
+    if len(data) == 0:
         raise Exception("no data found")
 
-    id_label_map = make_id_label_map(data)
+    id_label_map = make_id_label_map()
 
     # Create output object
-    # First layer of keys is locations. Second layer of keys is epiweeks.
-    # Third layer of keys is groups (by id, not age in years, sex abbr, etc).
+    # First layer of keys is epiweeks. Second layer of keys is groups
+    # (by id, not age in years, sex abbr, etc).
     #
     # If a top-level key doesn't already exist, create a new empty dict.
-    # If a secondary key doesn't already exist, create a new empty dict.
-    # If a tertiary key doesn't already exist, create a new key with a
+    # If a secondary key doesn't already exist, create a new key with a
     # default value of None if not provided.
-    data_out = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: None)))
+    data_out = defaultdict(lambda: defaultdict(lambda: None))
 
     # data["default_data"] is a list of dictionaries, with the format
     #     [
@@ -199,66 +219,62 @@ def reformat_to_nested(data):
     #         {'networkid': 1, 'catchmentid': 22, 'seasonid': 49, 'ageid': 0, 'sexid': 0, 'raceid': 1, 'rate': 20.6, 'weeklyrate': 0.1, 'mmwrid': 2516},
     #         ...
     #     ]
-    for obs in data["default_data"]:
+    for obs in data:
         epiweek = mmwrid_to_epiweek(obs["mmwrid"])
-        location = code_to_location[(obs["networkid"], obs["catchmentid"])]
         groupname = groupids_to_name(
             ageid=obs["ageid"], sexid=obs["sexid"], raceid=obs["raceid"],
             id_label_map=id_label_map
         )
 
         rate = obs["weeklyrate"]
-        prev_rate = data_out[location][epiweek][groupname]
+        prev_rate = data_out[epiweek][groupname]
         if prev_rate is None:
-            # This is the first time to see a rate for this location-epiweek-
-            # group combo
-            data_out[location][epiweek][groupname] = rate
+            # This is the first time to see a rate for this epiweek-group
+            # combo
+            data_out[epiweek][groupname] = rate
         elif prev_rate != rate:
             # Skip and warn; a different rate was already found for this
-            # location-epiweek-group combo
-            warn((f"warning: Multiple rates seen for {location} {epiweek} "
+            # epiweek-group combo
+            warn((f"warning: Multiple rates seen for {epiweek} "
                  f"{groupname}, but previous value {prev_rate} does not "
                  f"equal new value {rate}. Using the first value."))
 
     # Sanity check the input. We expect to have populated our dictionary
     if len(data_out.keys()) == 0:
         raise Exception("no data loaded")
 
-    print(f"found data for {len(data_out.keys())} locations")
-    # Just check one location to avoid iterating through the entire
-    # dictionary.
-    print(f"found data for {len(data_out[location].keys())} epiweeks for {location}")
+    print(f"found data for {len(data_out.keys())} epiweeks")
 
     return data_out
 
 
-def get_data(location_code):
+def get_data(location, seasonids):
     """
     Fetch and parse flu data for the given location.
 
     This method performs the following operations:
-      - fetches FluSurv data from CDC
+      - filters location-specific FluSurv data from CDC API response object
       - extracts and returns hospitalization rates
     """
-
     # fetch
     print("[fetching flusurv data...]")
-    data_in = fetch_flusurv_location(location_code)
+    data_in = fetch_flusurv_location(location, seasonids)
 
     # extract
-    print("[extracting values...]")
-    data_out = reformat_to_nested(data_in)
+    print("[reformatting flusurv result...]")
+    data_out = group_by_epiweek(data_in)
 
     # return
-    print("[scraped successfully]")
+    print(f"[successfully fetched data for {location}]")
     return data_out
 
 
 def get_current_issue(data):
     """
     Extract the current issue from the FluSurv API result.
 
-    data: dictionary representing a JSON response from the FluSurv API
+    Args:
+        data: dictionary representing a JSON response from the FluSurv API
     """
     # extract
     date = datetime.strptime(data["loaddatetime"], "%b %d, %Y")
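Reviewer note: since `get_data` now returns a dictionary keyed by epiweek rather than by location, a short sketch of how the new shape might be consumed. The group label `"rate_overall"` is a placeholder; real labels come from `groupids_to_name` and `make_id_label_map`, and the location and seasonid are illustrative.

```python
# Illustrative only: walk the epiweek-grouped output of get_data.
data_out = get_data("CA", seasonids=[62])
for epiweek, groups in sorted(data_out.items()):
    # groups is a defaultdict(lambda: None), so a missing group yields None
    # rather than a KeyError.
    rate = groups["rate_overall"]  # placeholder group label
    print(epiweek, rate)
```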
@@ -267,8 +283,10 @@ def get_current_issue(data):
     return EpiDate(date.year, date.month, date.day).get_ew()
 
 
-def make_id_label_map(data):
+def make_id_label_map():
     """Create a map from valueid to group description"""
+    data = fetch_flusurv_metadata()
+
     id_to_label = defaultdict(lambda: defaultdict(lambda: None))
     for group in data["master_lookup"]:
         # Skip "overall" group
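Reviewer note: `make_id_label_map` now pulls the `master_lookup` metadata itself instead of taking a response object, so callers only pair it with `groupids_to_name`. A sketch under that assumption, with invented observation ids:

```python
# Hypothetical sketch: translate one observation's ids into a group label.
id_label_map = make_id_label_map()  # fetches metadata internally
obs = {"ageid": 0, "sexid": 0, "raceid": 1}  # invented values
groupname = groupids_to_name(
    ageid=obs["ageid"], sexid=obs["sexid"], raceid=obs["raceid"],
    id_label_map=id_label_map,
)
print(groupname)
```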