1
1
"""Integration tests for the `covidcast` endpoint."""
2
2
3
3
# standard library
4
- import json
4
+ from typing import Callable
5
5
import unittest
6
6
7
7
# third party
10
10
11
11
# first party
12
12
from delphi_utils import Nans
13
+ from delphi .epidata .acquisition .covidcast .covidcast_row import CovidcastRow
13
14
from delphi .epidata .acquisition .covidcast .test_utils import CovidcastBase
14
15
15
16
# use the local instance of the Epidata API
BASE_URL = 'http://delphi_web_epidata/epidata/api.php'

# Row fields passed as `ignore_fields` to `CovidcastRow.as_dict` when the tests
# in this module build the payload they expect back from the endpoint.
IGNORE_FIELDS = ["id", "direction_updated_timestamp", "value_updated_timestamp", "source", "time_type", "geo_type"]
19
19
20
20
class CovidcastTests (CovidcastBase ):
21
21
"""Tests the `covidcast` endpoint."""
@@ -24,64 +24,62 @@ def localSetUp(self):
24
24
"""Perform per-test setup."""
25
25
self ._db ._cursor .execute ('update covidcast_meta_cache set timestamp = 0, epidata = "[]"' )
26
26
27
def request_based_on_row(self, row: CovidcastRow, extract_response: Callable = lambda x: x.json(), **kwargs):
    """Query the `covidcast` endpoint with parameters derived from `row`.

    Args:
        row: Row handed to `self.params_from_row` to build the query parameters.
        extract_response: Callable applied to the HTTP response object; the
            default decodes the body as JSON.
        **kwargs: Forwarded unchanged to `self.params_from_row`.

    Returns:
        Whatever `extract_response` returns for the HTTP response.

    Raises:
        requests.HTTPError: If the server replies with an error status
            (raised by `response.raise_for_status()`).
    """
    params = self.params_from_row(row, endpoint='covidcast', **kwargs)
    response = requests.get(BASE_URL, params=params)
    # Fail fast on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()
    return extract_response(response)
36
34
37
35
def _insert_placeholder_set_one(self):
    """Insert one row built by `CovidcastRow.make_default_row()` and return it."""
    row = CovidcastRow.make_default_row()
    self._insert_rows([row])
    return row
41
39
42
40
def _insert_placeholder_set_two(self):
    """Insert three county rows followed by three msa rows; return all six.

    Both groups use the geo values '11111', '22222' and '33333', so the msa
    rows overlap the county rows on geo value while differing in geo type.
    The county rows come first in the returned list.
    """
    rows = [
        CovidcastRow.make_default_row(
            geo_type='county', geo_value=str(i) * 5,
            value=i * 1., stderr=i * 10., sample_size=i * 100.,
        )
        for i in [1, 2, 3]
    ] + [
        # geo value intended to overlap with counties above
        CovidcastRow.make_default_row(
            geo_type='msa', geo_value=str(i - 3) * 5,
            value=i * 1., stderr=i * 10., sample_size=i * 100.,
        )
        for i in [4, 5, 6]
    ]
    self._insert_rows(rows)
    return rows
53
51
54
52
def _insert_placeholder_set_three(self):
    """Insert two groups of three county rows sharing time values; return all six.

    The first group is a three-day series for geo value '11111' (time values
    20000102 through 20000104). The second group covers the same three time
    values but uses the geo values '44444', '55555' and '66666'. Every row
    has issue 20000103; the lag argument is chosen per row (2 - i and 5 - i).
    """
    rows = [
        CovidcastRow.make_default_row(
            geo_type='county', geo_value='11111', time_value=2000_01_01 + i,
            value=i * 1., stderr=i * 10., sample_size=i * 100.,
            issue=2000_01_03, lag=2 - i,
        )
        for i in [1, 2, 3]
    ] + [
        # time value intended to overlap with 11111 above, with disjoint geo values
        CovidcastRow.make_default_row(
            geo_type='county', geo_value=str(i) * 5, time_value=2000_01_01 + i - 3,
            value=i * 1., stderr=i * 10., sample_size=i * 100.,
            issue=2000_01_03, lag=5 - i,
        )
        for i in [4, 5, 6]
    ]
    self._insert_rows(rows)
    return rows
65
63
66
64
def _insert_placeholder_set_four(self):
    """Insert three 'src1' rows followed by three 'src2' rows; return all six.

    Both groups use the signal names '11111', '22222' and '33333', so the
    'src2' rows overlap the 'src1' rows on signal while differing in source.
    The 'src1' rows come first in the returned list.
    """
    rows = [
        CovidcastRow.make_default_row(
            source='src1', signal=str(i) * 5,
            value=i * 1., stderr=i * 10., sample_size=i * 100.,
        )
        for i in [1, 2, 3]
    ] + [
        # signal intended to overlap with the signal above
        CovidcastRow.make_default_row(
            source='src2', signal=str(i - 3) * 5,
            value=i * 1., stderr=i * 10., sample_size=i * 100.,
        )
        for i in [4, 5, 6]
    ]
    self._insert_rows(rows)
    return rows
77
75
78
76
def _insert_placeholder_set_five (self ):
79
77
rows = [
80
- CovidcastRow (time_value = 2000_01_01 , value = i * 1. , stderr = i * 10. , sample_size = i * 100. , issue = 2000_01_03 + i )
78
+ CovidcastRow . make_default_row (time_value = 2000_01_01 , value = i * 1. , stderr = i * 10. , sample_size = i * 100. , issue = 2000_01_03 + i )
81
79
for i in [1 , 2 , 3 ]
82
80
] + [
83
81
# different time_values, same issues
84
- CovidcastRow (time_value = 2000_01_01 + i - 3 , value = i * 1. , stderr = i * 10. , sample_size = i * 100. , issue = 2000_01_03 + i - 3 )
82
+ CovidcastRow . make_default_row (time_value = 2000_01_01 + i - 3 , value = i * 1. , stderr = i * 10. , sample_size = i * 100. , issue = 2000_01_03 + i - 3 )
85
83
for i in [4 , 5 , 6 ]
86
84
]
87
85
self ._insert_rows (rows )
@@ -94,10 +92,13 @@ def test_round_trip(self):
94
92
row = self ._insert_placeholder_set_one ()
95
93
96
94
# make the request
97
- response , expected = self .request_based_on_row (row )
95
+ response = self .request_based_on_row (row )
96
+
97
+ expected = [row .as_dict (ignore_fields = IGNORE_FIELDS )]
98
+
98
99
self .assertEqual (response , {
99
100
'result' : 1 ,
100
- 'epidata' : [ expected ] ,
101
+ 'epidata' : expected ,
101
102
'message' : 'success' ,
102
103
})
103
104
@@ -154,32 +155,25 @@ def test_csv_format(self):
154
155
155
156
# make the request
156
157
# NB 'format' is a Python reserved word
157
- response , _ = self .request_based_on_row (
158
+ response = self .request_based_on_row (
158
159
row ,
159
160
extract_response = lambda resp : resp .text ,
160
161
** {'format' :'csv' }
161
162
)
162
- expected_response = (
163
- "geo_value,signal,time_value,direction,issue,lag,missing_value," +
164
- "missing_stderr,missing_sample_size,value,stderr,sample_size\n " +
165
- "," .join ("" if x is None else str (x ) for x in [
166
- row .geo_value ,
167
- row .signal ,
168
- row .time_value ,
169
- row .direction ,
170
- row .issue ,
171
- row .lag ,
172
- row .missing_value ,
173
- row .missing_stderr ,
174
- row .missing_sample_size ,
175
- row .value ,
176
- row .stderr ,
177
- row .sample_size
178
- ]) + "\n "
163
+
164
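+ # TODO: The expected CSV is assembled by hand (fixed column order, direction forced to None) because of how api.php emits it; revisit once that output is cleaned up.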
165
+ column_order = [
166
+ "geo_value" , "signal" , "time_value" , "direction" , "issue" , "lag" , "missing_value" ,
167
+ "missing_stderr" , "missing_sample_size" , "value" , "stderr" , "sample_size"
168
+ ]
169
+ expected = (
170
+ row .api_compatibility_row_df
171
+ .assign (direction = None )
172
+ .to_csv (columns = column_order , index = False )
179
173
)
180
174
181
175
# assert that the right data came back
182
- self .assertEqual (response , expected_response )
176
+ self .assertEqual (response , expected )
183
177
184
178
def test_raw_json_format (self ):
185
179
"""Test generate raw json data."""
@@ -188,10 +182,12 @@ def test_raw_json_format(self):
188
182
row = self ._insert_placeholder_set_one ()
189
183
190
184
# make the request
191
- response , expected = self .request_based_on_row (row , ** {'format' :'json' })
185
+ response = self .request_based_on_row (row , ** {'format' :'json' })
186
+
187
+ expected = [row .as_dict (ignore_fields = IGNORE_FIELDS )]
192
188
193
189
# assert that the right data came back
194
- self .assertEqual (response , [ expected ] )
190
+ self .assertEqual (response , expected )
195
191
196
192
def test_fields (self ):
197
193
"""Test fields parameter"""
@@ -200,7 +196,9 @@ def test_fields(self):
200
196
row = self ._insert_placeholder_set_one ()
201
197
202
198
# limit fields
203
- response , expected = self .request_based_on_row (row , fields = 'time_value,geo_value' )
199
+ response = self .request_based_on_row (row , fields = 'time_value,geo_value' )
200
+
201
+ expected = row .as_dict (ignore_fields = IGNORE_FIELDS )
204
202
expected_all = {
205
203
'result' : 1 ,
206
204
'epidata' : [{
@@ -213,15 +211,14 @@ def test_fields(self):
213
211
self .assertEqual (response , expected_all )
214
212
215
213
# limit using invalid fields
216
- response , _ = self .request_based_on_row (row , fields = 'time_value,geo_value,doesnt_exist' )
214
+ response = self .request_based_on_row (row , fields = 'time_value,geo_value,doesnt_exist' )
217
215
218
216
# assert that the right data came back (only valid fields)
219
217
self .assertEqual (response , expected_all )
220
218
221
219
222
220
# limit exclude fields: exclude all except time_value and geo_value
223
-
224
- response , _ = self .request_based_on_row (row , fields = (
221
+ response = self .request_based_on_row (row , fields = (
225
222
'-value,-stderr,-sample_size,-direction,-issue,-lag,-signal,' +
226
223
'-missing_value,-missing_stderr,-missing_sample_size'
227
224
))
@@ -234,18 +231,15 @@ def test_location_wildcard(self):
234
231
235
232
# insert placeholder data
236
233
rows = self ._insert_placeholder_set_two ()
237
- expected_counties = [
238
- self .expected_from_row (r ) for r in rows [:3 ]
239
- ]
240
-
234
+ expected = [row .as_dict (ignore_fields = IGNORE_FIELDS ) for row in rows [:3 ]]
241
235
# make the request
242
- response , _ = self .request_based_on_row (rows [0 ], geo_value = "*" )
236
+ response = self .request_based_on_row (rows [0 ], geo_value = "*" )
243
237
244
238
self .maxDiff = None
245
239
# assert that the right data came back
246
240
self .assertEqual (response , {
247
241
'result' : 1 ,
248
- 'epidata' : expected_counties ,
242
+ 'epidata' : expected ,
249
243
'message' : 'success' ,
250
244
})
251
245
@@ -290,12 +284,10 @@ def test_signal_wildcard(self):
290
284
291
285
# insert placeholder data
292
286
rows = self ._insert_placeholder_set_four ()
293
- expected_signals = [
294
- self .expected_from_row (r ) for r in rows [:3 ]
295
- ]
287
+ expected_signals = [row .as_dict (ignore_fields = IGNORE_FIELDS ) for row in rows [:3 ]]
296
288
297
289
# make the request
298
- response , _ = self .request_based_on_row (rows [0 ], signals = "*" )
290
+ response = self .request_based_on_row (rows [0 ], signals = "*" )
299
291
300
292
self .maxDiff = None
301
293
# assert that the right data came back
@@ -310,35 +302,33 @@ def test_geo_value(self):
310
302
311
303
# insert placeholder data
312
304
rows = self ._insert_placeholder_set_two ()
313
- expected_counties = [
314
- self .expected_from_row (r ) for r in rows [:3 ]
315
- ]
305
+ expected = [row .as_dict (ignore_fields = IGNORE_FIELDS ) for row in rows [:3 ]]
316
306
317
307
def fetch (geo_value ):
318
308
# make the request
319
- response , _ = self .request_based_on_row (rows [0 ], geo_value = geo_value )
309
+ response = self .request_based_on_row (rows [0 ], geo_value = geo_value )
320
310
321
311
return response
322
312
323
313
# test fetch a specific region
324
314
r = fetch ('11111' )
325
315
self .assertEqual (r ['message' ], 'success' )
326
- self .assertEqual (r ['epidata' ], [ expected_counties [ 0 ] ])
316
+ self .assertEqual (r ['epidata' ], expected [ 0 : 1 ])
327
317
# test fetch a specific yet not existing region
328
318
r = fetch ('55555' )
329
319
self .assertEqual (r ['message' ], 'no results' )
330
320
# test fetch multiple regions
331
321
r = fetch ('11111,22222' )
332
322
self .assertEqual (r ['message' ], 'success' )
333
- self .assertEqual (r ['epidata' ], [ expected_counties [ 0 ], expected_counties [ 1 ] ])
323
+ self .assertEqual (r ['epidata' ], expected [ 0 : 2 ])
334
324
# test fetch multiple noncontiguous regions
335
325
r = fetch ('11111,33333' )
336
326
self .assertEqual (r ['message' ], 'success' )
337
- self .assertEqual (r ['epidata' ], [expected_counties [0 ], expected_counties [2 ]])
327
+ self .assertEqual (r ['epidata' ], [expected [0 ], expected [2 ]])
338
328
# test fetch multiple regions but one is not existing
339
329
r = fetch ('11111,55555' )
340
330
self .assertEqual (r ['message' ], 'success' )
341
- self .assertEqual (r ['epidata' ], [ expected_counties [ 0 ] ])
331
+ self .assertEqual (r ['epidata' ], expected [ 0 : 1 ])
342
332
# test fetch empty region
343
333
r = fetch ('' )
344
334
self .assertEqual (r ['message' ], 'no results' )
@@ -348,12 +338,10 @@ def test_location_timeline(self):
348
338
349
339
# insert placeholder data
350
340
rows = self ._insert_placeholder_set_three ()
351
- expected_timeseries = [
352
- self .expected_from_row (r ) for r in rows [:3 ]
353
- ]
341
+ expected_timeseries = [row .as_dict (ignore_fields = IGNORE_FIELDS ) for row in rows [:3 ]]
354
342
355
343
# make the request
356
- response , _ = self .request_based_on_row (rows [0 ], time_values = '20000101-20000105' )
344
+ response = self .request_based_on_row (rows [0 ], time_values = '20000101-20000105' )
357
345
358
346
# assert that the right data came back
359
347
self .assertEqual (response , {
@@ -379,15 +367,16 @@ def test_unique_key_constraint(self):
379
367
def test_nullable_columns (self ):
380
368
"""Missing values should be surfaced as null."""
381
369
382
- row , _ = self . _make_placeholder_row (
370
+ row = CovidcastRow . make_default_row (
383
371
stderr = None , sample_size = None ,
384
372
missing_stderr = Nans .OTHER .value , missing_sample_size = Nans .OTHER .value
385
373
)
386
374
self ._insert_rows ([row ])
387
375
388
376
# make the request
389
- response , expected = self .request_based_on_row (row )
390
- expected .update (stderr = None , sample_size = None )
377
+ response = self .request_based_on_row (row )
378
+ expected = row .as_dict (ignore_fields = IGNORE_FIELDS )
379
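+ # NOTE(review): the former `expected.update(stderr=None, sample_size=None)` override is disabled; presumably row.as_dict() already yields None for both fields - confirm, then remove this note.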
391
380
392
381
# assert that the right data came back
393
382
self .assertEqual (response , {
@@ -401,18 +390,19 @@ def test_temporal_partitioning(self):
401
390
402
391
# insert placeholder data
403
392
rows = [
404
- self . _make_placeholder_row (time_type = tt )[ 0 ]
393
+ CovidcastRow . make_default_row (time_type = tt )
405
394
for tt in "hour day week month year" .split ()
406
395
]
407
396
self ._insert_rows (rows )
408
397
409
398
# make the request
410
- response , expected = self .request_based_on_row (rows [1 ], time_values = "0-99999999" )
399
+ response = self .request_based_on_row (rows [1 ], time_values = "20000101-30010201" )
400
+ expected = [rows [1 ].as_dict (ignore_fields = IGNORE_FIELDS )]
411
401
412
402
# assert that the right data came back
413
403
self .assertEqual (response , {
414
404
'result' : 1 ,
415
- 'epidata' : [ expected ] ,
405
+ 'epidata' : expected ,
416
406
'message' : 'success' ,
417
407
})
418
408
@@ -423,37 +413,37 @@ def test_date_formats(self):
423
413
rows = self ._insert_placeholder_set_three ()
424
414
425
415
# make the request
426
- response , expected = self .request_based_on_row (rows [0 ], time_values = "20000102" , geo_value = "*" )
416
+ response = self .request_based_on_row (rows [0 ], time_values = "20000102" , geo_value = "*" )
427
417
428
418
# assert that the right data came back
429
419
self .assertEqual (len (response ['epidata' ]), 2 )
430
420
431
421
# make the request
432
- response , expected = self .request_based_on_row (rows [0 ], time_values = "2000-01-02" , geo_value = "*" )
422
+ response = self .request_based_on_row (rows [0 ], time_values = "2000-01-02" , geo_value = "*" )
433
423
434
424
# assert that the right data came back
435
425
self .assertEqual (len (response ['epidata' ]), 2 )
436
426
437
427
# make the request
438
- response , expected = self .request_based_on_row (rows [0 ], time_values = "20000102,20000103" , geo_value = "*" )
428
+ response = self .request_based_on_row (rows [0 ], time_values = "20000102,20000103" , geo_value = "*" )
439
429
440
430
# assert that the right data came back
441
- self .assertEqual (len (response ['epidata' ]), 4 )
431
+ self .assertEqual (len (response ['epidata' ]), 2 * 2 )
442
432
443
433
# make the request
444
- response , expected = self .request_based_on_row (rows [0 ], time_values = "2000-01-02,2000-01-03" , geo_value = "*" )
434
+ response = self .request_based_on_row (rows [0 ], time_values = "2000-01-02,2000-01-03" , geo_value = "*" )
445
435
446
436
# assert that the right data came back
447
- self .assertEqual (len (response ['epidata' ]), 4 )
437
+ self .assertEqual (len (response ['epidata' ]), 2 * 2 )
448
438
449
439
# make the request
450
- response , expected = self .request_based_on_row (rows [0 ], time_values = "20000102-20000104" , geo_value = "*" )
440
+ response = self .request_based_on_row (rows [0 ], time_values = "20000102-20000104" , geo_value = "*" )
451
441
452
442
# assert that the right data came back
453
- self .assertEqual (len (response ['epidata' ]), 6 )
443
+ self .assertEqual (len (response ['epidata' ]), 2 * 3 )
454
444
455
445
# make the request
456
- response , expected = self .request_based_on_row (rows [0 ], time_values = "2000-01-02:2000-01-04" , geo_value = "*" )
446
+ response = self .request_based_on_row (rows [0 ], time_values = "2000-01-02:2000-01-04" , geo_value = "*" )
457
447
458
448
# assert that the right data came back
459
- self .assertEqual (len (response ['epidata' ]), 6 )
449
+ self .assertEqual (len (response ['epidata' ]), 2 * 3 )
0 commit comments