Skip to content

Commit 8603447

Browse files
authored
Merge pull request #1095 from cmu-delphi/leonlu2/error_on_geo_values
Leonlu2/error on geo values
2 parents e54eb98 + e2d12ee commit 8603447

File tree

9 files changed

+96
-72
lines changed

9 files changed

+96
-72
lines changed
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
geo_id,value,stderr,sample_size,issue,time_value,geo_type,signal,source
2-
d_nonlatest,0,0,0,1,0,county,sig,src
3-
d_latest, 0,0,0,3,0,county,sig,src
4-
d_justone, 0,0,0,1,0,county,sig,src
2+
d_nonlatest,0,0,0,1,0,msa,sig,src
3+
d_latest, 0,0,0,3,0,msa,sig,src
4+
d_justone, 0,0,0,1,0,msa,sig,src

integrations/client/test_delphi_epidata.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,10 @@
1212
# third party
1313
import delphi.operations.secrets as secrets
1414
from delphi.epidata.acquisition.covidcast.covidcast_meta_cache_updater import main as update_covidcast_meta_cache
15-
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
15+
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow, FIPS, MSA
1616
from delphi.epidata.client.delphi_epidata import Epidata
1717
from delphi_utils import Nans
1818

19-
2019
# py3tester coverage target
2120
__test_target__ = 'delphi.epidata.client.delphi_epidata'
2221
# all the Nans we use here are just one value, so this is a shortcut to it:
@@ -219,10 +218,10 @@ def test_geo_value(self):
219218
# insert placeholder data: three counties, three MSAs
220219
N = 3
221220
rows = [
222-
CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
221+
CovidcastTestRow.make_default_row(geo_type="fips", geo_value=FIPS[i], value=i)
223222
for i in range(N)
224223
] + [
225-
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
224+
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=MSA[i], value=i*10)
226225
for i in range(N)
227226
]
228227
self._insert_rows(rows)
@@ -241,26 +240,28 @@ def fetch(geo):
241240
self.assertEqual(request['message'], 'success')
242241
self.assertEqual(request['epidata'], counties)
243242
# test fetch a specific region
244-
request = fetch('11111')
243+
request = fetch([FIPS[0]])
245244
self.assertEqual(request['message'], 'success')
246-
self.assertEqual(request['epidata'], [counties[1]])
245+
self.assertEqual(request['epidata'], [counties[0]])
247246
# test fetch a specific yet not existing region
248247
request = fetch('55555')
249-
self.assertEqual(request['message'], 'no results')
248+
self.assertEqual(request['message'], 'Invalid geo_value(s) 55555 for the requested geo_type fips')
250249
# test fetch a multiple regions
251-
request = fetch(['11111', '22222'])
250+
request = fetch([FIPS[0], FIPS[1]])
252251
self.assertEqual(request['message'], 'success')
253-
self.assertEqual(request['epidata'], [counties[1], counties[2]])
252+
self.assertEqual(request['epidata'], [counties[0], counties[1]])
254253
# test fetch a multiple regions in another variant
255-
request = fetch(['00000', '22222'])
254+
request = fetch([FIPS[0], FIPS[2]])
256255
self.assertEqual(request['message'], 'success')
257256
self.assertEqual(request['epidata'], [counties[0], counties[2]])
258257
# test fetch a multiple regions but one is not existing
259-
request = fetch(['11111', '55555'])
260-
self.assertEqual(request['message'], 'success')
261-
self.assertEqual(request['epidata'], [counties[1]])
258+
request = fetch([FIPS[0], '55555'])
259+
self.assertEqual(request['message'], 'Invalid geo_value(s) 55555 for the requested geo_type fips')
262260
# test fetch a multiple regions but specify no region
263261
request = fetch([])
262+
self.assertEqual(request['message'], 'geo_value is empty for the requested geo_type fips!')
263+
# test fetch a region with no results
264+
request = fetch([FIPS[3]])
264265
self.assertEqual(request['message'], 'no results')
265266

266267
def test_covidcast_meta(self):
@@ -325,10 +326,10 @@ def test_async_epidata(self):
325326
# insert placeholder data: three counties, three MSAs
326327
N = 3
327328
rows = [
328-
CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
329+
CovidcastTestRow.make_default_row(geo_type="fips", geo_value=FIPS[i-1], value=i)
329330
for i in range(N)
330331
] + [
331-
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
332+
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=MSA[i-1], value=i*10)
332333
for i in range(N)
333334
]
334335
self._insert_rows(rows)

integrations/server/test_covidcast.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
# first party
1212
from delphi_utils import Nans
13-
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
13+
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow, FIPS, MSA
1414
from delphi.epidata.client.delphi_epidata import Epidata
1515

1616
# use the local instance of the Epidata API
@@ -37,23 +37,22 @@ def _insert_placeholder_set_one(self):
3737

3838
def _insert_placeholder_set_two(self):
3939
rows = [
40-
CovidcastTestRow.make_default_row(geo_type='county', geo_value=str(i)*5, value=i*1., stderr=i*10., sample_size=i*100.)
40+
CovidcastTestRow.make_default_row(geo_type='msa', geo_value=MSA[i-1], value=i*1., stderr=i*10., sample_size=i*100.)
4141
for i in [1, 2, 3]
4242
] + [
43-
# geo value intended to overlap with counties above
44-
CovidcastTestRow.make_default_row(geo_type='msa', geo_value=str(i-3)*5, value=i*1., stderr=i*10., sample_size=i*100.)
43+
CovidcastTestRow.make_default_row(geo_type='fips', geo_value=FIPS[i-4], value=i*1., stderr=i*10., sample_size=i*100.)
4544
for i in [4, 5, 6]
4645
]
4746
self._insert_rows(rows)
4847
return rows
4948

5049
def _insert_placeholder_set_three(self):
5150
rows = [
52-
CovidcastTestRow.make_default_row(geo_type='county', geo_value='11111', time_value=2000_01_01+i, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=2-i)
51+
CovidcastTestRow.make_default_row(time_value=2000_01_01+i, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=2-i)
5352
for i in [1, 2, 3]
5453
] + [
55-
# time value intended to overlap with 11111 above, with disjoint geo values
56-
CovidcastTestRow.make_default_row(geo_type='county', geo_value=str(i)*5, time_value=2000_01_01+i-3, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=5-i)
54+
# time value intended to overlap with the time values above, with disjoint geo values
55+
CovidcastTestRow.make_default_row(geo_value=MSA[i-3], time_value=2000_01_01+i-3, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=5-i)
5756
for i in [4, 5, 6]
5857
]
5958
self._insert_rows(rows)
@@ -295,7 +294,7 @@ def test_signal_wildcard(self):
295294
})
296295

297296
def test_geo_value(self):
298-
"""test different variants of geo types: single, *, multi."""
297+
"""test whether geo values are valid for specific geo types"""
299298

300299
# insert placeholder data
301300
rows = self._insert_placeholder_set_two()
@@ -308,26 +307,28 @@ def fetch(geo_value):
308307
return response
309308

310309
# test fetch a specific region
311-
r = fetch('11111')
310+
r = fetch(MSA[0])
312311
self.assertEqual(r['message'], 'success')
313312
self.assertEqual(r['epidata'], expected[0:1])
314313
# test fetch a specific yet not existing region
315-
r = fetch('55555')
316-
self.assertEqual(r['message'], 'no results')
314+
r = fetch('11111')
315+
self.assertEqual(r['message'], 'Invalid geo_value(s) 11111 for the requested geo_type msa')
317316
# test fetch multiple regions
318-
r = fetch('11111,22222')
317+
r = fetch(f'{MSA[0]},{MSA[1]}')
319318
self.assertEqual(r['message'], 'success')
320319
self.assertEqual(r['epidata'], expected[0:2])
321320
# test fetch multiple noncontiguous regions
322-
r = fetch('11111,33333')
321+
r = fetch(f'{MSA[0]},{MSA[2]}')
323322
self.assertEqual(r['message'], 'success')
324323
self.assertEqual(r['epidata'], [expected[0], expected[2]])
325324
# test fetch multiple regions but one is not existing
326-
r = fetch('11111,55555')
327-
self.assertEqual(r['message'], 'success')
328-
self.assertEqual(r['epidata'], expected[0:1])
325+
r = fetch(f'{MSA[0]},11111')
326+
self.assertEqual(r['message'], 'Invalid geo_value(s) 11111 for the requested geo_type msa')
329327
# test fetch empty region
330328
r = fetch('')
329+
self.assertEqual(r['message'], 'geo_value is empty for the requested geo_type msa!')
330+
# test a region that has no results
331+
r = fetch(MSA[3])
331332
self.assertEqual(r['message'], 'no results')
332333

333334
def test_location_timeline(self):

requirements.api.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
delphi_utils==0.3.6
12
epiweeks==2.1.2
23
Flask==2.2.2
34
itsdangerous<2.1

src/acquisition/covidcast/test_utils.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@
1414
# all the Nans we use here are just one value, so this is a shortcut to it:
1515
nmv = Nans.NOT_MISSING.value
1616

17+
# TODO replace these real geo_values with fake values, and use patch and mock to mock the return values of
18+
# delphi_utils.geomap.GeoMapper().get_geo_values(geo_type) in parse_geo_sets() of _params.py
19+
20+
FIPS = ['04019', '19143', '29063', '36083'] # Example list of valid FIPS codes as strings
21+
MSA = ['40660', '44180', '48620', '49420'] # Example list of valid MSA codes as strings
1722

1823
class CovidcastTestRow(CovidcastRow):
1924
@staticmethod
@@ -22,9 +27,9 @@ def make_default_row(**kwargs) -> "CovidcastTestRow":
2227
"source": "src",
2328
"signal": "sig",
2429
"time_type": "day",
25-
"geo_type": "county",
30+
"geo_type": "msa",
2631
"time_value": 2020_02_02,
27-
"geo_value": "01234",
32+
"geo_value": MSA[0],
2833
"value": 10.0,
2934
"stderr": 10.0,
3035
"sample_size": 10.0,

src/server/_params.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import re
33
from dataclasses import dataclass
44
from typing import List, Optional, Sequence, Tuple, Union
5+
import delphi_utils
56

67
from flask import request
78

@@ -53,6 +54,17 @@ class GeoSet:
5354
geo_type: str
5455
geo_values: Union[bool, Sequence[str]]
5556

57+
def __init__(self, geo_type: str, geo_values: Union[bool, Sequence[str]]):
58+
if not isinstance(geo_values, bool):
59+
if geo_values == ['']:
60+
raise ValidationFailedException(f"geo_value is empty for the requested geo_type {geo_type}!")
61+
allowed_values = delphi_utils.geomap.GeoMapper().get_geo_values(geo_type)
62+
invalid_values = set(geo_values) - set(allowed_values)
63+
if invalid_values:
64+
raise ValidationFailedException(f"Invalid geo_value(s) {', '.join(invalid_values)} for the requested geo_type {geo_type}")
65+
self.geo_type = geo_type
66+
self.geo_values = geo_values
67+
5668
def matches(self, geo_type: str, geo_value: str) -> bool:
5769
return self.geo_type == geo_type and (self.geo_values is True or (not isinstance(self.geo_values, bool) and geo_value in self.geo_values))
5870

@@ -460,6 +472,7 @@ def parse_source_signal_sets() -> List[SourceSignalSet]:
460472

461473
def parse_geo_sets() -> List[GeoSet]:
462474
geo_type = request.values.get("geo_type")
475+
463476
if geo_type:
464477
# old version
465478
require_any(request, "geo_value", "geo_values", empty=True)

tests/common/test_covidcast_row.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
covidcast_rows_from_args,
1313
transpose_dict,
1414
)
15+
from delphi.epidata.acquisition.covidcast.test_utils import MSA
1516

1617
# py3tester coverage target (equivalent to `import *`)
1718
__test_target__ = 'delphi.epidata.common.covidcast_row'
@@ -22,9 +23,9 @@ class TestCovidcastRows(unittest.TestCase):
2223
"source": ["src"] * 10,
2324
"signal": ["sig_base"] * 5 + ["sig_other"] * 5,
2425
"time_type": ["day"] * 10,
25-
"geo_type": ["county"] * 10,
26+
"geo_type": ["msa"] * 10,
2627
"time_value": [2021_05_01 + i for i in range(5)] * 2,
27-
"geo_value": ["01234"] * 10,
28+
"geo_value": [MSA[0]] * 10,
2829
"value": range(10),
2930
"stderr": [10.0] * 10,
3031
"sample_size": [10.0] * 10,

tests/server/test_params.py

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from delphi.epidata.server._exceptions import (
2929
ValidationFailedException,
3030
)
31+
from delphi.epidata.acquisition.covidcast.test_utils import FIPS, MSA
3132

3233
# py3tester coverage target
3334
__test_target__ = "delphi.epidata.server._params"
@@ -45,19 +46,19 @@ def setUp(self):
4546

4647
def test_geo_set(self):
4748
with self.subTest("*"):
48-
p = GeoSet("hrr", True)
49-
self.assertTrue(p.matches("hrr", "any"))
49+
p = GeoSet("fips", True)
50+
self.assertTrue(p.matches("fips", "any"))
5051
self.assertFalse(p.matches("msa", "any"))
5152
with self.subTest("subset"):
52-
p = GeoSet("hrr", ["a", "b"])
53-
self.assertTrue(p.matches("hrr", "a"))
54-
self.assertTrue(p.matches("hrr", "b"))
55-
self.assertFalse(p.matches("hrr", "c"))
53+
p = GeoSet("fips", [FIPS[0], FIPS[1]])
54+
self.assertTrue(p.matches("fips", FIPS[0]))
55+
self.assertTrue(p.matches("fips", FIPS[1]))
56+
self.assertFalse(p.matches("fips", "c"))
5657
self.assertFalse(p.matches("msa", "any"))
5758
with self.subTest("count"):
5859
self.assertEqual(GeoSet("a", True).count(), inf)
5960
self.assertEqual(GeoSet("a", False).count(), 0)
60-
self.assertEqual(GeoSet("a", ["a", "b"]).count(), 2)
61+
self.assertEqual(GeoSet("fips", [FIPS[0], FIPS[1]]).count(), 2)
6162

6263
def test_source_signal_set(self):
6364
with self.subTest("*"):
@@ -89,43 +90,43 @@ def test_parse_geo_arg(self):
8990
with app.test_request_context("/"):
9091
self.assertEqual(parse_geo_arg(), [])
9192
with self.subTest("single"):
92-
with app.test_request_context("/?geo=state:*"):
93-
self.assertEqual(parse_geo_arg(), [GeoSet("state", True)])
94-
with app.test_request_context("/?geo=state:AK"):
95-
self.assertEqual(parse_geo_arg(), [GeoSet("state", ["ak"])])
93+
with app.test_request_context("/?geo=fips:*"):
94+
self.assertEqual(parse_geo_arg(), [GeoSet("fips", True)])
95+
with app.test_request_context(f"/?geo=fips:{FIPS[0]}"):
96+
self.assertEqual(parse_geo_arg(), [GeoSet("fips", [FIPS[0]])])
9697
with self.subTest("single list"):
97-
with app.test_request_context("/?geo=state:AK,TK"):
98-
self.assertEqual(parse_geo_arg(), [GeoSet("state", ["ak", "tk"])])
98+
with app.test_request_context(f"/?geo=fips:{FIPS[0]},{FIPS[1]}"):
99+
self.assertEqual(parse_geo_arg(), [GeoSet("fips", [FIPS[0], FIPS[1]])])
99100
with self.subTest("multi"):
100-
with app.test_request_context("/?geo=state:*;nation:*"):
101-
self.assertEqual(parse_geo_arg(), [GeoSet("state", True), GeoSet("nation", True)])
102-
with app.test_request_context("/?geo=state:AK;nation:US"):
101+
with app.test_request_context("/?geo=fips:*;msa:*"):
102+
self.assertEqual(parse_geo_arg(), [GeoSet("fips", True), GeoSet("msa", True)])
103+
with app.test_request_context(f"/?geo=fips:{FIPS[0]};msa:{MSA[0]}"):
103104
self.assertEqual(
104105
parse_geo_arg(),
105-
[GeoSet("state", ["ak"]), GeoSet("nation", ["us"])],
106+
[GeoSet("fips", [FIPS[0]]), GeoSet("msa", [MSA[0]])],
106107
)
107-
with app.test_request_context("/?geo=state:AK;state:KY"):
108+
with app.test_request_context(f"/?geo=fips:{FIPS[0]};fips:{FIPS[1]}"):
108109
self.assertEqual(
109110
parse_geo_arg(),
110-
[GeoSet("state", ["ak"]), GeoSet("state", ["ky"])],
111+
[GeoSet("fips", [FIPS[0]]), GeoSet("fips", [FIPS[1]])],
111112
)
112113
with self.subTest("multi list"):
113-
with app.test_request_context("/?geo=state:AK,TK;county:42003,40556"):
114+
with app.test_request_context(f"/?geo=fips:{FIPS[0]},{FIPS[1]};msa:{MSA[0]},{MSA[1]}"):
114115
self.assertEqual(
115116
parse_geo_arg(),
116117
[
117-
GeoSet("state", ["ak", "tk"]),
118-
GeoSet("county", ["42003", "40556"]),
118+
GeoSet("fips", [FIPS[0], FIPS[1]]),
119+
GeoSet("msa", [MSA[0], MSA[1]]),
119120
],
120121
)
121122
with self.subTest("hybrid"):
122-
with app.test_request_context("/?geo=nation:*;state:PA;county:42003,42002"):
123+
with app.test_request_context(f"/?geo=nation:*;fips:{FIPS[0]};msa:{MSA[0]},{MSA[1]}"):
123124
self.assertEqual(
124125
parse_geo_arg(),
125126
[
126127
GeoSet("nation", True),
127-
GeoSet("state", ["pa"]),
128-
GeoSet("county", ["42003", "42002"]),
128+
GeoSet("fips", [FIPS[0]]),
129+
GeoSet("msa", [MSA[0], MSA[1]]),
129130
],
130131
)
131132

@@ -140,10 +141,10 @@ def test_single_parse_geo_arg(self):
140141
with app.test_request_context("/"):
141142
self.assertRaises(ValidationFailedException, parse_single_geo_arg, "geo")
142143
with self.subTest("single"):
143-
with app.test_request_context("/?geo=state:AK"):
144-
self.assertEqual(parse_single_geo_arg("geo"), GeoSet("state", ["ak"]))
144+
with app.test_request_context(f"/?geo=fips:{FIPS[0]}"):
145+
self.assertEqual(parse_single_geo_arg("geo"), GeoSet("fips", [FIPS[0]]))
145146
with self.subTest("single list"):
146-
with app.test_request_context("/?geo=state:AK,TK"):
147+
with app.test_request_context(f"/?geo=fips:{FIPS[0]},{FIPS[1]}"):
147148
self.assertRaises(ValidationFailedException, parse_single_geo_arg, "geo")
148149
with self.subTest("multi"):
149150
with app.test_request_context("/?geo=state:*;nation:*"):

0 commit comments

Comments
 (0)