Skip to content

Commit 868e5e5

Browse files
committed
Server-side compute: add cumulative->incidence
1 parent dcbcc07 commit 868e5e5

File tree

3 files changed

+129
-1
lines changed

3 files changed

+129
-1
lines changed

integrations/server/test_covidcast_endpoints.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,75 @@ def test_basic(self):
191191
expected_values = [float(row.value) for row in rows]
192192
self.assertEqual(out_values, expected_values)
193193

194+
def test_derived_signals(self):
195+
time_value_pairs = [(20200401 + i, i ** 2) for i in range(10)]
196+
rows01 = [CovidcastRow(source="jhu-csse", signal="confirmed_cumulative_num", time_value=time_value, value=value, geo_value="01") for time_value, value in time_value_pairs]
197+
rows02 = [CovidcastRow(source="jhu-csse", signal="confirmed_cumulative_num", time_value=time_value, value=2 * value, geo_value="02") for time_value, value in time_value_pairs]
198+
first = rows01[0]
199+
self._insert_rows(rows01 + rows02)
200+
201+
with self.subTest("diffed signal"):
202+
out = self._fetch("/", signal="jhu-csse:confirmed_incidence_num", geo=first.geo_pair, time="day:*")
203+
assert out['result'] == -2
204+
out = self._fetch("/", signal="jhu-csse:confirmed_incidence_num", geo=first.geo_pair, time="day:20200401-20200410")
205+
self.assertEqual(len(out["epidata"]), len(rows01))
206+
out_values = [row["value"] for row in out["epidata"]]
207+
values = [None] + [value for _, value in time_value_pairs]
208+
expected_values = self._diff_rows(values)
209+
self.assertAlmostEqual(out_values, expected_values)
210+
211+
with self.subTest("diffed signal, multiple geos"):
212+
out = self._fetch("/", signal="jhu-csse:confirmed_incidence_num", geo="county:01,02", time="day:20200401-20200410")
213+
self.assertEqual(len(out["epidata"]), 2*(len(rows01)))
214+
out_values = [row["value"] for row in out["epidata"]]
215+
values1 = [None] + [value for _, value in time_value_pairs]
216+
values2 = [None] + [2 * value for _, value in time_value_pairs]
217+
expected_values = self._diff_rows(values1) + self._diff_rows(values2)
218+
self.assertAlmostEqual(out_values, expected_values)
219+
220+
with self.subTest("diffed signal, multiple geos using geo:*"):
221+
out = self._fetch("/", signal="jhu-csse:confirmed_incidence_num", geo="county:*", time="day:20200401-20200410")
222+
self.assertEqual(len(out["epidata"]), 2*(len(rows01)))
223+
values1 = [None] + [value for _, value in time_value_pairs]
224+
values2 = [None] + [2 * value for _, value in time_value_pairs]
225+
expected_values = self._diff_rows(values1) + self._diff_rows(values2)
226+
self.assertAlmostEqual(out_values, expected_values)
227+
228+
229+
with self.subTest("diffing with time window resizing"):
230+
# should fetch 1 extra day
231+
out = self._fetch("/", signal="jhu-csse:confirmed_incidence_num", geo=first.geo_pair, time="day:20200402-20200410")
232+
self.assertEqual(len(out["epidata"]), len(rows01) - 1)
233+
out_values = [row["value"] for row in out["epidata"]]
234+
values = [value for _, value in time_value_pairs]
235+
expected_values = self._diff_rows(values)
236+
self.assertAlmostEqual(out_values, expected_values)
237+
238+
time_value_pairs = [(20200401 + i, i ** 2) for i in chain(range(10), range(15, 20))]
239+
rows = [CovidcastRow(source="jhu-csse", signal="confirmed_cumulative_num", geo_value="03", time_value=time_value, value=value) for time_value, value in time_value_pairs]
240+
first = rows[0]
241+
self._insert_rows(rows)
242+
243+
with self.subTest("diffing with a time gap"):
244+
# should fetch 1 extra day
245+
out = self._fetch("/", signal="jhu-csse:confirmed_incidence_num", geo=first.geo_pair, time="day:20200401-20200420")
246+
self.assertEqual(len(out["epidata"]), len(rows) + 5)
247+
out_values = [row["value"] for row in out["epidata"]]
248+
values = [None] + [value for _, value in time_value_pairs][:10] + [None] * 5 + [value for _, value in time_value_pairs][10:]
249+
expected_values = self._diff_rows(values)
250+
self.assertAlmostEqual(out_values, expected_values)
251+
252+
253+
def test_compatibility(self):
254+
"""Request at the /api.php endpoint."""
255+
rows = [CovidcastRow(source="src", signal="sig", time_value=20200401 + i, value=i) for i in range(10)]
256+
first = rows[0]
257+
self._insert_rows(rows)
258+
259+
with self.subTest("simple"):
260+
out = self._fetch(is_compatibility=True, source=first.source, signal=first.signal, geo=first.geo_pair, time="day:*")
261+
self.assertEqual(len(out["epidata"]), len(rows))
262+
194263
def test_trend(self):
195264
"""Request a signal the /trend endpoint."""
196265

src/server/endpoints/covidcast_utils/smooth_diff.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,15 @@ def generate_row_diffs(rows: Iterable[Dict], nan_fill_value: Number = nan, **kwa
7272
Container for non-shared parameters with other computation functions.
7373
"""
7474
for window in windowed(rows, 2):
75-
yield diffed_row(window)
75+
if all(e is None for e in window):
76+
continue # should only occur if rows is empty
77+
try:
78+
first_value = window[0].get("value", nan_fill_value) if window[0] is not None else nan_fill_value
79+
second_value = window[1].get("value", nan_fill_value) if window[1] is not None else nan_fill_value
80+
value = round(second_value - first_value, 8)
81+
value = float(value) if not isnan(value) else None
82+
except TypeError:
83+
value = None
84+
item = list(filter(lambda e: e is not None, window))[-1].copy() # inherit values of last time stamp
85+
item.update({"value": value, "stderr": None, "sample_size": None, "missing_value": Nans.NOT_MISSING if value is not None else Nans.NOT_APPLICABLE, "missing_stderr": Nans.NOT_APPLICABLE, "missing_sample_size": Nans.NOT_APPLICABLE})
86+
yield item
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from pandas import DataFrame, date_range
2+
from pandas.testing import assert_frame_equal
3+
from numpy import nan
4+
from itertools import chain
5+
from more_itertools import windowed
6+
7+
from delphi.epidata.server.utils import date_to_time_value, CovidcastRecords
8+
from delphi.epidata.server.endpoints.covidcast_utils.smooth_diff import generate_row_diffs, generate_smooth_rows
9+
10+
11+
class TestStreaming:
12+
13+
14+
def test_generate_row_diffs(self):
15+
# an empty dataframe should return an empty dataframe
16+
data = DataFrame({})
17+
diffs_df = DataFrame.from_records(generate_row_diffs(data.to_dict(orient='records')))
18+
expected_df = DataFrame({})
19+
assert_frame_equal(diffs_df, expected_df)
20+
21+
# a dataframe with a single entry should return a single nan value
22+
data = CovidcastRecords(
23+
time_values=[20210501],
24+
values=[1.0]
25+
).as_dataframe()
26+
diffs_df = DataFrame.from_records(generate_row_diffs(data.to_dict(orient='records')))
27+
expected_df = CovidcastRecords(
28+
time_values=[20210501],
29+
values=[None],
30+
stderrs=[None],
31+
sample_sizes=[None]
32+
).as_dataframe()
33+
assert_frame_equal(diffs_df, expected_df)
34+
35+
data = CovidcastRecords(
36+
time_values=date_range("2021-05-01", "2021-05-10"),
37+
values=chain(range(7), [None, 2., 1.])
38+
).as_dataframe()
39+
40+
# no fill
41+
diffs_df = DataFrame.from_records(generate_row_diffs(data.to_dict(orient='records')))
42+
expected_df = CovidcastRecords(
43+
time_values=date_range("2021-05-02", "2021-05-10"),
44+
values=[1.] * 6 + [None, None, -1.],
45+
stderrs=[None] * 9,
46+
sample_sizes=[None] * 9
47+
).as_dataframe()
48+
assert_frame_equal(diffs_df, expected_df)

0 commit comments

Comments
 (0)