Skip to content

Add CovidcastRow testing util and a few other changes #1044

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Feb 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions integrations/acquisition/covidcast/delete_batch.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
geo_id,value,stderr,sample_size,issue,time_value,geo_type,signal,source
d_nonlatest,0,0,0,1,0,geo,sig,src
d_latest, 0,0,0,3,0,geo,sig,src
d_justone, 0,0,0,1,0,geo,sig,src
d_nonlatest,0,0,0,1,0,county,sig,src
d_latest, 0,0,0,3,0,county,sig,src
d_justone, 0,0,0,1,0,county,sig,src
4 changes: 2 additions & 2 deletions integrations/acquisition/covidcast/test_csv_uploading.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ def test_uploading(self):
"time_value": [20200419],
"signal": [signal_name],
"direction": [None]})], axis=1).rename(columns=uploader_column_rename)
expected_values_df["missing_value"].iloc[0] = Nans.OTHER
expected_values_df["missing_sample_size"].iloc[0] = Nans.NOT_MISSING
expected_values_df.loc[0, "missing_value"] = Nans.OTHER
expected_values_df.loc[0, "missing_sample_size"] = Nans.NOT_MISSING
expected_values = expected_values_df.to_dict(orient="records")
expected_response = {'result': 1, 'epidata': self.apply_lag(expected_values), 'message': 'success'}

Expand Down
19 changes: 9 additions & 10 deletions integrations/acquisition/covidcast/test_db.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import unittest

from delphi_utils import Nans
from delphi.epidata.acquisition.covidcast.database import Database, CovidcastRow, DBLoadStateException
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase
import delphi.operations.secrets as secrets

from delphi.epidata.acquisition.covidcast.database import DBLoadStateException
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow


# all the Nans we use here are just one value, so this is a shortcut to it:
nmv = Nans.NOT_MISSING.value

class TestTest(CovidcastBase):

def _find_matches_for_row(self, row):
# finds (if existing) row from both history and latest views that matches long-key of provided CovidcastRow
# finds (if existing) row from both history and latest views that matches long-key of provided CovidcastTestRow
cols = "source signal time_type time_value geo_type geo_value issue".split()
results = {}
cur = self._db._cursor
Expand All @@ -31,8 +30,8 @@ def _find_matches_for_row(self, row):

def test_insert_or_update_with_nonempty_load_table(self):
# make rows
a_row = self._make_placeholder_row()[0]
another_row = self._make_placeholder_row(time_value=self.DEFAULT_TIME_VALUE+1, issue=self.DEFAULT_ISSUE+1)[0]
a_row = CovidcastTestRow.make_default_row(time_value=2020_02_02)
another_row = CovidcastTestRow.make_default_row(time_value=2020_02_03, issue=2020_02_03)
# insert one
self._db.insert_or_update_bulk([a_row])
# put something into the load table
Expand Down Expand Up @@ -61,7 +60,7 @@ def test_id_sync(self):
latest_view = 'epimetric_latest_v'

# add a data point
base_row, _ = self._make_placeholder_row()
base_row = CovidcastTestRow.make_default_row()
self._insert_rows([base_row])
# ensure the primary keys match in the latest and history tables
matches = self._find_matches_for_row(base_row)
Expand All @@ -71,7 +70,7 @@ def test_id_sync(self):
old_pk_id = matches[latest_view][pk_column]

# add a reissue for said data point
next_row, _ = self._make_placeholder_row()
next_row = CovidcastTestRow.make_default_row()
next_row.issue += 1
self._insert_rows([next_row])
# ensure the new keys also match
Expand Down
25 changes: 9 additions & 16 deletions integrations/acquisition/covidcast/test_delete_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,10 @@
import unittest
from os import path

# third party
import mysql.connector

# first party
from delphi_utils import Nans
from delphi.epidata.acquisition.covidcast.database import Database, CovidcastRow
import delphi.operations.secrets as secrets
from delphi.epidata.acquisition.covidcast.database import Database
from delphi.epidata.acquisition.covidcast.test_utils import covidcast_rows_from_args

# py3tester coverage target (equivalent to `import *`)
__test_target__ = 'delphi.epidata.acquisition.covidcast.database'
Expand Down Expand Up @@ -56,17 +53,13 @@ def test_delete_from_tuples(self):

def _test_delete_batch(self, cc_deletions):
# load sample data
rows = []
for time_value in [0, 1]:
rows += [
# varying numeric column here (2nd to last) is `issue`
CovidcastRow('src', 'sig', 'day', 'geo', time_value, "d_nonlatest", 0,0,0,0,0,0, 1, 0),
CovidcastRow('src', 'sig', 'day', 'geo', time_value, "d_nonlatest", 0,0,0,0,0,0, 2, 0),
CovidcastRow('src', 'sig', 'day', 'geo', time_value, "d_latest", 0,0,0,0,0,0, 1, 0),
CovidcastRow('src', 'sig', 'day', 'geo', time_value, "d_latest", 0,0,0,0,0,0, 2, 0),
CovidcastRow('src', 'sig', 'day', 'geo', time_value, "d_latest", 0,0,0,0,0,0, 3, 0)
]
rows.append(CovidcastRow('src', 'sig', 'day', 'geo', 0, "d_justone", 0,0,0,0,0,0, 1, 0))
rows = covidcast_rows_from_args(
time_value = [0] * 5 + [1] * 5 + [0],
geo_value = ["d_nonlatest"] * 2 + ["d_latest"] * 3 + ["d_nonlatest"] * 2 + ["d_latest"] * 3 + ["d_justone"],
issue = [1, 2] + [1, 2, 3] + [1, 2] + [1, 2, 3] + [1],
sanitize_fields = True
)

self._db.insert_or_update_bulk(rows)

# delete entries
Expand Down
119 changes: 61 additions & 58 deletions integrations/client/test_delphi_epidata.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
"""Integration tests for delphi_epidata.py."""

# standard library
import unittest
import time
from unittest.mock import patch, MagicMock
from json import JSONDecodeError
from unittest.mock import MagicMock, patch

# third party
from aiohttp.client_exceptions import ClientResponseError
import mysql.connector
# first party
import pytest
from aiohttp.client_exceptions import ClientResponseError

# first party
from delphi_utils import Nans
from delphi.epidata.client.delphi_epidata import Epidata
from delphi.epidata.acquisition.covidcast.database import Database, CovidcastRow
from delphi.epidata.acquisition.covidcast.covidcast_meta_cache_updater import main as update_covidcast_meta_cache
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase
# third party
import delphi.operations.secrets as secrets
from delphi.epidata.acquisition.covidcast.covidcast_meta_cache_updater import main as update_covidcast_meta_cache
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
from delphi.epidata.client.delphi_epidata import Epidata
from delphi_utils import Nans


# py3tester coverage target
__test_target__ = 'delphi.epidata.client.delphi_epidata'
# all the Nans we use here are just one value, so this is a shortcut to it:
nmv = Nans.NOT_MISSING.value

def fake_epidata_endpoint(func):
"""This can be used as a decorator to enable a bogus Epidata endpoint to return 404 responses."""
Expand All @@ -30,9 +30,6 @@ def wrapper(*args):
Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php'
return wrapper

# all the Nans we use here are just one value, so this is a shortcut to it:
nmv = Nans.NOT_MISSING.value

class DelphiEpidataPythonClientTests(CovidcastBase):
"""Tests the Python client."""

Expand All @@ -54,13 +51,11 @@ def test_covidcast(self):

# insert placeholder data: three issues of one signal, one issue of another
rows = [
self._make_placeholder_row(issue=self.DEFAULT_ISSUE + i, value=i, lag=i)[0]
CovidcastTestRow.make_default_row(issue=2020_02_02 + i, value=i, lag=i)
for i in range(3)
]
row_latest_issue = rows[-1]
rows.append(
self._make_placeholder_row(signal="sig2")[0]
)
rows.append(CovidcastTestRow.make_default_row(signal="sig2"))
self._insert_rows(rows)

with self.subTest(name='request two signals'):
Expand All @@ -70,10 +65,11 @@ def test_covidcast(self):
)

expected = [
self.expected_from_row(row_latest_issue),
self.expected_from_row(rows[-1])
row_latest_issue.as_api_compatibility_row_dict(),
rows[-1].as_api_compatibility_row_dict()
]

self.assertEqual(response['epidata'], expected)
# check result
self.assertEqual(response, {
'result': 1,
Expand All @@ -89,10 +85,10 @@ def test_covidcast(self):

expected = [{
rows[0].signal: [
self.expected_from_row(row_latest_issue, self.DEFAULT_MINUS + ['signal']),
row_latest_issue.as_api_compatibility_row_dict(ignore_fields=['signal']),
],
rows[-1].signal: [
self.expected_from_row(rows[-1], self.DEFAULT_MINUS + ['signal']),
rows[-1].as_api_compatibility_row_dict(ignore_fields=['signal']),
],
}]

Expand All @@ -109,12 +105,12 @@ def test_covidcast(self):
**self.params_from_row(rows[0])
)

expected = self.expected_from_row(row_latest_issue)
expected = [row_latest_issue.as_api_compatibility_row_dict()]

# check result
self.assertEqual(response_1, {
'result': 1,
'epidata': [expected],
'epidata': expected,
'message': 'success',
})

Expand All @@ -124,13 +120,13 @@ def test_covidcast(self):
**self.params_from_row(rows[0], as_of=rows[1].issue)
)

expected = self.expected_from_row(rows[1])
expected = [rows[1].as_api_compatibility_row_dict()]

# check result
self.maxDiff=None
self.assertEqual(response_1a, {
'result': 1,
'epidata': [expected],
'epidata': expected,
'message': 'success',
})

Expand All @@ -141,8 +137,8 @@ def test_covidcast(self):
)

expected = [
self.expected_from_row(rows[0]),
self.expected_from_row(rows[1])
rows[0].as_api_compatibility_row_dict(),
rows[1].as_api_compatibility_row_dict()
]

# check result
Expand All @@ -158,12 +154,12 @@ def test_covidcast(self):
**self.params_from_row(rows[0], lag=2)
)

expected = self.expected_from_row(row_latest_issue)
expected = [row_latest_issue.as_api_compatibility_row_dict()]

# check result
self.assertDictEqual(response_3, {
'result': 1,
'epidata': [expected],
'epidata': expected,
'message': 'success',
})
with self.subTest(name='long request'):
Expand Down Expand Up @@ -223,16 +219,16 @@ def test_geo_value(self):
# insert placeholder data: three counties, three MSAs
N = 3
rows = [
self._make_placeholder_row(geo_type="county", geo_value=str(i)*5, value=i)[0]
CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
for i in range(N)
] + [
self._make_placeholder_row(geo_type="msa", geo_value=str(i)*5, value=i*10)[0]
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
for i in range(N)
]
self._insert_rows(rows)

counties = [
self.expected_from_row(rows[i]) for i in range(N)
rows[i].as_api_compatibility_row_dict() for i in range(N)
]

def fetch(geo):
Expand All @@ -241,41 +237,48 @@ def fetch(geo):
)

# test fetch all
r = fetch('*')
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], counties)
request = fetch('*')
self.assertEqual(request['message'], 'success')
self.assertEqual(request['epidata'], counties)
# test fetch a specific region
r = fetch('11111')
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], [counties[1]])
request = fetch('11111')
self.assertEqual(request['message'], 'success')
self.assertEqual(request['epidata'], [counties[1]])
# test fetch a specific but nonexistent region
r = fetch('55555')
self.assertEqual(r['message'], 'no results')
request = fetch('55555')
self.assertEqual(request['message'], 'no results')
# test fetch multiple regions
r = fetch(['11111', '22222'])
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], [counties[1], counties[2]])
request = fetch(['11111', '22222'])
self.assertEqual(request['message'], 'success')
self.assertEqual(request['epidata'], [counties[1], counties[2]])
# test fetch multiple regions in another variant
r = fetch(['00000', '22222'])
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], [counties[0], counties[2]])
request = fetch(['00000', '22222'])
self.assertEqual(request['message'], 'success')
self.assertEqual(request['epidata'], [counties[0], counties[2]])
# test fetch multiple regions where one does not exist
r = fetch(['11111', '55555'])
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], [counties[1]])
request = fetch(['11111', '55555'])
self.assertEqual(request['message'], 'success')
self.assertEqual(request['epidata'], [counties[1]])
# test fetch multiple regions but specify no region
r = fetch([])
self.assertEqual(r['message'], 'no results')
request = fetch([])
self.assertEqual(request['message'], 'no results')

def test_covidcast_meta(self):
"""Test that the covidcast_meta endpoint returns expected data."""

DEFAULT_TIME_VALUE = 2020_02_02
DEFAULT_ISSUE = 2020_02_02

# insert placeholder data: three dates, three issues. values are:
# 1st issue: 0 10 20
# 2nd issue: 1 11 21
# 3rd issue: 2 12 22
rows = [
self._make_placeholder_row(time_value=self.DEFAULT_TIME_VALUE + t, issue=self.DEFAULT_ISSUE + i, value=t*10 + i)[0]
CovidcastTestRow.make_default_row(
time_value=DEFAULT_TIME_VALUE + t,
issue=DEFAULT_ISSUE + i,
value=t*10 + i
)
for i in range(3) for t in range(3)
]
self._insert_rows(rows)
Expand All @@ -299,14 +302,14 @@ def test_covidcast_meta(self):
signal=rows[0].signal,
time_type=rows[0].time_type,
geo_type=rows[0].geo_type,
min_time=self.DEFAULT_TIME_VALUE,
max_time=self.DEFAULT_TIME_VALUE + 2,
min_time=DEFAULT_TIME_VALUE,
max_time=DEFAULT_TIME_VALUE + 2,
num_locations=1,
min_value=2.,
mean_value=12.,
max_value=22.,
stdev_value=8.1649658, # population stdev, not sample, which is 10.
max_issue=self.DEFAULT_ISSUE + 2,
max_issue=DEFAULT_ISSUE + 2,
min_lag=0,
max_lag=0, # we didn't set lag when inputting data
)
Expand All @@ -322,10 +325,10 @@ def test_async_epidata(self):
# insert placeholder data: three counties, three MSAs
N = 3
rows = [
self._make_placeholder_row(geo_type="county", geo_value=str(i)*5, value=i)[0]
CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
for i in range(N)
] + [
self._make_placeholder_row(geo_type="msa", geo_value=str(i)*5, value=i*10)[0]
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
for i in range(N)
]
self._insert_rows(rows)
Expand Down
Loading