Skip to content

Commit 05fc7e1

Browse files
authored
Merge pull request #1594 from cmu-delphi/release/delphi-epidata-4.1.30
Release Delphi Epidata 4.1.30
2 parents f56995e + c6f4a19 commit 05fc7e1

File tree

14 files changed

+222
-7
lines changed

14 files changed

+222
-7
lines changed

.bumpversion.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 4.1.29
2+
current_version = 4.1.30
33
commit = False
44
tag = False
55

dev/local/setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = Delphi Development
3-
version = 4.1.29
3+
version = 4.1.30
44

55
[options]
66
packages =

docs/api/covidcast-signals/google-symptoms.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ updates every day and provide the most up-to-date data.
108108
## Limitations
109109

110110
Between May 13 2024 and August 6 2024, signal values were 25%-50% lower compared to previous time periods.
111-
This affected _all_ signals and symptom sets.
111+
This affected _all_ `google-symptoms` signals and symptom sets.
112112
The drop does not reflect actual search term popularity during the affected period.
113113
The apparent decrease in search volume was caused by an outage in the data pipeline on the source side.
114114
The data was unfortunately not recoverable and the dip cannot be repaired, but data outside the listed time period is unaffected.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""Integration tests for the covidcast `geo_coverage` endpoint."""
2+
3+
# standard library
4+
import json
5+
import unittest
6+
7+
# third party
8+
import mysql.connector
9+
import requests
10+
11+
# first party
12+
from delphi_utils import Nans
13+
from delphi.epidata.client.delphi_epidata import Epidata
14+
import delphi.operations.secrets as secrets
15+
import delphi.epidata.acquisition.covidcast.database as live
16+
from delphi.epidata.maintenance.coverage_crossref_updater import main
17+
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
18+
19+
# use the local instance of the Epidata API
20+
BASE_URL = 'http://delphi_web_epidata/epidata' # NOSONAR
21+
22+
23+
class CoverageCrossrefTests(CovidcastBase):
    """Integration tests for the coverage crossref updater and the `geo_coverage` endpoint."""

    def localSetUp(self):
        """Perform per-test setup: start from an empty `coverage_crossref` table."""
        self._db._cursor.execute('TRUNCATE TABLE `coverage_crossref`')

    @staticmethod
    def _make_request(params):
        """Query the `geo_coverage` endpoint with `params` and return the decoded JSON body.

        Raises the `requests` HTTP error if the response status indicates a failure.
        """
        endpoint = f"{Epidata.BASE_URL}/covidcast/geo_coverage"
        reply = requests.get(endpoint, params=params, auth=Epidata.auth)
        reply.raise_for_status()
        return reply.json()

    def test_caching(self):
        """Populate, query, cache, query, and verify the cache."""

        # expected response payloads, shared by the assertions below
        no_results = {
            'result': -2,
            'epidata': [],
            'message': 'no results',
        }
        sig_entry = {'signal': 'sig', 'source': 'src'}
        sig2_entry = {'signal': 'sig2', 'source': 'src'}

        def success(*entries):
            return {
                'result': 1,
                'epidata': list(entries),
                'message': 'success',
            }

        # insert dummy data: one signal for "pa", two signals for "ny"
        dummy_rows = [
            CovidcastTestRow.make_default_row(geo_type="state", geo_value="pa"),
            CovidcastTestRow.make_default_row(geo_type="state", geo_value="ny"),
            CovidcastTestRow.make_default_row(geo_type="state", geo_value="ny", signal="sig2"),
        ]
        self._insert_rows(dummy_rows)

        # the crossref table has not been computed yet, so nothing is served
        self.assertEqual(self._make_request(params={'geo': 'state:*'}), no_results)

        # update the coverage crossref table
        main()

        # after the update, every signal with state-level data is listed
        self.assertEqual(
            self._make_request(params={'geo': 'state:*'}),
            success(sig_entry, sig2_entry),
        )

        # only state-level rows were inserted, so another geo type has no coverage
        self.assertEqual(self._make_request(params={'geo': 'hrr:*'}), no_results)

        # "pa" only has data for the default signal
        self.assertEqual(
            self._make_request(params={'geo': 'state:pa'}),
            success(sig_entry),
        )

        # "ny" has data for both signals
        self.assertEqual(
            self._make_request(params={'geo': 'state:ny'}),
            success(sig_entry, sig2_entry),
        )
93+

src/acquisition/covidcast/database.py

+33
Original file line numberDiff line numberDiff line change
@@ -561,3 +561,36 @@ def retrieve_covidcast_meta_cache(self):
561561
for entry in cache:
562562
cache_hash[(entry['data_source'], entry['signal'], entry['time_type'], entry['geo_type'])] = entry
563563
return cache_hash
564+
565+
def compute_coverage_crossref(self):
    """Compute coverage_crossref table, for looking up available signals per geo or vice versa.

    The table is emptied and then repopulated from `covid.epimetric_latest`
    with one row per (signal_key_id, geo_key_id) pair, holding the minimum and
    maximum `time_value` of that pair. Both statements run inside a single
    transaction. If anything fails before the commit completes, the
    transaction is rolled back so that a later commit on the same connection
    cannot persist a half-finished refresh (the DELETE without the INSERT),
    and the original exception is re-raised.

    Returns:
        The cursor's `rowcount` as reported right after the INSERT statement.
    """

    logger = get_structured_logger("compute_coverage_crossref")

    coverage_crossref_delete_sql = '''
        DELETE FROM coverage_crossref;
    '''

    coverage_crossref_update_sql = '''
        INSERT INTO coverage_crossref (signal_key_id, geo_key_id, min_time_value, max_time_value)
            SELECT
                signal_key_id,
                geo_key_id,
                MIN(time_value) AS min_time_value,
                MAX(time_value) AS max_time_value
            FROM covid.epimetric_latest
            GROUP BY signal_key_id, geo_key_id;
    '''

    self._connection.start_transaction()
    try:
        self._cursor.execute(coverage_crossref_delete_sql)
        logger.info("coverage_crossref_delete", rows=self._cursor.rowcount)

        self._cursor.execute(coverage_crossref_update_sql)
        # remember the INSERT's row count now; it is the value returned below
        inserted_rows = self._cursor.rowcount
        logger.info("coverage_crossref_update", rows=inserted_rows)

        self.commit()
    except Exception:
        # discard the pending DELETE/INSERT instead of leaving them open on the connection
        self._connection.rollback()
        raise

    logger.info("coverage_crossref committed")

    return inserted_rows

src/client/delphi_epidata.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ Epidata <- (function() {
1515
# API base url
1616
BASE_URL <- getOption('epidata.url', default = 'https://api.delphi.cmu.edu/epidata/')
1717

18-
client_version <- '4.1.29'
18+
client_version <- '4.1.30'
1919

2020
auth <- getOption("epidata.auth", default = NA)
2121

src/client/delphi_epidata.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
}
2323
})(this, function (exports, fetchImpl, jQuery) {
2424
const BASE_URL = "https://api.delphi.cmu.edu/epidata/";
25-
const client_version = "4.1.29";
25+
const client_version = "4.1.30";
2626

2727
// Helper function to cast values and/or ranges to strings
2828
function _listitem(value) {

src/client/packaging/npm/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "delphi_epidata",
33
"description": "Delphi Epidata API Client",
44
"authors": "Delphi Group",
5-
"version": "4.1.29",
5+
"version": "4.1.30",
66
"license": "MIT",
77
"homepage": "https://github.com/cmu-delphi/delphi-epidata",
88
"bugs": {

src/ddl/v4_schema.sql

+21
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,24 @@ CREATE TABLE `covidcast_meta_cache` (
164164
PRIMARY KEY (`timestamp`)
165165
) ENGINE=InnoDB;
166166
INSERT INTO covidcast_meta_cache VALUES (0, '[]');
167+
168+
-- Cross-reference of which signals have data for which geos.
-- Holds one row per (`signal_key_id`, `geo_key_id`) pair, together with the
-- smallest and largest `time_value` recorded for that pair.
CREATE TABLE `coverage_crossref` (
    `signal_key_id` bigint NOT NULL,
    `geo_key_id` bigint NOT NULL,
    `min_time_value` int NOT NULL,
    `max_time_value` int NOT NULL,
    -- each (geo, signal) pair may appear only once; geo-first column order
    UNIQUE INDEX coverage_crossref_geo_sig (`geo_key_id`, `signal_key_id`),
    -- same columns in signal-first order
    INDEX coverage_crossref_sig_geo (`signal_key_id`, `geo_key_id`)
) ENGINE=InnoDB;
176+
177+
-- Readable form of `coverage_crossref`: replaces the key ids with the
-- `source`/`signal` names from `signal_dim` and the `geo_type`/`geo_value`
-- from `geo_dim`, and carries the min/max time values through unchanged.
CREATE OR REPLACE VIEW `coverage_crossref_v` AS
    SELECT
        `sd`.`source`,
        `sd`.`signal`,
        `gd`.`geo_type`,
        `gd`.`geo_value`,
        `cc`.`min_time_value`,
        `cc`.`max_time_value`
    FROM `coverage_crossref` `cc`
    JOIN `signal_dim` `sd` USING (`signal_key_id`)
    JOIN `geo_dim` `gd` USING (`geo_key_id`);

src/ddl/v4_schema_aliases.sql

+1
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
CREATE VIEW `epidata`.`epimetric_full_v` AS SELECT * FROM `covid`.`epimetric_full_v`;
99
CREATE VIEW `epidata`.`epimetric_latest_v` AS SELECT * FROM `covid`.`epimetric_latest_v`;
1010
CREATE VIEW `epidata`.`covidcast_meta_cache` AS SELECT * FROM `covid`.`covidcast_meta_cache`;
11+
CREATE VIEW `epidata`.`coverage_crossref_v` AS SELECT * FROM `covid`.`coverage_crossref_v`;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""Updates the `coverage_crossref` table, which backs the covidcast `geo_coverage` endpoint."""
2+
3+
import time
4+
5+
from delphi.epidata.acquisition.covidcast.database import Database
6+
from delphi_utils import get_structured_logger
7+
8+
9+
def main():
    """Recompute the `coverage_crossref` table and log the result and the total runtime.

    Returns True once the update has finished. An exception raised while
    computing the table propagates to the caller after `disconnect` has been
    called on the database.
    """

    log = get_structured_logger("coverage_crossref_updater")
    started_at = time.time()

    db = Database()
    db.connect()

    try:
        # compute and update coverage_crossref
        coverage = db.compute_coverage_crossref()
    finally:
        # clean up in success and in failure
        db.disconnect(True)

    log.info(f"coverage_crossref returned: {coverage}")

    elapsed_seconds = round(time.time() - started_at, 2)
    log.info(
        "Generated and updated covidcast geo/signal coverage",
        total_runtime_in_seconds=elapsed_seconds,
    )
    return True
30+
31+
32+
if __name__ == '__main__':
33+
main()

src/server/_config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
load_dotenv()
99

10-
VERSION = "4.1.29"
10+
VERSION = "4.1.30"
1111

1212
MAX_RESULTS = int(10e6)
1313
MAX_COMPATIBILITY_RESULTS = int(3650)

src/server/endpoints/covidcast.py

+18
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,24 @@ def transform_row(row, proxy):
542542

543543
return execute_query(q.query, q.params, fields_string, fields_int, [], transform=transform_row)
544544

545+
@bp.route("/geo_coverage", methods=("GET", "POST"))
def handle_geo_coverage():
    """
    List the distinct (source, signal) pairs that have coverage for the requested geo(s).

    The geo sets parsed from the request are applied as filters on the
    `coverage_crossref_v` view; the sort order is set to source, then signal.
    """

    requested_geos = parse_geo_sets()
    string_columns = ["source", "signal"]

    q = QueryBuilder("coverage_crossref_v", "c")
    q.set_fields(string_columns)
    q.apply_geo_filters("geo_type", "geo_value", requested_geos)
    q.set_sort_order("source", "signal")
    # grouping on the selected columns condenses duplicate results, similar to `SELECT DISTINCT`
    q.group_by = [f"c.{column}" for column in string_columns]

    return execute_query(q.query, q.params, string_columns, [], [])
545563

546564
@bp.route("/anomalies", methods=("GET", "POST"))
547565
def handle_anomalies():

tests/acquisition/covidcast/test_database.py

+16
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,22 @@ def test_update_covidcast_meta_cache_query(self):
7878
self.assertIn('timestamp', sql)
7979
self.assertIn('epidata', sql)
8080

81+
def test_compute_coverage_crossref_query(self):
    """Queries issued to recompute the coverage crossref look sensible.

    NOTE: Actual behavior is tested by integration test.
    """

    mock_connector = MagicMock()
    database = Database()
    database.connect(connector_impl=mock_connector)

    database.compute_coverage_crossref()

    # MagicMock hands back the same child mocks the database object used
    connection = mock_connector.connect()
    cursor = connection.cursor()
    self.assertTrue(cursor.execute.called)

    # inspect the SQL text itself, not merely the fact that something was executed
    executed_sql = ' '.join(
        str(call[0][0]) for call in cursor.execute.call_args_list if call[0]
    ).lower()
    self.assertIn('delete from coverage_crossref', executed_sql)
    self.assertIn('insert into coverage_crossref', executed_sql)
    self.assertIn('epimetric_latest', executed_sql)
96+
8197
def test_insert_or_update_batch_exception_reraised(self):
8298
"""Test that an exception is reraised"""
8399
mock_connector = MagicMock()

0 commit comments

Comments
 (0)