Commit 7039a5a

Merge branch 'cmu-delphi:dev' into dev
2 parents: d460eee + 29f9b96

2 files changed (+90, -7 lines)

integrations/server/test_covidcast_endpoints.py

Lines changed: 26 additions & 1 deletion
```diff
@@ -84,7 +84,7 @@ def from_json(json: Dict[str, Any]) -> "CovidcastRow":
             sample_size=json["sample_size"],
             missing_value=json["missing_value"],
             missing_stderr=json["missing_stderr"],
-            missing_sample_size=json["missing_sample_size"]
+            missing_sample_size=json["missing_sample_size"],
         )
 
     @property
@@ -361,3 +361,28 @@ def test_meta(self):
         self.assertEqual(out[0]["source"], first.source)
         out = self._fetch("/meta", signal=f"{first.source}:X")
         self.assertEqual(len(out), 0)
+
+    def test_coverage(self):
+        """Request a signal from the /coverage endpoint."""
+
+        num_geos_per_date = [10, 20, 30, 40, 44]
+        dates = [20200401 + i for i in range(len(num_geos_per_date))]
+        rows = [CovidcastRow(time_value=dates[i], value=i, geo_value=str(geo_value)) for i, num_geo in enumerate(num_geos_per_date) for geo_value in range(num_geo)]
+        self._insert_rows(rows)
+        first = rows[0]
+
+        with self.subTest("default"):
+            out = self._fetch("/coverage", signal=first.signal_pair, latest=dates[-1], format="json")
+            self.assertEqual(len(out), len(num_geos_per_date))
+            self.assertEqual([o["time_value"] for o in out], dates)
+            self.assertEqual([o["count"] for o in out], num_geos_per_date)
+
+        with self.subTest("specify window"):
+            out = self._fetch("/coverage", signal=first.signal_pair, window=f"{dates[0]}-{dates[1]}", format="json")
+            self.assertEqual(len(out), 2)
+            self.assertEqual([o["time_value"] for o in out], dates[:2])
+            self.assertEqual([o["count"] for o in out], num_geos_per_date[:2])
+
+        with self.subTest("invalid geo_type"):
+            out = self._fetch("/coverage", signal=first.signal_pair, geo_type="state", format="json")
+            self.assertEqual(len(out), 0)
```
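
As a usage note, here is a minimal client-side sketch of the same calls this test makes through its harness. The base URL, the blueprint's mount point, and the "src:sig" signal pair are placeholders for illustration, not values from this diff:

```python
# Hypothetical client for the new /coverage endpoint. BASE and the
# "src:sig" signal pair are assumptions, not part of this commit.
import requests

BASE = "http://localhost:5000/covidcast"  # assumed local deployment

# Mirrors the "specify window" subtest: coverage over a fixed day range.
resp = requests.get(
    BASE + "/coverage",
    params={"signal": "src:sig", "window": "20200401-20200402", "format": "json"},
)
resp.raise_for_status()
for row in resp.json():
    # Each row reports how many geo_values had data on that day.
    print(row["source"], row["signal"], row["time_value"], row["count"])
```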

src/server/endpoints/covidcast.py

Lines changed: 64 additions & 6 deletions
```diff
@@ -1,10 +1,11 @@
 from typing import List, Optional, Union, Tuple, Dict, Any, Set
 from itertools import groupby
-from datetime import date, datetime
+from datetime import date, datetime, timedelta
 from flask import Blueprint, request
 from flask.json import loads, jsonify
 from bisect import bisect_right
 from sqlalchemy import text
+from pandas import read_csv
 
 from .._common import is_compatibility_mode, db
 from .._exceptions import ValidationFailedException, DatabaseErrorException
@@ -32,9 +33,9 @@
     require_any,
 )
 from .._db import sql_table_has_columns
-from .._pandas import as_pandas
+from .._pandas import as_pandas, print_pandas
 from .covidcast_utils import compute_trend, compute_trends, compute_correlations, compute_trend_value, CovidcastMetaEntry, AllSignalsMap
-from ..utils import shift_time_value, date_to_time_value, time_value_to_iso
+from ..utils import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date
 
 # first argument is the endpoint name
 bp = Blueprint("covidcast", __name__)
@@ -150,7 +151,6 @@ def handle():
     q.set_order("source", "signal", "time_type", "time_value", "geo_type", "geo_value", "issue")
     q.set_fields(fields_string, fields_int, fields_float)
 
-
     # basic query info
     # data type of each field
     # build the source, signal, time, and location (type and id) filters
@@ -244,8 +244,8 @@ def handle_trendseries():
     def gen(rows):
         for key, group in groupby((parse_row(row, fields_string, fields_int, fields_float) for row in rows), lambda row: (row["geo_type"], row["geo_value"], row["source"], row["signal"])):
             trends = compute_trends(key[0], key[1], key[2], key[3], shifter, ((row["time_value"], row["value"]) for row in group))
-            for t in trends:
-                yield t
+            for trend in trends:
+                yield trend.asdict()
 
     # execute first query
     try:
@@ -493,3 +493,61 @@ def handle_meta():
         entry.intergrate(row)
 
     return jsonify([r.asdict() for r in out.values()])
+
+
+@bp.route("/coverage", methods=("GET", "POST"))
+def handle_coverage():
+    """
+    similar to /signal_dashboard_coverage: for a specific signal, returns the coverage (number of locations) for a given geo_type
+    """
+
+    signal = parse_source_signal_pairs()
+    geo_type = request.args.get("geo_type", "county")
+    if "window" in request.values:
+        time_window = parse_day_range_arg("window")
+    else:
+        now_time = extract_date("latest")
+        now = date.today() if now_time is None else time_value_to_date(now_time)
+        last = extract_integer("days")
+        if last is None:
+            last = 30
+        time_window = (date_to_time_value(now - timedelta(days=last)), date_to_time_value(now))
+
+    q = QueryBuilder("covidcast", "c")
+    fields_string = ["source", "signal"]
+    fields_int = ["time_value"]
+
+    q.set_fields(fields_string, fields_int)
+
+    # manually append the count column because of grouping
+    fields_int.append("count")
+    q.fields.append(f"count({q.alias}.geo_value) as count")
+
+    if geo_type == "only-county":
+        q.where(geo_type="county")
+        q.conditions.append('geo_value not like "%000"')
+    else:
+        q.where(geo_type=geo_type)
+    q.where_source_signal_pairs("source", "signal", signal)
+    q.where_time_pairs("time_type", "time_value", [TimePair("day", [time_window])])
+    q.group_by = "c.source, c.signal, c.time_value"
+    q.set_order("source", "signal", "time_value")
+
+    _handle_lag_issues_as_of(q, None, None, None)
+
+    return execute_query(q.query, q.params, fields_string, fields_int, [])
+
+
+@bp.route("/anomalies", methods=("GET", "POST"))
+def handle_anomalies():
+    """
+    proxy to the Google Sheet that tracks data anomalies
+    """
+
+    signal = parse_source_signal_arg("signal")
+
+    df = read_csv(
+        "https://docs.google.com/spreadsheets/d/e/2PACX-1vToGcf9x5PNJg-eSrxadoR5b-LM2Cqs9UML97587OGrIX0LiQDcU1HL-L2AA8o5avbU7yod106ih0_n/pub?gid=0&single=true&output=csv", skip_blank_lines=True
+    )
+    df = df[df["source"].notnull() & df["published"]]
+    return print_pandas(df)
```
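
One detail of handle_coverage worth spelling out: when no window argument is given, the window is anchored at latest (or today) and reaches back the requested number of days (default 30). A self-contained sketch of that computation, assuming date_to_time_value encodes dates as YYYYMMDD integers, which is consistent with the time_values used in the tests above:

```python
# Sketch of handle_coverage's default-window logic. The first helper is
# an assumed stand-in for the server's date_to_time_value utility.
from datetime import date, timedelta
from typing import Optional, Tuple

def date_to_time_value(d: date) -> int:
    # Assumption: time_values are YYYYMMDD integers, as in the tests above.
    return int(d.strftime("%Y%m%d"))

def default_window(latest: Optional[date] = None, days: int = 30) -> Tuple[int, int]:
    now = latest if latest is not None else date.today()
    return (date_to_time_value(now - timedelta(days=days)), date_to_time_value(now))

# Anchored at 2020-04-05, the window spans the preceding 30 days:
assert default_window(date(2020, 4, 5)) == (20200306, 20200405)
```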

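The SQL that QueryBuilder emits is not visible in the diff, so as an illustration only, here is a toy reproduction of the grouped count behind /coverage using an in-memory SQLite table. The table layout is a stand-in for demonstration, not the production covidcast schema:

```python
# Toy demonstration of the per-day location count that /coverage performs.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE covidcast (source TEXT, signal TEXT, time_value INTEGER, geo_value TEXT)")
# Two days of data: 2 locations on the first day, 3 on the second.
rows = [("src", "sig", 20200401 + day, str(geo)) for day, n in enumerate([2, 3]) for geo in range(n)]
conn.executemany("INSERT INTO covidcast VALUES (?, ?, ?, ?)", rows)

# One row per (source, signal, day), counting the locations reporting.
for row in conn.execute(
    "SELECT source, signal, time_value, count(geo_value) AS count "
    "FROM covidcast GROUP BY source, signal, time_value ORDER BY time_value"
):
    print(row)  # ('src', 'sig', 20200401, 2) then ('src', 'sig', 20200402, 3)
```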