diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index b0b010ef4..1168bee7a 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 0.4.6
+current_version = 0.4.7
commit = False
tag = False
diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml
new file mode 100644
index 000000000..d515f77e0
--- /dev/null
+++ b/.github/workflows/performance-tests.yml
@@ -0,0 +1,156 @@
+name: Performance testing
+
+# Run when a PR comment is created (issues and PRs are considered the same entity in the GitHub API)
+on:
+ issue_comment:
+ types: [created]
+
+# Add some extra perms to comment on a PR
+permissions:
+ pull-requests: write
+ contents: read
+
+jobs:
+ run-perftests:
+    # Make sure that (1) this is a PR, not an issue, and (2) the comment body contains "/run performance test"
+ if: github.event.issue.pull_request && contains(github.event.comment.body, '/run performance test')
+ runs-on: ubuntu-latest
+ outputs:
+ request_count: ${{ steps.output.outputs.request_count }}
+ failure_count: ${{ steps.output.outputs.failure_count }}
+ med_time: ${{ steps.output.outputs.med_time }}
+ avg_time: ${{ steps.output.outputs.avg_time }}
+ min_time: ${{ steps.output.outputs.min_time }}
+ max_time: ${{ steps.output.outputs.max_time }}
+ requests_per_sec: ${{ steps.output.outputs.requests_per_sec }}
+ steps:
+ - name: Set up WireGuard
+ uses: egor-tensin/setup-wireguard@v1.2.0
+ with:
+ endpoint: '${{ secrets.WG_PERF_ENDPOINT }}'
+ endpoint_public_key: '${{ secrets.WG_PERF_ENDPOINT_PUBLIC_KEY }}'
+ ips: '${{ secrets.WG_PERF_IPS }}'
+ allowed_ips: '${{ secrets.WG_PERF_ALLOWED_IPS }}'
+ private_key: '${{ secrets.WG_PERF_PRIVATE_KEY }}'
+ - name: Check out repository
+ uses: actions/checkout@v3
+ # Previous step checks out default branch, so we check out the pull request's branch
+ - name: Switch to PR branch
+ run: |
+ hub pr checkout ${{ github.event.issue.number }}
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    - name: Set up repository # mimics install.sh from the README, except delphi-epidata is copied from the PR checkout instead of being cloned from main
+ run: |
+ cd ..
+ mkdir -p driver/repos/delphi
+ cd driver/repos/delphi
+ git clone https://github.com/cmu-delphi/operations
+ git clone https://github.com/cmu-delphi/utils
+ git clone https://github.com/cmu-delphi/flu-contest
+ git clone https://github.com/cmu-delphi/nowcast
+ cd ../../
+
+ cd ..
+ cp -R delphi-epidata driver/repos/delphi/delphi-epidata
+ cd -
+
+ ln -s repos/delphi/delphi-epidata/dev/local/Makefile
+ - name: Build & run epidata
+ run: |
+ cd ../driver
+ sudo make web sql="${{ secrets.DB_CONN_STRING }}"
+ - name: Check out delphi-admin
+ uses: actions/checkout@v3
+ with:
+ repository: cmu-delphi/delphi-admin
+ token: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_PAT }}
+ path: delphi-admin
+ - name: Build & run Locust
+      continue-on-error: true # sometimes ~2-5 queries fail; that alone shouldn't fail the whole run
+ run: |
+ cd delphi-admin/load-testing/locust
+ docker build -t locust .
+ export CSV=v4-requests-small.csv
+ touch output_stats.csv && chmod 666 output_stats.csv
+ touch output_stats_history.csv && chmod 666 output_stats_history.csv
+ touch output_failures.csv && chmod 666 output_failures.csv
+ touch output_exceptions.csv && chmod 666 output_exceptions.csv
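+        # headless run with 10 users; -i (provided by locust-plugins) stops the
+        # run after one iteration per line of the request CSV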
+ docker run --net=host -v $PWD:/mnt/locust -e CSV="/mnt/locust/${CSV}" locust -f /mnt/locust/v4.py --host http://127.0.0.1:10080/ --users 10 --spawn-rate 1 --headless -i "$(cat ${CSV} | wc -l)" --csv=/mnt/locust/output
+ - name: Produce output for summary
+ id: output
+ uses: jannekem/run-python-script-action@v1
+ with:
+ script: |
+ import os
+
+ def write_string(name, value):
+ with open(os.environ['GITHUB_OUTPUT'], 'a') as fh:
+ print(f'{name}={value}', file=fh)
+
+ def write_float(name, value):
+ write_string(name, "{:.2f}".format(float(value)))
+
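+          # Locust's *_stats.csv columns by index: 2=Request Count,
+          # 3=Failure Count, 4=Median, 5=Average, 6=Min, 7=Max, 9=Requests/s;
+          # the final line is the "Aggregated" summary row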
+ with open("delphi-admin/load-testing/locust/output_stats.csv", "r", encoding="utf-8", errors="ignore") as scraped:
+ final_line = scraped.readlines()[-1].split(",")
+ write_string('request_count', final_line[2])
+ write_string('failure_count', final_line[3])
+ write_float('med_time', final_line[4])
+ write_float('avg_time', final_line[5])
+ write_float('min_time', final_line[6])
+ write_float('max_time', final_line[7])
+ write_float('requests_per_sec', final_line[9])
+
+ - name: Archive results as artifacts
+ uses: actions/upload-artifact@v3
+ with:
+ name: locust-output
+ path: |
+ delphi-admin/load-testing/locust/output_*.csv
+
+ comment-success:
+ runs-on: ubuntu-latest
+ if: success()
+ needs: run-perftests
+ steps:
+ - name: Comment run results
+ env:
+ GITHUB_WORKFLOW_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ uses: actions/github-script@v5
+ with:
+ github-token: ${{secrets.GITHUB_TOKEN}}
+ script: |
+ github.rest.issues.createComment({
+ issue_number: context.issue.number,
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ body: `✅ Performance tests complete! Result summary:
+ - Total requests: **${{ needs.run-perftests.outputs.request_count }}**
+ - Total failures: **${{ needs.run-perftests.outputs.failure_count }}**
+ - Min response time: **${{ needs.run-perftests.outputs.min_time }} ms**
+ - Max response time: **${{ needs.run-perftests.outputs.max_time }} ms**
+ - Average response time: **${{ needs.run-perftests.outputs.avg_time }} ms**
+ - Median response time: **${{ needs.run-perftests.outputs.med_time }} ms**
+ - Requests per second: **${{ needs.run-perftests.outputs.requests_per_sec }}**
+
+ Click here to view full results: ${{ env.GITHUB_WORKFLOW_URL }}.`
+ })
+
+ comment-failure:
+ runs-on: ubuntu-latest
+ if: failure()
+ needs: run-perftests
+ steps:
+ - name: Comment run results
+ env:
+ GITHUB_WORKFLOW_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ uses: actions/github-script@v5
+ with:
+ github-token: ${{secrets.GITHUB_TOKEN}}
+ script: |
+ github.rest.issues.createComment({
+ issue_number: context.issue.number,
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ body: `❌ Performance tests failed! Click here to view full results: ${{ env.GITHUB_WORKFLOW_URL }}.`
+ })
diff --git a/deploy.json b/deploy.json
index 45b45883e..425ddef6d 100644
--- a/deploy.json
+++ b/deploy.json
@@ -25,6 +25,13 @@
"match": "^.*\\.(py)$",
"add-header-comment": true
},
+ {
+ "type": "move",
+ "src": "src/server/utils",
+ "dst": "[[package]]/server/utils/",
+ "match": "^.*\\.(py)$",
+ "add-header-comment": true
+ },
{
"type": "move",
"src": "src/server/endpoints",
@@ -40,6 +47,15 @@
"add-header-comment": true
},
+ "// acquisition - common",
+ {
+ "type": "move",
+ "src": "src/acquisition/common/",
+ "dst": "[[package]]/acquisition/common/",
+ "match": "^.*\\.(py)$",
+ "add-header-comment": true
+ },
+
"// acquisition - fluview",
{
"type": "move",
@@ -65,11 +81,6 @@
"dst": "[[package]]/acquisition/cdcp/",
"match": "^.*\\.(py)$",
"add-header-comment": true
- },{
- "type": "move",
- "src": "src/acquisition/cdcp/cdc_upload.php",
- "dst": "[[auto_web]]/cdc_upload/index.php",
- "add-header-comment": true
},
"// acquisition - ght",
@@ -107,16 +118,6 @@
"dst": "[[package]]/acquisition/wiki/",
"match": "^.*\\.(py)$",
"add-header-comment": true
- },{
- "type": "move",
- "src": "src/acquisition/wiki/master.php",
- "dst": "[[auto_web]]/wiki/master.php",
- "add-header-comment": true
- },{
- "type": "move",
- "src": "src/acquisition/wiki/dashboard.php",
- "dst": "[[auto_web]]/wiki/index.php",
- "add-header-comment": true
},
"// acquisition - flusurv",
diff --git a/dev/docker/web/epidata/Dockerfile b/dev/docker/web/epidata/Dockerfile
deleted file mode 100644
index fa6d503a4..000000000
--- a/dev/docker/web/epidata/Dockerfile
+++ /dev/null
@@ -1,11 +0,0 @@
-# start with the `delphi_web` image
-FROM delphi_web
-
-# deploy the Epidata API (see `delphi-epidata/deploy.json`)
-COPY repos/delphi/delphi-epidata/src/server/*.html /var/www/html/epidata/
-COPY repos/delphi/delphi-epidata/src/server/*.php /var/www/html/epidata/
-
-# point to the development database (overwrites the production config)
-COPY repos/delphi/delphi-epidata/dev/docker/web/epidata/assets/database_config.php /var/www/html/epidata/
-
-RUN chmod o+r /var/www/html/epidata/*
diff --git a/dev/docker/web/epidata/README.md b/dev/docker/web/epidata/README.md
deleted file mode 100644
index 8656f3e66..000000000
--- a/dev/docker/web/epidata/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# `delphi_web_epidata`
-
-This image starts with Delphi's web server and adds the sources necessary for
-hosting the Epidata API.
-
-This image includes the file
-[`database_config.php`](assets/database_config.php), which points to a local
-container running the
-[`delphi_database_epidata` image](../../database/epidata/README.md).
-
-To start a container from this image, run:
-
-```bash
-docker run --rm -p 10080:80 \
- --network delphi-net --name delphi_web_epidata \
- delphi_web_epidata
-```
-
-You should be able to call the API by setting your base URL to
-`http://localhost:10080/epidata/api.php`. To verify that the container is alive
-and serving, visit in a web browser (or `curl`)
-http://localhost:10080/epidata/.
diff --git a/dev/docker/web/epidata/assets/database_config.php b/dev/docker/web/epidata/assets/database_config.php
deleted file mode 100644
index 7c7402f47..000000000
--- a/dev/docker/web/epidata/assets/database_config.php
+++ /dev/null
@@ -1,7 +0,0 @@
- 'delphi_database_epidata',
- 'port' => 3306,
-);
-?>
diff --git a/dev/local/Makefile b/dev/local/Makefile
index da910bd86..75b10554c 100644
--- a/dev/local/Makefile
+++ b/dev/local/Makefile
@@ -67,6 +67,11 @@ LOG_DB:=delphi_database_epidata_$(NOW).log
WEB_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_web_epidata')
DATABASE_CONTAINER_ID:=$(shell docker ps -q --filter 'name=delphi_database_epidata')
+M1=
+ifeq ($(shell uname -smp), Darwin arm64 arm)
+$(info M1 system detected, changing docker platform to linux/amd64.)
+ override M1 =--platform linux/amd64
+endif
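+# $(M1) is passed to each docker build/run below so that images on Apple
+# Silicon hosts are built and executed as linux/amd64 under emulation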
.PHONY=web
web:
@@ -80,11 +85,14 @@ web:
@# Build the web_epidata image
@cd repos/delphi/delphi-epidata;\
- docker build -t delphi_web_epidata -f ./devops/Dockerfile .;\
+  docker build -t delphi_web_epidata \
+ $(M1) \
+ -f ./devops/Dockerfile .;\
cd -
@# Run the web server
@docker run --rm -p 127.0.0.1:10080:80 \
+ $(M1) \
--env "SQLALCHEMY_DATABASE_URI=$(sqlalchemy_uri)" \
--env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" --env "LOG_DEBUG" \
--network delphi-net --name delphi_web_epidata \
@@ -102,10 +110,12 @@ db:
@# Build the database_epidata image
@docker build -t delphi_database_epidata \
+ $(M1) \
-f repos/delphi/delphi-epidata/dev/docker/database/epidata/Dockerfile .
@# Run the database
@docker run --rm -p 127.0.0.1:13306:3306 \
+ $(M1) \
--network delphi-net --name delphi_database_epidata \
--cap-add=sys_nice \
delphi_database_epidata >$(LOG_DB) 2>&1 &
@@ -120,6 +130,7 @@ db:
.PHONY=py
py:
@docker build -t delphi_web_python \
+ $(M1) \
-f repos/delphi/delphi-epidata/dev/docker/python/Dockerfile .
.PHONY=all
@@ -128,6 +139,7 @@ all: db web py
.PHONY=test
test:
@docker run -i --rm --network delphi-net \
+ $(M1) \
--mount type=bind,source=$(CWD)repos/delphi/delphi-epidata,target=/usr/src/app/repos/delphi/delphi-epidata,readonly \
--mount type=bind,source=$(CWD)repos/delphi/delphi-epidata/src,target=/usr/src/app/delphi/epidata,readonly \
--env "SQLALCHEMY_DATABASE_URI=$(sqlalchemy_uri)" \
@@ -137,6 +149,7 @@ test:
.PHONY=bash
bash:
@docker run -it --rm --network delphi-net \
+ $(M1) \
--mount type=bind,source=$(CWD)repos/delphi/delphi-epidata,target=/usr/src/app/repos/delphi/delphi-epidata,readonly \
--mount type=bind,source=$(CWD)repos/delphi/delphi-epidata/src,target=/usr/src/app/delphi/epidata,readonly \
--env "SQLALCHEMY_DATABASE_URI=$(sqlalchemy_uri)" \
diff --git a/dev/local/setup.cfg b/dev/local/setup.cfg
index 4c80959ea..e43d90ade 100644
--- a/dev/local/setup.cfg
+++ b/dev/local/setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = Delphi Development
-version = 0.4.6
+version = 0.4.7
[options]
packages =
diff --git a/docs/epidata_development.md b/docs/epidata_development.md
index 44d13a348..a2f0b7e8e 100644
--- a/docs/epidata_development.md
+++ b/docs/epidata_development.md
@@ -116,7 +116,7 @@ above. The base images are built first, followed by the derived
`epidata`-specific images.
- The [`delphi_web_epidata` image](https://github.com/cmu-delphi/delphi-epidata/blob/main/dev/docker/web/epidata/README.md) adds
- the Epidata API to the `delphi_web` image.
+  the Epidata API on top of a web-server base image (built from `devops/Dockerfile`).
- The
[`delphi_database_epidata` image](https://github.com/cmu-delphi/delphi-epidata/blob/main/dev/docker/database/epidata/README.md)
adds user accounts, `epidata` & other appropriate databases, and relevant tables
@@ -125,14 +125,8 @@ above. The base images are built first, followed by the derived
From the root of your workspace, all of the images can be built as follows:
```bash
-docker build -t delphi_web \
- -f repos/delphi/operations/dev/docker/web/Dockerfile .
-
-docker build -t delphi_web_epidata \
- -f repos/delphi/delphi-epidata/dev/docker/web/epidata/Dockerfile .
-
-docker build -t delphi_database \
- -f repos/delphi/operations/dev/docker/database/Dockerfile .
+docker build -t delphi_web_epidata \
+  -f repos/delphi/delphi-epidata/devops/Dockerfile .
docker build -t delphi_database_epidata \
-f repos/delphi/delphi-epidata/dev/docker/database/epidata/Dockerfile .
@@ -394,33 +388,3 @@ The command above maps two local directories into the container:
- `/repos/delphi/delphi-epidata/src`: Just the source code, which forms the
container's `delphi.epidata` python package.
-### server code
-
-Local web sources (e.g. PHP files) can be bind-mounted into a
-`delphi_web_epidata` container as follows:
-
-```bash
-docker run --rm -p 127.0.0.1:10080:80 \
- --mount type=bind,source="$(pwd)"/repos/delphi/delphi-epidata/src/server/api.php,target=/var/www/html/epidata/api.php,readonly \
- --mount type=bind,source="$(pwd)"/repos/delphi/delphi-epidata/src/server/api_helpers.php,target=/var/www/html/epidata/api_helpers.php,readonly \
- --network delphi-net --name delphi_web_epidata \
- delphi_web_epidata
-```
-
-The command above mounts two specific files into the image. It may be tempting
-to bind mount the `src/server` directory rather than specific files, however
-that is currently problematic for a couple of reasons:
-
-1. `server/.htaccess` [from the local repository](https://github.com/cmu-delphi/delphi-epidata/blob/main/src/server/.htaccess) uses
- the `Header` directive. However, the webserver in the container doesn't have
- the corresponding module enabled. This causes the server to deny access to
- the API.
-2. `server/database_config.php`
- [in the image](https://github.com/cmu-delphi/delphi-epidata/blob/main/dev/docker/web/epidata/assets/database_config.php) contains
- database credentials for use in conjunction with the
- `delphi_database_epidata` container during development. However, the same
- file from [the local repository](https://github.com/cmu-delphi/delphi-epidata/blob/main/src/server/database_config.php) only
- contains placeholder values. This prevents communication with the database.
-
-There is currently no benefit to bind-mounting sources into the database
-container because schema changes require restarting the container anyway.
diff --git a/docs/new_endpoint_tutorial.md b/docs/new_endpoint_tutorial.md
index bd5634c9e..436b89908 100644
--- a/docs/new_endpoint_tutorial.md
+++ b/docs/new_endpoint_tutorial.md
@@ -273,10 +273,8 @@ already built the `delphi_python` image above:
```bash
# build web and database images for epidata
-docker build -t delphi_web \
- -f repos/delphi/operations/dev/docker/web/Dockerfile .
-docker build -t delphi_web_epidata \
- -f repos/delphi/delphi-epidata/dev/docker/web/epidata/Dockerfile .
+docker build -t delphi_web_epidata \
+  -f repos/delphi/delphi-epidata/devops/Dockerfile .
docker build -t delphi_database_epidata \
-f repos/delphi/delphi-epidata/dev/docker/database/epidata/Dockerfile .
diff --git a/integrations/acquisition/covid_hosp/facility/test_scenarios.py b/integrations/acquisition/covid_hosp/facility/test_scenarios.py
index 4c47d689e..aaa3c5e3b 100644
--- a/integrations/acquisition/covid_hosp/facility/test_scenarios.py
+++ b/integrations/acquisition/covid_hosp/facility/test_scenarios.py
@@ -38,6 +38,7 @@ def setUp(self):
with Database.connect() as db:
with db.new_cursor() as cur:
cur.execute('truncate table covid_hosp_facility')
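+      # acquisition now also populates the aggregate-key table; reset it too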
+ cur.execute('truncate table covid_hosp_facility_key')
cur.execute('truncate table covid_hosp_meta')
@freeze_time("2021-03-16")
diff --git a/integrations/server/test_covidcast.py b/integrations/server/test_covidcast.py
index c3b50206d..5a8df96f0 100644
--- a/integrations/server/test_covidcast.py
+++ b/integrations/server/test_covidcast.py
@@ -11,6 +11,7 @@
# first party
from delphi_utils import Nans
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
+from delphi.epidata.client.delphi_epidata import Epidata
# use the local instance of the Epidata API
BASE_URL = 'http://delphi_web_epidata/epidata/api.php'
@@ -22,11 +23,10 @@ def localSetUp(self):
"""Perform per-test setup."""
self._db._cursor.execute('update covidcast_meta_cache set timestamp = 0, epidata = "[]"')
- def request_based_on_row(self, row: CovidcastTestRow, extract_response: Callable = lambda x: x.json(), **kwargs):
+ def request_based_on_row(self, row: CovidcastTestRow, **kwargs):
params = self.params_from_row(row, endpoint='covidcast', **kwargs)
- response = requests.get(BASE_URL, params=params)
- response.raise_for_status()
- response = extract_response(response)
+ Epidata.BASE_URL = BASE_URL
+ response = Epidata.covidcast(**params)
return response
@@ -155,7 +155,6 @@ def test_csv_format(self):
# NB 'format' is a Python reserved word
response = self.request_based_on_row(
row,
- extract_response=lambda resp: resp.text,
**{'format':'csv'}
)
@@ -194,7 +193,7 @@ def test_fields(self):
row = self._insert_placeholder_set_one()
# limit fields
- response = self.request_based_on_row(row, fields='time_value,geo_value')
+ response = self.request_based_on_row(row, **{"fields":"time_value,geo_value"})
expected = row.as_api_compatibility_row_dict()
expected_all = {
diff --git a/src/acquisition/cdcp/cdc_upload.php b/src/acquisition/cdcp/cdc_upload.php
deleted file mode 100644
index 3dfcb38f3..000000000
--- a/src/acquisition/cdcp/cdc_upload.php
+++ /dev/null
@@ -1,101 +0,0 @@
-[deleted file: the "CDC Page Stats" upload page. This PHP form accepted a
-*.zip upload plus an `hmac` request parameter, recomputed the file's HMAC by
-shelling out to `openssl dgst -sha256 -hmac "<secret>" "<filename>"`, and on a
-match moved the upload to /common/cdc_stage/ and ran `automation.RunStep(46)`
-to process CDCP data; on a mismatch it printed an error and slept 5 seconds.]
diff --git a/src/acquisition/covidcast/logger.py b/src/acquisition/common/logger.py
similarity index 100%
rename from src/acquisition/covidcast/logger.py
rename to src/acquisition/common/logger.py
diff --git a/src/acquisition/covid_hosp/common/database.py b/src/acquisition/covid_hosp/common/database.py
index 8875828fa..ed308e7a0 100644
--- a/src/acquisition/covid_hosp/common/database.py
+++ b/src/acquisition/covid_hosp/common/database.py
@@ -11,6 +11,7 @@
# first party
import delphi.operations.secrets as secrets
+from delphi.epidata.acquisition.common.logger import get_structured_logger
Columndef = namedtuple("Columndef", "csv_name sql_name dtype")
@@ -53,6 +54,10 @@ def __init__(self,
self.key_columns = key_columns if key_columns is not None else []
self.additional_fields = additional_fields if additional_fields is not None else []
+ @classmethod
+ def logger(database_class):
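+    # `database_class` plays the role of `cls`, matching the parameter naming
+    # already used by `connect()` below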
+ return get_structured_logger(f"{database_class.__module__}")
+
@classmethod
@contextmanager
def connect(database_class, mysql_connector_impl=mysql.connector):
@@ -124,7 +129,7 @@ def contains_revision(self, revision):
for (result,) in cursor:
return bool(result)
- def insert_metadata(self, publication_date, revision, meta_json):
+ def insert_metadata(self, publication_date, revision, meta_json, logger=False):
"""Add revision metadata to the database.
Parameters
@@ -135,6 +140,8 @@ def insert_metadata(self, publication_date, revision, meta_json):
Unique revision string.
meta_json : str
Metadata serialized as a JSON string.
+    logger : structlog.Logger [optional; default False]
+      Logger to receive messages.
"""
with self.new_cursor() as cursor:
@@ -152,7 +159,7 @@ def insert_metadata(self, publication_date, revision, meta_json):
(%s, %s, %s, %s, %s, NOW())
''', (self.table_name, self.hhs_dataset_id, publication_date, revision, meta_json))
- def insert_dataset(self, publication_date, dataframe):
+ def insert_dataset(self, publication_date, dataframe, logger=False):
"""Add a dataset to the database.
Parameters
@@ -161,6 +168,8 @@ def insert_dataset(self, publication_date, dataframe):
Date when the dataset was published in YYYYMMDD format.
dataframe : pandas.DataFrame
The dataset.
+    logger : structlog.Logger [optional; default False]
+      Logger to receive messages.
"""
dataframe_columns_and_types = [
x for x in self.columns_and_types.values() if x.csv_name in dataframe.columns
@@ -181,18 +190,37 @@ def nan_safe_dtype(dtype, value):
sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) ' \
f'VALUES ({value_placeholders})'
id_and_publication_date = (0, publication_date)
+ if logger:
+ logger.info('updating values', count=len(dataframe.index))
+ n = 0
+ many_values = []
with self.new_cursor() as cursor:
- for _, row in dataframe.iterrows():
+ for index, row in dataframe.iterrows():
values = []
for c in dataframe_columns_and_types:
values.append(nan_safe_dtype(c.dtype, row[c.csv_name]))
- cursor.execute(sql,
- id_and_publication_date +
- tuple(values) +
- tuple(i.csv_name for i in self.additional_fields))
+ many_values.append(id_and_publication_date +
+ tuple(values) +
+ tuple(i.csv_name for i in self.additional_fields))
+ n += 1
+ # insert in batches because one at a time is slow and all at once makes
+ # the connection drop :(
+ if n % 5_000 == 0:
+ try:
+ cursor.executemany(sql, many_values)
+ many_values = []
+ except Exception as e:
+ if logger:
+ logger.error('error on insert', publ_date=publication_date, in_lines=(n-5_000, n), index=index, values=values, exception=e)
+ raise e
+ # insert final batch
+ if many_values:
+ cursor.executemany(sql, many_values)
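+      # note: with mysql.connector, executemany() folds these row tuples into
+      # a single multi-row INSERT, which is where the batching speedup comes from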
# deal with non/seldomly updated columns used like a fk table (if this database needs it)
if hasattr(self, 'AGGREGATE_KEY_COLS'):
+ if logger:
+ logger.info('updating keys')
ak_cols = self.AGGREGATE_KEY_COLS
# restrict data to just the key columns and remove duplicate rows
@@ -219,13 +247,15 @@ def nan_safe_dtype(dtype, value):
ak_table = self.table_name + '_key'
# assemble full SQL statement
ak_insert_sql = f'INSERT INTO `{ak_table}` ({ak_cols_str}) VALUES ({values_str}) AS v ON DUPLICATE KEY UPDATE {ak_updates_str}'
+ if logger:
+ logger.info("database query", sql=ak_insert_sql)
# commit the data
with self.new_cursor() as cur:
cur.executemany(ak_insert_sql, ak_data)
- def get_max_issue(self):
+ def get_max_issue(self, logger=False):
"""Fetch the most recent issue.
This is used to bookend what updates we pull in from the HHS metadata.
@@ -242,4 +272,6 @@ def get_max_issue(self):
for (result,) in cursor:
if result is not None:
return pd.Timestamp(str(result))
+ if logger:
+ logger.warn("get_max_issue", msg="no matching results in meta table; returning 1900/1/1 epoch")
return pd.Timestamp("1900/1/1")
diff --git a/src/acquisition/covid_hosp/common/network.py b/src/acquisition/covid_hosp/common/network.py
index ba0cca281..7b6228f16 100644
--- a/src/acquisition/covid_hosp/common/network.py
+++ b/src/acquisition/covid_hosp/common/network.py
@@ -6,13 +6,15 @@ class Network:
METADATA_URL_TEMPLATE = \
'https://healthdata.gov/api/views/%s/rows.csv'
- def fetch_metadata_for_dataset(dataset_id):
+ def fetch_metadata_for_dataset(dataset_id, logger=False):
"""Download and return metadata.
Parameters
----------
dataset_id : str
healthdata.gov dataset identifier of the dataset.
+ logger : structlog.Logger [optional; default False]
+ Logger to receive messages.
Returns
-------
@@ -20,14 +22,15 @@ def fetch_metadata_for_dataset(dataset_id):
The metadata object.
"""
url = Network.METADATA_URL_TEMPLATE % dataset_id
- print(f'fetching metadata at {url}')
+ if logger:
+ logger.info('fetching metadata', url=url)
df = Network.fetch_dataset(url)
df["Update Date"] = pandas.to_datetime(df["Update Date"])
df.sort_values("Update Date", inplace=True)
df.set_index("Update Date", inplace=True)
return df
- def fetch_dataset(url, pandas_impl=pandas):
+ def fetch_dataset(url, pandas_impl=pandas, logger=False):
"""Download and return a dataset.
Type inference is disabled in favor of explicit type casting at the
@@ -38,12 +41,14 @@ def fetch_dataset(url, pandas_impl=pandas):
----------
url : str
URL to the dataset in CSV format.
+ logger : structlog.Logger [optional; default False]
+ Logger to receive messages.
Returns
-------
pandas.DataFrame
The dataset.
"""
-
- print(f'fetching dataset at {url}')
+ if logger:
+ logger.info('fetching dataset', url=url)
return pandas_impl.read_csv(url, dtype=str)
diff --git a/src/acquisition/covid_hosp/common/utils.py b/src/acquisition/covid_hosp/common/utils.py
index 99a6b4f33..fcf956f66 100644
--- a/src/acquisition/covid_hosp/common/utils.py
+++ b/src/acquisition/covid_hosp/common/utils.py
@@ -6,6 +6,7 @@
import pandas as pd
+import re
class CovidHospException(Exception):
"""Exception raised exclusively by `covid_hosp` utilities."""
@@ -69,7 +70,26 @@ def parse_bool(value):
return False
raise CovidHospException(f'cannot convert "{value}" to bool')
- def issues_to_fetch(metadata, newer_than, older_than):
+ def limited_string_fn(length):
+ def limited_string(value):
+ value = str(value)
+ if len(value) > length:
+ raise CovidHospException(f"Value '{value}':{len(value)} longer than max {length}")
+ return value
+ return limited_string
+
+ GEOCODE_LENGTH = 32
+ GEOCODE_PATTERN = re.compile(r'POINT \((-?[0-9.]+) (-?[0-9.]+)\)')
+ def limited_geocode(value):
+ if len(value) < Utils.GEOCODE_LENGTH:
+ return value
+ # otherwise parse and set precision to 6 decimal places
+ m = Utils.GEOCODE_PATTERN.match(value)
+ if not m:
+ raise CovidHospException(f"Couldn't parse geocode '{value}'")
+ return f'POINT ({" ".join(f"{float(x):.6f}" for x in m.groups())})'
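+  # for illustration: limited_geocode('POINT (-122.41941553 37.77492951)')
+  # returns 'POINT (-122.419416 37.774930)'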
+
+ def issues_to_fetch(metadata, newer_than, older_than, logger=False):
"""
Construct all issue dates and URLs to be ingested based on metadata.
@@ -81,6 +101,8 @@ def issues_to_fetch(metadata, newer_than, older_than):
Lower bound (exclusive) of days to get issues for.
older_than Date
Upper bound (exclusive) of days to get issues for
+    logger : structlog.Logger [optional; default False]
+      Logger to receive messages.
Returns
-------
Dictionary of {issue day: list of (download urls, index)}
@@ -88,6 +110,7 @@ def issues_to_fetch(metadata, newer_than, older_than):
"""
daily_issues = {}
n_beyond = 0
+ n_selected = 0
for index in sorted(set(metadata.index)):
day = index.date()
if day > newer_than and day < older_than:
@@ -97,14 +120,17 @@ def issues_to_fetch(metadata, newer_than, older_than):
daily_issues[day] = urls_list
else:
daily_issues[day] += urls_list
+ n_selected += len(urls_list)
elif day >= older_than:
n_beyond += 1
- if n_beyond > 0:
- print(f"{n_beyond} issues available on {older_than} or newer")
+ if logger:
+ if n_beyond > 0:
+ logger.info("issues available beyond selection", on_or_newer=older_than, count=n_beyond)
+ logger.info("issues selected", newer_than=str(newer_than), older_than=str(older_than), count=n_selected)
return daily_issues
@staticmethod
- def merge_by_key_cols(dfs, key_cols):
+ def merge_by_key_cols(dfs, key_cols, logger=False):
"""Merge a list of data frames as a series of updates.
Parameters:
@@ -113,6 +139,8 @@ def merge_by_key_cols(dfs, key_cols):
Data frames to merge, ordered from earliest to latest.
key_cols: list(str)
Columns to use as the index.
+    logger : structlog.Logger [optional; default False]
+      Logger to receive messages.
Returns a single data frame containing the most recent data for each state+date.
"""
@@ -120,6 +148,11 @@ def merge_by_key_cols(dfs, key_cols):
dfs = [df.set_index(key_cols) for df in dfs
if not all(k in df.index.names for k in key_cols)]
result = dfs[0]
+ if logger and len(dfs) > 7:
+ logger.warning(
+ "expensive operation",
+ msg="concatenating more than 7 files may result in long running times",
+ count=len(dfs))
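+    # (each later frame is applied with a full .update() over the running
+    # result, so runtime grows with both the number and size of the frames)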
for df in dfs[1:]:
# update values for existing keys
result.update(df)
@@ -153,22 +186,25 @@ def update_dataset(database, network, newer_than=None, older_than=None):
bool
Whether a new dataset was acquired.
"""
- metadata = network.fetch_metadata()
+ logger = database.logger()
+
+ metadata = network.fetch_metadata(logger=logger)
datasets = []
with database.connect() as db:
- max_issue = db.get_max_issue()
+ max_issue = db.get_max_issue(logger=logger)
older_than = datetime.datetime.today().date() if newer_than is None else older_than
newer_than = max_issue if newer_than is None else newer_than
- daily_issues = Utils.issues_to_fetch(metadata, newer_than, older_than)
+ daily_issues = Utils.issues_to_fetch(metadata, newer_than, older_than, logger=logger)
if not daily_issues:
- print("no new issues, nothing to do")
+ logger.info("no new issues; nothing to do")
return False
for issue, revisions in daily_issues.items():
issue_int = int(issue.strftime("%Y%m%d"))
# download the dataset and add it to the database
- dataset = Utils.merge_by_key_cols([network.fetch_dataset(url) for url, _ in revisions],
- db.KEY_COLS)
+ dataset = Utils.merge_by_key_cols([network.fetch_dataset(url, logger=logger) for url, _ in revisions],
+ db.KEY_COLS,
+ logger=logger)
# add metadata to the database
all_metadata = []
for url, index in revisions:
@@ -180,10 +216,10 @@ def update_dataset(database, network, newer_than=None, older_than=None):
))
with database.connect() as db:
for issue_int, dataset, all_metadata in datasets:
- db.insert_dataset(issue_int, dataset)
+ db.insert_dataset(issue_int, dataset, logger=logger)
for url, metadata_json in all_metadata:
- db.insert_metadata(issue_int, url, metadata_json)
- print(f'successfully acquired {len(dataset)} rows')
+ db.insert_metadata(issue_int, url, metadata_json, logger=logger)
+ logger.info("acquired rows", count=len(dataset))
# note that the transaction is committed by exiting the `with` block
return True
diff --git a/src/acquisition/covid_hosp/facility/database.py b/src/acquisition/covid_hosp/facility/database.py
index 665256a4f..172f32dc4 100644
--- a/src/acquisition/covid_hosp/facility/database.py
+++ b/src/acquisition/covid_hosp/facility/database.py
@@ -40,7 +40,7 @@ class Database(BaseDatabase):
Columndef('ccn', 'ccn', str),
Columndef('city', 'city', str),
Columndef('fips_code', 'fips_code', str),
- Columndef('geocoded_hospital_address', 'geocoded_hospital_address', str),
+ Columndef('geocoded_hospital_address', 'geocoded_hospital_address', Utils.limited_geocode),
Columndef('hhs_ids', 'hhs_ids', str),
Columndef('hospital_name', 'hospital_name', str),
Columndef('hospital_subtype', 'hospital_subtype', str),
diff --git a/src/acquisition/covidcast/covidcast_meta_cache_updater.py b/src/acquisition/covidcast/covidcast_meta_cache_updater.py
index a46345b62..b4eff0d08 100644
--- a/src/acquisition/covidcast/covidcast_meta_cache_updater.py
+++ b/src/acquisition/covidcast/covidcast_meta_cache_updater.py
@@ -7,7 +7,7 @@
# first party
from delphi.epidata.acquisition.covidcast.database import Database
-from delphi.epidata.acquisition.covidcast.logger import get_structured_logger
+from delphi.epidata.acquisition.common.logger import get_structured_logger
from delphi.epidata.client.delphi_epidata import Epidata
def get_argument_parser():
diff --git a/src/acquisition/covidcast/csv_importer.py b/src/acquisition/covidcast/csv_importer.py
index 0fa936802..3eaec7d2a 100644
--- a/src/acquisition/covidcast/csv_importer.py
+++ b/src/acquisition/covidcast/csv_importer.py
@@ -16,7 +16,7 @@
from delphi_utils import Nans
from delphi.utils.epiweek import delta_epiweeks
from delphi.epidata.acquisition.covidcast.covidcast_row import CovidcastRow
-from delphi.epidata.acquisition.covidcast.logger import get_structured_logger
+from delphi.epidata.acquisition.common.logger import get_structured_logger
DataFrameRow = NamedTuple('DFRow', [
('geo_id', str),
diff --git a/src/acquisition/covidcast/csv_to_database.py b/src/acquisition/covidcast/csv_to_database.py
index 842e820c9..90270cb27 100644
--- a/src/acquisition/covidcast/csv_to_database.py
+++ b/src/acquisition/covidcast/csv_to_database.py
@@ -11,7 +11,7 @@
from delphi.epidata.acquisition.covidcast.csv_importer import CsvImporter, PathDetails
from delphi.epidata.acquisition.covidcast.database import Database, DBLoadStateException
from delphi.epidata.acquisition.covidcast.file_archiver import FileArchiver
-from delphi.epidata.acquisition.covidcast.logger import get_structured_logger
+from delphi.epidata.acquisition.common.logger import get_structured_logger
def get_argument_parser():
diff --git a/src/acquisition/covidcast/database.py b/src/acquisition/covidcast/database.py
index 3beedac82..347c85841 100644
--- a/src/acquisition/covidcast/database.py
+++ b/src/acquisition/covidcast/database.py
@@ -14,7 +14,7 @@
# first party
import delphi.operations.secrets as secrets
-from delphi.epidata.acquisition.covidcast.logger import get_structured_logger
+from delphi.epidata.acquisition.common.logger import get_structured_logger
from delphi.epidata.acquisition.covidcast.covidcast_row import CovidcastRow
diff --git a/src/acquisition/covidcast/delete_batch.py b/src/acquisition/covidcast/delete_batch.py
index fe40897fd..ae6ddc487 100644
--- a/src/acquisition/covidcast/delete_batch.py
+++ b/src/acquisition/covidcast/delete_batch.py
@@ -8,7 +8,7 @@
# first party
from delphi.epidata.acquisition.covidcast.database import Database
-from delphi.epidata.acquisition.covidcast.logger import get_structured_logger
+from delphi.epidata.acquisition.common.logger import get_structured_logger
def get_argument_parser():
diff --git a/src/acquisition/covidcast/file_archiver.py b/src/acquisition/covidcast/file_archiver.py
index 92686f3cf..368677133 100644
--- a/src/acquisition/covidcast/file_archiver.py
+++ b/src/acquisition/covidcast/file_archiver.py
@@ -6,7 +6,7 @@
import shutil
# first party
-from delphi.epidata.acquisition.covidcast.logger import get_structured_logger
+from delphi.epidata.acquisition.common.logger import get_structured_logger
class FileArchiver:
"""Archives files by moving and compressing."""
diff --git a/src/acquisition/covidcast/signal_dash_data_generator.py b/src/acquisition/covidcast/signal_dash_data_generator.py
index 2e7467487..431dae9fd 100644
--- a/src/acquisition/covidcast/signal_dash_data_generator.py
+++ b/src/acquisition/covidcast/signal_dash_data_generator.py
@@ -15,7 +15,7 @@
# first party
import covidcast
import delphi.operations.secrets as secrets
-from delphi.epidata.acquisition.covidcast.logger import get_structured_logger
+from delphi.epidata.acquisition.common.logger import get_structured_logger
LOOKBACK_DAYS_FOR_COVERAGE = 56
diff --git a/src/acquisition/wiki/dashboard.php b/src/acquisition/wiki/dashboard.php
deleted file mode 100644
index 09ae0e9d8..000000000
--- a/src/acquisition/wiki/dashboard.php
+++ /dev/null
@@ -1,182 +0,0 @@
-[deleted file: the "Wiki Status" dashboard. This PHP page rendered three views
-from MySQL: workers active in the last hour (worker, download in GB, number of
-jobs, seconds since the last job finished), job counts grouped by status with
-descriptions, and per-batch progress bars computed from the number of jobs in
-each status (-1 through 3).]
diff --git a/src/acquisition/wiki/master.php b/src/acquisition/wiki/master.php
deleted file mode 100644
index 99ace0cd4..000000000
--- a/src/acquisition/wiki/master.php
+++ /dev/null
@@ -1,53 +0,0 @@
-[deleted file: the wiki job master endpoint. This PHP script verified an
-`hmac` request parameter, recorded a finished job's size, worker, elapsed
-time, and data via mysql_query, and echoed 'ok'; a bad HMAC slept 5 seconds
-and returned HTTP 400 with 'wrong hmac', and any other request got HTTP 400
-with 'bad request'.]
diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R
index be944b857..01f75068d 100644
--- a/src/client/delphi_epidata.R
+++ b/src/client/delphi_epidata.R
@@ -15,7 +15,7 @@ Epidata <- (function() {
# API base url
BASE_URL <- 'https://delphi.cmu.edu/epidata/api.php'
- client_version <- '0.4.6'
+ client_version <- '0.4.7'
# Helper function to cast values and/or ranges to strings
.listitem <- function(value) {
diff --git a/src/client/delphi_epidata.js b/src/client/delphi_epidata.js
index 6ef2e9f9c..588ab7eb3 100644
--- a/src/client/delphi_epidata.js
+++ b/src/client/delphi_epidata.js
@@ -22,7 +22,7 @@
}
})(this, function (exports, fetchImpl, jQuery) {
const BASE_URL = "https://delphi.cmu.edu/epidata/";
- const client_version = "0.4.6";
+ const client_version = "0.4.7";
// Helper function to cast values and/or ranges to strings
function _listitem(value) {
diff --git a/src/client/delphi_epidata.py b/src/client/delphi_epidata.py
index 42f670ad4..9b3deea94 100644
--- a/src/client/delphi_epidata.py
+++ b/src/client/delphi_epidata.py
@@ -72,7 +72,11 @@ def _request(params):
long and returns a 414.
"""
try:
- return Epidata._request_with_retry(params).json()
+ result = Epidata._request_with_retry(params)
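+      # the server returns CSV as plain text; every other format is parsed as JSON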
+      if params is not None and params.get("format") == "csv":
+ return result.text
+ else:
+ return result.json()
except Exception as e:
return {'result': 0, 'message': 'error: ' + str(e)}
@@ -499,16 +503,17 @@ def delphi(system, epiweek):
def sensors(auth, names, locations, epiweeks):
"""Fetch Delphi's digital surveillance sensors."""
# Check parameters
- if auth is None or names is None or locations is None or epiweeks is None:
- raise Exception('`auth`, `names`, `locations`, and `epiweeks` are all required')
+ if names is None or locations is None or epiweeks is None:
+ raise Exception('`names`, `locations`, and `epiweeks` are all required')
# Set up request
params = {
'endpoint': 'sensors',
- 'auth': auth,
'names': Epidata._list(names),
'locations': Epidata._list(locations),
'epiweeks': Epidata._list(epiweeks),
}
+ if auth is not None:
+ params['auth'] = auth
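+    # `auth` is now optional: open sensors (e.g. sar3, epic, arch) need no key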
# Make the API call
return Epidata._request(params)
@@ -606,6 +611,9 @@ def covidcast(
if 'format' in kwargs:
params['format'] = kwargs['format']
+ if 'fields' in kwargs:
+ params['fields'] = kwargs['fields']
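+      # e.g. Epidata.covidcast(..., **{'fields': 'time_value,geo_value'})
+      # restricts the response to just those columns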
+
# Make the API call
return Epidata._request(params)
diff --git a/src/client/packaging/npm/package.json b/src/client/packaging/npm/package.json
index 60f0e7b3a..1c1c31b58 100644
--- a/src/client/packaging/npm/package.json
+++ b/src/client/packaging/npm/package.json
@@ -2,7 +2,7 @@
"name": "delphi_epidata",
"description": "Delphi Epidata API Client",
"authors": "Delphi Group",
- "version": "0.4.6",
+ "version": "0.4.7",
"license": "MIT",
"homepage": "https://github.com/cmu-delphi/delphi-epidata",
"bugs": {
diff --git a/src/client/packaging/pypi/delphi_epidata/__init__.py b/src/client/packaging/pypi/delphi_epidata/__init__.py
index e8ae5b0ea..d92fb10d4 100644
--- a/src/client/packaging/pypi/delphi_epidata/__init__.py
+++ b/src/client/packaging/pypi/delphi_epidata/__init__.py
@@ -1,4 +1,4 @@
from .delphi_epidata import Epidata
name = 'delphi_epidata'
-__version__ = '0.4.6'
+__version__ = '0.4.7'
diff --git a/src/client/packaging/pypi/setup.py b/src/client/packaging/pypi/setup.py
index e36b48d67..557784c6a 100644
--- a/src/client/packaging/pypi/setup.py
+++ b/src/client/packaging/pypi/setup.py
@@ -5,7 +5,7 @@
setuptools.setup(
name="delphi_epidata",
- version="0.4.6",
+ version="0.4.7",
author="David Farrow",
author_email="dfarrow0@gmail.com",
description="A programmatic interface to Delphi's Epidata API.",
diff --git a/src/server/_config.py b/src/server/_config.py
index 0be0ee219..618407f75 100644
--- a/src/server/_config.py
+++ b/src/server/_config.py
@@ -4,7 +4,7 @@
load_dotenv()
-VERSION = "0.4.6"
+VERSION = "0.4.7"
MAX_RESULTS = int(10e6)
MAX_COMPATIBILITY_RESULTS = int(3650)
diff --git a/src/server/api.php b/src/server/api.php
deleted file mode 100644
index 404bd80c2..000000000
--- a/src/server/api.php
+++ /dev/null
@@ -1,2232 +0,0 @@
-$AUTH = array(
-  'twitter' => Secrets::$api['twitter'],
- 'ght' => Secrets::$api['ght'],
- 'fluview' => Secrets::$api['fluview'],
- 'cdc' => Secrets::$api['cdc'],
- 'sensors' => Secrets::$api['sensors'],
- 'sensor_subsets' => Secrets::$api['sensor_subsets'],
- 'quidel' => Secrets::$api['quidel'],
- 'norostat' => Secrets::$api['norostat'],
- 'afhsb' => Secrets::$api['afhsb']
-);
-// begin sensor query authentication configuration
-// A multimap of sensor names to the "granular" auth tokens that can be used to access them; excludes the "global" sensor auth key that works for all sensors:
-$GRANULAR_SENSOR_AUTH_TOKENS = array(
- 'twtr' => array($AUTH['sensor_subsets']['twtr_sensor']),
- 'gft' => array($AUTH['sensor_subsets']['gft_sensor']),
- 'ght' => array($AUTH['sensor_subsets']['ght_sensors']),
- 'ghtj' => array($AUTH['sensor_subsets']['ght_sensors']),
- 'cdc' => array($AUTH['sensor_subsets']['cdc_sensor']),
- 'quid' => array($AUTH['sensor_subsets']['quid_sensor']),
- 'wiki' => array($AUTH['sensor_subsets']['wiki_sensor']),
-);
-// A set of sensors that do not require an auth key to access:
-$OPEN_SENSORS = array(
- 'sar3',
- 'epic',
- 'arch',
-);
-// Limits on the number of effective auth token equality checks performed per sensor query; generate auth tokens with appropriate levels of entropy according to the limits below:
-$MAX_GLOBAL_AUTH_CHECKS_PER_SENSOR_QUERY = 1; // (but imagine is larger to futureproof)
-$MAX_GRANULAR_AUTH_CHECKS_PER_SENSOR_QUERY = 30; // (but imagine is larger to futureproof)
-// A (currently redundant) limit on the number of auth tokens that can be provided:
-$MAX_AUTH_KEYS_PROVIDED_PER_SENSOR_QUERY = 1;
-// end sensor query authentication configuration
-
-// result limit, ~10 years of daily data
-$MAX_RESULTS = 3650;
-
-// queries the `fluview` and `fluview_imputed` tables
-// $epiweeks (required): array of epiweek values/ranges
-// $regions (required): array of region names
-// $issues (optional): array of epiweek values/ranges
-// overrides $lag
-// default: most recent issue
-// $lag (optional): number of weeks between each epiweek and its issue
-// overridden by $issues
-// default: most recent issue
-// $authorized: determines whether private data (i.e. `fluview_imputed`) is
-// included in the result
-function get_fluview($epiweeks, $regions, $issues, $lag, $authorized) {
- $epidata = array();
- // public data
- $table = '`fluview` fv';
- $fields = "fv.`release_date`, fv.`issue`, fv.`epiweek`, fv.`region`, fv.`lag`, fv.`num_ili`, fv.`num_patients`, fv.`num_providers`, fv.`wili`, fv.`ili`, fv.`num_age_0`, fv.`num_age_1`, fv.`num_age_2`, fv.`num_age_3`, fv.`num_age_4`, fv.`num_age_5`";
- _get_fluview_by_table($epidata, $epiweeks, $regions, $issues, $lag, $table, $fields);
- if(!$authorized) {
- // Make a special exception for New York. It is a (weighted) sum of two
- // constituent locations -- "ny_minus_jfk" and "jfk" -- both of which are
- // publicly available.
- if(in_array('ny', array_map('strtolower', $regions))) {
- $regions = array('ny');
- $authorized = true;
- }
- }
- if($authorized) {
- // private data (no release date, no age groups, and wili is equal to ili)
- $table = '`fluview_imputed` fv';
- $fields = "NULL `release_date`, fv.`issue`, fv.`epiweek`, fv.`region`, fv.`lag`, fv.`num_ili`, fv.`num_patients`, fv.`num_providers`, fv.`ili` `wili`, fv.`ili`, NULL `num_age_0`, NULL `num_age_1`, NULL `num_age_2`, NULL `num_age_3`, NULL `num_age_4`, NULL `num_age_5`";
- _get_fluview_by_table($epidata, $epiweeks, $regions, $issues, $lag, $table, $fields);
- }
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// a helper function to query `fluview` and `fluview_imputed` individually
-// parameters
-function _get_fluview_by_table(&$epidata, $epiweeks, $regions, $issues, $lag, $table, $fields) {
- // basic query info
- $order = "fv.`epiweek` ASC, fv.`region` ASC, fv.`issue` ASC";
- // build the epiweek filter
- $condition_epiweek = filter_integers('fv.`epiweek`', $epiweeks);
- // build the region filter
- $condition_region = filter_strings('fv.`region`', $regions);
- if($issues !== null) {
- // build the issue filter
- $condition_issue = filter_integers('fv.`issue`', $issues);
- // final query using specific issues
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_issue}) ORDER BY {$order}";
- } else if($lag !== null) {
- // build the lag filter
- $condition_lag = "(fv.`lag` = {$lag})";
- // final query using lagged issues
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_lag}) ORDER BY {$order}";
- } else {
- // final query using most recent issues
- $subquery = "(SELECT max(`issue`) `max_issue`, `epiweek`, `region` FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) GROUP BY `epiweek`, `region`) x";
- $condition = "x.`max_issue` = fv.`issue` AND x.`epiweek` = fv.`epiweek` AND x.`region` = fv.`region`";
- $query = "SELECT {$fields} FROM {$table} JOIN {$subquery} ON {$condition} ORDER BY {$order}";
- }
- // get the data from the database
- $fields_string = array('release_date', 'region');
- $fields_int = array('issue', 'epiweek', 'lag', 'num_ili', 'num_patients', 'num_providers', 'num_age_0', 'num_age_1', 'num_age_2', 'num_age_3', 'num_age_4', 'num_age_5');
- $fields_float = array('wili', 'ili');
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
-}
-
-// queries the `fluview_clinical` table
-// $epiweeks (required): array of epiweek values/ranges
-// $regions (required): array of region names
-// $issues (optional): array of epiweek values/ranges
-// overrides $lag
-// default: most recent issue
-// $lag (optional): number of weeks between each epiweek and its issue
-// overridden by $issues
-// default: most recent issue
-function get_fluview_clinical($epiweeks, $regions, $issues, $lag) {
- // store the results in an array
- $epidata = array();
- // set up for query
- $table = "`fluview_clinical` fvc";
- // $fields = 'fvc.`release_date`, fvc.`issue`, fvc.`epiweek`, fvc.`region`, fvc.`lag`, fvc.`total_specimens`, fvc.`total_a_h1n1`, fvc.`total_a_h3`, fvc.`total_a_h3n2v`, fvc.`total_a_no_sub`, fvc.`total_b`, fvc.`total_b_vic`, fvc.`total_b_yam`';
- $fields = "fvc.`release_date`, fvc.`issue`, fvc.`epiweek`, fvc.`region`, fvc.`lag`, fvc.`total_specimens`, fvc.`total_a`, fvc.`total_b`, fvc.`percent_positive`, fvc.`percent_a`, fvc.`percent_b`";
- $order = "fvc.`epiweek` ASC, fvc.`region` ASC, fvc.`issue` ASC";
- // create conditions
- $condition_epiweek = filter_integers("fvc.`epiweek`", $epiweeks);
- $condition_region = filter_strings("fvc.`region`", $regions);
- if ($issues !== null) {
- // using specific issues
- $condition_issue = filter_integers("fvc.`issue`", $issues);
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_issue}) ORDER BY {$order}";
- } else if ($lag !== null) {
- // using lagged issues
- $condition_lag = '(fvc.`lag` = {$lag})';
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_lag}) ORDER BY {$order}";
- } else {
- // using most recent issues
- $subquery = "(SELECT max(`issue`) `max_issue`, `epiweek`, `region` FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) GROUP BY `epiweek`, `region`) x";
- $condition = "x.`max_issue` = fvc.`issue` AND x.`epiweek` = fvc.`epiweek` AND x.`region` = fvc.`region`";
- $query = "SELECT {$fields} FROM {$table} JOIN {$subquery} ON {$condition} ORDER BY {$order}";
- }
- // get the data from the database
- $fields_string = array('release_date', 'region');
- $fields_float = array('percent_positive', 'percent_a', 'percent_b');
- $fields_int = array('issue', 'epiweek', 'lag', 'total_specimens', 'total_a', 'total_b');
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the result, if any
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `flusurv` table
-// $epiweeks (required): array of epiweek values/ranges
-// $locations (required): array of locations names
-// $issues (optional): array of epiweek values/ranges
-// overrides $lag
-// default: most recent issue
-// $lag (optional): number of weeks between each epiweek and its issue
-// overridden by $issues
-// default: most recent issue
-function get_flusurv($epiweeks, $locations, $issues, $lag) {
- // basic query info
- $table = '`flusurv` fs';
- $fields = "fs.`release_date`, fs.`issue`, fs.`epiweek`, fs.`location`, fs.`lag`, fs.`rate_age_0`, fs.`rate_age_1`, fs.`rate_age_2`, fs.`rate_age_3`, fs.`rate_age_4`, fs.`rate_overall`";
- $order = "fs.`epiweek` ASC, fs.`location` ASC, fs.`issue` ASC";
- // build the epiweek filter
- $condition_epiweek = filter_integers('fs.`epiweek`', $epiweeks);
- // build the location filter
- $condition_location = filter_strings('fs.`location`', $locations);
- if($issues !== null) {
- // build the issue filter
- $condition_issue = filter_integers('fs.`issue`', $issues);
- // final query using specific issues
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_location}) AND ({$condition_issue}) ORDER BY {$order}";
- } else if($lag !== null) {
- // build the lag filter
- $condition_lag = "(fs.`lag` = {$lag})";
- // final query using lagged issues
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_location}) AND ({$condition_lag}) ORDER BY {$order}";
- } else {
- // final query using most recent issues
- $subquery = "(SELECT max(`issue`) `max_issue`, `epiweek`, `location` FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_location}) GROUP BY `epiweek`, `location`) x";
- $condition = "x.`max_issue` = fs.`issue` AND x.`epiweek` = fs.`epiweek` AND x.`location` = fs.`location`";
- $query = "SELECT {$fields} FROM {$table} JOIN {$subquery} ON {$condition} ORDER BY {$order}";
- }
- // get the data from the database
- $epidata = array();
- $fields_string = array('release_date', 'location');
- $fields_int = array('issue', 'epiweek', 'lag');
- $fields_float = array('rate_age_0', 'rate_age_1', 'rate_age_2', 'rate_age_3', 'rate_age_4', 'rate_overall');
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `paho_dengue` table
-// $epiweeks (required): array of epiweek values/ranges
-// $regions (required): array of region names
-// $issues (optional): array of epiweek values/ranges
-// overrides $lag
-// default: most recent issue
-// $lag (optional): number of weeks between each epiweek and its issue
-// overridden by $issues
-// default: most recent issue
-function get_paho_dengue($epiweeks, $regions, $issues, $lag) {
- // store the results in an array
- $epidata = array();
- // set up for query
- $table = "`paho_dengue` pd";
- $fields = "pd.`release_date`, pd.`issue`, pd.`epiweek`, pd.`region`, pd.`lag`, pd.`total_pop`, pd.`serotype`, pd.`num_dengue`, pd.`incidence_rate`, pd.`num_severe`, pd.`num_deaths`";
- $order = "pd.`epiweek` ASC, pd.`region` ASC, pd.`issue` ASC";
- // create conditions
- $condition_epiweek = filter_integers("pd.`epiweek`", $epiweeks);
- $condition_region = filter_strings("pd.`region`", $regions);
- if ($issues !== null) {
- // using specific issues
- $condition_issue = filter_integers("pd.`issue`", $issues);
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_issue}) ORDER BY {$order}";
- } else if ($lag !== null) {
- // using lagged issues
- $condition_lag = '(pd.`lag` = {$lag})';
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_lag}) ORDER BY {$order}";
- } else {
- // using most recent issues
- $subquery = "(SELECT max(`issue`) `max_issue`, `epiweek`, `region` FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) GROUP BY `epiweek`, `region`) x";
- $condition = "x.`max_issue` = pd.`issue` AND x.`epiweek` = pd.`epiweek` AND x.`region` = pd.`region`";
- $query = "SELECT {$fields} FROM {$table} JOIN {$subquery} ON {$condition} ORDER BY {$order}";
- }
- // get the data from the database
- $fields_string = array('release_date', 'region', 'serotype');
- $fields_float = array('incidence_rate');
- $fields_int = array('issue', 'epiweek', 'lag', 'total_pop', 'num_dengue', 'num_severe', 'num_deaths');
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the result, if any
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `ecdc_ili` table
-// $epiweeks (required): array of epiweek values/ranges
-// $regions (required): array of region names
-// $issues (optional): array of epiweek values/ranges
-// overrides $lag
-// default: most recent issue
-// $lag (optional): number of weeks between each epiweek and its issue
-// overridden by $issues
-// default: most recent issue
-function get_ecdc_ili($epiweeks, $regions, $issues, $lag) {
- // store the results in an array
- $epidata = array();
- // set up for query
- $table = "`ecdc_ili` ec";
- $fields = "ec.`release_date`, ec.`issue`, ec.`epiweek`, ec.`region`, ec.`lag`, ec.`incidence_rate`";
- $order = "ec.`epiweek` ASC, ec.`region` ASC, ec.`issue` ASC";
- // create conditions
- $condition_epiweek = filter_integers("ec.`epiweek`", $epiweeks);
- $condition_region = filter_strings("ec.`region`", $regions);
- if ($issues !== null) {
- // using specific issues
- $condition_issue = filter_integers("ec.`issue`", $issues);
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_issue}) ORDER BY {$order}";
- } else if ($lag !== null) {
- // using lagged issues
- $condition_lag = '(ec.`lag` = {$lag})';
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_lag}) ORDER BY {$order}";
- } else {
- // using most recent issues
- $subquery = "(SELECT max(`issue`) `max_issue`, `epiweek`, `region` FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) GROUP BY `epiweek`, `region`) x";
- $condition = "x.`max_issue` = ec.`issue` AND x.`epiweek` = ec.`epiweek` AND x.`region` = ec.`region`";
- $query = "SELECT {$fields} FROM {$table} JOIN {$subquery} ON {$condition} ORDER BY {$order}";
- }
- // get the data from the database
- $fields_string = array('release_date', 'region');
- $fields_float = array('incidence_rate');
- $fields_int = array('issue', 'epiweek', 'lag');
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the result, if any
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `kcdc_ili` table
-// $epiweeks (required): array of epiweek values/ranges
-// $regions (required): array of region names
-// $issues (optional): array of epiweek values/ranges
-// overrides $lag
-// default: most recent issue
-// $lag (optional): number of weeks between each epiweek and its issue
-// overridden by $issues
-// default: most recent issue
-function get_kcdc_ili($epiweeks, $regions, $issues, $lag) {
- // store the results in an array
- $epidata = array();
- // set up for query
- $table = "`kcdc_ili` kc";
- $fields = "kc.`release_date`, kc.`issue`, kc.`epiweek`, kc.`region`, kc.`lag`, kc.`ili`";
- $order = "kc.`epiweek` ASC, kc.`region` ASC, kc.`issue` ASC";
- // create conditions
- $condition_epiweek = filter_integers("kc.`epiweek`", $epiweeks);
- $condition_region = filter_strings("kc.`region`", $regions);
- if ($issues !== null) {
- // using specific issues
- $condition_issue = filter_integers("kc.`issue`", $issues);
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_issue}) ORDER BY {$order}";
- } else if ($lag !== null) {
- // using lagged issues
- $condition_lag = "(kc.`lag` = {$lag})";
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_lag}) ORDER BY {$order}";
- } else {
- // using most recent issues
- $subquery = "(SELECT max(`issue`) `max_issue`, `epiweek`, `region` FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) GROUP BY `epiweek`, `region`) x";
- $condition = "x.`max_issue` = kc.`issue` AND x.`epiweek` = kc.`epiweek` AND x.`region` = kc.`region`";
- $query = "SELECT {$fields} FROM {$table} JOIN {$subquery} ON {$condition} ORDER BY {$order}";
- }
- // get the data from the database
- $fields_string = array('release_date', 'region');
- $fields_float = array('ili');
- $fields_int = array('issue', 'epiweek', 'lag');
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the result, if any
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `gft` table
-// $epiweeks (required): array of epiweek values/ranges
-// $locations (required): array of location names
-function get_gft($epiweeks, $locations) {
- // basic query info
- $table = '`gft` g';
- $fields = "g.`epiweek`, g.`location`, g.`num`";
- $order = "g.`epiweek` ASC, g.`location` ASC";
- // build the epiweek filter
- $condition_epiweek = filter_integers('g.`epiweek`', $epiweeks);
- // build the location filter
- $condition_location = filter_strings('g.`location`', $locations);
- // final query
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_location}) ORDER BY {$order}";
- // get the data from the database
- $epidata = array();
- execute_query($query, $epidata, array('location'), array('epiweek', 'num'), null);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
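- // For reference, the epiweek/location conditions built above are plain SQL
- // fragments; e.g. (hypothetical values, and assuming the behavior of the
- // filter_* helpers defined earlier in this file):
- //   filter_integers('g.`epiweek`', array(201740, array(201801, 201805)))
- //     -> "(g.`epiweek` = 201740) OR (g.`epiweek` BETWEEN 201801 AND 201805)"
- //   filter_strings('g.`location`', array('nat', 'hhs1'))
- //     -> "(g.`location` = 'nat') OR (g.`location` = 'hhs1')"
-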
-// queries the `ght` table
-// $epiweeks (required): array of epiweek values/ranges
-// $locations (required): array of location names
-// $query (required): search query or topic ID
-function get_ght($epiweeks, $locations, $query) {
- // basic query info
- $table = '`ght` g';
- $fields = "g.`epiweek`, g.`location`, g.`value`";
- $order = "g.`epiweek` ASC, g.`location` ASC";
- // build the epiweek filter
- $condition_epiweek = filter_integers('g.`epiweek`', $epiweeks);
- // build the location filter
- $condition_location = filter_strings('g.`location`', $locations);
- // build the query filter
- $condition_query = filter_strings('g.`query`', array($query));
- // final query
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_location}) AND ({$condition_query}) ORDER BY {$order}";
- // get the data from the database
- $epidata = array();
- execute_query($query, $epidata, array('location'), array('epiweek'), array('value'));
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `twitter` table
-// $locations (required): array of location names
-// $dates (required): array of date or epiweek values/ranges
-// $resolution (required): either 'daily' or 'weekly'
-function get_twitter($locations, $dates, $resolution) {
- global $dbh;
- // basic query info
- $table = '`twitter` t';
- // build the date filter and set field names
- $fields_string = array('location');
- $fields_int = array('num', 'total');
- $fields_float = array('percent');
- if($resolution === 'daily') {
- $date_field = 't.`date`';
- $date_name = 'date';
- $condition_date = filter_dates($date_field, $dates);
- array_push($fields_string, $date_name);
- } else {
- $date_field = 'yearweek(t.`date`, 6)';
- $date_name = 'epiweek';
- $condition_date = filter_integers($date_field, $dates);
- array_push($fields_int, $date_name);
- }
- $fields = "{$date_field} `{$date_name}`, sum(t.`num`) `num`, sum(t.`total`) `total`, round(100 * sum(t.`num`) / sum(t.`total`), 8) `percent`";
- // for consistency (some rows have low `total`, or `num` > `total`), filter out the roughly 2% of rows with the highest `percent`
- $condition_filter = 't.`num` / t.`total` <= 0.019';
- // split locations into national/regional/state
- $regions = array();
- $states = array();
- foreach($locations as $location) {
- $location = strtolower($location);
- if(in_array($location, array('nat', 'hhs1', 'hhs2', 'hhs3', 'hhs4', 'hhs5', 'hhs6', 'hhs7', 'hhs8', 'hhs9', 'hhs10', 'cen1', 'cen2', 'cen3', 'cen4', 'cen5', 'cen6', 'cen7', 'cen8', 'cen9'))) {
- array_push($regions, $location);
- } else {
- array_push($states, $location);
- }
- }
- // initialize the epidata array
- $epidata = array();
- // query each region individually (the data is stored by state, so getting regional data requires some extra processing)
- foreach($regions as $region) {
- $region = mysqli_real_escape_string($dbh, $region);
- if($region === 'nat') {
- // final query for U.S. National
- $query = "SELECT {$fields}, '{$region}' `location` FROM {$table} WHERE ({$condition_filter}) AND ({$condition_date}) GROUP BY {$date_field} ORDER BY {$date_field} ASC";
- } else {
- // build the location filter
- $condition_location = "`state` IN (" . get_region_states($region) . ")";
- // final query for HHS Regions
- $query = "SELECT {$fields}, '{$region}' `location` FROM {$table} WHERE ({$condition_filter}) AND ({$condition_date}) AND ({$condition_location}) GROUP BY {$date_field} ORDER BY {$date_field} ASC";
- }
- // append query results to the epidata array
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- }
- // query all states together
- if(count($states) !== 0) {
- // build the location filter
- $condition_location = filter_strings('t.`state`', $states);
- // final query for states
- $query = "SELECT {$fields}, t.`state` `location` FROM {$table} WHERE ({$condition_filter}) AND ({$condition_date}) AND ({$condition_location}) GROUP BY {$date_field}, t.`state` ORDER BY {$date_field} ASC, t.`state` ASC";
- // append query results to the epidata array
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- }
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
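- // Note on the weekly branch above: yearweek(t.`date`, 6) buckets dates into
- // Sunday-first YYYYWW values that line up with MMWR epiweeks (assuming
- // MySQL's mode-6 semantics), e.g. yearweek('2020-01-05', 6) = 202002.
-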
-// queries the `wiki` table
-// $articles (required): array of article titles
-// $language (required): specify the language of articles we want to retrieve
-// $dates (required): array of date or epiweek values/ranges
-// $resolution (required): either 'daily' or 'weekly'
-// $hours (optional): array of hour values/ranges
-// if present, $hours determines which counts are used within each day; otherwise all counts are used
-// for example, if hours=[4], then only the 4 AM (UTC) stream is returned
-function get_wiki($articles, $language, $dates, $resolution, $hours) {
- // required for `mysqli_real_escape_string`
- global $dbh;
- $language = mysqli_real_escape_string($dbh, $language);
- // basic query info
- // in a few rare instances (~6 total), `total` is unreasonably high; something glitched somewhere, just ignore it
- // $table = '`wiki` w JOIN (SELECT * FROM `wiki_meta` WHERE `total` < 100000000) m ON m.`datetime` = w.`datetime`';
- // Selecting rows by language first reduces this to the original single-language problem, so the rest of the code can stay the same
- $table = "( SELECT * FROM `wiki` WHERE `language` = '$language' ) w JOIN (SELECT * FROM `wiki_meta` WHERE `total` < 100000000 AND `language` = '$language' ) m ON m.`datetime` = w.`datetime`";
- // build the date filter and set field names
- $fields_string = array('article');
- $fields_int = array('count', 'total', 'hour');
- $fields_float = array('value');
- if($resolution === 'daily') {
- $date_field = 'm.`date`';
- $date_name = 'date';
- $condition_date = filter_dates($date_field, $dates);
- array_push($fields_string, $date_name);
- } else {
- $date_field = 'm.`epiweek`';
- $date_name = 'epiweek';
- $condition_date = filter_integers($date_field, $dates);
- array_push($fields_int, $date_name);
- }
- $fields = "{$date_field} `{$date_name}`, w.`article`, sum(w.`count`) `count`, sum(m.`total`) `total`, round(sum(w.`count`) / (sum(m.`total`) * 1e-6), 8) `value`";
- // build the article filter
- $condition_article = filter_strings('w.`article`', $articles);
- if($hours !== null) {
- // filter by specific hours
- $condition_hour = filter_integers('hour(m.`datetime`)', $hours);
- // final query, only taking counts from specific hours of the day
- $query = "SELECT {$fields}, hour(m.`datetime`) `hour` FROM {$table} WHERE ({$condition_date}) AND ({$condition_article}) AND ({$condition_hour}) GROUP BY {$date_field}, w.`article`, hour(m.`datetime`) ORDER BY {$date_field} ASC, w.`article` ASC, hour(m.`datetime`) ASC";
- } else {
- // final query, summing over all hours of the day
- $query = "SELECT {$fields}, -1 `hour` FROM {$table} WHERE ({$condition_date}) AND ({$condition_article}) GROUP BY {$date_field}, w.`article` ORDER BY {$date_field} ASC, w.`article` ASC";
- }
- // get the data from the database
- $epidata = array();
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
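- // The `value` computed above is pageviews per million total pageviews,
- // i.e. count / (total * 1e-6); e.g. (hypothetical numbers) count = 50
- // against total = 200,000,000 gives 50 / 200 = 0.25.
-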
-// queries the `quidel` table
-// $locations (required): array of location names
-// $epiweeks (required): array of epiweek values/ranges
-function get_quidel($locations, $epiweeks) {
- // basic query info
- $table = '`quidel` q';
- $fields = "q.`location`, q.`epiweek`, q.`value`";
- $order = "q.`epiweek` ASC, q.`location` ASC";
- // data type of each field
- $fields_string = array('location');
- $fields_int = array('epiweek');
- $fields_float = array('value');
- // build the location filter
- $condition_location = filter_strings('q.`location`', $locations);
- // build the epiweek filter
- $condition_epiweek = filter_integers('q.`epiweek`', $epiweeks);
- // the query
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_location}) AND ({$condition_epiweek}) ORDER BY {$order}";
- // get the data from the database
- $epidata = array();
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `norostat_point` table
- // $location (required): single location value (a string listing the included states)
-// $epiweeks (required): array of epiweek values/ranges
-function get_norostat($location, $epiweeks) {
- // todo add release/issue args
- //
- // build the filters:
- $condition_location = filter_strings('`norostat_raw_datatable_location_pool`.`location`', [$location]);
- $condition_epiweek = filter_integers('`latest`.`epiweek`', $epiweeks);
- // get the data from the database
- $epidata = array();
- // (exclude "location" from output to reduce size & ugliness of result,
- // transfer bandwidth required; it would just be a repeated echo of the input
- // $location)
- $fields_string = array('release_date');
- $fields_int = array('epiweek', 'value');
- $query = "
- SELECT `latest`.`release_date`, `latest`.`epiweek`, `latest`.`new_value` AS `value`
- FROM `norostat_point_diffs` AS `latest`
- LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location_id`)
- LEFT JOIN (
- SELECT * FROM `norostat_point_diffs`
- ) `later`
- ON `latest`.`location_id` = `later`.`location_id` AND
- `latest`.`epiweek` = `later`.`epiweek` AND
- (`latest`.`release_date`, `latest`.`parse_time`) <
- (`later`.`release_date`, `later`.`parse_time`) AND
- `later`.`new_value` IS NOT NULL
- WHERE ({$condition_location}) AND
- ({$condition_epiweek}) AND
- `later`.`parse_time` IS NULL AND
- `latest`.`new_value` IS NOT NULL
- ";
- // XXX: this query has no ORDER BY, so rows may not be sorted by epiweek
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
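- // The LEFT JOIN / IS NULL pattern above is an anti-join: a `latest` row
- // survives only when no `later` row exists for the same (location_id,
- // epiweek) with a strictly newer (release_date, parse_time) and a non-null
- // value, so each epiweek's most recently reported value wins.
-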
-// queries the `afhsb_00to13` table
-// $epiweeks (required): array of epiweek values/ranges
-// $locations (required): array of location names
-// $flu_types (required): array of flu types
-function get_afhsb($locations, $epiweeks, $flu_types) {
- global $dbh;
- $epidata = array();
- // split locations into national/regional/state
- $location_dict = array("hhs" => array(), "cen" => array(),
- "state" => array(), "country" => array());
- foreach($locations as $location) {
- $location = strtolower($location);
- if(substr($location, 0, 3) === "hhs") {
- array_push($location_dict["hhs"], $location);
- } elseif (substr($location, 0, 3) === "cen") {
- array_push($location_dict["cen"], $location);
- } elseif (strlen($location) === 3) {
- array_push($location_dict["country"], $location);
- } elseif (strlen($location) === 2) {
- array_push($location_dict["state"], $location);
- }
- }
- // split flu types into disjoint/subset
- $disjoint_flus = array();
- $subset_flus = array();
- foreach($flu_types as $flu_type) {
- if(in_array($flu_type, array('flu1','flu2-flu1','flu3-flu2','ili-flu3'))) {
- array_push($disjoint_flus, $flu_type);
- } elseif(in_array($flu_type, array('flu2','flu3','ili'))) {
- array_push($subset_flus, $flu_type);
- }
- }
- foreach($location_dict as $location_type=>$locs) {
- if(!empty($locs)) {
- _get_afhsb_by_table($epidata, $location_type, $epiweeks, $locs, $disjoint_flus, $subset_flus);
- }
- }
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// A helper function to query afhsb tables
-function _get_afhsb_by_table(&$epidata, $location_type, $epiweeks, $locations, $disjoint_flus, $subset_flus) {
- // basic query info
- $table = (in_array($location_type, array("hhs", "cen"))) ? "afhsb_00to13_region" : "afhsb_00to13_state";
- $fields = "`epiweek`, `{$location_type}` `location`, sum(`visit_sum`) `visit_sum`";
- $group = '`epiweek`, `location`';
- $order = "`epiweek` ASC, `location` ASC";
- $fields_string = array('location', 'flu_type');
- $fields_int = array('epiweek', 'visit_sum');
- // build the epiweek filter
- $condition_epiweek = filter_integers('`epiweek`', $epiweeks);
- // build the location filter
- $condition_location = filter_strings($location_type, $locations);
-
- // subset flu types: flu2, flu3, ili
- $flu_mapping = array('flu2' => array('flu1','flu2-flu1'),
- 'flu3' => array('flu1','flu2-flu1','flu3-flu2'),
- 'ili' => array('flu1','flu2-flu1','flu3-flu2','ili-flu3'));
- foreach($subset_flus as $subset_flu) {
- $condition_flu = filter_strings('`flu_type`', $flu_mapping[$subset_flu]);
- $query = "SELECT {$fields}, '{$subset_flu}' `flu_type` FROM {$table}
- WHERE ({$condition_epiweek}) AND ({$condition_location}) AND ({$condition_flu})
- GROUP BY {$group} ORDER BY {$order}";
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- }
- // disjoint flu types: flu1, flu2-flu1, flu3-flu2, ili-flu3
- if(!empty($disjoint_flus)){
- $condition_flu = filter_strings('`flu_type`', $disjoint_flus);
- $query = "SELECT {$fields}, `flu_type` FROM {$table}
- WHERE ({$condition_epiweek}) AND ({$condition_location}) AND ({$condition_flu})
- GROUP BY {$group},`flu_type` ORDER BY {$order},`flu_type`";
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- }
-}
-
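- // Worked example of the subset/disjoint split above (hypothetical counts):
- // if a location reports visit sums flu1 = 10, flu2-flu1 = 5, flu3-flu2 = 3
- // for an epiweek, the subset query for 'flu3' sums its mapped disjoint types
- // flu1 + (flu2-flu1) + (flu3-flu2) = 18 and labels the row 'flu3'.
-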
-// queries the `nidss_flu` table
-// $epiweeks (required): array of epiweek values/ranges
-// $regions (required): array of region names
-// $issues (optional): array of epiweek values/ranges
-// overrides $lag
-// default: most recent issue
-// $lag (optional): number of weeks between each epiweek and its issue
-// overridden by $issues
-// default: most recent issue
-function get_nidss_flu($epiweeks, $regions, $issues, $lag) {
- // basic query info
- $table = '`nidss_flu` nf';
- $fields = "nf.`release_date`, nf.`issue`, nf.`epiweek`, nf.`region`, nf.`lag`, nf.`visits`, nf.`ili`";
- $order = "nf.`epiweek` ASC, nf.`region` ASC, nf.`issue` ASC";
- // build the epiweek filter
- $condition_epiweek = filter_integers('nf.`epiweek`', $epiweeks);
- // build the region filter
- $condition_region = filter_strings('nf.`region`', $regions);
- if($issues !== null) {
- // build the issue filter
- $condition_issue = filter_integers('nf.`issue`', $issues);
- // final query using specific issues
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_issue}) ORDER BY {$order}";
- } else if($lag !== null) {
- // build the lag filter
- $condition_lag = "(nf.`lag` = {$lag})";
- // final query using lagged issues
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) AND ({$condition_lag}) ORDER BY {$order}";
- } else {
- // final query using most recent issues
- $subquery = "(SELECT max(`issue`) `max_issue`, `epiweek`, `region` FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_region}) GROUP BY `epiweek`, `region`) x";
- $condition = "x.`max_issue` = nf.`issue` AND x.`epiweek` = nf.`epiweek` AND x.`region` = nf.`region`";
- $query = "SELECT {$fields} FROM {$table} JOIN {$subquery} ON {$condition} ORDER BY {$order}";
- }
- // get the data from the database
- $epidata = array();
- $fields_string = array('release_date', 'region');
- $fields_int = array('issue', 'epiweek', 'lag', 'visits');
- $fields_float = array('ili');
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `nidss_dengue` table
-// $epiweeks (required): array of epiweek values/ranges
-// $locations (required): array of region and/or location names
-function get_nidss_dengue($epiweeks, $locations) {
- global $dbh;
- // build the epiweek filter
- $condition_epiweek = filter_integers('nd.`epiweek`', $epiweeks);
- // get the data from the database
- $epidata = array();
- $fields_string = array('location');
- $fields_int = array('epiweek', 'count');
- foreach($locations as $location) {
- $location = mysqli_real_escape_string($dbh, $location);
- $query = "
- SELECT
- nd2.`epiweek`, nd2.`location`, count(1) `num_locations`, sum(nd2.`count`) `count`
- FROM (
- SELECT
- nd1.`epiweek`, CASE WHEN q.`query` = nd1.`location` THEN nd1.`location` WHEN q.`query` = nd1.`region` THEN nd1.`region` ELSE nd1.`nat` END `location`, nd1.`count`
- FROM (
- SELECT
- `epiweek`, `location`, `region`, 'nationwide' `nat`, `count`
- FROM
- `nidss_dengue` nd
- WHERE {$condition_epiweek}
- ) nd1
- JOIN (
- SELECT
- '{$location}' `query`
- ) q
- ON
- q.`query` IN (nd1.`location`, nd1.`region`, nd1.`nat`)
- ) nd2
- GROUP BY
- nd2.`epiweek`, nd2.`location`
- ";
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- }
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
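- // The nested query above matches each requested name at whichever level it
- // occurs: 'nationwide' aggregates every row, a region name aggregates its
- // member locations, and a location name matches only itself; `num_locations`
- // reports how many raw rows were summed into each output row.
-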
-// queries the `forecasts` table
-// $system (required): system name
-// $epiweek (required): epiweek on which the forecast was made
-function get_forecast($system, $epiweek) {
- global $dbh;
- // get the data from the database
- $system = mysqli_real_escape_string($dbh, $system);
- $query = "SELECT `system`, `epiweek`, `json` FROM `forecasts` WHERE `system` = '{$system}' AND `epiweek` = {$epiweek}";
- $epidata = array();
- $fields_string = array('system', 'json');
- $fields_int = array('epiweek');
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- // parse forecast data
- if(count($epidata) === 1 && array_key_exists('json', $epidata[0])) {
- $epidata[0]['forecast'] = json_decode($epidata[0]['json']);
- unset($epidata[0]['json']);
- }
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `cdc_extract` table
-// $epiweeks (required): array of epiweek values/ranges
-// $locations (required): array of location names
-function get_cdc($epiweeks, $locations) {
- global $dbh;
- // basic query info
- $table = '`cdc_extract` c';
- $group = "c.`epiweek`";
- $order = "c.`epiweek` ASC";
- $fields_string = array('location');
- $fields_int = array('epiweek', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'total');
- // build the epiweek filter
- $condition_epiweek = filter_integers('c.`epiweek`', $epiweeks);
- // split locations into national/regional/state
- $regions = array();
- $states = array();
- foreach($locations as $location) {
- $location = strtolower($location);
- if(in_array($location, array('nat', 'hhs1', 'hhs2', 'hhs3', 'hhs4', 'hhs5', 'hhs6', 'hhs7', 'hhs8', 'hhs9', 'hhs10', 'cen1', 'cen2', 'cen3', 'cen4', 'cen5', 'cen6', 'cen7', 'cen8', 'cen9'))) {
- array_push($regions, $location);
- } else {
- array_push($states, $location);
- }
- }
- // initialize the epidata array
- $epidata = array();
- // query each region individually (the data is stored by state, so getting regional data requires some extra processing)
- foreach($regions as $region) {
- $region = mysqli_real_escape_string($dbh, $region);
- $fields = "'{$region}' `location`, c.`epiweek`, sum(c.`num1`) `num1`, sum(c.`num2`) `num2`, sum(c.`num3`) `num3`, sum(c.`num4`) `num4`, sum(c.`num5`) `num5`, sum(c.`num6`) `num6`, sum(c.`num7`) `num7`, sum(c.`num8`) `num8`, sum(c.`total`) `total`";
- if($region === 'nat') {
- // final query for U.S. National
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) GROUP BY {$group} ORDER BY {$order}";
- } else {
- // build the location filter
- $condition_location = "`state` IN (" . get_region_states($region) . ")";
- // final query for HHS Regions
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_location}) GROUP BY {$group} ORDER BY {$order}";
- }
- // append query results to the epidata array
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- }
- // query all states together
- if(count($states) !== 0) {
- $fields = "c.`state` `location`, c.`epiweek`, c.`num1`, c.`num2`, c.`num3`, c.`num4`, c.`num5`, c.`num6`, c.`num7`, c.`num8`, c.`total`";
- // build the location filter
- $condition_location = filter_strings('c.`state`', $states);
- // final query for states
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_epiweek}) AND ({$condition_location}) ORDER BY {$order}, c.`state` ASC";
- // append query results to the epidata array
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- }
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `sensors` table
-// $names (required): array of sensor names
-// $locations (required): array of location names
-// $epiweeks (required): array of epiweek values/ranges
-function get_sensors($names, $locations, $epiweeks) {
- // basic query info
- $table = '`sensors` s';
- $fields = "s.`name`, s.`location`, s.`epiweek`, s.`value`";
- $order = "s.`epiweek` ASC, s.`name` ASC, s.`location` ASC";
- // data type of each field
- $fields_string = array('name', 'location');
- $fields_int = array('epiweek');
- $fields_float = array('value');
- // build the name filter
- $condition_name = filter_strings('s.`name`', $names);
- // build the location filter
- $condition_location = filter_strings('s.`location`', $locations);
- // build the epiweek filter
- $condition_epiweek = filter_integers('s.`epiweek`', $epiweeks);
- // the query
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_name}) AND ({$condition_location}) AND ({$condition_epiweek}) ORDER BY {$order}";
- // get the data from the database
- $epidata = array();
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `dengue_sensors` table
-// $names (required): array of sensor names
-// $locations (required): array of location names
-// $epiweeks (required): array of epiweek values/ranges
-function get_dengue_sensors($names, $locations, $epiweeks) {
- // basic query info
- $table = '`dengue_sensors` s';
- $fields = "s.`name`, s.`location`, s.`epiweek`, s.`value`";
- $order = "s.`epiweek` ASC, s.`name` ASC, s.`location` ASC";
- // data type of each field
- $fields_string = array('name', 'location');
- $fields_int = array('epiweek');
- $fields_float = array('value');
- // build the name filter
- $condition_name = filter_strings('s.`name`', $names);
- // build the location filter
- $condition_location = filter_strings('s.`location`', $locations);
- // build the epiweek filter
- $condition_epiweek = filter_integers('s.`epiweek`', $epiweeks);
- // the query
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_name}) AND ({$condition_location}) AND ({$condition_epiweek}) ORDER BY {$order}";
- // get the data from the database
- $epidata = array();
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `nowcasts` table
-// $locations (required): array of location names
-// $epiweeks (required): array of epiweek values/ranges
-function get_nowcast($locations, $epiweeks) {
- // basic query info
- $table = '`nowcasts` n';
- $fields = "n.`location`, n.`epiweek`, n.`value`, n.`std`";
- $order = "n.`epiweek` ASC, n.`location` ASC";
- // data type of each field
- $fields_string = array('location');
- $fields_int = array('epiweek');
- $fields_float = array('value', 'std');
- // build the location filter
- $condition_location = filter_strings('n.`location`', $locations);
- // build the epiweek filter
- $condition_epiweek = filter_integers('n.`epiweek`', $epiweeks);
- // the query
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_location}) AND ({$condition_epiweek}) ORDER BY {$order}";
- // get the data from the database
- $epidata = array();
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `dengue_nowcasts` table
-// $locations (required): array of location names
-// $epiweeks (required): array of epiweek values/ranges
-function get_dengue_nowcast($locations, $epiweeks) {
- // basic query info
- $table = '`dengue_nowcasts` n';
- $fields = "n.`location`, n.`epiweek`, n.`value`, n.`std`";
- $order = "n.`epiweek` ASC, n.`location` ASC";
- // data type of each field
- $fields_string = array('location');
- $fields_int = array('epiweek');
- $fields_float = array('value', 'std');
- // build the location filter
- $condition_location = filter_strings('n.`location`', $locations);
- // build the epiweek filter
- $condition_epiweek = filter_integers('n.`epiweek`', $epiweeks);
- // the query
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_location}) AND ({$condition_epiweek}) ORDER BY {$order}";
- // get the data from the database
- $epidata = array();
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `covidcast` table.
-// $source (required): name of upstream data source
-// $signals (required): array of names for signals derived from upstream data
-// $time_type (required): temporal resolution (e.g. day, week)
-// $geo_type (required): spatial resolution (e.g. county, msa, state)
-// $time_values (required): array of time values/ranges
- // $geo_values (required): string, array of strings, or `*` as a wildcard for all
-// locations (specific to `$geo_type`)
- // $as_of (optional): single time value; fetch the most recent issue on or before it
- // overridden by $issues and $lag
- // default: most recent issue
- // $issues (optional): array of time values/ranges
-// overrides $lag
-// default: most recent issue
-// $lag (optional): number of time units between each time value and its issue
-// overridden by $issues
-// default: most recent issue
-function get_covidcast($source, $signals, $time_type, $geo_type, $time_values, $geo_values, $as_of, $issues, $lag) {
- // required for `mysqli_real_escape_string`
- global $dbh;
- $source = mysqli_real_escape_string($dbh, $source);
- $time_type = mysqli_real_escape_string($dbh, $time_type);
- $geo_type = mysqli_real_escape_string($dbh, $geo_type);
- // basic query info
- $table = '`covidcast` t';
- $fields = "t.`signal`, t.`time_value`, t.`geo_value`, t.`value`, t.`stderr`, t.`sample_size`, t.`direction`, t.`issue`, t.`lag`, t. `missing_value`, t. `missing_stderr`, t. `missing_sample_size`";
- $order = "t.`signal` ASC, t.`time_value` ASC, t.`geo_value` ASC, t.`issue` ASC";
- // data type of each field
- $fields_string = array('geo_value', 'signal');
- $fields_int = array('time_value', 'direction', 'issue', 'lag', 'missing_value', 'missing_stderr', 'missing_sample_size');
- $fields_float = array('value', 'stderr', 'sample_size');
- // build the source, signal, time, and location (type and id) filters
- $condition_source = "t.`source` = '{$source}'";
- $condition_signal = filter_strings('t.`signal`', $signals);
- $condition_time_type = "t.`time_type` = '{$time_type}'";
- $condition_geo_type = "t.`geo_type` = '{$geo_type}'";
- $condition_time_value = filter_integers('t.`time_value`', $time_values);
-
- if ($geo_values === '*') {
- // the wildcard query should return data for all locations in `geo_type`
- $condition_geo_value = 'TRUE';
- } else if (is_array($geo_values)) {
- // return data for multiple locations
- $condition_geo_value = filter_strings('t.`geo_value`', $geo_values);
- } else {
- // return data for a particular location
- $geo_escaped_value = mysqli_real_escape_string($dbh, $geo_values);
- $condition_geo_value = "t.`geo_value` = '{$geo_escaped_value}'";
- }
- $conditions = "({$condition_source}) AND ({$condition_signal}) AND ({$condition_time_type}) AND ({$condition_geo_type}) AND ({$condition_time_value}) AND ({$condition_geo_value})";
-
- $subquery = "";
- if ($issues !== null) {
- // build the issue filter
- $condition_issue = filter_integers('t.`issue`', $issues);
- $condition_version = $condition_issue;
- } else if ($lag !== null) {
- // build the lag filter
- $condition_lag = "(t.`lag` = {$lag})";
- $condition_version = $condition_lag;
- } else if ($as_of !== null) {
- // fetch the most recent issue as of the given date
- $sub_condition_asof = "(`issue` <= {$as_of})";
- $sub_fields = "max(`issue`) `max_issue`, `time_type`, `time_value`, `source`, `signal`, `geo_type`, `geo_value`";
- $sub_group = "`time_type`, `time_value`, `source`, `signal`, `geo_type`, `geo_value`";
- $sub_condition = "x.`max_issue` = t.`issue` AND x.`time_type` = t.`time_type` AND x.`time_value` = t.`time_value` AND x.`source` = t.`source` AND x.`signal` = t.`signal` AND x.`geo_type` = t.`geo_type` AND x.`geo_value` = t.`geo_value`";
- $subquery = "JOIN (SELECT {$sub_fields} FROM {$table} WHERE ({$conditions} AND {$sub_condition_asof}) GROUP BY {$sub_group}) x ON {$sub_condition}";
- $condition_version = 'TRUE';
- } else {
- // fast path: fetch the most recent issue via the `is_latest_issue` flag
- $condition_version = '(t.`is_latest_issue` IS TRUE)';
- }
- // the query
- $query = "SELECT {$fields} FROM {$table} {$subquery} WHERE {$conditions} AND ({$condition_version}) ORDER BY {$order}";
- // get the data from the database
- $epidata = array();
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
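- // Versioning sketch for the branches above (hypothetical values): take a row
- // with time_value 20200401, first issued 20200404 and revised 20200410:
- //   $issues = array(20200410) -> only the 20200410 revision
- //   $lag = 3                  -> the 20200404 version (issue is 3 days after time_value)
- //   $as_of = 20200409         -> the 20200404 version (largest issue <= as_of)
- //   all three null            -> fast path via the `is_latest_issue` flag
-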
-function get_signal_dash_status_data() {
- $query = 'SELECT enabled_signal.`name`,
- enabled_signal.`source`,
- enabled_signal.`covidcast_signal`,
- status.`latest_issue`,
- status.`latest_time_value`
- FROM (SELECT `id`, `name`, `source`, `covidcast_signal`, `latest_status_update`
- FROM `dashboard_signal`
- WHERE `enabled`) AS enabled_signal
- LEFT JOIN `dashboard_signal_status` AS status
- ON enabled_signal.`latest_status_update` = status.`date`
- AND enabled_signal.`id` = status.`signal_id`';
-
- $epidata = array();
- $fields_string = array('name', 'source', 'covidcast_signal', 'latest_issue', 'latest_time_value');
- execute_query($query, $epidata, $fields_string, null /* fields_int */, null /* fields_float */);
-
- $coverage = get_signal_dash_coverage_data();
-
- $out = array();
- foreach ($epidata as $signal) {
- if (isset($coverage[$signal['name']])) {
- $signal_with_coverage = $signal;
- $signal_with_coverage['coverage'] = $coverage[$signal['name']];
- $out[] = $signal_with_coverage;
- }
- }
-
- // return the data
- return count($out) === 0 ? null : $out;
-}
-
-function get_signal_dash_coverage_data() {
- $query = 'SELECT enabled_signal.`name`,
- coverage.`date`,
- coverage.`geo_type`,
- coverage.`count`
- FROM (SELECT `id`, `name`, `latest_coverage_update`
- FROM `dashboard_signal`
- WHERE `enabled`) AS enabled_signal
- LEFT JOIN `dashboard_signal_coverage` AS coverage
- ON enabled_signal.`id` = coverage.`signal_id`
- ORDER BY `id` ASC, `date` DESC';
-
- $epidata = array();
- $fields_string = array('name', 'date', 'geo_type');
- $fields_int = array('count');
- execute_query($query, $epidata, $fields_string, $fields_int, null /* fields_float */);
-
- $out = array();
- foreach ($epidata as $row) {
- $name = $row['name'];
- $geo_type = $row['geo_type'];
- $timedata = array();
- $timedata['date'] = $row['date'];
- $timedata['count'] = $row['count'];
-
- if (!isset($out[$name])) {
- $out[$name] = array();
- }
-
- if(!isset($out[$name][$geo_type])) {
- $out[$name][$geo_type] = array();
- }
-
- $out[$name][$geo_type][] = $timedata;
- }
-
- // return the data
- return count($out) === 0 ? null : $out;
-}
-
-// queries the `covidcast_meta_cache` table for metadata
-function get_covidcast_meta() {
- // complain if the cache is more than 75 minutes old
- $max_age = 75 * 60;
-
- // basic query info
- $query = 'SELECT UNIX_TIMESTAMP(NOW()) - `timestamp` AS `age`, `epidata` FROM `covidcast_meta_cache` LIMIT 1';
-
- // get the data from the database
- global $dbh;
- $epidata = null;
- $result = mysqli_query($dbh, $query);
- if($row = mysqli_fetch_array($result)) {
- // parse and use the cached response
- $epidata = json_decode($row['epidata'], true);
-
- if (intval($row['age']) > $max_age && strlen($row['epidata']) > 0) {
- error_log('covidcast_meta cache is stale: '.$row['age']);
- }
- }
-
- if ($epidata !== null) {
- // filter rows
- $time_types = isset($_REQUEST['time_types']) ? extract_values($_REQUEST['time_types'], 'str') : null;
- $signals = isset($_REQUEST['signals']) ? array_map(function($signal) {
- return explode(':', $signal, 2);
- }, extract_values($_REQUEST['signals'], 'str')) : null;
- $geo_types = isset($_REQUEST['geo_types']) ? extract_values($_REQUEST['geo_types'], 'str') : null;
-
- if ($time_types !== null || $signals !== null || $geo_types !== null) {
- $epidata = array_values(array_filter($epidata, function($row) use(&$time_types, &$signals, &$geo_types) {
- if ($time_types !== null && !in_array($row['time_type'], $time_types)) {
- return false;
- }
- if ($geo_types !== null && !in_array($row['geo_type'], $geo_types)) {
- return false;
- }
- if ($signals === null || count($signals) === 0) {
- return true;
- }
- // filter by signal
- foreach($signals as $signal) {
- // match source and (signal or no signal or signal = *)
- if ($row['data_source'] === $signal[0] && (count($signal) === 1 || $row['signal'] === $signal[1] || $signal[1] === '*')) {
- return true;
- }
- }
- return false;
- }));
- }
- // filter fields
- if (isset($_REQUEST['fields'])) {
- $fields = extract_values($_REQUEST['fields'], 'str');
-
- $epidata = array_map(function($row) use(&$fields) {
- $filtered_row = [];
- foreach($fields as $field) {
- if (isset($row[$field])) {
- $filtered_row[$field] = $row[$field];
- }
- }
- return $filtered_row;
- }, $epidata);
- }
- }
-
- // return the data
- $has_values = $epidata !== null && count($epidata) > 0;
- return $has_values ? $epidata : null;
-}
-
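- // The `signals` request parameter above is parsed as source:signal pairs,
- // e.g. signals=fb-survey:smoothed_cli keeps only metadata rows whose
- // data_source is 'fb-survey' and whose signal is 'smoothed_cli'; 'fb-survey'
- // alone or 'fb-survey:*' keeps every signal from that source.
-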
-// queries the `covid_hosp_state_timeseries` table
-// $states (required): array of state abbreviations
-// $dates (required): array of date values/ranges
-// $issues (optional): array of date values/ranges
-// default: most recent issue
-function get_covid_hosp_state_timeseries($states, $dates, $issues) {
- $epidata = array();
- $table = '`covid_hosp_state_timeseries` c';
- $fields = implode(', ', array(
- 'c.`issue`',
- 'c.`state`',
- 'c.`date`',
- 'c.`critical_staffing_shortage_today_yes`',
- 'c.`critical_staffing_shortage_today_no`',
- 'c.`critical_staffing_shortage_today_not_reported`',
- 'c.`critical_staffing_shortage_anticipated_within_week_yes`',
- 'c.`critical_staffing_shortage_anticipated_within_week_no`',
- 'c.`critical_staffing_shortage_anticipated_within_week_not_reported`',
- 'c.`hospital_onset_covid`',
- 'c.`hospital_onset_covid_coverage`',
- 'c.`inpatient_beds`',
- 'c.`inpatient_beds_coverage`',
- 'c.`inpatient_beds_used`',
- 'c.`inpatient_beds_used_coverage`',
- 'c.`inpatient_beds_used_covid`',
- 'c.`inpatient_beds_used_covid_coverage`',
- 'c.`previous_day_admission_adult_covid_confirmed`',
- 'c.`previous_day_admission_adult_covid_confirmed_coverage`',
- 'c.`previous_day_admission_adult_covid_suspected`',
- 'c.`previous_day_admission_adult_covid_suspected_coverage`',
- 'c.`previous_day_admission_pediatric_covid_confirmed`',
- 'c.`previous_day_admission_pediatric_covid_confirmed_coverage`',
- 'c.`previous_day_admission_pediatric_covid_suspected`',
- 'c.`previous_day_admission_pediatric_covid_suspected_coverage`',
- 'c.`staffed_adult_icu_bed_occupancy`',
- 'c.`staffed_adult_icu_bed_occupancy_coverage`',
- 'c.`staffed_icu_adult_patients_confirmed_suspected_covid`',
- 'c.`staffed_icu_adult_patients_confirmed_suspected_covid_coverage`',
- 'c.`staffed_icu_adult_patients_confirmed_covid`',
- 'c.`staffed_icu_adult_patients_confirmed_covid_coverage`',
- 'c.`total_adult_patients_hosp_confirmed_suspected_covid`',
- 'c.`total_adult_patients_hosp_confirmed_suspected_covid_coverage`',
- 'c.`total_adult_patients_hosp_confirmed_covid`',
- 'c.`total_adult_patients_hosp_confirmed_covid_coverage`',
- 'c.`total_pediatric_patients_hosp_confirmed_suspected_covid`',
- 'c.`total_pediatric_patients_hosp_confirmed_suspected_covid_coverage`',
- 'c.`total_pediatric_patients_hosp_confirmed_covid`',
- 'c.`total_pediatric_patients_hosp_confirmed_covid_coverage`',
- 'c.`total_staffed_adult_icu_beds`',
- 'c.`total_staffed_adult_icu_beds_coverage`',
- 'c.`inpatient_beds_utilization`',
- 'c.`inpatient_beds_utilization_coverage`',
- 'c.`inpatient_beds_utilization_numerator`',
- 'c.`inpatient_beds_utilization_denominator`',
- 'c.`percent_of_inpatients_with_covid`',
- 'c.`percent_of_inpatients_with_covid_coverage`',
- 'c.`percent_of_inpatients_with_covid_numerator`',
- 'c.`percent_of_inpatients_with_covid_denominator`',
- 'c.`inpatient_bed_covid_utilization`',
- 'c.`inpatient_bed_covid_utilization_coverage`',
- 'c.`inpatient_bed_covid_utilization_numerator`',
- 'c.`inpatient_bed_covid_utilization_denominator`',
- 'c.`adult_icu_bed_covid_utilization`',
- 'c.`adult_icu_bed_covid_utilization_coverage`',
- 'c.`adult_icu_bed_covid_utilization_numerator`',
- 'c.`adult_icu_bed_covid_utilization_denominator`',
- 'c.`adult_icu_bed_utilization`',
- 'c.`adult_icu_bed_utilization_coverage`',
- 'c.`adult_icu_bed_utilization_numerator`',
- 'c.`adult_icu_bed_utilization_denominator`',
- ));
- // basic query info
- $order = "c.`date` ASC, c.`state` ASC, c.`issue` ASC";
- // build the date filter
- $condition_date = filter_integers('c.`date`', $dates);
- // build the state filter
- $condition_state = filter_strings('c.`state`', $states);
- if($issues !== null) {
- // build the issue filter
- $condition_issue = filter_integers('c.`issue`', $issues);
- // final query using specific issues
- $query = "WITH c as (SELECT {$fields}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) row FROM {$table} WHERE ({$condition_date}) AND ({$condition_state}) AND ({$condition_issue})) SELECT {$fields} FROM c where row = 1 ORDER BY {$order}";
- } else {
- // final query using most recent issues
- $subquery = "(SELECT max(`issue`) `max_issue`, `date`, `state` FROM {$table} WHERE ({$condition_date}) AND ({$condition_state}) GROUP BY `date`, `state`) x";
- $condition = "x.`max_issue` = c.`issue` AND x.`date` = c.`date` AND x.`state` = c.`state`";
- $query = "WITH c as (SELECT {$fields}, ROW_NUMBER() OVER (PARTITION BY date, state, issue ORDER BY record_type) row FROM {$table} JOIN {$subquery} ON {$condition}) select {$fields} FROM c WHERE row = 1 ORDER BY {$order}";
- }
- // get the data from the database
- $fields_string = array('state');
- $fields_int = array(
- 'issue',
- 'date',
- 'critical_staffing_shortage_today_yes',
- 'critical_staffing_shortage_today_no',
- 'critical_staffing_shortage_today_not_reported',
- 'critical_staffing_shortage_anticipated_within_week_yes',
- 'critical_staffing_shortage_anticipated_within_week_no',
- 'critical_staffing_shortage_anticipated_within_week_not_reported',
- 'hospital_onset_covid',
- 'hospital_onset_covid_coverage',
- 'inpatient_beds',
- 'inpatient_beds_coverage',
- 'inpatient_beds_used',
- 'inpatient_beds_used_coverage',
- 'inpatient_beds_used_covid',
- 'inpatient_beds_used_covid_coverage',
- 'previous_day_admission_adult_covid_confirmed',
- 'previous_day_admission_adult_covid_confirmed_coverage',
- 'previous_day_admission_adult_covid_suspected',
- 'previous_day_admission_adult_covid_suspected_coverage',
- 'previous_day_admission_pediatric_covid_confirmed',
- 'previous_day_admission_pediatric_covid_confirmed_coverage',
- 'previous_day_admission_pediatric_covid_suspected',
- 'previous_day_admission_pediatric_covid_suspected_coverage',
- 'staffed_adult_icu_bed_occupancy',
- 'staffed_adult_icu_bed_occupancy_coverage',
- 'staffed_icu_adult_patients_confirmed_suspected_covid',
- 'staffed_icu_adult_patients_confirmed_suspected_covid_coverage',
- 'staffed_icu_adult_patients_confirmed_covid',
- 'staffed_icu_adult_patients_confirmed_covid_coverage',
- 'total_adult_patients_hosp_confirmed_suspected_covid',
- 'total_adult_patients_hosp_confirmed_suspected_covid_coverage',
- 'total_adult_patients_hosp_confirmed_covid',
- 'total_adult_patients_hosp_confirmed_covid_coverage',
- 'total_pediatric_patients_hosp_confirmed_suspected_covid',
- 'total_pediatric_patients_hosp_confirmed_suspected_covid_coverage',
- 'total_pediatric_patients_hosp_confirmed_covid',
- 'total_pediatric_patients_hosp_confirmed_covid_coverage',
- 'total_staffed_adult_icu_beds',
- 'total_staffed_adult_icu_beds_coverage',
- 'inpatient_beds_utilization_coverage',
- 'inpatient_beds_utilization_numerator',
- 'inpatient_beds_utilization_denominator',
- 'percent_of_inpatients_with_covid_coverage',
- 'percent_of_inpatients_with_covid_numerator',
- 'percent_of_inpatients_with_covid_denominator',
- 'inpatient_bed_covid_utilization_coverage',
- 'inpatient_bed_covid_utilization_numerator',
- 'inpatient_bed_covid_utilization_denominator',
- 'adult_icu_bed_covid_utilization_coverage',
- 'adult_icu_bed_covid_utilization_numerator',
- 'adult_icu_bed_covid_utilization_denominator',
- 'adult_icu_bed_utilization_coverage',
- 'adult_icu_bed_utilization_numerator',
- 'adult_icu_bed_utilization_denominator',
- );
- $fields_float = array(
- 'inpatient_beds_utilization',
- 'percent_of_inpatients_with_covid',
- 'inpatient_bed_covid_utilization',
- 'adult_icu_bed_covid_utilization',
- 'adult_icu_bed_utilization',
- );
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
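- // The ROW_NUMBER() window above deduplicates rows reported under multiple
- // `record_type` values: partitioning on (date, state, issue) and keeping
- // only row = 1 yields exactly one record per key, chosen by `record_type`
- // order.
-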
-// queries the `covid_hosp_facility` table
-// $hospital_pks (required): array of facility identifiers (`hospital_pk`)
-// $collection_weeks (required): array of date values/ranges
-// $publication_dates (optional): array of date values/ranges
-// default: most recent issue
-function get_covid_hosp_facility($hospital_pks, $collection_weeks, $publication_dates) {
- $epidata = array();
- $table = '`covid_hosp_facility` c';
- $fields = implode(', ', array(
- 'c.`publication_date`',
- 'c.`hospital_pk`',
- 'c.`collection_week`',
- 'c.`state`',
- 'c.`ccn`',
- 'c.`hospital_name`',
- 'c.`address`',
- 'c.`city`',
- 'c.`zip`',
- 'c.`hospital_subtype`',
- 'c.`fips_code`',
- 'c.`is_metro_micro`',
- 'c.`total_beds_7_day_avg`',
- 'c.`all_adult_hospital_beds_7_day_avg`',
- 'c.`all_adult_hospital_inpatient_beds_7_day_avg`',
- 'c.`inpatient_beds_used_7_day_avg`',
- 'c.`all_adult_hospital_inpatient_bed_occupied_7_day_avg`',
- 'c.`total_adult_patients_hosp_confirmed_suspected_covid_7d_avg`',
- 'c.`total_adult_patients_hospitalized_confirmed_covid_7_day_avg`',
- 'c.`total_pediatric_patients_hosp_confirmed_suspected_covid_7d_avg`',
- 'c.`total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg`',
- 'c.`inpatient_beds_7_day_avg`',
- 'c.`total_icu_beds_7_day_avg`',
- 'c.`total_staffed_adult_icu_beds_7_day_avg`',
- 'c.`icu_beds_used_7_day_avg`',
- 'c.`staffed_adult_icu_bed_occupancy_7_day_avg`',
- 'c.`staffed_icu_adult_patients_confirmed_suspected_covid_7d_avg`',
- 'c.`staffed_icu_adult_patients_confirmed_covid_7_day_avg`',
- 'c.`total_patients_hospitalized_confirmed_influenza_7_day_avg`',
- 'c.`icu_patients_confirmed_influenza_7_day_avg`',
- 'c.`total_patients_hosp_confirmed_influenza_and_covid_7d_avg`',
- 'c.`total_beds_7_day_sum`',
- 'c.`all_adult_hospital_beds_7_day_sum`',
- 'c.`all_adult_hospital_inpatient_beds_7_day_sum`',
- 'c.`inpatient_beds_used_7_day_sum`',
- 'c.`all_adult_hospital_inpatient_bed_occupied_7_day_sum`',
- 'c.`total_adult_patients_hosp_confirmed_suspected_covid_7d_sum`',
- 'c.`total_adult_patients_hospitalized_confirmed_covid_7_day_sum`',
- 'c.`total_pediatric_patients_hosp_confirmed_suspected_covid_7d_sum`',
- 'c.`total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum`',
- 'c.`inpatient_beds_7_day_sum`',
- 'c.`total_icu_beds_7_day_sum`',
- 'c.`total_staffed_adult_icu_beds_7_day_sum`',
- 'c.`icu_beds_used_7_day_sum`',
- 'c.`staffed_adult_icu_bed_occupancy_7_day_sum`',
- 'c.`staffed_icu_adult_patients_confirmed_suspected_covid_7d_sum`',
- 'c.`staffed_icu_adult_patients_confirmed_covid_7_day_sum`',
- 'c.`total_patients_hospitalized_confirmed_influenza_7_day_sum`',
- 'c.`icu_patients_confirmed_influenza_7_day_sum`',
- 'c.`total_patients_hosp_confirmed_influenza_and_covid_7d_sum`',
- 'c.`total_beds_7_day_coverage`',
- 'c.`all_adult_hospital_beds_7_day_coverage`',
- 'c.`all_adult_hospital_inpatient_beds_7_day_coverage`',
- 'c.`inpatient_beds_used_7_day_coverage`',
- 'c.`all_adult_hospital_inpatient_bed_occupied_7_day_coverage`',
- 'c.`total_adult_patients_hosp_confirmed_suspected_covid_7d_cov`',
- 'c.`total_adult_patients_hospitalized_confirmed_covid_7_day_coverage`',
- 'c.`total_pediatric_patients_hosp_confirmed_suspected_covid_7d_cov`',
- 'c.`total_pediatric_patients_hosp_confirmed_covid_7d_cov`',
- 'c.`inpatient_beds_7_day_coverage`',
- 'c.`total_icu_beds_7_day_coverage`',
- 'c.`total_staffed_adult_icu_beds_7_day_coverage`',
- 'c.`icu_beds_used_7_day_coverage`',
- 'c.`staffed_adult_icu_bed_occupancy_7_day_coverage`',
- 'c.`staffed_icu_adult_patients_confirmed_suspected_covid_7d_cov`',
- 'c.`staffed_icu_adult_patients_confirmed_covid_7_day_coverage`',
- 'c.`total_patients_hospitalized_confirmed_influenza_7_day_coverage`',
- 'c.`icu_patients_confirmed_influenza_7_day_coverage`',
- 'c.`total_patients_hosp_confirmed_influenza_and_covid_7d_cov`',
- 'c.`previous_day_admission_adult_covid_confirmed_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_confirmed_18_19_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_confirmed_20_29_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_confirmed_30_39_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_confirmed_40_49_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_confirmed_50_59_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_confirmed_60_69_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_confirmed_70_79_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_confirmed_80plus_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_confirmed_unknown_7_day_sum`',
- 'c.`previous_day_admission_pediatric_covid_confirmed_7_day_sum`',
- 'c.`previous_day_covid_ed_visits_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_suspected_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_suspected_18_19_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_suspected_20_29_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_suspected_30_39_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_suspected_40_49_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_suspected_50_59_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_suspected_60_69_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_suspected_70_79_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_suspected_80plus_7_day_sum`',
- 'c.`previous_day_admission_adult_covid_suspected_unknown_7_day_sum`',
- 'c.`previous_day_admission_pediatric_covid_suspected_7_day_sum`',
- 'c.`previous_day_total_ed_visits_7_day_sum`',
- 'c.`previous_day_admission_influenza_confirmed_7_day_sum`',
- ));
- // basic query info
- $order = "c.`collection_week` ASC, c.`hospital_pk` ASC, c.`publication_date` ASC";
- // build the date filter
- $condition_collection_week = filter_integers('c.`collection_week`', $collection_weeks);
- // build the state filter
- $condition_hospital_pk = filter_strings('c.`hospital_pk`', $hospital_pks);
- if($publication_dates !== null) {
- // build the issue filter
- $condition_publication_date = filter_integers('c.`publication_date`', $publication_dates);
- // final query using specific issues
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition_collection_week}) AND ({$condition_hospital_pk}) AND ({$condition_publication_date}) ORDER BY {$order}";
- } else {
- // final query using most recent issues
- $subquery = "(SELECT max(`publication_date`) `max_publication_date`, `collection_week`, `hospital_pk` FROM {$table} WHERE ({$condition_collection_week}) AND ({$condition_hospital_pk}) GROUP BY `collection_week`, `hospital_pk`) x";
- $condition = "x.`max_publication_date` = c.`publication_date` AND x.`collection_week` = c.`collection_week` AND x.`hospital_pk` = c.`hospital_pk`";
- $query = "SELECT {$fields} FROM {$table} JOIN {$subquery} ON {$condition} ORDER BY {$order}";
- }
- // get the data from the database
- $fields_string = array(
- 'hospital_pk',
- 'state',
- 'ccn',
- 'hospital_name',
- 'address',
- 'city',
- 'zip',
- 'hospital_subtype',
- 'fips_code',
- );
- $fields_int = array(
- 'publication_date',
- 'collection_week',
- 'is_metro_micro',
- 'total_beds_7_day_sum',
- 'all_adult_hospital_beds_7_day_sum',
- 'all_adult_hospital_inpatient_beds_7_day_sum',
- 'inpatient_beds_used_7_day_sum',
- 'all_adult_hospital_inpatient_bed_occupied_7_day_sum',
- 'total_adult_patients_hosp_confirmed_suspected_covid_7d_sum',
- 'total_adult_patients_hospitalized_confirmed_covid_7_day_sum',
- 'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_sum',
- 'total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum',
- 'inpatient_beds_7_day_sum',
- 'total_icu_beds_7_day_sum',
- 'total_staffed_adult_icu_beds_7_day_sum',
- 'icu_beds_used_7_day_sum',
- 'staffed_adult_icu_bed_occupancy_7_day_sum',
- 'staffed_icu_adult_patients_confirmed_suspected_covid_7d_sum',
- 'staffed_icu_adult_patients_confirmed_covid_7_day_sum',
- 'total_patients_hospitalized_confirmed_influenza_7_day_sum',
- 'icu_patients_confirmed_influenza_7_day_sum',
- 'total_patients_hosp_confirmed_influenza_and_covid_7d_sum',
- 'total_beds_7_day_coverage',
- 'all_adult_hospital_beds_7_day_coverage',
- 'all_adult_hospital_inpatient_beds_7_day_coverage',
- 'inpatient_beds_used_7_day_coverage',
- 'all_adult_hospital_inpatient_bed_occupied_7_day_coverage',
- 'total_adult_patients_hosp_confirmed_suspected_covid_7d_cov',
- 'total_adult_patients_hospitalized_confirmed_covid_7_day_coverage',
- 'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_cov',
- 'total_pediatric_patients_hosp_confirmed_covid_7d_cov',
- 'inpatient_beds_7_day_coverage',
- 'total_icu_beds_7_day_coverage',
- 'total_staffed_adult_icu_beds_7_day_coverage',
- 'icu_beds_used_7_day_coverage',
- 'staffed_adult_icu_bed_occupancy_7_day_coverage',
- 'staffed_icu_adult_patients_confirmed_suspected_covid_7d_cov',
- 'staffed_icu_adult_patients_confirmed_covid_7_day_coverage',
- 'total_patients_hospitalized_confirmed_influenza_7_day_coverage',
- 'icu_patients_confirmed_influenza_7_day_coverage',
- 'total_patients_hosp_confirmed_influenza_and_covid_7d_cov',
- 'previous_day_admission_adult_covid_confirmed_7_day_sum',
- 'previous_day_admission_adult_covid_confirmed_18_19_7_day_sum',
- 'previous_day_admission_adult_covid_confirmed_20_29_7_day_sum',
- 'previous_day_admission_adult_covid_confirmed_30_39_7_day_sum',
- 'previous_day_admission_adult_covid_confirmed_40_49_7_day_sum',
- 'previous_day_admission_adult_covid_confirmed_50_59_7_day_sum',
- 'previous_day_admission_adult_covid_confirmed_60_69_7_day_sum',
- 'previous_day_admission_adult_covid_confirmed_70_79_7_day_sum',
- 'previous_day_admission_adult_covid_confirmed_80plus_7_day_sum',
- 'previous_day_admission_adult_covid_confirmed_unknown_7_day_sum',
- 'previous_day_admission_pediatric_covid_confirmed_7_day_sum',
- 'previous_day_covid_ed_visits_7_day_sum',
- 'previous_day_admission_adult_covid_suspected_7_day_sum',
- 'previous_day_admission_adult_covid_suspected_18_19_7_day_sum',
- 'previous_day_admission_adult_covid_suspected_20_29_7_day_sum',
- 'previous_day_admission_adult_covid_suspected_30_39_7_day_sum',
- 'previous_day_admission_adult_covid_suspected_40_49_7_day_sum',
- 'previous_day_admission_adult_covid_suspected_50_59_7_day_sum',
- 'previous_day_admission_adult_covid_suspected_60_69_7_day_sum',
- 'previous_day_admission_adult_covid_suspected_70_79_7_day_sum',
- 'previous_day_admission_adult_covid_suspected_80plus_7_day_sum',
- 'previous_day_admission_adult_covid_suspected_unknown_7_day_sum',
- 'previous_day_admission_pediatric_covid_suspected_7_day_sum',
- 'previous_day_total_ed_visits_7_day_sum',
- 'previous_day_admission_influenza_confirmed_7_day_sum',
- );
- $fields_float = array(
- 'total_beds_7_day_avg',
- 'all_adult_hospital_beds_7_day_avg',
- 'all_adult_hospital_inpatient_beds_7_day_avg',
- 'inpatient_beds_used_7_day_avg',
- 'all_adult_hospital_inpatient_bed_occupied_7_day_avg',
- 'total_adult_patients_hosp_confirmed_suspected_covid_7d_avg',
- 'total_adult_patients_hospitalized_confirmed_covid_7_day_avg',
- 'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_avg',
- 'total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg',
- 'inpatient_beds_7_day_avg',
- 'total_icu_beds_7_day_avg',
- 'total_staffed_adult_icu_beds_7_day_avg',
- 'icu_beds_used_7_day_avg',
- 'staffed_adult_icu_bed_occupancy_7_day_avg',
- 'staffed_icu_adult_patients_confirmed_suspected_covid_7d_avg',
- 'staffed_icu_adult_patients_confirmed_covid_7_day_avg',
- 'total_patients_hospitalized_confirmed_influenza_7_day_avg',
- 'icu_patients_confirmed_influenza_7_day_avg',
- 'total_patients_hosp_confirmed_influenza_and_covid_7d_avg',
- );
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries the `covid_hosp_facility` table for hospital discovery
-// $state (optional): 2-letter state abbreviation
-// $ccn (optional): cms certification number (ccn) of the given facility
- // $city (optional): name of the city
- // $zip (optional): 5-digit ZIP code
- // $fips_code (optional): 5-digit county FIPS code
-// note: exactly one of the above parameters should be non-null. if more than
-// one is non-null, then only the first filter will be used.
-function get_covid_hosp_facility_lookup($state, $ccn, $city, $zip, $fips_code) {
- $epidata = array();
- $table = '`covid_hosp_facility` c';
- $fields = implode(', ', array(
- 'c.`hospital_pk`',
- 'MAX(c.`state`) `state`',
- 'MAX(c.`ccn`) `ccn`',
- 'MAX(c.`hospital_name`) `hospital_name`',
- 'MAX(c.`address`) `address`',
- 'MAX(c.`city`) `city`',
- 'MAX(c.`zip`) `zip`',
- 'MAX(c.`hospital_subtype`) `hospital_subtype`',
- 'MAX(c.`fips_code`) `fips_code`',
- 'MAX(c.`is_metro_micro`) `is_metro_micro`',
- ));
- // basic query info
- $group = 'c.`hospital_pk`';
- $order = "c.`hospital_pk` ASC";
- // build the filter
- // these are all fast because the table has indexes on each of these fields
- $condition = 'FALSE';
- if ($state !== null) {
- $condition = filter_strings('c.`state`', $state);
- } else if ($ccn !== null) {
- $condition = filter_strings('c.`ccn`', $ccn);
- } else if ($city !== null) {
- $condition = filter_strings('c.`city`', $city);
- } else if ($zip !== null) {
- $condition = filter_strings('c.`zip`', $zip);
- } else if ($fips_code !== null) {
- $condition = filter_strings('c.`fips_code`', $fips_code);
- }
- // build the final query
- $query = "SELECT {$fields} FROM {$table} WHERE ({$condition}) GROUP BY {$group} ORDER BY {$order}";
- // get the data from the database
- $fields_string = array(
- 'hospital_pk',
- 'state',
- 'ccn',
- 'hospital_name',
- 'address',
- 'city',
- 'zip',
- 'hospital_subtype',
- 'fips_code',
- );
- $fields_int = array('is_metro_micro');
- $fields_float = null;
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-// queries a bunch of epidata tables
-function get_meta() {
- // query and return metadata
- return array(array(
- '_api' => array(
- 'minute' => meta_api(60),
- 'hour' => meta_api(60 * 60),
- 'day' => meta_api(60 * 60 * 24),
- 'week' => meta_api(60 * 60 * 24 * 7),
- 'month' => meta_api(60 * 60 * 24 * 30),
- ),
- 'fluview' => meta_fluview(),
- 'twitter' => meta_twitter(),
- 'wiki' => meta_wiki(),
- 'delphi' => meta_delphi(),
- ));
-}
-function meta_api($seconds) {
- $epidata = array();
- $seconds = intval($seconds);
- $query = "SELECT count(1) `num_hits`, count(distinct `ip`) `unique_ips`, sum(`num_rows`) `rows_returned` FROM `api_analytics` WHERE `datetime` >= date_sub(now(), interval {$seconds} second)";
- $fields_int = array('num_hits', 'unique_ips', 'rows_returned');
- execute_query($query, $epidata, null, $fields_int, null);
- return count($epidata) === 0 ? null : $epidata;
-}
-function meta_fluview() {
- $epidata = array();
- $query = 'SELECT max(`release_date`) `latest_update`, max(`issue`) `latest_issue`, count(1) `table_rows` FROM `fluview`';
- $fields_string = array('latest_update');
- $fields_int = array('latest_issue', 'table_rows');
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- return count($epidata) === 0 ? null : $epidata;
-}
-function meta_twitter() {
- $epidata = array();
- $query = 'SELECT x.`date` `latest_update`, x.`table_rows`, count(distinct t.`state`) `num_states` FROM (SELECT max(`date`) `date`, count(1) `table_rows` FROM `twitter`) x JOIN `twitter` t ON t.`date` = x.`date`';
- $fields_string = array('latest_update');
- $fields_int = array('num_states', 'table_rows');
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- return count($epidata) === 0 ? null : $epidata;
-}
-function meta_wiki() {
- $epidata = array();
- //$query = 'SELECT date_sub(max(`datetime`), interval 5 hour) `latest_update`, count(1) `table_rows` FROM `wiki_meta`'; // GMT to EST
- $query = 'SELECT max(`datetime`) `latest_update`, count(1) `table_rows` FROM `wiki_meta`';
- $fields_string = array('latest_update');
- $fields_int = array('table_rows');
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- return count($epidata) === 0 ? null : $epidata;
-}
-function get_meta_norostat() {
- // put behind appropriate auth check
- $epidata_releases = array();
- $query = 'SELECT DISTINCT `release_date` FROM `norostat_raw_datatable_version_list`';
- execute_query($query, $epidata_releases, array('release_date'), null, null);
- $epidata_locations = array();
- $query = 'SELECT DISTINCT `location` FROM `norostat_raw_datatable_location_pool`';
- execute_query($query, $epidata_locations, array('location'), null, null);
- $epidata = array(
- "releases" => $epidata_releases,
- "locations" => $epidata_locations
- );
- return $epidata;
-}
-function get_meta_afhsb() {
- // put behind appropriate auth check
- $table1 = 'afhsb_00to13_state';
- $table2 = 'afhsb_13to17_state';
- $epidata = array();
- $string_keys = array('state', 'country');
- $int_keys = array('flu_severity');
- foreach($string_keys as $key) {
- $epidata_key = array();
- $query = "SELECT DISTINCT `{$key}` FROM (select `{$key}` from `{$table1}` union select `{$key}` from `{$table2}`) t";
- execute_query($query, $epidata_key, array($key), null, null);
- $epidata[$key] = $epidata_key;
- }
- foreach($int_keys as $key) {
- $epidata_key = array();
- $query = "SELECT DISTINCT `{$key}` FROM (select `{$key}` from `{$table1}` union select `{$key}` from `{$table2}`) t";
-
- execute_query($query, $epidata_key, null, array($key), null);
- $epidata[$key] = $epidata_key;
- }
- return $epidata;
-}
-function meta_delphi() {
- $epidata = array();
- $query = 'SELECT `system`, min(`epiweek`) `first_week`, max(`epiweek`) `last_week`, count(1) `num_weeks` FROM `forecasts` GROUP BY `system` ORDER BY `system` ASC';
- $fields_string = array('system');
- $fields_int = array('first_week', 'last_week', 'num_weeks');
- execute_query($query, $epidata, $fields_string, $fields_int, null);
- return count($epidata) === 0 ? null : $epidata;
-}
-
-function get_covidcast_nowcast($source, $signals, $sensor_names, $time_type, $geo_type, $time_values, $geo_values, $as_of, $issues, $lag) {
- // required for `mysqli_real_escape_string`
- global $dbh;
- $source = mysqli_real_escape_string($dbh, $source);
- $time_type = mysqli_real_escape_string($dbh, $time_type);
- $geo_type = mysqli_real_escape_string($dbh, $geo_type);
- // basic query info
- $table = '`covidcast_nowcast` t';
- $fields = "t.`signal`, t.`time_value`, t.`geo_value`, t.`value`, t.`issue`, t.`lag`";
- $order = "t.`signal` ASC, t.`time_value` ASC, t.`geo_value` ASC, t.`issue` ASC";
- // data type of each field
- $fields_string = array('geo_value', 'signal');
- $fields_int = array('time_value', 'issue', 'lag');
- $fields_float = array('value');
- // build the source, signal, time, and location (type and id) filters
- $condition_source = "t.`source` = '{$source}'";
- $condition_signal = filter_strings('t.`signal`', $signals);
- $condition_sensor_name = filter_strings('t.`sensor_name`', $sensor_names);
- $condition_time_type = "t.`time_type` = '{$time_type}'";
- $condition_geo_type = "t.`geo_type` = '{$geo_type}'";
- $condition_time_value = filter_integers('t.`time_value`', $time_values);
-
- if ($geo_values === '*') {
- // the wildcard query should return data for all locations in `geo_type`
- $condition_geo_value = 'TRUE';
- } else if (is_array($geo_values)) {
- // return data for multiple locations
- $condition_geo_value = filter_strings('t.`geo_value`', $geo_values);
- } else {
- // return data for a particular location
- $geo_escaped_value = mysqli_real_escape_string($dbh, $geo_values);
- $condition_geo_value = "t.`geo_value` = '{$geo_escaped_value}'";
- }
- $conditions = "({$condition_source}) AND ({$condition_signal}) AND ({$condition_sensor_name}) AND ({$condition_time_type}) AND ({$condition_geo_type}) AND ({$condition_time_value}) AND ({$condition_geo_value})";
-
- $subquery = "";
- if ($issues !== null) {
- //build the issue filter
- $condition_issue = filter_integers('t.`issue`', $issues);
- $query = "SELECT {$fields} FROM {$table} {$subquery} WHERE {$conditions} AND ({$condition_issue}) ORDER BY {$order}";
- } else if ($lag !== null) {
- //build the lag filter
- $condition_lag = "(t.`lag` = {$lag})";
- $query = "SELECT {$fields} FROM {$table} {$subquery} WHERE {$conditions} AND ({$condition_lag}) ORDER BY {$order}";
- } else if ($as_of !== null) {
- // fetch most recent issues with as of
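- // the subquery finds, for each (time, source, signal, geo) group, the
- // newest issue on or before `as_of`; joining back on it keeps only that row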
- $sub_condition_asof = "(`issue` <= {$as_of})";
- $sub_fields = "max(`issue`) `max_issue`, `time_type`, `time_value`, `source`, `signal`, `geo_type`, `geo_value`";
- $sub_group = "`time_type`, `time_value`, `source`, `signal`, `geo_type`, `geo_value`";
- $sub_condition = "x.`max_issue` = t.`issue` AND x.`time_type` = t.`time_type` AND x.`time_value` = t.`time_value` AND x.`source` = t.`source` AND x.`signal` = t.`signal` AND x.`geo_type` = t.`geo_type` AND x.`geo_value` = t.`geo_value`";
- $subquery = "JOIN (SELECT {$sub_fields} FROM {$table} WHERE ({$conditions} AND {$sub_condition_asof}) GROUP BY {$sub_group}) x ON {$sub_condition}";
- $condition_version = 'TRUE';
- $query = "SELECT {$fields} FROM {$table} {$subquery} WHERE {$conditions} AND ({$condition_version}) ORDER BY {$order}";
- } else {
- // fetch most recent issue fast
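- // ROW_NUMBER() numbers each group's rows from newest to oldest issue,
- // so keeping only row = 1 returns the latest issue per series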
- $query = "WITH t as (SELECT {$fields}, ROW_NUMBER() OVER (PARTITION BY t.`time_type`, t.`time_value`, t.`source`, t.`signal`, t.`geo_type`, t.`geo_value` ORDER BY t.`issue` DESC) row FROM {$table} {$subquery} WHERE {$conditions}) SELECT {$fields} FROM t where row = 1 ORDER BY {$order}";
- }
- // get the data from the database
- $epidata = array();
- execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
- // return the data
- return count($epidata) === 0 ? null : $epidata;
-}
-
-
-// all responses will have a result field
-$data = array('result' => -1);
-// connect to the database
-if(database_connect()) {
-
- // select the data source
- // endpoint parameter with a fallback to source parameter for compatibility reasons
- $endpoint = isset($_REQUEST['endpoint']) ? strtolower($_REQUEST['endpoint']) : (isset($_REQUEST['source']) ? strtolower($_REQUEST['source']) : null);
-
- if($endpoint === 'fluview') {
- if(require_all($data, array('epiweeks', 'regions'))) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $regions = extract_values($_REQUEST['regions'], 'str');
- $issues = isset($_REQUEST['issues']) ? extract_values($_REQUEST['issues'], 'int') : null;
- $lag = isset($_REQUEST['lag']) ? intval($_REQUEST['lag']) : null;
- $authorized = isset($_REQUEST['auth']) && $_REQUEST['auth'] === $AUTH['fluview'];
- // get the data
- $epidata = get_fluview($epiweeks, $regions, $issues, $lag, $authorized);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'fluview_meta') {
- // get the data
- $epidata = meta_fluview();
- store_result($data, $epidata);
- } else if ($endpoint === 'fluview_clinical') {
- if(require_all($data, array('epiweeks', 'regions'))) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $regions = extract_values($_REQUEST['regions'], 'str');
- $issues = isset($_REQUEST['issues']) ? extract_values($_REQUEST['issues'], 'int') : null;
- $lag = isset($_REQUEST['lag']) ? intval($_REQUEST['lag']) : null;
- // get the data
- $epidata = get_fluview_clinical($epiweeks, $regions, $issues, $lag);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'flusurv') {
- if(require_all($data, array('epiweeks', 'locations'))) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $locations = extract_values($_REQUEST['locations'], 'str');
- $issues = isset($_REQUEST['issues']) ? extract_values($_REQUEST['issues'], 'int') : null;
- $lag = isset($_REQUEST['lag']) ? intval($_REQUEST['lag']) : null;
- // get the data
- $epidata = get_flusurv($epiweeks, $locations, $issues, $lag);
- store_result($data, $epidata);
- }
- } else if ($endpoint === 'paho_dengue') {
- if(require_all($data, array('epiweeks', 'regions'))) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $regions = extract_values($_REQUEST['regions'], 'str');
- $issues = isset($_REQUEST['issues']) ? extract_values($_REQUEST['issues'], 'int') : null;
- $lag = isset($_REQUEST['lag']) ? intval($_REQUEST['lag']) : null;
- // get the data
- $epidata = get_paho_dengue($epiweeks, $regions, $issues, $lag);
- store_result($data, $epidata);
- }
- } else if ($endpoint === 'ecdc_ili') {
- if(require_all($data, array('epiweeks', 'regions'))) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $regions = extract_values($_REQUEST['regions'], 'str');
- $issues = isset($_REQUEST['issues']) ? extract_values($_REQUEST['issues'], 'int') : null;
- $lag = isset($_REQUEST['lag']) ? intval($_REQUEST['lag']) : null;
- // get the data
- $epidata = get_ecdc_ili($epiweeks, $regions, $issues, $lag);
- store_result($data, $epidata);
- }
- } else if ($endpoint === 'kcdc_ili') {
- if(require_all($data, array('epiweeks', 'regions'))) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $regions = extract_values($_REQUEST['regions'], 'str');
- $issues = isset($_REQUEST['issues']) ? extract_values($_REQUEST['issues'], 'int') : null;
- $lag = isset($_REQUEST['lag']) ? intval($_REQUEST['lag']) : null;
- // get the data
- $epidata = get_kcdc_ili($epiweeks, $regions, $issues, $lag);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'ilinet' || $endpoint === 'stateili') {
- // these two sources are now combined into fluview
- $data['message'] = 'use fluview instead';
- } else if($endpoint === 'gft') {
- if(require_all($data, array('epiweeks', 'locations'))) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $locations = extract_values($_REQUEST['locations'], 'str');
- // get the data
- $epidata = get_gft($epiweeks, $locations);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'ght') {
- if(require_all($data, array('auth', 'epiweeks', 'locations', 'query'))) {
- if($_REQUEST['auth'] === $AUTH['ght']) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $locations = extract_values($_REQUEST['locations'], 'str');
- $query = $_REQUEST['query'];
- // get the data
- $epidata = get_ght($epiweeks, $locations, $query);
- store_result($data, $epidata);
- } else {
- $data['message'] = 'unauthenticated';
- }
- }
- } else if($endpoint === 'twitter') {
- if(require_all($data, array('auth', 'locations'))) {
- if($_REQUEST['auth'] === $AUTH['twitter']) {
- // parse the request
- $locations = extract_values($_REQUEST['locations'], 'str');
- if(require_any($data, array('dates', 'epiweeks'))) {
- if(isset($_REQUEST['dates'])) {
- $resolution = 'daily';
- $dates = extract_values($_REQUEST['dates'], 'int');
- } else {
- $resolution = 'weekly';
- $dates = extract_values($_REQUEST['epiweeks'], 'int');
- }
- // get the data
- $epidata = get_twitter($locations, $dates, $resolution);
- store_result($data, $epidata);
- }
- } else {
- $data['message'] = 'unauthenticated';
- }
- }
- } else if($endpoint === 'wiki') {
- if(require_all($data, array('articles', 'language'))) {
- // parse the request
- $articles = extract_values($_REQUEST['articles'], 'str');
- $language = $_REQUEST['language'];
- if(require_any($data, array('dates', 'epiweeks'))) {
- if(isset($_REQUEST['dates'])) {
- $resolution = 'daily';
- $dates = extract_values($_REQUEST['dates'], 'int');
- } else {
- $resolution = 'weekly';
- $dates = extract_values($_REQUEST['epiweeks'], 'int');
- }
- $hours = isset($_REQUEST['hours']) ? extract_values($_REQUEST['hours'], 'int') : null;
- // get the data
- $epidata = get_wiki($articles, $language, $dates, $resolution, $hours);
- store_result($data, $epidata);
- }
- }
- } else if($endpoint === 'quidel') {
- if(require_all($data, array('auth', 'locations', 'epiweeks'))) {
- if($_REQUEST['auth'] === $AUTH['quidel']) {
- // parse the request
- $locations = extract_values($_REQUEST['locations'], 'str');
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- // get the data
- $epidata = get_quidel($locations, $epiweeks);
- store_result($data, $epidata);
- } else {
- $data['message'] = 'unauthenticated';
- }
- }
- } else if($endpoint === 'norostat') {
- if(require_all($data, array('auth', 'location', 'epiweeks'))) {
- if($_REQUEST['auth'] === $AUTH['norostat']) {
- // parse the request
- $location = $_REQUEST['location'];
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- // get the data
- $epidata = get_norostat($location, $epiweeks);
- store_result($data, $epidata);
- } else {
- $data['message'] = 'unauthenticated';
- }
- }
- } else if($endpoint === 'afhsb') {
- if(require_all($data, array('auth', 'locations', 'epiweeks', 'flu_types'))) {
- if($_REQUEST['auth'] === $AUTH['afhsb']) {
- // parse the request
- $locations = extract_values($_REQUEST['locations'], 'str');
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $flu_types = extract_values($_REQUEST['flu_types'], 'str');
- // get the data
- $epidata = get_afhsb($locations, $epiweeks, $flu_types);
- store_result($data, $epidata);
- } else {
- $data['message'] = 'unauthenticated';
- }
- }
- } else if($endpoint === 'nidss_flu') {
- if(require_all($data, array('epiweeks', 'regions'))) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $regions = extract_values($_REQUEST['regions'], 'str');
- $issues = isset($_REQUEST['issues']) ? extract_values($_REQUEST['issues'], 'int') : null;
- $lag = isset($_REQUEST['lag']) ? intval($_REQUEST['lag']) : null;
- // get the data
- $epidata = get_nidss_flu($epiweeks, $regions, $issues, $lag);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'nidss_dengue') {
- if(require_all($data, array('epiweeks', 'locations'))) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $locations = extract_values($_REQUEST['locations'], 'str');
- // get the data
- $epidata = get_nidss_dengue($epiweeks, $locations);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'delphi') {
- if(require_all($data, array('system', 'epiweek'))) {
- // parse the request
- $system = $_REQUEST['system'];
- $epiweek = intval($_REQUEST['epiweek']);
- // get the data
- $epidata = get_forecast($system, $epiweek);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'signals') {
- // this source has been replaced by sensors
- $data['message'] = 'use sensors instead';
- } else if($endpoint === 'cdc') {
- if(require_all($data, array('auth', 'epiweeks', 'locations'))) {
- if($_REQUEST['auth'] === $AUTH['cdc']) {
- // parse the request
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- $locations = extract_values($_REQUEST['locations'], 'str');
- // get the data
- $epidata = get_cdc($epiweeks, $locations);
- store_result($data, $epidata);
- } else {
- $data['message'] = 'unauthenticated';
- }
- }
- } else if($endpoint === 'sensors') {
- if(require_all($data, array('names', 'locations', 'epiweeks'))) {
- if(!array_key_exists('auth', $_REQUEST)) {
- $auth_tokens_presented = array();
- } else {
- $auth_tokens_presented = extract_values($_REQUEST['auth'], 'str');
- }
- $names = extract_values($_REQUEST['names'], 'str');
- $n_names = count($names);
- $n_auth_tokens_presented = count($auth_tokens_presented);
- $max_valid_granular_tokens_per_name = max(array_map('count', $GRANULAR_SENSOR_AUTH_TOKENS));
- // The number of valid granular tokens bounds the number of auth token
- // comparisons a single query could perform. Use the max number of valid
- // granular auth tokens per name in the check below to prevent leakage of
- // sensor names (though it may reveal how many sensor names exist) via
- // this interface. Treat all sensors as non-open for convenience of
- // calculation.
- if($n_names === 0) {
- // Check whether no names were provided to prevent edge-case issues in error message below, and in case surrounding behavior changes in the future:
- $data['message'] = 'no sensor names provided';
- } else if($n_auth_tokens_presented > 1) {
- $data['message'] = 'currently, only a single auth token is allowed to be presented at a time; please issue a separate query for each sensor name using only the corresponding token';
- } else if(
- // Check whether max number of presented-vs.-acceptable token comparisons that would be performed is over the set limits, avoiding calculation of numbers > PHP_INT_MAX/100:
- // Global auth token comparison limit check:
- $n_auth_tokens_presented > $MAX_GLOBAL_AUTH_CHECKS_PER_SENSOR_QUERY ||
- // Granular auth token comparison limit check:
- $n_names > (int)((PHP_INT_MAX/100-1)/max(1,$max_valid_granular_tokens_per_name)) ||
- $n_auth_tokens_presented > (int)(PHP_INT_MAX/100/max(1,$n_names*$max_valid_granular_tokens_per_name)) ||
- $n_auth_tokens_presented * $n_names * $max_valid_granular_tokens_per_name > $MAX_GRANULAR_AUTH_CHECKS_PER_SENSOR_QUERY
- ) {
- $data['message'] = 'too many sensors requested and/or auth tokens presented; please divide sensors into batches and/or use only the tokens needed for the sensors requested';
- } else if(count($auth_tokens_presented) > $MAX_AUTH_KEYS_PROVIDED_PER_SENSOR_QUERY) {
- // this check should be redundant with >1 check as well as global check above
- $data['message'] = 'too many auth tokens presented';
- } else {
- $unauthenticated_or_nonexistent_sensors = array();
- foreach($names as $name) {
- $sensor_is_open = in_array($name, $OPEN_SENSORS);
- // test whether they provided the "global" auth token that works for all sensors:
- $sensor_authenticated_globally = in_array($AUTH['sensors'], $auth_tokens_presented);
- // test whether they provided a "granular" auth token for one of the
- // sensor_subsets containing this sensor (if any):
- $sensor_authenticated_granularly = false;
- if(array_key_exists($name, $GRANULAR_SENSOR_AUTH_TOKENS)) {
- $acceptable_granular_tokens_for_sensor = $GRANULAR_SENSOR_AUTH_TOKENS[$name];
- // check for nonempty intersection between provided and acceptable
- // granular auth tokens:
- foreach($acceptable_granular_tokens_for_sensor as $acceptable_granular_token) {
- if(in_array($acceptable_granular_token, $auth_tokens_presented)) {
- $sensor_authenticated_granularly = true;
- break;
- }
- }
- } // (else: there are no granular tokens for this sensor; can't authenticate granularly)
- if(! $sensor_is_open &&
- ! $sensor_authenticated_globally &&
- ! $sensor_authenticated_granularly) {
- // authentication failed for this sensor; append to list:
- array_push($unauthenticated_or_nonexistent_sensors, $name);
- }
- }
- if (!empty($unauthenticated_or_nonexistent_sensors)) {
- $data['message'] = 'unauthenticated/nonexistent sensor(s): ' . implode(',', $unauthenticated_or_nonexistent_sensors);
- // // Alternative message that may enable shorter tokens:
- // $data['message'] = 'some/all sensors requested were unauthenticated/nonexistent';
- } else {
- // parse the request
- $locations = extract_values($_REQUEST['locations'], 'str');
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- // get the data
- $epidata = get_sensors($names, $locations, $epiweeks);
- store_result($data, $epidata);
- }
- }
- }
- } else if($endpoint === 'dengue_sensors') {
- if(require_all($data, array('auth', 'names', 'locations', 'epiweeks'))) {
- if($_REQUEST['auth'] === $AUTH['sensors']) {
- // parse the request
- $names = extract_values($_REQUEST['names'], 'str');
- $locations = extract_values($_REQUEST['locations'], 'str');
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- // get the data
- $epidata = get_dengue_sensors($names, $locations, $epiweeks);
- store_result($data, $epidata);
- } else {
- $data['message'] = 'unauthenticated';
- }
- }
- } else if($endpoint === 'nowcast') {
- if(require_all($data, array('locations', 'epiweeks'))) {
- // parse the request
- $locations = extract_values($_REQUEST['locations'], 'str');
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- // get the data
- $epidata = get_nowcast($locations, $epiweeks);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'dengue_nowcast') {
- if(require_all($data, array('locations', 'epiweeks'))) {
- // parse the request
- $locations = extract_values($_REQUEST['locations'], 'str');
- $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
- // get the data
- $epidata = get_dengue_nowcast($locations, $epiweeks);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'meta') {
- // get the data
- $epidata = get_meta();
- store_result($data, $epidata);
- } else if($endpoint === 'meta_norostat') {
- if(require_all($data, array('auth'))) {
- if($_REQUEST['auth'] === $AUTH['norostat']) {
- $epidata = get_meta_norostat();
- store_result($data, $epidata);
- } else {
- $data['message'] = 'unauthenticated';
- }
- }
- } else if($endpoint === 'meta_afhsb') {
- if(require_all($data, array('auth'))) {
- if($_REQUEST['auth'] === $AUTH['afhsb']) {
- $epidata = get_meta_afhsb();
- store_result($data, $epidata);
- } else {
- $data['message'] = 'unauthenticated';
- }
- }
- } else if($endpoint === 'covidcast') {
- if(require_all($data, array('data_source', 'time_type', 'geo_type', 'time_values'))
- && require_any($data, array('signal', 'signals'))
- && require_any($data, array('geo_value', 'geo_values'))) {
- // parse the request
- $time_values = extract_dates($_REQUEST['time_values']);
- $as_of = isset($_REQUEST['as_of']) ? parse_date($_REQUEST['as_of']) : null;
- $issues = isset($_REQUEST['issues']) ? extract_dates($_REQUEST['issues']) : null;
- $lag = isset($_REQUEST['lag']) ? intval($_REQUEST['lag']) : null;
- $signals = extract_values(isset($_REQUEST['signals']) ? $_REQUEST['signals'] : $_REQUEST['signal'], 'string');
- $geo_values = isset($_REQUEST['geo_value']) ? $_REQUEST['geo_value'] : extract_values($_REQUEST['geo_values'], 'string');
- // get the data
- $epidata = get_covidcast(
- $_REQUEST['data_source'],
- $signals,
- $_REQUEST['time_type'],
- $_REQUEST['geo_type'],
- $time_values,
- $geo_values,
- $as_of,
- $issues,
- $lag);
- if(isset($_REQUEST['format']) && $_REQUEST['format']=="tree") {
- //organize results by signal
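- // note: this change-of-key grouping relies on rows being ordered by
- // `signal` first in the query's ORDER BY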
- $epi_tree = array();
- $key = -1;
- foreach ($epidata as $row) {
- if ($key != $row['signal']) {
- $key = $row['signal'];
- $epi_tree[$key] = array();
- }
- unset($row['signal']);
- array_push($epi_tree[$key],$row);
- }
- $epidata = array($epi_tree);
- }
- store_result($data, $epidata);
- }
- } else if($endpoint === 'covidcast_meta') {
- // get the metadata
- $epidata = get_covidcast_meta();
- store_result($data, $epidata);
- } else if($endpoint === 'signal_dashboard_status') {
- $signal_dash_data = get_signal_dash_status_data();
- store_result($data, $signal_dash_data);
- } else if($endpoint === 'signal_dashboard_coverage') {
- $signal_dash_data = get_signal_dash_coverage_data();
- store_result($data, $signal_dash_data);
- } else if($endpoint === 'covid_hosp' || $endpoint === 'covid_hosp_state_timeseries') {
- if(require_all($data, array('states', 'dates'))) {
- // parse the request
- $states = extract_values($_REQUEST['states'], 'str');
- $dates = extract_values($_REQUEST['dates'], 'int');
- $issues = isset($_REQUEST['issues']) ? extract_values($_REQUEST['issues'], 'int') : null;
- // get the data
- $epidata = get_covid_hosp_state_timeseries($states, $dates, $issues);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'covid_hosp_facility') {
- if(require_all($data, array('hospital_pks', 'collection_weeks'))) {
- // parse the request
- $hospital_pks = extract_values($_REQUEST['hospital_pks'], 'str');
- $collection_weeks = extract_values($_REQUEST['collection_weeks'], 'int');
- $publication_dates = isset($_REQUEST['publication_dates']) ? extract_values($_REQUEST['publication_dates'], 'int') : null;
- // get the data
- $epidata = get_covid_hosp_facility($hospital_pks, $collection_weeks, $publication_dates);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'covid_hosp_facility_lookup') {
- if(require_any($data, array('state', 'ccn', 'city', 'zip', 'fips_code'))) {
- $state = isset($_REQUEST['state']) ? extract_values($_REQUEST['state'], 'str') : null;
- $ccn = isset($_REQUEST['ccn']) ? extract_values($_REQUEST['ccn'], 'str') : null;
- $city = isset($_REQUEST['city']) ? extract_values($_REQUEST['city'], 'str') : null;
- $zip = isset($_REQUEST['zip']) ? extract_values($_REQUEST['zip'], 'str') : null;
- $fips_code = isset($_REQUEST['fips_code']) ? extract_values($_REQUEST['fips_code'], 'str') : null;
- // get the data
- $epidata = get_covid_hosp_facility_lookup($state, $ccn, $city, $zip, $fips_code);
- store_result($data, $epidata);
- }
- } else if($endpoint === 'covidcast_nowcast') {
- if(require_all($data, array('data_source', 'time_type', 'geo_type', 'time_values', 'signals', 'sensor_names'))
- && require_any($data, array('geo_value', 'geo_values'))) {
- // parse the request
- $time_values = extract_dates($_REQUEST['time_values']);
- $as_of = isset($_REQUEST['as_of']) ? parse_date($_REQUEST['as_of']) : null;
- $issues = isset($_REQUEST['issues']) ? extract_dates($_REQUEST['issues']) : null;
- $lag = isset($_REQUEST['lag']) ? intval($_REQUEST['lag']) : null;
- $signals = extract_values(isset($_REQUEST['signals']) ? $_REQUEST['signals'] : $_REQUEST['signal'], 'string');
- $sensor_names = extract_values($_REQUEST['sensor_names'], 'str');
- $geo_values = isset($_REQUEST['geo_value']) ? $_REQUEST['geo_value'] : extract_values($_REQUEST['geo_values'], 'string');
- // get the data
- $epidata = get_covidcast_nowcast(
- $_REQUEST['data_source'],
- $signals,
- $sensor_names,
- $_REQUEST['time_type'],
- $_REQUEST['geo_type'],
- $time_values,
- $geo_values,
- $as_of,
- $issues,
- $lag);
- store_result($data, $epidata);
- }
- } else {
- $data['message'] = 'no data source specified';
- }
- // API analytics
- record_analytics($endpoint, $data);
-} else {
- $data['message'] = 'database error';
-}
-
-if(isset($_REQUEST['format']) && $_REQUEST['format'] == "csv") {
- send_csv($data);
-} else if(isset($_REQUEST['format']) && $_REQUEST['format'] == "json") {
- send_json($data);
-} else {
- // send the response as a json object
- header('Content-Type: application/json');
- echo json_encode($data);
-}
-?>
diff --git a/src/server/api_helpers.php b/src/server/api_helpers.php
deleted file mode 100644
index 076033801..000000000
--- a/src/server/api_helpers.php
+++ /dev/null
@@ -1,420 +0,0 @@
- } else if($last > $first) {
- // add the range as an array
- array_push($values, array($first, $last));
- } else {
- // the range is inverted, this is an error
- return null;
- }
- } else {
- // this is a single value
- if($type === 'int') {
- // cast to integer
- $value = intval($part);
- } else {
- // interpret the string literally
- $value = $part;
- }
- // add the extracted value to the list
- array_push($values, $value);
- }
- }
- // success, return the list
- return $values;
-}
-
-/**
- * parses a given string in format YYYYMMDD or YYYY-MM-DD to a number in the form YYYYMMDD
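- * e.g. both '2020-03-01' and '20200301' yield 20200301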
- */
-function parse_date($s) {
- return intval(str_replace('-', '', $s));
-}
-
-// extracts an array of values and/or ranges from a string
-// $str: the string to parse
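-// e.g. '20200101,2020-03-01:2020-03-31' yields
-// array(20200101, array(20200301, 20200331))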
-function extract_dates($str) {
- if($str === null || strlen($str) === 0) {
- // nothing to do
- return null;
- }
- $values = array();
- // split on commas and loop over each entry, which could be either a single value or a range of values
- $parts = explode(',', $str);
-
- $push_range = function($first, $last) {
- $first = parse_date($first);
- $last = parse_date($last);
- if($last === $first) {
- // the first and last numbers are the same, just treat it as a single value
- return $first;
- }
- if($last > $first) {
- // add the range as an array
- return array($first, $last);
- }
- // the range is inverted, this is an error
- return false;
- };
-
- foreach($parts as $part) {
- if(strpos($part, '-') === false && strpos($part, ':') === false) {
- // YYYYMMDD
- array_push($values, parse_date($part));
- continue;
- }
- if (strpos($part, ':') !== false) {
- // YYYY-MM-DD:YYYY-MM-DD
- $range = explode(':', $part);
- $r = $push_range($range[0], $range[1]);
- if ($r === false) {
- return null;
- }
- array_push($values, $r);
- continue;
- }
- // YYYY-MM-DD or YYYYMMDD-YYYYMMDD
- // split on the dash
- $range = explode('-', $part);
- if (count($range) === 2) {
- // YYYYMMDD-YYYYMMDD
- $r = $push_range($range[0], $range[1]);
- if ($r === false) {
- return null;
- }
- array_push($values, $r);
- continue;
- }
- // YYYY-MM-DD
- array_push($values, parse_date($part));
- }
- // success, return the list
- return $values;
-}
-
-// give a comma-separated, quoted list of states in an HHS or Census region
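-// e.g. get_region_states('hhs2') returns "'NJ', 'NY'"; unknown regions yield null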
-function get_region_states($region) {
- switch($region) {
- case 'hhs1': return "'VT', 'CT', 'ME', 'MA', 'NH', 'RI'";
- case 'hhs2': return "'NJ', 'NY'";
- case 'hhs3': return "'DE', 'DC', 'MD', 'PA', 'VA', 'WV'";
- case 'hhs4': return "'AL', 'FL', 'GA', 'KY', 'MS', 'NC', 'TN', 'SC'";
- case 'hhs5': return "'IL', 'IN', 'MI', 'MN', 'OH', 'WI'";
- case 'hhs6': return "'AR', 'LA', 'NM', 'OK', 'TX'";
- case 'hhs7': return "'IA', 'KS', 'MO', 'NE'";
- case 'hhs8': return "'CO', 'MT', 'ND', 'SD', 'UT', 'WY'";
- case 'hhs9': return "'AZ', 'CA', 'HI', 'NV'";
- case 'hhs10': return "'AK', 'ID', 'OR', 'WA'";
- case 'cen1': return "'CT', 'ME', 'MA', 'NH', 'RI', 'VT'";
- case 'cen2': return "'NJ', 'NY', 'PA'";
- case 'cen3': return "'IL', 'IN', 'MI', 'OH', 'WI'";
- case 'cen4': return "'IA', 'KS', 'MN', 'MO', 'NE', 'ND', 'SD'";
- case 'cen5': return "'DE', 'DC', 'FL', 'GA', 'MD', 'NC', 'SC', 'VA', 'WV'";
- case 'cen6': return "'AL', 'KY', 'MS', 'TN'";
- case 'cen7': return "'AR', 'LA', 'OK', 'TX'";
- case 'cen8': return "'AZ', 'CO', 'ID', 'MT', 'NV', 'NM', 'UT', 'WY'";
- case 'cen9': return "'AK', 'CA', 'HI', 'OR', 'WA'";
- }
- return null;
-}
-
-function record_analytics($source, $data) {
- global $dbh;
- $ip = mysqli_real_escape_string($dbh, isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '');
- $ua = mysqli_real_escape_string($dbh, isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '');
- $source = mysqli_real_escape_string($dbh, isset($source) ? $source : '');
- $result = intval($data['result']);
- $num_rows = intval(isset($data['epidata']) ? count($data['epidata']) : 0);
- mysqli_query($dbh, "INSERT INTO `api_analytics` (`datetime`, `ip`, `ua`, `source`, `result`, `num_rows`) VALUES (now(), '{$ip}', '{$ua}', '{$source}', {$result}, {$num_rows})");
-}
-
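-// sends an error response with an appropriate HTTP status when `result`
-// indicates failure; returns TRUE if a response was sent, FALSE otherwise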
-function send_status(&$data) {
- if (intval($data["result"]) > 0 || intval($data["result"]) == -2) {
- return FALSE;
- }
- if ($data["message"] == 'database error') {
- http_response_code(500);
- } else if ($data["message"] == 'unauthenticated') {
- http_response_code(401);
- } else {
- http_response_code(400); // bad request
- }
- header('Content-Type: application/json');
- echo json_encode($data);
- return TRUE;
-}
-
-function send_csv(&$data) {
- if (send_status($data)) {
- return;
- }
- header('Content-Type: text/csv');
- header('Content-Disposition: attachment; filename=epidata.csv');
-
- if (intval($data["result"]) == -2) {
- // empty
- return;
- }
-
- $rows = $data["epidata"];
- $headers = array_keys($rows[0]);
- $out = fopen('php://output', 'w');
- fputcsv($out, $headers);
- foreach ($rows as $row) {
- fputcsv($out, $row);
- }
- fclose($out);
-}
-
-function send_json(&$data) {
- if (send_status($data)) {
- return;
- }
- header('Content-Type: application/json');
-
- if (intval($data["result"]) == -2) {
- echo json_encode(array());
- } else {
- echo json_encode($data["epidata"]);
- }
-}
-
-?>
diff --git a/src/server/database_config.php b/src/server/database_config.php
deleted file mode 100644
index 1667f134d..000000000
--- a/src/server/database_config.php
+++ /dev/null
@@ -1,7 +0,0 @@
-<?php
-$db_config = array(
- 'host' => 'delphi_database_epidata',
- 'port' => 3306,
-);
-?>
diff --git a/tests/acquisition/covid_hosp/common/test_database.py b/tests/acquisition/covid_hosp/common/test_database.py
index 09244dd2f..c070a00ae 100644
--- a/tests/acquisition/covid_hosp/common/test_database.py
+++ b/tests/acquisition/covid_hosp/common/test_database.py
@@ -144,9 +144,9 @@ def test_insert_dataset(self):
result = database.insert_dataset(sentinel.publication_date, dataset)
self.assertIsNone(result)
- self.assertEqual(mock_cursor.execute.call_count, 6)
+ self.assertEqual(mock_cursor.executemany.call_count, 1)
- actual_sql = mock_cursor.execute.call_args[0][0]
+ actual_sql = mock_cursor.executemany.call_args[0][0]
self.assertIn(
'INSERT INTO `test_table` (`id`, `publication_date`, `sql_str_col`, `sql_int_col`, `sql_float_col`)',
actual_sql)
@@ -162,5 +162,9 @@ def test_insert_dataset(self):
for i, expected in enumerate(expected_values):
with self.subTest(name=f'row {i + 1}'):
- actual = mock_cursor.execute.call_args_list[i][0][1]
+ # [0]: the first call() object
+ # [0]: get positional args out of the call() object
+ # [-1]: the last arg of the executemany call
+ # [i]: the ith row inserted in the executemany
+ actual = mock_cursor.executemany.call_args_list[0][0][-1][i]
self.assertEqual(actual, (0, sentinel.publication_date) + expected)
diff --git a/tests/acquisition/covid_hosp/facility/test_database.py b/tests/acquisition/covid_hosp/facility/test_database.py
index 28872a6ac..2e1ee29fe 100644
--- a/tests/acquisition/covid_hosp/facility/test_database.py
+++ b/tests/acquisition/covid_hosp/facility/test_database.py
@@ -35,9 +35,14 @@ def test_insert_dataset(self):
result = database.insert_dataset(sentinel.publication_date, dataset)
self.assertIsNone(result)
- self.assertEqual(mock_cursor.execute.call_count, 22)
-
- last_query_values = mock_cursor.execute.call_args[0][-1]
+ # once for the values, once for the keys
+ self.assertEqual(mock_cursor.executemany.call_count, 2)
+
+ # [0]: the first call() object
+ # [0]: get the positional args out of the call() object
+ # [-1]: the last arg of the executemany call
+ # [-1]: the last row inserted in the executemany
+ last_query_values = mock_cursor.executemany.call_args_list[0][0][-1][-1]
expected_query_values = (
0, sentinel.publication_date, '450822', 20201130,
'6800 N MACARTHUR BLVD', 61.1, 7, 428, 60.9, 7, 426, 61.1, 7, 428,
diff --git a/tests/acquisition/covid_hosp/state_daily/test_database.py b/tests/acquisition/covid_hosp/state_daily/test_database.py
index efa439669..95401d7cc 100644
--- a/tests/acquisition/covid_hosp/state_daily/test_database.py
+++ b/tests/acquisition/covid_hosp/state_daily/test_database.py
@@ -38,9 +38,9 @@ def test_insert_dataset(self):
result = database.insert_dataset(sentinel.issue, dataset)
self.assertIsNone(result)
- self.assertEqual(mock_cursor.execute.call_count, 53)
+ self.assertEqual(mock_cursor.executemany.call_count, 1)
- last_query_values = mock_cursor.execute.call_args[0][-1]
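+ # call_args[0]: positional args of the executemany call
+ # [-1]: the last arg (the list of rows passed to executemany)
+ # [-1]: the last of those rows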
+ last_query_values = mock_cursor.executemany.call_args[0][-1][-1]
expected_query_values = (
0, sentinel.issue, 'WY', 20201209,
0.2519685039370078, 29, 127, 32, 0.4233576642335766, 31, 137, 58, 22, 2,
diff --git a/tests/acquisition/covid_hosp/state_timeseries/test_database.py b/tests/acquisition/covid_hosp/state_timeseries/test_database.py
index 2649f7b5f..24897d42d 100644
--- a/tests/acquisition/covid_hosp/state_timeseries/test_database.py
+++ b/tests/acquisition/covid_hosp/state_timeseries/test_database.py
@@ -36,9 +36,9 @@ def test_insert_dataset(self):
result = database.insert_dataset(sentinel.issue, dataset)
self.assertIsNone(result)
- self.assertEqual(mock_cursor.execute.call_count, 22)
+ self.assertEqual(mock_cursor.executemany.call_count, 1)
- last_query_values = mock_cursor.execute.call_args[0][-1]
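+ # call_args[0]: positional args of the executemany call
+ # [-1]: the last arg (the list of rows passed to executemany)
+ # [-1]: the last of those rows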
+ last_query_values = mock_cursor.executemany.call_args[0][-1][-1]
expected_query_values = (
0, sentinel.issue, 'WY', 20200826, 0.0934579439252336, 26, 107, 10,
0.4298245614035088, 28, 114, 49, 19, 7, 2, None, 4, 2, 0, 1, '2', 0, 26,