diff --git a/.dockerignore b/.dockerignore
deleted file mode 100644
index d4f76f6..0000000
--- a/.dockerignore
+++ /dev/null
@@ -1,2 +0,0 @@
-/venv
-/.mypy_cache
diff --git a/.editorconfig b/.editorconfig
index 8a80734..ef887d0 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -6,7 +6,6 @@ root = true

 [*]
-# Change these settings to your own preference
 indent_style = space
 indent_size = 4
@@ -19,3 +18,6 @@ insert_final_newline = true

 [*.md]
 trim_trailing_whitespace = false
+
+[*.yml]
+indent_size = 2
\ No newline at end of file
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 0000000..34bcb53
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,4 @@
+# Run repo through formatter
+c7831fc0fa1367d8517831996539ce0c6c48aa58
+# Run repo through formatter
+57908fc0099685a6cad6bfe6996182e6295411df
\ No newline at end of file
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
deleted file mode 100644
index fc28188..0000000
--- a/.github/workflows/ci.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-name: ci
-
-on: push
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: [3.8]
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v3
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-          cache: "pip"
-      - name: Install Dependencies
-        run: |
-          python -m venv venv
-          source venv/bin/activate
-          pip install -r requirements.txt -r requirements-dev.txt
-      - name: Check Formatting
-        run: |
-          source venv/bin/activate
-          inv lint-black
-      - name: Check Linting
-        run: |
-          source venv/bin/activate
-          inv lint-pylint
-      - name: Check Types
-        run: |
-          source venv/bin/activate
-          inv lint-mypy
-      - name: Test
-        run: |
-          source venv/bin/activate
-          inv test
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..91bcf24
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,39 @@
+name: ci
+
+on: push
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.8]
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: "pip"
+      - name: Install Dependencies
+        run: |
+          python -m venv venv
+          source venv/bin/activate
+          pip install -e ".[dev]"
+      - name: Check Formatting
+        run: |
+          source venv/bin/activate
+          inv lint-black
+      - name: Check Linting
+        run: |
+          source venv/bin/activate
+          inv lint-pylint
+      - name: Check Types
+        run: |
+          source venv/bin/activate
+          inv lint-mypy
+      - name: Test
+        run: |
+          source venv/bin/activate
+          inv test
diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml
index 1b0cabf..43606e7 100644
--- a/.github/workflows/create_release.yml
+++ b/.github/workflows/create_release.yml
@@ -6,12 +6,13 @@ on:
         description: "Semantic Version Number (e.g., 5.5.0 or patch, minor, major, prepatch, preminor, premajor, prerelease)"
         required: true
         default: patch
+
 jobs:
   create_release:
     runs-on: ubuntu-latest
     steps:
       - name: Check out code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
         with:
           ref: main
           ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }}
@@ -20,17 +21,31 @@ jobs:
           git fetch origin dev:dev
           git reset --hard dev
       - name: Set up Python 3.8
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
        with:
          python-version: 3.8
       - name: Change version number
        id: version
+        # See this issue for explanation and testing:
+        # https://github.com/cmu-delphi/delphi-epidata/pull/1473
        run: |
          python -m pip install bump2version
-          echo -n "::set-output name=next_tag::"
-          bump2version --list ${{ github.event.inputs.versionName }} | grep new_version | sed -r s,"^.*=",,
+          if [[ ${{ github.event.inputs.versionName }} =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+            # use given version number
+            NEXT_TAG="${{ github.event.inputs.versionName }}"
+          elif [[ ${{ github.event.inputs.versionName }} =~ ^(major|minor|patch)$ ]]; then
+            # calculate new version number based on given tag
+            NEXT_TAG=$(bump2version --dry-run --list ${{ github.event.inputs.versionName }} | grep ^new_version | sed -r s,"^.*=",,)
+          else
+            echo "Invalid version name: ${{ github.event.inputs.versionName }}"
+            exit 1
+          fi
+          # apply given or calculated version number
+          bump2version --new-version $NEXT_TAG _ignored_arg_
+          # save version number for later
+          echo "next_tag=$NEXT_TAG" >> $GITHUB_OUTPUT
       - name: Create pull request into main
-        uses: peter-evans/create-pull-request@v3
+        uses: peter-evans/create-pull-request@v6
        with:
          branch: release/${{ steps.version.outputs.next_tag }}
          commit-message: "chore: release ${{ steps.version.outputs.next_tag }}"
@@ -38,6 +53,6 @@ jobs:
          title: Release ${{ steps.version.outputs.next_tag }}
          labels: chore
          # reviewers:
-          assignees: melange396
+          assignees: dshemetov
          body: |
            Releasing ${{ steps.version.outputs.next_tag }}.
diff --git a/.github/workflows/release_helper.yml b/.github/workflows/release_helper.yml
index b297f6f..4e6291b 100644
--- a/.github/workflows/release_helper.yml
+++ b/.github/workflows/release_helper.yml
@@ -17,20 +17,20 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
       - name: Set up Python 3.8
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
        with:
          python-version: 3.8
       - name: Extract version
        id: extract_version
        run: |
          python -m pip install bump2version
-          echo -n "::set-output name=version::"
-          bump2version --dry-run --list patch | grep ^current_version | sed -r s,"^.*=",,
+          CURRENT_VERSION=$(bump2version --dry-run --list patch | grep ^current_version | sed -r s,"^.*=",,)
+          echo "version=$CURRENT_VERSION" >> $GITHUB_OUTPUT
       - name: Create Release
        id: create_release
-        uses: release-drafter/release-drafter@v5
+        uses: release-drafter/release-drafter@v6
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
@@ -41,20 +41,19 @@ jobs:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          tag_name: ${{ steps.create_release.outputs.tag_name }}

-  release_package:
-    needs: create_release
+  lint:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
      - name: Set up Python 3.8
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
        with:
          python-version: 3.8
      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install --use-feature=2020-resolver -r requirements.txt -r requirements-dev.txt
+          python -m venv venv
+          source venv/bin/activate
+          pip install -e ".[dev]"
      - name: Linting
        run: |
          . venv/bin/activate
          inv lint
@@ -63,14 +62,42 @@ jobs:
        run: |
          . venv/bin/activate
          inv test
-      - name: Create release
+
+  build:
+    needs: [create_release, lint]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v5
+        with:
+          python-version: 3.8
+      - name: Install build dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+      - name: Build
        run: |
          inv dist
+      - name: Upload dist artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: epidatpy
+          path: dist/*.tar.gz
-      - uses: actions/upload-artifact@v2
+
+  release_package:
+    needs: [create_release, lint, build]
+    runs-on: ubuntu-latest
+    # TODO: Make sure this works, copied from best practices here
+    # https://github.com/pypa/gh-action-pypi-publish/tree/release/v1/?tab=readme-ov-file#trusted-publishing
+    environment:
+      name: pypi
+      url: https://pypi.org/p/epidatpy
+    permissions:
+      id-token: write
+    steps:
+      - uses: actions/download-artifact@v4
        with:
          name: epidatpy
+          path: dist
       - name: Upload Release Asset
-        uses: AButler/upload-release-assets@v2.0
+        uses: AButler/upload-release-assets@v3.0
        with:
          files: "dist/*.tar.gz"
          repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -78,9 +105,7 @@ jobs:
       - name: Publish a Python distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
-          user: __token__
-          password: ${{ secrets.DELPHI_PYPI_PROD_TOKEN }}
-          skip_existing: true
+          skip-existing: true
          # repository_url: https://test.pypi.org/legacy/

   sync_dev:
@@ -88,7 +113,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
        with:
          ref: dev
          ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }}
@@ -97,14 +122,14 @@ jobs:
          git fetch origin main:main
          git reset --hard main
       - name: Create pull request into dev
-        uses: peter-evans/create-pull-request@v3
+        uses: peter-evans/create-pull-request@v6
        with:
          branch: bot/sync-main-dev
          commit-message: "chore: sync main-dev"
          base: dev
          title: "chore: sync main->dev"
          labels: chore
-          # reviewers:
-          assignees: melange396
+          # reviewers:
+          assignees: dshemetov
          body: |
            Syncing Main->Dev.
diff --git a/.gitignore b/.gitignore
index fc5bc23..aa58631 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,11 @@
 .mypy_cache
 *.pyc
 __pycache__
-/venv
-/docs/_build
 .coverage
 .pytest_cache
+.DS_Store
 *.egg-info
-/dist
-.DS_Store
\ No newline at end of file
+dist/
+build/
+docs/_build
+venv/
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..92529d6
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,10 @@
+repos:
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.5.1
+  hooks:
+    # Run the linter.
+    - id: ruff
+      args: [--fix]
+    # Run the formatter.
+    - id: ruff-format
diff --git a/README.md b/README.md
index 923aebb..6f4dbfa 100644
--- a/README.md
+++ b/README.md
@@ -1,27 +1,33 @@
-# Delphi Epidata Python Client `epidatpy`
+# `epidatpy`

 [![License: MIT][mit-image]][mit-url] [![Github Actions][github-actions-image]][github-actions-url] [![PyPi][pypi-image]][pypi-url] [![Read the Docs][docs-image]][docs-url]

+A Python client for the [Delphi Epidata API](https://cmu-delphi.github.io/delphi-epidata/).
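+
+A quick, hypothetical sketch of what usage will look like (the argument order
+follows the `pub_covidcast` signature in `epidatpy/_endpoints.py`; the values
+are illustrative only):
+
+```py
+from epidatpy import EpiRange, Epidata
+
+epidata = Epidata()
+# data source, signal, geo type, time type, geo values, time values
+apicall = epidata.pub_covidcast(
+    "fb-survey", "smoothed_cli", "state", "day", "pa", EpiRange(20210405, 20210410)
+)
+print(apicall.df().head())
+```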
+
 ## Install

-Install latest version:
+Install with the following commands:

 ```sh
+# Latest dev version
 pip install -e "git+https://github.com/cmu-delphi/epidatpy.git#egg=epidatpy"
+
+# PyPI version (not yet available)
+pip install epidatpy
 ```

 ## Usage

 TODO

-## Development Environment
+## Development

 Prepare virtual environment and install dependencies

 ```sh
 python -m venv venv
 source ./venv/bin/activate
-pip install --use-feature=2020-resolver -r requirements.txt -r requirements-dev.txt
+pip install -e ".[dev]"
 ```

 ### Common Commands
@@ -38,7 +44,7 @@
 inv dist    # build distribution packages
 inv release # upload the current version to pypi
 ```

-## Release Process
+### Release Process

 The release consists of multiple steps which can all be done via the GitHub website:
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d4bb2cb
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/conf.py b/docs/conf.py
index 8d506e1..20fc7be 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -18,9 +18,9 @@

 # -- Project information -----------------------------------------------------

-project = "Delphi Epidata API client"
-copyright = "2021, Delphi research group"  # pylint: disable=redefined-builtin
-author = "Delphi research group"
+project = "epidatpy"
+copyright = "2024, Delphi Research Group"  # pylint: disable=redefined-builtin
+author = "Delphi Research Group"

 # The full version, including alpha/beta/rc tags
 release = "1.0.0"
@@ -43,7 +43,16 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+exclude_patterns = [
+    "_build",
+    "Thumbs.db",
+    ".DS_Store",
+    "smoke_covid_test",
+    "smoke_test_async",
+    "smoke_test",
+    "tasks",
+    "test_pydantic",
+]

 add_module_names = False
 autoclass_content = "class"
@@ -63,7 +72,7 @@

 html_theme_options = {
     "extra_nav_links": {
-        "Delphi group": "https://delphi.cmu.edu/",
+        "Delphi Research Group": "https://delphi.cmu.edu/",
         "Delphi Epidata API": "https://cmu-delphi.github.io/delphi-epidata",
     }
 }
diff --git a/docs/covidcast_examples.rst b/docs/covidcast_examples.rst
index 5d46838..e02329d 100644
--- a/docs/covidcast_examples.rst
+++ b/docs/covidcast_examples.rst
@@ -14,7 +14,7 @@ distributed through Facebook (`fb-survey`), for every county in the United State
 2020-05-01 and 2020-05-07:

 >>> from delphi_epidata.request import EpiRange
->>> apicall = epidata[("fb-survey", "smoothed_cli")].call(
+>>> apicall = epidata[("fb-survey", "smoothed_cli")].call(
 ...     'county', "*", EpiRange(20200501, 20200507),
 ... )
 EpiDataCall(endpoint=covidcast, params={'data_source': 'fb-survey', 'signals': 'smoothed_cli', 'time_type': 'day', 'time_values': '20200501-20200507', 'geo_type': 'county', 'geo_values': '*'})
@@ -22,11 +22,10 @@ EpiDataCall(endpoint=covidcast, params={'data_source': 'fb-survey', 'signals': '
 >>> data.head()
    source        signal geo_type geo_value time_type time_value      issue  lag     value    stderr  sample_size direction  missing_value  missing_stderr  missing_sample_size
 0  fb-survey  smoothed_cli  county  01000  day  2020-05-01  2020-09-03  125  0.825410  0.136003  1722  NaN  0  0  0
-1  fb-survey  smoothed_cli  county  01001  day  2020-05-01  2020-09-03  125  1.299425  0.967136  115  NaN  0  0  0
-2  fb-survey  smoothed_cli  county  01003  day  2020-05-01  2020-09-03  125  0.696597  0.324753  584  NaN  0  0  0
-3  fb-survey  smoothed_cli  county  01015  day  2020-05-01  2020-09-03  125  0.428271  0.548566  122  NaN  0  0  0
-4  fb-survey  smoothed_cli  county  01031  day  2020-05-01  2020-09-03  125  0.025579  0.360827  114  NaN  0  0  0
-
+1  fb-survey  smoothed_cli  county  01001  day  2020-05-01  2020-09-03  125  1.299425  0.967136  115  NaN  0  0  0
+2  fb-survey  smoothed_cli  county  01003  day  2020-05-01  2020-09-03  125  0.696597  0.324753  584  NaN  0  0  0
+3  fb-survey  smoothed_cli  county  01015  day  2020-05-01  2020-09-03  125  0.428271  0.548566  122  NaN  0  0  0
+4  fb-survey  smoothed_cli  county  01031  day  2020-05-01  2020-09-03  125  0.025579  0.360827  114  NaN  0  0  0

 Each row represents one observation in one county on one day. The county
 FIPS code is given in the ``geo_value`` column, the date in the ``time_value``
@@ -47,7 +46,7 @@ and describes the mathematical derivation of the estimates.
 Using the ``geo_values`` argument, we can request data for a specific geography,
 such as the state of Pennsylvania for the month of September 2021:

->>> pa_data = epidata[("fb-survey", "smoothed_cli")].call(
+>>> pa_data = epidata[("fb-survey", "smoothed_cli")].call(
 ...     'state', "pa", EpiRange(20210901, 20210930)
 ... ).df()
 >>> pa_data.head()
@@ -58,5 +57,4 @@ such as the state of Pennsylvania for the month of September 2021:
 3  fb-survey  smoothed_cli  state  pa  day  2021-09-04  2021-09-09  5  0.984799  0.092566  9069  NaN  0  0  0
 4  fb-survey  smoothed_cli  state  pa  day  2021-09-05  2021-09-10  5  1.010306  0.093357  9016  NaN  0  0  0

-We can request multiple states by providing a list, such as ``["pa", "ny",
-"mo"]``.
\ No newline at end of file
+We can request multiple states by providing a list, such as ``["pa", "ny", "mo"]``.
diff --git a/docs/epidatpy.rst b/docs/epidatpy.rst
new file mode 100644
index 0000000..f72e0d8
--- /dev/null
+++ b/docs/epidatpy.rst
@@ -0,0 +1,33 @@
+epidatpy Reference
+==================
+
+.. toctree::
+   :maxdepth: 4
+
+Submodules
+----------
+
+Module contents
+---------------
+
+.. automodule:: epidatpy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+epidatpy.request module
+-----------------------
+
+.. automodule:: epidatpy.request
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+epidatpy.async\_request module
+------------------------------
+
+.. automodule:: epidatpy.async_request
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
diff --git a/docs/getting_started.rst b/docs/getting_started.rst
index ab6b099..a993e2a 100644
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -4,156 +4,143 @@ Getting Started
 Overview
 --------------

-This package provides access to data from various Epidata API endpoints including COVIDcast,
-which provides numerous COVID-related data streams, updated daily.
+This package provides access to data from various Epidata API endpoints including COVIDcast,
+which provides numerous COVID-related data streams, updated daily.

 .. _epidata-endpoints:

 Epidata Data Sources
---------------
+--------------------

 The parameters available for each data source are documented in each linked source-specific API page.

-| **COVID-19 Data**
-.. list-table::
+.. list-table::
   :widths: 20 20 40
   :header-rows: 1

   * - Endpoint
     - Name
     - Description
-   * - `covidcast `_
+   * - `pub_covidcast `_
     - COVIDcast
     - Delphi’s COVID-19 surveillance streams.
-   * - `covidcast_meta `_
+   * - `pub_covidcast_meta `_
     - COVIDcast metadata
     - Metadata for Delphi's COVID-19 surveillance streams.
-   * - `covid_hosp_facility `_
+   * - `pub_covid_hosp_facility `_
     - COVID-19 Hospitalization by Facility
     - COVID-19 Reported Patient Impact and Hospital Capacity - Facility Lookup
-   * - `covid_hosp `_
+   * - `pub_covid_hosp `_
     - COVID-19 Hospitalization
     - COVID-19 Reported Patient Impact and Hospital Capacity.

-| **Influenza Data**
-.. list-table::
+.. list-table::
   :widths: 20 20 40
   :header-rows: 1

   * - Endpoint
     - Name
     - Description
-   * - `afhsb `_
-    - AFHSB
-    - ...
-   * - `meta_afhsb `_
-    - AFHSB Metadata
-    - ...
-   * - `cdc `_
-    - CDC Page Hits
+   * - `pvt_cdc `_
+    - CDC Page Hits
     - ...
-   * - `delphi `_
-    - Delphi’s Forecast
+   * - `pub_delphi `_
+    - Delphi’s Forecast
     - ...
-   * - `ecdc_ili `_
+   * - `pub_ecdc_ili `_
     - ECDC ILI
     - ECDC ILI data from the ECDC website.
-   * - `flusurv `_
-    - FluSurv
+   * - `pub_flusurv `_
+    - FluSurv
     - FluSurv-NET data (flu hospitalization rates) from CDC.
-   * - `fluview `_
+   * - `pub_fluview `_
     - FluView
     - Influenza-like illness (ILI) from U.S. Outpatient Influenza-like Illness Surveillance Network (ILINet).
-   * - `fluview_meta `_
+   * - `pub_fluview_meta `_
     - FluView Metadata
     - Summary data about ``fluview``.
-   * - `fluview_clinical `_
+   * - `pub_fluview_clinical `_
     - FluView Clinical
     - ...
-   * - `gft `_
+   * - `pub_gft `_
     - Google Flu Trends
     - Estimate of influenza activity based on volume of certain search queries. This is now a static endpoint due to discontinuation.
-   * - `ght `_
-    - Google Health Trends
-    - Estimate of influenza activity based on volume of certain search queries.
-   * - `kcdc_ili `_
+   * - `pub_kcdc_ili `_
     - KCDC ILI
     - KCDC ILI data from KCDC website.
-   * - `meta `_
+   * - `pub_meta `_
     - API Metadata
     - Metadata for ``fluview``, ``twitter``, ``wiki``, and ``delphi``.
-   * - `nidss_flu `_
+   * - `pub_nidss_flu `_
     - NIDSS Flu
     - Outpatient ILI from Taiwan's National Infectious Disease Statistics System (NIDSS).
-   * - `nowcast `_
+   * - `pub_nowcast `_
     - ILI Nearby
     - A nowcast of U.S. national, regional, and state-level (weighted) percent ILI, available seven days (regionally) or five days (state-level) before the first ILINet report for the corresponding week.
-   * - `quidel `_
+   * - `pvt_quidel `_
     - Quidel
     - Data provided by Quidel Corp., which contains flu lab test results.
-   * - `sensors `_
+   * - `pvt_sensors `_
     - Delphi's Digital Surveillance Sensors
     - ...
-   * - `twitter `_
+   * - `pvt_twitter `_
     - Twitter Stream
     - Estimate of influenza activity based on analysis of language used in tweets from HealthTweets.
-   * - `wiki `_
+   * - `pub_wiki `_
     - Wikipedia Access Logs
     - Number of page visits for selected English, influenza-related Wikipedia articles.

-| **Dengue Data**
-.. list-table::
+.. list-table::
   :widths: 20 20 40
   :header-rows: 1

   * - Endpoint
     - Name
     - Description
-   * - `dengue_nowcast `_
+   * - `pub_dengue_nowcast `_
     - Delphi's Dengue Nowcast
     - ...
-   * - `dengue_sensors `_
+   * - `pvt_dengue_sensors `_
     - Delphi’s Dengue Digital Surveillance Sensors
     - ...
-   * - `nidss_dengue `_
+   * - `pub_nidss_dengue `_
     - NIDSS Dengue
     - Counts of confirmed dengue cases from Taiwan's NIDSS.
-   * - `paho_dengue `_
+   * - `pub_paho_dengue `_
     - PAHO Dengue
     - ...

-| **Norovirus Data**
-.. list-table::
+.. list-table::
   :widths: 20 20 40
   :header-rows: 1

   * - Endpoint
     - Name
     - Description
-   * - `meta_norostat `_
+   * - `pvt_meta_norostat `_
     - NoroSTAT Metadata
     - ...
-   * - `norostat `_
+   * - `pvt_norostat `_
     - NoroSTAT
     - Suspected and confirmed norovirus outbreaks reported by state health departments to the CDC.

-|
-
 Epiweeks and Dates
 ------------------

-Epiweeks use the U.S. definition. That is, the first epiweek each year is the week, starting on a Sunday,
-containing January 4. See `this page `_ for more information.
+Epiweeks use the U.S. definition. That is, the first epiweek each year is the
+week, starting on a Sunday, containing January 4. See `this page
+`_
+for more information.

 Formatting for epiweeks is YYYYWW and for dates is YYYYMMDD.
-Use individual values, comma-separated lists or, a hyphenated range of values to specify single or several dates.
+Use individual values, comma-separated lists, or a hyphenated range of values to specify single or several dates.
 An ``EpiRange`` object can also be used to construct a range of epiweeks or dates. Examples include:

 - ``param = 201530`` (A single epiweek)
 - ``param = '201401-201410'`` (A range of epiweeks)
 - ``param = '201440,201501-201510'`` (Several epiweeks, including a range)
 - ``param = EpiRange(20070101, 20071231)`` (A range of dates)

-|
-
 .. _getting-started:

 Basic examples
--------------

@@ -176,8 +161,8 @@ distributed through Facebook, for every county in the United States between
 2020-05-01 and 2020-05-07:

 >>> from epidatpy.request import Epidata, EpiRange
->>> apicall = Epidata.covidcast("fb-survey", "smoothed_cli",
-...                             "day", "county",
+>>> apicall = Epidata.covidcast("fb-survey", "smoothed_cli",
+...                             "day", "county",
 ...                             EpiRange(20200501, 20200507), "*")
 >>> data = apicall.df()
 >>> data.head()
@@ -197,7 +182,7 @@ May 1st were updated on September 3rd based on new data, giving a ``lag`` of 125
 See the :py:func:`epidatpy.request.Epidata.covidcast` documentation for further details
 on the returned columns.

-In the above code, the ``.df()`` function on the ``apicall`` variable generated a Pandas DataFrame. We can use
+In the above code, the ``.df()`` function on the ``apicall`` variable generated a Pandas DataFrame. We can use
 other :ref:`output functions ` to parse the requested API call in different formats. To parse the data
 into JSON format, we can use the following command:

@@ -238,7 +223,7 @@ into JSON format, we can use the following command:
 .
 }]

-Note that all of the :ref:`output functions ` have a ``field`` parameter which takes in any form of iterator objects
+Note that all of the :ref:`output functions ` have a ``field`` parameter which takes in any form of iterator objects
 to enable fetching the data with customization (e.g. specifying which fields or columns to output). Similar to the previous example,
 to parse the data in JSON format, but customize the field to show only ``geo_value`` and ``value``, we would use the following
 command:

@@ -259,9 +244,6 @@ command:
 .
 }]

-
-|
-
 **Wikipedia Access article "influenza" on 2020w01**

 >>> apicall_wiki = Epidata.wiki(articles='influenza', epiweeks='202001')
 >>> data = apicall_wiki.json()
 >>> print(data)
 [{'article': 'influenza', 'count': 6516, 'total': 663604044, 'hour': -1, 'epiweek': datetime.date(2019, 12, 29), 'value': 9.81910834}]

-|
-
 **FluView on 2019w01 (national)**

 >>> apicall_fluview = Epidata.fluview(regions='nat', epiweeks='201901')
 >>> data = apicall_fluview.classic()
 >>> print(data)
   'ili': 3.63095}],
  'result': 1,
  'message': 'success'}

-
-|
-
-Other examples (TODO)
---------------
-
-(TODO)
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 6fc4ee9..a046b74 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,45 +1,65 @@
-Delphi Epidata
+epidatpy
 ===============

 This package provides Python access to the `Delphi Epidata API
 `_ published by
-the `Delphi group `_ at `Carnegie Mellon University
+the `Delphi research group `_ at `Carnegie Mellon University
 `_.

 The package source code and bug tracker can be found `on GitHub
 `_.

-.. note :: **You should consider subscribing** to the `API mailing list
-   `_ to be
-   notified of package updates, new data sources, corrections, and other
-   updates.
-
-.. warning :: If you use data from the COVIDcast API to power a public product,
-   dashboard, app, or other service, please download the data you need and store
-   it centrally rather than making API requests for every user. Our server
-   resources are limited and cannot support high-volume interactive use.
-
-   See also the `COVIDcast Terms of Use
-   `_, noting that the data is a
-   research product and not warranted for a particular purpose.
-
-
 Installation
 ------------

-This package is available on PyPI as `covidcast
-`_, and can be installed using ``pip`` or
-your favorite Python package manager:
+This package will be available on PyPI as `epidatpy
+`_ and will be installable with ``pip``.
+Meanwhile, it can be installed from GitHub:

 .. code-block:: sh

-   pip install epidatpy
+   pip install -e "git+https://github.com/cmu-delphi/epidatpy.git#egg=epidatpy"

 The package requires `pandas `_ and
 `requests `_; these should be installed automatically.

+API Keys
+--------
+
+The Delphi Epidata API requires a (free) API key for full functionality. To
+generate your key, register for a pseudo-anonymous account `here
+`_ and see more
+discussion on the `general API website
+`_. The ``epidatpy``
+client will automatically look for this key in the environment variable
+``DELPHI_EPIDATA_KEY``. We recommend storing your key in a ``.env`` file and using
+`python-dotenv `_ to load it into
+your environment, as sketched after the note below.
+
+Note that for the time being, the private endpoints (i.e. those prefixed with
+``pvt``) will require a separate key that needs to be passed as an argument.
+
+See also the `COVIDcast Terms of Use
+`_, noting that the data is a
+research product and not warranted for a particular purpose.
+
+For users of the covidcast Python package
+------------------------------------------
+
+The `covidcast `_
+package is deprecated and will no longer be updated. The ``epidatpy`` package is a
+complete rewrite with a focus on speed, reliability, and ease of use. It also
+supports more endpoints and data sources than ``covidcast``. When migrating from
+that package, you will need to use the ``pub_covidcast`` function in
+``epidatpy``.
+
+.. note :: **You should consider subscribing** to the `API mailing list
+   `_ to be
+   notified of package updates, new data sources, corrections, and other
+   updates.
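+
+A minimal, hypothetical sketch of the ``.env`` approach described above
+(``load_dotenv`` is the standard `python-dotenv` entry point; the file contents
+are illustrative):
+
+.. code-block:: python
+
+    # .env contains a line like: DELPHI_EPIDATA_KEY=your_key_here
+    from dotenv import load_dotenv
+
+    # Reads .env from the working directory and exports its entries into
+    # os.environ, where epidatpy looks for DELPHI_EPIDATA_KEY.
+    load_dotenv()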
+
 Contents
 --------

    getting_started
+   covidcast_examples
+   signals_covid
+   epidatpy
+
diff --git a/docs/signals_covid.rst b/docs/signals_covid.rst
index 2d43e9b..46d486b 100644
--- a/docs/signals_covid.rst
+++ b/docs/signals_covid.rst
@@ -1,126 +1,72 @@
 Fetching Data
 =============

->>> from delphi_epidata.request import Epidata
-This package provides various functions that can be called on the ``Epidata`` object to obtain any :ref:`Epidata endpoint ` signals of interest.
+>>> from epidatpy.request import Epidata
+>>> epi = Epidata()
+>>> epi.pub_covidcast('usa-facts', 'confirmed_7dav_incidence_num', 'state', 'day', 'tx', '20210101-20210131')

-The functions below will return an ``EpiDataCall`` object, which contains the appropriate URL
-and parameters required to make an API request. The signal of interest can then be obtained in 5 different :ref:`output formats `.
+This package provides various functions that can be called on the ``Epidata`` object to obtain any :ref:`Epidata endpoint ` signals of interest. The functions below are inherited by the ``Epidata`` object.

 Detailed examples are provided in the :ref:`usage examples `.

 COVIDcast Signals
 -----------------
-.. autofunction:: delphi_epidata.request.Epidata.covidcast
-|
-.. autofunction:: delphi_epidata.request.Epidata.covidcast_meta
-|
-.. autofunction:: delphi_epidata.request.Epidata.covid_hosp_facility
-|
-.. autofunction:: delphi_epidata.request.Epidata.covid_hosp_facility_lookup
-|
-.. autofunction:: delphi_epidata.request.Epidata.covid_hosp_state_timeseries
-|
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_covidcast
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_covidcast_meta
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_covid_hosp_facility
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_covid_hosp_facility_lookup
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_covid_hosp_state_timeseries
+

 Other Epidata Signals
------------------
-.. autofunction:: delphi_epidata.request.Epidata.pvt_afhsb
-|
-.. autofunction:: delphi_epidata.request.Epidata.pvt_meta_afhsb
-|
-.. autofunction:: delphi_epidata.request.Epidata.cdc
-|
-.. autofunction:: delphi_epidata.request.Epidata.delphi
-|
-.. autofunction:: delphi_epidata.request.Epidata.ecdc_ili
-|
-.. autofunction:: delphi_epidata.request.Epidata.flusurv
-|
-.. autofunction:: delphi_epidata.request.Epidata.fluview
-|
-.. autofunction:: delphi_epidata.request.Epidata.fluview_meta
-|
-.. autofunction:: delphi_epidata.request.Epidata.fluview_clinical
-|
-.. autofunction:: delphi_epidata.request.Epidata.gft
-|
-.. autofunction:: delphi_epidata.request.Epidata.ght
-|
-.. autofunction:: delphi_epidata.request.Epidata.kcdc_ili
-|
-.. autofunction:: delphi_epidata.request.Epidata.meta
-|
-.. autofunction:: delphi_epidata.request.Epidata.nidss_flu
-|
-.. autofunction:: delphi_epidata.request.Epidata.nowcast
-|
-.. autofunction:: delphi_epidata.request.Epidata.pvt_quidel
-|
-.. autofunction:: delphi_epidata.request.Epidata.pvt_sensors
-|
-.. autofunction:: delphi_epidata.request.Epidata.pvt_twitter
-|
-.. autofunction:: delphi_epidata.request.Epidata.wiki
-|
-.. autofunction:: delphi_epidata.request.Epidata.dengue_nowcast
-|
-.. autofunction:: delphi_epidata.request.Epidata.pvt_dengue_sensors
-|
-.. autofunction:: delphi_epidata.request.Epidata.nidss_dengue
-|
-.. autofunction:: delphi_epidata.request.Epidata.paho_dengue
-|
-.. autofunction:: delphi_epidata.request.Epidata.pvt_meta_norostat
-|
-.. autofunction:: delphi_epidata.request.Epidata.pvt_norostat
-
-
-.. _output-data:
-
-Output Functions
---------
-
-The following functions can be called on an ``EpiDataCall`` object to make an API request and parse the signal in
-5 different formats:
-    - Classic
-    - JSON
-    - Pandas DataFrame
-    - CSV
-    - Iterator
-|
-.. autofunction:: delphi_epidata.request.EpiDataCall.classic
-|
-.. autofunction:: delphi_epidata.request.EpiDataCall.json
-|
-.. autofunction:: delphi_epidata.request.EpiDataCall.df
-|
-.. autofunction:: delphi_epidata.request.EpiDataCall.csv
-|
-.. autofunction:: delphi_epidata.request.EpiDataCall.iter
-
-
-More on COVIDcast (TODO)
-------------------------
-
-Many data sources and signals are available, so one can also obtain a data frame
-of all signals and their associated metadata:
-
->>> from epidatpy.request import CovidcastEpidata
->>> covid_ds = CovidcastEpidata()
->>> df_source = covid_ds.source_df
->>> df_signal = covid_ds.signal_df
-
-Calling ``CovidcastEpidata`` function will return a class object ``CovidcastDataSources``,
-which has the property ``source_df`` and ``signal_df``, two data frames containing
-the information of all available sources and signals.
-More details of the two data frames are listed below.
-
-.. autoclass:: epidatpy.request.CovidcastDataSources()
-    :members:
-
-More metadata statistics can also be obtained as follows:
+---------------------
->>> from epidatpy.request import Epidata
->>> df = Epidata.covidcast_meta().df()
+.. automethod:: epidatpy.AEpiDataEndpoints.pvt_cdc
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_delphi
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_ecdc_ili
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_flusurv
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_fluview
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_fluview_meta
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_fluview_clinical
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_gft
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pvt_ght
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_kcdc_ili
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_meta
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_nidss_flu
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_nowcast
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pvt_quidel
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pvt_sensors
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pvt_twitter
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_wiki
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_dengue_nowcast
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pvt_dengue_sensors
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_nidss_dengue
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pub_paho_dengue
+
+.. automethod:: epidatpy.AEpiDataEndpoints.pvt_meta_norostat
-.. autofunction:: epidatpy.request.Epidata.covidcast_meta()
\ No newline at end of file
+.. automethod:: epidatpy.AEpiDataEndpoints.pvt_norostat
diff --git a/epidatpy/__init__.py b/epidatpy/__init__.py
index 49880ec..3bd7a38 100644
--- a/epidatpy/__init__.py
+++ b/epidatpy/__init__.py
@@ -1,27 +1,9 @@
 """Fetch data from Delphi's API."""

-from ._constants import __version__
-from ._model import (
-    EpiRange,
-    EpiRangeDict,
-    EpiDataResponse,
-    EpiRangeLike,
-    InvalidArgumentException,
-    EpiRangeParam,
-    IntParam,
-    StringParam,
-    EpiDataFormatType,
-    AEpiDataCall,
-)
-from ._covidcast import (
-    DataSignal,
-    DataSource,
-    WebLink,
-    DataSignalGeoStatistics,
-    CovidcastDataSources,
-    GeoType,
-    TimeType,
-)
-from ._auth import get_api_key
+# Make the linter happy about the unused variables
+__all__ = ["__version__", "Epidata", "CovidcastEpidata", "EpiRange"]
+__author__ = "Delphi Research Group"
+

-__author__ = "Delphi Group"
+from ._constants import __version__
+from .request import CovidcastEpidata, Epidata, EpiRange
diff --git a/epidatpy/_auth.py b/epidatpy/_auth.py
index 566bccb..d581443 100644
--- a/epidatpy/_auth.py
+++ b/epidatpy/_auth.py
@@ -1,10 +1,9 @@
 import os
 import warnings
-from typing import Optional


-def get_api_key() -> Optional[str]:
-    key = os.environ.get("DELPHI_EPIDATA_KEY", None)
+def _get_api_key() -> str:
+    key = os.environ.get("DELPHI_EPIDATA_KEY", "")
     if not key:
         warnings.warn(
diff --git a/epidatpy/_constants.py b/epidatpy/_constants.py
index b56fcdf..41e57a4 100644
--- a/epidatpy/_constants.py
+++ b/epidatpy/_constants.py
@@ -1,6 +1,5 @@
 from typing import Final

-
 __version__: Final = "0.5.0"
 HTTP_HEADERS: Final = {"User-Agent": f"epidatpy/{__version__}"}
 BASE_URL: Final = "https://api.delphi.cmu.edu/epidata/"
diff --git a/epidatpy/_covidcast.py b/epidatpy/_covidcast.py
index 3367ed1..43e2b99 100644
--- a/epidatpy/_covidcast.py
+++ b/epidatpy/_covidcast.py
@@ -1,4 +1,5 @@
-from dataclasses import Field, InitVar, dataclass, field, fields
+from dataclasses import Field, InitVar, asdict, dataclass, field, fields
+from functools import cached_property
 from typing import (
     Any,
     Callable,
@@ -13,25 +14,23 @@
     Sequence,
     Tuple,
     Union,
-    overload,
     get_args,
+    overload,
 )
-from functools import cached_property
+
 from pandas import DataFrame
+
 from ._model import (
-    EpiRangeLike,
     CALL_TYPE,
     EpidataFieldInfo,
     EpidataFieldType,
     EpiRangeParam,
+    GeoType,
     InvalidArgumentException,
+    TimeType,
 )

-GeoType = Literal["nation", "msa", "hrr", "hhs", "state", "county"]
-TimeType = Literal["day", "week"]
-
-
 @dataclass
 class WebLink:
     """
@@ -63,17 +62,9 @@ def define_covidcast_fields() -> List[EpidataFieldInfo]:
     return [
         EpidataFieldInfo("source", EpidataFieldType.text),
         EpidataFieldInfo("signal", EpidataFieldType.text),
-        EpidataFieldInfo(
-            "geo_type",
-            EpidataFieldType.categorical,
-            categories=list(get_args(GeoType)),
-        ),
+        EpidataFieldInfo("geo_type", EpidataFieldType.categorical, categories=list(get_args(GeoType))),
         EpidataFieldInfo("geo_value", EpidataFieldType.text),
-        EpidataFieldInfo(
-            "time_type",
-            EpidataFieldType.categorical,
-            categories=list(get_args(TimeType)),
-        ),
+        EpidataFieldInfo("time_type", EpidataFieldType.categorical, categories=list(get_args(TimeType))),
         EpidataFieldInfo("time_value", EpidataFieldType.date_or_epiweek),
         EpidataFieldInfo("issue", EpidataFieldType.date),
         EpidataFieldInfo("lag", EpidataFieldType.int),
@@ -93,7 +84,7 @@ class DataSignal(Generic[CALL_TYPE]):
     represents a COVIDcast data signal
     """

-    _create_call: Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE]
+    _create_call: Callable[[Mapping[str, Optional[EpiRangeParam]]], CALL_TYPE]

     source: str
     signal: str
@@ -119,17 +110,19 @@ class DataSignal(Generic[CALL_TYPE]):
     geo_types: Dict[GeoType, DataSignalGeoStatistics] = field(default_factory=dict)

     def __post_init__(self) -> None:
-        self.link = [WebLink(alt=l["alt"], href=l["href"]) if isinstance(l, dict) else l for l in self.link]
+        self.link = [
+            WebLink(alt=link["alt"], href=link["href"]) if isinstance(link, dict) else link for link in self.link
+        ]
         stats_fields = fields(DataSignalGeoStatistics)
         self.geo_types = {
-            k: DataSignalGeoStatistics(**_limit_fields(l, stats_fields)) if isinstance(l, dict) else l
-            for k, l in self.geo_types.items()
+            k: DataSignalGeoStatistics(**_limit_fields(v, stats_fields)) if isinstance(v, dict) else v
+            for k, v in self.geo_types.items()
         }

     @staticmethod
     def to_df(signals: Iterable["DataSignal"]) -> DataFrame:
         df = DataFrame(
-            signals,
+            [asdict(s) for s in signals],
             columns=[
                 "source",
                 "signal",
@@ -150,8 +143,8 @@ def to_df(signals: Iterable["DataSignal"]) -> DataFrame:
                 "has_sample_size",
             ],
         )
-        df.insert(6, "geo_types", [",".join(s.geo_types.keys()) for s in signals])
-        return df.set_index(["source", "signal"])
+        df["geo_types"] = [",".join(s.geo_types.keys()) for s in signals]
+        return df

     @property
     def key(self) -> Tuple[str, str]:
@@ -160,7 +153,7 @@ def key(self) -> Tuple[str, str]:
     def call(
         self,
         geo_type: GeoType,
-        geo_values: Union[int, str, Iterable[Union[int, str]]],
+        geo_values: Union[str, Sequence[str]],
         time_values: EpiRangeParam,
         as_of: Union[None, str, int] = None,
         issues: Optional[EpiRangeParam] = None,
@@ -189,7 +182,7 @@ def call(
     def __call__(
         self,
         geo_type: GeoType,
-        geo_values: Union[int, str, Iterable[Union[int, str]]],
+        geo_values: Union[str, Sequence[str]],
         time_values: EpiRangeParam,
         as_of: Union[None, str, int] = None,
         issues: Optional[EpiRangeParam] = None,
@@ -205,7 +198,7 @@ class DataSource(Generic[CALL_TYPE]):
     represents a COVIDcast data source
     """

-    _create_call: InitVar[Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE]]
+    _create_call: InitVar[Callable[[Mapping[str, Optional[EpiRangeParam]]], CALL_TYPE]]

     source: str
     db_source: str
@@ -218,11 +211,10 @@ class DataSource(Generic[CALL_TYPE]):

     signals: Sequence[DataSignal] = field(default_factory=list)

-    def __post_init__(
-        self,
-        _create_call: Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE],
-    ) -> None:
-        self.link = [WebLink(alt=l["alt"], href=l["href"]) if isinstance(l, dict) else l for l in self.link]
+    def __post_init__(self, _create_call: Callable[[Mapping[str, Optional[EpiRangeParam]]], CALL_TYPE]) -> None:
+        self.link = [
+            WebLink(alt=link["alt"], href=link["href"]) if isinstance(link, dict) else link for link in self.link
+        ]
         signal_fields = fields(DataSignal)
         self.signals = [
             DataSignal(_create_call=_create_call, **_limit_fields(s, signal_fields)) if isinstance(s, dict) else s
@@ -232,7 +224,7 @@ def __post_init__(
     @staticmethod
     def to_df(sources: Iterable["DataSource"]) -> DataFrame:
         df = DataFrame(
-            sources,
+            [asdict(source) for source in sources],
             columns=[
                 "source",
                 "name",
@@ -243,7 +235,7 @@ def to_df(sources: Iterable["DataSource"]) -> DataFrame:
             ],
         )
         df["signals"] = [",".join(ss.signal for ss in s.signals) for s in sources]
-        return df.set_index("source")
+        return df

     def get_signal(self, signal: str) -> Optional[DataSignal]:
         return next((s for s in self.signals if s.signal == signal), None)
@@ -265,7 +257,7 @@ class CovidcastDataSources(Generic[CALL_TYPE]):
         init=False, default_factory=OrderedDict
     )

-    _create_call: Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE]
+    _create_call: Callable[[Mapping[str, Optional[EpiRangeParam]]], CALL_TYPE]

     def __post_init__(self) -> None:
         self._source_by_name = {s.source: s for s in self.sources}
@@ -273,50 +265,47 @@ def __post_init__(self) -> None:
             for signal in source.signals:
                 self._signals_by_key[signal.key] = signal

-    def get_source(self, source: str) -> Optional[DataSource[CALL_TYPE]]:
-        return self._source_by_name.get(source)
+    def source_names(self) -> Sequence[str]:
+        return [s.source for s in self.sources]

-    @property
-    def source_names(self) -> Iterable[str]:
-        return (s.source for s in self.sources)
+    def signal_names(self, source: Optional[str] = None) -> Sequence[str]:
+        if not source:
+            return [x.signal for src in self._source_by_name.values() for x in src.signals]
+        return [s.signal for s in self._source_by_name[source].signals]

     @cached_property
     def source_df(self) -> DataFrame:
         """Fetch metadata about available covidcast sources.

-        Obtains a data frame of source metadata describing all publicly available data
-        streams from the covidcast API.
+        Obtains a data frame of source metadata describing all publicly
+        available data streams from the covidcast API.

         :returns: A data frame containing one row per available source, with the
-            following columns:
-
-            ``source``
-                Data source name.
-
-            ``signal``
-                Signal name.
-
-            ``description``
-                Description of the signal.
-
-            ``reference_signal``
-                Geographic level for which this signal is available, such as county,
-                state, msa, hss, hrr, or nation. Most signals are available at multiple geographic
-                levels and will hence be listed in multiple rows with their own
-                metadata.
-
-            ``license``
-                The license
-
-            ``dua``
-                Link to the Data Use Agreement.
+            following columns:
+
+            ``source``
+                Data source name.
+
+            ``name``
+                Source name.
+
+            ``description``
+                Description of the source.
+
+            ``reference_signal``
+                The source's reference signal.
+
+            ``license``
+                The license.
+
+            ``dua``
+                Link to the Data Use Agreement.
         """
         return DataSource.to_df(self.sources)

-    @property
-    def signals(self) -> Iterable[DataSignal[CALL_TYPE]]:
-        return self._signals_by_key.values()
-
     @cached_property
     def signal_df(self) -> DataFrame:
         """Fetch metadata about available covidcast signals.
@@ -327,100 +316,95 @@ def signal_df(self) -> DataFrame:
         for descriptions of the available sources.

         :returns: A data frame containing one row per available signal, with the
-            following columns:
+            following columns:

-            ``data_source``
+            ``data_source``
                 Data source name.

-            ``signal``
+            ``signal``
                 Signal name.

-            ``name``
+            ``name``
                 Name of signal.

-            ``active``
-                Whether the signal is currently not updated or not. Signals may be inactive
-                because the sources have become unavailable, other sources have replaced
-                them, or additional work is required for us to continue updating them.
+            ``active``
+                Whether the signal is currently updated or not. Signals may be
+                inactive because the sources have become unavailable, other sources
+                have replaced them, or additional work is required for us to
+                continue updating them.

-            ``short_description``
+            ``short_description``
                 Brief description of the signal.

-            ``description``
+            ``description``
                 Full description of the signal.

-            ``geo_types``
-                Spatial resolution of the signal (e.g., `county`, `hrr`, `msa`, `dma`, `state`).
-                More detail about all `geo_types` is given in the `geographic coding documentation
+            ``geo_types``
+                Spatial resolution of the signal (e.g., `county`, `hrr`, `msa`,
+                `dma`, `state`). More detail about all `geo_types` is given in the
+                `geographic coding documentation
                 `_.

-            ``time_type``
-                Temporal resolution of the signal (e.g., day, week; see
-                `date coding details `_).
+            ``time_type``
+                Temporal resolution of the signal (e.g., day, week; see `date coding
+                details
+                `_).

-            ``time_label``
+            ``time_label``
                 The time label ("Date", "Week").

-            ``value_label``
-                The value label ("Value", "Percentage", "Visits", "Visits per 100,000 people").
+            ``value_label``
+                The value label ("Value", "Percentage", "Visits", "Visits per
+                100,000 people").

-            ``format``
+            ``format``
                 The value format ("per100k", "percent", "fraction", "count", "raw").

-            ``category``
+            ``category``
                 The signal category ("early", "public", "late", "other").

-            ``high_values_are``
-                What the higher value of signal indicates ("good", "bad", "neutral").
+            ``high_values_are``
+                What the higher value of signal indicates ("good", "bad",
+                "neutral").

-            ``is_smoothed``
+            ``is_smoothed``
                 Whether the signal is smoothed.

-            ``is_weighted``
+            ``is_weighted``
                 Whether the signal is weighted.

-            ``is_cumulative``
+            ``is_cumulative``
                 Whether the signal is cumulative.

-            ``has_stderr``
+            ``has_stderr``
                 Whether the signal has `stderr` statistic.

-            ``has_sample_size``
+            ``has_sample_size``
                 Whether the signal has `sample_size` statistic.
         """
-        return DataSignal.to_df(self.signals)
-
-    def get_signal(self, source: str, signal: str) -> Optional[DataSignal[CALL_TYPE]]:
-        return self._signals_by_key.get((source, signal))
-
-    @property
-    def signal_names(self) -> Iterable[Tuple[str, str]]:
-        return self._signals_by_key.keys()
-
-    def __iter__(self) -> Iterable[DataSource[CALL_TYPE]]:
-        return iter(self.sources)
+        return DataSignal.to_df(self._signals_by_key.values())

     @overload
-    def __getitem__(self, source: str) -> DataSource[CALL_TYPE]: ...
+    def __getitem__(self, source: str, /) -> DataSource[CALL_TYPE]: ...

     @overload
-    def __getitem__(self, source_signal: Tuple[str, str]) -> DataSignal[CALL_TYPE]: ...
+    def __getitem__(self, source_signal: Tuple[str, str], /) -> DataSignal[CALL_TYPE]: ...
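+    # For example (hypothetical usage): epidata["fb-survey"] returns the
+    # DataSource, while epidata[("fb-survey", "smoothed_cli")] returns the
+    # corresponding DataSignal; both assert that the requested key exists.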
     def __getitem__(
         self, source_signal: Union[str, Tuple[str, str]]
     ) -> Union[DataSource[CALL_TYPE], DataSignal[CALL_TYPE]]:
         if isinstance(source_signal, str):
-            r = self.get_source(source_signal)
+            r = self._source_by_name.get(source_signal)
             assert r is not None
             return r
-        s = self.get_signal(source_signal[0], source_signal[1])
+        s = self._signals_by_key.get((source_signal[0], source_signal[1]))
         assert s is not None
         return s

     @staticmethod
     def create(
         meta: List[Dict],
-        create_call: Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE],
+        create_call: Callable[[Mapping[str, Optional[EpiRangeParam]]], CALL_TYPE],
     ) -> "CovidcastDataSources":
         source_fields = fields(DataSource)
         sources = [DataSource(_create_call=create_call, **_limit_fields(k, source_fields)) for k in meta]
diff --git a/epidatpy/_endpoints.py b/epidatpy/_endpoints.py
index 2459870..1caf928 100644
--- a/epidatpy/_endpoints.py
+++ b/epidatpy/_endpoints.py
@@ -1,88 +1,50 @@
-from abc import ABC, abstractmethod
-from datetime import date
-from typing import Generic, Iterable, Literal, Mapping, Optional, Union, Sequence
 import warnings
+from abc import ABC, abstractmethod
+from typing import (
+    Generic,
+    Literal,
+    Mapping,
+    Optional,
+    Sequence,
+    Union,
+)

 from epiweeks import Week
+
+from ._covidcast import GeoType, TimeType, define_covidcast_fields
 from ._model import (
-    EpiRangeLike,
+    CALL_TYPE,
+    EpidataFieldInfo,
+    EpidataFieldType,
+    EpiRange,
     EpiRangeParam,
+    IntParam,
     InvalidArgumentException,
+    ParamType,
     StringParam,
-    IntParam,
-    EpiRange,
-    EPI_RANGE_TYPE,
-    EpidataFieldInfo,
-    EpidataFieldType,
-    CALL_TYPE,
 )
-from ._covidcast import define_covidcast_fields, GeoType, TimeType
+from ._parse import parse_user_date_or_week


-def get_wildcard_equivalent_dates(
-    time_value: str, time_type: Literal["day", "week"]
-) -> str:
-    if time_value == "*":
+def get_wildcard_equivalent_dates(time_value: EpiRangeParam, time_type: Literal["day", "week"]) -> EpiRangeParam:
+    if isinstance(time_value, str) and time_value == "*":
         if time_type == "day":
             return EpiRange("10000101", "30000101")
-        elif time_type == "week":
+        if time_type == "week":
             return EpiRange("100001", "300001")
     return time_value


-def reformat_epirange(epirange: EpiRange, to_type: str) -> EpiRange:
-    """Reformat from week to day or vice versa or noop."""
-    if to_type not in ("day", "week"):
-        raise InvalidArgumentException("`to_type` must be 'day' or 'week'")
-
-    if (
-        to_type == "day"
-        and isinstance(epirange.start, (str, int))
-        and len(str(epirange.start)) == 6
-    ):
-        coercion_msg = (
-            "`collection_weeks` is in week format but `pub_covid_hosp_facility`"
-            "expects day format; dates will be converted to day format but may not"
-            "correspond exactly to desired time range"
-        )
-        warnings.warn(coercion_msg, UserWarning)
-        epirange = EpiRange(
-            parse_api_week(epirange.start), parse_api_week(epirange.end)
-        )
-    elif (
-        to_type == "week"
-        and isinstance(epirange.start, (int, str))
-        and len(str(epirange.start)) == 8
-    ):
-        epirange = EpiRange(
-            format_epiweek(epirange.start), format_epiweek(epirange.end)
-        )
-
-    return epirange
-
-
-def parse_api_week(value: Union[str, int]) -> date:
-    return Week.fromstring(str(value)).startdate()
-
-
-def format_epiweek(value: Union[str, int]) -> str:
-    return Week.fromstring(str(value)).cdcformat()
-
-
 class AEpiDataEndpoints(ABC, Generic[CALL_TYPE]):
     """
     epidata endpoint list and fetcher
     """

-    @staticmethod
-    def range(from_: EPI_RANGE_TYPE, to: EPI_RANGE_TYPE) -> EpiRange[EPI_RANGE_TYPE]:
-        return EpiRange[EPI_RANGE_TYPE](from_, to)
-
     @abstractmethod
     def _create_call(
         self,
         endpoint: str,
-        params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]],
+        params: Mapping[str, Optional[ParamType]],
         meta: Optional[Sequence[EpidataFieldInfo]] = None,
         only_supports_classic: bool = False,
     ) -> CALL_TYPE:
@@ -97,11 +59,6 @@ def pvt_cdc(
         """Fetch CDC page hits."""
         epiweeks = get_wildcard_equivalent_dates(epiweeks, "day")

-        if auth is None or epiweeks is None or locations is None:
-            raise InvalidArgumentException(
-                "`auth`, `epiweeks`, and `locations` are all required"
-            )
-
         return self._create_call(
             "cdc/",
             {"auth": auth, "epiweeks": epiweeks, "locations": locations},
@@ -132,9 +89,7 @@ def pub_covid_hosp_facility_lookup(
         """Lookup COVID hospitalization facility identifiers."""

         if all((v is None for v in (state, ccn, city, zip, fips_code))):
-            raise InvalidArgumentException(
-                "one of `state`, `ccn`, `city`, `zip`, or `fips_code` is required"
-            )
+            raise InvalidArgumentException("one of `state`, `ccn`, `city`, `zip`, or `fips_code` is required")

         return self._create_call(
             "covid_hosp_facility_lookup/",
@@ -162,25 +117,36 @@ def pub_covid_hosp_facility(
     def pub_covid_hosp_facility(
         self,
         hospital_pks: StringParam,
-        collection_weeks: StringParam = "*",
+        collection_weeks: EpiRangeParam = "*",
         publication_dates: Optional[EpiRangeParam] = None,
     ) -> CALL_TYPE:
         """Fetch COVID hospitalization data for specific facilities."""

-        if hospital_pks is None or collection_weeks is None:
-            raise InvalidArgumentException(
-                "`hospital_pks` and `collection_weeks` are both required"
-            )
-
         collection_weeks = get_wildcard_equivalent_dates(collection_weeks, "day")

         # Confusingly, the endpoint expects `collection_weeks` to be in day format,
         # but correspond to epiweeks. Allow `collection_weeks` to be provided in
-        # either day or week format.
-        if isinstance(collection_weeks, EpiRange):
-            collection_weeks = reformat_epirange(collection_weeks, to_type="day")
-        elif isinstance(collection_weeks, (str, int)):
-            collection_weeks = parse_api_week(collection_weeks)
+        # either day or week format and convert to day format.
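+        # For example (hypothetical values): collection_weeks="20200101" is
+        # already a day string and passes through unchanged below, while
+        # collection_weeks="202001" (or 202001) looks like an epiweek and is
+        # converted to day format.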
+        parsed_weeks = collection_weeks
+        if isinstance(collection_weeks, EpiRange) and isinstance(collection_weeks.start, Week):
+            warnings.warn(
+                "`collection_weeks` is in week format but `pub_covid_hosp_facility` "
+                "expects day format; dates will be converted to day format but may not "
+                "correspond exactly to desired time range",
+                UserWarning,
+            )
+            parsed_weeks = EpiRange(
+                parse_user_date_or_week(collection_weeks.start, "day"),
+                parse_user_date_or_week(collection_weeks.end, "day"),
+            )
+        elif isinstance(collection_weeks, (str, int)) and len(str(collection_weeks)) == 6:
+            warnings.warn(
+                "`collection_weeks` is in week format but `pub_covid_hosp_facility` "
+                "expects day format; dates will be converted to day format but may not "
+                "correspond exactly to desired time range",
+                UserWarning,
+            )
+            parsed_weeks = parse_user_date_or_week(collection_weeks, "day")

         fields_string = [
             "hospital_pk",
@@ -284,7 +250,7 @@ def pub_covid_hosp_facility(
             "covid_hosp_facility/",
             {
                 "hospital_pks": hospital_pks,
-                "collection_weeks": collection_weeks,
+                "collection_weeks": parsed_weeks,
                 "publication_dates": publication_dates,
             },
             [
@@ -306,13 +272,8 @@ def pub_covid_hosp_state_timeseries(
     ) -> CALL_TYPE:
         """Fetch COVID hospitalization data."""

-        if states is None or dates is None:
-            raise InvalidArgumentException("`states` and `dates` are both required")
-
         if issues is not None and as_of is not None:
-            raise InvalidArgumentException(
-                "`issues` and `as_of` are mutually exclusive"
-            )
+            raise InvalidArgumentException("`issues` and `as_of` are mutually exclusive")

         dates = get_wildcard_equivalent_dates(dates, "day")

@@ -489,36 +450,18 @@ def pub_covidcast(
         signals: StringParam,
         geo_type: GeoType,
         time_type: TimeType,
-        geo_values: Union[int, str, Iterable[Union[int, str]]] = "*",
+        geo_values: Union[str, Sequence[str]] = "*",
         time_values: EpiRangeParam = "*",
         as_of: Union[None, str, int] = None,
         issues: Optional[EpiRangeParam] = None,
         lag: Optional[int] = None,
     ) -> CALL_TYPE:
         """Fetch Delphi's COVID-19 Surveillance Streams"""
-        if any(
-            v is None
-            for v in (
-                data_source,
-                signals,
-                time_type,
-                geo_type,
-                time_values,
-                geo_values,
-            )
-        ):
-            raise InvalidArgumentException(
-                "`data_source`, `signals`, `time_type`, `geo_type`, `time_values`, and `geo_values` are all required."
-            )
         if sum([issues is not None, lag is not None, as_of is not None]) > 1:
-            raise InvalidArgumentException(
-                "`issues`, `lag`, and `as_of` are mutually exclusive."
-            )
+            raise InvalidArgumentException("`issues`, `lag`, and `as_of` are mutually exclusive.")
         if data_source == "nchs-mortality" and time_type != "week":
-            raise InvalidArgumentException(
-                "nchs-mortality data source only supports the week time type."
-            )
+            raise InvalidArgumentException("nchs-mortality data source only supports the week time type.")

         return self._create_call(
             "covidcast/",
@@ -539,8 +482,6 @@ def pub_covidcast(

     def pub_delphi(self, system: str, epiweek: Union[int, str]) -> CALL_TYPE:
         """Fetch Delphi's forecast."""
-        if system is None or epiweek is None:
-            raise InvalidArgumentException("`system` and `epiweek` are both required")
         return self._create_call(
             "delphi/",
             {"system": system, "epiweek": epiweek},
@@ -552,17 +493,10 @@ def pub_delphi(self, system: str, epiweek: Union[int, str]) -> CALL_TYPE:
             only_supports_classic=True,
         )

-    def pub_dengue_nowcast(
-        self, locations: StringParam, epiweeks: EpiRangeParam = "*"
-    ) -> CALL_TYPE:
+    def pub_dengue_nowcast(self, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL_TYPE:
         """Fetch Delphi's dengue nowcast."""
         epiweeks = get_wildcard_equivalent_dates(epiweeks, "week")

-        if locations is None or epiweeks is None:
-            raise InvalidArgumentException(
-                "`locations` and `epiweeks` are both required"
-            )
-
         return self._create_call(
             "dengue_nowcast/",
             {"locations": locations, "epiweeks": epiweeks},
@@ -584,11 +518,6 @@ def pvt_dengue_sensors(
         """Fetch Delphi's digital surveillance sensors."""
         epiweeks = get_wildcard_equivalent_dates(epiweeks, "week")

-        if auth is None or names is None or locations is None or epiweeks is None:
-            raise InvalidArgumentException(
-                "`auth`, `names`, `locations`, and `epiweeks` are all required"
-            )
-
         return self._create_call(
             "dengue_sensors/",
             {
@@ -615,9 +544,6 @@ def pub_ecdc_ili(
         """Fetch ECDC ILI data."""
         epiweeks = get_wildcard_equivalent_dates(epiweeks, "week")

-        if regions is None or epiweeks is None:
-            raise InvalidArgumentException("`regions` and `epiweeks` are both required")
-
         if issues is not None and lag is not None:
             raise InvalidArgumentException("`issues` and `lag` are mutually exclusive")

@@ -644,11 +570,6 @@ def pub_flusurv(
         """Fetch FluSurv data."""
         epiweeks = get_wildcard_equivalent_dates(epiweeks, "week")

-        if locations is None or epiweeks is None:
-            raise InvalidArgumentException(
-                "`locations` and `epiweeks` are both required"
-            )
-
         if issues is not None and lag is not None:
             raise InvalidArgumentException("`issues` and `lag` are mutually exclusive")

@@ -685,9 +606,6 @@ def pub_fluview_clinical(
         """Fetch FluView clinical data."""
         epiweeks = get_wildcard_equivalent_dates(epiweeks, "week")

-        if regions is None or epiweeks is None:
-            raise InvalidArgumentException("`regions` and `epiweeks` are both required")
-
         if issues is not None and lag is not None:
             raise InvalidArgumentException("`issues` and `lag` are mutually exclusive")

@@ -730,9 +648,6 @@ def pub_fluview(
     ) -> CALL_TYPE:
         epiweeks = get_wildcard_equivalent_dates(epiweeks, "week")

-        if regions is None or epiweeks is None:
-            raise InvalidArgumentException("`regions` and `epiweeks` are both required")
-
         if issues is not None and lag is not None:
             raise InvalidArgumentException("`issues` and `lag` are mutually exclusive")

@@ -764,17 +679,10 @@ def pub_fluview(
         ],
     )

-    def pub_gft(
-        self, locations: StringParam, epiweeks: EpiRangeParam = "*"
-    ) -> CALL_TYPE:
+    def pub_gft(self, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL_TYPE:
         """Fetch Google Flu Trends data."""
         epiweeks = get_wildcard_equivalent_dates(epiweeks, "week")

-        if locations is None or epiweeks is None:
-            raise InvalidArgumentException(
-                "`locations` and `epiweeks` are both required"
-            )
-
         return self._create_call(
             "gft/",
             {"locations": locations, "epiweeks": epiweeks},
@@ -793,10 +701,8 @@
def pvt_ght( query: str = "", ) -> CALL_TYPE: """Fetch Google Health Trends data.""" - if auth is None or locations is None or epiweeks is None or query == "": - raise InvalidArgumentException( - "`auth`, `locations`, `epiweeks`, and `query` are all required" - ) + if auth is None or locations is None or query == "": + raise InvalidArgumentException("`auth`, `locations`, `epiweeks`, and `query` are all required") return self._create_call( "ght/", @@ -823,8 +729,6 @@ def pub_kcdc_ili( """Fetch KCDC ILI data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if regions is None or epiweeks is None: - raise InvalidArgumentException("`regions` and `epiweeks` are both required") if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and `lag` are mutually exclusive") @@ -843,8 +747,6 @@ def pub_kcdc_ili( def pvt_meta_norostat(self, auth: str) -> CALL_TYPE: """Fetch NoroSTAT metadata.""" - if auth is None: - raise InvalidArgumentException("`auth` is required") return self._create_call( "meta_norostat/", {"auth": auth}, @@ -859,17 +761,10 @@ def pub_meta(self) -> CALL_TYPE: only_supports_classic=True, ) - def pub_nidss_dengue( - self, locations: StringParam, epiweeks: EpiRangeParam = "*" - ) -> CALL_TYPE: + def pub_nidss_dengue(self, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL_TYPE: """Fetch NIDSS dengue data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if locations is None or epiweeks is None: - raise InvalidArgumentException( - "`locations` and `epiweeks` are both required" - ) - return self._create_call( "nidss_dengue/", {"locations": locations, "epiweeks": epiweeks}, @@ -890,8 +785,6 @@ def pub_nidss_flu( """Fetch NIDSS flu data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if regions is None or epiweeks is None: - raise InvalidArgumentException("`regions` and `epiweeks` are both required") if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and `lag` are mutually exclusive") @@ -909,17 +802,10 @@ def pub_nidss_flu( ], ) - def pvt_norostat( - self, auth: str, location: str, epiweeks: EpiRangeParam = "*" - ) -> CALL_TYPE: + def pvt_norostat(self, auth: str, location: str, epiweeks: EpiRangeParam = "*") -> CALL_TYPE: """Fetch NoroSTAT data (point data, no min/max).""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if auth is None or location is None or epiweeks is None: - raise InvalidArgumentException( - "`auth`, `location`, and `epiweeks` are all required" - ) - return self._create_call( "norostat/", {"auth": auth, "epiweeks": epiweeks, "location": location}, @@ -930,17 +816,10 @@ def pvt_norostat( ], ) - def pub_nowcast( - self, locations: StringParam, epiweeks: EpiRangeParam = "*" - ) -> CALL_TYPE: + def pub_nowcast(self, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL_TYPE: """Fetch Delphi's wILI nowcast.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if locations is None or epiweeks is None: - raise InvalidArgumentException( - "`locations` and `epiweeks` are both required" - ) - return self._create_call( "nowcast/", {"locations": locations, "epiweeks": epiweeks}, @@ -962,8 +841,6 @@ def pub_paho_dengue( """Fetch PAHO Dengue data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if regions is None or epiweeks is None: - raise InvalidArgumentException("`regions` and `epiweeks` are both required") if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and `lag` are mutually 
exclusive") @@ -985,17 +862,10 @@ def pub_paho_dengue( ], ) - def pvt_quidel( - self, auth: str, locations: StringParam, epiweeks: EpiRangeParam = "*" - ) -> CALL_TYPE: + def pvt_quidel(self, auth: str, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL_TYPE: """Fetch Quidel data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if auth is None or epiweeks is None or locations is None: - raise InvalidArgumentException( - "`auth`, `epiweeks`, and `locations` are all required" - ) - return self._create_call( "quidel/", {"auth": auth, "epiweeks": epiweeks, "locations": locations}, @@ -1016,11 +886,6 @@ def pvt_sensors( """Fetch Delphi's digital surveillance sensors.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if auth is None or names is None or locations is None or epiweeks is None: - raise InvalidArgumentException( - "`auth`, `names`, `locations`, and `epiweeks` are all required" - ) - return self._create_call( "sensors/", { @@ -1041,13 +906,10 @@ def pvt_twitter( self, auth: str, locations: StringParam, - time_type: str, - time_values: EpiRangeLike = "*", + time_type: Literal["day", "week"], + time_values: EpiRangeParam = "*", ) -> CALL_TYPE: """Fetch HealthTweets data.""" - if time_type not in ["day", "week"]: - raise InvalidArgumentException("`time_type` must be 'day' or 'week'") - if time_type == "day": dates = time_values epiweeks = None @@ -1057,14 +919,6 @@ def pvt_twitter( dates = None epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if auth is None or locations is None: - raise InvalidArgumentException("`auth` and `locations` are both required") - - if not (dates is None) ^ (epiweeks is None): - raise InvalidArgumentException( - "exactly one of `dates` and `epiweeks` is required" - ) - time_field = ( EpidataFieldInfo("date", EpidataFieldType.date) if dates @@ -1091,16 +945,12 @@ def pvt_twitter( def pub_wiki( self, articles: StringParam, - time_type: str, - time_values: EpiRangeLike = "*", + time_type: Literal["day", "week"], + time_values: EpiRangeParam = "*", hours: Optional[IntParam] = None, language: str = "en", ) -> CALL_TYPE: """Fetch Wikipedia access data.""" - - if time_type not in ["day", "week"]: - raise InvalidArgumentException("`time_type` must be 'day' or 'week'") - if time_type == "day": dates = time_values epiweeks = None @@ -1110,20 +960,6 @@ def pub_wiki( dates = None epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if articles is None: - raise InvalidArgumentException("`articles` is required") - - if not (dates is None) ^ (epiweeks is None): - raise InvalidArgumentException( - "exactly one of `dates` and `epiweeks` is required" - ) - - time_field = ( - EpidataFieldInfo("date", EpidataFieldType.date) - if dates - else EpidataFieldInfo("epiweek", EpidataFieldType.epiweek) - ) - return self._create_call( "wiki/", { @@ -1135,7 +971,11 @@ def pub_wiki( }, [ EpidataFieldInfo("article", EpidataFieldType.text), - time_field, + ( + EpidataFieldInfo("date", EpidataFieldType.date) + if dates + else EpidataFieldInfo("epiweek", EpidataFieldType.epiweek) + ), EpidataFieldInfo("count", EpidataFieldType.int), EpidataFieldInfo("total", EpidataFieldType.int), EpidataFieldInfo("hour", EpidataFieldType.int), diff --git a/epidatpy/_model.py b/epidatpy/_model.py index b6db10e..6e401a7 100644 --- a/epidatpy/_model.py +++ b/epidatpy/_model.py @@ -1,36 +1,41 @@ from dataclasses import dataclass, field -from enum import Enum from datetime import date -from urllib.parse import urlencode +from enum import Enum from typing 
diff --git a/epidatpy/_model.py b/epidatpy/_model.py
index b6db10e..6e401a7 100644
--- a/epidatpy/_model.py
+++ b/epidatpy/_model.py
@@ -1,36 +1,41 @@
 from dataclasses import dataclass, field
-from enum import Enum
 from datetime import date
-from urllib.parse import urlencode
+from enum import Enum
 from typing import (
-    Any,
-    Dict,
     Final,
-    Generic,
-    Iterable,
     List,
+    Literal,
     Mapping,
     Optional,
     Sequence,
     Tuple,
-    TypeVar,
     TypedDict,
+    TypeVar,
     Union,
     cast,
 )
+from urllib.parse import urlencode
+
 from epiweeks import Week
-from pandas import DataFrame, CategoricalDtype

 from ._parse import (
     parse_api_date,
-    parse_api_week,
     parse_api_date_or_week,
-    fields_to_predicate,
+    parse_api_week,
+    parse_user_date_or_week,
 )

+GeoType = Literal["nation", "msa", "hrr", "hhs", "state", "county"]
+TimeType = Literal["day", "week"]
 EpiDateLike = Union[int, str, date, Week]
 EpiRangeDict = TypedDict("EpiRangeDict", {"from": EpiDateLike, "to": EpiDateLike})
 EpiRangeLike = Union[int, str, "EpiRange", EpiRangeDict, date, Week]
+EpiRangeParam = Union[EpiRangeLike, Sequence[EpiRangeLike]]
+StringParam = Union[str, Sequence[str]]
+IntParam = Union[int, Sequence[int]]
+ParamType = Union[StringParam, IntParam, EpiRangeParam]
+EpiDataResponse = TypedDict("EpiDataResponse", {"result": int, "message": str, "epidata": List})
+CALL_TYPE = TypeVar("CALL_TYPE")


 def format_date(d: EpiDateLike) -> str:
@@ -56,25 +61,22 @@ def format_item(value: EpiRangeLike) -> str:
         return str(value)


-def format_list(values: Union[EpiRangeLike, Iterable[EpiRangeLike]]) -> str:
+def format_list(values: EpiRangeParam) -> str:
     """Turn a list/tuple of values/ranges into a comma-separated string."""
-    list_values = values if isinstance(values, (list, tuple, set)) else [values]
-    return ",".join([format_item(value) for value in list_values])
-
-
-EPI_RANGE_TYPE = TypeVar("EPI_RANGE_TYPE", int, date, str, Week)
+    if isinstance(values, Sequence) and not isinstance(values, str):
+        return ",".join([format_item(value) for value in values])
+    return format_item(values)


-@dataclass(repr=False)
-class EpiRange(Generic[EPI_RANGE_TYPE]):
+class EpiRange:
     """
     Range object for dates/epiweeks
     """

-    start: EPI_RANGE_TYPE
-    end: EPI_RANGE_TYPE
-
-    def __post_init__(self) -> None:
+    def __init__(self, start: EpiDateLike, end: EpiDateLike) -> None:
+        # check if types are correct
+        self.start = parse_user_date_or_week(start)
+        self.end = parse_user_date_or_week(end)
         # swap if wrong order
         # complicated construct for typing inference
         if self.end < self.start:
@@ -87,27 +89,6 @@ def __str__(self) -> str:
         return f"{format_date(self.start)}-{format_date(self.end)}"


-EpiDataResponse = TypedDict(
-    "EpiDataResponse", {"result": int, "message": str, "epidata": List}
-)
-
-
-EpiRangeParam = Union[EpiRangeLike, Iterable[EpiRangeLike]]
-StringParam = Union[str, Iterable[str]]
-IntParam = Union[int, Iterable[int]]
-
-
-class EpiDataFormatType(str, Enum):
-    """
-    possible formatting options for API calls
-    """
-
-    json = "json"
-    classic = "classic"
-    csv = "csv"
-    jsonl = "jsonl"
-
-
 class InvalidArgumentException(Exception):
     """
     exception for an invalid argument
@@ -147,9 +128,6 @@ class EpidataFieldInfo:
     categories: Final[Sequence[str]] = field(default_factory=list)


-CALL_TYPE = TypeVar("CALL_TYPE")
-
-
 def add_endpoint_to_url(url: str, endpoint: str) -> str:
     if not url.endswith("/"):
         url += "/"
@@ -164,7 +142,7 @@ class AEpiDataCall:

     _base_url: Final[str]
     _endpoint: Final[str]
-    _params: Final[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]]
+    _params: Final[Mapping[str, Optional[EpiRangeParam]]]
     meta: Final[Sequence[EpidataFieldInfo]]
     meta_by_name: Final[Mapping[str, EpidataFieldInfo]]
     only_supports_classic: Final[bool]
@@ -173,7 +151,7 @@ def __init__(
         self,
         base_url: str,
         endpoint: str,
-        params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]],
+        params: Mapping[str, Optional[EpiRangeParam]],
         meta: Optional[Sequence[EpidataFieldInfo]] = None,
         only_supports_classic: bool = False,
     ) -> None:
@@ -190,57 +168,46 @@ def _verify_parameters(self) -> None:

     def _formatted_parameters(
         self,
-        format_type: Optional[EpiDataFormatType] = None,
-        fields: Optional[Iterable[str]] = None,
+        fields: Optional[Sequence[str]] = None,
     ) -> Mapping[str, str]:
         """
         format this call into a [URL, Params] tuple
         """
         all_params = dict(self._params)
-        if format_type and format_type != EpiDataFormatType.classic:
-            all_params["format"] = format_type
         if fields:
             all_params["fields"] = fields
         return {k: format_list(v) for k, v in all_params.items() if v is not None}

     def request_arguments(
         self,
-        format_type: Optional[EpiDataFormatType] = None,
-        fields: Optional[Iterable[str]] = None,
+        fields: Optional[Sequence[str]] = None,
     ) -> Tuple[str, Mapping[str, str]]:
         """
         format this call into a [URL, Params] tuple
         """
-        formatted_params = self._formatted_parameters(format_type, fields)
-        full_url = self._full_url()
+        formatted_params = self._formatted_parameters(fields)
+        full_url = add_endpoint_to_url(self._base_url, self._endpoint)
         return full_url, formatted_params

-    def _full_url(self) -> str:
-        """
-        combines the endpoint with the given base url
-        """
-        return add_endpoint_to_url(self._base_url, self._endpoint)
-
     def request_url(
         self,
-        format_type: Optional[EpiDataFormatType] = None,
-        fields: Optional[Iterable[str]] = None,
+        fields: Optional[Sequence[str]] = None,
     ) -> str:
         """
         format this call into a full HTTP request url with encoded parameters
         """
         self._verify_parameters()
-        u, p = self.request_arguments(format_type, fields)
+        u, p = self.request_arguments(fields)
         query = urlencode(p)
         if query:
             return f"{u}?{query}"
         return u

     def __repr__(self) -> str:
-        return f"EpiDataCall(endpoint={self._endpoint}, params={self._formatted_parameters()})"
+        return str(self)

     def __str__(self) -> str:
-        return self.request_url()
+        return f"EpiDataCall(endpoint={self._endpoint}, params={self._formatted_parameters()})"

     def _parse_value(
         self,
@@ -268,44 +235,4 @@ def _parse_row(
     ) -> Mapping[str, Union[str, float, int, date, None]]:
         if not self.meta:
             return row
-        return {
-            k: self._parse_value(k, v, disable_date_parsing) for k, v in row.items()
-        }
-
-    def _as_df(
-        self,
-        rows: Sequence[Mapping[str, Union[str, float, int, date, None]]],
-        fields: Optional[Iterable[str]] = None,
-        disable_date_parsing: Optional[bool] = False,
-    ) -> DataFrame:
-        pred = fields_to_predicate(fields)
-        columns: List[str] = [info.name for info in self.meta if pred(info.name)]
-        df = DataFrame(rows, columns=columns or None)
-
-        data_types: Dict[str, Any] = {}
-        for info in self.meta:
-            if not pred(info.name) or df[info.name].isnull().values.all():
-                continue
-            if info.type == EpidataFieldType.bool:
-                data_types[info.name] = bool
-            elif info.type == EpidataFieldType.categorical:
-                data_types[info.name] = CategoricalDtype(
-                    categories=info.categories or None, ordered=True
-                )
-            elif info.type == EpidataFieldType.int:
-                data_types[info.name] = int
-            elif info.type in (
-                EpidataFieldType.date,
-                EpidataFieldType.epiweek,
-                EpidataFieldType.date_or_epiweek,
-            ):
-                data_types[info.name] = (
-                    int if disable_date_parsing else "datetime64[ns]"
-                )
-            elif info.type == EpidataFieldType.float:
-                data_types[info.name] = float
-            else:
-                data_types[info.name] = str
-        if data_types:
-            df = df.astype(data_types)
-        return df
+        return {k: self._parse_value(k, v, disable_date_parsing) for k, v in row.items()}
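`EpiRange` stops being a generic dataclass and instead normalizes both endpoints through `parse_user_date_or_week`, so mixed input types are coerced to comparable values before the swap check runs. A small sketch of the intended behavior (values are illustrative, consistent with the tests below):

from datetime import date
from epidatpy import EpiRange

r = EpiRange(date(2021, 4, 10), 20210405)  # mixed types are parsed; reversed bounds are swapped
print(r)  # 20210405-20210410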
diff --git a/epidatpy/_parse.py b/epidatpy/_parse.py
index 1ffcfa9..9d65d72 100644
--- a/epidatpy/_parse.py
+++ b/epidatpy/_parse.py
@@ -1,7 +1,6 @@
-from typing import Callable, Iterable, Optional, Set, cast
-
-from typing import Union
 from datetime import date, datetime
+from typing import Callable, Literal, Optional, Sequence, Set, Union
+
 from epiweeks import Week

@@ -15,7 +14,7 @@ def parse_api_date(value: Union[str, int, float, None]) -> Optional[date]:
 def parse_api_week(value: Union[str, int, float, None]) -> Optional[date]:
     if value is None:
         return None
-    return cast(date, Week.fromstring(str(value)).startdate())
+    return Week.fromstring(str(value)).startdate()


 def parse_api_date_or_week(value: Union[str, int, float, None]) -> Optional[date]:
@@ -23,15 +22,50 @@ def parse_api_date_or_week(value: Union[str, int, float, None]) -> Optional[date]:
         return None
     v = str(value)
     if len(v) == 6:
-        d = cast(date, Week.fromstring(v).startdate())
+        d = Week.fromstring(v).startdate()
     else:
         d = datetime.strptime(v, "%Y%m%d").date()
     return d


-def fields_to_predicate(
-    fields: Optional[Iterable[str]] = None,
-) -> Callable[[str], bool]:
+def parse_user_date_or_week(
+    value: Union[str, int, date, Week], out_type: Literal["day", "week", None] = None
+) -> Union[date, Week]:
+    if isinstance(value, Week):
+        if out_type == "day":
+            return value.startdate()
+        return value
+
+    if isinstance(value, date):
+        if out_type == "week":
+            return Week.fromdate(value)
+        return value
+
+    value = str(value)
+    if out_type == "week":
+        if len(value) == 6:
+            return Week.fromstring(value)
+        if len(value) == 8:
+            return Week.fromdate(datetime.strptime(value, "%Y%m%d").date())
+        if len(value) == 10:
+            return Week.fromdate(datetime.strptime(value, "%Y-%m-%d").date())
+    if out_type == "day":
+        if len(value) == 8:
+            return datetime.strptime(value, "%Y%m%d").date()
+        if len(value) == 10:
+            return datetime.strptime(value, "%Y-%m-%d").date()
+    if out_type is None:
+        if len(value) == 6:
+            return Week.fromstring(value)
+        if len(value) == 8:
+            return datetime.strptime(value, "%Y%m%d").date()
+        if len(value) == 10:
+            return datetime.strptime(value, "%Y-%m-%d").date()
+
+    raise ValueError(f"Cannot parse date or week from {value}")
+
+
+def fields_to_predicate(fields: Optional[Sequence[str]] = None) -> Callable[[str], bool]:
     if not fields:
         return lambda _: True
     to_include: Set[str] = set()
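`parse_user_date_or_week` dispatches on input type and string length (6 digits = epiweek, 8 or 10 characters = date), optionally coercing to the requested `out_type`. Illustrative inputs and outputs, assuming the code above:

from epidatpy._parse import parse_user_date_or_week

parse_user_date_or_week(202101)                       # Week(2021, 1)
parse_user_date_or_week("2021-01-15")                 # date(2021, 1, 15)
parse_user_date_or_week("20210115", out_type="week")  # the Week containing 2021-01-15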
diff --git a/epidatpy/async_request.py b/epidatpy/async_request.py
deleted file mode 100644
index 904cce3..0000000
--- a/epidatpy/async_request.py
+++ /dev/null
@@ -1,287 +0,0 @@
-from datetime import date
-from typing import (
-    AsyncGenerator,
-    Callable,
-    Coroutine,
-    Dict,
-    Final,
-    Iterable,
-    List,
-    Mapping,
-    Optional,
-    Sequence,
-    Union,
-    cast,
-)
-from json import loads
-
-from asyncio import get_event_loop, gather
-from aiohttp import TCPConnector, ClientSession, ClientResponse
-from pandas import DataFrame
-
-from ._model import (
-    EpiRangeLike,
-    AEpiDataCall,
-    EpiDataFormatType,
-    EpiDataResponse,
-    EpiRange,
-    EpidataFieldInfo,
-    OnlySupportsClassicFormatException,
-    add_endpoint_to_url,
-)
-from ._endpoints import AEpiDataEndpoints
-from ._constants import HTTP_HEADERS, BASE_URL
-from ._covidcast import CovidcastDataSources, define_covidcast_fields
-
-
-async def _async_request(
-    url: str, params: Mapping[str, str], session: Optional[ClientSession] = None
-) -> ClientResponse:
-    async def call_impl(s: ClientSession) -> ClientResponse:
-        res = await s.get(url, params=params, headers=HTTP_HEADERS)
-        if res.status == 414:
-            return await s.post(url, params=params, headers=HTTP_HEADERS)
-        return res
-
-    if session:
-        return await call_impl(session)
-
-    async with ClientSession() as s:
-        return await call_impl(s)
-
-
-class EpiDataAsyncCall(AEpiDataCall):
-    """
-    async version of an epidata call
-    """
-
-    _session: Final[Optional[ClientSession]]
-
-    def __init__(
-        self,
-        base_url: str,
-        session: Optional[ClientSession],
-        endpoint: str,
-        params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]],
-        meta: Optional[Sequence[EpidataFieldInfo]] = None,
-        only_supports_classic: bool = False,
-    ) -> None:
-        super().__init__(base_url, endpoint, params, meta, only_supports_classic)
-        self._session = session
-
-    def with_base_url(self, base_url: str) -> "EpiDataAsyncCall":
-        return EpiDataAsyncCall(base_url, self._session, self._endpoint, self._params)
-
-    def with_session(self, session: ClientSession) -> "EpiDataAsyncCall":
-        return EpiDataAsyncCall(self._base_url, session, self._endpoint, self._params)
-
-    async def _call(
-        self,
-        format_type: Optional[EpiDataFormatType] = None,
-        fields: Optional[Iterable[str]] = None,
-    ) -> ClientResponse:
-        url, params = self.request_arguments(format_type, fields)
-        return await _async_request(url, params, self._session)
-
-    async def classic(
-        self,
-        fields: Optional[Iterable[str]] = None,
-        disable_date_parsing: Optional[bool] = False,
-    ) -> EpiDataResponse:
-        """Request and parse epidata in CLASSIC message format."""
-        self._verify_parameters()
-        try:
-            response = await self._call(None, fields)
-            r = cast(EpiDataResponse, await response.json())
-            epidata = r.get("epidata")
-            if epidata and isinstance(epidata, list) and len(epidata) > 0 and isinstance(epidata[0], dict):
-                r["epidata"] = [self._parse_row(row, disable_date_parsing=disable_date_parsing) for row in epidata]
-            return r
-        except Exception as e:  # pylint: disable=broad-except
-            return {"result": 0, "message": f"error: {e}", "epidata": []}
-
-    async def __call__(
-        self,
-        fields: Optional[Iterable[str]] = None,
-        disable_date_parsing: Optional[bool] = False,
-    ) -> EpiDataResponse:
-        """Request and parse epidata in CLASSIC message format."""
-        return await self.classic(fields, disable_date_parsing=disable_date_parsing)
-
-    async def json(
-        self,
-        fields: Optional[Iterable[str]] = None,
-        disable_date_parsing: Optional[bool] = False,
-    ) -> List[Mapping[str, Union[str, int, float, date, None]]]:
-        """Request and parse epidata in JSON format"""
-        self._verify_parameters()
-        if self.only_supports_classic:
-            raise OnlySupportsClassicFormatException()
-        response = await self._call(EpiDataFormatType.json, fields)
-        response.raise_for_status()
-        return [
-            self._parse_row(row, disable_date_parsing)
-            for row in cast(List[Mapping[str, Union[str, int, float, None]]], await response.json())
-        ]
-
-    async def df(
-        self,
-        fields: Optional[Iterable[str]] = None,
-        disable_date_parsing: Optional[bool] = False,
-    ) -> DataFrame:
-        """Request and parse epidata as a pandas data frame"""
-        self._verify_parameters()
-        if self.only_supports_classic:
-            raise OnlySupportsClassicFormatException()
-        r = await self.json(fields, disable_date_parsing=disable_date_parsing)
-        return self._as_df(r, fields, disable_date_parsing)
-
-    async def csv(self, fields: Optional[Iterable[str]] = None) -> str:
-        """Request and parse epidata in CSV format"""
-        self._verify_parameters()
-        if self.only_supports_classic:
-            raise OnlySupportsClassicFormatException()
-        response = await self._call(EpiDataFormatType.csv, fields)
-        response.raise_for_status()
-        return await response.text()
-
-    async def iter(
-        self,
-        fields: Optional[Iterable[str]] = None,
-        disable_date_parsing: Optional[bool] = False,
-    ) -> AsyncGenerator[Mapping[str, Union[str, int, float, date, None]], None]:
-        """Request and streams epidata rows"""
-        self._verify_parameters()
-        if self.only_supports_classic:
-            raise OnlySupportsClassicFormatException()
-        response = await self._call(EpiDataFormatType.jsonl, fields)
-        response.raise_for_status()
-        async for line in response.content:
-            yield self._parse_row(loads(line), disable_date_parsing=disable_date_parsing)
-
-    def __aiter__(
-        self,
-    ) -> AsyncGenerator[Mapping[str, Union[str, int, float, date, None]], None]:
-        return self.iter()
-
-
-class EpiDataAsyncContext(AEpiDataEndpoints[EpiDataAsyncCall]):
-    """
-    async epidata call class
-    """
-
-    _base_url: Final[str]
-    _session: Final[Optional[ClientSession]]
-
-    def __init__(self, base_url: str = BASE_URL, session: Optional[ClientSession] = None) -> None:
-        super().__init__()
-        self._base_url = base_url
-        self._session = session
-
-    def with_base_url(self, base_url: str) -> "EpiDataAsyncContext":
-        return EpiDataAsyncContext(base_url, self._session)
-
-    def with_session(self, session: ClientSession) -> "EpiDataAsyncContext":
-        return EpiDataAsyncContext(self._base_url, session)
-
-    def _create_call(
-        self,
-        endpoint: str,
-        params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]],
-        meta: Optional[Sequence[EpidataFieldInfo]] = None,
-        only_supports_classic: bool = False,
-    ) -> EpiDataAsyncCall:
-        return EpiDataAsyncCall(self._base_url, self._session, endpoint, params, meta, only_supports_classic)
-
-    @staticmethod
-    def all(
-        calls: Iterable[EpiDataAsyncCall],
-        call_api: Callable[[EpiDataAsyncCall, ClientSession], Coroutine],
-        batch_size: int = 50,
-    ) -> List:
-        loop = get_event_loop()
-
-        async def impl() -> List:
-            tasks: List[Coroutine] = []
-            connector = TCPConnector(limit=batch_size)
-            async with ClientSession(connector=connector) as session:
-                for call in calls:
-                    co_routine = call_api(call, session)
-                    tasks.append(co_routine)
-                return list(await gather(*tasks))
-
-        future = impl()
-        return loop.run_until_complete(future)
-
-    def all_classic(
-        self,
-        calls: Iterable[EpiDataAsyncCall],
-        fields: Optional[Iterable[str]] = None,
-        batch_size: int = 50,
-    ) -> List[EpiDataResponse]:
-        """
-        runs the given calls in a batch asynchronously and return their responses
-        """
-
-        def call_api(call: EpiDataAsyncCall, session: ClientSession) -> Coroutine:
-            return call.with_session(session).classic(fields)
-
-        return self.all(calls, call_api, batch_size)
-
-    def all_json(
-        self,
-        calls: Iterable[EpiDataAsyncCall],
-        fields: Optional[Iterable[str]] = None,
-        batch_size: int = 50,
-    ) -> List[List[Dict]]:
-        """
-        runs the given calls in a batch asynchronously and return their responses
-        """
-
-        def call_api(call: EpiDataAsyncCall, session: ClientSession) -> Coroutine:
-            return call.with_session(session).json(fields)
-
-        return self.all(calls, call_api, batch_size)
-
-    def all_csv(
-        self,
-        calls: Iterable[EpiDataAsyncCall],
-        fields: Optional[Iterable[str]] = None,
-        batch_size: int = 50,
-    ) -> List[str]:
-        """
-        runs the given calls in a batch asynchronously and return their responses
-        """
-
-        def call_api(call: EpiDataAsyncCall, session: ClientSession) -> Coroutine:
-            return call.with_session(session).csv(fields)
-
-        return self.all(calls, call_api, batch_size)
-
-
-Epidata = EpiDataAsyncContext()
-
-
-async def CovidcastEpidata(
-    base_url: str = BASE_URL, session: Optional[ClientSession] = None
-) -> CovidcastDataSources[EpiDataAsyncCall]:
-    url = add_endpoint_to_url(base_url, "covidcast/meta")
-    meta_data_res = await _async_request(url, {}, session)
-    meta_data_res.raise_for_status()
-    meta_data = await meta_data_res.json()
-
-    def create_call(
-        params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]],
-    ) -> EpiDataAsyncCall:
-        return EpiDataAsyncCall(base_url, session, "covidcast", params, define_covidcast_fields())
-
-    return CovidcastDataSources.create(meta_data, create_call)
-
-
-__all__ = [
-    "Epidata",
-    "EpiDataAsyncCall",
-    "EpiDataAsyncContext",
-    "EpiRange",
-    "CovidcastEpidata",
-]
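With the aiohttp-based module gone, batched workflows go through the synchronous `EpiDataContext` in `epidatpy.request`; a minimal replacement sketch (the endpoint arguments are illustrative):

from epidatpy import Epidata, EpiRange

calls = [
    Epidata.pub_covidcast("fb-survey", "smoothed_cli", "nation", "day", "us", EpiRange(20210405, 20210410)),
    Epidata.pub_fluview("nat"),
]
frames = [call.df() for call in calls]  # sequential requests replace the old gather()-based batching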
format.""" self._verify_parameters() try: - response = self._call(None, fields) + response = self._call(fields) r = cast(EpiDataResponse, response.json()) + if disable_type_parsing: + return r epidata = r.get("epidata") if epidata and isinstance(epidata, list) and len(epidata) > 0 and isinstance(epidata[0], dict): r["epidata"] = [self._parse_row(row, disable_date_parsing=disable_date_parsing) for row in epidata] @@ -109,68 +113,59 @@ def classic( def __call__( self, - fields: Optional[Iterable[str]] = None, - disable_date_parsing: Optional[bool] = False, - ) -> EpiDataResponse: - """Request and parse epidata in CLASSIC message format.""" - return self.classic(fields, disable_date_parsing=disable_date_parsing) - - def json( - self, - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, disable_date_parsing: Optional[bool] = False, - ) -> List[Mapping[str, Union[str, int, float, date, None]]]: - """Request and parse epidata in JSON format""" + ) -> Union[EpiDataResponse, DataFrame]: + """Request and parse epidata in df message format.""" if self.only_supports_classic: - raise OnlySupportsClassicFormatException() - self._verify_parameters() - response = self._call(EpiDataFormatType.json, fields) - response.raise_for_status() - return [ - self._parse_row(row, disable_date_parsing=disable_date_parsing) - for row in cast(List[Mapping[str, Union[str, int, float, None]]], response.json()) - ] + return self.classic(fields, disable_date_parsing=disable_date_parsing, disable_type_parsing=False) + return self.df(fields, disable_date_parsing=disable_date_parsing) def df( self, - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, disable_date_parsing: Optional[bool] = False, ) -> DataFrame: """Request and parse epidata as a pandas data frame""" if self.only_supports_classic: raise OnlySupportsClassicFormatException() self._verify_parameters() - r = self.json(fields, disable_date_parsing=disable_date_parsing) - return self._as_df(r, fields, disable_date_parsing=disable_date_parsing) - - def csv(self, fields: Optional[Iterable[str]] = None) -> str: - """Request and parse epidata in CSV format""" - if self.only_supports_classic: - raise OnlySupportsClassicFormatException() - self._verify_parameters() - response = self._call(EpiDataFormatType.csv, fields) - response.raise_for_status() - return response.text - - def iter( - self, - fields: Optional[Iterable[str]] = None, - disable_date_parsing: Optional[bool] = False, - ) -> Generator[Mapping[str, Union[str, int, float, date, None]], None, Response]: - """Request and streams epidata rows""" - if self.only_supports_classic: - raise OnlySupportsClassicFormatException() - self._verify_parameters() - response = self._call(EpiDataFormatType.jsonl, fields, stream=True) - response.raise_for_status() - for line in response.iter_lines(): - yield self._parse_row(loads(line), disable_date_parsing=disable_date_parsing) - return response - - def __iter__( - self, - ) -> Generator[Mapping[str, Union[str, int, float, date, None]], None, Response]: - return self.iter() + json = self.classic(fields, disable_type_parsing=True) + rows = json.get("epidata", []) + pred = fields_to_predicate(fields) + columns: List[str] = [info.name for info in self.meta if pred(info.name)] + df = DataFrame(rows, columns=columns or None) + + data_types: Dict[str, Any] = {} + time_fields: List[str] = [] + for info in self.meta: + if not pred(info.name) or df[info.name].isnull().all(): + continue + if info.type == EpidataFieldType.bool: + 
diff --git a/mypy.ini b/mypy.ini
deleted file mode 100644
index 2abe8a7..0000000
--- a/mypy.ini
+++ /dev/null
@@ -1,15 +0,0 @@
-# MyPy config file
-# File reference here - http://mypy.readthedocs.io/en/latest/config_file.html#config-file
-
-[mypy]
-ignore_missing_imports = True
-no_strict_optional = True
-disallow_incomplete_defs = True
-disallow_subclassing_any = True
-disallow_untyped_calls = True
-disallow_untyped_defs = True
-allow_untyped_decorators = True
-warn_redundant_casts = False
-warn_unused_ignores = True
-warn_return_any = True
-exclude = "(tasks|setup).py"
diff --git a/pyproject.toml b/pyproject.toml
index 46f545f..5cf932a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,21 +1,99 @@
+# This file was derived from the PyPA Sample Project
+# https://github.com/pypa/sampleproject
+
+# Guide (user-friendly):
+# https://packaging.python.org/en/latest/guides/writing-pyproject-toml/
+
+# Specification (technical, formal):
+# https://packaging.python.org/en/latest/specifications/pyproject-toml/
+
+
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "epidatpy"
+version = "0.5.0"
+description = "A programmatic interface to Delphi's Epidata API."
+readme = "README.md"
+license = { file = "LICENSE" }
+authors = [{ name = "Delphi Research Group" }]
+maintainers = [{ name = "Dmitry Shemetov", email = "dshemeto@andrew.cmu.edu" }]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Science/Research",
+    "Natural Language :: English",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+]
+requires-python = ">=3.8"
+dependencies = [
+    "aiohttp",
+    "epiweeks>=2.1",
+    "pandas>=1",
+    "requests>=2.25",
+    "tenacity",
+]
+
+[project.optional-dependencies]
+dev = [
+    "black",
+    "coverage",
+    "invoke",
+    "mypy",
+    "pre-commit",
+    "pylint",
+    "pytest",
+    "recommonmark",
+    "sphinx_rtd_theme",
+    "sphinx-autodoc-typehints",
+    "sphinx",
+    "twine",
+    "types-requests",
+    "watchdog",
+    "wheel",
+]
+
+[project.urls]
+homepage = "https://github.com/cmu-delphi/epidatpy"
+repository = "https://github.com/cmu-delphi/epidatpy"
+
+
 [tool.black]
 line-length = 120
 target-version = ['py38']
-include = 'epidatpy'
+
+[tool.ruff]
+lint.extend-select = ["I"]

 [tool.pylint]
 max-line-length = 120
 min-public-methods = 1
 disable = [
-    "R0801",
-    "E1101",
-    "E0611",
-    "C0114",
-    "C0116",
-    "C0103",
-    "R0913",
-    "R0914",
-    "W0702",
+    "duplicate-code",
+    "invalid-name",
+    "missing-module-docstring",
+    "missing-function-docstring",
+    "too-many-arguments",
+    "too-many-locals",
+    "too-many-lines",
     "too-many-public-methods",
     "too-many-instance-attributes",
+    "too-many-return-statements",
+    "too-many-branches",
 ]
+
+[tool.mypy]
+ignore_missing_imports = true
+disallow_incomplete_defs = true
+disallow_subclassing_any = true
+disallow_untyped_calls = true
+disallow_untyped_defs = true
+disallow_untyped_decorators = false
+warn_redundant_casts = false
+warn_unused_ignores = true
+warn_return_any = true
+exclude = "(tasks|setup).py"
diff --git a/requirements-dev.txt b/requirements-dev.txt
deleted file mode 100644
index 3546e59..0000000
--- a/requirements-dev.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-mypy
-pylint
-black
-pytest
-invoke
-watchdog
-coverage
-sphinx
-recommonmark
-sphinx_rtd_theme
-sphinx-autodoc-typehints
-twine
-wheel
-types-requests
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 5b324f5..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-pandas>=1
-requests>=2.25
-tenacity
-aiohttp
-epiweeks>=2.1
diff --git a/setup.py b/setup.py
deleted file mode 100644
index d7eee09..0000000
--- a/setup.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import setuptools
-import pathlib
-
-
-setuptools.setup(
-    name="epidatpy",
-    version="0.5.0",
-    author="Alex Reinhart",
-    author_email="areinhar@stat.cmu.edu",
-    description="A programmatic interface to Delphi's Epidata API.",
-    long_description=pathlib.Path("README.md").read_text(),
-    long_description_content_type="text/markdown",
-    url="https://github.com/cmu-delphi/epidatpy",
-    packages=setuptools.find_packages(),
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-        "Intended Audience :: Science/Research",
-        "Natural Language :: English",
-        "Topic :: Scientific/Engineering :: Bio-Informatics",
-    ],
-    python_requires=">=3.8",
-    install_requires=[f.strip() for f in pathlib.Path("requirements.txt").read_text().split("\n") if f],
-    # package_data={'epidatpy': []}
-)
diff --git a/smoke_covid_test.py b/smoke_covid_test.py
deleted file mode 100644
index 88e216d..0000000
--- a/smoke_covid_test.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from epidatpy.request import CovidcastEpidata, EpiRange
-
-epidata = CovidcastEpidata()
-print(list(epidata.source_names))
-apicall = epidata[("fb-survey", "smoothed_cli")].call(
-    "nation",
-    "us",
-    EpiRange(20210405, 20210410),
-)
-print(apicall)
-
-classic = apicall.classic()
-print(classic)
-
-r = apicall.csv()
-print(r[0:100])
-
-data = apicall.json()
-print(data[0])
-
-df = apicall.df()
-print(df.columns)
-print(df.dtypes)
-print(df.iloc[0])
-df = apicall.df(disable_date_parsing=True)
-print(df.columns)
-print(df.dtypes)
-print(df.iloc[0])
-
-for row in apicall.iter():
-    print(row)
diff --git a/smoke_test.py b/smoke_test.py
index 436c1e9..5d38f45 100644
--- a/smoke_test.py
+++ b/smoke_test.py
@@ -1,34 +1,61 @@
 from datetime import date

-from epidatpy.request import Epidata, EpiRange
+from epidatpy import CovidcastEpidata, Epidata, EpiRange

-apicall = Epidata.covidcast("fb-survey", "smoothed_cli", "day", "nation", EpiRange(20210405, 20210410), "us")
+print("Epidata Test")
+apicall = Epidata.pub_covidcast("fb-survey", "smoothed_cli", "nation", "day", "us", EpiRange(20210405, 20210410))

+# Call info
 print(apicall)
+# URL
+print(apicall.request_url())

 classic = apicall.classic()
 print(classic)

-r = apicall.csv()
-print(r[0:100])
-
-data = apicall.json()
-print(data[0])
-
 df = apicall.df()
 print(df.columns)
 print(df.dtypes)
 print(df.iloc[0])
+print(df)

+# Classic
+classic = apicall.classic()
+# DataFrame
 df = apicall.df(disable_date_parsing=True)
 print(df.columns)
 print(df.dtypes)
 print(df.iloc[0])

-for row in apicall.iter():
-    print(row)

 StagingEpidata = Epidata.with_base_url("https://staging.delphi.cmu.edu/epidata/")

-epicall = StagingEpidata.covidcast(
-    "fb-survey", "smoothed_cli", "day", "nation", EpiRange(date(2021, 4, 5), date(2021, 4, 10)), "*"
+epicall = StagingEpidata.pub_covidcast(
+    "fb-survey", "smoothed_cli", "nation", "day", "*", EpiRange(date(2021, 4, 5), date(2021, 4, 10))
 )
 print(epicall._base_url)
+
+
+# Covidcast test
+print("Covidcast Test")
+epidata = CovidcastEpidata()
+print(epidata.source_names())
+print(epidata.signal_names("fb-survey"))
+print(epidata["fb-survey"].signal_df)
+apicall = epidata[("fb-survey", "smoothed_cli")].call(
+    "nation",
+    "us",
+    EpiRange(20210405, 20210410),
+)
+print(apicall)
+
+classic = apicall.classic()
+print(classic)
+
+df = apicall.df()
+print(df.columns)
+print(df.dtypes)
+print(df.iloc[0])
+df = apicall.df(disable_date_parsing=True)
+print(df.columns)
+print(df.dtypes)
+print(df.iloc[0])
diff --git a/smoke_test_async.py b/smoke_test_async.py
deleted file mode 100644
index b26e09c..0000000
--- a/smoke_test_async.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from asyncio import get_event_loop
-from epidatpy.async_request import Epidata
-
-
-async def main() -> None:
-    apicall = Epidata.covidcast("fb-survey", "smoothed_cli", "day", "nation", Epidata.range(20210405, 20210410), "us")
-    classic = await apicall.classic()
-    print(classic)
-
-    r = await apicall.csv()
-    print(r[0:100])
-
-    data = await apicall.json()
-    print(data[0])
-
-    async for row in apicall.iter():
-        print(row)
-
-
-loop = get_event_loop()
-loop.run_until_complete(main())
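Alongside the `covidcast` → `pub_covidcast` rename, the smoke test reflects the reordered signature (geo before time); side by side, as shown in this diff:

# before: source, signal, time_type, geo_type, time_values, geo_values
Epidata.covidcast("fb-survey", "smoothed_cli", "day", "nation", EpiRange(20210405, 20210410), "us")
# after: source, signal, geo_type, time_type, geo_values, time_values
Epidata.pub_covidcast("fb-survey", "smoothed_cli", "nation", "day", "us", EpiRange(20210405, 20210410))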
diff --git a/tasks.py b/tasks.py
index 4cde5ba..afeb455 100644
--- a/tasks.py
+++ b/tasks.py
@@ -4,18 +4,13 @@
 Execute 'invoke --list' for guidance on using Invoke
 """

-import pathlib
 import shutil
-from pathlib import Path
 import webbrowser
+from pathlib import Path

-from invoke import task, Context
-
-Path().expanduser()
-
+from invoke import task

 ROOT_DIR = Path(__file__).parent
-SETUP_FILE = ROOT_DIR.joinpath("setup.py")
 TEST_DIR = ROOT_DIR.joinpath("tests")
 SOURCE_DIR = ROOT_DIR.joinpath("epidatpy")
 TOX_DIR = ROOT_DIR.joinpath(".tox")
@@ -29,7 +24,7 @@
 JOINED_PYTHON_DIRS = " ".join(PYTHON_DIRS)


-def _delete_file(file: pathlib.Path) -> None:
+def _delete_file(file: Path) -> None:
     try:
         file.unlink(missing_ok=True)
     except TypeError:
@@ -41,7 +36,7 @@ def _delete_file(file: Path) -> None:


 @task()
-def format(c):  # pylint: disable=unused-argument,redefined-builtin
+def format(c):  # pylint: disable=redefined-builtin
     """
     Format code
     """
@@ -144,7 +139,7 @@ def clean_tests(c):  # pylint: disable=unused-argument


 @task(pre=[clean_build, clean_python, clean_tests, clean_docs])
-def clean(_c):  # pylint: disable=unused-argument
+def clean(c):  # pylint: disable=unused-argument
     """
     Runs all clean sub-tasks
     """
@@ -155,8 +150,7 @@ def dist(c):
     """
     Build source and wheel packages
     """
-    c.run("python setup.py sdist")
-    c.run("python setup.py bdist_wheel")
+    c.run("python -m build --sdist --wheel")


 @task(pre=[clean, dist])
diff --git a/tests/test_auth.py b/tests/test_auth.py
index b91dccf..d04d146 100644
--- a/tests/test_auth.py
+++ b/tests/test_auth.py
@@ -1,12 +1,12 @@
-from pytest import warns, MonkeyPatch
+from pytest import MonkeyPatch, warns

-from epidatpy import get_api_key
+from epidatpy._auth import _get_api_key


 def test_get_api_key(monkeypatch: MonkeyPatch) -> None:
     with monkeypatch.context() as m:
         m.setenv("DELPHI_EPIDATA_KEY", "test")
-        assert get_api_key() == "test"
+        assert _get_api_key() == "test"
         m.delenv("DELPHI_EPIDATA_KEY")
         with warns(UserWarning):
-            assert get_api_key() is None
+            assert _get_api_key() == ""
diff --git a/tests/test_model.py b/tests/test_model.py
index a8412cd..13db48d 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -1,29 +1,31 @@
+import datetime
+
 from epidatpy._model import EpiRange, format_item, format_list


 def test_epirange() -> None:
-    r = EpiRange(3, 4)
-    assert r.start == 3 and r.end == 4
-    assert str(r) == "3-4"
+    r = EpiRange(20000101, 20000102)
+    assert r.start == datetime.date(2000, 1, 1) and r.end == datetime.date(2000, 1, 2)
+    assert str(r) == "20000101-20000102"


 def test_epirange_wrong_order() -> None:
-    r = EpiRange(4, 3)
-    assert r.start == 3 and r.end == 4
+    r = EpiRange(20000102, 20000101)
+    assert r.start == datetime.date(2000, 1, 1) and r.end == datetime.date(2000, 1, 2)


 def test_format_item() -> None:
     assert format_item("a") == "a"
     assert format_item(1) == "1"
     assert format_item({"from": 1, "to": 3}) == "1-3"
-    assert format_item(EpiRange(3, 5)) == "3-5"
+    assert format_item(EpiRange(20000101, 20000102)) == "20000101-20000102"


 def test_format_list() -> None:
     assert format_list("a") == "a"
     assert format_list(1) == "1"
     assert format_list({"from": 1, "to": 3}) == "1-3"
-    assert format_list(EpiRange(3, 5)) == "3-5"
+    assert format_list(EpiRange(20000101, 20000102)) == "20000101-20000102"
     assert format_list(["a", "b"]) == "a,b"
     assert format_list(("a", "b")) == "a,b"