From e4ced830d1480d97328a498a3b06f9dd1f5fc92d Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Tue, 9 Jul 2024 14:53:08 -0700 Subject: [PATCH 01/19] ci: update actions --- .github/workflows/ci.yaml | 39 ----------------- .github/workflows/ci.yml | 39 +++++++++++++++++ .github/workflows/create_release.yml | 27 +++++++++--- .github/workflows/release_helper.yml | 65 +++++++++++++++++++--------- 4 files changed, 105 insertions(+), 65 deletions(-) delete mode 100644 .github/workflows/ci.yaml create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml deleted file mode 100644 index fc28188..0000000 --- a/.github/workflows/ci.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: ci - -on: push - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.8] - steps: - - name: Check out code - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - - name: Install Dependencies - run: | - python -m venv venv - source venv/bin/activate - pip install -r requirements.txt -r requirements-dev.txt - - name: Check Formatting - run: | - source venv/bin/activate - inv lint-black - - name: Check Linting - run: | - source venv/bin/activate - inv lint-pylint - - name: Check Types - run: | - source venv/bin/activate - inv lint-mypy - - name: Test - run: | - source venv/bin/activate - inv test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..91bcf24 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,39 @@ +name: ci + +on: push + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.8] + steps: + - name: Check out code + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - name: 
Install Dependencies + run: | + python -m venv venv + source venv/bin/activate + pip install -e ".[dev]" + - name: Check Formatting + run: | + source venv/bin/activate + inv lint-black + - name: Check Linting + run: | + source venv/bin/activate + inv lint-pylint + - name: Check Types + run: | + source venv/bin/activate + inv lint-mypy + - name: Test + run: | + source venv/bin/activate + inv test diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 1b0cabf..43606e7 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -6,12 +6,13 @@ on: description: "Semantic Version Number (i.e., 5.5.0 or patch, minor, major, prepatch, preminor, premajor, prerelease)" required: true default: patch + jobs: create_release: runs-on: ubuntu-latest steps: - name: Check out code - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: ref: main ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} @@ -20,17 +21,31 @@ jobs: git fetch origin dev:dev git reset --hard dev - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: 3.8 - name: Change version number id: version + # See this issue for explanation and testing: + # https://github.com/cmu-delphi/delphi-epidata/pull/1473 run: | python -m pip install bump2version - echo -n "::set-output name=next_tag::" - bump2version --list ${{ github.event.inputs.versionName }} | grep new_version | sed -r s,"^.*=",, + if [[ ${{ github.event.inputs.versionName }} =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + # use given version number + NEXT_TAG="${{ github.event.inputs.versionName }}" + elif [[ ${{ github.event.inputs.versionName }} =~ ^(major|minor|patch)$ ]]; then + # calculate new version number based on given tag + NEXT_TAG=$(bump2version --dry-run --list ${{ github.event.inputs.versionName }} | grep ^new_version | sed -r s,"^.*=",,) + else + echo "\nInvalid version name: ${{ github.event.inputs.versionName }}" 
+ exit 1 + fi + # apply given or calculated version number + bump2version --new-version $NEXT_TAG _ignored_arg_ + # save version number for later + echo "next_tag=$NEXT_TAG" >> $GITHUB_OUTPUT - name: Create pull request into main - uses: peter-evans/create-pull-request@v3 + uses: peter-evans/create-pull-request@v6 with: branch: release/${{ steps.version.outputs.next_tag }} commit-message: "chore: release ${{ steps.version.outputs.next_tag }}" @@ -38,6 +53,6 @@ jobs: title: Release ${{ steps.version.outputs.next_tag }} labels: chore # reviewers: - assignees: melange396 + assignees: dshemetov body: | Releasing ${{ steps.version.outputs.next_tag }}. diff --git a/.github/workflows/release_helper.yml b/.github/workflows/release_helper.yml index b297f6f..4e6291b 100644 --- a/.github/workflows/release_helper.yml +++ b/.github/workflows/release_helper.yml @@ -17,20 +17,20 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: 3.8 - name: Extract version id: extract_version run: | python -m pip install bump2version - echo -n "::set-output name=version::" - bump2version --dry-run --list patch | grep ^current_version | sed -r s,"^.*=",, + NEXT_TAG=$(bump2version --dry-run --list patch | grep ^current_version | sed -r s,"^.*=",,) + echo "version=$NEXT_TAG" >> $GITHUB_OUTPUT - name: Create Release id: create_release - uses: release-drafter/release-drafter@v5 + uses: release-drafter/release-drafter@v6 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: @@ -41,20 +41,19 @@ jobs: upload_url: ${{ steps.create_release.outputs.upload_url }} tag_name: ${{ steps.create_release.outputs.tag_name }} - release_package: - needs: create_release + lint: runs-on: ubuntu-latest steps: - name: Check out code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Set up Python 3.8 - uses: actions/setup-python@v2 + 
uses: actions/setup-python@v5 with: python-version: 3.8 - name: Install build dependencies run: | python -m pip install --upgrade pip - pip install --use-feature=2020-resolver -r requirements.txt -r requirements-dev.txt + pip install -e ".[dev]" - name: Linting run: | . venv/bin/activate @@ -63,14 +62,42 @@ jobs: run: | . venv/bin/activate inv test - - name: Create release + + build: + needs: [create_release, lint] + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v4 + - name: Set up Python 3.8 + uses: actions/setup-python@v5 + with: + python-version: 3.8 + - name: Install build dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + - name: Build run: | inv dist - - uses: actions/upload-artifact@v2 + + release_package: + needs: [create_release, lint] + runs-on: ubuntu-latest + # TODO: Make sure this works, copied from best practices here + # https://github.com/pypa/gh-action-pypi-publish/tree/release/v1/?tab=readme-ov-file#trusted-publishing + environment: + name: pypi + url: https://pypi.org/p/epidatpy + permissions: + id-token: write + steps: + - uses: actions/upload-artifact@v4 with: name: epidatpy + path: dist/*.tar.gz - name: Upload Release Asset - uses: AButler/upload-release-assets@v2.0 + uses: AButler/upload-release-assets@v3.0 with: files: "dist/*.tar.gz" repo-token: ${{ secrets.GITHUB_TOKEN }} @@ -78,9 +105,7 @@ jobs: - name: Publish a Python distribution to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - user: __token__ - password: ${{ secrets.DELPHI_PYPI_PROD_TOKEN }} - skip_existing: true + skip-existing: true # repository_url: https://test.pypi.org/legacy/ sync_dev: @@ -88,7 +113,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out code - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: ref: dev ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} @@ -97,14 +122,14 @@ jobs: git fetch origin main:main git reset --hard main - name: Create pull request into dev 
- uses: peter-evans/create-pull-request@v3 + uses: peter-evans/create-pull-request@v6 with: branch: bot/sync-main-dev commit-message: "chore: sync main-dev" base: dev title: "chore: sync main->dev" labels: chore - # reviewers: - assignees: melange396 + # reviewers: + assignees: dshemetov body: | Syncing Main->Dev. From 57908fc0099685a6cad6bfe6996182e6295411df Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 20:57:49 -0700 Subject: [PATCH 02/19] lint: format --- epidatpy/_constants.py | 1 - epidatpy/_covidcast.py | 118 +++++++++++++++---------------- epidatpy/_endpoints.py | 141 +++++++++++--------------------------- epidatpy/_model.py | 29 +++----- epidatpy/_parse.py | 5 +- epidatpy/async_request.py | 29 +++----- epidatpy/request.py | 29 ++++---- 7 files changed, 138 insertions(+), 214 deletions(-) diff --git a/epidatpy/_constants.py b/epidatpy/_constants.py index b56fcdf..41e57a4 100644 --- a/epidatpy/_constants.py +++ b/epidatpy/_constants.py @@ -1,6 +1,5 @@ from typing import Final - __version__: Final = "0.5.0" HTTP_HEADERS: Final = {"User-Agent": f"epidatpy/{__version__}"} BASE_URL: Final = "https://api.delphi.cmu.edu/epidata/" diff --git a/epidatpy/_covidcast.py b/epidatpy/_covidcast.py index 3367ed1..700c403 100644 --- a/epidatpy/_covidcast.py +++ b/epidatpy/_covidcast.py @@ -1,4 +1,5 @@ from dataclasses import Field, InitVar, dataclass, field, fields +from functools import cached_property from typing import ( Any, Callable, @@ -13,21 +14,21 @@ Sequence, Tuple, Union, - overload, get_args, + overload, ) -from functools import cached_property + from pandas import DataFrame + from ._model import ( - EpiRangeLike, CALL_TYPE, EpidataFieldInfo, EpidataFieldType, + EpiRangeLike, EpiRangeParam, InvalidArgumentException, ) - GeoType = Literal["nation", "msa", "hrr", "hhs", "state", "county"] TimeType = Literal["day", "week"] @@ -63,17 +64,9 @@ def define_covidcast_fields() -> List[EpidataFieldInfo]: return [ EpidataFieldInfo("source", 
EpidataFieldType.text), EpidataFieldInfo("signal", EpidataFieldType.text), - EpidataFieldInfo( - "geo_type", - EpidataFieldType.categorical, - categories=list(get_args(GeoType)), - ), + EpidataFieldInfo("geo_type", EpidataFieldType.categorical, categories=list(get_args(GeoType))), EpidataFieldInfo("geo_value", EpidataFieldType.text), - EpidataFieldInfo( - "time_type", - EpidataFieldType.categorical, - categories=list(get_args(TimeType)), - ), + EpidataFieldInfo("time_type", EpidataFieldType.categorical, categories=list(get_args(TimeType))), EpidataFieldInfo("time_value", EpidataFieldType.date_or_epiweek), EpidataFieldInfo("issue", EpidataFieldType.date), EpidataFieldInfo("lag", EpidataFieldType.int), @@ -119,11 +112,13 @@ class DataSignal(Generic[CALL_TYPE]): geo_types: Dict[GeoType, DataSignalGeoStatistics] = field(default_factory=dict) def __post_init__(self) -> None: - self.link = [WebLink(alt=l["alt"], href=l["href"]) if isinstance(l, dict) else l for l in self.link] + self.link = [ + WebLink(alt=link["alt"], href=link["href"]) if isinstance(link, dict) else link for link in self.link + ] stats_fields = fields(DataSignalGeoStatistics) self.geo_types = { - k: DataSignalGeoStatistics(**_limit_fields(l, stats_fields)) if isinstance(l, dict) else l - for k, l in self.geo_types.items() + k: DataSignalGeoStatistics(**_limit_fields(v, stats_fields)) if isinstance(v, dict) else v + for k, v in self.geo_types.items() } @staticmethod @@ -222,7 +217,9 @@ def __post_init__( self, _create_call: Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE], ) -> None: - self.link = [WebLink(alt=l["alt"], href=l["href"]) if isinstance(l, dict) else l for l in self.link] + self.link = [ + WebLink(alt=link["alt"], href=link["href"]) if isinstance(link, dict) else link for link in self.link + ] signal_fields = fields(DataSignal) self.signals = [ DataSignal(_create_call=_create_call, **_limit_fields(s, signal_fields)) if isinstance(s, dict) else s @@ 
-284,31 +281,31 @@ def source_names(self) -> Iterable[str]: def source_df(self) -> DataFrame: """Fetch metadata about available covidcast sources. - Obtains a data frame of source metadata describing all publicly available data - streams from the covidcast API. + Obtains a data frame of source metadata describing all publicly + available data streams from the covidcast API. :returns: A data frame containing one row per available source, with the - following columns: + following columns: - ``source`` + ``source`` Data source name. - ``signal`` + ``signal`` Signal name. - ``description`` + ``description`` Description of the signal. - ``reference_signal`` + ``reference_signal`` Geographic level for which this signal is available, such as county, - state, msa, hss, hrr, or nation. Most signals are available at multiple geographic - levels and will hence be listed in multiple rows with their own - metadata. + state, msa, hss, hrr, or nation. Most signals are available at + multiple geographic levels and will hence be listed in multiple rows + with their own metadata. - ``license`` + ``license`` The license - ``dua`` + ``dua`` Link to the Data Use Agreement. """ return DataSource.to_df(self.sources) @@ -327,65 +324,70 @@ def signal_df(self) -> DataFrame: for descriptions of the available sources. :returns: A data frame containing one row per available signal, with the - following columns: + following columns: - ``data_source`` + ``data_source`` Data source name. - ``signal`` + ``signal`` Signal name. - ``name`` + ``name`` Name of signal. - ``active`` - Whether the signal is currently not updated or not. Signals may be inactive - because the sources have become unavailable, other sources have replaced - them, or additional work is required for us to continue updating them. + ``active`` + Whether the signal is currently not updated or not. 
Signals may be + inactive because the sources have become unavailable, other sources + have replaced them, or additional work is required for us to + continue updating them. - ``short_description`` + ``short_description`` Brief description of the signal. - ``description`` + ``description`` Full description of the signal. - ``geo_types`` - Spatial resolution of the signal (e.g., `county`, `hrr`, `msa`, `dma`, `state`). - More detail about all `geo_types` is given in the `geographic coding documentation + ``geo_types`` + Spatial resolution of the signal (e.g., `county`, `hrr`, `msa`, + `dma`, `state`). More detail about all `geo_types` is given in the + `geographic coding documentation `_. - ``time_type`` - Temporal resolution of the signal (e.g., day, week; see - `date coding details `_). + ``time_type`` + Temporal resolution of the signal (e.g., day, week; see `date coding + details + `_). - ``time_label`` + ``time_label`` The time label ("Date", "Week"). - ``value_label`` - The value label ("Value", "Percentage", "Visits", "Visits per 100,000 people"). + ``value_label`` + The value label ("Value", "Percentage", "Visits", "Visits per + 100,000 people"). - ``format`` + ``format`` The value format ("per100k", "percent", "fraction", "count", "raw"). - ``category`` + ``category`` The signal category ("early", "public", "late", "other"). - ``high_values_are`` - What the higher value of signal indicates ("good", "bad", "neutral"). + ``high_values_are`` + What the higher value of signal indicates ("good", "bad", + "neutral"). - ``is_smoothed`` + ``is_smoothed`` Whether the signal is smoothed. - ``is_weighted`` + ``is_weighted`` Whether the signal is weighted. - ``is_cumulative`` + ``is_cumulative`` Whether the signal is cumulative. - ``has_stderr`` + ``has_stderr`` Whether the signal has `stderr` statistic. - ``has_sample_size`` + ``has_sample_size`` Whether the signal has `sample_size` statistic. 
""" return DataSignal.to_df(self.signals) diff --git a/epidatpy/_endpoints.py b/epidatpy/_endpoints.py index 2459870..cadffba 100644 --- a/epidatpy/_endpoints.py +++ b/epidatpy/_endpoints.py @@ -1,31 +1,30 @@ +import warnings from abc import ABC, abstractmethod from datetime import date -from typing import Generic, Iterable, Literal, Mapping, Optional, Union, Sequence -import warnings +from typing import Generic, Iterable, Literal, Mapping, Optional, Sequence, Union from epiweeks import Week + +from ._covidcast import GeoType, TimeType, define_covidcast_fields from ._model import ( + CALL_TYPE, + EPI_RANGE_TYPE, + EpidataFieldInfo, + EpidataFieldType, + EpiRange, EpiRangeLike, EpiRangeParam, + IntParam, InvalidArgumentException, StringParam, - IntParam, - EpiRange, - EPI_RANGE_TYPE, - EpidataFieldInfo, - EpidataFieldType, - CALL_TYPE, ) -from ._covidcast import define_covidcast_fields, GeoType, TimeType -def get_wildcard_equivalent_dates( - time_value: str, time_type: Literal["day", "week"] -) -> str: +def get_wildcard_equivalent_dates(time_value: str, time_type: Literal["day", "week"]) -> str: if time_value == "*": if time_type == "day": return EpiRange("10000101", "30000101") - elif time_type == "week": + if time_type == "week": return EpiRange("100001", "300001") return time_value @@ -35,28 +34,16 @@ def reformat_epirange(epirange: EpiRange, to_type: str) -> EpiRange: if to_type not in ("day", "week"): raise InvalidArgumentException("`to_type` must be 'day' or 'week'") - if ( - to_type == "day" - and isinstance(epirange.start, (str, int)) - and len(str(epirange.start)) == 6 - ): + if to_type == "day" and isinstance(epirange.start, (str, int)) and len(str(epirange.start)) == 6: coercion_msg = ( "`collection_weeks` is in week format but `pub_covid_hosp_facility`" "expects day format; dates will be converted to day format but may not" "correspond exactly to desired time range" ) warnings.warn(coercion_msg, UserWarning) - epirange = EpiRange( - 
parse_api_week(epirange.start), parse_api_week(epirange.end) - ) - elif ( - to_type == "week" - and isinstance(epirange.start, (int, str)) - and len(str(epirange.start)) == 8 - ): - epirange = EpiRange( - format_epiweek(epirange.start), format_epiweek(epirange.end) - ) + epirange = EpiRange(parse_api_week(epirange.start), parse_api_week(epirange.end)) + elif to_type == "week" and isinstance(epirange.start, (int, str)) and len(str(epirange.start)) == 8: + epirange = EpiRange(format_epiweek(epirange.start), format_epiweek(epirange.end)) return epirange @@ -98,9 +85,7 @@ def pvt_cdc( epiweeks = get_wildcard_equivalent_dates(epiweeks, "day") if auth is None or epiweeks is None or locations is None: - raise InvalidArgumentException( - "`auth`, `epiweeks`, and `locations` are all required" - ) + raise InvalidArgumentException("`auth`, `epiweeks`, and `locations` are all required") return self._create_call( "cdc/", @@ -132,9 +117,7 @@ def pub_covid_hosp_facility_lookup( """Lookup COVID hospitalization facility identifiers.""" if all((v is None for v in (state, ccn, city, zip, fips_code))): - raise InvalidArgumentException( - "one of `state`, `ccn`, `city`, `zip`, or `fips_code` is required" - ) + raise InvalidArgumentException("one of `state`, `ccn`, `city`, `zip`, or `fips_code` is required") return self._create_call( "covid_hosp_facility_lookup/", @@ -168,9 +151,7 @@ def pub_covid_hosp_facility( """Fetch COVID hospitalization data for specific facilities.""" if hospital_pks is None or collection_weeks is None: - raise InvalidArgumentException( - "`hospital_pks` and `collection_weeks` are both required" - ) + raise InvalidArgumentException("`hospital_pks` and `collection_weeks` are both required") collection_weeks = get_wildcard_equivalent_dates(collection_weeks, "day") @@ -310,9 +291,7 @@ def pub_covid_hosp_state_timeseries( raise InvalidArgumentException("`states` and `dates` are both required") if issues is not None and as_of is not None: - raise 
InvalidArgumentException( - "`issues` and `as_of` are mutually exclusive" - ) + raise InvalidArgumentException("`issues` and `as_of` are mutually exclusive") dates = get_wildcard_equivalent_dates(dates, "day") @@ -511,14 +490,10 @@ def pub_covidcast( "`data_source`, `signals`, `time_type`, `geo_type`, `time_values`, and `geo_values` are all required." ) if sum([issues is not None, lag is not None, as_of is not None]) > 1: - raise InvalidArgumentException( - "`issues`, `lag`, and `as_of` are mutually exclusive." - ) + raise InvalidArgumentException("`issues`, `lag`, and `as_of` are mutually exclusive.") if data_source == "nchs-mortality" and time_type != "week": - raise InvalidArgumentException( - "nchs-mortality data source only supports the week time type." - ) + raise InvalidArgumentException("nchs-mortality data source only supports the week time type.") return self._create_call( "covidcast/", @@ -552,16 +527,12 @@ def pub_delphi(self, system: str, epiweek: Union[int, str]) -> CALL_TYPE: only_supports_classic=True, ) - def pub_dengue_nowcast( - self, locations: StringParam, epiweeks: EpiRangeParam = "*" - ) -> CALL_TYPE: + def pub_dengue_nowcast(self, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL_TYPE: """Fetch Delphi's dengue nowcast.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") if locations is None or epiweeks is None: - raise InvalidArgumentException( - "`locations` and `epiweeks` are both required" - ) + raise InvalidArgumentException("`locations` and `epiweeks` are both required") return self._create_call( "dengue_nowcast/", @@ -585,9 +556,7 @@ def pvt_dengue_sensors( epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") if auth is None or names is None or locations is None or epiweeks is None: - raise InvalidArgumentException( - "`auth`, `names`, `locations`, and `epiweeks` are all required" - ) + raise InvalidArgumentException("`auth`, `names`, `locations`, and `epiweeks` are all required") return self._create_call( 
"dengue_sensors/", @@ -645,9 +614,7 @@ def pub_flusurv( epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") if locations is None or epiweeks is None: - raise InvalidArgumentException( - "`locations` and `epiweeks` are both required" - ) + raise InvalidArgumentException("`locations` and `epiweeks` are both required") if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and `lag` are mutually exclusive") @@ -764,16 +731,12 @@ def pub_fluview( ], ) - def pub_gft( - self, locations: StringParam, epiweeks: EpiRangeParam = "*" - ) -> CALL_TYPE: + def pub_gft(self, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL_TYPE: """Fetch Google Flu Trends data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") if locations is None or epiweeks is None: - raise InvalidArgumentException( - "`locations` and `epiweeks` are both required" - ) + raise InvalidArgumentException("`locations` and `epiweeks` are both required") return self._create_call( "gft/", @@ -794,9 +757,7 @@ def pvt_ght( ) -> CALL_TYPE: """Fetch Google Health Trends data.""" if auth is None or locations is None or epiweeks is None or query == "": - raise InvalidArgumentException( - "`auth`, `locations`, `epiweeks`, and `query` are all required" - ) + raise InvalidArgumentException("`auth`, `locations`, `epiweeks`, and `query` are all required") return self._create_call( "ght/", @@ -859,16 +820,12 @@ def pub_meta(self) -> CALL_TYPE: only_supports_classic=True, ) - def pub_nidss_dengue( - self, locations: StringParam, epiweeks: EpiRangeParam = "*" - ) -> CALL_TYPE: + def pub_nidss_dengue(self, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL_TYPE: """Fetch NIDSS dengue data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") if locations is None or epiweeks is None: - raise InvalidArgumentException( - "`locations` and `epiweeks` are both required" - ) + raise InvalidArgumentException("`locations` and `epiweeks` are both required") 
return self._create_call( "nidss_dengue/", @@ -909,16 +866,12 @@ def pub_nidss_flu( ], ) - def pvt_norostat( - self, auth: str, location: str, epiweeks: EpiRangeParam = "*" - ) -> CALL_TYPE: + def pvt_norostat(self, auth: str, location: str, epiweeks: EpiRangeParam = "*") -> CALL_TYPE: """Fetch NoroSTAT data (point data, no min/max).""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") if auth is None or location is None or epiweeks is None: - raise InvalidArgumentException( - "`auth`, `location`, and `epiweeks` are all required" - ) + raise InvalidArgumentException("`auth`, `location`, and `epiweeks` are all required") return self._create_call( "norostat/", @@ -930,16 +883,12 @@ def pvt_norostat( ], ) - def pub_nowcast( - self, locations: StringParam, epiweeks: EpiRangeParam = "*" - ) -> CALL_TYPE: + def pub_nowcast(self, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL_TYPE: """Fetch Delphi's wILI nowcast.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") if locations is None or epiweeks is None: - raise InvalidArgumentException( - "`locations` and `epiweeks` are both required" - ) + raise InvalidArgumentException("`locations` and `epiweeks` are both required") return self._create_call( "nowcast/", @@ -985,16 +934,12 @@ def pub_paho_dengue( ], ) - def pvt_quidel( - self, auth: str, locations: StringParam, epiweeks: EpiRangeParam = "*" - ) -> CALL_TYPE: + def pvt_quidel(self, auth: str, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL_TYPE: """Fetch Quidel data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") if auth is None or epiweeks is None or locations is None: - raise InvalidArgumentException( - "`auth`, `epiweeks`, and `locations` are all required" - ) + raise InvalidArgumentException("`auth`, `epiweeks`, and `locations` are all required") return self._create_call( "quidel/", @@ -1017,9 +962,7 @@ def pvt_sensors( epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") if auth is None or names 
is None or locations is None or epiweeks is None: - raise InvalidArgumentException( - "`auth`, `names`, `locations`, and `epiweeks` are all required" - ) + raise InvalidArgumentException("`auth`, `names`, `locations`, and `epiweeks` are all required") return self._create_call( "sensors/", @@ -1061,9 +1004,7 @@ def pvt_twitter( raise InvalidArgumentException("`auth` and `locations` are both required") if not (dates is None) ^ (epiweeks is None): - raise InvalidArgumentException( - "exactly one of `dates` and `epiweeks` is required" - ) + raise InvalidArgumentException("exactly one of `dates` and `epiweeks` is required") time_field = ( EpidataFieldInfo("date", EpidataFieldType.date) @@ -1114,9 +1055,7 @@ def pub_wiki( raise InvalidArgumentException("`articles` is required") if not (dates is None) ^ (epiweeks is None): - raise InvalidArgumentException( - "exactly one of `dates` and `epiweeks` is required" - ) + raise InvalidArgumentException("exactly one of `dates` and `epiweeks` is required") time_field = ( EpidataFieldInfo("date", EpidataFieldType.date) diff --git a/epidatpy/_model.py b/epidatpy/_model.py index b6db10e..9021230 100644 --- a/epidatpy/_model.py +++ b/epidatpy/_model.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field -from enum import Enum from datetime import date -from urllib.parse import urlencode +from enum import Enum from typing import ( Any, Dict, @@ -13,19 +12,21 @@ Optional, Sequence, Tuple, - TypeVar, TypedDict, + TypeVar, Union, cast, ) +from urllib.parse import urlencode + from epiweeks import Week -from pandas import DataFrame, CategoricalDtype +from pandas import CategoricalDtype, DataFrame from ._parse import ( + fields_to_predicate, parse_api_date, - parse_api_week, parse_api_date_or_week, - fields_to_predicate, + parse_api_week, ) EpiDateLike = Union[int, str, date, Week] @@ -87,9 +88,7 @@ def __str__(self) -> str: return f"{format_date(self.start)}-{format_date(self.end)}" -EpiDataResponse = TypedDict( - "EpiDataResponse", 
{"result": int, "message": str, "epidata": List} -) +EpiDataResponse = TypedDict("EpiDataResponse", {"result": int, "message": str, "epidata": List}) EpiRangeParam = Union[EpiRangeLike, Iterable[EpiRangeLike]] @@ -268,9 +267,7 @@ def _parse_row( ) -> Mapping[str, Union[str, float, int, date, None]]: if not self.meta: return row - return { - k: self._parse_value(k, v, disable_date_parsing) for k, v in row.items() - } + return {k: self._parse_value(k, v, disable_date_parsing) for k, v in row.items()} def _as_df( self, @@ -289,9 +286,7 @@ def _as_df( if info.type == EpidataFieldType.bool: data_types[info.name] = bool elif info.type == EpidataFieldType.categorical: - data_types[info.name] = CategoricalDtype( - categories=info.categories or None, ordered=True - ) + data_types[info.name] = CategoricalDtype(categories=info.categories or None, ordered=True) elif info.type == EpidataFieldType.int: data_types[info.name] = int elif info.type in ( @@ -299,9 +294,7 @@ def _as_df( EpidataFieldType.epiweek, EpidataFieldType.date_or_epiweek, ): - data_types[info.name] = ( - int if disable_date_parsing else "datetime64[ns]" - ) + data_types[info.name] = int if disable_date_parsing else "datetime64[ns]" elif info.type == EpidataFieldType.float: data_types[info.name] = float else: diff --git a/epidatpy/_parse.py b/epidatpy/_parse.py index 1ffcfa9..fdeeb54 100644 --- a/epidatpy/_parse.py +++ b/epidatpy/_parse.py @@ -1,7 +1,6 @@ -from typing import Callable, Iterable, Optional, Set, cast - -from typing import Union from datetime import date, datetime +from typing import Callable, Iterable, Optional, Set, Union, cast + from epiweeks import Week diff --git a/epidatpy/async_request.py b/epidatpy/async_request.py index 904cce3..f1c40f9 100644 --- a/epidatpy/async_request.py +++ b/epidatpy/async_request.py @@ -1,4 +1,6 @@ +from asyncio import gather, get_event_loop from datetime import date +from json import loads from typing import ( AsyncGenerator, Callable, @@ -13,25 +15,25 @@ Union, 
cast, ) -from json import loads -from asyncio import get_event_loop, gather -from aiohttp import TCPConnector, ClientSession, ClientResponse +from aiohttp import ClientResponse, ClientSession, TCPConnector from pandas import DataFrame +from ._constants import BASE_URL, HTTP_HEADERS +from ._covidcast import CovidcastDataSources, define_covidcast_fields +from ._endpoints import AEpiDataEndpoints from ._model import ( - EpiRangeLike, AEpiDataCall, + EpidataFieldInfo, EpiDataFormatType, EpiDataResponse, - EpiRange, - EpidataFieldInfo, + EpiRangeLike, OnlySupportsClassicFormatException, add_endpoint_to_url, ) -from ._endpoints import AEpiDataEndpoints -from ._constants import HTTP_HEADERS, BASE_URL -from ._covidcast import CovidcastDataSources, define_covidcast_fields + +# Make the linter happy about the unused variables +__all__ = ["Epidata", "EpiDataAsyncCall", "EpiDataAsyncContext", "CovidcastEpidata"] async def _async_request( @@ -276,12 +278,3 @@ def create_call( return EpiDataAsyncCall(base_url, session, "covidcast", params, define_covidcast_fields()) return CovidcastDataSources.create(meta_data, create_call) - - -__all__ = [ - "Epidata", - "EpiDataAsyncCall", - "EpiDataAsyncContext", - "EpiRange", - "CovidcastEpidata", -] diff --git a/epidatpy/request.py b/epidatpy/request.py index 9e941b2..1e7ca88 100644 --- a/epidatpy/request.py +++ b/epidatpy/request.py @@ -1,36 +1,38 @@ from datetime import date +from json import loads from typing import ( Final, Generator, - Sequence, - cast, Iterable, + List, Mapping, Optional, + Sequence, Union, - List, + cast, ) -from json import loads +from pandas import DataFrame from requests import Response, Session from requests.auth import HTTPBasicAuth from tenacity import retry, stop_after_attempt -from pandas import DataFrame +from ._auth import get_api_key +from ._constants import BASE_URL, HTTP_HEADERS +from ._covidcast import CovidcastDataSources, define_covidcast_fields +from ._endpoints import AEpiDataEndpoints from ._model 
import ( - EpiRangeLike, AEpiDataCall, + EpidataFieldInfo, EpiDataFormatType, EpiDataResponse, - EpiRange, - EpidataFieldInfo, + EpiRangeLike, OnlySupportsClassicFormatException, add_endpoint_to_url, ) -from ._endpoints import AEpiDataEndpoints -from ._constants import HTTP_HEADERS, BASE_URL -from ._covidcast import CovidcastDataSources, define_covidcast_fields -from ._auth import get_api_key + +# Make the linter happy about the unused variables +__all__ = ["Epidata", "EpiDataCall", "EpiDataContext", "CovidcastEpidata"] @retry(reraise=True, stop=stop_after_attempt(2)) @@ -217,6 +219,3 @@ def create_call( return EpiDataCall(base_url, session, "covidcast", params, define_covidcast_fields()) return CovidcastDataSources.create(meta_data, create_call) - - -__all__ = ["Epidata", "EpiDataCall", "EpiDataContext", "EpiRange", "CovidcastEpidata"] From 92a0e5b51c87b4ed6ea2bc3c329bb78c092e3c5f Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 21:39:11 -0700 Subject: [PATCH 03/19] repo: ignore formatter in blame --- .git-blame-ignore-revs | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..34bcb53 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,4 @@ +# Run repo through formatter +c7831fc0fa1367d8517831996539ce0c6c48aa58 +# Run repo through formatter +57908fc0099685a6cad6bfe6996182e6295411df \ No newline at end of file From a332d2879d9264e176046b856449787c2143e1dd Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 8 Jul 2024 23:24:03 -0700 Subject: [PATCH 04/19] fix: repair Sphinx docs build and edit docs --- README.md | 10 ++- docs/Makefile | 20 +++++ docs/conf.py | 19 ++-- docs/covidcast_examples.rst | 16 ++-- docs/epidatpy.rst | 33 +++++++ docs/getting_started.rst | 117 ++++++++++-------------- docs/index.rst | 66 +++++++++----- docs/signals_covid.rst | 172 +++++++++++++----------------------- 8 files changed, 231 
insertions(+), 222 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/epidatpy.rst diff --git a/README.md b/README.md index 923aebb..6e9def6 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,19 @@ -# Delphi Epidata Python Client `epidatpy` +# `epidatpy` [![License: MIT][mit-image]][mit-url] [![Github Actions][github-actions-image]][github-actions-url] [![PyPi][pypi-image]][pypi-url] [![Read the Docs][docs-image]][docs-url] +A Python client for the [Delphi Epidata API](https://cmu-delphi.github.io/delphi-epidata/). + ## Install -Install latest version: +Install with the following commands: ```sh +# Latest dev version pip install -e "git+https://github.com/cmu-delphi/epidatpy.git#egg=epidatpy" + +# PyPI version +pip install epidatpy ``` ## Usage diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py index 8d506e1..20fc7be 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,9 +18,9 @@ # -- Project information ----------------------------------------------------- -project = "Delphi Epidata API client" -copyright = "2021, Delphi research group" # pylint: disable=redefined-builtin -author = "Delphi research group" +project = "epidatpy" +copyright = "2024, Delphi Research Group" # pylint: disable=redefined-builtin +author = "Delphi Research Group" # The full version, including alpha/beta/rc tags release = "1.0.0" @@ -43,7 +43,16 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +exclude_patterns = [ + "_build", + "Thumbs.db", + ".DS_Store", + "smoke_covid_test", + "smoke_test_async", + "smoke_test", + "tasks", + "test_pydantic", +] add_module_names = False autoclass_content = "class" @@ -63,7 +72,7 @@ html_theme_options = { "extra_nav_links": { - "Delphi group": "https://delphi.cmu.edu/", + "Delphi Research Group": "https://delphi.cmu.edu/", "Delphi Epidata API": "https://cmu-delphi.github.io/delphi-epidata", } } diff --git a/docs/covidcast_examples.rst b/docs/covidcast_examples.rst index 5d46838..e02329d 100644 --- a/docs/covidcast_examples.rst +++ b/docs/covidcast_examples.rst @@ -14,7 +14,7 @@ distributed through Facebook (`fb-survey`), for every county in the United State 2020-05-01 and 2020-05-07: >>> from delphi_epidata.request import EpiRange ->>> apicall = epidata[("fb-survey", "smoothed_cli")].call( +>>> apicall = epidata[("fb-survey", "smoothed_cli")].call( ... 'county', "*", EpiRange(20200501, 20200507), ... 
) EpiDataCall(endpoint=covidcast, params={'data_source': 'fb-survey', 'signals': 'smoothed_cli', 'time_type': 'day', 'time_values': '20200501-20200507', 'geo_type': 'county', 'geo_values': '*'}) @@ -22,11 +22,10 @@ EpiDataCall(endpoint=covidcast, params={'data_source': 'fb-survey', 'signals': ' >>> data.head() source signal geo_type geo_value time_type time_value issue lag value stderr sample_size direction missing_value missing_stderr missing_sample_size 0 fb-survey smoothed_cli county 01000 day 2020-05-01 2020-09-03 125 0.825410 0.136003 1722 NaN 0 0 0 -1 fb-survey smoothed_cli county 01001 day 2020-05-01 2020-09-03 125 1.299425 0.967136 115 NaN 0 0 0 -2 fb-survey smoothed_cli county 01003 day 2020-05-01 2020-09-03 125 0.696597 0.324753 584 NaN 0 0 0 -3 fb-survey smoothed_cli county 01015 day 2020-05-01 2020-09-03 125 0.428271 0.548566 122 NaN 0 0 0 -4 fb-survey smoothed_cli county 01031 day 2020-05-01 2020-09-03 125 0.025579 0.360827 114 NaN 0 0 0 - +1 fb-survey smoothed_cli county 01001 day 2020-05-01 2020-09-03 125 1.299425 0.967136 115 NaN 0 0 0 +2 fb-survey smoothed_cli county 01003 day 2020-05-01 2020-09-03 125 0.696597 0.324753 584 NaN 0 0 0 +3 fb-survey smoothed_cli county 01015 day 2020-05-01 2020-09-03 125 0.428271 0.548566 122 NaN 0 0 0 +4 fb-survey smoothed_cli county 01031 day 2020-05-01 2020-09-03 125 0.025579 0.360827 114 NaN 0 0 0 Each row represents one observation in one county on one day. The county FIPS code is given in the ``geo_value`` column, the date in the ``time_value`` @@ -47,7 +46,7 @@ and describes the mathematical derivation of the estimates. Using the ``geo_values`` argument, we can request data for a specific geography, such as the state of Pennsylvania for the month of September 2021: ->>> pa_data = epidata[("fb-survey", "smoothed_cli")].call( +>>> pa_data = epidata[("fb-survey", "smoothed_cli")].call( ... 'state', "pa", EpiRange(20210901, 20210930) ... 
).df() >>> pa_data.head() @@ -58,5 +57,4 @@ such as the state of Pennsylvania for the month of September 2021: 3 fb-survey smoothed_cli state pa day 2021-09-04 2021-09-09 5 0.984799 0.092566 9069 NaN 0 0 0 4 fb-survey smoothed_cli state pa day 2021-09-05 2021-09-10 5 1.010306 0.093357 9016 NaN 0 0 0 -We can request multiple states by providing a list, such as ``["pa", "ny", -"mo"]``. \ No newline at end of file +We can request multiple states by providing a list, such as ``["pa", "ny", "mo"]``. diff --git a/docs/epidatpy.rst b/docs/epidatpy.rst new file mode 100644 index 0000000..f72e0d8 --- /dev/null +++ b/docs/epidatpy.rst @@ -0,0 +1,33 @@ +epidatpy Reference +================== + +.. toctree:: + :maxdepth: 4 + +Submodules +---------- + +Module contents +--------------- + +.. automodule:: epidatpy + :members: + :undoc-members: + :show-inheritance: + +epidatpy.request module +----------------------- + +.. automodule:: epidatpy.request + :members: + :undoc-members: + :show-inheritance: + +epidatpy.async\_request module +------------------------------ + +.. automodule:: epidatpy.async_request + :members: + :undoc-members: + :show-inheritance: + diff --git a/docs/getting_started.rst b/docs/getting_started.rst index ab6b099..a993e2a 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -4,156 +4,143 @@ Getting Started Overview -------------- -This package provides access to data from various Epidata API endpoints including COVIDcast, -which provides numerous COVID-related data streams, updated daily. +This package provides access to data from various Epidata API endpoints including COVIDcast, +which provides numerous COVID-related data streams, updated daily. .. _epidata-endpoints: Epidata Data Sources --------------- +-------------------- The parameters available for each source data are documented in each linked source-specific API page. -| **COVID-19 Data** -.. list-table:: +..
list-table:: :widths: 20 20 40 :header-rows: 1 * - Endpoint - Name - Description - * - `covidcast `_ + * - `pub_covidcast `_ - COVIDcast - Delphi’s COVID-19 surveillance streams. - * - `covidcast_meta `_ + * - `pub_covidcast_meta `_ - COVIDcast metadata - Metadata for Delphi's COVID-19 surveillance streams. - * - `covid_hosp_facility `_ + * - `pub_covid_hosp_facility `_ - COVID-19 Hospitalization by Facility - COVID-19 Reported Patient Impact and Hospital Capacity - Facility Lookup - * - `covid_hosp `_ + * - `pub_covid_hosp `_ - COVID-19 Hospitalization - COVID-19 Reported Patient Impact and Hospital Capacity. -| **Influenza Data** -.. list-table:: +.. list-table:: :widths: 20 20 40 :header-rows: 1 * - Endpoint - Name - Description - * - `afhsb `_ - - AFHSB - - ... - * - `meta_afhsb `_ - - AFHSB Metadata - - ... - * - `cdc `_ - - CDC Page Hits + * - `pvt_cdc `_ + - CDC Page Hits - ... - * - `delphi `_ - - Delphi’s Forecast + * - `pub_delphi `_ + - Delphi’s Forecast - ... - * - `ecdc_ili `_ + * - `pub_ecdc_ili `_ - ECDC ILI - ECDC ILI data from the ECDC website. - * - `flusurv `_ - - FluSurv + * - `pub_flusurv `_ + - FluSurv - FluSurv-NET data (flu hospitaliation rates) from CDC. - * - `fluview `_ + * - `pub_fluview `_ - FluView - Influenza-like illness (ILI) from U.S. Outpatient Influenza-like Illness Surveillance Network (ILINet). - * - `fluview_meta `_ + * - `pub_fluview_meta `_ - FluView Metadata - Summary data about ``fluview``. - * - `fluview_clinical `_ + * - `pub_fluview_clinical `_ - FluView Clinical - ... - * - `gft `_ + * - `pub_gft `_ - Google Flu Trends - Estimate of influenza activity based on volume of certain search queries. This is now a static endpoint due to discontinuation. - * - `ght `_ - - Google Health Trends - - Estimate of influenza activity based on volume of certain search queries. - * - `kcdc_ili `_ + * - `pub_kcdc_ili `_ - KCDC ILI - KCDC ILI data from KCDC website. 
- * - `meta `_ + * - `pub_meta `_ - API Metadata - Metadata for ``fluview``, ``twitter``, ``wiki``, and ``delphi``. - * - `nidss_flu `_ + * - `pub_nidss_flu `_ - NIDSS Flu - Outpatient ILI from Taiwan's National Infectious Disease Statistics System (NIDSS). - * - `nowcast `_ + * - `pub_nowcast `_ - ILI Nearby - A nowcast of U.S. national, regional, and state-level (weighted) percent ILI, available seven days (regionally) or five days (state-level) before the first ILINet report for the corresponding week. - * - `quidel `_ + * - `pvt_quidel `_ - Quidel - Data provided by Quidel Corp., which contains flu lab test results. - * - `sensors `_ + * - `pvt_sensors `_ - Delphi's Digital Surveillance Sensors - ... - * - `twitter `_ + * - `pvt_twitter `_ - Twitter Stream - Estimate of influenza activity based on analysis of language used in tweets from HealthTweets. - * - `wiki `_ + * - `pub_wiki `_ - Wikipedia Access Logs - Number of page visits for selected English, Influenza-related wikipedia articles. -| **Dengue Data** -.. list-table:: +.. list-table:: :widths: 20 20 40 :header-rows: 1 * - Endpoint - Name - Description - * - `dengue_nowcast `_ + * - `pub_dengue_nowcast `_ - Delphi's Dengue Nowcast - ... - * - `dengue_sensors `_ + * - `pvt_dengue_sensors `_ - Delphi’s Dengue Digital Surveillance Sensors - ... - * - `nidss_dengue `_ + * - `pub_nidss_dengue `_ - NIDSS Dengue - Counts of confirmed dengue cases from Taiwan's NIDSS. - * - `paho_dengue `_ + * - `pub_paho_dengue `_ - PAHO Dengue - ... -| **Norovirus Data** -.. list-table:: +.. list-table:: :widths: 20 20 40 :header-rows: 1 * - Endpoint - Name - Description - * - `meta_norostat `_ + * - `pvt_meta_norostat `_ - NoroSTAT Metadata - ... - * - `norostat `_ + * - `pvt_norostat `_ - NoroSTAT - Suspected and confirmed norovirus outbreaks reported by state health departments to the CDC. -| - Epiweeks and Dates ------------------ -Epiweeks use the U.S. definition. 
That is, the first epiweek each year is the week, starting on a Sunday, -containing January 4. See `this page `_ for more information. +Epiweeks use the U.S. definition. That is, the first epiweek each year is the +week, starting on a Sunday, containing January 4. See `this page +`_ +for more information. Formatting for epiweeks is YYYYWW and for dates is YYYYMMDD. -Use individual values, comma-separated lists or, a hyphenated range of values to specify single or several dates. +Use individual values, comma-separated lists or, a hyphenated range of values to specify single or several dates. An ``EpiRange`` object can be also used to construct a range of epiweeks or dates. Examples include: - ``param = 201530`` (A single epiweek) @@ -162,8 +149,6 @@ An ``EpiRange`` object can be also used to construct a range of epiweeks or date - ``param = '201440,201501-201510'`` (Several epiweeks, including a range) - ``param = EpiRange(20070101, 20071231)`` (A range of dates) -| - .. _getting-started: Basic examples @@ -176,8 +161,8 @@ distributed through Facebook, for every county in the United States between 2020-05-01 and 2020-05-07: >>> from epidatpy.request import Epidata, EpiRange ->>> apicall = Epidata.covidcast("fb-survey", "smoothed_cli", -... "day", "county", +>>> apicall = Epidata.covidcast("fb-survey", "smoothed_cli", +... "day", "county", ... EpiRange(20200501, 20200507), "*") >>> data = apicall.df() >>> data.head() @@ -197,7 +182,7 @@ May 1st were updated on September 3rd based on new data, giving a ``lag`` of 125 See the :py:func:`epidatpy.request.Epidata.covidcast` documentation for further details on the returned columns. -In the above code, the ``.df()`` function on the ``apicall`` variable generated a Pandas DataFrame. We can use +In the above code, the ``.df()`` function on the ``apicall`` variable generated a Pandas DataFrame. We can use other :ref:`output functions ` to parse the requested API call in different formats. 
To parse the data into JSON format, we can use the following command: @@ -238,7 +223,7 @@ into JSON format, we can use the following command: . }] -Note that all of the :ref:`output functions ` have a ``field`` parameter which takes in any form of iterator objects +Note that all of the :ref:`output functions ` have a ``field`` parameter which takes in any form of iterator objects to enable fetching the data with customization (e.g. specifying which fields or columns to output). Similar to the previous example, to parse the data in JSON format, but customize the field to show only ``geo_value`` and ``value``, we would use the following command: @@ -259,9 +244,6 @@ command: . }] - -| - **Wikipedia Access article "influenza" on 2020w01** >>> apicall_wiki = Epidata.wiki(articles='influenza', epiweeks='202001') @@ -269,8 +251,6 @@ command: >>> print(data) [{'article': 'influenza', 'count': 6516, 'total': 663604044, 'hour': -1, 'epiweek': datetime.date(2019, 12, 29), 'value': 9.81910834}] -| - **FluView on 2019w01 (national)** >>> apicall_fluview = Epidata.fluview(regions='nat', epiweeks='201901') @@ -294,10 +274,3 @@ command: 'ili': 3.63095}], 'result': 1, 'message': 'success'} - -| - -Other examples (TODO) --------------- - -(TODO) \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 6fc4ee9..a046b74 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,45 +1,65 @@ -Delphi Epidata +epidatpy =============== This package provides Python access to the `Delphi Epidata API `_ published by -the `Delphi group `_ at `Carnegie Mellon University +the `Delphi research group `_ at `Carnegie Mellon University `_. The package source code and bug tracker can be found `on GitHub `_. -.. note :: **You should consider subscribing** to the `API mailing list - `_ to be - notified of package updates, new data sources, corrections, and other - updates. - -.. 
warning :: If you use data from the COVIDcast API to power a public product, - dashboard, app, or other service, please download the data you need and store - it centrally rather than making API requests for every user. Our server - resources are limited and cannot support high-volume interactive use. - - See also the `COVIDcast Terms of Use - `_, noting that the data is a - research product and not warranted for a particular purpose. - - Installation ------------ -This package is available on PyPI as `covidcast -`_, and can be installed using ``pip`` or -your favorite Python package manager: +This package will be available on PyPI as `epidatpy +`_ and will be installable with ``pip``. +Meanwhile, it can be installed from GitHub: .. code-block:: sh - pip install epidatpy + pip install -e "git+https://github.com/cmu-delphi/epidatpy.git#egg=epidatpy" The package requires `pandas `_ and `requests `_; these should be installed automatically. +API Keys +-------- + +The Delphi Epidata API requires a (free) API key for full functionality. To +generate your key, register for a pseudo-anonymous account `here +`_ and see more +discussion on the `general API website +`_. The ``epidatpy`` +client will automatically look for this key in the environment variable +``DELPHI_EPIDATA_KEY``. We recommend storing your key in a ``.env`` file and using +`python-dotenv `_ to load it into +your environment. + +Note that for the time being, the private endpoints (i.e. those prefixed with +``pvt``) will require a separate key that needs to be passed as an argument. + +See also the `COVIDcast Terms of Use +`_, noting that the data is a +research product and not warranted for a particular purpose. + +For users of the covidcast Python package +------------------------------------------ + +The `covidcast `_ +package is deprecated and will no longer be updated. The ``epidatpy`` package is a +complete rewrite with a focus on speed, reliability, and ease of use. 
It also +supports more endpoints and data sources than ``covidcast``. When migrating from +that package, you will need to use the ``pub_covidcast`` function in +``epidatpy``. + +.. note :: **You should consider subscribing** to the `API mailing list + `_ to be + notified of package updates, new data sources, corrections, and other + updates. + Contents -------- @@ -48,5 +68,9 @@ Contents getting_started + covidcast_examples + signals_covid + epidatpy + diff --git a/docs/signals_covid.rst b/docs/signals_covid.rst index 2d43e9b..46d486b 100644 --- a/docs/signals_covid.rst +++ b/docs/signals_covid.rst @@ -1,126 +1,72 @@ Fetching Data ============= ->>> from delphi_epidata.request import Epidata -This package provides various functions that can be called on the ``Epidata`` object to obtain any :ref:`Epidata endpoint ` signals of interest. +>>> from epidatpy.request import Epidata +>>> epi = Epidata() +>>> epi.pub_covidcast('usa-facts', 'confirmed_7dav_incidence_num', '20210101', '20210131', 'state', 'tx') -The functions below will return an ``EpiDataCall`` object, which contains the appropriate URL -and parameters required to make an API request. The signal of interest can then be obtained in 5 different :ref:`output formats `. +This package provides various functions that can be called on the ``Epidata`` object to obtain any :ref:`Epidata endpoint ` signals of interest. The functions below are inherited by the ``Epidata`` object. Detailed examples are provided in the :ref:`usage examples `. COVIDcast Signals ----------------- -.. autofunction:: delphi_epidata.request.Epidata.covidcast -| -.. autofunction:: delphi_epidata.request.Epidata.covidcast_meta -| -.. autofunction:: delphi_epidata.request.Epidata.covid_hosp_facility -| -.. autofunction:: delphi_epidata.request.Epidata.covid_hosp_facility_lookup -| -.. autofunction:: delphi_epidata.request.Epidata.covid_hosp_state_timeseries -| +.. automethod:: epidatpy.AEpiDataEndpoints.pub_covidcast + +.. 
automethod:: epidatpy.AEpiDataEndpoints.pub_covidcast_meta + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_covid_hosp_facility + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_covid_hosp_facility_lookup + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_covid_hosp_state_timeseries + Other Epidata Signals ------------------ -.. autofunction:: delphi_epidata.request.Epidata.pvt_afhsb -| -.. autofunction:: delphi_epidata.request.Epidata.pvt_meta_afhsb -| -.. autofunction:: delphi_epidata.request.Epidata.cdc -| -.. autofunction:: delphi_epidata.request.Epidata.delphi -| -.. autofunction:: delphi_epidata.request.Epidata.ecdc_ili -| -.. autofunction:: delphi_epidata.request.Epidata.flusurv -| -.. autofunction:: delphi_epidata.request.Epidata.fluview -| -.. autofunction:: delphi_epidata.request.Epidata.fluview_meta -| -.. autofunction:: delphi_epidata.request.Epidata.fluview_clinical -| -.. autofunction:: delphi_epidata.request.Epidata.gft -| -.. autofunction:: delphi_epidata.request.Epidata.ght -| -.. autofunction:: delphi_epidata.request.Epidata.kcdc_ili -| -.. autofunction:: delphi_epidata.request.Epidata.meta -| -.. autofunction:: delphi_epidata.request.Epidata.nidss_flu -| -.. autofunction:: delphi_epidata.request.Epidata.nowcast -| -.. autofunction:: delphi_epidata.request.Epidata.pvt_quidel -| -.. autofunction:: delphi_epidata.request.Epidata.pvt_sensors -| -.. autofunction:: delphi_epidata.request.Epidata.pvt_twitter -| -.. autofunction:: delphi_epidata.request.Epidata.wiki -| -.. autofunction:: delphi_epidata.request.Epidata.dengue_nowcast -| -.. autofunction:: delphi_epidata.request.Epidata.pvt_dengue_sensors -| -.. autofunction:: delphi_epidata.request.Epidata.nidss_dengue -| -.. autofunction:: delphi_epidata.request.Epidata.paho_dengue -| -.. autofunction:: delphi_epidata.request.Epidata.pvt_meta_norostat -| -.. autofunction:: delphi_epidata.request.Epidata.pvt_norostat - - -.. 
_output-data: - -Output Functions --------- - -The following functions can be called on an ``EpiDataCall`` object to make an API request and parse the signal in -5 different formats: - - Classic - - JSON - - Pandas DataFrame - - CSV - - Iterator -| -.. autofunction:: delphi_epidata.request.EpiDataCall.classic -| -.. autofunction:: delphi_epidata.request.EpiDataCall.json -| -.. autofunction:: delphi_epidata.request.EpiDataCall.df -| -.. autofunction:: delphi_epidata.request.EpiDataCall.csv -| -.. autofunction:: delphi_epidata.request.EpiDataCall.iter - - -More on COVIDcast (TODO) ------------------------- - -Many data sources and signals are available, so one can also obtain a data frame -of all signals and their associated metadata: - ->>> from epidatpy.request import CovidcastEpidata ->>> covid_ds = CovidcastEpidata() ->>> df_source = covid_ds.source_df ->>> df_signal = covid_ds.signal_df - -Calling ``CovidcastEpidata`` function will return a class object ``CovidcastDataSources``, -which has the property ``source_df`` and ``signal_df``, two data frames containing -the information of all available sources and signals. -More details of the two data frames are listed below. - -.. autoclass:: epidatpy.request.CovidcastDataSources() - :members: - -More metadata statistics can also be obtained as follows: +--------------------- ->>> from epidatpy.request import Epidata ->>> df = Epidata.covidcast_meta().df() +.. automethod:: epidatpy.AEpiDataEndpoints.pvt_cdc + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_delphi + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_ecdc_ili + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_flusurv + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_fluview + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_fluview_meta + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_fluview_clinical + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_gft + +.. automethod:: epidatpy.AEpiDataEndpoints.pvt_ght + +.. 
automethod:: epidatpy.AEpiDataEndpoints.pub_kcdc_ili + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_meta + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_nidss_flu + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_nowcast + +.. automethod:: epidatpy.AEpiDataEndpoints.pvt_quidel + +.. automethod:: epidatpy.AEpiDataEndpoints.pvt_sensors + +.. automethod:: epidatpy.AEpiDataEndpoints.pvt_twitter + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_wiki + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_dengue_nowcast + +.. automethod:: epidatpy.AEpiDataEndpoints.pvt_dengue_sensors + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_nidss_dengue + +.. automethod:: epidatpy.AEpiDataEndpoints.pub_paho_dengue + +.. automethod:: epidatpy.AEpiDataEndpoints.pvt_meta_norostat -.. autofunction:: epidatpy.request.Epidata.covidcast_meta() \ No newline at end of file +.. automethod:: epidatpy.AEpiDataEndpoints.pvt_norostat From 18ff4f8259d5521a75721d5eef59e6c7ab0cb145 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 20:53:13 -0700 Subject: [PATCH 05/19] refactor: unify settings and build to pyproject.toml --- .dockerignore | 2 - .editorconfig | 4 +- .gitignore | 9 ++-- .pre-commit-config.yaml | 10 +++++ README.md | 8 ++-- mypy.ini | 15 ------- pyproject.toml | 98 ++++++++++++++++++++++++++++++++++++----- requirements-dev.txt | 14 ------ requirements.txt | 5 --- setup.py | 26 ----------- smoke_test_async.py | 1 + tasks.py | 18 +++----- 12 files changed, 117 insertions(+), 93 deletions(-) delete mode 100644 .dockerignore create mode 100644 .pre-commit-config.yaml delete mode 100644 mypy.ini delete mode 100644 requirements-dev.txt delete mode 100644 requirements.txt delete mode 100644 setup.py diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index d4f76f6..0000000 --- a/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -/venv -/.mypy_cache diff --git a/.editorconfig b/.editorconfig index 8a80734..ef887d0 100644 --- a/.editorconfig +++ b/.editorconfig 
@@ -6,7 +6,6 @@ root = true [*] - # Change these settings to your own preference indent_style = space indent_size = 4 @@ -19,3 +18,6 @@ insert_final_newline = true [*.md] trim_trailing_whitespace = false + +[*.yml] +indent_size = 2 \ No newline at end of file diff --git a/.gitignore b/.gitignore index fc5bc23..aa58631 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,11 @@ .mypy_cache *.pyc __pycache__ -/venv -/docs/_build .coverage .pytest_cache +.DS_Store *.egg-info -/dist -.DS_Store \ No newline at end of file +dist/ +build/ +docs/_build +venv/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..92529d6 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.5.1 + hooks: + # Run the linter. + - id: ruff + args: [--fix] + # Run the formatter. + - id: ruff-format diff --git a/README.md b/README.md index 6e9def6..6f4dbfa 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Install with the following commands: # Latest dev version pip install -e "git+https://github.com/cmu-delphi/epidatpy.git#egg=epidatpy" -# PyPI version +# PyPI version (not yet available) pip install epidatpy ``` @@ -20,14 +20,14 @@ pip install epidatpy TODO -## Development Environment +## Development Prepare virtual environment and install dependencies ```sh python -m venv venv source ./venv/bin/activate -pip install --use-feature=2020-resolver -r requirements.txt -r requirements-dev.txt +pip install -e ".[dev]" ``` ### Common Commands @@ -44,7 +44,7 @@ inv dist # build distribution packages inv release # upload the current version to pypi ``` -## Release Process +### Release Process The release consists of multiple steps which can be all done via the GitHub website: diff --git a/mypy.ini b/mypy.ini deleted file mode 100644 index 2abe8a7..0000000 --- a/mypy.ini +++ /dev/null @@ -1,15 +0,0 @@ -# MyPy config file -# File 
reference here - http://mypy.readthedocs.io/en/latest/config_file.html#config-file - -[mypy] -ignore_missing_imports = True -no_strict_optional = True -disallow_incomplete_defs = True -disallow_subclassing_any = True -disallow_untyped_calls = True -disallow_untyped_defs = True -allow_untyped_decorators = True -warn_redundant_casts = False -warn_unused_ignores = True -warn_return_any = True -exclude = "(tasks|setup).py" diff --git a/pyproject.toml b/pyproject.toml index 46f545f..5cf932a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,21 +1,99 @@ +# This file was derived from the PyPA Sample Project +# https://github.com/pypa/sampleproject + +# Guide (user-friendly): +# https://packaging.python.org/en/latest/guides/writing-pyproject-toml/ + +# Specification (technical, formal): +# https://packaging.python.org/en/latest/specifications/pyproject-toml/ + + +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "epidatpy" +version = "0.5.0" +description = "A programmatic interface to Delphi's Epidata API." 
+readme = "README.md" +license = { file = "LICENSE" } +authors = [{ name = "Delphi Research Group" }] +maintainers = [{ name = "Dmitry Shemetov", email = "dshemeto@andrew.cmu.edu" }] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Natural Language :: English", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] +requires-python = ">=3.8" +dependencies = [ + "aiohttp", + "epiweeks>=2.1", + "pandas>=1", + "requests>=2.25", + "tenacity", +] + +[project.optional-dependencies] +dev = [ + "black", + "coverage", + "invoke", + "mypy", + "pre-commit", + "pylint", + "pytest", + "recommonmark", + "sphinx_rtd_theme", + "sphinx-autodoc-typehints", + "sphinx", + "twine", + "types-requests", + "watchdog", + "wheel", +] + +[project.urls] +homepage = "https://github.com/cmu-delphi/epidatpy" +repository = "https://github.com/cmu-delphi/epidatpy" + + [tool.black] line-length = 120 target-version = ['py38'] -include = 'epidatpy' + +[tool.ruff] +lint.extend-select = ["I"] [tool.pylint] max-line-length = 120 min-public-methods = 1 disable = [ - "R0801", - "E1101", - "E0611", - "C0114", - "C0116", - "C0103", - "R0913", - "R0914", - "W0702", + "duplicate-code", + "invalid-name", + "missing-module-docstring", + "missing-function-docstring", + "too-many-arguments", + "too-many-locals", + "too-many-lines", "too-many-public-methods", "too-many-instance-attributes", + "too-many-lines", + "too-many-return-statements", + "too-many-branches", ] + +[tool.mypy] +ignore_missing_imports = true +disallow_incomplete_defs = true +disallow_subclassing_any = true +disallow_untyped_calls = true +disallow_untyped_defs = true +disallow_untyped_decorators = false +warn_redundant_casts = false +warn_unused_ignores = true +warn_return_any = true +exclude = "(tasks|setup).py" diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 
3546e59..0000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,14 +0,0 @@ -mypy -pylint -black -pytest -invoke -watchdog -coverage -sphinx -recommonmark -sphinx_rtd_theme -sphinx-autodoc-typehints -twine -wheel -types-requests diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 5b324f5..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -pandas>=1 -requests>=2.25 -tenacity -aiohttp -epiweeks>=2.1 diff --git a/setup.py b/setup.py deleted file mode 100644 index d7eee09..0000000 --- a/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -import setuptools -import pathlib - - -setuptools.setup( - name="epidatpy", - version="0.5.0", - author="Alex Reinhart", - author_email="areinhar@stat.cmu.edu", - description="A programmatic interface to Delphi's Epidata API.", - long_description=pathlib.Path("README.md").read_text(), - long_description_content_type="text/markdown", - url="https://github.com/cmu-delphi/epidatpy", - packages=setuptools.find_packages(), - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Intended Audience :: Science/Research", - "Natural Language :: English", - "Topic :: Scientific/Engineering :: Bio-Informatics", - ], - python_requires=">=3.8", - install_requires=[f.strip() for f in pathlib.Path("requirements.txt").read_text().split("\n") if f], - # package_data={'epidatpy': []} -) diff --git a/smoke_test_async.py b/smoke_test_async.py index b26e09c..b284f30 100644 --- a/smoke_test_async.py +++ b/smoke_test_async.py @@ -1,4 +1,5 @@ from asyncio import get_event_loop + from epidatpy.async_request import Epidata diff --git a/tasks.py b/tasks.py index 4cde5ba..afeb455 100644 --- a/tasks.py +++ b/tasks.py @@ -4,18 +4,13 @@ Execute 'invoke --list' for guidance on using Invoke """ -import pathlib import shutil -from pathlib import Path import webbrowser +from pathlib import Path -from invoke import task, Context - -Path().expanduser() - +from 
invoke import task ROOT_DIR = Path(__file__).parent -SETUP_FILE = ROOT_DIR.joinpath("setup.py") TEST_DIR = ROOT_DIR.joinpath("tests") SOURCE_DIR = ROOT_DIR.joinpath("epidatpy") TOX_DIR = ROOT_DIR.joinpath(".tox") @@ -29,7 +24,7 @@ JOINED_PYTHON_DIRS = " ".join(PYTHON_DIRS) -def _delete_file(file: pathlib.Path) -> None: +def _delete_file(file: Path) -> None: try: file.unlink(missing_ok=True) except TypeError: @@ -41,7 +36,7 @@ def _delete_file(file: pathlib.Path) -> None: @task() -def format(c): # pylint: disable=unused-argument,redefined-builtin +def format(c): # pylint: redefined-builtin """ Format code """ @@ -144,7 +139,7 @@ def clean_tests(c): # pylint: disable=unused-argument @task(pre=[clean_build, clean_python, clean_tests, clean_docs]) -def clean(_c): # pylint: disable=unused-argument +def clean(c): # pylint: disable=unused-argument """ Runs all clean sub-tasks """ @@ -155,8 +150,7 @@ def dist(c): """ Build source and wheel packages """ - c.run("python setup.py sdist") - c.run("python setup.py bdist_wheel") + c.run("python -m build --sdist --wheel") @task(pre=[clean, dist]) From 4343eca19884c9bb7abefb1fbd08a85a9d7859e9 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 21:05:53 -0700 Subject: [PATCH 06/19] refactor: simplify imports --- epidatpy/__init__.py | 30 ++++++------------------------ smoke_test.py | 2 +- 2 files changed, 7 insertions(+), 25 deletions(-) diff --git a/epidatpy/__init__.py b/epidatpy/__init__.py index 49880ec..3bd7a38 100644 --- a/epidatpy/__init__.py +++ b/epidatpy/__init__.py @@ -1,27 +1,9 @@ """Fetch data from Delphi's API.""" -from ._constants import __version__ -from ._model import ( - EpiRange, - EpiRangeDict, - EpiDataResponse, - EpiRangeLike, - InvalidArgumentException, - EpiRangeParam, - IntParam, - StringParam, - EpiDataFormatType, - AEpiDataCall, -) -from ._covidcast import ( - DataSignal, - DataSource, - WebLink, - DataSignalGeoStatistics, - CovidcastDataSources, - GeoType, - TimeType, -) -from ._auth 
import get_api_key +# Make the linter happy about the unused variables +__all__ = ["__version__", "Epidata", "CovidcastEpidata", "EpiRange"] +__author__ = "Delphi Research Group" + -__author__ = "Delphi Group" +from ._constants import __version__ +from .request import CovidcastEpidata, Epidata, EpiRange diff --git a/smoke_test.py b/smoke_test.py index 436c1e9..f6a911c 100644 --- a/smoke_test.py +++ b/smoke_test.py @@ -1,5 +1,5 @@ from datetime import date -from epidatpy.request import Epidata, EpiRange +from epidatpy import CovidcastEpidata, Epidata, EpiRange apicall = Epidata.covidcast("fb-survey", "smoothed_cli", "day", "nation", EpiRange(20210405, 20210410), "us") From 081b8def74d2a03fbb429b9aac8b43b70219d3ae Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 21:14:24 -0700 Subject: [PATCH 07/19] fix: update argument types --- epidatpy/_covidcast.py | 32 +++++++++++-------------- epidatpy/_endpoints.py | 54 ++++++++++++++++-------------------------- epidatpy/_model.py | 49 +++++++++++++++++++------------------- epidatpy/_parse.py | 4 ++-- epidatpy/request.py | 24 +++++++++---------- 5 files changed, 72 insertions(+), 91 deletions(-) diff --git a/epidatpy/_covidcast.py b/epidatpy/_covidcast.py index 700c403..e3deacf 100644 --- a/epidatpy/_covidcast.py +++ b/epidatpy/_covidcast.py @@ -1,4 +1,4 @@ -from dataclasses import Field, InitVar, dataclass, field, fields +from dataclasses import Field, InitVar, asdict, dataclass, field, fields from functools import cached_property from typing import ( Any, @@ -24,7 +24,6 @@ CALL_TYPE, EpidataFieldInfo, EpidataFieldType, - EpiRangeLike, EpiRangeParam, InvalidArgumentException, ) @@ -86,7 +85,7 @@ class DataSignal(Generic[CALL_TYPE]): represents a COVIDcast data signal """ - _create_call: Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE] + _create_call: Callable[[Mapping[str, Optional[EpiRangeParam]]], CALL_TYPE] source: str signal: str @@ -124,7 +123,7 @@ def 
__post_init__(self) -> None: @staticmethod def to_df(signals: Iterable["DataSignal"]) -> DataFrame: df = DataFrame( - signals, + [asdict(s) for s in signals], columns=[ "source", "signal", @@ -155,7 +154,7 @@ def key(self) -> Tuple[str, str]: def call( self, geo_type: GeoType, - geo_values: Union[int, str, Iterable[Union[int, str]]], + geo_values: Union[str, Sequence[str]], time_values: EpiRangeParam, as_of: Union[None, str, int] = None, issues: Optional[EpiRangeParam] = None, @@ -184,7 +183,7 @@ def call( def __call__( self, geo_type: GeoType, - geo_values: Union[int, str, Iterable[Union[int, str]]], + geo_values: Union[str, Sequence[str]], time_values: EpiRangeParam, as_of: Union[None, str, int] = None, issues: Optional[EpiRangeParam] = None, @@ -200,7 +199,7 @@ class DataSource(Generic[CALL_TYPE]): represents a COVIDcast data source """ - _create_call: InitVar[Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE]] + _create_call: InitVar[Callable[[Mapping[str, Optional[EpiRangeParam]]], CALL_TYPE]] source: str db_source: str @@ -213,10 +212,7 @@ class DataSource(Generic[CALL_TYPE]): signals: Sequence[DataSignal] = field(default_factory=list) - def __post_init__( - self, - _create_call: Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE], - ) -> None: + def __post_init__(self, _create_call: Callable[[Mapping[str, Optional[EpiRangeParam]]], CALL_TYPE]) -> None: self.link = [ WebLink(alt=link["alt"], href=link["href"]) if isinstance(link, dict) else link for link in self.link ] @@ -229,7 +225,7 @@ def __post_init__( @staticmethod def to_df(sources: Iterable["DataSource"]) -> DataFrame: df = DataFrame( - sources, + [asdict(source) for source in sources], columns=[ "source", "name", @@ -262,7 +258,7 @@ class CovidcastDataSources(Generic[CALL_TYPE]): init=False, default_factory=OrderedDict ) - _create_call: Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE] + 
_create_call: Callable[[Mapping[str, Optional[EpiRangeParam]]], CALL_TYPE] def __post_init__(self) -> None: self._source_by_name = {s.source: s for s in self.sources} @@ -274,8 +270,8 @@ def get_source(self, source: str) -> Optional[DataSource[CALL_TYPE]]: return self._source_by_name.get(source) @property - def source_names(self) -> Iterable[str]: - return (s.source for s in self.sources) + def source_names(self) -> Sequence[str]: + return [s.source for s in self.sources] @cached_property def source_df(self) -> DataFrame: @@ -403,10 +399,10 @@ def __iter__(self) -> Iterable[DataSource[CALL_TYPE]]: return iter(self.sources) @overload - def __getitem__(self, source: str) -> DataSource[CALL_TYPE]: ... + def __getitem__(self, source: str, /) -> DataSource[CALL_TYPE]: ... @overload - def __getitem__(self, source_signal: Tuple[str, str]) -> DataSignal[CALL_TYPE]: ... + def __getitem__(self, source_signal: Tuple[str, str], /) -> DataSignal[CALL_TYPE]: ... def __getitem__( self, source_signal: Union[str, Tuple[str, str]] @@ -422,7 +418,7 @@ def __getitem__( @staticmethod def create( meta: List[Dict], - create_call: Callable[[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]], CALL_TYPE], + create_call: Callable[[Mapping[str, Optional[EpiRangeParam]]], CALL_TYPE], ) -> "CovidcastDataSources": source_fields = fields(DataSource) sources = [DataSource(_create_call=create_call, **_limit_fields(k, source_fields)) for k in meta] diff --git a/epidatpy/_endpoints.py b/epidatpy/_endpoints.py index cadffba..c22943c 100644 --- a/epidatpy/_endpoints.py +++ b/epidatpy/_endpoints.py @@ -1,9 +1,13 @@ import warnings from abc import ABC, abstractmethod -from datetime import date -from typing import Generic, Iterable, Literal, Mapping, Optional, Sequence, Union - -from epiweeks import Week +from typing import ( + Generic, + Literal, + Mapping, + Optional, + Sequence, + Union, +) from ._covidcast import GeoType, TimeType, define_covidcast_fields from ._model import ( @@ -12,16 
+16,18 @@ EpidataFieldInfo, EpidataFieldType, EpiRange, - EpiRangeLike, EpiRangeParam, IntParam, InvalidArgumentException, + ParamType, StringParam, + format_epiweek, ) +from ._parse import parse_api_week -def get_wildcard_equivalent_dates(time_value: str, time_type: Literal["day", "week"]) -> str: - if time_value == "*": +def get_wildcard_equivalent_dates(time_value: EpiRangeParam, time_type: Literal["day", "week"]) -> EpiRangeParam: + if isinstance(time_value, str) and time_value == "*": if time_type == "day": return EpiRange("10000101", "30000101") if time_type == "week": @@ -29,11 +35,8 @@ def get_wildcard_equivalent_dates(time_value: str, time_type: Literal["day", "we return time_value -def reformat_epirange(epirange: EpiRange, to_type: str) -> EpiRange: +def reformat_epirange(epirange: EpiRange, to_type: Literal["day", "week"]) -> EpiRange: """Reformat from week to day or vice versa or noop.""" - if to_type not in ("day", "week"): - raise InvalidArgumentException("`to_type` must be 'day' or 'week'") - if to_type == "day" and isinstance(epirange.start, (str, int)) and len(str(epirange.start)) == 6: coercion_msg = ( "`collection_weeks` is in week format but `pub_covid_hosp_facility`" @@ -48,14 +51,6 @@ def reformat_epirange(epirange: EpiRange, to_type: str) -> EpiRange: return epirange -def parse_api_week(value: Union[str, int]) -> date: - return Week.fromstring(str(value)).startdate() - - -def format_epiweek(value: Union[str, int]) -> str: - return Week.fromstring(str(value)).cdcformat() - - class AEpiDataEndpoints(ABC, Generic[CALL_TYPE]): """ epidata endpoint list and fetcher @@ -69,7 +64,7 @@ def range(from_: EPI_RANGE_TYPE, to: EPI_RANGE_TYPE) -> EpiRange[EPI_RANGE_TYPE] def _create_call( self, endpoint: str, - params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]], + params: Mapping[str, Optional[ParamType]], meta: Optional[Sequence[EpidataFieldInfo]] = None, only_supports_classic: bool = False, ) -> CALL_TYPE: @@ -145,7 +140,7 @@ def 
pub_covid_hosp_facility_lookup( def pub_covid_hosp_facility( self, hospital_pks: StringParam, - collection_weeks: StringParam = "*", + collection_weeks: EpiRangeParam = "*", publication_dates: Optional[EpiRangeParam] = None, ) -> CALL_TYPE: """Fetch COVID hospitalization data for specific facilities.""" @@ -468,7 +463,7 @@ def pub_covidcast( signals: StringParam, geo_type: GeoType, time_type: TimeType, - geo_values: Union[int, str, Iterable[Union[int, str]]] = "*", + geo_values: Union[str, Sequence[str]] = "*", time_values: EpiRangeParam = "*", as_of: Union[None, str, int] = None, issues: Optional[EpiRangeParam] = None, @@ -984,13 +979,10 @@ def pvt_twitter( self, auth: str, locations: StringParam, - time_type: str, - time_values: EpiRangeLike = "*", + time_type: Literal["day", "week"], + time_values: EpiRangeParam = "*", ) -> CALL_TYPE: """Fetch HealthTweets data.""" - if time_type not in ["day", "week"]: - raise InvalidArgumentException("`time_type` must be 'day' or 'week'") - if time_type == "day": dates = time_values epiweeks = None @@ -1032,16 +1024,12 @@ def pvt_twitter( def pub_wiki( self, articles: StringParam, - time_type: str, - time_values: EpiRangeLike = "*", + time_type: Literal["day", "week"], + time_values: EpiRangeParam = "*", hours: Optional[IntParam] = None, language: str = "en", ) -> CALL_TYPE: """Fetch Wikipedia access data.""" - - if time_type not in ["day", "week"]: - raise InvalidArgumentException("`time_type` must be 'day' or 'week'") - if time_type == "day": dates = time_values epiweeks = None diff --git a/epidatpy/_model.py b/epidatpy/_model.py index 9021230..c1447ce 100644 --- a/epidatpy/_model.py +++ b/epidatpy/_model.py @@ -6,7 +6,6 @@ Dict, Final, Generic, - Iterable, List, Mapping, Optional, @@ -20,7 +19,7 @@ from urllib.parse import urlencode from epiweeks import Week -from pandas import CategoricalDtype, DataFrame +from pandas import CategoricalDtype, DataFrame, Series from ._parse import ( fields_to_predicate, @@ -32,6 +31,13 @@ 
EpiDateLike = Union[int, str, date, Week] EpiRangeDict = TypedDict("EpiRangeDict", {"from": EpiDateLike, "to": EpiDateLike}) EpiRangeLike = Union[int, str, "EpiRange", EpiRangeDict, date, Week] +EpiRangeParam = Union[EpiRangeLike, Sequence[EpiRangeLike]] +StringParam = Union[str, Sequence[str]] +IntParam = Union[int, Sequence[int]] +EpiDataResponse = TypedDict("EpiDataResponse", {"result": int, "message": str, "epidata": List}) +ParamType = Union[StringParam, IntParam, EpiRangeParam] +EPI_RANGE_TYPE = TypeVar("EPI_RANGE_TYPE", int, date, str, Week) +CALL_TYPE = TypeVar("CALL_TYPE") def format_date(d: EpiDateLike) -> str: @@ -57,13 +63,15 @@ def format_item(value: EpiRangeLike) -> str: return str(value) -def format_list(values: Union[EpiRangeLike, Iterable[EpiRangeLike]]) -> str: +def format_list(values: EpiRangeParam) -> str: """Turn a list/tuple of values/ranges into a comma-separated string.""" - list_values = values if isinstance(values, (list, tuple, set)) else [values] - return ",".join([format_item(value) for value in list_values]) + if isinstance(values, Sequence) and not isinstance(values, str): + return ",".join([format_item(value) for value in values]) + return format_item(values) -EPI_RANGE_TYPE = TypeVar("EPI_RANGE_TYPE", int, date, str, Week) +def format_epiweek(value: Union[str, int]) -> str: + return Week.fromstring(str(value)).cdcformat() @dataclass(repr=False) @@ -88,14 +96,6 @@ def __str__(self) -> str: return f"{format_date(self.start)}-{format_date(self.end)}" -EpiDataResponse = TypedDict("EpiDataResponse", {"result": int, "message": str, "epidata": List}) - - -EpiRangeParam = Union[EpiRangeLike, Iterable[EpiRangeLike]] -StringParam = Union[str, Iterable[str]] -IntParam = Union[int, Iterable[int]] - - class EpiDataFormatType(str, Enum): """ possible formatting options for API calls @@ -146,9 +146,6 @@ class EpidataFieldInfo: categories: Final[Sequence[str]] = field(default_factory=list) -CALL_TYPE = TypeVar("CALL_TYPE") - - def 
add_endpoint_to_url(url: str, endpoint: str) -> str: if not url.endswith("/"): url += "/" @@ -163,7 +160,7 @@ class AEpiDataCall: _base_url: Final[str] _endpoint: Final[str] - _params: Final[Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]]] + _params: Final[Mapping[str, Optional[EpiRangeParam]]] meta: Final[Sequence[EpidataFieldInfo]] meta_by_name: Final[Mapping[str, EpidataFieldInfo]] only_supports_classic: Final[bool] @@ -172,7 +169,7 @@ def __init__( self, base_url: str, endpoint: str, - params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]], + params: Mapping[str, Optional[EpiRangeParam]], meta: Optional[Sequence[EpidataFieldInfo]] = None, only_supports_classic: bool = False, ) -> None: @@ -190,7 +187,7 @@ def _verify_parameters(self) -> None: def _formatted_parameters( self, format_type: Optional[EpiDataFormatType] = None, - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, ) -> Mapping[str, str]: """ format this call into a [URL, Params] tuple @@ -205,7 +202,7 @@ def _formatted_parameters( def request_arguments( self, format_type: Optional[EpiDataFormatType] = None, - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, ) -> Tuple[str, Mapping[str, str]]: """ format this call into a [URL, Params] tuple @@ -223,7 +220,7 @@ def _full_url(self) -> str: def request_url( self, format_type: Optional[EpiDataFormatType] = None, - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, ) -> str: """ format this call into a full HTTP request url with encoded parameters @@ -272,7 +269,7 @@ def _parse_row( def _as_df( self, rows: Sequence[Mapping[str, Union[str, float, int, date, None]]], - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, disable_date_parsing: Optional[bool] = False, ) -> DataFrame: pred = fields_to_predicate(fields) @@ -281,12 +278,14 @@ def _as_df( data_types: Dict[str, Any] = {} for info in self.meta: 
- if not pred(info.name) or df[info.name].isnull().values.all(): + if not pred(info.name) or df[info.name].isnull().all(): continue if info.type == EpidataFieldType.bool: data_types[info.name] = bool elif info.type == EpidataFieldType.categorical: - data_types[info.name] = CategoricalDtype(categories=info.categories or None, ordered=True) + data_types[info.name] = CategoricalDtype( + categories=Series(info.categories) if info.categories else None, ordered=True + ) elif info.type == EpidataFieldType.int: data_types[info.name] = int elif info.type in ( diff --git a/epidatpy/_parse.py b/epidatpy/_parse.py index fdeeb54..a275376 100644 --- a/epidatpy/_parse.py +++ b/epidatpy/_parse.py @@ -1,5 +1,5 @@ from datetime import date, datetime -from typing import Callable, Iterable, Optional, Set, Union, cast +from typing import Callable, Optional, Sequence, Set, Union, cast from epiweeks import Week @@ -29,7 +29,7 @@ def parse_api_date_or_week(value: Union[str, int, float, None]) -> Optional[date def fields_to_predicate( - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, ) -> Callable[[str], bool]: if not fields: return lambda _: True diff --git a/epidatpy/request.py b/epidatpy/request.py index 1e7ca88..455d516 100644 --- a/epidatpy/request.py +++ b/epidatpy/request.py @@ -1,9 +1,6 @@ from datetime import date -from json import loads from typing import ( Final, - Generator, - Iterable, List, Mapping, Optional, @@ -26,13 +23,14 @@ EpidataFieldInfo, EpiDataFormatType, EpiDataResponse, - EpiRangeLike, + EpiRange, + EpiRangeParam, OnlySupportsClassicFormatException, add_endpoint_to_url, ) # Make the linter happy about the unused variables -__all__ = ["Epidata", "EpiDataCall", "EpiDataContext", "CovidcastEpidata"] +__all__ = ["Epidata", "EpiDataCall", "EpiDataContext", "EpiRange", "CovidcastEpidata"] @retry(reraise=True, stop=stop_after_attempt(2)) @@ -70,7 +68,7 @@ def __init__( base_url: str, session: Optional[Session], endpoint: str, - params: 
Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]], + params: Mapping[str, Optional[EpiRangeParam]], meta: Optional[Sequence[EpidataFieldInfo]] = None, only_supports_classic: bool = False, ) -> None: @@ -86,7 +84,7 @@ def with_session(self, session: Session) -> "EpiDataCall": def _call( self, format_type: Optional[EpiDataFormatType] = None, - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, stream: bool = False, ) -> Response: url, params = self.request_arguments(format_type, fields) @@ -94,7 +92,7 @@ def _call( def classic( self, - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, disable_date_parsing: Optional[bool] = False, ) -> EpiDataResponse: """Request and parse epidata in CLASSIC message format.""" @@ -111,7 +109,7 @@ def classic( def __call__( self, - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, disable_date_parsing: Optional[bool] = False, ) -> EpiDataResponse: """Request and parse epidata in CLASSIC message format.""" @@ -119,7 +117,7 @@ def __call__( def json( self, - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, disable_date_parsing: Optional[bool] = False, ) -> List[Mapping[str, Union[str, int, float, date, None]]]: """Request and parse epidata in JSON format""" @@ -135,7 +133,7 @@ def json( def df( self, - fields: Optional[Iterable[str]] = None, + fields: Optional[Sequence[str]] = None, disable_date_parsing: Optional[bool] = False, ) -> DataFrame: """Request and parse epidata as a pandas data frame""" @@ -197,7 +195,7 @@ def with_session(self, session: Session) -> "EpiDataContext": def _create_call( self, endpoint: str, - params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]], + params: Mapping[str, Optional[EpiRangeParam]], meta: Optional[Sequence[EpidataFieldInfo]] = None, only_supports_classic: bool = False, ) -> EpiDataCall: @@ -214,7 +212,7 @@ def CovidcastEpidata(base_url: str = 
BASE_URL, session: Optional[Session] = None meta_data = meta_data_res.json() def create_call( - params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]], + params: Mapping[str, Optional[EpiRangeParam]], ) -> EpiDataCall: return EpiDataCall(base_url, session, "covidcast", params, define_covidcast_fields()) From caedb4fdfa2d95959095dba99bad90d4837e44c2 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 21:14:54 -0700 Subject: [PATCH 08/19] refactor: remove bloat functions bloat refactor: remove bloat function and change repr refactor: convenience functions for CovidcastEpidata --- epidatpy/_covidcast.py | 35 +++++++++++------------------------ epidatpy/_endpoints.py | 5 ----- epidatpy/_model.py | 12 +++--------- smoke_test.py | 7 +++++++ 4 files changed, 21 insertions(+), 38 deletions(-) diff --git a/epidatpy/_covidcast.py b/epidatpy/_covidcast.py index e3deacf..579de04 100644 --- a/epidatpy/_covidcast.py +++ b/epidatpy/_covidcast.py @@ -144,8 +144,8 @@ def to_df(signals: Iterable["DataSignal"]) -> DataFrame: "has_sample_size", ], ) - df.insert(6, "geo_types", [",".join(s.geo_types.keys()) for s in signals]) - return df.set_index(["source", "signal"]) + df["geo_types"] = [",".join(s.geo_types.keys()) for s in signals] + return df @property def key(self) -> Tuple[str, str]: @@ -236,7 +236,7 @@ def to_df(sources: Iterable["DataSource"]) -> DataFrame: ], ) df["signals"] = [",".join(ss.signal for ss in s.signals) for s in sources] - return df.set_index("source") + return df def get_signal(self, signal: str) -> Optional[DataSignal]: return next((s for s in self.signals if s.signal == signal), None) @@ -266,13 +266,14 @@ def __post_init__(self) -> None: for signal in source.signals: self._signals_by_key[signal.key] = signal - def get_source(self, source: str) -> Optional[DataSource[CALL_TYPE]]: - return self._source_by_name.get(source) - - @property def source_names(self) -> Sequence[str]: return [s.source for s in self.sources] + def 
signal_names(self, source: Optional[str] = None) -> Sequence[str]: + if not source: + return [x.signal for src in self._source_by_name.values() for x in src.signals] + return [s.signal for s in self._source_by_name[source].signals] + @cached_property def source_df(self) -> DataFrame: """Fetch metadata about available covidcast sources. @@ -306,10 +307,6 @@ def source_df(self) -> DataFrame: """ return DataSource.to_df(self.sources) - @property - def signals(self) -> Iterable[DataSignal[CALL_TYPE]]: - return self._signals_by_key.values() - @cached_property def signal_df(self) -> DataFrame: """Fetch metadata about available covidcast signals. @@ -386,17 +383,7 @@ def signal_df(self) -> DataFrame: ``has_sample_size`` Whether the signal has `sample_size` statistic. """ - return DataSignal.to_df(self.signals) - - def get_signal(self, source: str, signal: str) -> Optional[DataSignal[CALL_TYPE]]: - return self._signals_by_key.get((source, signal)) - - @property - def signal_names(self) -> Iterable[Tuple[str, str]]: - return self._signals_by_key.keys() - - def __iter__(self) -> Iterable[DataSource[CALL_TYPE]]: - return iter(self.sources) + return DataSignal.to_df(self._signals_by_key.values()) @overload def __getitem__(self, source: str, /) -> DataSource[CALL_TYPE]: ... 
@@ -408,10 +395,10 @@ def __getitem__( self, source_signal: Union[str, Tuple[str, str]] ) -> Union[DataSource[CALL_TYPE], DataSignal[CALL_TYPE]]: if isinstance(source_signal, str): - r = self.get_source(source_signal) + r = self._source_by_name.get(source_signal) assert r is not None return r - s = self.get_signal(source_signal[0], source_signal[1]) + s = self._signals_by_key.get((source_signal[0], source_signal[1])) assert s is not None return s diff --git a/epidatpy/_endpoints.py b/epidatpy/_endpoints.py index c22943c..9467da6 100644 --- a/epidatpy/_endpoints.py +++ b/epidatpy/_endpoints.py @@ -12,7 +12,6 @@ from ._covidcast import GeoType, TimeType, define_covidcast_fields from ._model import ( CALL_TYPE, - EPI_RANGE_TYPE, EpidataFieldInfo, EpidataFieldType, EpiRange, @@ -56,10 +55,6 @@ class AEpiDataEndpoints(ABC, Generic[CALL_TYPE]): epidata endpoint list and fetcher """ - @staticmethod - def range(from_: EPI_RANGE_TYPE, to: EPI_RANGE_TYPE) -> EpiRange[EPI_RANGE_TYPE]: - return EpiRange[EPI_RANGE_TYPE](from_, to) - @abstractmethod def _create_call( self, diff --git a/epidatpy/_model.py b/epidatpy/_model.py index c1447ce..25fd9d7 100644 --- a/epidatpy/_model.py +++ b/epidatpy/_model.py @@ -208,15 +208,9 @@ def request_arguments( format this call into a [URL, Params] tuple """ formatted_params = self._formatted_parameters(format_type, fields) - full_url = self._full_url() + full_url = add_endpoint_to_url(self._base_url, self._endpoint) return full_url, formatted_params - def _full_url(self) -> str: - """ - combines the endpoint with the given base url - """ - return add_endpoint_to_url(self._base_url, self._endpoint) - def request_url( self, format_type: Optional[EpiDataFormatType] = None, @@ -233,10 +227,10 @@ def request_url( return u def __repr__(self) -> str: - return f"EpiDataCall(endpoint={self._endpoint}, params={self._formatted_parameters()})" + return str(self) def __str__(self) -> str: - return self.request_url() + return 
f"EpiDataCall(endpoint={self._endpoint}, params={self._formatted_parameters()})" def _parse_value( self, diff --git a/smoke_test.py b/smoke_test.py index f6a911c..2a184a2 100644 --- a/smoke_test.py +++ b/smoke_test.py @@ -3,7 +3,10 @@ apicall = Epidata.covidcast("fb-survey", "smoothed_cli", "day", "nation", EpiRange(20210405, 20210410), "us") +# Call info print(apicall) +# URL +print(apicall.request_url()) classic = apicall.classic() print(classic) @@ -18,6 +21,10 @@ print(df.columns) print(df.dtypes) print(df.iloc[0]) +print(df) +# Classic +classic = apicall.classic() +# DataFrame df = apicall.df(disable_date_parsing=True) print(df.columns) print(df.dtypes) From a6a1c984dbfc8f4fc9e0911f0f5406164768fb97 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 21:17:45 -0700 Subject: [PATCH 09/19] refactor: remove endpoint arg None checking --- epidatpy/_endpoints.py | 101 ++++------------------------------------- 1 file changed, 9 insertions(+), 92 deletions(-) diff --git a/epidatpy/_endpoints.py b/epidatpy/_endpoints.py index 9467da6..6ae5677 100644 --- a/epidatpy/_endpoints.py +++ b/epidatpy/_endpoints.py @@ -74,9 +74,6 @@ def pvt_cdc( """Fetch CDC page hits.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "day") - if auth is None or epiweeks is None or locations is None: - raise InvalidArgumentException("`auth`, `epiweeks`, and `locations` are all required") - return self._create_call( "cdc/", {"auth": auth, "epiweeks": epiweeks, "locations": locations}, @@ -140,18 +137,15 @@ def pub_covid_hosp_facility( ) -> CALL_TYPE: """Fetch COVID hospitalization data for specific facilities.""" - if hospital_pks is None or collection_weeks is None: - raise InvalidArgumentException("`hospital_pks` and `collection_weeks` are both required") - collection_weeks = get_wildcard_equivalent_dates(collection_weeks, "day") # Confusingly, the endpoint expects `collection_weeks` to be in day format, # but correspond to epiweeks. 
Allow `collection_weeks` to be provided in # either day or week format. if isinstance(collection_weeks, EpiRange): - collection_weeks = reformat_epirange(collection_weeks, to_type="day") + parsed_weeks = reformat_epirange(collection_weeks, to_type="day") elif isinstance(collection_weeks, (str, int)): - collection_weeks = parse_api_week(collection_weeks) + parsed_weeks = parse_api_week(collection_weeks) fields_string = [ "hospital_pk", @@ -255,7 +249,7 @@ def pub_covid_hosp_facility( "covid_hosp_facility/", { "hospital_pks": hospital_pks, - "collection_weeks": collection_weeks, + "collection_weeks": parsed_weeks, "publication_dates": publication_dates, }, [ @@ -277,9 +271,6 @@ def pub_covid_hosp_state_timeseries( ) -> CALL_TYPE: """Fetch COVID hospitalization data.""" - if states is None or dates is None: - raise InvalidArgumentException("`states` and `dates` are both required") - if issues is not None and as_of is not None: raise InvalidArgumentException("`issues` and `as_of` are mutually exclusive") @@ -465,20 +456,6 @@ def pub_covidcast( lag: Optional[int] = None, ) -> CALL_TYPE: """Fetch Delphi's COVID-19 Surveillance Streams""" - if any( - v is None - for v in ( - data_source, - signals, - time_type, - geo_type, - time_values, - geo_values, - ) - ): - raise InvalidArgumentException( - "`data_source`, `signals`, `time_type`, `geo_type`, `time_values`, and `geo_values` are all required." 
- ) if sum([issues is not None, lag is not None, as_of is not None]) > 1: raise InvalidArgumentException("`issues`, `lag`, and `as_of` are mutually exclusive.") @@ -504,8 +481,6 @@ def pub_covidcast( def pub_delphi(self, system: str, epiweek: Union[int, str]) -> CALL_TYPE: """Fetch Delphi's forecast.""" - if system is None or epiweek is None: - raise InvalidArgumentException("`system` and `epiweek` are both required") return self._create_call( "delphi/", {"system": system, "epiweek": epiweek}, @@ -521,9 +496,6 @@ def pub_dengue_nowcast(self, locations: StringParam, epiweeks: EpiRangeParam = " """Fetch Delphi's dengue nowcast.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if locations is None or epiweeks is None: - raise InvalidArgumentException("`locations` and `epiweeks` are both required") - return self._create_call( "dengue_nowcast/", {"locations": locations, "epiweeks": epiweeks}, @@ -545,9 +517,6 @@ def pvt_dengue_sensors( """Fetch Delphi's digital surveillance sensors.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if auth is None or names is None or locations is None or epiweeks is None: - raise InvalidArgumentException("`auth`, `names`, `locations`, and `epiweeks` are all required") - return self._create_call( "dengue_sensors/", { @@ -574,9 +543,6 @@ def pub_ecdc_ili( """Fetch ECDC ILI data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if regions is None or epiweeks is None: - raise InvalidArgumentException("`regions` and `epiweeks` are both required") - if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and `lag` are mutually exclusive") @@ -603,9 +569,6 @@ def pub_flusurv( """Fetch FluSurv data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if locations is None or epiweeks is None: - raise InvalidArgumentException("`locations` and `epiweeks` are both required") - if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and 
`lag` are mutually exclusive") @@ -642,9 +605,6 @@ def pub_fluview_clinical( """Fetch FluView clinical data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if regions is None or epiweeks is None: - raise InvalidArgumentException("`regions` and `epiweeks` are both required") - if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and `lag` are mutually exclusive") @@ -687,9 +647,6 @@ def pub_fluview( ) -> CALL_TYPE: epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if regions is None or epiweeks is None: - raise InvalidArgumentException("`regions` and `epiweeks` are both required") - if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and `lag` are mutually exclusive") @@ -725,9 +682,6 @@ def pub_gft(self, locations: StringParam, epiweeks: EpiRangeParam = "*") -> CALL """Fetch Google Flu Trends data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if locations is None or epiweeks is None: - raise InvalidArgumentException("`locations` and `epiweeks` are both required") - return self._create_call( "gft/", {"locations": locations, "epiweeks": epiweeks}, @@ -746,7 +700,7 @@ def pvt_ght( query: str = "", ) -> CALL_TYPE: """Fetch Google Health Trends data.""" - if auth is None or locations is None or epiweeks is None or query == "": + if auth is None or locations is None or query == "": raise InvalidArgumentException("`auth`, `locations`, `epiweeks`, and `query` are all required") return self._create_call( @@ -774,8 +728,6 @@ def pub_kcdc_ili( """Fetch KCDC ILI data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if regions is None or epiweeks is None: - raise InvalidArgumentException("`regions` and `epiweeks` are both required") if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and `lag` are mutually exclusive") @@ -794,8 +746,6 @@ def pub_kcdc_ili( def pvt_meta_norostat(self, auth: str) -> CALL_TYPE: """Fetch 
NoroSTAT metadata.""" - if auth is None: - raise InvalidArgumentException("`auth` is required") return self._create_call( "meta_norostat/", {"auth": auth}, @@ -814,9 +764,6 @@ def pub_nidss_dengue(self, locations: StringParam, epiweeks: EpiRangeParam = "*" """Fetch NIDSS dengue data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if locations is None or epiweeks is None: - raise InvalidArgumentException("`locations` and `epiweeks` are both required") - return self._create_call( "nidss_dengue/", {"locations": locations, "epiweeks": epiweeks}, @@ -837,8 +784,6 @@ def pub_nidss_flu( """Fetch NIDSS flu data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if regions is None or epiweeks is None: - raise InvalidArgumentException("`regions` and `epiweeks` are both required") if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and `lag` are mutually exclusive") @@ -860,9 +805,6 @@ def pvt_norostat(self, auth: str, location: str, epiweeks: EpiRangeParam = "*") """Fetch NoroSTAT data (point data, no min/max).""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if auth is None or location is None or epiweeks is None: - raise InvalidArgumentException("`auth`, `location`, and `epiweeks` are all required") - return self._create_call( "norostat/", {"auth": auth, "epiweeks": epiweeks, "location": location}, @@ -877,9 +819,6 @@ def pub_nowcast(self, locations: StringParam, epiweeks: EpiRangeParam = "*") -> """Fetch Delphi's wILI nowcast.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if locations is None or epiweeks is None: - raise InvalidArgumentException("`locations` and `epiweeks` are both required") - return self._create_call( "nowcast/", {"locations": locations, "epiweeks": epiweeks}, @@ -901,8 +840,6 @@ def pub_paho_dengue( """Fetch PAHO Dengue data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if regions is None or epiweeks is None: - raise 
InvalidArgumentException("`regions` and `epiweeks` are both required") if issues is not None and lag is not None: raise InvalidArgumentException("`issues` and `lag` are mutually exclusive") @@ -928,9 +865,6 @@ def pvt_quidel(self, auth: str, locations: StringParam, epiweeks: EpiRangeParam """Fetch Quidel data.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if auth is None or epiweeks is None or locations is None: - raise InvalidArgumentException("`auth`, `epiweeks`, and `locations` are all required") - return self._create_call( "quidel/", {"auth": auth, "epiweeks": epiweeks, "locations": locations}, @@ -951,9 +885,6 @@ def pvt_sensors( """Fetch Delphi's digital surveillance sensors.""" epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if auth is None or names is None or locations is None or epiweeks is None: - raise InvalidArgumentException("`auth`, `names`, `locations`, and `epiweeks` are all required") - return self._create_call( "sensors/", { @@ -987,12 +918,6 @@ def pvt_twitter( dates = None epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if auth is None or locations is None: - raise InvalidArgumentException("`auth` and `locations` are both required") - - if not (dates is None) ^ (epiweeks is None): - raise InvalidArgumentException("exactly one of `dates` and `epiweeks` is required") - time_field = ( EpidataFieldInfo("date", EpidataFieldType.date) if dates @@ -1034,18 +959,6 @@ def pub_wiki( dates = None epiweeks = get_wildcard_equivalent_dates(epiweeks, "week") - if articles is None: - raise InvalidArgumentException("`articles` is required") - - if not (dates is None) ^ (epiweeks is None): - raise InvalidArgumentException("exactly one of `dates` and `epiweeks` is required") - - time_field = ( - EpidataFieldInfo("date", EpidataFieldType.date) - if dates - else EpidataFieldInfo("epiweek", EpidataFieldType.epiweek) - ) - return self._create_call( "wiki/", { @@ -1057,7 +970,11 @@ def pub_wiki( }, [ 
EpidataFieldInfo("article", EpidataFieldType.text), - time_field, + ( + EpidataFieldInfo("date", EpidataFieldType.date) + if dates + else EpidataFieldInfo("epiweek", EpidataFieldType.epiweek) + ), EpidataFieldInfo("count", EpidataFieldType.int), EpidataFieldInfo("total", EpidataFieldType.int), EpidataFieldInfo("hour", EpidataFieldType.int), From b461c00f1cf4e6e3c8bbbbcb39baccfbf3ffab0f Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 21:17:59 -0700 Subject: [PATCH 10/19] refactor: merge smoke test file --- smoke_covid_test.py | 31 ------------------------------- smoke_test.py | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 33 insertions(+), 39 deletions(-) delete mode 100644 smoke_covid_test.py diff --git a/smoke_covid_test.py b/smoke_covid_test.py deleted file mode 100644 index 88e216d..0000000 --- a/smoke_covid_test.py +++ /dev/null @@ -1,31 +0,0 @@ -from epidatpy.request import CovidcastEpidata, EpiRange - -epidata = CovidcastEpidata() -print(list(epidata.source_names)) -apicall = epidata[("fb-survey", "smoothed_cli")].call( - "nation", - "us", - EpiRange(20210405, 20210410), -) -print(apicall) - -classic = apicall.classic() -print(classic) - -r = apicall.csv() -print(r[0:100]) - -data = apicall.json() -print(data[0]) - -df = apicall.df() -print(df.columns) -print(df.dtypes) -print(df.iloc[0]) -df = apicall.df(disable_date_parsing=True) -print(df.columns) -print(df.dtypes) -print(df.iloc[0]) - -for row in apicall.iter(): - print(row) diff --git a/smoke_test.py b/smoke_test.py index 2a184a2..d767828 100644 --- a/smoke_test.py +++ b/smoke_test.py @@ -1,7 +1,9 @@ from datetime import date + from epidatpy import CovidcastEpidata, Epidata, EpiRange -apicall = Epidata.covidcast("fb-survey", "smoothed_cli", "day", "nation", EpiRange(20210405, 20210410), "us") +print("Epidata Test") +apicall = Epidata.pub_covidcast("fb-survey", "smoothed_cli", "nation", "day", "us", EpiRange(20210405, 20210410)) # Call info print(apicall) @@ -11,9 
+13,6 @@ classic = apicall.classic() print(classic) -r = apicall.csv() -print(r[0:100]) - data = apicall.json() print(data[0]) @@ -30,12 +29,38 @@ print(df.dtypes) print(df.iloc[0]) -for row in apicall.iter(): - print(row) StagingEpidata = Epidata.with_base_url("https://staging.delphi.cmu.edu/epidata/") -epicall = StagingEpidata.covidcast( - "fb-survey", "smoothed_cli", "day", "nation", EpiRange(date(2021, 4, 5), date(2021, 4, 10)), "*" +epicall = StagingEpidata.pub_covidcast( + "fb-survey", "smoothed_cli", "nation", "day", "*", EpiRange(date(2021, 4, 5), date(2021, 4, 10)) ) print(epicall._base_url) + + +# Covidcast test +print("Covidcast Test") +epidata = CovidcastEpidata() +print(epidata.source_names) +epidata["fb-survey"].signal_df +apicall = epidata[("fb-survey", "smoothed_cli")].call( + "nation", + "us", + EpiRange(20210405, 20210410), +) +print(apicall) + +classic = apicall.classic() +print(classic) + +data = apicall.json() +print(data[0]) + +df = apicall.df() +print(df.columns) +print(df.dtypes) +print(df.iloc[0]) +df = apicall.df(disable_date_parsing=True) +print(df.columns) +print(df.dtypes) +print(df.iloc[0]) From b773b3fcd9cabcb16d1212d8e899a45d5fcfde87 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 16:05:47 -0700 Subject: [PATCH 11/19] fix: validate covid_hosp_facility date arg --- epidatpy/_endpoints.py | 47 +++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/epidatpy/_endpoints.py b/epidatpy/_endpoints.py index 6ae5677..1caf928 100644 --- a/epidatpy/_endpoints.py +++ b/epidatpy/_endpoints.py @@ -9,6 +9,8 @@ Union, ) +from epiweeks import Week + from ._covidcast import GeoType, TimeType, define_covidcast_fields from ._model import ( CALL_TYPE, @@ -20,9 +22,8 @@ InvalidArgumentException, ParamType, StringParam, - format_epiweek, ) -from ._parse import parse_api_week +from ._parse import parse_user_date_or_week def get_wildcard_equivalent_dates(time_value: EpiRangeParam, 
time_type: Literal["day", "week"]) -> EpiRangeParam: @@ -34,22 +35,6 @@ def get_wildcard_equivalent_dates(time_value: EpiRangeParam, time_type: Literal[ return time_value -def reformat_epirange(epirange: EpiRange, to_type: Literal["day", "week"]) -> EpiRange: - """Reformat from week to day or vice versa or noop.""" - if to_type == "day" and isinstance(epirange.start, (str, int)) and len(str(epirange.start)) == 6: - coercion_msg = ( - "`collection_weeks` is in week format but `pub_covid_hosp_facility`" - "expects day format; dates will be converted to day format but may not" - "correspond exactly to desired time range" - ) - warnings.warn(coercion_msg, UserWarning) - epirange = EpiRange(parse_api_week(epirange.start), parse_api_week(epirange.end)) - elif to_type == "week" and isinstance(epirange.start, (int, str)) and len(str(epirange.start)) == 8: - epirange = EpiRange(format_epiweek(epirange.start), format_epiweek(epirange.end)) - - return epirange - - class AEpiDataEndpoints(ABC, Generic[CALL_TYPE]): """ epidata endpoint list and fetcher @@ -141,11 +126,27 @@ def pub_covid_hosp_facility( # Confusingly, the endpoint expects `collection_weeks` to be in day format, # but correspond to epiweeks. Allow `collection_weeks` to be provided in - # either day or week format. - if isinstance(collection_weeks, EpiRange): - parsed_weeks = reformat_epirange(collection_weeks, to_type="day") - elif isinstance(collection_weeks, (str, int)): - parsed_weeks = parse_api_week(collection_weeks) + # either day or week format and convert to day format. 
+ parsed_weeks = collection_weeks + if isinstance(collection_weeks, EpiRange) and isinstance(collection_weeks.start, Week): + warnings.warn( + "`collection_weeks` is in week format but `pub_covid_hosp_facility`" + "expects day format; dates will be converted to day format but may not" + "correspond exactly to desired time range", + UserWarning, + ) + parsed_weeks = EpiRange( + parse_user_date_or_week(collection_weeks.start, "day"), + parse_user_date_or_week(collection_weeks.end, "day"), + ) + elif isinstance(collection_weeks, (str, int)) and len(str(collection_weeks)) == 6: + warnings.warn( + "`collection_weeks` is in week format but `pub_covid_hosp_facility`" + "expects day format; dates will be converted to day format but may not" + "correspond exactly to desired time range", + UserWarning, + ) + parsed_weeks = parse_user_date_or_week(collection_weeks, "day") fields_string = [ "hospital_pk", From 617b1ff6c065c86172ccdfbc8394aa51a11c2f73 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 16:04:50 -0700 Subject: [PATCH 12/19] refactor: add parse_user_date_or_week, remove unneeded cast --- epidatpy/_parse.py | 47 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/epidatpy/_parse.py b/epidatpy/_parse.py index a275376..9d65d72 100644 --- a/epidatpy/_parse.py +++ b/epidatpy/_parse.py @@ -1,5 +1,5 @@ from datetime import date, datetime -from typing import Callable, Optional, Sequence, Set, Union, cast +from typing import Callable, Literal, Optional, Sequence, Set, Union from epiweeks import Week @@ -14,7 +14,7 @@ def parse_api_date(value: Union[str, int, float, None]) -> Optional[date]: def parse_api_week(value: Union[str, int, float, None]) -> Optional[date]: if value is None: return None - return cast(date, Week.fromstring(str(value)).startdate()) + return Week.fromstring(str(value)).startdate() def parse_api_date_or_week(value: Union[str, int, float, None]) -> Optional[date]: @@ -22,15 
+22,50 @@ def parse_api_date_or_week(value: Union[str, int, float, None]) -> Optional[date return None v = str(value) if len(v) == 6: - d = cast(date, Week.fromstring(v).startdate()) + d = Week.fromstring(v).startdate() else: d = datetime.strptime(v, "%Y%m%d").date() return d -def fields_to_predicate( - fields: Optional[Sequence[str]] = None, -) -> Callable[[str], bool]: +def parse_user_date_or_week( + value: Union[str, int, date, Week], out_type: Literal["day", "week", None] = None +) -> Union[date, Week]: + if isinstance(value, Week): + if out_type == "day": + return value.startdate() + return value + + if isinstance(value, date): + if out_type == "week": + return Week.fromdate(value) + return value + + value = str(value) + if out_type == "week": + if len(value) == 6: + return Week.fromstring(value) + if len(value) == 8: + return Week.fromdate(datetime.strptime(value, "%Y%m%d").date()) + if len(value) == 10: + return Week.fromdate(datetime.strptime(value, "%Y-%m-%d").date()) + if out_type == "day": + if len(value) == 8: + return datetime.strptime(value, "%Y%m%d").date() + if len(value) == 10: + return datetime.strptime(value, "%Y-%m-%d").date() + if out_type is None: + if len(value) == 6: + return Week.fromstring(value) + if len(value) == 8: + return datetime.strptime(value, "%Y%m%d").date() + if len(value) == 10: + return datetime.strptime(value, "%Y-%m-%d").date() + + raise ValueError(f"Cannot parse date or week from {value}") + + +def fields_to_predicate(fields: Optional[Sequence[str]] = None) -> Callable[[str], bool]: if not fields: return lambda _: True to_include: Set[str] = set() From b8f24ddb4771b9ff8a9a5fec0af7f524c9a3c813 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 16:06:21 -0700 Subject: [PATCH 13/19] refactor: EpiRange validates input --- epidatpy/_model.py | 20 +++++++------------- tests/test_model.py | 16 +++++++++------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/epidatpy/_model.py b/epidatpy/_model.py 
index 25fd9d7..6a67692 100644 --- a/epidatpy/_model.py +++ b/epidatpy/_model.py @@ -5,7 +5,6 @@ Any, Dict, Final, - Generic, List, Mapping, Optional, @@ -26,6 +25,7 @@ parse_api_date, parse_api_date_or_week, parse_api_week, + parse_user_date_or_week, ) EpiDateLike = Union[int, str, date, Week] @@ -34,9 +34,8 @@ EpiRangeParam = Union[EpiRangeLike, Sequence[EpiRangeLike]] StringParam = Union[str, Sequence[str]] IntParam = Union[int, Sequence[int]] -EpiDataResponse = TypedDict("EpiDataResponse", {"result": int, "message": str, "epidata": List}) ParamType = Union[StringParam, IntParam, EpiRangeParam] -EPI_RANGE_TYPE = TypeVar("EPI_RANGE_TYPE", int, date, str, Week) +EpiDataResponse = TypedDict("EpiDataResponse", {"result": int, "message": str, "epidata": List}) CALL_TYPE = TypeVar("CALL_TYPE") @@ -70,20 +69,15 @@ def format_list(values: EpiRangeParam) -> str: return format_item(values) -def format_epiweek(value: Union[str, int]) -> str: - return Week.fromstring(str(value)).cdcformat() - - -@dataclass(repr=False) -class EpiRange(Generic[EPI_RANGE_TYPE]): +class EpiRange: """ Range object for dates/epiweeks """ - start: EPI_RANGE_TYPE - end: EPI_RANGE_TYPE - - def __post_init__(self) -> None: + def __init__(self, start: EpiDateLike, end: EpiDateLike) -> None: + # check if types are correct + self.start = parse_user_date_or_week(start) + self.end = parse_user_date_or_week(end) # swap if wrong order # complicated construct for typing inference if self.end < self.start: diff --git a/tests/test_model.py b/tests/test_model.py index a8412cd..13db48d 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,29 +1,31 @@ +import datetime + from epidatpy._model import EpiRange, format_item, format_list def test_epirange() -> None: - r = EpiRange(3, 4) - assert r.start == 3 and r.end == 4 - assert str(r) == "3-4" + r = EpiRange(20000101, 20000102) + assert r.start == datetime.date(2000, 1, 1) and r.end == datetime.date(2000, 1, 2) + assert str(r) == "20000101-20000102" def 
test_epirange_wrong_order() -> None: - r = EpiRange(4, 3) - assert r.start == 3 and r.end == 4 + r = EpiRange(20000101, 20000102) + assert r.start == datetime.date(2000, 1, 1) and r.end == datetime.date(2000, 1, 2) def test_format_item() -> None: assert format_item("a") == "a" assert format_item(1) == "1" assert format_item({"from": 1, "to": 3}) == "1-3" - assert format_item(EpiRange(3, 5)) == "3-5" + assert format_item(EpiRange(20000101, 20000102)) == "20000101-20000102" def test_format_list() -> None: assert format_list("a") == "a" assert format_list(1) == "1" assert format_list({"from": 1, "to": 3}) == "1-3" - assert format_list(EpiRange(3, 5)) == "3-5" + assert format_list(EpiRange(20000101, 20000102)) == "20000101-20000102" assert format_list(["a", "b"]) == "a,b" assert format_list(("a", "b")) == "a,b" From 3dea05ecf979e7653deddf2b91b98bbfe571e978 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 14:09:51 -0700 Subject: [PATCH 14/19] refactor: make auth function private --- epidatpy/_auth.py | 5 ++--- epidatpy/request.py | 4 ++-- tests/test_auth.py | 8 ++++---- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/epidatpy/_auth.py b/epidatpy/_auth.py index 566bccb..d581443 100644 --- a/epidatpy/_auth.py +++ b/epidatpy/_auth.py @@ -1,10 +1,9 @@ import os import warnings -from typing import Optional -def get_api_key() -> Optional[str]: - key = os.environ.get("DELPHI_EPIDATA_KEY", None) +def _get_api_key() -> str: + key = os.environ.get("DELPHI_EPIDATA_KEY", "") if not key: warnings.warn( diff --git a/epidatpy/request.py b/epidatpy/request.py index 455d516..f8a3133 100644 --- a/epidatpy/request.py +++ b/epidatpy/request.py @@ -14,7 +14,7 @@ from requests.auth import HTTPBasicAuth from tenacity import retry, stop_after_attempt -from ._auth import get_api_key +from ._auth import _get_api_key from ._constants import BASE_URL, HTTP_HEADERS from ._covidcast import CovidcastDataSources, define_covidcast_fields from ._endpoints import 
AEpiDataEndpoints @@ -41,7 +41,7 @@ def _request_with_retry( stream: bool = False, ) -> Response: """Make request with a retry if an exception is thrown.""" - basic_auth = HTTPBasicAuth("epidata", get_api_key()) + basic_auth = HTTPBasicAuth("epidata", _get_api_key()) def call_impl(s: Session) -> Response: res = s.get(url, params=params, headers=HTTP_HEADERS, stream=stream, auth=basic_auth) diff --git a/tests/test_auth.py b/tests/test_auth.py index b91dccf..d04d146 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -1,12 +1,12 @@ -from pytest import warns, MonkeyPatch +from pytest import MonkeyPatch, warns -from epidatpy import get_api_key +from epidatpy._auth import _get_api_key def test_get_api_key(monkeypatch: MonkeyPatch) -> None: with monkeypatch.context() as m: m.setenv("DELPHI_EPIDATA_KEY", "test") - assert get_api_key() == "test" + assert _get_api_key() == "test" m.delenv("DELPHI_EPIDATA_KEY") with warns(UserWarning): - assert get_api_key() is None + assert _get_api_key() == "" From 2a6c3f765eb66ef28f90634534a7121da7780dfa Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 14:10:35 -0700 Subject: [PATCH 15/19] refactor: remove async components --- epidatpy/async_request.py | 280 -------------------------------------- smoke_test_async.py | 22 --- 2 files changed, 302 deletions(-) delete mode 100644 epidatpy/async_request.py delete mode 100644 smoke_test_async.py diff --git a/epidatpy/async_request.py b/epidatpy/async_request.py deleted file mode 100644 index f1c40f9..0000000 --- a/epidatpy/async_request.py +++ /dev/null @@ -1,280 +0,0 @@ -from asyncio import gather, get_event_loop -from datetime import date -from json import loads -from typing import ( - AsyncGenerator, - Callable, - Coroutine, - Dict, - Final, - Iterable, - List, - Mapping, - Optional, - Sequence, - Union, - cast, -) - -from aiohttp import ClientResponse, ClientSession, TCPConnector -from pandas import DataFrame - -from ._constants import BASE_URL, HTTP_HEADERS 
-from ._covidcast import CovidcastDataSources, define_covidcast_fields -from ._endpoints import AEpiDataEndpoints -from ._model import ( - AEpiDataCall, - EpidataFieldInfo, - EpiDataFormatType, - EpiDataResponse, - EpiRangeLike, - OnlySupportsClassicFormatException, - add_endpoint_to_url, -) - -# Make the linter happy about the unused variables -__all__ = ["Epidata", "EpiDataAsyncCall", "EpiDataAsyncContext", "CovidcastEpidata"] - - -async def _async_request( - url: str, params: Mapping[str, str], session: Optional[ClientSession] = None -) -> ClientResponse: - async def call_impl(s: ClientSession) -> ClientResponse: - res = await s.get(url, params=params, headers=HTTP_HEADERS) - if res.status == 414: - return await s.post(url, params=params, headers=HTTP_HEADERS) - return res - - if session: - return await call_impl(session) - - async with ClientSession() as s: - return await call_impl(s) - - -class EpiDataAsyncCall(AEpiDataCall): - """ - async version of an epidata call - """ - - _session: Final[Optional[ClientSession]] - - def __init__( - self, - base_url: str, - session: Optional[ClientSession], - endpoint: str, - params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]], - meta: Optional[Sequence[EpidataFieldInfo]] = None, - only_supports_classic: bool = False, - ) -> None: - super().__init__(base_url, endpoint, params, meta, only_supports_classic) - self._session = session - - def with_base_url(self, base_url: str) -> "EpiDataAsyncCall": - return EpiDataAsyncCall(base_url, self._session, self._endpoint, self._params) - - def with_session(self, session: ClientSession) -> "EpiDataAsyncCall": - return EpiDataAsyncCall(self._base_url, session, self._endpoint, self._params) - - async def _call( - self, - format_type: Optional[EpiDataFormatType] = None, - fields: Optional[Iterable[str]] = None, - ) -> ClientResponse: - url, params = self.request_arguments(format_type, fields) - return await _async_request(url, params, self._session) - - async def 
classic( - self, - fields: Optional[Iterable[str]] = None, - disable_date_parsing: Optional[bool] = False, - ) -> EpiDataResponse: - """Request and parse epidata in CLASSIC message format.""" - self._verify_parameters() - try: - response = await self._call(None, fields) - r = cast(EpiDataResponse, await response.json()) - epidata = r.get("epidata") - if epidata and isinstance(epidata, list) and len(epidata) > 0 and isinstance(epidata[0], dict): - r["epidata"] = [self._parse_row(row, disable_date_parsing=disable_date_parsing) for row in epidata] - return r - except Exception as e: # pylint: disable=broad-except - return {"result": 0, "message": f"error: {e}", "epidata": []} - - async def __call__( - self, - fields: Optional[Iterable[str]] = None, - disable_date_parsing: Optional[bool] = False, - ) -> EpiDataResponse: - """Request and parse epidata in CLASSIC message format.""" - return await self.classic(fields, disable_date_parsing=disable_date_parsing) - - async def json( - self, - fields: Optional[Iterable[str]] = None, - disable_date_parsing: Optional[bool] = False, - ) -> List[Mapping[str, Union[str, int, float, date, None]]]: - """Request and parse epidata in JSON format""" - self._verify_parameters() - if self.only_supports_classic: - raise OnlySupportsClassicFormatException() - response = await self._call(EpiDataFormatType.json, fields) - response.raise_for_status() - return [ - self._parse_row(row, disable_date_parsing) - for row in cast(List[Mapping[str, Union[str, int, float, None]]], await response.json()) - ] - - async def df( - self, - fields: Optional[Iterable[str]] = None, - disable_date_parsing: Optional[bool] = False, - ) -> DataFrame: - """Request and parse epidata as a pandas data frame""" - self._verify_parameters() - if self.only_supports_classic: - raise OnlySupportsClassicFormatException() - r = await self.json(fields, disable_date_parsing=disable_date_parsing) - return self._as_df(r, fields, disable_date_parsing) - - async def csv(self, 
fields: Optional[Iterable[str]] = None) -> str: - """Request and parse epidata in CSV format""" - self._verify_parameters() - if self.only_supports_classic: - raise OnlySupportsClassicFormatException() - response = await self._call(EpiDataFormatType.csv, fields) - response.raise_for_status() - return await response.text() - - async def iter( - self, - fields: Optional[Iterable[str]] = None, - disable_date_parsing: Optional[bool] = False, - ) -> AsyncGenerator[Mapping[str, Union[str, int, float, date, None]], None]: - """Request and streams epidata rows""" - self._verify_parameters() - if self.only_supports_classic: - raise OnlySupportsClassicFormatException() - response = await self._call(EpiDataFormatType.jsonl, fields) - response.raise_for_status() - async for line in response.content: - yield self._parse_row(loads(line), disable_date_parsing=disable_date_parsing) - - async def __( - self, - ) -> AsyncGenerator[Mapping[str, Union[str, int, float, date, None]], None]: - return self.iter() - - -class EpiDataAsyncContext(AEpiDataEndpoints[EpiDataAsyncCall]): - """ - sync epidata call class - """ - - _base_url: Final[str] - _session: Final[Optional[ClientSession]] - - def __init__(self, base_url: str = BASE_URL, session: Optional[ClientSession] = None) -> None: - super().__init__() - self._base_url = base_url - self._session = session - - def with_base_url(self, base_url: str) -> "EpiDataAsyncContext": - return EpiDataAsyncContext(base_url, self._session) - - def with_session(self, session: ClientSession) -> "EpiDataAsyncContext": - return EpiDataAsyncContext(self._base_url, session) - - def _create_call( - self, - endpoint: str, - params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]], - meta: Optional[Sequence[EpidataFieldInfo]] = None, - only_supports_classic: bool = False, - ) -> EpiDataAsyncCall: - return EpiDataAsyncCall(self._base_url, self._session, endpoint, params, meta, only_supports_classic) - - @staticmethod - def all( - calls: 
Iterable[EpiDataAsyncCall], - call_api: Callable[[EpiDataAsyncCall, ClientSession], Coroutine], - batch_size: int = 50, - ) -> List: - loop = get_event_loop() - - async def impl() -> List: - tasks: List[Coroutine] = [] - connector = TCPConnector(limit=batch_size) - async with ClientSession(connector=connector) as session: - for call in calls: - co_routine = call_api(call, session) - tasks.append(co_routine) - return list(await gather(*tasks)) - - future = impl() - return loop.run_until_complete(future) - - def all_classic( - self, - calls: Iterable[EpiDataAsyncCall], - fields: Optional[Iterable[str]] = None, - batch_size: int = 50, - ) -> List[EpiDataResponse]: - """ - runs the given calls in a batch asynchronously and return their responses - """ - - def call_api(call: EpiDataAsyncCall, session: ClientSession) -> Coroutine: - return call.with_session(session).classic(fields) - - return self.all(calls, call_api, batch_size) - - def all_json( - self, - calls: Iterable[EpiDataAsyncCall], - fields: Optional[Iterable[str]] = None, - batch_size: int = 50, - ) -> List[List[Dict]]: - """ - runs the given calls in a batch asynchronously and return their responses - """ - - def call_api(call: EpiDataAsyncCall, session: ClientSession) -> Coroutine: - return call.with_session(session).json(fields) - - return self.all(calls, call_api, batch_size) - - def all_csv( - self, - calls: Iterable[EpiDataAsyncCall], - fields: Optional[Iterable[str]] = None, - batch_size: int = 50, - ) -> List[str]: - """ - runs the given calls in a batch asynchronously and return their responses - """ - - def call_api(call: EpiDataAsyncCall, session: ClientSession) -> Coroutine: - return call.with_session(session).csv(fields) - - return self.all(calls, call_api, batch_size) - - -Epidata = EpiDataAsyncContext() - - -async def CovidcastEpidata( - base_url: str = BASE_URL, session: Optional[ClientSession] = None -) -> CovidcastDataSources[EpiDataAsyncCall]: - url = add_endpoint_to_url(base_url, 
"covidcast/meta") - meta_data_res = await _async_request(url, {}, session) - meta_data_res.raise_for_status() - meta_data = await meta_data_res.json() - - def create_call( - params: Mapping[str, Union[None, EpiRangeLike, Iterable[EpiRangeLike]]], - ) -> EpiDataAsyncCall: - return EpiDataAsyncCall(base_url, session, "covidcast", params, define_covidcast_fields()) - - return CovidcastDataSources.create(meta_data, create_call) diff --git a/smoke_test_async.py b/smoke_test_async.py deleted file mode 100644 index b284f30..0000000 --- a/smoke_test_async.py +++ /dev/null @@ -1,22 +0,0 @@ -from asyncio import get_event_loop - -from epidatpy.async_request import Epidata - - -async def main() -> None: - apicall = Epidata.covidcast("fb-survey", "smoothed_cli", "day", "nation", Epidata.range(20210405, 20210410), "us") - classic = await apicall.classic() - print(classic) - - r = await apicall.csv() - print(r[0:100]) - - data = await apicall.json() - print(data[0]) - - async for row in apicall.iter(): - print(row) - - -loop = get_event_loop() -loop.run_until_complete(main()) From 03bc328fa25bf37c06a7bc499a4f2cdfb66f5f44 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 Jul 2024 14:12:32 -0700 Subject: [PATCH 16/19] refactor: center DataFrame return in EpidataCall * remove json, csv, iter formats * remove format_type option, always request classic * consolidate DataFrame code * parse types only if classic, otherwise let Pandas do it --- epidatpy/_model.py | 60 +------------------------- epidatpy/request.py | 100 ++++++++++++++++++++------------------------ smoke_test.py | 6 --- 3 files changed, 47 insertions(+), 119 deletions(-) diff --git a/epidatpy/_model.py b/epidatpy/_model.py index 6a67692..4a7b910 100644 --- a/epidatpy/_model.py +++ b/epidatpy/_model.py @@ -2,8 +2,6 @@ from datetime import date from enum import Enum from typing import ( - Any, - Dict, Final, List, Mapping, @@ -18,10 +16,8 @@ from urllib.parse import urlencode from epiweeks import Week -from 
pandas import CategoricalDtype, DataFrame, Series from ._parse import ( - fields_to_predicate, parse_api_date, parse_api_date_or_week, parse_api_week, @@ -90,17 +86,6 @@ def __str__(self) -> str: return f"{format_date(self.start)}-{format_date(self.end)}" -class EpiDataFormatType(str, Enum): - """ - possible formatting options for API calls - """ - - json = "json" - classic = "classic" - csv = "csv" - jsonl = "jsonl" - - class InvalidArgumentException(Exception): """ exception for an invalid argument @@ -180,41 +165,36 @@ def _verify_parameters(self) -> None: def _formatted_parameters( self, - format_type: Optional[EpiDataFormatType] = None, fields: Optional[Sequence[str]] = None, ) -> Mapping[str, str]: """ format this call into a [URL, Params] tuple """ all_params = dict(self._params) - if format_type and format_type != EpiDataFormatType.classic: - all_params["format"] = format_type if fields: all_params["fields"] = fields return {k: format_list(v) for k, v in all_params.items() if v is not None} def request_arguments( self, - format_type: Optional[EpiDataFormatType] = None, fields: Optional[Sequence[str]] = None, ) -> Tuple[str, Mapping[str, str]]: """ format this call into a [URL, Params] tuple """ - formatted_params = self._formatted_parameters(format_type, fields) + formatted_params = self._formatted_parameters(fields) full_url = add_endpoint_to_url(self._base_url, self._endpoint) return full_url, formatted_params def request_url( self, - format_type: Optional[EpiDataFormatType] = None, fields: Optional[Sequence[str]] = None, ) -> str: """ format this call into a full HTTP request url with encoded parameters """ self._verify_parameters() - u, p = self.request_arguments(format_type, fields) + u, p = self.request_arguments(fields) query = urlencode(p) if query: return f"{u}?{query}" @@ -253,39 +233,3 @@ def _parse_row( if not self.meta: return row return {k: self._parse_value(k, v, disable_date_parsing) for k, v in row.items()} - - def _as_df( - self, - rows: 
Sequence[Mapping[str, Union[str, float, int, date, None]]], - fields: Optional[Sequence[str]] = None, - disable_date_parsing: Optional[bool] = False, - ) -> DataFrame: - pred = fields_to_predicate(fields) - columns: List[str] = [info.name for info in self.meta if pred(info.name)] - df = DataFrame(rows, columns=columns or None) - - data_types: Dict[str, Any] = {} - for info in self.meta: - if not pred(info.name) or df[info.name].isnull().all(): - continue - if info.type == EpidataFieldType.bool: - data_types[info.name] = bool - elif info.type == EpidataFieldType.categorical: - data_types[info.name] = CategoricalDtype( - categories=Series(info.categories) if info.categories else None, ordered=True - ) - elif info.type == EpidataFieldType.int: - data_types[info.name] = int - elif info.type in ( - EpidataFieldType.date, - EpidataFieldType.epiweek, - EpidataFieldType.date_or_epiweek, - ): - data_types[info.name] = int if disable_date_parsing else "datetime64[ns]" - elif info.type == EpidataFieldType.float: - data_types[info.name] = float - else: - data_types[info.name] = str - if data_types: - df = df.astype(data_types) - return df diff --git a/epidatpy/request.py b/epidatpy/request.py index f8a3133..8003ec3 100644 --- a/epidatpy/request.py +++ b/epidatpy/request.py @@ -1,5 +1,6 @@ -from datetime import date from typing import ( + Any, + Dict, Final, List, Mapping, @@ -9,7 +10,7 @@ cast, ) -from pandas import DataFrame +from pandas import CategoricalDtype, DataFrame, Series from requests import Response, Session from requests.auth import HTTPBasicAuth from tenacity import retry, stop_after_attempt @@ -21,13 +22,14 @@ from ._model import ( AEpiDataCall, EpidataFieldInfo, - EpiDataFormatType, + EpidataFieldType, EpiDataResponse, EpiRange, EpiRangeParam, OnlySupportsClassicFormatException, add_endpoint_to_url, ) +from ._parse import fields_to_predicate # Make the linter happy about the unused variables __all__ = ["Epidata", "EpiDataCall", "EpiDataContext", "EpiRange", 
"CovidcastEpidata"] @@ -83,23 +85,25 @@ def with_session(self, session: Session) -> "EpiDataCall": def _call( self, - format_type: Optional[EpiDataFormatType] = None, fields: Optional[Sequence[str]] = None, stream: bool = False, ) -> Response: - url, params = self.request_arguments(format_type, fields) + url, params = self.request_arguments(fields) return _request_with_retry(url, params, self._session, stream) def classic( self, fields: Optional[Sequence[str]] = None, disable_date_parsing: Optional[bool] = False, + disable_type_parsing: Optional[bool] = False, ) -> EpiDataResponse: """Request and parse epidata in CLASSIC message format.""" self._verify_parameters() try: - response = self._call(None, fields) + response = self._call(fields) r = cast(EpiDataResponse, response.json()) + if disable_type_parsing: + return r epidata = r.get("epidata") if epidata and isinstance(epidata, list) and len(epidata) > 0 and isinstance(epidata[0], dict): r["epidata"] = [self._parse_row(row, disable_date_parsing=disable_date_parsing) for row in epidata] @@ -111,25 +115,11 @@ def __call__( self, fields: Optional[Sequence[str]] = None, disable_date_parsing: Optional[bool] = False, - ) -> EpiDataResponse: - """Request and parse epidata in CLASSIC message format.""" - return self.classic(fields, disable_date_parsing=disable_date_parsing) - - def json( - self, - fields: Optional[Sequence[str]] = None, - disable_date_parsing: Optional[bool] = False, - ) -> List[Mapping[str, Union[str, int, float, date, None]]]: - """Request and parse epidata in JSON format""" + ) -> Union[EpiDataResponse, DataFrame]: + """Request and parse epidata in df message format.""" if self.only_supports_classic: - raise OnlySupportsClassicFormatException() - self._verify_parameters() - response = self._call(EpiDataFormatType.json, fields) - response.raise_for_status() - return [ - self._parse_row(row, disable_date_parsing=disable_date_parsing) - for row in cast(List[Mapping[str, Union[str, int, float, None]]], 
response.json()) - ] + return self.classic(fields, disable_date_parsing=disable_date_parsing, disable_type_parsing=False) + return self.df(fields, disable_date_parsing=disable_date_parsing) def df( self, @@ -140,37 +130,37 @@ def df( if self.only_supports_classic: raise OnlySupportsClassicFormatException() self._verify_parameters() - r = self.json(fields, disable_date_parsing=disable_date_parsing) - return self._as_df(r, fields, disable_date_parsing=disable_date_parsing) - - def csv(self, fields: Optional[Iterable[str]] = None) -> str: - """Request and parse epidata in CSV format""" - if self.only_supports_classic: - raise OnlySupportsClassicFormatException() - self._verify_parameters() - response = self._call(EpiDataFormatType.csv, fields) - response.raise_for_status() - return response.text - - def iter( - self, - fields: Optional[Iterable[str]] = None, - disable_date_parsing: Optional[bool] = False, - ) -> Generator[Mapping[str, Union[str, int, float, date, None]], None, Response]: - """Request and streams epidata rows""" - if self.only_supports_classic: - raise OnlySupportsClassicFormatException() - self._verify_parameters() - response = self._call(EpiDataFormatType.jsonl, fields, stream=True) - response.raise_for_status() - for line in response.iter_lines(): - yield self._parse_row(loads(line), disable_date_parsing=disable_date_parsing) - return response - - def __iter__( - self, - ) -> Generator[Mapping[str, Union[str, int, float, date, None]], None, Response]: - return self.iter() + json = self.classic(fields, disable_type_parsing=True) + rows = json.get("epidata", []) + pred = fields_to_predicate(fields) + columns: List[str] = [info.name for info in self.meta if pred(info.name)] + df = DataFrame(rows, columns=columns or None) + + data_types: Dict[str, Any] = {} + for info in self.meta: + if not pred(info.name) or df[info.name].isnull().all(): + continue + if info.type == EpidataFieldType.bool: + data_types[info.name] = bool + elif info.type == 
EpidataFieldType.categorical: + data_types[info.name] = CategoricalDtype( + categories=Series(info.categories) if info.categories else None, ordered=True + ) + elif info.type == EpidataFieldType.int: + data_types[info.name] = "Int64" + elif info.type in ( + EpidataFieldType.date, + EpidataFieldType.epiweek, + EpidataFieldType.date_or_epiweek, + ): + data_types[info.name] = "Int64" if disable_date_parsing else "datetime64[ns]" + elif info.type == EpidataFieldType.float: + data_types[info.name] = "Float64" + else: + data_types[info.name] = "string" + if data_types: + df = df.astype(data_types) + return df class EpiDataContext(AEpiDataEndpoints[EpiDataCall]): diff --git a/smoke_test.py b/smoke_test.py index d767828..0eb4cc9 100644 --- a/smoke_test.py +++ b/smoke_test.py @@ -13,9 +13,6 @@ classic = apicall.classic() print(classic) -data = apicall.json() -print(data[0]) - df = apicall.df() print(df.columns) print(df.dtypes) @@ -53,9 +50,6 @@ classic = apicall.classic() print(classic) -data = apicall.json() -print(data[0]) - df = apicall.df() print(df.columns) print(df.dtypes) From 9177e015e5c89b26659323e0269b517d918062fa Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 12 Jul 2024 18:51:32 -0700 Subject: [PATCH 17/19] fix: DataFrame date parsing --- epidatpy/request.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/epidatpy/request.py b/epidatpy/request.py index 8003ec3..60e0212 100644 --- a/epidatpy/request.py +++ b/epidatpy/request.py @@ -10,7 +10,7 @@ cast, ) -from pandas import CategoricalDtype, DataFrame, Series +from pandas import CategoricalDtype, DataFrame, Series, to_datetime from requests import Response, Session from requests.auth import HTTPBasicAuth from tenacity import retry, stop_after_attempt @@ -137,6 +137,7 @@ def df( df = DataFrame(rows, columns=columns or None) data_types: Dict[str, Any] = {} + time_fields: List[str] = [] for info in self.meta: if not pred(info.name) or df[info.name].isnull().all(): continue @@ 
-153,13 +154,17 @@ def df( EpidataFieldType.epiweek, EpidataFieldType.date_or_epiweek, ): - data_types[info.name] = "Int64" if disable_date_parsing else "datetime64[ns]" + data_types[info.name] = "Int64" + time_fields.append(info.name) elif info.type == EpidataFieldType.float: data_types[info.name] = "Float64" else: data_types[info.name] = "string" if data_types: df = df.astype(data_types) + if not disable_date_parsing: + for field in time_fields: + df[field] = to_datetime(df[field], format="%Y%m%d", errors="ignore") return df From b92ccbe4d4e2d61dc2bed77480b37bb6bbf6e1fd Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 12 Jul 2024 18:51:59 -0700 Subject: [PATCH 18/19] refactor: small smoke test demo change --- smoke_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/smoke_test.py b/smoke_test.py index 0eb4cc9..5d38f45 100644 --- a/smoke_test.py +++ b/smoke_test.py @@ -38,7 +38,8 @@ # Covidcast test print("Covidcast Test") epidata = CovidcastEpidata() -print(epidata.source_names) +print(epidata.source_names()) +print(epidata.signal_names("fb-survey")) epidata["fb-survey"].signal_df apicall = epidata[("fb-survey", "smoothed_cli")].call( "nation", From 871c832dc7e63057d28299e8c8356199237478ac Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 15 Jul 2024 12:44:04 -0700 Subject: [PATCH 19/19] refactor: put all types in _model --- epidatpy/_covidcast.py | 5 ++--- epidatpy/_model.py | 3 +++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/epidatpy/_covidcast.py b/epidatpy/_covidcast.py index 579de04..43e2b99 100644 --- a/epidatpy/_covidcast.py +++ b/epidatpy/_covidcast.py @@ -25,12 +25,11 @@ EpidataFieldInfo, EpidataFieldType, EpiRangeParam, + GeoType, InvalidArgumentException, + TimeType, ) -GeoType = Literal["nation", "msa", "hrr", "hhs", "state", "county"] -TimeType = Literal["day", "week"] - @dataclass class WebLink: diff --git a/epidatpy/_model.py b/epidatpy/_model.py index 4a7b910..6e401a7 100644 --- 
a/epidatpy/_model.py +++ b/epidatpy/_model.py @@ -4,6 +4,7 @@ from typing import ( Final, List, + Literal, Mapping, Optional, Sequence, @@ -24,6 +25,8 @@ parse_user_date_or_week, ) +GeoType = Literal["nation", "msa", "hrr", "hhs", "state", "county"] +TimeType = Literal["day", "week"] EpiDateLike = Union[int, str, date, Week] EpiRangeDict = TypedDict("EpiRangeDict", {"from": EpiDateLike, "to": EpiDateLike}) EpiRangeLike = Union[int, str, "EpiRange", EpiRangeDict, date, Week]