Skip to content

Commit

Permalink
Merge pull request #50 from OCHA-DAP/main
Browse files Browse the repository at this point in the history
HDXDSYS-1301 Simplify operational presence HAPI pipeline to read from global HDX dataset
  • Loading branch information
alexandru-m-g authored Jan 22, 2025
2 parents 95f7c02 + c74df0f commit 41a52fc
Show file tree
Hide file tree
Showing 50 changed files with 56,319 additions and 26,106 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@ jobs:
submodules: true

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v3

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-1

- name: Login to Public ECR
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
registry: public.ecr.aws
username: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
Expand All @@ -37,11 +37,11 @@ jobs:

- name: Create prod requirements
run: |
pip install --upgrade pip-tools
pip-compile pyproject.toml --resolver=backtracking --upgrade -q -c requirements.txt -o prod-requirements.txt
pip install --upgrade uv
uv pip compile pyproject.toml --resolver=backtracking --upgrade -q -c requirements.txt -o prod-requirements.txt
- name: Build, tag, and push image to Amazon ECR
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
context: .
push: true
Expand Down
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.10.33] = 2025-01-22

### Fixed

- Use uv instead of pip-compile in the publish GitHub Actions job

## [0.10.32] = 2025-01-22

### Changed

- Read 3W (operational presence) from the global HDX dataset
- Remove negative and rounded checks from HNO as they are now in the scraper
- Common logic for 3W and HNO

## [0.10.31] = 2025-01-13

### Changed
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM public.ecr.aws/unocha/hdx-scraper-baseimage:stable
FROM public.ecr.aws/unocha/python:3.12-stable

WORKDIR /srv

Expand Down
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ requires-python = ">=3.8"

dependencies = [
"hapi-schema>= 0.9.6",
"hdx-python-api>= 6.3.6",
"hdx-python-country>= 3.8.6",
"hdx-python-api>= 6.3.7",
"hdx-python-country>= 3.8.7",
"hdx-python-database[postgresql]>= 1.3.4",
"hdx-python-scraper>= 2.5.1",
"hdx-python-utilities>= 3.8.0",
"hdx-python-scraper>= 2.5.5",
"hdx-python-utilities>= 3.8.2",
"libhxl",
"sqlalchemy"
]
Expand Down
29 changes: 15 additions & 14 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ attrs==24.3.0
# jsonlines
# jsonschema
# referencing
cachetools==5.5.0
cachetools==5.5.1
# via google-auth
certifi==2024.12.14
# via requests
Expand Down Expand Up @@ -40,7 +40,7 @@ email-validator==2.2.0
# via hdx-python-api
et-xmlfile==2.0.0
# via openpyxl
filelock==3.16.1
filelock==3.17.0
# via virtualenv
frictionless==5.18.0
# via hdx-python-utilities
Expand All @@ -56,28 +56,28 @@ gspread==6.1.4
# via hdx-python-scraper
hapi-schema==0.9.6
# via hapi-pipelines (pyproject.toml)
hdx-python-api==6.3.6
hdx-python-api==6.3.7
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-scraper
hdx-python-country==3.8.6
hdx-python-country==3.8.7
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-api
# hdx-python-scraper
hdx-python-database==1.3.4
# via hapi-pipelines (pyproject.toml)
hdx-python-scraper==2.5.1
hdx-python-scraper==2.5.5
# via hapi-pipelines (pyproject.toml)
hdx-python-utilities==3.8.0
hdx-python-utilities==3.8.2
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-api
# hdx-python-country
# hdx-python-scraper
humanize==4.11.0
# via frictionless
identify==2.6.5
identify==2.6.6
# via pre-commit
idna==3.10
# via
Expand Down Expand Up @@ -120,7 +120,7 @@ markupsafe==3.0.2
# via jinja2
mdurl==0.1.2
# via markdown-it-py
more-itertools==10.5.0
more-itertools==10.6.0
# via inflect
nodeenv==1.9.1
# via pre-commit
Expand All @@ -144,11 +144,11 @@ ply==3.11
# libhxl
pockets==0.9.1
# via sphinxcontrib-napoleon
pre-commit==4.0.1
pre-commit==4.1.0
# via hapi-pipelines (pyproject.toml)
psycopg==3.2.3
psycopg==3.2.4
# via hdx-python-database
psycopg-binary==3.2.3
psycopg-binary==3.2.4
# via psycopg
pyasn1==0.6.1
# via
Expand Down Expand Up @@ -196,7 +196,7 @@ quantulum3==0.9.2
# via hdx-python-api
ratelimit==2.2.1
# via hdx-python-utilities
referencing==0.35.1
referencing==0.36.1
# via
# jsonschema
# jsonschema-specifications
Expand Down Expand Up @@ -251,7 +251,7 @@ sqlalchemy==2.0.37
# hdx-python-database
stringcase==1.2.0
# via frictionless
structlog==24.4.0
structlog==25.1.0
# via libhxl
tableschema-to-template==0.0.13
# via hdx-python-utilities
Expand All @@ -271,6 +271,7 @@ typing-extensions==4.12.2
# psycopg
# pydantic
# pydantic-core
# referencing
# sqlalchemy
# typeguard
# typer
Expand All @@ -284,7 +285,7 @@ urllib3==2.3.0
# requests
validators==0.34.0
# via frictionless
virtualenv==20.28.1
virtualenv==20.29.1
# via pre-commit
wheel==0.45.1
# via libhxl
Expand Down
16 changes: 1 addition & 15 deletions src/hapi/pipelines/app/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,6 @@ def parse_args():
action="store_true",
help="Use saved data",
)
parser.add_argument(
"-dbg",
"--debug",
default=False,
action="store_true",
help="Debug",
)
parser.add_argument(
"-ehx",
"--err-to-hdx",
Expand All @@ -104,7 +97,6 @@ def main(
basic_auths: Optional[Dict[str, str]] = None,
save: bool = False,
use_saved: bool = False,
debug: bool = False,
err_to_hdx: bool = False,
**ignore,
) -> None:
Expand All @@ -121,7 +113,6 @@ def main(
basic_auths (Optional[Dict[str, str]]): Basic authorisations
save (bool): Whether to save state for testing. Defaults to False.
use_saved (bool): Whether to use saved state for testing. Defaults to False.
debug (bool): Whether to output debug info. Defaults to False.
err_to_hdx (bool): Whether to write any errors to HDX metadata. Defaults to False.
Returns:
Expand All @@ -142,7 +133,7 @@ def main(
params["prepare_fn"] = prepare_hapi_views
logger.info(f"> Database parameters: {params}")
configuration = Configuration.read()
with HDXErrorHandler(should_exit_on_error=False) as error_handler:
with HDXErrorHandler(write_to_hdx=err_to_hdx) as error_handler:
with temp_dir() as temp_folder:
with Database(**params) as database:
session = database.get_session()
Expand All @@ -169,9 +160,6 @@ def main(
)
pipelines.run()
pipelines.output()
pipelines.output_errors(err_to_hdx)
if debug:
pipelines.debug("debug")
logger.info("HAPI pipelines completed!")


Expand Down Expand Up @@ -216,7 +204,6 @@ def main(
"food_security.yaml",
"idps.yaml",
"national_risk.yaml",
"operational_presence.yaml",
"refugees_and_returnees.yaml",
"wfp.yaml",
]
Expand All @@ -235,6 +222,5 @@ def main(
basic_auths=basic_auths,
save=args.save,
use_saved=args.use_saved,
debug=args.debug,
err_to_hdx=ehx,
)
51 changes: 14 additions & 37 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,11 @@ def __init__(
url=AdminLevel.formats_url, retriever=reader
).cache()
self.admins = Admins(
configuration, session, self.locations, libhxl_dataset
configuration,
session,
self.locations,
libhxl_dataset,
error_handler,
)
admin1_config = configuration["admin1"]
self.adminone = AdminLevel(admin_config=admin1_config, admin_level=1)
Expand All @@ -88,21 +92,13 @@ def __init__(
logger.info("Admin two name replacements:")
self.admintwo.output_admin_name_replacements()

self.org = Org(
session=session,
datasetinfo=configuration["org"],
)
self.org_type = OrgType(
session=session,
datasetinfo=configuration["org_type"],
org_type_map=configuration["org_type_map"],
)
self.sector = Sector(
session=session,
datasetinfo=configuration["sector"],
sector_map=configuration["sector_map"],
)
self.currency = Currency(configuration=configuration, session=session)
self.currency = Currency(session=session, configuration=configuration)

Sources.set_default_source_date_format("%Y-%m-%d")
self.runner = Runner(
Expand All @@ -113,6 +109,7 @@ def __init__(
)
self.configurable_scrapers = {}
self.create_configurable_scrapers()

self.metadata = Metadata(
runner=self.runner, session=session, today=today
)
Expand All @@ -135,7 +132,7 @@ def setup_configurable_scrapers(
if countryiso3s:
configuration = {}
# This assumes format prefix_iso_.... eg.
# population_gtm, operational_presence_afg_total
# population_gtm
iso3_index = len(prefix) + 1
for key, value in self.configuration[f"{prefix}{suffix}"].items():
if len(key) < iso3_index + 3:
Expand Down Expand Up @@ -172,13 +169,6 @@ def _create_configurable_scrapers(
current_scrapers + scraper_names
)

_create_configurable_scrapers(
"operational_presence", "admintwo", adminlevel=self.admintwo
)
_create_configurable_scrapers(
"operational_presence", "adminone", adminlevel=self.adminone
)
_create_configurable_scrapers("operational_presence", "national")
_create_configurable_scrapers("national_risk", "national")
_create_configurable_scrapers("refugees_and_returnees", "national")
_create_configurable_scrapers("idps", "national")
Expand Down Expand Up @@ -212,21 +202,17 @@ def output_operational_presence(self):
not self.themes_to_run
or "operational_presence" in self.themes_to_run
):
results = self.runner.get_hapi_results(
self.configurable_scrapers["operational_presence"]
org = Org(
session=self.session,
metadata=self.metadata,
configuration=self.configuration,
)
org.populate()
operational_presence = OperationalPresence(
session=self.session,
metadata=self.metadata,
admins=self.admins,
adminone=self.adminone,
admintwo=self.admintwo,
org=self.org,
org_type=self.org_type,
sector=self.sector,
results=results,
config=self.configuration,
error_handler=self.error_handler,
configuration=self.configuration,
)
operational_presence.populate()

Expand All @@ -253,9 +239,7 @@ def output_humanitarian_needs(self):
session=self.session,
metadata=self.metadata,
admins=self.admins,
sector=self.sector,
configuration=self.configuration,
error_handler=self.error_handler,
)
humanitarian_needs.populate()

Expand Down Expand Up @@ -374,7 +358,6 @@ def output(self):
self.locations.populate()
self.admins.populate()
self.metadata.populate()
self.org.populate()
self.org_type.populate()
self.sector.populate()
self.currency.populate()
Expand All @@ -389,9 +372,3 @@ def output(self):
self.output_poverty_rate()
self.output_conflict_event()
self.output_food_prices()

def debug(self, folder: str) -> None:
self.org.output_org_map(folder)

def output_errors(self, err_to_hdx: bool) -> None:
self.error_handler.output_errors(err_to_hdx)
Loading

0 comments on commit 41a52fc

Please sign in to comment.