Skip to content

Commit

Permalink
Revert "Update humanitarian needs to remove error checks as they are now in pipeline"
Browse files Browse the repository at this point in the history

This reverts commit 00f2740.
  • Loading branch information
mcarans committed Jan 20, 2025
1 parent 00f2740 commit 5b4ec52
Show file tree
Hide file tree
Showing 14 changed files with 7,392 additions and 6,974 deletions.
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ requires-python = ">=3.8"

dependencies = [
"hapi-schema>= 0.9.6",
"hdx-python-api>= 6.3.7",
"hdx-python-api>= 6.3.6",
"hdx-python-country>= 3.8.6",
"hdx-python-database[postgresql]>= 1.3.4",
"hdx-python-scraper>= 2.5.5",
"hdx-python-utilities>= 3.8.2",
"hdx-python-scraper>= 2.5.1",
"hdx-python-utilities>= 3.8.0",
"libhxl",
"sqlalchemy"
]
Expand Down
19 changes: 9 additions & 10 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ gspread==6.1.4
# via hdx-python-scraper
hapi-schema==0.9.6
# via hapi-pipelines (pyproject.toml)
hdx-python-api==6.3.7
hdx-python-api==6.3.6
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-scraper
Expand All @@ -67,9 +67,9 @@ hdx-python-country==3.8.6
# hdx-python-scraper
hdx-python-database==1.3.4
# via hapi-pipelines (pyproject.toml)
hdx-python-scraper==2.5.5
hdx-python-scraper==2.5.1
# via hapi-pipelines (pyproject.toml)
hdx-python-utilities==3.8.2
hdx-python-utilities==3.8.0
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-api
Expand Down Expand Up @@ -120,7 +120,7 @@ markupsafe==3.0.2
# via jinja2
mdurl==0.1.2
# via markdown-it-py
more-itertools==10.6.0
more-itertools==10.5.0
# via inflect
nodeenv==1.9.1
# via pre-commit
Expand All @@ -146,9 +146,9 @@ pockets==0.9.1
# via sphinxcontrib-napoleon
pre-commit==4.0.1
# via hapi-pipelines (pyproject.toml)
psycopg==3.2.4
psycopg==3.2.3
# via hdx-python-database
psycopg-binary==3.2.4
psycopg-binary==3.2.3
# via psycopg
pyasn1==0.6.1
# via
Expand Down Expand Up @@ -196,7 +196,7 @@ quantulum3==0.9.2
# via hdx-python-api
ratelimit==2.2.1
# via hdx-python-utilities
referencing==0.36.1
referencing==0.35.1
# via
# jsonschema
# jsonschema-specifications
Expand Down Expand Up @@ -251,7 +251,7 @@ sqlalchemy==2.0.37
# hdx-python-database
stringcase==1.2.0
# via frictionless
structlog==25.1.0
structlog==24.4.0
# via libhxl
tableschema-to-template==0.0.13
# via hdx-python-utilities
Expand All @@ -271,7 +271,6 @@ typing-extensions==4.12.2
# psycopg
# pydantic
# pydantic-core
# referencing
# sqlalchemy
# typeguard
# typer
Expand All @@ -285,7 +284,7 @@ urllib3==2.3.0
# requests
validators==0.34.0
# via frictionless
virtualenv==20.29.1
virtualenv==20.28.1
# via pre-commit
wheel==0.45.1
# via libhxl
Expand Down
4 changes: 3 additions & 1 deletion src/hapi/pipelines/app/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def main(
params["prepare_fn"] = prepare_hapi_views
logger.info(f"> Database parameters: {params}")
configuration = Configuration.read()
with HDXErrorHandler(write_to_hdx=err_to_hdx) as error_handler:
with HDXErrorHandler(should_exit_on_error=False) as error_handler:
with temp_dir() as temp_folder:
with Database(**params) as database:
session = database.get_session()
Expand All @@ -169,6 +169,7 @@ def main(
)
pipelines.run()
pipelines.output()
pipelines.output_errors(err_to_hdx)
if debug:
pipelines.debug("debug")
logger.info("HAPI pipelines completed!")
Expand Down Expand Up @@ -215,6 +216,7 @@ def main(
"food_security.yaml",
"idps.yaml",
"national_risk.yaml",
"operational_presence.yaml",
"refugees_and_returnees.yaml",
"wfp.yaml",
]
Expand Down
48 changes: 34 additions & 14 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,7 @@ def __init__(
url=AdminLevel.formats_url, retriever=reader
).cache()
self.admins = Admins(
configuration,
session,
self.locations,
libhxl_dataset,
error_handler,
configuration, session, self.locations, libhxl_dataset
)
admin1_config = configuration["admin1"]
self.adminone = AdminLevel(admin_config=admin1_config, admin_level=1)
Expand All @@ -92,13 +88,21 @@ def __init__(
logger.info("Admin two name replacements:")
self.admintwo.output_admin_name_replacements()

self.org = Org(
session=session,
datasetinfo=configuration["org"],
)
self.org_type = OrgType(
session=session,
datasetinfo=configuration["org_type"],
org_type_map=configuration["org_type_map"],
)
self.sector = Sector(
session=session,
datasetinfo=configuration["sector"],
sector_map=configuration["sector_map"],
)
self.currency = Currency(session=session, configuration=configuration)
self.currency = Currency(configuration=configuration, session=session)

Sources.set_default_source_date_format("%Y-%m-%d")
self.runner = Runner(
Expand All @@ -109,7 +113,6 @@ def __init__(
)
self.configurable_scrapers = {}
self.create_configurable_scrapers()

self.metadata = Metadata(
runner=self.runner, session=session, today=today
)
Expand All @@ -132,7 +135,7 @@ def setup_configurable_scrapers(
if countryiso3s:
configuration = {}
# This assumes format prefix_iso_.... eg.
# population_gtm
# population_gtm, operational_presence_afg_total
iso3_index = len(prefix) + 1
for key, value in self.configuration[f"{prefix}{suffix}"].items():
if len(key) < iso3_index + 3:
Expand Down Expand Up @@ -169,6 +172,13 @@ def _create_configurable_scrapers(
current_scrapers + scraper_names
)

_create_configurable_scrapers(
"operational_presence", "admintwo", adminlevel=self.admintwo
)
_create_configurable_scrapers(
"operational_presence", "adminone", adminlevel=self.adminone
)
_create_configurable_scrapers("operational_presence", "national")
_create_configurable_scrapers("national_risk", "national")
_create_configurable_scrapers("refugees_and_returnees", "national")
_create_configurable_scrapers("idps", "national")
Expand Down Expand Up @@ -202,17 +212,21 @@ def output_operational_presence(self):
not self.themes_to_run
or "operational_presence" in self.themes_to_run
):
org = Org(
session=self.session,
metadata=self.metadata,
configuration=self.configuration,
results = self.runner.get_hapi_results(
self.configurable_scrapers["operational_presence"]
)
org.populate()
operational_presence = OperationalPresence(
session=self.session,
metadata=self.metadata,
admins=self.admins,
configuration=self.configuration,
adminone=self.adminone,
admintwo=self.admintwo,
org=self.org,
org_type=self.org_type,
sector=self.sector,
results=results,
config=self.configuration,
error_handler=self.error_handler,
)
operational_presence.populate()

Expand All @@ -239,7 +253,9 @@ def output_humanitarian_needs(self):
session=self.session,
metadata=self.metadata,
admins=self.admins,
sector=self.sector,
configuration=self.configuration,
error_handler=self.error_handler,
)
humanitarian_needs.populate()

Expand Down Expand Up @@ -358,6 +374,7 @@ def output(self):
self.locations.populate()
self.admins.populate()
self.metadata.populate()
self.org.populate()
self.org_type.populate()
self.sector.populate()
self.currency.populate()
Expand All @@ -375,3 +392,6 @@ def output(self):

def debug(self, folder: str) -> None:
self.org.output_org_map(folder)

def output_errors(self, err_to_hdx: bool) -> None:
self.error_handler.output_errors(err_to_hdx)
51 changes: 0 additions & 51 deletions src/hapi/pipelines/database/admins.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Populate the admin tables."""

import logging
import re
from abc import ABC
from typing import Dict, List, Literal, Optional

Expand All @@ -26,22 +25,18 @@


class Admins(BaseUploader):
admin_name_regex = re.compile(r"Admin (\d) Name")

def __init__(
self,
configuration: Configuration,
session: Session,
locations: Locations,
libhxl_dataset: hxl.Dataset,
error_handler: HDXErrorHandler,
):
super().__init__(session)
self._limit = configuration["commit_limit"]
self._orphan_admin2s = configuration["orphan_admin2s"]
self._locations = locations
self._libhxl_dataset = libhxl_dataset
self._error_handler = error_handler
self.admin1_data = {}
self.admin2_data = {}

Expand Down Expand Up @@ -201,52 +196,6 @@ def get_admin2_ref(
)
return ref

def get_admin2_ref_from_row(
self, row: Dict, dataset_name: str, pipeline: str
):
countryiso3 = row["Country ISO3"]
if countryiso3 == "#country+code": # ignore HXL row
return None
admin_level = "0"
for header in row:
match = self.admin_name_regex.match(header)
if match and row[header]:
admin_level = match.group(1)
match admin_level:
case "0":
admin_level = "national"
admin_code = countryiso3
case "1":
admin_level = "adminone"
admin_code = row["Admin 1 PCode"]
case "2":
admin_level = "admintwo"
admin_code = row["Admin 2 PCode"]
case _:
return None
admin2_ref = self.get_admin2_ref(
admin_level,
admin_code,
dataset_name,
pipeline,
self._error_handler,
)
if admin2_ref is None:
if admin_level == "adminone":
admin_code = get_admin1_to_location_connector_code(countryiso3)
elif admin_level == "admintwo":
admin_code = get_admin2_to_location_connector_code(countryiso3)
else:
return None
admin2_ref = self.get_admin2_ref(
admin_level,
admin_code,
dataset_name,
pipeline,
self._error_handler,
)
return admin2_ref


def get_admin2_to_admin1_connector_code(admin1_code: str) -> str:
"""Get the code for an unspecified admin2, based on the admin1 code."""
Expand Down
2 changes: 1 addition & 1 deletion src/hapi/pipelines/database/currency.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
class Currency(BaseUploader):
def __init__(
self,
session: Session,
configuration: Configuration,
session: Session,
):
super().__init__(session)
self._configuration = configuration
Expand Down
Loading

0 comments on commit 5b4ec52

Please sign in to comment.