Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version 0.10.29 #46

Merged
merged 8 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.10.29] - 2024-12-12

### Fixed

- 3W resource name for NGA

### Changed

- Added ability to read historical humanitarian needs data
- Replaced missing funding amounts with zeros
- Updated 3W data for CAF, SSD

## [0.10.28] - 2024-12-10

### Changed
Expand Down
42 changes: 20 additions & 22 deletions src/hapi/pipelines/configs/operational_presence.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,35 +62,33 @@ operational_presence_admintwo:

operational_presence_caf:
dataset: "republique-centrafricaine-presence-operationnelle"
resource: "3W_CAR_Mar2024"
resource: "3W_CAR_Sep2024"
format: "xlsx"
xlsx2csv: True
sheet: "OCHA CAR 3W OP T1 2024"
headers:
- 1
- 2
sheet: "3W T3 2024"
headers: 1
source_date:
start: "01/01/2024"
end: "31/03/2024"
start: "01/04/2024"
end: "30/09/2024"
use_hxl: False
admin:
- ~
- "PCODE2 #adm2+code"
- "PCODE2"
admin_exact: True
input:
- "Description_Acteur"
- "ACTEUR #org+acronym"
- "DESCRIPTION_ACTEUR"
- "ACTEUR"
- "TYPE_ORG"
- "SECTEUR #sector"
- "PREFECTURE #adm1+name"
- "SOUSPREFECTURE #adm2+name"
- "SECTEUR"
- "PREFECTURE"
- "SOUSPREFECTURE"
list:
- "Description_Acteur"
- "ACTEUR #org+acronym"
- "DESCRIPTION_ACTEUR"
- "ACTEUR"
- "TYPE_ORG"
- "SECTEUR #sector"
- "PREFECTURE #adm1+name"
- "SOUSPREFECTURE #adm2+name"
- "SECTEUR"
- "PREFECTURE"
- "SOUSPREFECTURE"

operational_presence_cmr:
dataset: "cameroon-5w-operational-presence"
Expand Down Expand Up @@ -356,7 +354,7 @@ operational_presence_admintwo:

operational_presence_nga:
dataset: "nigeria-3w"
resource: "who-is-doing-what-and-where_nga_3w_apr_jun_2024.xlsx.csv"
resource: "who-is-doing-what-and-where_nga_3w_apr_jun_2024.csv"
format: "csv"
headers: 1
source_date:
Expand Down Expand Up @@ -759,14 +757,14 @@ operational_presence_national:

operational_presence_ssd:
dataset: "south-sudan-operational-presence"
resource: "ss_20241106_3w_oerational presence_Jan to Sep_2024.xlsx"
resource: "ss_20241212_3w_oerational presence_Jan to Oct_2024.xlsx"
format: "xlsx"
xlsx2csv: True
sheet: "data"
sheet: "Data"
headers: 1
source_date:
start: "01/01/2024"
end: "30/09/2024"
end: "31/10/2024"
admin_single: "SSD"
input:
- "Name of organization"
Expand Down
12 changes: 3 additions & 9 deletions src/hapi/pipelines/database/funding.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,10 @@ def populate(self) -> None:
# This check for a missing funding line has been added due to
# an error in the UKR funding requirements data
if funding_usd is None:
self._error_manager.add_missing_value_message(
"Funding",
dataset_name,
"funding_usd for appeal code",
appeal_code,
resource_name=resource_name,
err_to_hdx=True,
)
continue
funding_usd = 0
funding_pct = row["#value+funding+pct"]
if funding_pct is None and funding_usd == 0:
funding_pct = 0
reference_period_start = parse_date(row["#date+start"])
reference_period_end = parse_date(row["#date+end"])

Expand Down
159 changes: 80 additions & 79 deletions src/hapi/pipelines/database/humanitarian_needs.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,86 +95,87 @@ def populate(self) -> None:
self._metadata.add_dataset(dataset)
dataset_id = dataset["id"]
dataset_name = dataset["name"]
resource = dataset.get_resource(0) # assumes first resource is latest!
self._metadata.add_resource(dataset_id, resource)
negative_values_by_iso3 = {}
rounded_values_by_iso3 = {}
resource_id = resource["id"]
resource_name = resource["name"]
year = int(resource_name[-4:])
time_period_start = datetime(year, 1, 1)
time_period_end = datetime(year, 12, 31, 23, 59, 59)
url = resource["url"]
headers, rows = reader.get_tabular_rows(url, dict_form=True)
# Admin 1 PCode,Admin 2 PCode,Sector,Gender,Age Group,Disabled,Population Group,Population,In Need,Targeted,Affected,Reached
for row in rows:
countryiso3 = row["Country ISO3"]
admin2_ref = self.get_admin2_ref(row, dataset_name)
if not admin2_ref:
continue
provider_admin1_name = get_provider_name(row, "Admin 1 Name")
provider_admin2_name = get_provider_name(row, "Admin 2 Name")
sector = row["Sector"]
sector_code = self._sector.get_sector_code(sector)
if not sector_code:
self._error_manager.add_missing_value_message(
"HumanitarianNeeds", dataset_name, "sector", sector
)
continue
category = row["Category"]
if category is None:
category = ""

def create_row(in_col, population_status):
value = row[in_col]
if value is None:
return
value = get_numeric_if_possible(value)
if value < 0:
dict_of_lists_add(
negative_values_by_iso3, countryiso3, str(value)
resources = dataset.get_resources()
for resource in resources:
self._metadata.add_resource(dataset_id, resource)
negative_values_by_iso3 = {}
rounded_values_by_iso3 = {}
resource_id = resource["id"]
resource_name = resource["name"]
year = int(resource_name[-4:])
time_period_start = datetime(year, 1, 1)
time_period_end = datetime(year, 12, 31, 23, 59, 59)
url = resource["url"]
headers, rows = reader.get_tabular_rows(url, dict_form=True)
# Admin 1 PCode,Admin 2 PCode,Sector,Gender,Age Group,Disabled,Population Group,Population,In Need,Targeted,Affected,Reached
for row in rows:
countryiso3 = row["Country ISO3"]
admin2_ref = self.get_admin2_ref(row, dataset_name)
if not admin2_ref:
continue
provider_admin1_name = get_provider_name(row, "Admin 1 Name")
provider_admin2_name = get_provider_name(row, "Admin 2 Name")
sector = row["Sector"]
sector_code = self._sector.get_sector_code(sector)
if not sector_code:
self._error_manager.add_missing_value_message(
"HumanitarianNeeds", dataset_name, "sector", sector
)
return
if isinstance(value, float):
dict_of_lists_add(
rounded_values_by_iso3, countryiso3, str(value)
continue
category = row["Category"]
if category is None:
category = ""

def create_row(in_col, population_status):
value = row[in_col]
if value is None:
return
value = get_numeric_if_possible(value)
if value < 0:
dict_of_lists_add(
negative_values_by_iso3, countryiso3, str(value)
)
return
if isinstance(value, float):
dict_of_lists_add(
rounded_values_by_iso3, countryiso3, str(value)
)
value = round(value)
humanitarian_needs_row = DBHumanitarianNeeds(
resource_hdx_id=resource_id,
admin2_ref=admin2_ref,
provider_admin1_name=provider_admin1_name,
provider_admin2_name=provider_admin2_name,
category=category,
sector_code=sector_code,
population_status=population_status,
population=value,
reference_period_start=time_period_start,
reference_period_end=time_period_end,
)
value = round(value)
humanitarian_needs_row = DBHumanitarianNeeds(
resource_hdx_id=resource_id,
admin2_ref=admin2_ref,
provider_admin1_name=provider_admin1_name,
provider_admin2_name=provider_admin2_name,
category=category,
sector_code=sector_code,
population_status=population_status,
population=value,
reference_period_start=time_period_start,
reference_period_end=time_period_end,
)
self._session.add(humanitarian_needs_row)
self._session.add(humanitarian_needs_row)

create_row("Population", "all")
create_row("Affected", "AFF")
create_row("In Need", "INN")
create_row("Targeted", "TGT")
create_row("Reached", "REA")
create_row("Population", "all")
create_row("Affected", "AFF")
create_row("In Need", "INN")
create_row("Targeted", "TGT")
create_row("Reached", "REA")

self._session.commit()
for countryiso3, values in negative_values_by_iso3.items():
self._error_manager.add_multi_valued_message(
"HumanitarianNeeds",
dataset_name,
f"negative population value(s) removed in {countryiso3}",
values,
resource_name=resource_name,
err_to_hdx=True,
)
for countryiso3, values in rounded_values_by_iso3.items():
self._error_manager.add_multi_valued_message(
"HumanitarianNeeds",
dataset_name,
f"population value(s) rounded in {countryiso3}",
values,
message_type="warning",
)
self._session.commit()
for countryiso3, values in negative_values_by_iso3.items():
self._error_manager.add_multi_valued_message(
"HumanitarianNeeds",
dataset_name,
f"negative population value(s) removed in {countryiso3}",
values,
resource_name=resource_name,
err_to_hdx=True,
)
for countryiso3, values in rounded_values_by_iso3.items():
self._error_manager.add_multi_valued_message(
"HumanitarianNeeds",
dataset_name,
f"population value(s) rounded in {countryiso3}",
values,
message_type="warning",
)
2 changes: 1 addition & 1 deletion tests/fixtures/input/global-hpc-hno.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/fixtures/input/nigeria-3w.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def test_funding(self, configuration, folder, pipelines):
count = session.scalar(select(func.count(DBResource.hdx_id)))
check.equal(count, 3)
count = session.scalar(select(func.count(DBFunding.resource_hdx_id)))
check.equal(count, 56)
check.equal(count, 57)

@pytest.mark.parametrize(
"themes_to_run", [{"conflict_event": ("BFA", "COL")}]
Expand Down
Loading