Skip to content

Commit

Permalink
Merge pull request #46 from OCHA-DAP/main
Browse files Browse the repository at this point in the history
Version 0.10.29
  • Loading branch information
ccataalin authored Dec 12, 2024
2 parents caaa982 + 89865ed commit cc503ad
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 113 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.10.29] - 2024-12-12

### Fixed

- 3W for NGA (corrected resource name)

### Changed

- Added ability to read historical humanitarian needs data
- Replaced missing funding amounts with zeros
- Updated 3W data for CAF and SSD

## [0.10.28] - 2024-12-10

### Changed
Expand Down
42 changes: 20 additions & 22 deletions src/hapi/pipelines/configs/operational_presence.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,35 +62,33 @@ operational_presence_admintwo:

operational_presence_caf:
dataset: "republique-centrafricaine-presence-operationnelle"
resource: "3W_CAR_Mar2024"
resource: "3W_CAR_Sep2024"
format: "xlsx"
xlsx2csv: True
sheet: "OCHA CAR 3W OP T1 2024"
headers:
- 1
- 2
sheet: "3W T3 2024"
headers: 1
source_date:
start: "01/01/2024"
end: "31/03/2024"
start: "01/04/2024"
end: "30/09/2024"
use_hxl: False
admin:
- ~
- "PCODE2 #adm2+code"
- "PCODE2"
admin_exact: True
input:
- "Description_Acteur"
- "ACTEUR #org+acronym"
- "DESCRIPTION_ACTEUR"
- "ACTEUR"
- "TYPE_ORG"
- "SECTEUR #sector"
- "PREFECTURE #adm1+name"
- "SOUSPREFECTURE #adm2+name"
- "SECTEUR"
- "PREFECTURE"
- "SOUSPREFECTURE"
list:
- "Description_Acteur"
- "ACTEUR #org+acronym"
- "DESCRIPTION_ACTEUR"
- "ACTEUR"
- "TYPE_ORG"
- "SECTEUR #sector"
- "PREFECTURE #adm1+name"
- "SOUSPREFECTURE #adm2+name"
- "SECTEUR"
- "PREFECTURE"
- "SOUSPREFECTURE"

operational_presence_cmr:
dataset: "cameroon-5w-operational-presence"
Expand Down Expand Up @@ -356,7 +354,7 @@ operational_presence_admintwo:

operational_presence_nga:
dataset: "nigeria-3w"
resource: "who-is-doing-what-and-where_nga_3w_apr_jun_2024.xlsx.csv"
resource: "who-is-doing-what-and-where_nga_3w_apr_jun_2024.csv"
format: "csv"
headers: 1
source_date:
Expand Down Expand Up @@ -759,14 +757,14 @@ operational_presence_national:

operational_presence_ssd:
dataset: "south-sudan-operational-presence"
resource: "ss_20241106_3w_oerational presence_Jan to Sep_2024.xlsx"
resource: "ss_20241212_3w_oerational presence_Jan to Oct_2024.xlsx"
format: "xlsx"
xlsx2csv: True
sheet: "data"
sheet: "Data"
headers: 1
source_date:
start: "01/01/2024"
end: "30/09/2024"
end: "31/10/2024"
admin_single: "SSD"
input:
- "Name of organization"
Expand Down
12 changes: 3 additions & 9 deletions src/hapi/pipelines/database/funding.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,10 @@ def populate(self) -> None:
# This check for a missing funding line has been added due to
# an error in the UKR funding requirements data
if funding_usd is None:
self._error_manager.add_missing_value_message(
"Funding",
dataset_name,
"funding_usd for appeal code",
appeal_code,
resource_name=resource_name,
err_to_hdx=True,
)
continue
funding_usd = 0
funding_pct = row["#value+funding+pct"]
if funding_pct is None and funding_usd == 0:
funding_pct = 0
reference_period_start = parse_date(row["#date+start"])
reference_period_end = parse_date(row["#date+end"])

Expand Down
159 changes: 80 additions & 79 deletions src/hapi/pipelines/database/humanitarian_needs.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,86 +95,87 @@ def populate(self) -> None:
self._metadata.add_dataset(dataset)
dataset_id = dataset["id"]
dataset_name = dataset["name"]
resource = dataset.get_resource(0) # assumes first resource is latest!
self._metadata.add_resource(dataset_id, resource)
negative_values_by_iso3 = {}
rounded_values_by_iso3 = {}
resource_id = resource["id"]
resource_name = resource["name"]
year = int(resource_name[-4:])
time_period_start = datetime(year, 1, 1)
time_period_end = datetime(year, 12, 31, 23, 59, 59)
url = resource["url"]
headers, rows = reader.get_tabular_rows(url, dict_form=True)
# Admin 1 PCode,Admin 2 PCode,Sector,Gender,Age Group,Disabled,Population Group,Population,In Need,Targeted,Affected,Reached
for row in rows:
countryiso3 = row["Country ISO3"]
admin2_ref = self.get_admin2_ref(row, dataset_name)
if not admin2_ref:
continue
provider_admin1_name = get_provider_name(row, "Admin 1 Name")
provider_admin2_name = get_provider_name(row, "Admin 2 Name")
sector = row["Sector"]
sector_code = self._sector.get_sector_code(sector)
if not sector_code:
self._error_manager.add_missing_value_message(
"HumanitarianNeeds", dataset_name, "sector", sector
)
continue
category = row["Category"]
if category is None:
category = ""

def create_row(in_col, population_status):
value = row[in_col]
if value is None:
return
value = get_numeric_if_possible(value)
if value < 0:
dict_of_lists_add(
negative_values_by_iso3, countryiso3, str(value)
resources = dataset.get_resources()
for resource in resources:
self._metadata.add_resource(dataset_id, resource)
negative_values_by_iso3 = {}
rounded_values_by_iso3 = {}
resource_id = resource["id"]
resource_name = resource["name"]
year = int(resource_name[-4:])
time_period_start = datetime(year, 1, 1)
time_period_end = datetime(year, 12, 31, 23, 59, 59)
url = resource["url"]
headers, rows = reader.get_tabular_rows(url, dict_form=True)
# Admin 1 PCode,Admin 2 PCode,Sector,Gender,Age Group,Disabled,Population Group,Population,In Need,Targeted,Affected,Reached
for row in rows:
countryiso3 = row["Country ISO3"]
admin2_ref = self.get_admin2_ref(row, dataset_name)
if not admin2_ref:
continue
provider_admin1_name = get_provider_name(row, "Admin 1 Name")
provider_admin2_name = get_provider_name(row, "Admin 2 Name")
sector = row["Sector"]
sector_code = self._sector.get_sector_code(sector)
if not sector_code:
self._error_manager.add_missing_value_message(
"HumanitarianNeeds", dataset_name, "sector", sector
)
return
if isinstance(value, float):
dict_of_lists_add(
rounded_values_by_iso3, countryiso3, str(value)
continue
category = row["Category"]
if category is None:
category = ""

def create_row(in_col, population_status):
value = row[in_col]
if value is None:
return
value = get_numeric_if_possible(value)
if value < 0:
dict_of_lists_add(
negative_values_by_iso3, countryiso3, str(value)
)
return
if isinstance(value, float):
dict_of_lists_add(
rounded_values_by_iso3, countryiso3, str(value)
)
value = round(value)
humanitarian_needs_row = DBHumanitarianNeeds(
resource_hdx_id=resource_id,
admin2_ref=admin2_ref,
provider_admin1_name=provider_admin1_name,
provider_admin2_name=provider_admin2_name,
category=category,
sector_code=sector_code,
population_status=population_status,
population=value,
reference_period_start=time_period_start,
reference_period_end=time_period_end,
)
value = round(value)
humanitarian_needs_row = DBHumanitarianNeeds(
resource_hdx_id=resource_id,
admin2_ref=admin2_ref,
provider_admin1_name=provider_admin1_name,
provider_admin2_name=provider_admin2_name,
category=category,
sector_code=sector_code,
population_status=population_status,
population=value,
reference_period_start=time_period_start,
reference_period_end=time_period_end,
)
self._session.add(humanitarian_needs_row)
self._session.add(humanitarian_needs_row)

create_row("Population", "all")
create_row("Affected", "AFF")
create_row("In Need", "INN")
create_row("Targeted", "TGT")
create_row("Reached", "REA")
create_row("Population", "all")
create_row("Affected", "AFF")
create_row("In Need", "INN")
create_row("Targeted", "TGT")
create_row("Reached", "REA")

self._session.commit()
for countryiso3, values in negative_values_by_iso3.items():
self._error_manager.add_multi_valued_message(
"HumanitarianNeeds",
dataset_name,
f"negative population value(s) removed in {countryiso3}",
values,
resource_name=resource_name,
err_to_hdx=True,
)
for countryiso3, values in rounded_values_by_iso3.items():
self._error_manager.add_multi_valued_message(
"HumanitarianNeeds",
dataset_name,
f"population value(s) rounded in {countryiso3}",
values,
message_type="warning",
)
self._session.commit()
for countryiso3, values in negative_values_by_iso3.items():
self._error_manager.add_multi_valued_message(
"HumanitarianNeeds",
dataset_name,
f"negative population value(s) removed in {countryiso3}",
values,
resource_name=resource_name,
err_to_hdx=True,
)
for countryiso3, values in rounded_values_by_iso3.items():
self._error_manager.add_multi_valued_message(
"HumanitarianNeeds",
dataset_name,
f"population value(s) rounded in {countryiso3}",
values,
message_type="warning",
)
2 changes: 1 addition & 1 deletion tests/fixtures/input/global-hpc-hno.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/fixtures/input/nigeria-3w.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def test_funding(self, configuration, folder, pipelines):
count = session.scalar(select(func.count(DBResource.hdx_id)))
check.equal(count, 3)
count = session.scalar(select(func.count(DBFunding.resource_hdx_id)))
check.equal(count, 56)
check.equal(count, 57)

@pytest.mark.parametrize(
"themes_to_run", [{"conflict_event": ("BFA", "COL")}]
Expand Down

0 comments on commit cc503ad

Please sign in to comment.