Skip to content

Commit

Permalink
Use batch populate for orgs
Browse files: browse the repository at this point in the history
Update operational presence for AFG
  • Loading branch information
b-j-mills committed May 30, 2024
1 parent 449787e commit bfcdb3e
Show file tree
Hide file tree
Showing 8 changed files with 6,004 additions and 7,131 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.9.13] - 2024-05-30

### Added

- Function to add data to tables in batches

### Changed

- Updated operational presence data for AFG

### Fixed

- Increased speed of conflict event pipeline and included data for all HRP countries with dates in 2023 or later
- Increased speed of conflict event pipeline and included data for all HRP countries

## [0.9.12] - 2024-05-29

Expand Down
7 changes: 2 additions & 5 deletions src/hapi/pipelines/configs/operational_presence.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,10 @@ operational_presence_default:

operational_presence_admintwo:
operational_presence_afg:
dataset: "afghanistan-who-does-what-where-july-to-september-2023"
resource: "afghanistan-3w-operational-presence-july-september-2023.csv"
dataset: "afghanistan-who-does-what-where-january-to-march-2024"
resource: "afghanistan-3w-operational-presence-january-march-2024.csv"
format: "csv"
headers: 1
source_date:
start: "01/07/2023"
end: "30/09/2023"
use_hxl: True
admin:
- ~
Expand Down
11 changes: 6 additions & 5 deletions src/hapi/pipelines/database/operational_presence.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,12 @@ def populate(self, debug=False):
operational_presence_rows.append(
operational_presence_row
)
if debug:
write_list_to_csv(
join("saved_data", "debug_operational_presence.csv"),
debug_rows,
)
return

self._org.populate_multiple()
batch_populate(
Expand All @@ -202,8 +208,3 @@ def populate(self, debug=False):
add_message(errors, dataset, msg)
for error in sorted(errors):
logger.error(error)
if debug:
write_list_to_csv(
join("saved_data", "debug_operational_presence.csv"),
debug_rows,
)
18 changes: 6 additions & 12 deletions src/hapi/pipelines/database/org.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
from hdx.location.names import clean_name
from hdx.scraper.utilities.reader import Read
from hdx.utilities.dictandlist import dict_of_sets_add
from sqlalchemy import insert
from sqlalchemy.orm import Session

from ..utilities.batch_populate import batch_populate
from .base_uploader import BaseUploader

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -73,21 +73,15 @@ def add_or_match_org(
] = (acronym, org_name, org_type)

def populate_multiple(self):
batch = []
for key in self.data:
values = self.data[key]
org_row = dict(
org_rows = [
dict(
acronym=values[0],
name=values[1],
org_type_code=values[2],
)
batch.append(org_row)
if len(batch) >= _BATCH_SIZE:
self._session.execute(insert(DBOrg), batch)
batch = []
if batch:
self._session.execute(insert(DBOrg), batch)
self._session.commit()
for values in self.data.values()
]
batch_populate(org_rows, self._session, DBOrg)

def get_org_info(self, org_name: str, location: str) -> Dict[str, str]:
org_name_map = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"archived": false, "creator_user_id": "391f0864-b6e4-425f-9d46-df87aa456c2b", "data_update_frequency": "90", "dataset_date": "[2024-01-01T00:00:00 TO 2024-03-31T23:59:59]", "dataset_preview": "resource_id", "dataset_source": "Humanitarian partners", "due_date": "2024-08-18T05:32:58", "has_geodata": false, "has_quickcharts": true, "has_showcases": true, "id": "897dc815-4e61-4bc2-b775-aed25689e401", "is_requestdata_type": false, "isopen": false, "last_modified": "2024-05-20T05:32:58.952210", "license_id": "cc-by-igo", "license_title": "Creative Commons Attribution for Intergovernmental Organisations", "license_url": "http://creativecommons.org/licenses/by/3.0/igo/legalcode", "maintainer": "391f0864-b6e4-425f-9d46-df87aa456c2b", "maintainer_email": "[email protected]", "metadata_created": "2024-05-20T05:32:51.497096", "metadata_modified": "2024-05-21T13:03:08.513378", "methodology": "Registry", "name": "afghanistan-who-does-what-where-january-to-march-2024", "notes": "The Who does What Where (3W) is a core humanitarian coordination dataset. It is critical to know where humanitarian organizations are working, what they are doing and their capability in order to identify gaps, avoid duplication of efforts, and plan for future humanitarian response (if needed). The data includes a list of humanitarian organizations by district and cluster, as well as a unique count of organizations. An interactive map of the 3W data can be accessed [here](https://response.reliefweb.int/afghanistan/who-does-what-where-3w?_gl=1*g902oc*_ga*MTYyOTk1NDg4OC4xNjc2NTM2NTk3*_ga_E60ZNX2F68*MTcwNDE3NzMzMC4xNDAuMS4xNzA0MTg0MTQ5LjU4LjAuMA). 
", "num_resources": 3, "num_tags": 4, "organization": {"id": "10e168ce-5b51-49ac-8616-a142d48618e5", "name": "ocha-afghanistan", "title": "OCHA Afghanistan", "type": "organization", "description": "OCHA resumed its operation in Afghanistan in 2009, providing humanitarian assistance in a complex environment where separate \u2013 and not always complementary \u2013 military, political and security objectives pose challenges to the implementation of humanitarian principles, the ability of responders to reach people in need and the safety and security of aid workers", "image_url": "", "created": "2014-08-06T14:26:07.642577", "is_organization": true, "approval_status": "approved", "state": "active"}, "overdue_date": "2024-09-17T05:32:58", "owner_org": "10e168ce-5b51-49ac-8616-a142d48618e5", "package_creator": "murtaza", "pageviews_last_14_days": 21, "private": false, "qa_completed": false, "solr_additions": "{\"countries\": [\"Afghanistan\"]}", "state": "active", "subnational": "1", "title": "Afghanistan - Who does What Where (January to March 2024)", "total_res_downloads": 8, "type": "dataset", "url": null, "version": null, "groups": [{"description": "", "display_name": "Afghanistan", "id": "afg", "image_display_url": "", "name": "afg", "title": "Afghanistan"}], "tags": [{"display_name": "hxl", "id": "a0fbb23a-6aad-4ccc-8062-e9ef9f20e5d2", "name": "hxl", "state": "active", "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1"}, {"display_name": "operational capacity", "id": "d8a59526-9f9a-4c71-b38f-5d9f2eb1615a", "name": "operational capacity", "state": "active", "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1"}, {"display_name": "operational presence", "id": "a25059f9-7e1f-49be-b629-ccccd97a95f8", "name": "operational presence", "state": "active", "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1"}, {"display_name": "who is doing what and where-3w-4w-5w", "id": "ec53893c-6dba-4656-978b-4a32289ea2eb", "name": "who is doing what and where-3w-4w-5w", 
"state": "active", "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1"}], "relationships_as_subject": [], "relationships_as_object": [], "is_fresh": true, "update_status": "fresh", "x_resource_grouping": [], "resources": [{"alt_url": "https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/6bf7a159-c6dd-4afb-ac66-31ee04568d27/download/", "cache_last_updated": null, "cache_url": null, "created": "2024-05-20T05:32:54.512504", "dataset_preview_enabled": true, "datastore_active": false, "description": "", "download_url": "https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/6bf7a159-c6dd-4afb-ac66-31ee04568d27/download/afghanistan-3w-operational-presence-january-march-2024.csv", "format": "CSV", "fs_check_info": "[{\"state\": \"processing\", \"message\": \"The processing of the file structure check has started\", \"timestamp\": \"2024-05-20T05:32:54.147674\"}, {\"state\": \"success\", \"message\": \"File structure check completed\", \"timestamp\": \"2024-05-20T05:32:56.611913\", \"sheet_changes\": [], \"hxl_proxy_response\": {\"url_or_filename\": \"https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/6bf7a159-c6dd-4afb-ac66-31ee04568d27/download/afghanistan-3w-operational-presence-january-march-2024.csv\", \"format\": \"CSV\", \"sheets\": [{\"name\": \"__DEFAULT__\", \"nrows\": 10904, \"ncols\": 11, \"is_hidden\": false, \"has_merged_cells\": false, \"is_hxlated\": true, \"header_hash\": \"668bc5362a8f1dbc2c270766477ecf87\", \"hxl_header_hash\": \"e980b1fa55019e44b2f3c35dc0b895bf\", \"headers\": [\"\\ufeffREGION\", \"PROVINCE\", \"PROV_CODE\", \"DISTRICT\", \"DIST_CODE\", \"ORG_ACRONYM\", \"ORG_NAME\", \"ORG_TYPE\", \"CLUSTER_SECTOR_CODE\", \"CLUSTER_SECTOR_NAME\", \"Question\"], \"hxl_headers\": [\"#region+name\", \"#adm1+name\", \"#adm1+code\", \"#adm2+name\", \"#adm2+code\", \"#org+acronym\", \"#org+name\", \"#org+type+name\", \"#sector+cluster+code\", \"#sector+cluster+name\", 
\"\"]}]}}]", "hash": "", "hdx_rel_url": "/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/6bf7a159-c6dd-4afb-ac66-31ee04568d27/download/afghanistan-3w-operational-presence-january-march-2024.csv", "id": "6bf7a159-c6dd-4afb-ac66-31ee04568d27", "last_modified": "2024-05-20T05:32:54.379302", "metadata_modified": "2024-05-20T05:32:57.791904", "microdata": false, "mimetype": "text/csv", "mimetype_inner": null, "name": "afghanistan-3w-operational-presence-january-march-2024.csv", "originalHash": "-988266717", "package_id": "897dc815-4e61-4bc2-b775-aed25689e401", "pii": "false", "position": 0, "resource_type": "file.upload", "size": 1669576, "state": "active", "url": "https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/6bf7a159-c6dd-4afb-ac66-31ee04568d27/download/afghanistan-3w-operational-presence-january-march-2024.csv", "url_type": "upload"}, {"alt_url": "https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/202dcba2-61ee-4aad-9c64-445322f42710/download/", "cache_last_updated": null, "cache_url": null, "created": "2024-05-20T05:32:57.031196", "dataset_preview_enabled": false, "datastore_active": false, "description": "", "download_url": "https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/202dcba2-61ee-4aad-9c64-445322f42710/download/afghanistan-3w-operational-capacity-january-march-2024.csv", "format": "CSV", "fs_check_info": "[{\"state\": \"processing\", \"message\": \"The processing of the file structure check has started\", \"timestamp\": \"2024-05-20T05:32:56.652328\"}, {\"state\": \"success\", \"message\": \"File structure check completed\", \"timestamp\": \"2024-05-20T05:32:58.939463\", \"sheet_changes\": [], \"hxl_proxy_response\": {\"url_or_filename\": \"https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/202dcba2-61ee-4aad-9c64-445322f42710/download/afghanistan-3w-operational-capacity-january-march-2024.csv\", \"format\": \"CSV\", \"sheets\": 
[{\"name\": \"__DEFAULT__\", \"nrows\": 9075, \"ncols\": 11, \"is_hidden\": false, \"has_merged_cells\": false, \"is_hxlated\": true, \"header_hash\": \"50f940b27864501fed08d221d9967bdf\", \"hxl_header_hash\": \"e980b1fa55019e44b2f3c35dc0b895bf\", \"headers\": [\"REGION\", \"PROVINCE\", \"PROV_CODE\", \"DISTRICT\", \"DIST_CODE\", \"ORG_ACRONYM\", \"ORG_NAME\", \"ORG_TYPE\", \"CLUSTER_SECTOR_CODE\", \"CLUSTER_SECTOR_NAME\", \"Question\"], \"hxl_headers\": [\"#region+name\", \"#adm1+name\", \"#adm1+code\", \"#adm2+name\", \"#adm2+code\", \"#org+acronym\", \"#org+name\", \"#org+type+name\", \"#sector+cluster+code\", \"#sector+cluster+name\", \"\"]}]}}]", "hash": "", "hdx_rel_url": "/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/202dcba2-61ee-4aad-9c64-445322f42710/download/afghanistan-3w-operational-capacity-january-march-2024.csv", "id": "202dcba2-61ee-4aad-9c64-445322f42710", "last_modified": "2024-05-20T05:32:56.856808", "metadata_modified": "2024-05-20T05:32:59.700442", "microdata": false, "mimetype": "text/csv", "mimetype_inner": null, "name": "afghanistan-3w-operational-capacity-january-march-2024.csv", "originalHash": "-988266717", "package_id": "897dc815-4e61-4bc2-b775-aed25689e401", "pii": "false", "position": 1, "resource_type": "file.upload", "size": 1409686, "state": "active", "url": "https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/202dcba2-61ee-4aad-9c64-445322f42710/download/afghanistan-3w-operational-capacity-january-march-2024.csv", "url_type": "upload"}, {"alt_url": "https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/db7922e3-311a-49f3-b2a2-b0dbe45ab86d/download/", "cache_last_updated": null, "cache_url": null, "created": "2024-05-20T05:32:59.055693", "dataset_preview_enabled": false, "datastore_active": false, "description": "", "download_url": 
"https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/db7922e3-311a-49f3-b2a2-b0dbe45ab86d/download/afghanistan-3w-january-march-2024.xlsx", "format": "XLSX", "fs_check_info": "[{\"state\": \"processing\", \"message\": \"The processing of the file structure check has started\", \"timestamp\": \"2024-05-20T05:32:58.759712\"}, {\"state\": \"success\", \"message\": \"File structure check completed\", \"timestamp\": \"2024-05-20T05:33:00.947984\", \"sheet_changes\": [], \"hxl_proxy_response\": {\"url_or_filename\": \"https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/db7922e3-311a-49f3-b2a2-b0dbe45ab86d/download/afghanistan-3w-january-march-2024.xlsx\", \"format\": \"XLSX\", \"sheets\": [{\"name\": \"MetaData\", \"is_hidden\": false, \"nrows\": 11, \"ncols\": 1, \"has_merged_cells\": false, \"is_hxlated\": false, \"header_hash\": \"8a9c0818e0622aadfcb2e59cfe5299b4\", \"hxl_header_hash\": null, \"headers\": [\"METADATA\"], \"hxl_headers\": null}, {\"name\": \"List_District\", \"is_hidden\": false, \"nrows\": 403, \"ncols\": 21, \"has_merged_cells\": false, \"is_hxlated\": true, \"header_hash\": \"30d419bf10484e12f5a01f983141414f\", \"hxl_header_hash\": \"7dafa0a7b166290817a95261ab0e3e99\", \"headers\": [\"Region\", \"PROV_CODE\", \"PROV_NA_ENG\", \"DIST_CODE\", \"DIST_NA_ENG\", \"OP_ALL\", \"OC_ALL\", \"OP_EIE\", \"OC_EIE\", \"OP_ESNFI\", \"OC_ESNFI\", \"OP_FSAC\", \"OC_FSAC\", \"OP_HEALTH\", \"OC_HEALTH\", \"OP_NUTRITION\", \"OC_NUTRITION\", \"OP_PROTECTION\", \"OC_PROTECTION\", \"OP_WASH\", \"OC_WASH\"], \"hxl_headers\": [\"#region+name\", \"#adm1+code\", \"#adm1+name\", \"#adm2+code\", \"#adm2+name\", \"#org+presence+total\", \"#org+capacity+total\", \"#org+presence+eie\", \"#org+capacity+eie\", \"#org+presence+esnfi\", \"#org+capacity+esnfi\", \"#org+presence+fsac\", \"#org+capacity+fsac\", \"#org+presence+health\", \"#org+capacity+health\", \"#org+presence+nutrition\", \"#org+capacity+nutrition\", 
\"#org+presence+protection\", \"#org+capacity+protection\", \"#org+presence+wash\", \"#org+capacity+wash\"]}, {\"name\": \"Count_District\", \"is_hidden\": false, \"nrows\": 403, \"ncols\": 21, \"has_merged_cells\": false, \"is_hxlated\": true, \"header_hash\": \"30d419bf10484e12f5a01f983141414f\", \"hxl_header_hash\": \"7dafa0a7b166290817a95261ab0e3e99\", \"headers\": [\"Region\", \"PROV_CODE\", \"PROV_NA_ENG\", \"DIST_CODE\", \"DIST_NA_ENG\", \"OP_ALL\", \"OC_ALL\", \"OP_EIE\", \"OC_EIE\", \"OP_ESNFI\", \"OC_ESNFI\", \"OP_FSAC\", \"OC_FSAC\", \"OP_HEALTH\", \"OC_HEALTH\", \"OP_NUTRITION\", \"OC_NUTRITION\", \"OP_PROTECTION\", \"OC_PROTECTION\", \"OP_WASH\", \"OC_WASH\"], \"hxl_headers\": [\"#region+name\", \"#adm1+code\", \"#adm1+name\", \"#adm2+code\", \"#adm2+name\", \"#org+presence+total\", \"#org+capacity+total\", \"#org+presence+eie\", \"#org+capacity+eie\", \"#org+presence+esnfi\", \"#org+capacity+esnfi\", \"#org+presence+fsac\", \"#org+capacity+fsac\", \"#org+presence+health\", \"#org+capacity+health\", \"#org+presence+nutrition\", \"#org+capacity+nutrition\", \"#org+presence+protection\", \"#org+capacity+protection\", \"#org+presence+wash\", \"#org+capacity+wash\"]}]}}]", "hash": "", "hdx_rel_url": "/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/db7922e3-311a-49f3-b2a2-b0dbe45ab86d/download/afghanistan-3w-january-march-2024.xlsx", "id": "db7922e3-311a-49f3-b2a2-b0dbe45ab86d", "last_modified": "2024-05-20T05:32:58.952210", "metadata_modified": "2024-05-20T05:33:01.120876", "microdata": false, "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "mimetype_inner": null, "name": "afghanistan-3w-january-march-2024.xlsx", "originalHash": "-988266717", "package_id": "897dc815-4e61-4bc2-b775-aed25689e401", "pii": "false", "position": 2, "resource_type": "file.upload", "size": 146500, "state": "active", "url": 
"https://data.humdata.org/dataset/897dc815-4e61-4bc2-b775-aed25689e401/resource/db7922e3-311a-49f3-b2a2-b0dbe45ab86d/download/afghanistan-3w-january-march-2024.xlsx", "url_type": "upload"}]}
Loading

0 comments on commit bfcdb3e

Please sign in to comment.