Skip to content

Commit aec421e

Browse files
committed
Add harmonization of beneficiary data in credit processing functions
1 parent 2b66185 commit aec421e

File tree

4 files changed

+60
-15
lines changed

4 files changed

+60
-15
lines changed

offsets_db_data/apx.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
load_registry_project_column_mapping,
1212
)
1313
from offsets_db_data.credits import * # noqa: F403
14+
from offsets_db_data.credits import harmonize_beneficiary_data
1415
from offsets_db_data.models import credit_without_id_schema, project_schema
1516
from offsets_db_data.projects import * # noqa: F403
1617

@@ -44,7 +45,12 @@ def determine_transaction_type(df: pd.DataFrame, *, download_type: str) -> pd.Da
4445

4546
@pf.register_dataframe_method
4647
def process_apx_credits(
47-
df: pd.DataFrame, *, download_type: str, registry_name: str, arb: pd.DataFrame | None = None
48+
df: pd.DataFrame,
49+
*,
50+
download_type: str,
51+
registry_name: str,
52+
arb: pd.DataFrame | None = None,
53+
harmonize_beneficiary_info: bool = False,
4854
) -> pd.DataFrame:
4955
"""
5056
Process APX credits data by setting registry, determining transaction types, renaming columns,
@@ -94,6 +100,9 @@ def process_apx_credits(
94100
)
95101
if arb is not None and not arb.empty:
96102
data = data.merge_with_arb(arb=arb)
103+
104+
if harmonize_beneficiary_info:
105+
data = data.pipe(harmonize_beneficiary_data)
97106
return data
98107

99108

offsets_db_data/credits.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import pathlib
2+
import subprocess
3+
import tempfile
4+
15
import janitor # noqa: F401
26
import pandas as pd
37
import pandas_flavor as pf
@@ -122,3 +126,29 @@ def merge_with_arb(credits: pd.DataFrame, *, arb: pd.DataFrame) -> pd.DataFrame:
122126

123127
df = pd.concat([df, arb], ignore_index=True)
124128
return df
129+
130+
131+
def harmonize_beneficiary_data(credits: pd.DataFrame) -> pd.DataFrame:
    """
    Stage credit data on disk and invoke the ``offsets-db-data-orcli`` CLI.

    The credits are written to ``credits.csv`` inside a private temporary
    directory, ``offsets-db-data-orcli run list`` is executed, and the
    command's standard output is printed. The temporary directory is removed
    before the function returns.

    NOTE(review): the command invoked here does not reference the staged CSV,
    so no column (e.g. 'beneficiary_id' / 'beneficiary_name') is dropped or
    renamed yet. The input is returned unchanged so that
    ``DataFrame.pipe(harmonize_beneficiary_data)`` callers keep their data
    instead of receiving ``None``.

    Parameters
    ----------
    credits : pd.DataFrame
        Input DataFrame containing credit data.

    Returns
    -------
    pd.DataFrame
        The ``credits`` DataFrame that was passed in, unmodified.

    Raises
    ------
    FileNotFoundError
        If the ``offsets-db-data-orcli`` executable cannot be found.
    subprocess.CalledProcessError
        If the command exits with a non-zero status (``check=True``).
    """

    # A per-call directory avoids clobbering a shared '<tmp>/credits.csv' when
    # several processes run concurrently, and guarantees cleanup on any exit.
    with tempfile.TemporaryDirectory() as tempdir:
        temp_path = pathlib.Path(tempdir) / 'credits.csv'
        credits.to_csv(temp_path, index=False)

        # Argument list + default shell=False: nothing is interpreted by a shell.
        # Errors propagate unchanged; wrapping them in `except: raise` adds nothing.
        result = subprocess.run(
            ['offsets-db-data-orcli', 'run', 'list'],
            capture_output=True,
            text=True,
            check=True,
        )

    print(result.stdout)
    return credits

offsets_db_data/gld.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,18 @@
1010
load_protocol_mapping,
1111
load_registry_project_column_mapping,
1212
)
13-
from offsets_db_data.credits import (
14-
aggregate_issuance_transactions, # noqa: F401
15-
filter_and_merge_transactions, # noqa: F401
16-
merge_with_arb, # noqa: F401
17-
)
13+
from offsets_db_data.credits import aggregate_issuance_transactions # noqa: F401
14+
from offsets_db_data.credits import filter_and_merge_transactions # noqa: F401
15+
from offsets_db_data.credits import merge_with_arb # noqa: F401
16+
from offsets_db_data.credits import harmonize_beneficiary_data
1817
from offsets_db_data.models import credit_without_id_schema, project_schema
19-
from offsets_db_data.projects import (
20-
harmonize_country_names, # noqa: F401
21-
add_category, # noqa: F401
22-
add_is_compliance_flag, # noqa: F401
23-
map_protocol, # noqa: F401
24-
harmonize_status_codes, # noqa: F401
25-
add_first_issuance_and_retirement_dates, # noqa: F401
26-
add_retired_and_issued_totals, # noqa: F401
27-
)
18+
from offsets_db_data.projects import add_category # noqa: F401
19+
from offsets_db_data.projects import add_first_issuance_and_retirement_dates # noqa: F401
20+
from offsets_db_data.projects import add_is_compliance_flag # noqa: F401
21+
from offsets_db_data.projects import add_retired_and_issued_totals # noqa: F401
22+
from offsets_db_data.projects import harmonize_country_names # noqa: F401
23+
from offsets_db_data.projects import harmonize_status_codes # noqa: F401
24+
from offsets_db_data.projects import map_protocol # noqa: F401
2825

2926

3027
@pf.register_dataframe_method
@@ -80,6 +77,7 @@ def process_gld_credits(
8077
registry_name: str = 'gold-standard',
8178
prefix: str = 'GLD',
8279
arb: pd.DataFrame | None = None,
80+
harmonize_beneficiary_info: bool = False,
8381
) -> pd.DataFrame:
8482
"""
8583
Process Gold Standard credits data by renaming columns, setting registry, determining transaction types,
@@ -142,6 +140,9 @@ def process_gld_credits(
142140
.validate(schema=credit_without_id_schema)
143141
)
144142

143+
if harmonize_beneficiary_info:
144+
data = data.pipe(harmonize_beneficiary_data)
145+
145146
return data
146147

147148

offsets_db_data/vcs.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
load_registry_project_column_mapping,
1212
)
1313
from offsets_db_data.credits import * # noqa: F403
14+
from offsets_db_data.credits import harmonize_beneficiary_data
1415
from offsets_db_data.models import credit_without_id_schema, project_schema
1516
from offsets_db_data.projects import * # noqa: F403
1617

@@ -175,6 +176,7 @@ def process_vcs_credits(
175176
registry_name: str = 'verra',
176177
prefix: str = 'VCS',
177178
arb: pd.DataFrame | None = None,
179+
harmonize_beneficiary_info: bool = False,
178180
) -> pd.DataFrame:
179181
"""
180182
Process Verra credits data, including generation of project IDs, determination of transaction types,
@@ -235,6 +237,9 @@ def process_vcs_credits(
235237
if arb is not None and not arb.empty:
236238
data = data.merge_with_arb(arb=arb)
237239

240+
if harmonize_beneficiary_info:
241+
data = data.pipe(harmonize_beneficiary_data)
242+
238243
return data
239244

240245

0 commit comments

Comments
 (0)