Skip to content

fix: global code update #225

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 56 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
f0ac653
fix: update global soilid code
jjmaynard Dec 20, 2024
64cf160
fix: update global soilid code
jjmaynard Dec 20, 2024
93af38f
fix: postgres database integration
jjmaynard Jan 7, 2025
f5a11fb
Merge branch 'fix/global-code-update' of https://github.com/techmatte…
jjmaynard Jan 7, 2025
9ff26ea
fix: created .env file and updated .gitignore
jjmaynard Jan 10, 2025
f8bfc57
fix: revision of global functions
jjmaynard Feb 4, 2025
37561eb
fix: remove test files
jjmaynard Feb 4, 2025
b273b3d
fix: lint format changes
jjmaynard Feb 4, 2025
51cf8c8
fix: make format changes
jjmaynard Feb 4, 2025
ddab942
style: use LF line endings
paulschreiber Feb 4, 2025
d9ba6ab
Normalize line endings to LF
jjmaynard Feb 6, 2025
0b6c5c9
Update soil_id/db.py
jjmaynard Feb 12, 2025
418ea47
fix: new psql color tables
jjmaynard Feb 18, 2025
082568b
Merge branch 'fix/global-code-update' of https://github.com/techmatte…
jjmaynard Feb 18, 2025
1299c7e
fix: update psql functions
jjmaynard Feb 27, 2025
959add1
fix: HWSDv2 postgres integration
jjmaynard Mar 4, 2025
605b369
fix: global color calculation
jjmaynard Mar 4, 2025
587cc7e
feat: update tests for global algorithm
shrouxm Mar 11, 2025
2fc46f0
fix: global bugs
jjmaynard Mar 21, 2025
c24137a
test: update tests
shrouxm Mar 25, 2025
40261a0
fix: traceback errors-geo
jjmaynard Mar 28, 2025
3e70f5b
fix: no map data return
jjmaynard Mar 28, 2025
ac0a581
fix: make black/lint format
jjmaynard Mar 31, 2025
e86c5df
fix: sql code update
jjmaynard Apr 10, 2025
d26ba68
fix: update global soilid code
jjmaynard Dec 20, 2024
e7cd519
fix: postgres database integration
jjmaynard Jan 7, 2025
96f6c46
fix: update global soilid code
jjmaynard Dec 20, 2024
a429d2e
fix: created .env file and updated .gitignore
jjmaynard Jan 10, 2025
b03b133
fix: revision of global functions
jjmaynard Feb 4, 2025
1f91f2d
fix: remove test files
jjmaynard Feb 4, 2025
2ef351b
fix: lint format changes
jjmaynard Feb 4, 2025
d98bbb7
fix: make format changes
jjmaynard Feb 4, 2025
00a63f4
style: use LF line endings
paulschreiber Feb 4, 2025
05fe937
Normalize line endings to LF
jjmaynard Feb 6, 2025
c671b67
fix: new psql color tables
jjmaynard Feb 18, 2025
ea5f979
Update soil_id/db.py
jjmaynard Feb 12, 2025
8409988
fix: update psql functions
jjmaynard Feb 27, 2025
60d8d20
fix: HWSDv2 postgres integration
jjmaynard Mar 4, 2025
4f0d1c9
fix: global color calculation
jjmaynard Mar 4, 2025
26f1e84
feat: update tests for global algorithm
shrouxm Mar 11, 2025
f9f5cda
fix: global bugs
jjmaynard Mar 21, 2025
65152ab
test: update tests
shrouxm Mar 25, 2025
4e02a14
fix: traceback errors-geo
jjmaynard Mar 28, 2025
8d6e0b1
fix: no map data return
jjmaynard Mar 28, 2025
6a5ef1e
fix: make black/lint format
jjmaynard Mar 31, 2025
0951ee4
fix: sql code update
jjmaynard Apr 10, 2025
d54c625
build: update Makefile README.md pyproject.toml requirements-dev.txt …
paulschreiber Apr 14, 2025
ab9d207
style: run make format
paulschreiber Apr 14, 2025
caebd71
fix: fix imports in config
paulschreiber Apr 14, 2025
22eccb5
fix: remove unused variables
paulschreiber Apr 14, 2025
6a947a2
fix: global test dataset and testing
jjmaynard Apr 22, 2025
e16183e
Merge branch 'fix/global-code-update' of https://github.com/techmatte…
jjmaynard Apr 22, 2025
d657f9a
test: update tests
shrouxm May 6, 2025
e727737
dx: add docker file for dev database
shrouxm May 6, 2025
a4fef55
fix: lint
shrouxm May 6, 2025
9cc216a
test: skip global test by default (no DB in CI)
shrouxm May 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion soil_id/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

# Output
APP_NAME = os.environ.get("APP_NAME", "org.terraso.soilid")
TEMP_DIR = tempfile.TemporaryDirectory(delete=False)
TEMP_DIR = tempfile.TemporaryDirectory()
CACHE_DIR = user_cache_dir(APP_NAME)
OUTPUT_PATH = TEMP_DIR.name
SOIL_ID_RANK_PATH = f"{OUTPUT_PATH}/soil_id_rank.csv"
Expand Down
162 changes: 125 additions & 37 deletions soil_id/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,19 @@

# Standard libraries
import logging
import sys

import pandas as pd

# Third-party libraries
import psycopg
from dotenv import load_dotenv

# local libraries
import soil_id.config

# Load .env file
load_dotenv()


def get_datastore_connection():
"""
Expand All @@ -33,17 +36,25 @@ def get_datastore_connection():
Returns:
Connection object if successful; otherwise the error is logged and re-raised.
"""
conn = None # Initialize variable
try:
# conn = psycopg.connect(
# host=os.getenv("DB_HOST"),
# user=os.getenv("DB_USERNAME"),
# password=os.getenv("DB_PASSWORD"),
# dbname=os.getenv("DB_NAME"),
# )
conn = psycopg.connect(
host=soil_id.config.DB_HOST,
user=soil_id.config.DB_USERNAME,
passwd=soil_id.config.DB_PASSWORD,
database=soil_id.config.DB_NAME,
password=soil_id.config.DB_PASSWORD,
dbname=soil_id.config.DB_NAME,
)
logging.info("Database connection successful.")
return conn
except Exception as err:
logging.error(err)
sys.exit(str(err))
logging.error(f"Database connection failed: {err}")
raise


# us, global
Expand All @@ -53,6 +64,7 @@ def save_model_output(
"""
Save the output of the model to the 'landpks_soil_model' table.
"""
conn = None
try:
conn = get_datastore_connection()
cur = conn.cursor()
Expand Down Expand Up @@ -87,6 +99,7 @@ def save_rank_output(record_id, model_version, rank_blob):
"""
Update the rank of the soil model in the 'landpks_soil_model' table.
"""
conn = None
try:
conn = get_datastore_connection()
cur = conn.cursor()
Expand All @@ -110,6 +123,7 @@ def load_model_output(plot_id):
"""
Load model output based on plot ID and model version.
"""
conn = None
try:
conn = get_datastore_connection()
cur = conn.cursor()
Expand All @@ -135,6 +149,7 @@ def save_soilgrids_output(plot_id, model_version, soilgrids_blob):
"""
Save the output of the soil grids to the 'landpks_soilgrids_model' table.
"""
conn = None
try:
conn = get_datastore_connection()
cur = conn.cursor()
Expand All @@ -160,17 +175,28 @@ def get_WISE30sec_data(MUGLB_NEW_Select):
"""
Retrieve WISE 30 second data based on selected MUGLB_NEW values.
"""
if not MUGLB_NEW_Select: # Handle empty list case
logging.warning("MUGLB_NEW_Select is empty. Returning empty DataFrame.")
return pd.DataFrame() # Return an empty DataFrame

conn = None
try:
conn = get_datastore_connection()
cur = conn.cursor()

# Create placeholders for the SQL IN clause
placeholders = ", ".join(["%s"] * len(MUGLB_NEW_Select))
sql = """SELECT MUGLB_NEW, COMPID, id, MU_GLOBAL, NEWSUID, SCID, PROP, CLAF,
PRID, Layer, TopDep, BotDep, CFRAG, SDTO, STPC, CLPC, CECS,
PHAQ, ELCO, SU_name, FAO_SYS
FROM wise_soil_data
WHERE MUGLB_NEW IN (%s)"""
cur.execute(sql, placeholders)
sql_query = f"""SELECT MUGLB_NEW, COMPID, id, MU_GLOBAL, NEWSUID, SCID, PROP, CLAF,
PRID, Layer, TopDep, BotDep, CFRAG, SDTO, STPC, CLPC, CECS,
PHAQ, ELCO, SU_name, FAO_SYS
FROM wise_soil_data
WHERE MUGLB_NEW IN ({placeholders})"""

# Execute the query only if the list is non-empty
cur.execute(sql_query, tuple(MUGLB_NEW_Select))
results = cur.fetchall()

# Convert the results to a pandas DataFrame
data = pd.DataFrame(
results,
columns=[
Expand All @@ -197,29 +223,40 @@ def get_WISE30sec_data(MUGLB_NEW_Select):
"FAO_SYS",
],
)

return data

except Exception as err:
logging.error(err)
logging.error(f"Error querying PostgreSQL: {err}")
return None

finally:
conn.close()
if conn:
conn.close()


# global
def get_WRB_descriptions(WRB_Comp_List):
"""
Retrieve WRB descriptions based on provided WRB component list.
"""
conn = None
try:
conn = get_datastore_connection()
cur = conn.cursor()

# Create placeholders for the SQL IN clause
placeholders = ", ".join(["%s"] * len(WRB_Comp_List))
sql = """SELECT WRB_tax, Description_en, Management_en, Description_es, Management_es,
Description_ks, Management_ks, Description_fr, Management_fr
FROM wrb_fao90_desc
WHERE WRB_tax IN %s"""
cur.execute(sql, placeholders)
sql = f"""SELECT WRB_tax, Description_en, Management_en, Description_es, Management_es,
Description_ks, Management_ks, Description_fr, Management_fr
FROM wrb_fao90_desc
WHERE WRB_tax IN ({placeholders})"""

# Execute the query with the parameters
cur.execute(sql, tuple(WRB_Comp_List))
results = cur.fetchall()

# Convert the results to a pandas DataFrame
data = pd.DataFrame(
results,
columns=[
Expand All @@ -234,40 +271,90 @@ def get_WRB_descriptions(WRB_Comp_List):
"Management_fr",
],
)

return data

except Exception as err:
logging.error(err)
logging.error(f"Error querying PostgreSQL: {err}")
return None

finally:
conn.close()
if conn:
conn.close()


# global only
def getSG_descriptions(WRB_Comp_List):
"""
Fetch WRB descriptions from a PostgreSQL database using wrb2006_to_fao90
and wrb_fao90_desc tables. Returns a pandas DataFrame with columns:
[WRB_tax, Description_en, Management_en, Description_es, ...]

Args:
WRB_Comp_List (list[str]): List of WRB_2006_Full values (e.g. ["Chernozem","Gleysol"]).

Returns:
pandas.DataFrame or None if an error occurs.
"""

conn = None
try:
# 1. Get a connection to the datastore.
conn = get_datastore_connection()

# Execute a SQL query and return the results
def execute_query(query, params):
with conn.cursor() as cur:
# Execute the query with the parameters
cur.execute(query, params)
return cur.fetchall()

# First SQL query
sql1 = """SELECT lu.WRB_1984_Full
FROM wrb2006_to_fao90 AS lu
WHERE lu.WRB_2006_Full IN %s"""
names = execute_query(sql1, (tuple(WRB_Comp_List),))
WRB_Comp_List = [item for t in list(names) for item in t]

# Second SQL query
sql2 = """SELECT WRB_tax, Description_en, Management_en, Description_es, Management_es,
Description_ks, Management_ks, Description_fr, Management_fr
FROM wrb_fao90_desc
WHERE WRB_tax IN %s"""
results = execute_query(sql2, (tuple(WRB_Comp_List),))
# 2. Map WRB_2006_Full -> WRB_1984_Full using wrb2006_to_fao90
# The values are passed as a single list parameter, which psycopg adapts to a
# PostgreSQL array so the query can match with = ANY(%s).
# Example: WHERE lu.WRB_2006_Full = ANY(ARRAY['Chernozem','Gleysol',...])
sql1 = """
SELECT lu.WRB_1984_Full
FROM wrb2006_to_fao90 AS lu
WHERE lu.WRB_2006_Full = ANY(%s)
"""
names = execute_query(sql1, ([WRB_Comp_List],))

# Flatten from [(x,), (y,)] => [x, y]
WRB_Comp_List_mapped = [item for (item,) in names]

if not WRB_Comp_List_mapped:
# If no mapping is found, return an empty DataFrame with the expected columns
logging.warning("No mapped WRB_1984_Full names found for given WRB_2006_Full values.")
return pd.DataFrame(
columns=[
"WRB_tax",
"Description_en",
"Management_en",
"Description_es",
"Management_es",
"Description_ks",
"Management_ks",
"Description_fr",
"Management_fr",
]
)

# 3. Get descriptions from wrb_fao90_desc where WRB_tax IN ...
sql2 = """
SELECT WRB_tax,
Description_en,
Management_en,
Description_es,
Management_es,
Description_ks,
Management_ks,
Description_fr,
Management_fr
FROM wrb_fao90_desc
WHERE WRB_tax = ANY(%s)
"""
results = execute_query(sql2, ([WRB_Comp_List_mapped],))

# Convert results to DataFrame
# 4. Convert the raw query results to a DataFrame
data = pd.DataFrame(
results,
columns=[
Expand All @@ -285,8 +372,9 @@ def execute_query(query, params):
return data

except Exception as err:
logging.error(err)
logging.error(f"Error querying PostgreSQL: {err}")
return None

finally:
conn.close()
if conn:
conn.close()
Loading
Loading