diff --git a/.gitignore b/.gitignore
index 29c5fd7a..d4ef1064 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,4 +29,4 @@ prof/
 Data/
 bulk_test_result*
 
-get-pip.py*
\ No newline at end of file
+get-pip.py*
diff --git a/Makefile b/Makefile
index 3c2227a8..eac84691 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,7 @@ setup-git-hooks:
 
 lint:
 	ruff check soil_id
+	ruff format soil_id --diff
 
 format:
 	ruff format soil_id
@@ -73,7 +74,6 @@ process_bulk_test_results_legacy:
 	python -m soil_id.tests.legacy.process_bulk_test_results $(RESULTS_FILE)
 
 # Donwload Munsell CSV, SHX, SHP, SBX, SBN, PRJ, DBF
-# 1tN23iVe6X1fcomcfveVp4w3Pwd0HJuTe: LandPKS_munsell_rgb_lab.csv
 # 1WUa9e3vTWPi6G8h4OI3CBUZP5y7tf1Li: gsmsoilmu_a_us.shx
 # 1l9MxC0xENGmI_NmGlBY74EtlD6SZid_a: gsmsoilmu_a_us.shp
 # 1asGnnqe0zI2v8xuOszlsNmZkOSl7cJ2n: gsmsoilmu_a_us.sbx
@@ -84,7 +84,6 @@ process_bulk_test_results_legacy:
 download-soil-data:
 	mkdir -p Data
 	cd Data; \
-	gdown 1tN23iVe6X1fcomcfveVp4w3Pwd0HJuTe; \
 	gdown 1WUa9e3vTWPi6G8h4OI3CBUZP5y7tf1Li; \
 	gdown 1l9MxC0xENGmI_NmGlBY74EtlD6SZid_a; \
 	gdown 1asGnnqe0zI2v8xuOszlsNmZkOSl7cJ2n; \
@@ -119,6 +118,7 @@ dump_soil_id_db:
 	pg_dump --format=custom $(DATABASE_URL)  -t hwsd2_segment -t hwsd2_data -t landpks_munsell_rgb_lab -t normdist1 -t normdist2 -t wise_soil_data -t wrb2006_to_fao90 -t wrb_fao90_desc -f $(DATABASE_DUMP_FILE)
 
 restore_soil_id_db:
-	pg_restore --dbname=$(DATABASE_URL) --single-transaction --clean --if-exists --no-owner $(DATABASE_DUMP_FILE)
+	psql $(DATABASE_URL) -c "CREATE EXTENSION IF NOT EXISTS postgis;"
+	pg_restore --dbname=$(DATABASE_URL) --clean --if-exists --no-owner --verbose $(DATABASE_DUMP_FILE)
 	psql $(DATABASE_URL) -c "CLUSTER hwsd2_segment USING hwsd2_segment_shape_idx;"
 	psql $(DATABASE_URL) -c "ANALYZE;"
diff --git a/soil_id/config.py b/soil_id/config.py
index f228a462..29eecddb 100644
--- a/soil_id/config.py
+++ b/soil_id/config.py
@@ -13,33 +13,20 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see https://www.gnu.org/licenses/.
 import os
-import tempfile
-
-from platformdirs import user_cache_dir
 
 DATA_PATH = os.environ.get("DATA_PATH", "Data")
 
 # Numpy seeding
 RANDOM_SEED = os.environ.get("RANDOM_SEED", 19)
 
-# Output
-APP_NAME = os.environ.get("APP_NAME", "org.terraso.soilid")
-TEMP_DIR = tempfile.TemporaryDirectory()
-CACHE_DIR = user_cache_dir(APP_NAME)
-OUTPUT_PATH = TEMP_DIR.name
-SOIL_ID_RANK_PATH = f"{OUTPUT_PATH}/soil_id_rank.csv"
-SOIL_ID_PROB_PATH = f"{OUTPUT_PATH}/soil_id_cond_prob.csv"
-REQUESTS_CACHE_PATH = f"{CACHE_DIR}/requests_cache"
-
 # Determines if in/out of US
 US_AREA_PATH = f"{DATA_PATH}/SoilID_US_Areas.shz"
 
 # US Soil ID
 STATSGO_PATH = f"{DATA_PATH}/gsmsoilmu_a_us.shp"
-MUNSELL_RGB_LAB_PATH = f"{DATA_PATH}/LandPKS_munsell_rgb_lab.csv"
 
 # Database
-DB_NAME = os.environ.get("DB_NAME", "terraso_backend")
+DB_NAME = os.environ.get("DB_NAME", "soil_id")
 DB_HOST = os.environ.get("DB_HOST")
 DB_USERNAME = os.environ.get("DB_USERNAME")
 DB_PASSWORD = os.environ.get("DB_PASSWORD")
diff --git a/soil_id/db.py b/soil_id/db.py
index 7d20042c..0db5d956 100644
--- a/soil_id/db.py
+++ b/soil_id/db.py
@@ -32,133 +32,18 @@ def get_datastore_connection():
     Returns:
         Connection object if successful, otherwise exits the program.
     """
-    conn = None
     try:
-        conn = psycopg.connect(
+        return psycopg.connect(
             host=soil_id.config.DB_HOST,
             user=soil_id.config.DB_USERNAME,
             password=soil_id.config.DB_PASSWORD,
             dbname=soil_id.config.DB_NAME,
         )
-        return conn
     except Exception as err:
         logging.error(f"Database connection failed: {err}")
         raise
 
 
-# us, global
-def save_model_output(
-    plot_id, model_version, result_blob, soilIDRank_output_pd, mucompdata_cond_prob
-):
-    """
-    Save the output of the model to the 'landpks_soil_model' table.
-    """
-    conn = None
-    try:
-        conn = get_datastore_connection()
-        cur = conn.cursor()
-
-        sql = """
-        INSERT INTO landpks_soil_model
-        (plot_id, model_version, result_blob, soilIDRank_output_pd, mucompdata_cond_prob)
-        VALUES (%s, %s, %s, %s, %s)
-        """
-        cur.execute(
-            sql,
-            (
-                plot_id,
-                model_version,
-                result_blob,
-                soilIDRank_output_pd,
-                mucompdata_cond_prob,
-            ),
-        )
-        conn.commit()
-
-    except Exception as err:
-        logging.error(err)
-        conn.rollback()
-        return None
-    finally:
-        conn.close()
-
-
-# us, global
-def save_rank_output(record_id, model_version, rank_blob):
-    """
-    Update the rank of the soil model in the 'landpks_soil_model' table.
-    """
-    conn = None
-    try:
-        conn = get_datastore_connection()
-        cur = conn.cursor()
-
-        sql = """UPDATE landpks_soil_model
-                 SET soilrank = %s
-                 WHERE ID = %s AND model_version = %s"""
-        cur.execute(sql, (rank_blob, record_id, model_version))
-        conn.commit()
-
-    except Exception as err:
-        logging.error(err)
-        conn.rollback()
-        return None
-    finally:
-        conn.close()
-
-
-# us, global
-def load_model_output(plot_id):
-    """
-    Load model output based on plot ID and model version.
-    """
-    conn = None
-    try:
-        conn = get_datastore_connection()
-        cur = conn.cursor()
-        model_version = 2
-        sql = """SELECT ID, result_blob, soilIDRank_output_pd, mucompdata_cond_prob
-                  FROM  landpks_soil_model
-                  WHERE plot_id = %s AND model_version = %s
-                  ORDER BY ID DESC LIMIT 1"""
-        cur.execute(sql, plot_id, model_version)
-        results = cur.fetchall()
-        for row in results:
-            model_run = [row[0], row[1], row[2], row[3]]
-        return model_run
-    except Exception as err:
-        logging.error(err)
-        return None
-    finally:
-        conn.close()
-
-
-# global only
-def save_soilgrids_output(plot_id, model_version, soilgrids_blob):
-    """
-    Save the output of the soil grids to the 'landpks_soilgrids_model' table.
-    """
-    conn = None
-    try:
-        conn = get_datastore_connection()
-        cur = conn.cursor()
-
-        sql = """
-        INSERT INTO landpks_soilgrids_model
-        (plot_id, model_version, soilgrids_blob)
-        VALUES (%s, %s, %s)
-        """
-        cur.execute(sql, (plot_id, model_version, soilgrids_blob))
-        conn.commit()
-
-    except Exception as err:
-        logging.error(err)
-        conn.rollback()
-        return None
-    finally:
-        conn.close()
-
-
 def get_hwsd2_profile_data(connection, hwsd2_mu_select):
     """
     Retrieve HWSD v2 data based on selected hwsd2 (map unit) values.
@@ -219,7 +104,7 @@ def get_hwsd2_profile_data(connection, hwsd2_mu_select):
         return pd.DataFrame()
 
 
-def extract_hwsd2_data(connection, lon, lat, buffer_dist, table_name):
+def extract_hwsd2_data(connection, lon, lat, buffer_dist):
     """
     Fetches HWSD soil data from a PostGIS table within a given buffer around a point,
     performing distance and intersection calculations directly on geographic coordinates.
@@ -228,156 +113,42 @@ def extract_hwsd2_data(connection, lon, lat, buffer_dist, table_name):
         lon (float): Longitude of the problem point.
         lat (float): Latitude of the problem point.
         buffer_dist (int): Buffer distance in meters.
-        table_name (str): Name of the PostGIS table (e.g., "hwsdv2").
 
     Returns:
-        DataFrame: Merged data from hwsdv2 and hwsdv2_data.
+        DataFrame: Merged data from hwsd2_segment and hwsd2_data.
+    """
+    point = f"ST_SetSRID(ST_Point({lon}, {lat}), 4326)::geography"
+    main_query = f"""
+        SELECT
+            hwsd2_id as hwsd2,
+            MIN(ST_Distance(
+                shape,
+                {point}
+            )) AS distance,
+            BOOL_OR(ST_Intersects(shape, {point})) AS pt_intersect
+        FROM hwsd2_segment
+        WHERE ST_DWithin(shape, {point}, {buffer_dist})
+        GROUP BY hwsd2_id;
     """
-    # Compute the buffer polygon (in WKT) around the problem point.
-    # Here, we use the geography type to compute a buffer in meters,
-    # then cast it back to geometry in EPSG:4326.
-    # buffer_query = """
-    #     WITH buffer AS (
-    #       SELECT ST_AsText(
-    #                ST_Buffer(
-    #                  ST_SetSRID(ST_Point(%s, %s), 4326)::geography,
-    #                  %s
-    #                )::geometry
-    #              ) AS wkt
-    #     )
-    #     SELECT wkt FROM buffer;
-    # """
-    with connection.cursor():
-        # cur.execute(buffer_query, (lon, lat, buffer_dist))
-        # buffer_wkt = cur.fetchone()[0]
-        # print("Buffer WKT:", buffer_wkt)
-
-        # Build the main query that uses the computed buffer.
-        # Distance is computed by casting geometries to geography,
-        # which returns the geodesic distance in meters.
-        # Q1
-        # main_query = f"""
-        #     WITH
-        #     -- Step 1: Get the polygon that contains the point
-        #     point_poly AS (
-        #         SELECT geom
-        #         FROM {table_name}
-        #         WHERE ST_Intersects(
-        #             geom,
-        #             ST_SetSRID(ST_Point({lon}, {lat}), 4326)
-        #         )
-        #     ),
-
-        #     -- Step 2: Get polygons that intersect the buffer
-        #     valid_geom AS (
-        #         SELECT
-        #             hwsd2,
-        #             geom
-        #         FROM {table_name}
-        #         WHERE geom && ST_GeomFromText('{buffer_wkt}', 4326)
-        #         AND ST_Intersects(geom, ST_GeomFromText('{buffer_wkt}', 4326))
-        #     )
-
-        #     -- Step 3: Filter to those that either contain the point or border the point's polygon
-        #     SELECT
-        #         vg.hwsd2,
-        #         ST_AsEWKB(vg.geom) AS geom,
-        #         ST_Distance(
-        #             vg.geom::geography,
-        #             ST_SetSRID(ST_Point({lon}, {lat}), 4326)::geography
-        #         ) AS distance,
-        #         ST_Intersects(
-        #             vg.geom,
-        #             ST_SetSRID(ST_Point({lon}, {lat}), 4326)
-        #         ) AS pt_intersect
-        #     FROM valid_geom vg, point_poly pp
-        #     WHERE
-        #         ST_Intersects(vg.geom, ST_SetSRID(ST_Point({lon}, {lat}), 4326))
-        #         OR ST_Intersects(vg.geom, pp.geom);
-        # """
- 
-        # # Q2
-        # main_query = f"""
-        #     WITH 
-        #     inputs AS (
-        #         SELECT
-        #             ST_GeomFromText('{buffer_wkt}', 4326) AS buffer_geom,
-        #             ST_SetSRID(ST_Point({lon}, {lat}), 4326) AS pt_geom
-        #     ),
-
-        #     valid_geom AS (
-        #         SELECT
-        #             hwsd2,
-        #             geom
-        #         FROM {table_name}, inputs
-        #         WHERE geom && inputs.buffer_geom
-        #         AND ST_Intersects(geom, inputs.buffer_geom)
-        #     )
-
-        #     SELECT
-        #         vg.hwsd2,
-        #         ST_AsEWKB(vg.geom) AS geom,
-        #         ST_Distance(
-        #             ST_ClosestPoint(vg.geom::geography, inputs.pt_geom::geography),
-        #             inputs.pt_geom::geography
-        #         ) AS distance,
-        #         ST_Intersects(vg.geom, inputs.pt_geom) AS pt_intersect
-        #     FROM valid_geom vg, inputs;
-        # """
-
-        # Q3
-        # point = f"ST_SetSRID(ST_Point({lon}, {lat}), 4326)"
-        # main_query = f"""
-        #     SELECT
-        #         geom,
-        #         hwsd2,
-        #         ST_Distance(
-        #             geom::geography,
-        #             {point}::geography
-        #         ) AS distance,
-        #         ST_Intersects(geom, {point}) AS pt_intersect
-        #     FROM {table_name}
-        #     WHERE ST_DWithin(geom::geography, {point}::geography, {buffer_dist});
-        # """
-
-
-        # Q4
-        point = f"ST_SetSRID(ST_Point({lon}, {lat}), 4326)::geography"
-        main_query = f"""
-            SELECT
-                hwsd2_id as hwsd2,
-                MIN(ST_Distance(
-                    shape,
-                    {point}
-                )) AS distance,
-                BOOL_OR(ST_Intersects(shape, {point})) AS pt_intersect
-            FROM hwsd2_segment
-            WHERE ST_DWithin(shape, {point}, {buffer_dist})
-            GROUP BY hwsd2_id;
-        """
-        
-        # Use GeoPandas to execute the main query and load results into a GeoDataFrame.
-        hwsd = pd.read_sql_query(main_query, connection)
-
-        # Get the list of hwsd2 identifiers.
-        hwsd2_mu_select = hwsd["hwsd2"].tolist()
 
-        # Call get_hwsd2_profile_data using the same connection.
-        hwsd_data = get_hwsd2_profile_data(connection, hwsd2_mu_select)
+    # Use GeoPandas to execute the main query and load results into a GeoDataFrame.
+    hwsd = pd.read_sql_query(main_query, connection)
 
-        # Merge the two datasets.
-        merged = pd.merge(hwsd_data, hwsd, on="hwsd2", how="left").drop_duplicates()
-        return merged
+    # Get the list of hwsd2 identifiers.
+    hwsd2_mu_select = hwsd["hwsd2"].tolist()
 
+    # Call get_hwsd2_profile_data using the same connection.
+    hwsd_data = get_hwsd2_profile_data(connection, hwsd2_mu_select)
 
-# global
+    # Merge the two datasets.
+    merged = pd.merge(hwsd_data, hwsd, on="hwsd2", how="left").drop_duplicates()
+    return merged
 
 
 # Function to fetch data from a PostgreSQL table
-def fetch_table_from_db(connection, table_name):
+def query_db(connection, query):
     try:
         with connection.cursor() as cur:
-            query = f"SELECT * FROM {table_name} ORDER BY id ASC;"
             cur.execute(query)
             rows = cur.fetchall()
 
@@ -388,44 +159,8 @@ def fetch_table_from_db(connection, table_name):
         return None
 
 
-def get_WRB_descriptions(connection, WRB_Comp_List):
-    """
-    Retrieve WRB descriptions based on provided WRB component list.
-    """
-    try:
-        with connection.cursor() as cur:
-
-            # Create placeholders for the SQL IN clause
-            placeholders = ", ".join(["%s"] * len(WRB_Comp_List))
-            sql = f"""SELECT WRB_tax, Description_en, Management_en, Description_es, Management_es,
-                             Description_ks, Management_ks, Description_fr, Management_fr
-                      FROM wrb_fao90_desc
-                      WHERE WRB_tax IN ({placeholders})"""
-
-            # Execute the query with the parameters
-            cur.execute(sql, tuple(WRB_Comp_List))
-            results = cur.fetchall()
-
-        # Convert the results to a pandas DataFrame
-        data = pd.DataFrame(
-            results,
-            columns=[
-                "WRB_tax",
-                "Description_en",
-                "Management_en",
-                "Description_es",
-                "Management_es",
-                "Description_ks",
-                "Management_ks",
-                "Description_fr",
-                "Management_fr",
-            ],
-        )
-
-        return data
-    except Exception as err:
-        logging.error(f"Error querying PostgreSQL: {err}")
-        return None
+def fetch_normdist(connection):
+    return query_db(connection, "SELECT * FROM normdist2 ORDER BY id ASC;")
 
 
 # global only
@@ -516,3 +251,7 @@ def execute_query(query, params):
     except Exception as err:
         logging.error(f"Error querying PostgreSQL: {err}")
         return None
+
+
+def fetch_munsell_rgb_lab(connection):
+    return pd.read_sql_query("SELECT * FROM landpks_munsell_rgb_lab;", connection)
diff --git a/soil_id/global_soil.py b/soil_id/global_soil.py
index c55a90c3..a417a015 100644
--- a/soil_id/global_soil.py
+++ b/soil_id/global_soil.py
@@ -26,7 +26,7 @@
 import pandas as pd
 
 from .color import calculate_deltaE2000
-from .db import extract_hwsd2_data, fetch_table_from_db, get_WRB_descriptions, getSG_descriptions
+from .db import extract_hwsd2_data, fetch_normdist, getSG_descriptions
 from .services import get_soilgrids_classification_data, get_soilgrids_property_data
 from .utils import (
     adjust_depth_interval,
@@ -45,8 +45,6 @@
     silt_calc,
 )
 
-# local libraries
-
 
 @dataclass
 class SoilListOutputData:
@@ -56,21 +54,13 @@ class SoilListOutputData:
 
 
 # entry points
-# getSoilLocationBasedGlobal
-# list_soils
-# rank_soils
-# rankPredictionGlobal
-# getSoilGridsGlobal
-# getSoilGridsUS
+# list_soils_global
+# rank_soils_global
 
-# when a site is created, call list_soils/getSoilLocationBasedGlobal.
-# when a site is created, call getSoilGridsGlobal
-# after user has collected data, call rank_soils/rankPredictionGlobal.
+# when a site is created, call list_soils_global
+# after user has collected data, call rank_soils_global
 
 
-##################################################################################################
-#                                 getSoilLocationBasedGlobal                                     #
-##################################################################################################
 def list_soils_global(connection, lon, lat, buffer_dist=100000):
     # Extract HWSD2 Data
     try:
@@ -78,7 +68,6 @@ def list_soils_global(connection, lon, lat, buffer_dist=100000):
             connection,
             lon,
             lat,
-            table_name="hwsdv2",
             buffer_dist=buffer_dist,
         )
     except KeyError:
@@ -437,16 +426,6 @@ def list_soils_global(connection, lon, lat, buffer_dist=100000):
     # Replace NaN values with an empty string
     mucompdata_cond_prob = mucompdata_cond_prob.fillna("")
 
-    # Merge component descriptions
-    WRB_Comp_Desc = get_WRB_descriptions(
-        connection,
-        mucompdata_cond_prob["compname_grp"].drop_duplicates().tolist()
-    )
-
-    mucompdata_cond_prob = pd.merge(
-        mucompdata_cond_prob, WRB_Comp_Desc, left_on="compname_grp", right_on="WRB_tax", how="left"
-    )
-
     mucompdata_cond_prob = mucompdata_cond_prob.drop_duplicates().reset_index(drop=True)
     mucomp_index = mucompdata_cond_prob.index
 
@@ -461,19 +440,6 @@ def list_soils_global(connection, lon, lat, buffer_dist=100000):
                 "minCompDistance": row.min_dist,
                 "soilDepth": row.c_very_bottom,
             },
-            "siteDescription": {
-                key: row[key]
-                for key in [
-                    "Description_en",
-                    "Management_en",
-                    "Description_es",
-                    "Management_es",
-                    "Description_ks",
-                    "Management_ks",
-                    "Description_fr",
-                    "Management_fr",
-                ]
-            },
         }
         for _, row in mucompdata_cond_prob.iterrows()
     ]
@@ -574,13 +540,8 @@ def convert_to_serializable(obj):
     )
 
 
-##############################################################################################
-#                                   rankPredictionGlobal                                     #
-##############################################################################################
 def rank_soils_global(
     connection,
-    lon,
-    lat,
     list_output_data: SoilListOutputData,
     soilHorizon,
     topDepth,
@@ -954,7 +915,7 @@ def rank_soils_global(
     ysf = []
 
     # Load color distribution data from NormDist2 (FAO90) table
-    rows = fetch_table_from_db(connection, "NormDist2")
+    rows = fetch_normdist(connection)
     row_id = 0
     for row in rows:
         # row is a tuple; iterate over its values.
@@ -1027,7 +988,9 @@ def norm_pdf_vec(x, mean_arr, std_arr):
         # Normalize probabilities
         def normalize(arr):
             min_val, max_val = np.min(arr), np.max(arr)
-            return (arr - min_val) / (max_val - min_val) if max_val != min_val else np.ones_like(arr)
+            return (
+                (arr - min_val) / (max_val - min_val) if max_val != min_val else np.ones_like(arr)
+            )
 
         prob_w = normalize(prob_w)
         prob_r = normalize(prob_r)
@@ -1255,11 +1218,6 @@ def normalize(arr):
     return output_data
 
 
-##################################################################################################
-#                                          getSoilGridsGlobal                                    #
-##################################################################################################
-
-
 def sg_list(connection, lon, lat):
     """
     Query the SoilGrids API (via get_soilgrids_property_data) and post-process
diff --git a/soil_id/tests/global/generate_bulk_test_results.py b/soil_id/tests/global/generate_bulk_test_results.py
index 8604ff1d..e51a0f2c 100644
--- a/soil_id/tests/global/generate_bulk_test_results.py
+++ b/soil_id/tests/global/generate_bulk_test_results.py
@@ -57,14 +57,11 @@
         else:
             start_time = time.perf_counter()
             try:
-
                 list_result = list_soils_global(connection=connection, lat=lat, lon=lon)
 
                 result_record["list_result"] = list_result.soil_list_json
                 result_record["rank_result"] = rank_soils_global(
                     connection=connection,
-                    lat=lat,
-                    lon=lon,
                     list_output_data=list_result,
                     topDepth=pedon["TOPDEP"].values.tolist(),
                     bottomDepth=pedon["BOTDEP"].values.tolist(),
diff --git a/soil_id/tests/global/process_bulk_test_results.py b/soil_id/tests/global/process_bulk_test_results.py
index 874acc49..852c78fe 100644
--- a/soil_id/tests/global/process_bulk_test_results.py
+++ b/soil_id/tests/global/process_bulk_test_results.py
@@ -44,13 +44,14 @@
 
         def last_word(s):
             return s.split()[-1].lower()
+
         secondary_index = [
             i
             for i, match in enumerate(matches)
             if last_word(match["component"]) == last_word(result_record["pedon_name"])
             or last_word(match["component"]) == (last_word(result_record["pedon_name"]) + "s")
         ]
-    
+
         if len(secondary_index) == 0:
             result_record["secondary_result"] = "missing"
         else:
@@ -71,7 +72,11 @@ def last_word(s):
 print(result_groups.count()["pedon_key"] / len(df) * 100)
 
 print("# Secondary result proportions:\n")
-print(secondary_result_groups.count()["pedon_key"] / (len(df) - df["secondary_result"].isnull().sum()) * 100)
+print(
+    secondary_result_groups.count()["pedon_key"]
+    / (len(df) - df["secondary_result"].isnull().sum())
+    * 100
+)
 
 if len(df) < 11:
     print("\n# Execution times:\n")
@@ -99,4 +104,6 @@ def last_word(s):
         indented_lines = ["  " + line for line in lines]
         print("\n".join(indented_lines) + "\n")
 
-df[['pedon_key', 'pedon_name', 'lat', 'lon', 'result', 'secondary_result', 'all_soils']].to_csv('global_algorithm_results.csv', index=False)
\ No newline at end of file
+df[["pedon_key", "pedon_name", "lat", "lon", "result", "secondary_result", "all_soils"]].to_csv(
+    "global_algorithm_results.csv", index=False
+)
diff --git a/soil_id/tests/global/test_global.py b/soil_id/tests/global/test_global.py
index bdaabe45..4403c9ab 100644
--- a/soil_id/tests/global/test_global.py
+++ b/soil_id/tests/global/test_global.py
@@ -27,17 +27,16 @@
     {"lat": -10.07856, "lon": 15.107436},
 ]
 
+
 @pytest.mark.parametrize("location", test_locations)
 def test_soil_location(location):
     with get_datastore_connection() as connection:
         logging.info(f"Testing {location['lon']}, {location['lat']}")
         start_time = time.perf_counter()
         list_soils_result = list_soils_global(connection, location["lon"], location["lat"])
-        logging.info(f"...time: {(time.perf_counter()-start_time):.2f}s")
+        logging.info(f"...time: {(time.perf_counter() - start_time):.2f}s")
         rank_soils_global(
             connection,
-            location["lon"],
-            location["lat"],
             list_output_data=list_soils_result,
             soilHorizon=["Loam"],
             topDepth=[0],
diff --git a/soil_id/tests/legacy/generate_bulk_test_results.py b/soil_id/tests/legacy/generate_bulk_test_results.py
index e4728a0b..dc4276ef 100644
--- a/soil_id/tests/legacy/generate_bulk_test_results.py
+++ b/soil_id/tests/legacy/generate_bulk_test_results.py
@@ -23,6 +23,7 @@
 import pandas
 import numpy
 
+
 def get_rfv(cf):
     if 0 <= cf < 2:
         return "0-1%"
@@ -35,14 +36,36 @@ def get_rfv(cf):
     elif 61 <= cf <= 100:
         return ">60%"
 
+
 def call_legacy(lat, lon, pedon):
     requests.get(f"https://api.landpotential.org/soilidlist?longitude={lon}&latitude={lat}")
 
     base = f"https://api.landpotential.org/soilidrank?longitude={lon}&latitude={lat}"
-    depths = "&".join([f"soilHorizon{idx + 1}_Depth={botdep}" for idx, botdep in enumerate(pedon["BOTDEP"].values.tolist())])
-    rfv = "&".join([f"soilHorizon{idx + 1}_RFV={get_rfv(rfv)}" for idx, rfv in enumerate(pedon["RFV"].values.tolist()) if not numpy.isnan(rfv) ])
-    texture = "&".join([f"soilHorizon{idx + 1}={textClass.upper()}" for idx, textClass in enumerate(pedon["textClass"].values.tolist())])
-    color = "&".join([f"soilHorizon{idx + 1}_LAB={",".join([f"{v}" for v in lab])}" for idx, lab in enumerate(pedon[["L", "A", "B"]].values.tolist())])
+    depths = "&".join(
+        [
+            f"soilHorizon{idx + 1}_Depth={botdep}"
+            for idx, botdep in enumerate(pedon["BOTDEP"].values.tolist())
+        ]
+    )
+    rfv = "&".join(
+        [
+            f"soilHorizon{idx + 1}_RFV={get_rfv(rfv)}"
+            for idx, rfv in enumerate(pedon["RFV"].values.tolist())
+            if not numpy.isnan(rfv)
+        ]
+    )
+    texture = "&".join(
+        [
+            f"soilHorizon{idx + 1}={textClass.upper()}"
+            for idx, textClass in enumerate(pedon["textClass"].values.tolist())
+        ]
+    )
+    color = "&".join(
+        [
+            f"soilHorizon{idx + 1}_LAB={','.join([f'{v}' for v in lab])}"
+            for idx, lab in enumerate(pedon[["L", "A", "B"]].values.tolist())
+        ]
+    )
     req = "&".join([base, depths, rfv, texture, color])
 
     print(req)
@@ -51,6 +74,7 @@ def call_legacy(lat, lon, pedon):
 
     return resp.json()
 
+
 test_data_df = pandas.read_csv(
     os.path.join(os.path.dirname(__file__), "../global/global_pedon_test_dataset.csv")
 )
@@ -66,7 +90,7 @@ def call_legacy(lat, lon, pedon):
 # buffering=1 is line buffering
 with open(result_file_name, "w", buffering=1) as result_file:
     result_agg = {}
- 
+
     for pedon_key, pedon in pedons:
         lat = pedon["Y_LatDD"].values[0]
         lon = pedon["X_LonDD"].values[0]
diff --git a/soil_id/tests/legacy/process_bulk_test_results.py b/soil_id/tests/legacy/process_bulk_test_results.py
index df39181b..c7f009ae 100644
--- a/soil_id/tests/legacy/process_bulk_test_results.py
+++ b/soil_id/tests/legacy/process_bulk_test_results.py
@@ -44,13 +44,14 @@
 
         def last_word(s):
             return s.split()[-1].lower()
+
         secondary_index = [
             i
             for i, match in enumerate(matches)
             if last_word(match["component"]) == last_word(result_record["pedon_name"])
             or last_word(match["component"]) == (last_word(result_record["pedon_name"]) + "s")
         ]
-    
+
         if len(secondary_index) == 0:
             result_record["secondary_result"] = "missing"
         else:
@@ -71,7 +72,11 @@ def last_word(s):
 print(result_groups.count()["pedon_key"] / len(df) * 100)
 
 print("# Secondary result proportions:\n")
-print(secondary_result_groups.count()["pedon_key"] / (len(df) - df["secondary_result"].isnull().sum()) * 100)
+print(
+    secondary_result_groups.count()["pedon_key"]
+    / (len(df) - df["secondary_result"].isnull().sum())
+    * 100
+)
 
 if len(df) < 11:
     print("\n# Execution times:\n")
@@ -79,19 +84,3 @@ def last_word(s):
 else:
     print("\n# Execution time deciles:\n")
     print(pandas.qcut(df["execution_time_s"], q=10, retbins=True)[1])
-
-
-if "crash" in result_groups.groups:
-    crashes = result_groups.get_group(("crash",))
-    # counts = df.value_counts(subset="traceback").sort_values(ascending=False)
-
-    # print(f"\n# Unique crash tracebacks ({len(counts)} unique, {len(crashes)} total):\n")
-
-    # for idx, (traceback, count) in enumerate(counts.to_dict().items()):
-    #     example = crashes.loc[crashes["traceback"] == traceback].iloc[0]
-    #     print(
-    #         f"Traceback #{idx + 1}, occurred {count} times. Example pedon: {example['pedon_key']}, lat: {example['lat']}, lon: {example['lon']}"
-    #     )
-    #     lines = traceback.splitlines()
-    #     indented_lines = ["  " + line for line in lines]
-    #     print("\n".join(indented_lines) + "\n")
\ No newline at end of file
diff --git a/soil_id/tests/test_find_region.py b/soil_id/tests/test_find_region.py
index 635ee6d9..06be9f43 100644
--- a/soil_id/tests/test_find_region.py
+++ b/soil_id/tests/test_find_region.py
@@ -17,13 +17,14 @@
 import pytest
 
 test_locations = [
-    (38.968984, -103.625974, 'US'),
-    (-6.708697, -69.306646, 'Global'),
-    (-3.521766, -136.995712, 'Global')
+    (38.968984, -103.625974, "US"),
+    (-6.708697, -69.306646, "Global"),
+    (-3.521766, -136.995712, "Global"),
 ]
 
+
 @pytest.mark.parametrize("location", test_locations)
 def test_find_region(location):
-    lat, lon, region = location    
+    lat, lon, region = location
 
     assert find_region_for_location(lat=lat, lon=lon) == region
diff --git a/soil_id/tests/us/generate_bulk_test_results.py b/soil_id/tests/us/generate_bulk_test_results.py
index 2264c005..a771fb16 100644
--- a/soil_id/tests/us/generate_bulk_test_results.py
+++ b/soil_id/tests/us/generate_bulk_test_results.py
@@ -22,6 +22,7 @@
 
 import pandas
 
+from soil_id.db import get_datastore_connection
 from soil_id.us_soil import list_soils, rank_soils
 
 
@@ -34,6 +35,7 @@ def clean_soil_list_json(obj):
         return list(map(clean_soil_list_json, obj))
     return obj
 
+
 test_data_df = pandas.read_csv(
     os.path.join(os.path.dirname(__file__), "US_SoilID_KSSL_LPKS_Testing.csv")
 )
@@ -61,7 +63,7 @@ def clean_soil_list_json(obj):
 
         start_time = time.perf_counter()
         try:
-            list_result = list_soils(lat=lat, lon=lon)
+            list_result = list_soils(get_datastore_connection(), lat=lat, lon=lon)
 
             result_record["list_result"] = list_result.soil_list_json
             result_record["rank_result"] = rank_soils(
diff --git a/soil_id/tests/us/process_bulk_test_results.py b/soil_id/tests/us/process_bulk_test_results.py
index c4c91a5f..1de6e2cb 100644
--- a/soil_id/tests/us/process_bulk_test_results.py
+++ b/soil_id/tests/us/process_bulk_test_results.py
@@ -78,4 +78,6 @@
         indented_lines = ["  " + line for line in lines]
         print("\n".join(indented_lines) + "\n")
 
-df[['pedon_key', 'pedon_name', 'lat', 'lon', 'result', 'all_soils']].to_csv('us_algorithm_results.csv', index=False)
+df[["pedon_key", "pedon_name", "lat", "lon", "result", "all_soils"]].to_csv(
+    "us_algorithm_results.csv", index=False
+)
diff --git a/soil_id/tests/us/test_us.py b/soil_id/tests/us/test_us.py
index 1a104d35..5d8ade87 100644
--- a/soil_id/tests/us/test_us.py
+++ b/soil_id/tests/us/test_us.py
@@ -18,6 +18,7 @@
 
 import pytest
 
+from soil_id.db import get_datastore_connection
 from soil_id.us_soil import list_soils, rank_soils
 
 test_locations = [
@@ -55,7 +56,7 @@ def test_soil_location(location):
     cracks = False
 
     start_time = time.perf_counter()
-    list_soils_result = list_soils(location["lon"], location["lat"])
+    list_soils_result = list_soils(get_datastore_connection(), location["lon"], location["lat"])
     logging.info(f"...time: {(time.perf_counter() - start_time):.2f}s")
     rank_soils(
         location["lon"],
@@ -74,7 +75,9 @@ def test_soil_location(location):
 
 
 def test_empty_rank():
-    SoilListOutputData = list_soils(test_locations[0]["lon"], test_locations[0]["lat"])
+    SoilListOutputData = list_soils(
+        get_datastore_connection(), test_locations[0]["lon"], test_locations[0]["lat"]
+    )
     rank_soils(
         test_locations[0]["lon"],
         test_locations[0]["lat"],
diff --git a/soil_id/us_soil.py b/soil_id/us_soil.py
index fd69378d..2d0acb8e 100644
--- a/soil_id/us_soil.py
+++ b/soil_id/us_soil.py
@@ -25,10 +25,8 @@
 import pandas as pd
 from pandas import json_normalize
 
-# local libraries
-import soil_id.config
-
 from .color import getProfileLAB, lab2munsell, munsell2rgb
+from .db import fetch_munsell_rgb_lab
 from .services import get_soil_series_data, get_soilweb_data, sda_return, get_elev_data
 from .soil_sim import soil_sim
 from .utils import (
@@ -55,15 +53,11 @@
 )
 
 # entry points
-# getSoilLocationBasedGlobal
 # list_soils
 # rank_soils
-# rankPredictionGlobal
-# getSoilGridsGlobal
 
-# when a site is created, call list_soils/getSoilLocationBasedGlobal.
-# when a site is created, call getSoilGridsGlobal
-# after user has collected data, call rank_soils/rankPredictionGlobal.
+# when a site is created, call list_soils
+# after user has collected data, call rank_soils
 
 # set Pandas dataframe options
 pd.set_option("future.no_silent_downcasting", True)
@@ -76,12 +70,9 @@ class SoilListOutputData:
     map_unit_component_data_csv: str
 
 
-############################################################################################
-#                                   list_soils                                 #
-############################################################################################
-def list_soils(lon, lat):
+def list_soils(connection, lon, lat):
     # Load in LAB to Munsell conversion look-up table
-    color_ref = pd.read_csv(soil_id.config.MUNSELL_RGB_LAB_PATH)
+    color_ref = fetch_munsell_rgb_lab(connection)
     LAB_ref = color_ref[["cielab_l", "cielab_a", "cielab_b"]]
     munsell_ref = color_ref[["hue", "value", "chroma"]]
 
@@ -1538,9 +1529,6 @@ def list_soils(lon, lat):
     )
 
 
-##############################################################################################
-#                                   rank_soils                                         #
-##############################################################################################
 def rank_soils(
     lon,
     lat,
@@ -2046,7 +2034,7 @@ def rank_soils(
 
     # Concatenate the sorted and ranked groups
     D_final = pd.concat(soilIDList_data).reset_index(drop=True)
-    
+
     # Merge with the Rank_Filter data
     D_final = pd.merge(D_final, Rank_Filter, on="compname", how="left")
 
diff --git a/soil_id/utils.py b/soil_id/utils.py
index 1909758c..ebb19f1d 100644
--- a/soil_id/utils.py
+++ b/soil_id/utils.py
@@ -945,32 +945,6 @@ def _gower_distance_row(
     return (sum_cat + sum_num) / feature_weight_sum
 
 
-def compute_site_similarity(
-    data: pd.DataFrame, features: list[str], feature_weight: np.ndarray
-) -> np.ndarray:
-    """
-    Compute Gower distances among the rows of `data` using only `features`
-    and the matching weights in `feature_weight`.
-
-    Parameters:
-    - data: DataFrame with columns "compname" + at least all names in `features`.
-    - features: list of column names to include in the distance.
-    - feature_weight: 1D array of floats, same length as `features`.
-
-    Returns:
-    - (n×n) numpy array of Gower distances, with NaNs replaced by the max distance.
-    """
-    # 1) subset & index by compname
-    site_mat = data.set_index("compname")[features]
-
-    # 2) compute
-    D = gower_distances(site_mat, feature_weight=feature_weight)
-
-    # 3) fill NaNs
-    D = np.where(np.isnan(D), np.nanmax(D), D)
-    return D
-
-
 def compute_text_comp(bedrock, p_sandpct_intpl, soilHorizon):
     """
     Computes a value based on the depth of bedrock and length of sand percentages.
@@ -1065,48 +1039,6 @@ def compute_lab_comp(cr_df):
     return 20 if not cr_df.dropna().empty else 0
 
 
-def compute_data_completeness(
-    bedrock, p_sandpct_intpl, soilHorizon, p_cfg_intpl, rfvDepth, cracks, cr_df
-):
-    text_comp = compute_text_comp(bedrock, p_sandpct_intpl, soilHorizon)
-    rf_comp = compute_rf_comp(bedrock, p_cfg_intpl, rfvDepth)
-    crack_comp = compute_crack_comp(cracks)
-    lab_comp = compute_lab_comp(cr_df)
-
-    data_completeness = text_comp + rf_comp + crack_comp + lab_comp
-
-    # Generate data completeness comment
-    if text_comp < 45:
-        text_comment = " soil texture,"
-    else:
-        text_comment = ""
-    if rf_comp < 30:
-        rf_comment = " soil rock fragments,"
-    else:
-        rf_comment = ""
-    if lab_comp < 20:
-        lab_comment = " soil color (20-50cm),"
-    else:
-        lab_comment = ""
-    if crack_comp < 5:
-        crack_comment = " soil cracking,"
-    else:
-        crack_comment = ""
-    if data_completeness < 100:
-        text_completeness = (
-            "To improve predictions, complete data entry for:"
-            + crack_comment
-            + text_comment
-            + rf_comment
-            + lab_comment
-            + " and re-sync."
-        )
-    else:
-        text_completeness = "SoilID data entry for this site is complete."
-
-    return data_completeness, text_completeness
-
-
 def trim_fraction(text):
     """
     Removes trailing ".0" from a given text string.
@@ -1782,35 +1714,6 @@ def pedon_color(lab_Color, top, bottom):
         return [pedon_l_mean, pedon_a_mean, pedon_b_mean]
 
 
-def extract_values(obj, key):
-    """
-    Pull all values of the specified key from a nested dictionary or list.
-
-    Parameters:
-    - obj (dict or list): The nested dictionary or list to search.
-    - key: The key to look for.
-
-    Returns:
-    - list: A list of values associated with the specified key.
-    """
-
-    arr = []
-
-    def extract(obj, key):
-        if isinstance(obj, dict):
-            if key in obj:
-                arr.append(obj[key])
-            for k, v in obj.items():
-                if isinstance(v, (dict, list)):
-                    extract(v, key)
-        elif isinstance(obj, list):
-            for item in obj:
-                extract(item, key)
-
-    extract(obj, key)
-    return arr
-
-
 # Functions for AWC simulation
 
 
@@ -2235,17 +2138,6 @@ def entropy_score(series):
     return sorted_information_gains
 
 
-# function to get data and aggregate SG data
-def sg_get_and_agg(variable, sg_data_w, bottom, return_depth=False):
-    pd_int = getProfile_SG(sg_data_w, variable, c_bot=False)
-    if return_depth:
-        pd_lpks, lpks_depths = agg_data_layer(data=pd_int.var_pct_intpl, bottom=bottom, depth=True)
-        return (pd_lpks.replace(np.nan, ""), lpks_depths)
-    else:
-        pd_lpks = agg_data_layer(data=pd_int.var_pct_intpl, bottom=bottom, depth=False)
-        return pd_lpks.replace(np.nan, "")
-
-
 def adjust_depth_interval(data, target_length=200):
     """Adjusts the depth interval of user data."""