Commit ac0a581

fix: make black/lint format
1 parent 3e70f5b commit ac0a581

File tree

7 files changed: +82 −63 lines

soil_id/config.py
soil_id/db.py
soil_id/global_soil.py
soil_id/tests/global/generate_bulk_test_results.py
soil_id/tests/us/generate_bulk_test_results.py
soil_id/us_soil.py
soil_id/utils.py
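
Judging by the changes below, this commit applies black (string-quote normalization, blank-line and line-wrapping fixes) plus an import reorder in soil_id/us_soil.py and soil_id/utils.py. A minimal sketch of the kind of rewrite involved, using black's Python API; the 100-character line length is an assumption inferred from the wrapped lines in this diff:

# A minimal sketch (not part of the commit) showing the kind of rewrite
# black performs here, via its Python API. Assumes black is installed;
# line_length=100 is inferred from the wrapped lines in this diff.
import black

src = "hwsd = gpd.read_postgis(main_query, conn, geom_col='geom')\n"
print(black.format_str(src, mode=black.Mode(line_length=100)))
# -> hwsd = gpd.read_postgis(main_query, conn, geom_col="geom")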

soil_id/config.py

Lines changed: 2 additions & 0 deletions
@@ -13,10 +13,12 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see https://www.gnu.org/licenses/.
 from dotenv import load_dotenv
+
 load_dotenv(dotenv_path="/mnt/c/Users/jmaynard/Documents/GitHub/soil-id-algorithm/soil_id/.env")
 
 import os
 import tempfile
+
 from platformdirs import user_cache_dir
 
 DATA_PATH = os.environ.get("DATA_PATH", "Data")

soil_id/db.py

Lines changed: 32 additions & 16 deletions
@@ -27,6 +27,7 @@
 # local libraries
 import soil_id.config
 
+
 def get_datastore_connection():
     """
     Establish a connection to the datastore using app configurations.
@@ -165,18 +166,18 @@ def get_hwsd2_profile_data(conn, hwsd2_mu_select):
     """
     Retrieve HWSD v2 data based on selected hwsd2 (map unit) values.
     This version reuses an existing connection.
-
+
     Parameters:
         conn: A live database connection.
         hwsd2_mu_select (list): List of selected hwsd2 values.
-
+
     Returns:
         DataFrame: Data from hwsd2_data.
     """
     if not hwsd2_mu_select:
         logging.warning("HWSD2 map unit selection is empty. Returning empty DataFrame.")
         return pd.DataFrame()
-
+
     try:
         with conn.cursor() as cur:
             # Create placeholders for the SQL IN clause
@@ -190,14 +191,29 @@ def get_hwsd2_profile_data(conn, hwsd2_mu_select):
             """
             cur.execute(sql_query, tuple(hwsd2_mu_select))
             results = cur.fetchall()
-
+
             # Convert the results to a pandas DataFrame.
             data = pd.DataFrame(
                 results,
                 columns=[
-                    "hwsd2", "compid", "id", "wise30s_smu_id", "sequence", "share", "fao90",
-                    "layer", "topdep", "botdep", "coarse", "sand", "silt", "clay", "cec_soil",
-                    "ph_water", "elec_cond", "fao90_name"
+                    "hwsd2",
+                    "compid",
+                    "id",
+                    "wise30s_smu_id",
+                    "sequence",
+                    "share",
+                    "fao90",
+                    "layer",
+                    "topdep",
+                    "botdep",
+                    "coarse",
+                    "sand",
+                    "silt",
+                    "clay",
+                    "cec_soil",
+                    "ph_water",
+                    "elec_cond",
+                    "fao90_name",
                 ],
             )
             return data
@@ -210,13 +226,13 @@ def extract_hwsd2_data(lon, lat, buffer_dist, table_name):
     """
    Fetches HWSD soil data from a PostGIS table within a given buffer around a point,
    performing distance and intersection calculations directly on geographic coordinates.
-
+
    Parameters:
        lon (float): Longitude of the problem point.
        lat (float): Latitude of the problem point.
        buffer_dist (int): Buffer distance in meters.
        table_name (str): Name of the PostGIS table (e.g., "hwsdv2").
-
+
    Returns:
        DataFrame: Merged data from hwsdv2 and hwsdv2_data.
    """
@@ -240,7 +256,7 @@ def extract_hwsd2_data(lon, lat, buffer_dist, table_name):
             cur.execute(buffer_query, (lon, lat, buffer_dist))
             buffer_wkt = cur.fetchone()[0]
             print("Buffer WKT:", buffer_wkt)
-
+
             # Build the main query that uses the computed buffer.
             # Distance is computed by casting geometries to geography,
             # which returns the geodesic distance in meters.
@@ -270,20 +286,20 @@ def extract_hwsd2_data(lon, lat, buffer_dist, table_name):
                     ST_SetSRID(ST_Point({lon}, {lat}), 4326)
                 );
             """
-
+
             # Use GeoPandas to execute the main query and load results into a GeoDataFrame.
-            hwsd = gpd.read_postgis(main_query, conn, geom_col='geom')
+            hwsd = gpd.read_postgis(main_query, conn, geom_col="geom")
             print("Main query returned", len(hwsd), "rows.")
-
+
             # Remove the geometry column (if not needed) from this dataset.
             hwsd = hwsd.drop(columns=["geom"])
-
+
             # Get the list of hwsd2 identifiers.
             hwsd2_mu_select = hwsd["hwsd2"].tolist()
-
+
             # Call get_hwsd2_profile_data using the same connection.
             hwsd_data = get_hwsd2_profile_data(conn, hwsd2_mu_select)
-
+
             # Merge the two datasets.
             merged = pd.merge(hwsd_data, hwsd, on="hwsd2", how="left").drop_duplicates()
             return merged
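
The first hunk above references building placeholders for a SQL IN clause in get_hwsd2_profile_data. A hypothetical sketch of that pattern, assuming a psycopg2-style connection; the table and column names (and DSN) are illustrative, since the actual query text falls outside the hunks shown:

# Hypothetical sketch of the IN-clause placeholder pattern; table/column
# names are illustrative and psycopg2 is an assumption.
import psycopg2

hwsd2_mu_select = [11111, 22222]  # example map unit ids
placeholders = ", ".join(["%s"] * len(hwsd2_mu_select))
sql_query = f"SELECT * FROM hwsd2_data WHERE hwsd2 IN ({placeholders})"

with psycopg2.connect("dbname=soil_id") as conn:  # placeholder DSN
    with conn.cursor() as cur:
        cur.execute(sql_query, tuple(hwsd2_mu_select))
        results = cur.fetchall()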

soil_id/global_soil.py

Lines changed: 30 additions & 32 deletions
@@ -87,11 +87,11 @@ def list_soils_global(lon, lat):
             buffer_dist=10000,
         )
     except KeyError:
-        return("Data_unavailable")
-
+        return "Data_unavailable"
+
     if hwsd2_data.empty:
-        return("Data_unavailable")
-
+        return "Data_unavailable"
+
     # Component Data
     mucompdata_pd = hwsd2_data[["hwsd2", "fao90_name", "distance", "share", "compid"]]
     mucompdata_pd.columns = ["mukey", "compname", "distance", "share", "cokey"]
@@ -632,20 +632,12 @@ def rank_soils_global(
     p_cfg = [getCF_fromClass(rf) for rf in rfvDepth]
 
     p_sandpct_intpl = [
-        spt[i]
-        for i in range(len(soilHorizon))
-        for _ in range(top[i], bottom[i])
+        spt[i] for i in range(len(soilHorizon)) for _ in range(top[i], bottom[i])
     ]
     p_claypct_intpl = [
-        cpt[i]
-        for i in range(len(soilHorizon))
-        for _ in range(top[i], bottom[i])
-    ]
-    p_cfg_intpl = [
-        p_cfg[i]
-        for i in range(len(soilHorizon))
-        for _ in range(top[i], bottom[i])
+        cpt[i] for i in range(len(soilHorizon)) for _ in range(top[i], bottom[i])
     ]
+    p_cfg_intpl = [p_cfg[i] for i in range(len(soilHorizon)) for _ in range(top[i], bottom[i])]
 
     # Length of interpolated texture and RF depth
     p_bottom_depth = pd.DataFrame([-999, "sample_pedon", soil_df_slice.bottom.iloc[-1]]).T
@@ -727,7 +719,7 @@ def rank_soils_global(
 
     compnames = mucompdata_pd[["compname", "compname_grp"]]
 
-     # Determine the maximum depth based on bedrock and user input
+    # Determine the maximum depth based on bedrock and user input
     if bedrock is None:
         max_depth = min(p_bottom_depth.bottom_depth.values[0], 200)
     else:
@@ -740,7 +732,7 @@ def rank_soils_global(
     soil_matrix = pd.DataFrame(
         np.nan, index=np.arange(max_depth), columns=np.arange(len(slices_of_soil))
     )
-
+
     for i in range(len(slices_of_soil)):
         slice_end = slices_of_soil.bottom_depth.iloc[i]
         soil_matrix.iloc[:slice_end, i] = 1
@@ -759,9 +751,7 @@ def rank_soils_global(
 
     # Horizon Data Similarity
     if soilIDRank_output_pd is not None:
-        cokey_groups = [
-            group for _, group in soilIDRank_output_pd.groupby("compname", sort=False)
-        ]
+        cokey_groups = [group for _, group in soilIDRank_output_pd.groupby("compname", sort=False)]
 
         # Create lists to store component statuses
         Comp_Rank_Status, Comp_Missing_Status, Comp_name = [], [], []
@@ -799,33 +789,41 @@ def rank_soils_global(
 
         dis_mat_list = []
 
-        for depth in soil_matrix.index:  # depth represents a user-recorded depth slice (e.g. 100, 101, …, 120)
+        for (
+            depth
+        ) in (
+            soil_matrix.index
+        ):  # depth represents a user-recorded depth slice (e.g. 100, 101, …, 120)
             # Gather the slice from each horizon variable
             slice_list = [horizon.loc[depth] for horizon in horz_vars]
-
+
             # Concatenate slices horizontally then transpose so that each row is one component's data
             slice_df = pd.concat(slice_list, axis=1).T
 
             # If bedrock is specified and the depth is less than bedrock, filter out columns with missing data
             if bedrock is not None and depth < bedrock:
                 # Get columns that are non-null after dropping compname
-                sample_vars = slice_df.dropna(axis='columns').drop('compname', axis=1).columns.tolist()
-
+                sample_vars = (
+                    slice_df.dropna(axis="columns").drop("compname", axis=1).columns.tolist()
+                )
+
                 # If there are fewer than 2 variables available, use the "sample_pedon" row to decide
                 if len(sample_vars) < 2:
-                    sample_vars = (slice_df.loc[slice_df['compname'] == "sample_pedon"]
-                                   .dropna(axis='columns')
-                                   .drop('compname', axis=1)
-                                   .columns.tolist())
-
+                    sample_vars = (
+                        slice_df.loc[slice_df["compname"] == "sample_pedon"]
+                        .dropna(axis="columns")
+                        .drop("compname", axis=1)
+                        .columns.tolist()
+                    )
+
                 # Subset slice_df to only include the sample variables that were kept
                 slice_mat = slice_df.loc[:, slice_df.columns.isin(sample_vars)]
             else:
-                slice_mat = slice_df.drop('compname', axis=1)
+                slice_mat = slice_df.drop("compname", axis=1)
 
             # Compute the Gower distance on the prepared slice matrix.
             D = gower_distances(slice_mat)
-
+
             dis_mat_list.append(D)
 
         # Check if any components have all NaNs at every slice
@@ -920,7 +918,7 @@ def rank_soils_global(
     # --------------------------------------------------------------------------------------------
 
     # Start of soil color
-    #Load in SRG color distribution data
+    # Load in SRG color distribution data
     wmf = []
     wsf = []
     rmf = []
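
The collapsed comprehensions in rank_soils_global repeat each horizon's value once per centimeter of its depth interval, yielding one value per 1-cm slice. A standalone illustration of that expansion with made-up numbers:

# Standalone illustration of the depth-slice expansion used for
# p_sandpct_intpl and friends; the values are made up.
spt = [45, 30]   # sand % for two horizons
top = [0, 3]     # horizon top depths (cm)
bottom = [3, 5]  # horizon bottom depths (cm)

p_sandpct_intpl = [spt[i] for i in range(len(spt)) for _ in range(top[i], bottom[i])]
print(p_sandpct_intpl)  # [45, 45, 45, 30, 30] -- one value per 1-cm slice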

soil_id/tests/global/generate_bulk_test_results.py

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
 import traceback
 
 import pandas
+
 from soil_id.global_soil import list_soils_global, rank_soils_global
 
 test_data_df = pandas.read_csv(

soil_id/tests/us/generate_bulk_test_results.py

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
 import traceback
 
 import pandas
+
 from soil_id.us_soil import list_soils, rank_soils
 
 test_data_df = pandas.read_csv(

soil_id/us_soil.py

Lines changed: 4 additions & 4 deletions
@@ -32,8 +32,11 @@
 from .services import get_soil_series_data, get_soilweb_data, sda_return
 from .soil_sim import soil_sim
 from .utils import (
+    adjust_depth_interval,
     aggregate_data,
     compute_site_similarity,
+    create_new_layer,
+    create_new_layer_osd,
     drop_cokey_horz,
     extract_mucompdata_STATSGO,
     extract_muhorzdata_STATSGO,
@@ -49,11 +52,8 @@
     process_distance_scores,
     process_horizon_data,
     process_site_data,
-    update_intpl_data,
-    adjust_depth_interval,
     update_esd_data,
-    create_new_layer,
-    create_new_layer_osd,
+    update_intpl_data,
 )
 
 # entry points

soil_id/utils.py

Lines changed: 12 additions & 11 deletions
@@ -25,8 +25,11 @@
 import geopandas as gpd
 import numpy as np
 import pandas as pd
+import pyproj
 from numpy.linalg import cholesky
 from osgeo import ogr
+from pyproj import CRS
+from pyproj.database import query_utm_crs_info
 from rosetta import SoilData, rosetta
 from scipy.interpolate import UnivariateSpline
 from scipy.sparse import issparse
@@ -35,9 +38,6 @@
 from sklearn.impute import SimpleImputer
 from sklearn.metrics import pairwise
 from sklearn.utils import validation
-import pyproj
-from pyproj.database import query_utm_crs_info
-from pyproj import CRS
 
 # local libraries
 import soil_id.config
@@ -1398,7 +1398,7 @@ def convert_geometry_to_utm(geometry, src_crs="EPSG:4326"):
 #     utm_crs_list = query_utm_crs_info(datum_name="WGS84", area_of_interest=aoi)
 #     if not utm_crs_list:
 #         raise ValueError("No UTM CRS found for the specified location.")
-
+
 #     # Select the first matching CRS
 #     crs = CRS.from_epsg(utm_crs_list[0].code)
 #     return crs
@@ -1407,15 +1407,15 @@ def convert_geometry_to_utm(geometry, src_crs="EPSG:4326"):
 # def get_target_utm_srid(lat, lon):
 #     """
 #     Determine the target UTM SRID (as an integer) based on latitude and longitude.
-
+
 #     Parameters:
 #         lat (float): The latitude coordinate.
 #         lon (float): The longitude coordinate.
-
+
 #     Returns:
 #         int: The UTM EPSG code as an integer. For example, for a point in the northern
 #              hemisphere in UTM zone 33, the function returns 32633.
-
+
 #     Raises:
 #         ValueError: If the latitude is not in the valid range [-90, 90] or the longitude
 #             is not in the valid range [-180, 180].
@@ -1425,10 +1425,10 @@ def convert_geometry_to_utm(geometry, src_crs="EPSG:4326"):
 #         raise ValueError("Latitude must be between -90 and 90.")
 #     if not (-180 <= lon <= 180):
 #         raise ValueError("Longitude must be between -180 and 180.")
-
+
 #     # Determine UTM zone: zones are 6° wide starting at -180.
 #     utm_zone = int((lon + 180) / 6) + 1
-
+
 #     # For the northern hemisphere, UTM EPSG codes start at 32600; for the southern, 32700.
 #     if lat >= 0:
 #         return 32600 + utm_zone
@@ -1843,8 +1843,9 @@ def pedon_color(lab_Color, top, bottom):
     )
 
     # Check for None values
-    if any(x is None for x in [top, bottom]) or \
-       any(s.isnull().any() for s in [pedon_l, pedon_a, pedon_b]):
+    if any(x is None for x in [top, bottom]) or any(
+        s.isnull().any() for s in [pedon_l, pedon_a, pedon_b]
+    ):
         return np.nan
 
     if top[0] != 0:
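
The commented-out get_target_utm_srid in the hunks above maps longitude to a 6°-wide UTM zone and selects the 326xx (northern) or 327xx (southern) EPSG range. A runnable condensation of that logic:

# Condensed from the commented-out get_target_utm_srid shown above.
def target_utm_srid(lat: float, lon: float) -> int:
    if not (-90 <= lat <= 90):
        raise ValueError("Latitude must be between -90 and 90.")
    if not (-180 <= lon <= 180):
        raise ValueError("Longitude must be between -180 and 180.")
    utm_zone = int((lon + 180) / 6) + 1  # zones are 6 degrees wide from -180
    return (32600 if lat >= 0 else 32700) + utm_zone

print(target_utm_srid(52.5, 13.4))  # Berlin -> 32633 (UTM zone 33N)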
