Commit ac0a581

fix: make black/lint format
1 parent 3e70f5b commit ac0a581

File tree

7 files changed: +82 −63 lines

soil_id/config.py
soil_id/db.py
soil_id/global_soil.py
soil_id/tests/global/generate_bulk_test_results.py
soil_id/tests/us/generate_bulk_test_results.py
soil_id/us_soil.py
soil_id/utils.py
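
Judging by the changes below, this commit applies black (string-quote normalization, blank-line and line-wrapping fixes) plus an import reorder in soil_id/us_soil.py and soil_id/utils.py. A minimal sketch of the kind of rewrite involved, using black's Python API; the 100-character line length is an assumption inferred from the wrapped lines in this diff:

# A minimal sketch (not part of the commit) showing the kind of rewrite
# black performs here, via its Python API. Assumes black is installed;
# line_length=100 is inferred from the wrapped lines in this diff.
import black

src = "hwsd = gpd.read_postgis(main_query, conn, geom_col='geom')\n"
print(black.format_str(src, mode=black.Mode(line_length=100)))
# -> hwsd = gpd.read_postgis(main_query, conn, geom_col="geom")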

soil_id/config.py

Lines changed: 2 additions & 0 deletions
@@ -13,10 +13,12 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see https://www.gnu.org/licenses/.
 from dotenv import load_dotenv
+
 load_dotenv(dotenv_path="/mnt/c/Users/jmaynard/Documents/GitHub/soil-id-algorithm/soil_id/.env")
 
 import os
 import tempfile
+
 from platformdirs import user_cache_dir
 
 DATA_PATH = os.environ.get("DATA_PATH", "Data")

soil_id/db.py

Lines changed: 32 additions & 16 deletions
@@ -27,6 +27,7 @@
 # local libraries
 import soil_id.config
 
+
 def get_datastore_connection():
     """
     Establish a connection to the datastore using app configurations.
@@ -165,18 +166,18 @@ def get_hwsd2_profile_data(conn, hwsd2_mu_select):
     """
     Retrieve HWSD v2 data based on selected hwsd2 (map unit) values.
     This version reuses an existing connection.
-
+
     Parameters:
         conn: A live database connection.
         hwsd2_mu_select (list): List of selected hwsd2 values.
-
+
     Returns:
         DataFrame: Data from hwsd2_data.
     """
     if not hwsd2_mu_select:
         logging.warning("HWSD2 map unit selection is empty. Returning empty DataFrame.")
         return pd.DataFrame()
-
+
     try:
         with conn.cursor() as cur:
             # Create placeholders for the SQL IN clause
@@ -190,14 +191,29 @@ def get_hwsd2_profile_data(conn, hwsd2_mu_select):
             """
             cur.execute(sql_query, tuple(hwsd2_mu_select))
             results = cur.fetchall()
-
+
             # Convert the results to a pandas DataFrame.
             data = pd.DataFrame(
                 results,
                 columns=[
-                    "hwsd2", "compid", "id", "wise30s_smu_id", "sequence", "share", "fao90",
-                    "layer", "topdep", "botdep", "coarse", "sand", "silt", "clay", "cec_soil",
-                    "ph_water", "elec_cond", "fao90_name"
+                    "hwsd2",
+                    "compid",
+                    "id",
+                    "wise30s_smu_id",
+                    "sequence",
+                    "share",
+                    "fao90",
+                    "layer",
+                    "topdep",
+                    "botdep",
+                    "coarse",
+                    "sand",
+                    "silt",
+                    "clay",
+                    "cec_soil",
+                    "ph_water",
+                    "elec_cond",
+                    "fao90_name",
                 ],
             )
             return data
@@ -210,13 +226,13 @@ def extract_hwsd2_data(lon, lat, buffer_dist, table_name):
     """
    Fetches HWSD soil data from a PostGIS table within a given buffer around a point,
    performing distance and intersection calculations directly on geographic coordinates.
-
+
    Parameters:
        lon (float): Longitude of the problem point.
        lat (float): Latitude of the problem point.
        buffer_dist (int): Buffer distance in meters.
        table_name (str): Name of the PostGIS table (e.g., "hwsdv2").
-
+
    Returns:
        DataFrame: Merged data from hwsdv2 and hwsdv2_data.
    """
@@ -240,7 +256,7 @@ def extract_hwsd2_data(lon, lat, buffer_dist, table_name):
             cur.execute(buffer_query, (lon, lat, buffer_dist))
             buffer_wkt = cur.fetchone()[0]
             print("Buffer WKT:", buffer_wkt)
-
+
             # Build the main query that uses the computed buffer.
             # Distance is computed by casting geometries to geography,
             # which returns the geodesic distance in meters.
@@ -270,20 +286,20 @@ def extract_hwsd2_data(lon, lat, buffer_dist, table_name):
                     ST_SetSRID(ST_Point({lon}, {lat}), 4326)
                 );
             """
-
+
             # Use GeoPandas to execute the main query and load results into a GeoDataFrame.
-            hwsd = gpd.read_postgis(main_query, conn, geom_col='geom')
+            hwsd = gpd.read_postgis(main_query, conn, geom_col="geom")
             print("Main query returned", len(hwsd), "rows.")
-
+
             # Remove the geometry column (if not needed) from this dataset.
             hwsd = hwsd.drop(columns=["geom"])
-
+
             # Get the list of hwsd2 identifiers.
             hwsd2_mu_select = hwsd["hwsd2"].tolist()
-
+
             # Call get_hwsd2_profile_data using the same connection.
             hwsd_data = get_hwsd2_profile_data(conn, hwsd2_mu_select)
-
+
             # Merge the two datasets.
             merged = pd.merge(hwsd_data, hwsd, on="hwsd2", how="left").drop_duplicates()
             return merged
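
The first hunk above references building placeholders for a SQL IN clause in get_hwsd2_profile_data. A hypothetical sketch of that pattern, assuming a psycopg2-style connection; the table and column names (and DSN) are illustrative, since the actual query text falls outside the hunks shown:

# Hypothetical sketch of the IN-clause placeholder pattern; table/column
# names are illustrative and psycopg2 is an assumption.
import psycopg2

hwsd2_mu_select = [11111, 22222]  # example map unit ids
placeholders = ", ".join(["%s"] * len(hwsd2_mu_select))
sql_query = f"SELECT * FROM hwsd2_data WHERE hwsd2 IN ({placeholders})"

with psycopg2.connect("dbname=soil_id") as conn:  # placeholder DSN
    with conn.cursor() as cur:
        cur.execute(sql_query, tuple(hwsd2_mu_select))
        results = cur.fetchall()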

soil_id/global_soil.py

Lines changed: 30 additions & 32 deletions
@@ -87,11 +87,11 @@ def list_soils_global(lon, lat):
             buffer_dist=10000,
         )
     except KeyError:
-        return("Data_unavailable")
-
+        return "Data_unavailable"
+
     if hwsd2_data.empty:
-        return("Data_unavailable")
-
+        return "Data_unavailable"
+
     # Component Data
     mucompdata_pd = hwsd2_data[["hwsd2", "fao90_name", "distance", "share", "compid"]]
     mucompdata_pd.columns = ["mukey", "compname", "distance", "share", "cokey"]
@@ -632,20 +632,12 @@ def rank_soils_global(
     p_cfg = [getCF_fromClass(rf) for rf in rfvDepth]
 
     p_sandpct_intpl = [
-        spt[i]
-        for i in range(len(soilHorizon))
-        for _ in range(top[i], bottom[i])
+        spt[i] for i in range(len(soilHorizon)) for _ in range(top[i], bottom[i])
     ]
     p_claypct_intpl = [
-        cpt[i]
-        for i in range(len(soilHorizon))
-        for _ in range(top[i], bottom[i])
-    ]
-    p_cfg_intpl = [
-        p_cfg[i]
-        for i in range(len(soilHorizon))
-        for _ in range(top[i], bottom[i])
+        cpt[i] for i in range(len(soilHorizon)) for _ in range(top[i], bottom[i])
     ]
+    p_cfg_intpl = [p_cfg[i] for i in range(len(soilHorizon)) for _ in range(top[i], bottom[i])]
 
     # Length of interpolated texture and RF depth
     p_bottom_depth = pd.DataFrame([-999, "sample_pedon", soil_df_slice.bottom.iloc[-1]]).T
@@ -727,7 +719,7 @@ def rank_soils_global(
 
     compnames = mucompdata_pd[["compname", "compname_grp"]]
 
-     # Determine the maximum depth based on bedrock and user input
+    # Determine the maximum depth based on bedrock and user input
     if bedrock is None:
         max_depth = min(p_bottom_depth.bottom_depth.values[0], 200)
     else:
@@ -740,7 +732,7 @@ def rank_soils_global(
     soil_matrix = pd.DataFrame(
         np.nan, index=np.arange(max_depth), columns=np.arange(len(slices_of_soil))
     )
-
+
     for i in range(len(slices_of_soil)):
         slice_end = slices_of_soil.bottom_depth.iloc[i]
         soil_matrix.iloc[:slice_end, i] = 1
@@ -759,9 +751,7 @@ def rank_soils_global(
 
     # Horizon Data Similarity
     if soilIDRank_output_pd is not None:
-        cokey_groups = [
-            group for _, group in soilIDRank_output_pd.groupby("compname", sort=False)
-        ]
+        cokey_groups = [group for _, group in soilIDRank_output_pd.groupby("compname", sort=False)]
 
         # Create lists to store component statuses
         Comp_Rank_Status, Comp_Missing_Status, Comp_name = [], [], []
@@ -799,33 +789,41 @@ def rank_soils_global(
 
         dis_mat_list = []
 
-        for depth in soil_matrix.index:  # depth represents a user-recorded depth slice (e.g. 100, 101, …, 120)
+        for (
+            depth
+        ) in (
+            soil_matrix.index
+        ):  # depth represents a user-recorded depth slice (e.g. 100, 101, …, 120)
             # Gather the slice from each horizon variable
             slice_list = [horizon.loc[depth] for horizon in horz_vars]
-
+
             # Concatenate slices horizontally then transpose so that each row is one component's data
             slice_df = pd.concat(slice_list, axis=1).T
 
             # If bedrock is specified and the depth is less than bedrock, filter out columns with missing data
             if bedrock is not None and depth < bedrock:
                 # Get columns that are non-null after dropping compname
-                sample_vars = slice_df.dropna(axis='columns').drop('compname', axis=1).columns.tolist()
-
+                sample_vars = (
+                    slice_df.dropna(axis="columns").drop("compname", axis=1).columns.tolist()
+                )
+
                 # If there are fewer than 2 variables available, use the "sample_pedon" row to decide
                 if len(sample_vars) < 2:
-                    sample_vars = (slice_df.loc[slice_df['compname'] == "sample_pedon"]
-                                   .dropna(axis='columns')
-                                   .drop('compname', axis=1)
-                                   .columns.tolist())
-
+                    sample_vars = (
+                        slice_df.loc[slice_df["compname"] == "sample_pedon"]
+                        .dropna(axis="columns")
+                        .drop("compname", axis=1)
+                        .columns.tolist()
+                    )
+
                 # Subset slice_df to only include the sample variables that were kept
                 slice_mat = slice_df.loc[:, slice_df.columns.isin(sample_vars)]
             else:
-                slice_mat = slice_df.drop('compname', axis=1)
+                slice_mat = slice_df.drop("compname", axis=1)
 
             # Compute the Gower distance on the prepared slice matrix.
             D = gower_distances(slice_mat)
-
+
             dis_mat_list.append(D)
 
         # Check if any components have all NaNs at every slice
@@ -920,7 +918,7 @@ def rank_soils_global(
     # --------------------------------------------------------------------------------------------
 
     # Start of soil color
-    #Load in SRG color distribution data
+    # Load in SRG color distribution data
     wmf = []
     wsf = []
     rmf = []
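
The collapsed comprehensions in rank_soils_global repeat each horizon's value once per centimeter of its depth interval, yielding one value per 1-cm slice. A standalone illustration of that expansion with made-up numbers:

# Standalone illustration of the depth-slice expansion used for
# p_sandpct_intpl and friends; the values are made up.
spt = [45, 30]   # sand % for two horizons
top = [0, 3]     # horizon top depths (cm)
bottom = [3, 5]  # horizon bottom depths (cm)

p_sandpct_intpl = [spt[i] for i in range(len(spt)) for _ in range(top[i], bottom[i])]
print(p_sandpct_intpl)  # [45, 45, 45, 30, 30] -- one value per 1-cm slice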

soil_id/tests/global/generate_bulk_test_results.py

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
 import traceback
 
 import pandas
+
 from soil_id.global_soil import list_soils_global, rank_soils_global
 
 test_data_df = pandas.read_csv(

soil_id/tests/us/generate_bulk_test_results.py

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
 import traceback
 
 import pandas
+
 from soil_id.us_soil import list_soils, rank_soils
 
 test_data_df = pandas.read_csv(

soil_id/us_soil.py

Lines changed: 4 additions & 4 deletions
@@ -32,8 +32,11 @@
 from .services import get_soil_series_data, get_soilweb_data, sda_return
 from .soil_sim import soil_sim
 from .utils import (
+    adjust_depth_interval,
     aggregate_data,
     compute_site_similarity,
+    create_new_layer,
+    create_new_layer_osd,
     drop_cokey_horz,
     extract_mucompdata_STATSGO,
     extract_muhorzdata_STATSGO,
@@ -49,11 +52,8 @@
     process_distance_scores,
     process_horizon_data,
     process_site_data,
-    update_intpl_data,
-    adjust_depth_interval,
     update_esd_data,
-    create_new_layer,
-    create_new_layer_osd,
+    update_intpl_data,
 )
 
 # entry points

soil_id/utils.py

Lines changed: 12 additions & 11 deletions
@@ -25,8 +25,11 @@
 import geopandas as gpd
 import numpy as np
 import pandas as pd
+import pyproj
 from numpy.linalg import cholesky
 from osgeo import ogr
+from pyproj import CRS
+from pyproj.database import query_utm_crs_info
 from rosetta import SoilData, rosetta
 from scipy.interpolate import UnivariateSpline
 from scipy.sparse import issparse
@@ -35,9 +38,6 @@
 from sklearn.impute import SimpleImputer
 from sklearn.metrics import pairwise
 from sklearn.utils import validation
-import pyproj
-from pyproj.database import query_utm_crs_info
-from pyproj import CRS
 
 # local libraries
 import soil_id.config
@@ -1398,7 +1398,7 @@ def convert_geometry_to_utm(geometry, src_crs="EPSG:4326"):
 #     utm_crs_list = query_utm_crs_info(datum_name="WGS84", area_of_interest=aoi)
 #     if not utm_crs_list:
 #         raise ValueError("No UTM CRS found for the specified location.")
-
+
 #     # Select the first matching CRS
 #     crs = CRS.from_epsg(utm_crs_list[0].code)
 #     return crs
@@ -1407,15 +1407,15 @@ def convert_geometry_to_utm(geometry, src_crs="EPSG:4326"):
 # def get_target_utm_srid(lat, lon):
 #     """
 #     Determine the target UTM SRID (as an integer) based on latitude and longitude.
-
+
 #     Parameters:
 #         lat (float): The latitude coordinate.
 #         lon (float): The longitude coordinate.
-
+
 #     Returns:
 #         int: The UTM EPSG code as an integer. For example, for a point in the northern
 #              hemisphere in UTM zone 33, the function returns 32633.
-
+
 #     Raises:
 #         ValueError: If the latitude is not in the valid range [-90, 90] or the longitude
 #             is not in the valid range [-180, 180].
@@ -1425,10 +1425,10 @@ def convert_geometry_to_utm(geometry, src_crs="EPSG:4326"):
 #         raise ValueError("Latitude must be between -90 and 90.")
 #     if not (-180 <= lon <= 180):
 #         raise ValueError("Longitude must be between -180 and 180.")
-
+
 #     # Determine UTM zone: zones are 6° wide starting at -180.
 #     utm_zone = int((lon + 180) / 6) + 1
-
+
 #     # For the northern hemisphere, UTM EPSG codes start at 32600; for the southern, 32700.
 #     if lat >= 0:
 #         return 32600 + utm_zone
@@ -1843,8 +1843,9 @@ def pedon_color(lab_Color, top, bottom):
     )
 
     # Check for None values
-    if any(x is None for x in [top, bottom]) or \
-       any(s.isnull().any() for s in [pedon_l, pedon_a, pedon_b]):
+    if any(x is None for x in [top, bottom]) or any(
+        s.isnull().any() for s in [pedon_l, pedon_a, pedon_b]
+    ):
         return np.nan
 
     if top[0] != 0:
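
The commented-out get_target_utm_srid in the hunks above maps longitude to a 6°-wide UTM zone and selects the 326xx (northern) or 327xx (southern) EPSG range. A runnable condensation of that logic:

# Condensed from the commented-out get_target_utm_srid shown above.
def target_utm_srid(lat: float, lon: float) -> int:
    if not (-90 <= lat <= 90):
        raise ValueError("Latitude must be between -90 and 90.")
    if not (-180 <= lon <= 180):
        raise ValueError("Longitude must be between -180 and 180.")
    utm_zone = int((lon + 180) / 6) + 1  # zones are 6 degrees wide from -180
    return (32600 if lat >= 0 else 32700) + utm_zone

print(target_utm_srid(52.5, 13.4))  # Berlin -> 32633 (UTM zone 33N)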
