Commit e86c5df

fix: sql code update

- Modified the SQL code to query all home mapunit components and all adjacent map units and their components.
- Fixed component name indexing.

1 parent ac0a581 · commit e86c5df

File tree

2 files changed: +80 −44 lines changed

soil_id/db.py

+31 −17
@@ -261,32 +261,46 @@ def extract_hwsd2_data(lon, lat, buffer_dist, table_name):
     # Distance is computed by casting geometries to geography,
     # which returns the geodesic distance in meters.
     main_query = f"""
-        WITH valid_geom AS (
-            SELECT
+        WITH
+        -- Step 1: Get the polygon that contains the point
+        point_poly AS (
+            SELECT ST_MakeValid(geom) AS geom
+            FROM {table_name}
+            WHERE ST_Intersects(
+                geom,
+                ST_SetSRID(ST_Point({lon}, {lat}), 4326)
+            )
+        ),
+
+        -- Step 2: Get polygons that intersect the buffer
+        valid_geom AS (
+            SELECT
                 hwsd2,
                 ST_MakeValid(geom) AS geom
-            FROM {table_name}
-            WHERE geom && ST_GeomFromText('{buffer_wkt}', 4326)
+            FROM {table_name}
+            WHERE geom && ST_GeomFromText('{buffer_wkt}', 4326)
               AND ST_Intersects(geom, ST_GeomFromText('{buffer_wkt}', 4326))
         )
+
+        -- Step 3: Filter to those that either contain the point or border the point's polygon
         SELECT
-            hwsd2,
-            ST_AsEWKB(geom) AS geom,
-            ST_Distance(
-                geom::geography,
+            vg.hwsd2,
+            ST_AsEWKB(vg.geom) AS geom,
+            ST_Distance(
+                vg.geom::geography,
                 ST_SetSRID(ST_Point({lon}, {lat}), 4326)::geography
-            ) AS distance,
-            ST_Intersects(
-                geom,
-                ST_SetSRID(ST_Point({lon}, {lat}), 4326)
-            ) AS pt_intersect
-        FROM valid_geom
-        WHERE ST_Intersects(
-            geom,
+            ) AS distance,
+            ST_Intersects(
+                vg.geom,
                 ST_SetSRID(ST_Point({lon}, {lat}), 4326)
-        );
+            ) AS pt_intersect
+        FROM valid_geom vg, point_poly pp
+        WHERE
+            ST_Intersects(vg.geom, ST_SetSRID(ST_Point({lon}, {lat}), 4326))
+            OR ST_Intersects(vg.geom, pp.geom);
     """
 
+
     # Use GeoPandas to execute the main query and load results into a GeoDataFrame.
     hwsd = gpd.read_postgis(main_query, conn, geom_col="geom")
     print("Main query returned", len(hwsd), "rows.")

soil_id/global_soil.py

+49 −27
@@ -186,47 +186,64 @@ def list_soils_global(lon, lat):
         lambda x: str(x) if isinstance(x, np.ndarray) else x
     )
 
-    # Rank components and sort by rank and depth
-    cokey_Index = {key: rank for rank, key in enumerate(comp_key)}
-    muhorzdata_pd["Comp_Rank"] = muhorzdata_pd["cokey"].map(cokey_Index)
-    muhorzdata_pd.sort_values(["Comp_Rank", "hzdept_r"], inplace=True)
-    muhorzdata_pd.drop(columns="Comp_Rank", inplace=True)
-
     # Check for duplicate component instances
     hz_drop = drop_cokey_horz(muhorzdata_pd)
     if hz_drop is not None:
         muhorzdata_pd = muhorzdata_pd[~muhorzdata_pd.cokey.isin(hz_drop)]
-        muhorzdata_pd = muhorzdata_pd.drop_duplicates().reset_index(drop=True)
 
-    # Update comp_key
+    muhorzdata_pd.reset_index(drop=True, inplace=True)
+
+    # Extract unique cokeys and subset mucompdata_pd
     comp_key = muhorzdata_pd["cokey"].unique().tolist()
+    mucompdata_pd = mucompdata_pd[mucompdata_pd["cokey"].isin(comp_key)]
 
-    # Subset mucompdata_pd by new compname_key and add suffix to name if there are duplicates
-    mucompdata_pd = mucompdata_pd.loc[mucompdata_pd["cokey"].isin(comp_key)].reset_index(drop=True)
+    # Sort mucompdata_pd based on 'distance_score' and 'distance'
+    mucompdata_pd.sort_values(["distance_score", "distance"], ascending=[False, True], inplace=True)
+    mucompdata_pd.reset_index(drop=True, inplace=True)
+
+    # Duplicate the 'compname' column for grouping purposes
     mucompdata_pd["compname_grp"] = mucompdata_pd["compname"]
 
-    # Sort by 'distance_score' (descending) and 'distance' (ascending), then reset the index
-    mucompdata_pd = mucompdata_pd.sort_values(
-        ["distance_score", "distance"], ascending=[False, True]
-    ).reset_index(drop=True)
+    # Extract unique cokeys and create a ranking dictionary
+    comp_key = mucompdata_pd["cokey"].unique().tolist()
+    cokey_index = {key: index for index, key in enumerate(comp_key)}
 
-    # Add suffix to duplicate names
-    name_counts = collections.Counter(mucompdata_pd["compname"])
-    for name, count in name_counts.items():
-        if count > 1:
-            for suffix in range(1, count + 1):
-                mucompdata_pd.loc[mucompdata_pd["compname"] == name, "compname"] = name + str(
-                    suffix
-                )
+    # Apply the ranking to muhorzdata_pd for sorting
+    muhorzdata_pd["Comp_Rank"] = muhorzdata_pd["cokey"].map(cokey_index)
+
+    # Sort muhorzdata_pd by 'Comp_Rank' and 'hzdept_r', and clean up
+    muhorzdata_pd.sort_values(["Comp_Rank", "hzdept_r"], ascending=[True, True], inplace=True)
+    muhorzdata_pd.drop("Comp_Rank", axis=1, inplace=True)
+    muhorzdata_pd.reset_index(drop=True, inplace=True)
 
-    # Add modified compname to muhorzdata
-    muhorzdata_name = muhorzdata_pd[["cokey"]].merge(
-        mucompdata_pd[["cokey", "compname"]], on="cokey"
+    mucompdata_pd = mucompdata_pd.drop_duplicates().reset_index(drop=True)
+
+    # Update component names in mucompdata_pd to handle duplicates
+    component_names = mucompdata_pd["compname"].tolist()
+    name_counts = collections.Counter(component_names)
+
+    for name, count in name_counts.items():
+        if count > 1:  # If a component name is duplicated
+            suffixes = range(1, count + 1)  # Generate suffixes for the duplicate names
+            for suffix in suffixes:
+                index = component_names.index(
+                    name
+                )  # Find the index of the first occurrence of the duplicate name
+                component_names[index] = name + str(suffix)  # Append the suffix
+
+    mucompdata_pd["compname"] = component_names
+    muhorzdata_pd.rename(columns={"compname": "compname_grp"}, inplace=True)
+    # Merge the modified component names from mucompdata_pd to muhorzdata_pd
+    muhorzdata_pd = muhorzdata_pd.merge(
+        mucompdata_pd[["cokey", "compname"]], on="cokey", how="left"
     )
-    muhorzdata_pd["compname"] = muhorzdata_name["compname"]
 
     # Group data by cokey for texture
     muhorzdata_group_cokey = list(muhorzdata_pd.groupby("cokey", sort=False))
+    pd.set_option('display.max_rows', None)
+    pd.set_option('display.max_columns', None)
+    pd.set_option('display.width', None)
+    pd.set_option('display.max_colwidth', None)
 
     # Initialize lists for storing data
     getProfile_cokey = []
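The rewritten loop above is the "component name indexing" fix from the commit message: the old code assigned `name + str(suffix)` to every row matching the bare name on each pass, so all duplicates ended up carrying the same final suffix. Working on a plain list, `list.index` finds only the first occurrence still bearing the bare name, so each pass renames exactly one duplicate. A standalone sketch with made-up component names:

```python
# Standalone sketch of the fixed suffixing logic (same approach as the
# diff, applied to a plain list; the names here are hypothetical).
import collections

component_names = ["Gleysols", "Ferralsols", "Gleysols", "Gleysols"]
name_counts = collections.Counter(component_names)

for name, count in name_counts.items():
    if count > 1:
        for suffix in range(1, count + 1):
            # index() finds the first entry still bearing the bare name;
            # renamed entries no longer match, so each pass advances to
            # the next duplicate.
            idx = component_names.index(name)
            component_names[idx] = name + str(suffix)

print(component_names)
# ['Gleysols1', 'Ferralsols', 'Gleysols2', 'Gleysols3']
```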
@@ -405,7 +422,6 @@ def list_soils_global(lon, lat):
     mucompdata_cond_prob = mucompdata_cond_prob.sort_values(
         ["soilID_rank", "distance_score"], ascending=[False, False]
    )
-    mucomp_index = mucompdata_cond_prob.index
 
     # Generate the ID list
     ID = [
@@ -435,6 +451,9 @@
         mucompdata_cond_prob, WRB_Comp_Desc, left_on="compname_grp", right_on="WRB_tax", how="left"
     )
 
+    mucompdata_cond_prob = mucompdata_cond_prob.drop_duplicates().reset_index(drop=True)
+    mucomp_index = mucompdata_cond_prob.index
+
     # Extract site information
     Site = [
         {
@@ -474,6 +493,9 @@
         ph_lyrs,
         ec_lyrs,
     ]
+    for idx, lst in enumerate(lists_to_reorder):
+        if len(lst) < max(mucomp_index) + 1:
+            print(f"List at index {idx} is too short: len={len(lst)}, max index in mucomp_index={max(mucomp_index)}")
     reordered_lists = [[lst[i] for i in mucomp_index] for lst in lists_to_reorder]
 
     # Destructuring reordered lists for clarity
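The last two hunks move the capture of `mucomp_index` to after the merge with `WRB_Comp_Desc` and the `drop_duplicates()`/`reset_index()` call, so the index used to reorder the per-component lists matches the deduplicated frame; the added loop is purely a diagnostic that flags any list too short to be indexed. A minimal sketch of the reordering mechanics it guards, with hypothetical values:

```python
# Hypothetical index and per-component lists; in the real code the index
# comes from mucompdata_cond_prob and the lists hold per-component layers.
mucomp_index = [2, 0, 1]
lists_to_reorder = [
    ["sand", "loam", "clay"],  # e.g. a texture per component
    [0.9, 0.7, 0.4],           # e.g. a score per component
]

# The diagnostic added in the commit: flag lists too short to be indexed.
for idx, lst in enumerate(lists_to_reorder):
    if len(lst) < max(mucomp_index) + 1:
        print(f"List at index {idx} is too short: len={len(lst)}")

reordered = [[lst[i] for i in mucomp_index] for lst in lists_to_reorder]
print(reordered)  # [['clay', 'sand', 'loam'], [0.4, 0.9, 0.7]]
```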
