@@ -186,47 +186,64 @@ def list_soils_global(lon, lat):
         lambda x: str(x) if isinstance(x, np.ndarray) else x
     )
 
-    # Rank components and sort by rank and depth
-    cokey_Index = {key: rank for rank, key in enumerate(comp_key)}
-    muhorzdata_pd["Comp_Rank"] = muhorzdata_pd["cokey"].map(cokey_Index)
-    muhorzdata_pd.sort_values(["Comp_Rank", "hzdept_r"], inplace=True)
-    muhorzdata_pd.drop(columns="Comp_Rank", inplace=True)
-
     # Check for duplicate component instances
     hz_drop = drop_cokey_horz(muhorzdata_pd)
     if hz_drop is not None:
         muhorzdata_pd = muhorzdata_pd[~muhorzdata_pd.cokey.isin(hz_drop)]
-    muhorzdata_pd = muhorzdata_pd.drop_duplicates().reset_index(drop=True)
 
-    # Update comp_key
+    muhorzdata_pd.reset_index(drop=True, inplace=True)
+
+    # Extract unique cokeys and subset mucompdata_pd
     comp_key = muhorzdata_pd["cokey"].unique().tolist()
+    mucompdata_pd = mucompdata_pd[mucompdata_pd["cokey"].isin(comp_key)]
 
-    # Subset mucompdata_pd by new compname_key and add suffix to name if there are duplicates
-    mucompdata_pd = mucompdata_pd.loc[mucompdata_pd["cokey"].isin(comp_key)].reset_index(drop=True)
+    # Sort mucompdata_pd based on 'distance_score' and 'distance'
+    mucompdata_pd.sort_values(["distance_score", "distance"], ascending=[False, True], inplace=True)
+    mucompdata_pd.reset_index(drop=True, inplace=True)
+
+    # Duplicate the 'compname' column for grouping purposes
     mucompdata_pd["compname_grp"] = mucompdata_pd["compname"]
 
-    # Sort by 'distance_score' (descending) and 'distance' (ascending), then reset the index
-    mucompdata_pd = mucompdata_pd.sort_values(
-        ["distance_score", "distance"], ascending=[False, True]
-    ).reset_index(drop=True)
+    # Extract unique cokeys and create a ranking dictionary
+    comp_key = mucompdata_pd["cokey"].unique().tolist()
+    cokey_index = {key: index for index, key in enumerate(comp_key)}
 
-    # Add suffix to duplicate names
-    name_counts = collections.Counter(mucompdata_pd["compname"])
-    for name, count in name_counts.items():
-        if count > 1:
-            for suffix in range(1, count + 1):
-                mucompdata_pd.loc[mucompdata_pd["compname"] == name, "compname"] = name + str(
-                    suffix
-                )
+    # Apply the ranking to muhorzdata_pd for sorting
+    muhorzdata_pd["Comp_Rank"] = muhorzdata_pd["cokey"].map(cokey_index)
+
+    # Sort muhorzdata_pd by 'Comp_Rank' and 'hzdept_r', and clean up
+    muhorzdata_pd.sort_values(["Comp_Rank", "hzdept_r"], ascending=[True, True], inplace=True)
+    muhorzdata_pd.drop("Comp_Rank", axis=1, inplace=True)
+    muhorzdata_pd.reset_index(drop=True, inplace=True)
 
-    # Add modified compname to muhorzdata
-    muhorzdata_name = muhorzdata_pd[["cokey"]].merge(
-        mucompdata_pd[["cokey", "compname"]], on="cokey"
+    mucompdata_pd = mucompdata_pd.drop_duplicates().reset_index(drop=True)
+
+    # Update component names in mucompdata_pd to handle duplicates
+    component_names = mucompdata_pd["compname"].tolist()
+    name_counts = collections.Counter(component_names)
+
+    for name, count in name_counts.items():
+        if count > 1:  # If a component name is duplicated
+            suffixes = range(1, count + 1)  # Generate suffixes for the duplicate names
+            for suffix in suffixes:
+                index = component_names.index(
+                    name
+                )  # Find the index of the first occurrence of the duplicate name
+                component_names[index] = name + str(suffix)  # Append the suffix
+
+    mucompdata_pd["compname"] = component_names
+    muhorzdata_pd.rename(columns={"compname": "compname_grp"}, inplace=True)
+    # Merge the modified component names from mucompdata_pd to muhorzdata_pd
+    muhorzdata_pd = muhorzdata_pd.merge(
+        mucompdata_pd[["cokey", "compname"]], on="cokey", how="left"
     )
-    muhorzdata_pd["compname"] = muhorzdata_name["compname"]
 
     # Group data by cokey for texture
     muhorzdata_group_cokey = list(muhorzdata_pd.groupby("cokey", sort=False))
+    pd.set_option('display.max_rows', None)
+    pd.set_option('display.max_columns', None)
+    pd.set_option('display.width', None)
+    pd.set_option('display.max_colwidth', None)
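
Two things in the hunk above are worth calling out. The Comp_Rank sort of muhorzdata_pd now runs after mucompdata_pd has been sorted by distance_score/distance, so horizons follow the final component order rather than the stale pre-sort comp_key. And the removed .loc-based suffixing appears to have been buggy: on the first pass every duplicate row matched the bare name and was set to name + "1", after which later suffixes matched nothing, so duplicates collided; the list-based loop avoids this because each rename removes that entry from subsequent list.index(name) lookups. (The four pd.set_option calls only change console display and read like temporary debugging aids.) A minimal, runnable sketch of both patterns with made-up frames, not the real project data:

import collections

import pandas as pd

# Toy stand-ins for mucompdata_pd / muhorzdata_pd; all values are hypothetical.
comps = pd.DataFrame({
    "cokey": [10, 11, 12],
    "compname": ["Alpha", "Beta", "Alpha"],
    "distance_score": [0.9, 0.8, 0.7],
    "distance": [5, 10, 20],
})
horizons = pd.DataFrame({"cokey": [12, 10, 11, 10], "hzdept_r": [0, 30, 0, 0]})

# Sort components first, then rank horizons by that order so both frames align.
comps.sort_values(["distance_score", "distance"], ascending=[False, True], inplace=True)
cokey_index = {key: i for i, key in enumerate(comps["cokey"].unique())}
horizons["Comp_Rank"] = horizons["cokey"].map(cokey_index)
horizons.sort_values(["Comp_Rank", "hzdept_r"], inplace=True)
horizons.drop("Comp_Rank", axis=1, inplace=True)

# Suffix duplicate names in first-occurrence order. Each rename changes the
# stored value, so list.index(name) advances to the next untouched duplicate.
names = comps["compname"].tolist()
for name, count in collections.Counter(names).items():
    if count > 1:
        for suffix in range(1, count + 1):
            names[names.index(name)] = name + str(suffix)
comps["compname"] = names

print(comps["compname"].tolist())  # ['Alpha1', 'Beta', 'Alpha2']
print(horizons["cokey"].tolist())  # [10, 10, 11, 12]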
@@ -405,7 +422,6 @@ def list_soils_global(lon, lat):
     mucompdata_cond_prob = mucompdata_cond_prob.sort_values(
         ["soilID_rank", "distance_score"], ascending=[False, False]
     )
-    mucomp_index = mucompdata_cond_prob.index
 
     # Generate the ID list
     ID = [
@@ -435,6 +451,9 @@ def list_soils_global(lon, lat):
         mucompdata_cond_prob, WRB_Comp_Desc, left_on="compname_grp", right_on="WRB_tax", how="left"
     )
 
+    mucompdata_cond_prob = mucompdata_cond_prob.drop_duplicates().reset_index(drop=True)
+    mucomp_index = mucompdata_cond_prob.index
+
     # Extract site information
     Site = [
         {
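
Taken together with the removal in the previous hunk, this moves the mucomp_index capture from before the WRB description merge to after it. A left merge fans out rows whenever the right-hand table repeats a key, and drop_duplicates().reset_index(drop=True) collapses them back into a dense 0..n-1 index, so the captured index is only a safe set of positions for the later reorder step once both have run. A small sketch of the fan-out, with toy frames and hypothetical column values:

import pandas as pd

left = pd.DataFrame({"compname_grp": ["A", "B"], "score": [1, 2]})
right = pd.DataFrame({"WRB_tax": ["A", "A"], "desc": ["loam", "loam"]})  # repeated key

merged = left.merge(right, left_on="compname_grp", right_on="WRB_tax", how="left")
print(len(merged))          # 3 -- row "A" fanned out into two identical rows

merged = merged.drop_duplicates().reset_index(drop=True)
print(len(merged))          # 2 -- duplicates collapsed again
print(list(merged.index))   # [0, 1] -- dense positions, safe for list reordering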
@@ -474,6 +493,9 @@ def list_soils_global(lon, lat):
         ph_lyrs,
         ec_lyrs,
     ]
+    for idx, lst in enumerate(lists_to_reorder):
+        if len(lst) < max(mucomp_index) + 1:
+            print(f"List at index {idx} is too short: len={len(lst)}, max index in mucomp_index={max(mucomp_index)}")
     reordered_lists = [[lst[i] for i in mucomp_index] for lst in lists_to_reorder]
 
     # Destructuring reordered lists for clarity
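
The added loop is a diagnostic guard for the comprehension that follows it: every list in lists_to_reorder is indexed with the positions in mucomp_index, so any list shorter than max(mucomp_index) + 1 would raise an IndexError, and the print names the offender before that happens. A minimal sketch of the reorder step with made-up layer lists:

# Hypothetical final ranking (positional indices) and per-component lists.
mucomp_index = [2, 0, 1]
ph_lyrs = [6.5, 7.1, 5.8]
ec_lyrs = [0.2, 0.4, 0.1]
lists_to_reorder = [ph_lyrs, ec_lyrs]

for idx, lst in enumerate(lists_to_reorder):
    if len(lst) < max(mucomp_index) + 1:
        print(f"List at index {idx} is too short: len={len(lst)}")

reordered_lists = [[lst[i] for i in mucomp_index] for lst in lists_to_reorder]
print(reordered_lists)  # [[5.8, 6.5, 7.1], [0.1, 0.2, 0.4]]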