@@ -1007,33 +1007,49 @@ def rank_soils_global(
1007
1007
1008
1008
# Calculate color similarity
1009
1009
if not cr_df .isnull ().values .any ():
1010
- color_sim = []
1011
1010
w_df , r_df , y_df = cr_df .iloc [0 ], cr_df .iloc [1 ], cr_df .iloc [2 ]
1012
- fao_list = [item .lower () for item in fao90 ]
1013
1011
1014
- for compname in D_final_horz .compname :
1015
- soilgroup = re .sub (r"\d+$" , "" , " " .join (compname .split ()[1 :])).lower ()
1012
+ # Vectorized computation of color probabilities
1013
+ def norm_pdf_vec (x , mean_arr , std_arr ):
1014
+ var = np .square (std_arr )
1015
+ denom = np .sqrt (2 * np .pi * var )
1016
+ num = np .exp (- np .square (x - mean_arr ) / (2 * var ))
1017
+ return num / denom
1018
+
1019
+ # Convert to numpy arrays
1020
+ wmf , wsf = np .array (wmf , dtype = np .float64 ), np .array (wsf , dtype = np .float64 )
1021
+ rmf , rsf = np .array (rmf , dtype = np .float64 ), np .array (rsf , dtype = np .float64 )
1022
+ ymf , ysf = np .array (ymf , dtype = np .float64 ), np .array (ysf , dtype = np .float64 )
1016
1023
1017
- prob_w , prob_r , prob_y = [], [], []
1024
+ prob_w = norm_pdf_vec (float (w_df ), wmf , wsf )
1025
+ prob_r = norm_pdf_vec (float (r_df ), rmf , rsf )
1026
+ prob_y = norm_pdf_vec (float (y_df ), ymf , ysf )
1018
1027
1019
- idx = fao_list .index (soilgroup ) if soilgroup in fao_list else - 1
1028
+ # Min-max scale each probability array to [0, 1]; a constant array becomes all ones
1029
+ def normalize (arr ):
1030
+ min_val , max_val = np .min (arr ), np .max (arr )
1031
+ return (arr - min_val ) / (max_val - min_val ) if max_val != min_val else np .ones_like (arr )
1020
1032
1021
- for mw , sw , mr , sr , my , sy in zip (wmf , wsf , rmf , rsf , ymf , ysf ):
1022
- prob_w .append (norm (float (mw ), float (sw )).pdf (float (w_df )))
1023
- prob_r .append (norm (float (mr ), float (sr )).pdf (float (r_df )))
1024
- prob_y .append (norm (float (my ), float (sy )).pdf (float (y_df )))
1033
+ prob_w = normalize (prob_w )
1034
+ prob_r = normalize (prob_r )
1035
+ prob_y = normalize (prob_y )
1025
1036
1026
- max_prob_w , min_prob_w = max ( prob_w ), min ( prob_w )
1027
- max_prob_r , min_prob_r = max ( prob_r ), min ( prob_r )
1028
- max_prob_y , min_prob_y = max ( prob_y ), min ( prob_y )
1037
+ # Prepare FAO soil groups for lookup
1038
+ fao_list = [ item . lower () for item in fao90 ]
1039
+ fao_index_map = { name : i for i , name in enumerate ( fao_list )}
1029
1040
1030
- for j in range (len (fao_list )):
1031
- prob_w [j ] = (prob_w [j ] - min_prob_w ) / (max_prob_w - min_prob_w )
1032
- prob_r [j ] = (prob_r [j ] - min_prob_r ) / (max_prob_r - min_prob_r )
1033
- prob_y [j ] = (prob_y [j ] - min_prob_y ) / (max_prob_y - min_prob_y )
1041
+ # Score each component via its FAO soil group lookup (unmatched groups score 1.0)
1042
+ compnames = D_final_horz .compname .str .lower ()
1043
+ color_sim = []
1034
1044
1035
- crsr = (prob_w [idx ] + prob_r [idx ] + prob_y [idx ]) / 3.0 if idx != - 1 else 1.0
1036
- color_sim .append (crsr )
1045
+ for name in compnames :
1046
+ soilgroup = re .sub (r"\d+$" , "" , " " .join (name .split ()[1 :])).strip ()
1047
+ idx = fao_index_map .get (soilgroup , - 1 )
1048
+ if idx != - 1 :
1049
+ score = (prob_w [idx ] + prob_r [idx ] + prob_y [idx ]) / 3.0
1050
+ else :
1051
+ score = 1.0
1052
+ color_sim .append (score )
1037
1053
1038
1054
color_sim = pd .Series (color_sim )
1039
1055
0 commit comments