@@ -354,153 +354,69 @@ def aggregate_data(data, bottom_depths, sd=2):
354
354
355
355
356
356
def getProfile(data, variable):
    """Expand horizon-level values of ``variable`` into a fixed 200-row profile.

    Each horizon (row of ``data``) contributes ``int(hzdepb_r) - int(hzdept_r)``
    consecutive rows holding that horizon's value, so row ``k`` of the result
    corresponds to the k-th integer depth step below the surface.  The result
    is truncated or NaN-padded to exactly 200 rows.

    Args:
        data: DataFrame with one row per horizon.  Must contain ``hzdept_r``
            (top depth), ``hzdepb_r`` (bottom depth) and the column named by
            ``variable``; ``sandtotal_r`` / ``claytotal_r`` additionally need
            a ``texture`` column.
        variable: One of ``"sandtotal_r"``, ``"claytotal_r"``,
            ``"total_frag_volume"``, ``"CEC"``, ``"pH"`` or ``"EC"``.

    Returns:
        A 200-row DataFrame with a default integer index.  For the first
        three variables it has columns ``var_pct_intpl`` and
        ``var_pct_intpl_grp`` (the latter produced by ``getSand`` /
        ``getClay`` / ``getCF``, which are defined outside this block);
        for ``CEC`` / ``pH`` / ``EC`` it has only ``var_pct_intpl``.
        An all-NaN frame is returned when ``data`` is empty, when any depth
        is null, when the first horizon does not start at 0, or when there
        is a gap between consecutive horizons.

    Raises:
        ValueError: If ``variable`` is not one of the supported names.
    """
    profile_len = 200

    def nan_if_null(value):
        # Normalise None / NaN / NaT to a plain float NaN.
        return value if pd.notnull(value) else np.nan

    def blank_profile(columns):
        # Built directly (not via concat with an empty frame) so the columns
        # are float64 NaN regardless of pandas version.
        return pd.DataFrame(np.nan, index=np.arange(profile_len), columns=columns)

    # Extract the per-horizon value and, where applicable, its grouped value.
    # The helper lookups stay inside their branches so that only the helper
    # actually needed for ``variable`` is resolved.
    if variable == "sandtotal_r":
        var = data["sandtotal_r"].to_numpy()
        var_grp = data["texture"].apply(
            lambda x: getSand(x) if pd.notnull(x) else np.nan
        ).to_numpy()
    elif variable == "claytotal_r":
        var = data["claytotal_r"].to_numpy()
        var_grp = data["texture"].apply(
            lambda x: getClay(x) if pd.notnull(x) else np.nan
        ).to_numpy()
    elif variable == "total_frag_volume":
        var = data["total_frag_volume"].apply(nan_if_null).to_numpy()
        var_grp = data["total_frag_volume"].apply(
            lambda x: getCF(x) if pd.notnull(x) else np.nan
        ).to_numpy()
    elif variable in ("CEC", "pH", "EC"):
        var = data[variable].apply(nan_if_null).to_numpy()
        var_grp = None
    else:
        raise ValueError(f"Unsupported variable: {variable}")

    columns = ["var_pct_intpl"]
    if var_grp is not None:
        columns.append("var_pct_intpl_grp")

    # Return empty fields when there is no horizon data, no depth data, or
    # the top depth is not 0.  The emptiness test comes first so that
    # ``.iloc[0]`` is never evaluated on an empty frame.
    if (
        len(data) == 0
        or data["hzdept_r"].isnull().any()
        or data["hzdepb_r"].isnull().any()
        or data["hzdept_r"].iloc[0] != 0
    ):
        return blank_profile(columns)

    hzdept = data["hzdept_r"].to_numpy()
    hzdepb = data["hzdepb_r"].to_numpy()

    # A horizon that starts below the previous horizon's bottom means part of
    # the profile is missing; the whole profile is reported as NaN.
    if any(next_top > prev_bottom for next_top, prev_bottom in zip(hzdept[1:], hzdepb[:-1])):
        return blank_profile(columns)

    # NOTE(review): overlapping horizons (next top < previous bottom) are not
    # adjusted; every horizon still contributes bottom - top rows, so rows
    # after an overlap sit deeper than their nominal depth.
    var_pct_intpl = []
    var_pct_intpl_grp = []
    for i, (top, bottom) in enumerate(zip(hzdept, hzdepb)):
        # A non-positive thickness multiplies to an empty list (no rows).
        thickness = int(bottom) - int(top)
        var_pct_intpl.extend([var[i]] * thickness)
        if var_grp is not None:
            var_pct_intpl_grp.extend([var_grp[i]] * thickness)

    if not var_pct_intpl:
        # Every horizon had zero thickness: nothing to expand.
        return blank_profile(columns)

    profile = {"var_pct_intpl": var_pct_intpl}
    if var_grp is not None:
        profile["var_pct_intpl_grp"] = var_pct_intpl_grp

    # reindex both truncates profiles longer than 200 rows and pads shorter
    # ones with NaN, without concatenating a possibly empty padding frame.
    return pd.DataFrame(profile).reindex(range(profile_len))
504
420
505
421
506
422
def max_comp_depth (data ):
0 commit comments