88from scipy .interpolate import interp1d
99import rdtools
1010import xgboost as xgb
11+ from rdtools import utilities
1112
1213# Load in the XGBoost clipping model using joblib.
1314xgboost_clipping_model = None
@@ -335,8 +336,7 @@ def quantile_clip_filter(power_ac, quantile=0.98):
335336 """
336337 Filter data points likely to be affected by clipping
337338 with power or energy greater than or equal to 99% of the `quantile`
338- quantile. NaN's and small values (power_ac(quantile) / 1000) are
339- removed before calculating clipping threshold.
339+ quantile.
340340
341341 Parameters
342342 ----------
@@ -351,16 +351,8 @@ def quantile_clip_filter(power_ac, quantile=0.98):
351351 Boolean Series of whether the given measurement is below 99% of the
352352 quantile filter.
353353 """
354- # Replace NaN's and small values for quantile calculation
355- # This ensures that power series with NaN's instead of zero values
356- # provide the same result.
357- lower = power_ac .fillna (0 ).quantile (quantile ) / 1000
358-
359- # Calculate the quantile and upper clipping threshold
360- q = power_ac [power_ac > lower ].quantile (quantile )
361- upper = q * 0.99
362-
363- return power_ac < upper
354+ v = utilities .robust_quantile (power_ac , quantile )
355+ return power_ac < v * 0.99
364356
365357
366358def _format_clipping_time_series (power_ac , mounting_type ):
@@ -519,18 +511,15 @@ def _apply_overall_clipping_threshold(power_ac, clipping_mask, clipped_power_ac)
519511 periods are labeled as True and non-clipping periods are
520512 labeled as False. Has a pandas datetime index.
521513 """
522-
523- # Ensure that series with NaN's return same results as series with 0's
524- lower = power_ac .fillna (0 ).quantile (0.99 ) / 1000
525- power_ac_quant = power_ac [power_ac > lower ].quantile (0.99 )
514+ q_power_ac = utilities .robust_quantile (power_ac , 0.99 )
515+ q_clipped_power_ac = utilities .robust_quantile (clipped_power_ac , 0.99 )
526516
527517 upper_bound_pdiff = abs (
528- (power_ac_quant - clipped_power_ac .quantile (0.99 ))
529- / ((power_ac_quant + clipped_power_ac .quantile (0.99 )) / 2 )
518+ (q_power_ac - q_clipped_power_ac ) / ((q_power_ac + q_clipped_power_ac ) / 2 )
530519 )
531520 percent_clipped = len (clipped_power_ac ) / len (power_ac ) * 100
532521 if (upper_bound_pdiff < 0.005 ) & (percent_clipped > 4 ):
533- max_clip = power_ac >= power_ac_quant
522+ max_clip = power_ac >= q_power_ac
534523 clipping_mask = clipping_mask | max_clip
535524 return clipping_mask
536525
@@ -656,15 +645,14 @@ def logic_clip_filter(
656645 # Set any values within the clipping max + clipping min threshold
657646 # as clipping. This is done specifically for capturing the noise
658647 # for high frequency data sets.
659-
660- # Ensure that time series with zeros and nan's return same result
661- lower = clip_pwr .fillna (0 ).quantile (0.99 ) / 1000
662- clip_pwr_no_nan = clip_pwr [clip_pwr > lower ]
663-
664- daily_mean = clip_pwr_no_nan .resample ("D" ).mean ()
648+ daily_mean = clip_pwr .resample ("D" ).mean ()
665649 df_daily = daily_mean .to_frame (name = "mean" )
666- df_daily ["clipping_max" ] = clip_pwr_no_nan .groupby (pd .Grouper (freq = "D" )).quantile (0.99 )
667- df_daily ["clipping_min" ] = clip_pwr_no_nan .groupby (pd .Grouper (freq = "D" )).quantile (0.075 )
650+ df_daily ["clipping_max" ] = clip_pwr .groupby (pd .Grouper (freq = "D" )).agg (
651+ utilities .robust_quantile , q = 0.99
652+ )
653+ df_daily ["clipping_min" ] = clip_pwr .groupby (pd .Grouper (freq = "D" )).agg (
654+ utilities .robust_quantile , q = 0.075
655+ )
668656 daily_clipping_max = df_daily ["clipping_max" ].reindex (
669657 index = power_ac_copy .index , method = "ffill"
670658 )
0 commit comments