Skip to content

Commit 9903aaa

Browse files
authored
Merge pull request #442 from NREL/fix_issue_#313
Fix issue #313 (different results with Nan's and Zeros in power series)
2 parents c59708a + 2338d92 commit 9903aaa

File tree

10 files changed

+205
-15
lines changed

10 files changed

+205
-15
lines changed

.github/workflows/nbval.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
- name: Run notebook and check output
3030
run: |
3131
# --sanitize-with: pre-process text to remove irrelevant differences (e.g. warning filepaths)
32-
pytest --nbval --sanitize-with docs/nbval_sanitization_rules.cfg docs/${{ matrix.notebook-file }}
32+
pytest --nbval docs/${{ matrix.notebook-file }} --sanitize-with docs/nbval_sanitization_rules.cfg
3333
- name: Run notebooks again, save files
3434
run: |
3535
pip install nbconvert[webpdf]

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ docs/sphinx/source/generated
2727
.eggs/
2828
build/
2929
dist/
30+
tmp/
3031
rdtools.egg-info*
3132

3233
# emacs temp files

docs/sphinx/source/changelog/pending.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ Bug fixes
1212
---------
1313
* Set marker linewidth to zero in `rdtools.plotting.degradation_summary_plots` (:pull:`433`)
1414
* Fix :py:func:`~rdtools.normalization.energy_from_power` returns incorrect index for shifted hourly data (:issue:`370`, :pull:`437`)
15-
* Add warning to clearsky workflow when power_expected is passed by user (:pull:`439`)
15+
* Add warning to clearsky workflow when ``power_expected`` is passed by user (:pull:`439`)
16+
* Fix different results with Nan's and Zeros in power series (:issue:`313`, :pull:`442`)
1617

1718

1819
Requirements

rdtools/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@
3636
# from rdtools.plotting import soiling_rate_histogram
3737
# from rdtools.plotting import availability_summary_plots
3838
# from rdtools.availability import AvailabilityAnalysis
39+
from rdtools.utilities import robust_quantile
40+
from rdtools.utilities import robust_median
41+
from rdtools.utilities import robust_mean
3942

4043
from . import _version
4144
__version__ = _version.get_versions()['version']

rdtools/analysis_chains.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import numpy as np
99
import matplotlib.pyplot as plt
1010
from rdtools import normalization, filtering, aggregation, degradation
11-
from rdtools import clearsky_temperature, plotting
11+
from rdtools import clearsky_temperature, plotting, utilities
1212
import warnings
1313

1414

@@ -493,8 +493,9 @@ def _pvwatts_norm(self, poa_global, temperature_cell):
493493
if renorm:
494494
# Normalize to the 95th percentile for convenience, this is renormalized out
495495
# in the calculations but is relevant to normalized_filter()
496-
x = energy_normalized[np.isfinite(energy_normalized)]
497-
energy_normalized = energy_normalized / x.quantile(0.95)
496+
q = utilities.robust_quantile(energy_normalized[np.isfinite(energy_normalized)], 0.95)
497+
498+
energy_normalized = energy_normalized / q
498499

499500
return energy_normalized, insolation
500501

@@ -1012,7 +1013,6 @@ def sensor_analysis(
10121013
-------
10131014
None
10141015
"""
1015-
10161016
self._sensor_preprocess()
10171017
sensor_results = {}
10181018

rdtools/degradation.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import statsmodels.api as sm
66
from rdtools.bootstrap import _make_time_series_bootstrap_samples, \
77
_construct_confidence_intervals
8+
from rdtools import utilities
89

910

1011
def degradation_ols(energy_normalized, confidence_level=68.2):
@@ -259,7 +260,7 @@ def degradation_year_on_year(energy_normalized, recenter=True,
259260
if recenter:
260261
start = energy_normalized.index[0]
261262
oneyear = start + pd.Timedelta('364d')
262-
renorm = energy_normalized[start:oneyear].median()
263+
renorm = utilities.robust_median(energy_normalized[start:oneyear])
263264
else:
264265
renorm = 1.0
265266

rdtools/filtering.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from scipy.interpolate import interp1d
99
import rdtools
1010
import xgboost as xgb
11+
from rdtools import utilities
1112

1213
# Load in the XGBoost clipping model using joblib.
1314
xgboost_clipping_model = None
@@ -294,7 +295,7 @@ def pvlib_clearsky_filter(
294295
def clip_filter(power_ac, model="logic", **kwargs):
295296
"""
296297
Master wrapper for running one of the desired clipping filters.
297-
The default filter run is the quantile clipping filter.
298+
The default filter run is the logic clipping filter.
298299
299300
Parameters
300301
----------
@@ -350,7 +351,7 @@ def quantile_clip_filter(power_ac, quantile=0.98):
350351
Boolean Series of whether the given measurement is below 99% of the
351352
quantile filter.
352353
"""
353-
v = power_ac.quantile(quantile)
354+
v = utilities.robust_quantile(power_ac, quantile)
354355
return power_ac < v * 0.99
355356

356357

@@ -510,13 +511,15 @@ def _apply_overall_clipping_threshold(power_ac, clipping_mask, clipped_power_ac)
510511
periods are labeled as True and non-clipping periods are
511512
labeled as False. Has a pandas datetime index.
512513
"""
514+
q_power_ac = utilities.robust_quantile(power_ac, 0.99)
515+
q_clipped_power_ac = utilities.robust_quantile(clipped_power_ac, 0.99)
516+
513517
upper_bound_pdiff = abs(
514-
(power_ac.quantile(0.99) - clipped_power_ac.quantile(0.99))
515-
/ ((power_ac.quantile(0.99) + clipped_power_ac.quantile(0.99)) / 2)
518+
(q_power_ac - q_clipped_power_ac) / ((q_power_ac + q_clipped_power_ac) / 2)
516519
)
517520
percent_clipped = len(clipped_power_ac) / len(power_ac) * 100
518521
if (upper_bound_pdiff < 0.005) & (percent_clipped > 4):
519-
max_clip = power_ac >= power_ac.quantile(0.99)
522+
max_clip = power_ac >= q_power_ac
520523
clipping_mask = clipping_mask | max_clip
521524
return clipping_mask
522525

@@ -644,9 +647,11 @@ def logic_clip_filter(
644647
# for high frequency data sets.
645648
daily_mean = clip_pwr.resample("D").mean()
646649
df_daily = daily_mean.to_frame(name="mean")
647-
df_daily["clipping_max"] = clip_pwr.groupby(pd.Grouper(freq="D")).quantile(0.99)
648-
df_daily["clipping_min"] = clip_pwr.groupby(pd.Grouper(freq="D")).quantile(
649-
0.075
650+
df_daily["clipping_max"] = clip_pwr.groupby(pd.Grouper(freq="D")).agg(
651+
utilities.robust_quantile, q=0.99
652+
)
653+
df_daily["clipping_min"] = clip_pwr.groupby(pd.Grouper(freq="D")).agg(
654+
utilities.robust_quantile, q=0.075
650655
)
651656
daily_clipping_max = df_daily["clipping_max"].reindex(
652657
index=power_ac_copy.index, method="ffill"

rdtools/test/analysis_chains_test.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,48 @@ def sensor_analysis(sensor_parameters):
6868
return rd_analysis
6969

7070

71+
@pytest.fixture
72+
def sensor_analysis_nans(sensor_parameters):
73+
def randomly_replace_with(series, replace_with=0, fraction=0.1, seed=None):
74+
"""
75+
Randomly replace a fraction of entries in a pandas Series with input value `replace_with`.
76+
77+
Parameters:
78+
series (pd.Series): The input pandas Series.
79+
fraction (float): The fraction of entries to replace with 0. Default is 0.1 (10%).
80+
seed (int, optional): Seed for the random number generator for reproducibility.
81+
82+
Returns:
83+
pd.Series: The modified pandas Series with some entries replaced by 0.
84+
"""
85+
if seed is not None:
86+
np.random.seed(seed)
87+
88+
# Determine the number of entries to replace
89+
n_replace = int(len(series) * fraction)
90+
91+
# Randomly select indices to replace
92+
replace_indices = np.random.choice(series.index, size=n_replace, replace=False)
93+
94+
# Replace selected entries with
95+
series.loc[replace_indices] = replace_with
96+
97+
return series
98+
99+
sensor_parameters_zeros = sensor_parameters.copy()
100+
sensor_parameters_nans = sensor_parameters.copy()
101+
102+
sensor_parameters_zeros["pv"] = randomly_replace_with(sensor_parameters["pv"], seed=0)
103+
sensor_parameters_nans["pv"] = sensor_parameters_zeros["pv"].replace(0, np.nan)
104+
105+
rd_analysis_zeros = TrendAnalysis(**sensor_parameters_zeros)
106+
rd_analysis_zeros.sensor_analysis(analyses=["yoy_degradation"])
107+
108+
rd_analysis_nans = TrendAnalysis(**sensor_parameters_nans)
109+
rd_analysis_nans.sensor_analysis(analyses=["yoy_degradation"])
110+
return rd_analysis_zeros, rd_analysis_nans
111+
112+
71113
@pytest.fixture
72114
def sensor_analysis_exp_power(sensor_parameters):
73115
power_expected = normalization.pvwatts_dc_power(
@@ -209,6 +251,21 @@ def test_sensor_analysis(sensor_analysis):
209251
assert [-1, -1] == pytest.approx(ci, abs=1e-2)
210252

211253

254+
def test_sensor_analysis_nans(sensor_analysis_nans):
255+
rd_analysis_zeros, rd_analysis_nans = sensor_analysis_nans
256+
257+
yoy_results_zeros = rd_analysis_zeros.results["sensor"]["yoy_degradation"]
258+
rd_zeros = yoy_results_zeros["p50_rd"]
259+
ci_zeros = yoy_results_zeros["rd_confidence_interval"]
260+
261+
yoy_results_nans = rd_analysis_nans.results["sensor"]["yoy_degradation"]
262+
rd_nans = yoy_results_nans["p50_rd"]
263+
ci_nans = yoy_results_nans["rd_confidence_interval"]
264+
265+
assert rd_zeros == pytest.approx(rd_nans, abs=1e-2)
266+
assert ci_zeros == pytest.approx(ci_nans, abs=1e-1)
267+
268+
212269
def test_sensor_analysis_filter_components(sensor_analysis):
213270
columns = sensor_analysis.sensor_filter_components_aggregated.columns
214271
assert {'two_way_window_filter'} == set(columns)

rdtools/test/utilities_test.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import pandas as pd
2+
import numpy as np
3+
import pytest
4+
from rdtools.utilities import robust_quantile, robust_median, robust_mean
5+
6+
7+
@pytest.fixture
8+
def data():
9+
data_zeros = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
10+
data_nan = pd.Series([np.nan, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
11+
return data_zeros, data_nan
12+
13+
14+
def test_robust_quantile(data):
15+
data_zeros, data_nan = data
16+
quantile = 0.5
17+
expected_result = 5.5
18+
assert expected_result == robust_quantile(data_zeros, quantile)
19+
assert expected_result == robust_quantile(data_nan, quantile)
20+
21+
quantile = 0.25
22+
expected_result = 3.25
23+
assert expected_result == robust_quantile(data_zeros, quantile)
24+
assert expected_result == robust_quantile(data_nan, quantile)
25+
26+
quantile = 0.75
27+
expected_result = 7.75
28+
assert expected_result == robust_quantile(data_zeros, quantile)
29+
assert expected_result == robust_quantile(data_nan, quantile)
30+
31+
32+
def test_robust_median(data):
33+
data_zeros, data_nan = data
34+
expected_result = 5.5
35+
assert expected_result == robust_median(data_zeros)
36+
assert expected_result == robust_median(data_nan)
37+
38+
39+
def test_robust_mean(data):
40+
data_zeros, data_nan = data
41+
expected_result = 5.5
42+
assert expected_result == robust_mean(data_zeros)
43+
assert expected_result == robust_mean(data_nan)

rdtools/utilities.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""Utility functions for rdtools."""
2+
3+
4+
def robust_quantile(x, q):
5+
"""
6+
Compute the q-th quantile of a time series (x), ignoring small values and NaN's.
7+
NaN's and small values [x < Q(x,q)/1000] are removed before calculating the quantile.
8+
This function ensures that time series with NaN's and distributions without
9+
NaN's return the same results. Should only be used if x is expected to be ≥0.
10+
11+
Parameters
12+
----------
13+
x : pandas.Series
14+
Input time series.
15+
q : float
16+
Probability value.
17+
18+
Returns
19+
-------
20+
quantile : float
21+
The q-th quantile of x, ignoring small values and NaN's.
22+
"""
23+
24+
small = x.astype(float).fillna(0).quantile(q) / 1000
25+
q = x[x > small].quantile(q)
26+
27+
return q
28+
29+
30+
def robust_median(x, q=0.99):
31+
"""
32+
Compute the median of a time series (x), ignoring small values and NaN's.
33+
NaN's and small values [Q(x,q)/1000] are removed before calculating the mean.
34+
This function ensures that time series with NaN's and distributions without
35+
NaN's return the same results. Should only be used if x is expected to be ≥0.
36+
37+
Parameters
38+
----------
39+
x : pandas.Series
40+
Input time series.
41+
q : float, default 0.99
42+
Probability value to use for the small values threshold calculation [Q(x,q)/1000].
43+
44+
Returns
45+
-------
46+
quantile : float
47+
The q-th quantile of x, ignoring small values and NaN's.
48+
"""
49+
50+
small = x.astype(float).fillna(0).quantile(q) / 1000
51+
mdn = x[x > small].median()
52+
53+
return mdn
54+
55+
56+
def robust_mean(x, q=0.99):
57+
"""
58+
Compute the mean of a time series (x), ignoring small values and NaN's.
59+
NaN's and small values [x < Q(x,q)/1000] are removed before calculating the mean.
60+
This function ensures that time series with NaN's and distributions without
61+
NaN's return the same results. Should only be used if x is expected to be ≥0.
62+
63+
Parameters
64+
----------
65+
x : pandas.Series
66+
Input time series.
67+
q : float, default 0.99
68+
Probability value to use for the small values threshold calculation.
69+
70+
Returns
71+
-------
72+
quantile : float
73+
The q-th quantile of x, ignoring small values and NaN's.
74+
"""
75+
76+
small = x.astype(float).fillna(0).quantile(q) / 1000
77+
m = x[x > small].mean()
78+
79+
return m

0 commit comments

Comments
 (0)