Skip to content

Infer thresholds for detect_clearsky #1784

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/sphinx/source/whatsnew/v0.10.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Enhancements
:py:func:`pvlib.iotools.get_pvgis_hourly`, :py:func:`pvlib.iotools.get_cams`,
:py:func:`pvlib.iotools.get_bsrn`, and :py:func:`pvlib.iotools.read_midc_raw_data_from_nrel`.
(:pull:`1800`)
* Added option to infer threshold values for
:py:func:`pvlib.clearsky.detect_clearsky` (:issue:`1808`, :pull:`1784`)


Bug fixes
Expand All @@ -39,4 +41,5 @@ Requirements
Contributors
~~~~~~~~~~~~
* Adam R. Jensen (:ghuser:`AdamRJensen`)
* Abigail Jones (:ghuser:`ajonesr`)
* Taos Transue (:ghuser:`reepoi`)
72 changes: 69 additions & 3 deletions pvlib/clearsky.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,8 +644,38 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H):
return clear_samples


def detect_clearsky(measured, clearsky, times=None, window_length=10,
mean_diff=75, max_diff=75,
def _clearsky_get_threshold(sample_interval):
"""
Returns threshold values for kwargs in detect_clearsky. See
Table 1 in [1].

References
----------
.. [1] Jordan, D.C. and C. Hansen, "Clear-sky detection for PV
degradation analysis using multiple regression", Renewable Energy,
v209, p. 393-400, 2023.
"""

if (sample_interval < 1 or sample_interval > 30):
raise ValueError("`infer_limits=True` can only be used for inputs \
with time step from 1 to 30 minutes")

data_freq = np.array([1, 5, 15, 30])

window_length = np.interp(sample_interval, data_freq, [50, 60, 90, 120])
mean_diff = np.interp(sample_interval, data_freq, [75, 75, 75, 75])
max_diff = np.interp(sample_interval, data_freq, [60, 65, 75, 90])
lower_line_length = np.interp(sample_interval, data_freq, [-45,-45,-45,-45])
upper_line_length = np.interp(sample_interval, data_freq, [80, 80, 80, 80])
var_diff = np.interp(sample_interval, data_freq, [0.005, 0.01, 0.032, 0.07])
slope_dev = np.interp(sample_interval, data_freq, [50, 60, 75, 96])

return (window_length, mean_diff, max_diff, lower_line_length,
upper_line_length, var_diff, slope_dev)


def detect_clearsky(measured, clearsky, times=None, infer_limits=False,
window_length=10, mean_diff=75, max_diff=75,
lower_line_length=-5, upper_line_length=10,
var_diff=0.005, slope_dev=8, max_iterations=20,
return_components=False):
Expand Down Expand Up @@ -676,6 +706,9 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10,
times : DatetimeIndex or None, default None.
Times of measured and clearsky values. If None the index of measured
will be used.
infer_limits : bool, default False
If True, does not use passed in kwargs (or defaults), but instead
interpolates these values from Table 1 in [2]_.
window_length : int, default 10
Length of sliding time window in minutes. Must be greater than 2
periods.
Expand Down Expand Up @@ -731,6 +764,9 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10,
.. [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear
sky irradiance in time series of GHI measurements" Renewable Energy,
v90, p. 520-531, 2016.
.. [2] Jordan, D.C. and C. Hansen, "Clear-sky detection for PV
degradation analysis using multiple regression", Renewable Energy,
v209, p. 393-400, 2023. :doi:`10.1016/j.renene.2023.04.035`

Notes
-----
Expand Down Expand Up @@ -773,6 +809,21 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10,
sample_interval, samples_per_window = \
tools._get_sample_intervals(times, window_length)

# if infer_limits, find threshold values using the sample interval
if infer_limits:
window_length, mean_diff, max_diff, lower_line_length, \
upper_line_length, var_diff, slope_dev = \
_clearsky_get_threshold(sample_interval)

# recalculate samples_per_window using returned window_length
_, samples_per_window = \
tools._get_sample_intervals(times, window_length)

# check that we have enough data to produce a nonempty hankel matrix
if len(times) < samples_per_window:
raise ValueError(f"times has only {len(times)} entries, but it must \
have at least {samples_per_window} entries")

# generate matrix of integers for creating windows with indexing
H = hankel(np.arange(samples_per_window),
np.arange(samples_per_window-1, len(times)))
Expand Down Expand Up @@ -826,7 +877,22 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10,
def rmse(alpha):
return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2))

alpha = minimize_scalar(rmse).x
optimize_result = minimize_scalar(rmse)
if not optimize_result.success:
try:
message = "Optimizer exited unsuccessfully: " \
+ optimize_result.message
except AttributeError:
message = "Optimizer exited unsuccessfully: \
No message explaining the failure was returned. \
If you would like to see this message, please \
update your scipy version (try version 1.8.0 \
or beyond)."
raise RuntimeError(message)

else:
alpha = optimize_result.x

if round(alpha*10000) == round(previous_alpha*10000):
break
else:
Expand Down
126 changes: 126 additions & 0 deletions pvlib/data/detect_clearsky_threshold_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# latitude:35.04
# longitude:-106.62
# elevation:1619
# window_length:10
Time (UTC),GHI,Clear or not
4/1/2012 17:30,862.0935267144818,1
4/1/2012 17:31,863.9568654037691,1
4/1/2012 17:32,865.802767695237,1
4/1/2012 17:33,867.6311939211416,1
4/1/2012 17:34,869.4421084958154,1
4/1/2012 17:35,871.2354749580768,1
4/1/2012 17:36,873.0112572200139,1
4/1/2012 17:37,874.7694195555663,1
4/1/2012 17:38,876.5099266138993,1
4/1/2012 17:39,878.2327434081394,1
4/1/2012 17:40,879.9378353205582,1
4/1/2012 17:41,881.6251681074431,1
4/1/2012 17:42,500.0,0
4/1/2012 17:43,300.0,0
4/1/2012 17:44,400.0,0
4/1/2012 17:45,888.1962370575133,1
4/1/2012 17:46,889.7942734332919,1
4/1/2012 17:47,891.3743529660121,1
4/1/2012 17:48,892.9364440027008,1
4/1/2012 17:49,894.4805152569894,1
4/1/2012 17:50,896.0065358138269,1
4/1/2012 17:51,897.514475133874,1
4/1/2012 17:52,899.0043030438522,1
4/1/2012 17:53,900.4759897479845,1
4/1/2012 17:54,901.9295058184756,1
4/1/2012 17:55,903.3648231563722,1
4/1/2012 17:56,950.0,0
4/1/2012 17:57,906.1807424805852,1
4/1/2012 17:58,907.5612891914101,1
4/1/2012 17:59,908.9235237269177,1
4/1/2012 18:00,910.2674188971979,1
4/1/2012 18:01,911.592947889838,1
4/1/2012 18:02,912.9000842614388,1
4/1/2012 18:03,914.1888019477013,1
4/1/2012 18:04,915.4590752551169,1
4/1/2012 18:05,916.7108788648916,1
4/1/2012 18:06,917.9441886574208,1
4/1/2012 18:07,919.1589784086842,1
4/1/2012 18:08,920.3552247618425,1
4/1/2012 18:09,921.5329038989687,1
4/1/2012 18:10,922.691992377965,1
4/1/2012 18:11,923.8324671359268,1
4/1/2012 18:12,924.9543054818693,1
4/1/2012 18:13,926.057485105408,1
4/1/2012 18:14,927.141984069634,1
4/1/2012 18:15,928.2077808145282,1
4/1/2012 18:16,929.254854160055,1
4/1/2012 18:17,930.2831839827653,1
4/1/2012 18:18,931.2927484780441,1
4/1/2012 18:19,932.2835282910601,1
4/1/2012 18:20,933.2555037490038,1
4/1/2012 18:21,934.2086555597849,1
4/1/2012 18:22,935.1429648059466,1
4/1/2012 18:23,936.058412951919,1
4/1/2012 18:24,936.9549818381174,1
4/1/2012 18:25,937.8326536837798,1
4/1/2012 18:26,938.6914110895166,1
4/1/2012 18:27,939.5312370318452,1
4/1/2012 18:28,940.3521148697101,1
4/1/2012 18:29,941.1540288705581,1
4/1/2012 18:30,941.9369620746523,1
4/1/2012 18:31,942.7008995238247,1
4/1/2012 18:32,943.4458260928685,1
4/1/2012 18:33,944.1717270394992,1
4/1/2012 18:34,944.8785879996012,1
4/1/2012 18:35,945.5663949895419,1
4/1/2012 18:36,946.2351344081491,1
4/1/2012 18:37,946.8847930330305,1
4/1/2012 18:38,947.5153580226918,1
4/1/2012 18:39,948.126816918376,1
4/1/2012 18:40,948.7191580309192,1
4/1/2012 18:41,949.2923688693852,1
4/1/2012 18:42,949.8464385206038,1
4/1/2012 18:43,950.3813560493355,1
4/1/2012 18:44,950.8971109033968,1
4/1/2012 18:45,951.3936929103834,1
4/1/2012 18:46,951.8710922815745,1
4/1/2012 18:47,952.3292996087865,1
4/1/2012 18:48,952.7683058659376,1
4/1/2012 18:49,953.1881024102506,1
4/1/2012 18:50,953.5886809795984,1
4/1/2012 18:51,953.9700339452911,1
4/1/2012 18:52,954.3321532981175,1
4/1/2012 18:53,954.6750321861093,1
4/1/2012 18:54,954.9986638782349,1
4/1/2012 18:55,955.3030420248847,1
4/1/2012 18:56,955.5881606602495,1
4/1/2012 18:57,955.8540142004646,1
4/1/2012 18:58,956.1005974445337,1
4/1/2012 18:59,956.3279055749699,1
4/1/2012 19:00,956.5359341563872,1
4/1/2012 19:01,956.7246791371283,1
4/1/2012 19:02,956.8941369551687,1
4/1/2012 19:03,957.0443040971436,1
4/1/2012 19:04,957.175177780536,1
4/1/2012 19:05,957.2867554853715,1
4/1/2012 19:06,957.3790350752402,1
4/1/2012 19:07,957.4520147966049,1
4/1/2012 19:08,957.5056932791584,1
4/1/2012 19:09,957.5400695359402,1
4/1/2012 19:10,957.5551429630466,1
4/1/2012 19:11,957.5509133398784,1
4/1/2012 19:12,957.527380828979,1
4/1/2012 19:13,957.4845459762313,1
4/1/2012 19:14,957.4224096624041,1
4/1/2012 19:15,957.3409732831614,1
4/1/2012 19:16,957.2402384985319,1
4/1/2012 19:17,957.1202073871192,1
4/1/2012 19:18,956.9808824107419,1
4/1/2012 19:19,956.8222664139458,1
4/1/2012 19:20,956.6443626248492,1
4/1/2012 19:21,956.4471746540574,1
4/1/2012 19:22,956.2307064955613,1
4/1/2012 19:23,955.9949625264852,1
4/1/2012 19:24,955.7399475061342,1
4/1/2012 19:25,955.4656663871413,1
4/1/2012 19:26,955.1721250622959,1
4/1/2012 19:27,954.8593292624192,1
4/1/2012 19:28,954.5272852786308,1
4/1/2012 19:29,954.175999783701,1
4/1/2012 19:30,953.8054798341012,1
55 changes: 55 additions & 0 deletions pvlib/tests/test_clearsky.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,49 @@ def detect_clearsky_data():
return expected, cs


@pytest.fixture
def detect_clearsky_threshold_data():
# this is (roughly) just a 2 hour period of the same data in
# detect_clearsky_data (which only spans 30 minutes)
data_file = DATA_DIR / 'detect_clearsky_threshold_data.csv'
expected = pd.read_csv(
data_file, index_col=0, parse_dates=True, comment='#')
expected = expected.tz_localize('UTC').tz_convert('Etc/GMT+7')
metadata = {}
with data_file.open() as f:
for line in f:
if line.startswith('#'):
key, value = line.strip('# \n').split(':')
metadata[key] = float(value)
else:
break
metadata['window_length'] = int(metadata['window_length'])
loc = Location(metadata['latitude'], metadata['longitude'],
altitude=metadata['elevation'])
# specify turbidity to guard against future lookup changes
cs = loc.get_clearsky(expected.index, linke_turbidity=2.658197)
return expected, cs


def test_clearsky_get_threshold():
out = clearsky._clearsky_get_threshold(4.5)
expected = (58.75, 75, 64.375, -45, 80.0, 0.009375, 58.75)
assert np.allclose(out, expected)


def test_clearsky_get_threshold_raises_error():
with pytest.raises(ValueError, match='can only be used for inputs'):
clearsky._clearsky_get_threshold(0.5)


def test_detect_clearsky_calls_threshold(mocker, detect_clearsky_threshold_data):
threshold_spy = mocker.spy(clearsky, '_clearsky_get_threshold')
expected, cs = detect_clearsky_threshold_data
threshold_actual = clearsky.detect_clearsky(expected['GHI'], cs['ghi'],
infer_limits=True)
assert threshold_spy.call_count == 1


def test_detect_clearsky(detect_clearsky_data):
expected, cs = detect_clearsky_data
clear_samples = clearsky.detect_clearsky(
Expand Down Expand Up @@ -629,6 +672,18 @@ def test_detect_clearsky_missing_index(detect_clearsky_data):
clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values)


def test_detect_clearsky_not_enough_data(detect_clearsky_data):
expected, cs = detect_clearsky_data
with pytest.raises(ValueError, match='have at least'):
clearsky.detect_clearsky(expected['GHI'], cs['ghi'], window_length=60)


def test_detect_clearsky_optimizer_failed(detect_clearsky_data):
expected, cs = detect_clearsky_data
with pytest.raises(RuntimeError, match='Optimizer exited unsuccessfully'):
clearsky.detect_clearsky(expected['GHI'], cs['ghi'], window_length=15)


@pytest.fixture
def detect_clearsky_helper_data():
samples_per_window = 3
Expand Down