Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 40 additions & 20 deletions src/tsam/timeseriesaggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,29 +938,49 @@ def _rescaleClusterPeriods(self, clusterOrder, clusterPeriods, extremeClusterIdx
if column in self.weightDict:
scale_ub = scale_ub * self.weightDict[column]

# difference between predicted and original sum
diff = abs(sum_raw - (sum_clu_wo_peak + sum_peak))

# use while loop to rescale cluster periods
# Bounded, sum-preserving rescale: one multiplicative shape-preserving
# step, then water-fill the residual proportional to remaining headroom
# (or depth when shrinking). Values stay strictly in [0, scale_ub], so
# the cluster's distribution top is not flattened against the cap —
# which is the condition that previously caused downstream
# _representMinMax to overshoot the input envelope.
target_np = sum_raw - sum_peak

# Shape-preserving initial shift (single application, not iterated)
if sum_clu_wo_peak > 0 and target_np > 0:
arr[idx_wo_peak, ci, :] *= target_np / sum_clu_wo_peak
# Advanced indexing returns a copy; reassign to write clipped values back
arr[idx_wo_peak, ci, :] = np.clip(arr[idx_wo_peak, ci, :], 0, scale_ub)
np.nan_to_num(arr[:, ci, :], copy=False, nan=0.0)

weights_np = weightingVec[idx_wo_peak]
a = 0
while diff > sum_raw * TOLERANCE and a < MAX_ITERATOR:
# rescale values (only non-extreme clusters)
arr[idx_wo_peak, ci, :] *= (sum_raw - sum_peak) / sum_clu_wo_peak

# reset values higher than the upper scale or less than zero
arr[:, ci, :] = np.clip(arr[:, ci, :], 0, scale_ub)

# Handle NaN (replace with 0)
np.nan_to_num(arr[:, ci, :], copy=False, nan=0.0)

# calc new sum and new diff to orig data
col_data = arr[:, ci, :]
sum_clu_wo_peak = np.sum(
weightingVec[idx_wo_peak] * col_data[idx_wo_peak, :].sum(axis=1)
)
diff = abs(sum_raw - (sum_clu_wo_peak + sum_peak))
while a < MAX_ITERATOR:
col_np = arr[idx_wo_peak, ci, :]
cur_sum_np = np.sum(weights_np * col_np.sum(axis=1))
delta = target_np - cur_sum_np
if abs(delta) <= max(abs(sum_raw), 1.0) * TOLERANCE:
break
if delta > 0:
available = scale_ub - col_np
else:
available = col_np # depth above lower bound 0
total = np.sum(weights_np * available.sum(axis=1))
if total <= TOLERANCE:
break
step = min(abs(delta), total)
direction = 1.0 if delta > 0 else -1.0
col_np += direction * step * available / total
# Guard against tiny numerical bound violations from the division
np.clip(col_np, 0, scale_ub, out=col_np)
arr[idx_wo_peak, ci, :] = col_np
a += 1

sum_clu_wo_peak = np.sum(
weights_np * arr[idx_wo_peak, ci, :].sum(axis=1)
)
diff = abs(sum_raw - (sum_clu_wo_peak + sum_peak))

# Calculate and store final deviation
deviation_pct = (diff / sum_raw) * 100 if sum_raw != 0 else 0.0
converged = a < MAX_ITERATOR
Expand Down
51 changes: 41 additions & 10 deletions src/tsam/utils/durationRepresentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,25 +189,56 @@ def _representMinMax(
# due to the change of the min and max values
# of the duration curve
delta_sum = delta_max * appearance_max + delta_min * appearance_min
mid_weights = np.asarray(
meansAndWeightsSorted[1].iloc[1:-1].values, dtype=float
)
mid_orig = np.asarray(representationValues[1:-1], dtype=float)
weighted_mid_sum = float(np.sum(mid_weights * mid_orig))
# and derive how much the other values have to be changed to preserve
# the mean of the duration curve
correction_factor = (
-delta_sum
/ (meansAndWeightsSorted[1].iloc[1:-1] * representationValues[1:-1]).sum()
)
correction_factor = -delta_sum / weighted_mid_sum if weighted_mid_sum != 0 else 0.0

if correction_factor < -1 or correction_factor > 1:
warnings.warn(
"The cluster is too small to preserve the sum of the duration curve and additionally the min and max values of the original cluster members. The min max values of the cluster are not preserved. This does not necessarily mean that the min and max values of the original time series are not preserved."
)
return representationValues

# correct the values of the duration curve such
# that the mean of the duration curve is preserved
# since the min and max values are changed
representationValues[1:-1] = np.multiply(
representationValues[1:-1], (1 + correction_factor)
)
# Initial multiplicative correction: preserves the weighted sum exactly
# and keeps zero-valued segments at zero (matching the distribution
# shape of the cluster). When no bound is violated, no further work
# is needed and this matches the legacy behaviour.
corrected = mid_orig * (1 + correction_factor)

# Clip to the cluster envelope and redistribute the mass that got
# clipped to segments that still have room. This makes the result
# envelope-safe while restoring sum preservation up to feasibility.
lower = float(sortedAttr.min())
upper = float(sortedAttr.max())
target_weighted_sum = weighted_mid_sum - delta_sum
tol = max(abs(target_weighted_sum), 1.0) * 1e-12
for _ in range(20):
np.clip(corrected, lower, upper, out=corrected)
residual = target_weighted_sum - float(np.sum(mid_weights * corrected))
if abs(residual) <= tol:
break
if residual > 0:
room = upper - corrected
else:
room = corrected - lower
weighted_room = mid_weights * room
total = float(weighted_room.sum())
if total <= tol:
# remaining mass has nowhere feasible to go
warnings.warn(
"The cluster is too small to preserve the sum of the duration curve and additionally the min and max values of the original cluster members. The min max values of the cluster are not preserved. This does not necessarily mean that the min and max values of the original time series are not preserved."
)
break
step = min(abs(residual), total)
direction = 1.0 if residual > 0 else -1.0
corrected = corrected + direction * step * room / total

representationValues[1:-1] = corrected

# change the values of the duration curve such that the min and max
# values are preserved
Expand Down
32 changes: 31 additions & 1 deletion test/test_durationRepresentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,37 @@ def test_distributionMinMaxRepresentation():
predictedPeriods.min(),
)

assert np.isclose(raw.mean(), predictedPeriods.mean(), atol=1e-4).all()
# distributionAndMinMaxRepresentation now enforces the per-cluster envelope
# strictly, so the mean can drift slightly in clusters where the old code
# would have produced values above the local max. The trade buys a hard
# guarantee that the aggregated series never exceeds the input envelope.
assert np.isclose(raw.mean(), predictedPeriods.mean(), rtol=5e-3).all()


@pytest.mark.filterwarnings("ignore:The cluster is too small:UserWarning")
def test_distributionMinMaxRepresentation_with_rescale():
    """Aggregate with segmentation and cluster-period rescaling enabled,
    then verify the reconstruction never leaves the input envelope while
    per-column totals stay close to the original."""
    original = pd.read_csv(TESTDATA_CSV, index_col=0)

    agg = tsam.TimeSeriesAggregation(
        original,
        noTypicalPeriods=24,
        segmentation=True,
        noSegments=8,
        hoursPerPeriod=24,
        sortValues=False,
        clusterMethod="hierarchical",
        representationMethod="distributionAndMinMaxRepresentation",
        distributionPeriodWise=False,
        rescaleClusterPeriods=True,
        extremePeriodMethod="None",
    )

    reconstructed = agg.predictOriginalData()

    # Hard envelope guarantee, with a tiny slack for floating-point noise.
    assert (reconstructed.max() <= original.max() + 1e-10).all()
    assert (reconstructed.min() >= original.min() - 1e-10).all()

    # Column sums should be preserved to within 0.5 % relative tolerance.
    assert np.isclose(original.sum(), reconstructed.sum(), rtol=5e-3).all()


def test_distributionRepresentation_keeps_mean():
Expand Down
Loading