Skip to content

Commit ec90c2b

Browse files
committed
Split forecast_all, improve compute_rmse_for_terms
Remove training-window concatenation from forecast_all so it returns raw forecast output only; move the concat to the notebook call site. Parameterise `compute_rmse_for_terms` with optional `n_components` (defaults to all fitted components, preserving existing behaviour). Update related docstrings in `pipeline_utils.py` and `plotting_utils.py`.
1 parent 6b3ee47 commit ec90c2b

File tree

3 files changed

+13
-25
lines changed

3 files changed

+13
-25
lines changed

Notebooks/Jumper.ipynb

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -376,8 +376,8 @@
376376
"- `load_ts_all(prepared_path, TERM_SPECS)` loads prepared component time series.\n",
377377
"- `build_predictions(...)` creates one `Predictions` object per term.\n",
378378
"- `Predictions.forecast_single_series(...)` is used as a single-component demo.\n",
379-
"- `forecast_all(...)` runs multi-component forecasts for all terms and prepends the training window.\n",
380-
"- `Simulation.reconstruct(...)` maps predicted components back to physical space.\n"
379+
"- `forecast_all(...)` runs multi-component forecasts for all terms; the training window is prepended in the notebook.\n",
380+
"- `Simulation.reconstruct(...)` maps predicted components back to physical space."
381381
]
382382
},
383383
{
@@ -445,8 +445,10 @@
445445
"outputs": [],
446446
"source": [
447447
"hats, hat_stds, metrics_all = forecast_all(\n",
448-
" TERM_SPECS, preds, dfs, train_len=train_len, steps=steps\n",
449-
")"
448+
" TERM_SPECS, preds, train_len=train_len, steps=steps\n",
449+
")\n",
450+
"# Prepend the training window to each forecast for downstream reconstruction.\n",
451+
"hats = {k: pd.concat([dfs[k][:train_len], h]) for k, h in hats.items()}"
450452
]
451453
},
452454
{

src/nemo_spinup_forecast/pipeline_utils.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ def decompose_all(sims: Mapping[str, Simulation]) -> None:
122122
def compute_rmse_for_terms(
123123
specs: Sequence[TermSpec],
124124
sims: Mapping[str, Simulation],
125+
n_components: int | None = None,
125126
) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
126127
"""Compute reconstruction error outputs for each configured term.
127128
@@ -131,24 +132,22 @@ def compute_rmse_for_terms(
131132
Term specifications defining the processing order and output keys.
132133
sims : Mapping[str, Simulation]
133134
Prepared and decomposed simulations keyed by :attr:`TermSpec.key`.
135+
n_components : int or None, default=None
136+
Number of components to use for reconstruction. When ``None``,
137+
all fitted components (``len(s.pca.components_)``) are used.
134138
135139
Returns
136140
-------
137141
tuple[dict[str, Any], dict[str, Any], dict[str, Any]]
138142
Tuple ``(recs, rmseVs, rmseMs)`` where each dictionary is keyed by
139143
:attr:`TermSpec.key`.
140-
141-
Notes
142-
-----
143-
Uses all fitted components via ``len(s.pca.components_)`` before calling
144-
:meth:`Simulation.error`.
145144
"""
146145
recs: dict[str, Any] = {}
147146
rmseVs: dict[str, Any] = {}
148147
rmseMs: dict[str, Any] = {}
149148
for spec in specs:
150149
s = sims[spec.key]
151-
n = len(s.pca.components_)
150+
n = n_components if n_components is not None else len(s.pca.components_)
152151
rec, rmseV, rmseM = s.error(n)
153152
recs[spec.key] = rec
154153
rmseVs[spec.key] = rmseV
@@ -255,21 +254,18 @@ def build_predictions(
255254
def forecast_all(
256255
specs: Sequence[TermSpec],
257256
preds: Mapping[str, Predictions],
258-
dfs: Mapping[str, pd.DataFrame],
259257
*,
260258
train_len: int,
261259
steps: int,
262260
) -> tuple[dict[str, pd.DataFrame], dict[str, Any], dict[str, Any]]:
263-
"""Forecasts for all terms and return outputs by key.
261+
"""Run parallel forecasts for all terms and return raw outputs by key.
264262
265263
Parameters
266264
----------
267265
specs : Sequence[TermSpec]
268266
Term specifications defining processing order and output keys.
269267
preds : Mapping[str, Predictions]
270268
Prediction objects keyed by :attr:`TermSpec.key`.
271-
dfs : Mapping[str, pd.DataFrame]
272-
Original component time-series DataFrames keyed by term.
273269
train_len : int
274270
Number of initial rows used as the training window.
275271
steps : int
@@ -279,21 +275,13 @@ def forecast_all(
279275
-------
280276
tuple[dict[str, pd.DataFrame], dict[str, Any], dict[str, Any]]
281277
Tuple ``(hats, hat_stds, metrics)`` keyed by :attr:`TermSpec.key`.
282-
283-
Notes
284-
-----
285-
For each term, the function prepends ``dfs[key][:train_len]`` to the
286-
forecast output from
287-
:meth:`~nemo_spinup_forecast.forecast.Predictions.parallel_forecast`.
278+
``hats`` contains the raw forecast output (forecast period only).
288279
"""
289280
hats: dict[str, pd.DataFrame] = {}
290281
hat_stds: dict[str, Any] = {}
291282
metrics: dict[str, Any] = {}
292283
for spec in specs:
293-
# Forecast each time series component for each property
294284
hat, hat_std, m = preds[spec.key].parallel_forecast(train_len, steps)
295-
# Concatenate the forecasted time series period with the reference traning period
296-
hat = pd.concat([dfs[spec.key][:train_len], hat[:]])
297285
hats[spec.key] = hat
298286
hat_stds[spec.key] = hat_std
299287
metrics[spec.key] = m

src/nemo_spinup_forecast/plotting_utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -411,8 +411,6 @@ def plot_component_timeseries(
411411
Length for the x-axis.
412412
train_len : int
413413
Number of initial time steps that belong to the training window.
414-
Must match the value used in :func:`forecast_all` to concatenate
415-
training data with the forecast.
416414
steps_per_year : int, optional
417415
Number of time steps per year, used to set x-axis tick spacing.
418416
Default is 1 (yearly data).

0 commit comments

Comments (0)