Commit 04a6259

Authored by jessegrabowski (Jesse Grabowski) and ricardoV94 (Ricardo Vieira)
Pytensor 2.35 Compatibility Fixes (#597)
* Update imports
* Bump minimum version pins
* Remove windows BLAS warning filter
* Update ruff target python to 3.11
* Filter FutureWarning from preliz
* Prefer `pt.linalg` over `pt.nlinalg`
* Specify known data shape in kalman filter
* Prefer mT to T
* Statespace test cleanup
* PyTensor-related changes in marginal_model tests
* Use fixtures in test_kalman_filter
* Update pytensor version to 2.35.1
* Update version pins on pymc/pytensor
* Handle model freezing consistently between find_MAP and fit_laplace
* Cache re-used test functions on demand
* Ignore OpenMP warning
* Skip hanging pathfinder test
* Skip all pathfinder tests
* Restore warning filter
* Skip flaky histogram test

---------

Co-authored-by: Jesse Grabowski <[email protected]>
Co-authored-by: Ricardo Vieira <[email protected]>
1 parent ed19c34 · commit 04a6259

29 files changed (+141, −120 lines)

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ jobs:
           - tests/statespace/filters/test_kalman_filter.py
           - tests/statespace --ignore tests/statespace/core/test_statespace.py --ignore tests/statespace/filters/test_kalman_filter.py
           - tests/distributions
-          - tests --ignore tests/model --ignore tests/statespace --ignore tests/distributions
+          - tests --ignore tests/model --ignore tests/statespace --ignore tests/distributions --ignore tests/pathfinder
       fail-fast: false
     runs-on: ${{ matrix.os }}
     env:

conda-envs/environment-test.yml

Lines changed: 3 additions & 3 deletions
@@ -1,10 +1,10 @@
-name: pymc-extras-test
+name: pymc-extras
 channels:
   - conda-forge
   - nodefaults
 dependencies:
-  - pymc>=5.24.1
-  - pytensor>=2.31.4
+  - pymc>=5.26.1
+  - pytensor>=2.35.1
   - scikit-learn
   - better-optimize>=0.1.5
   - dask<2025.1.1

pymc_extras/inference/laplace_approx/find_map.py

Lines changed: 14 additions & 9 deletions
@@ -168,6 +168,7 @@ def find_MAP(
     jitter_rvs: list[TensorVariable] | None = None,
     progressbar: bool = True,
     include_transformed: bool = True,
+    freeze_model: bool = True,
     gradient_backend: GradientBackend = "pytensor",
     compile_kwargs: dict | None = None,
     compute_hessian: bool = False,
@@ -210,6 +211,10 @@
         Whether to display a progress bar during optimization. Defaults to True.
     include_transformed: bool, optional
         Whether to include transformed variable values in the returned dictionary. Defaults to True.
+    freeze_model: bool, optional
+        If True, freeze_dims_and_data will be called on the model before compiling the loss functions. This is
+        sometimes necessary for JAX, and can sometimes improve performance by allowing constant folding. Defaults to
+        True.
     gradient_backend: str, default "pytensor"
         Which backend to use to compute gradients. Must be one of "pytensor" or "jax".
     compute_hessian: bool
@@ -229,11 +234,13 @@
         Results of Maximum A Posteriori (MAP) estimation, including the optimized point, inverse Hessian, transformed
         latent variables, and optimizer results.
     """
-    model = pm.modelcontext(model) if model is None else model
-    frozen_model = freeze_dims_and_data(model)
     compile_kwargs = {} if compile_kwargs is None else compile_kwargs
+    model = pm.modelcontext(model) if model is None else model

-    initial_params = _make_initial_point(frozen_model, initvals, random_seed, jitter_rvs)
+    if freeze_model:
+        model = freeze_dims_and_data(model)
+
+    initial_params = _make_initial_point(model, initvals, random_seed, jitter_rvs)

     do_basinhopping = method == "basinhopping"
     minimizer_kwargs = optimizer_kwargs.pop("minimizer_kwargs", {})
@@ -251,8 +258,8 @@
     )

     f_fused, f_hessp = scipy_optimize_funcs_from_loss(
-        loss=-frozen_model.logp(),
-        inputs=frozen_model.continuous_value_vars + frozen_model.discrete_value_vars,
+        loss=-model.logp(),
+        inputs=model.continuous_value_vars + model.discrete_value_vars,
         initial_point_dict=DictToArrayBijection.rmap(initial_params),
         use_grad=use_grad,
         use_hess=use_hess,
@@ -316,12 +323,10 @@
     }

     idata = map_results_to_inference_data(
-        map_point=optimized_point, model=frozen_model, include_transformed=include_transformed
+        map_point=optimized_point, model=model, include_transformed=include_transformed
     )

-    idata = add_fit_to_inference_data(
-        idata=idata, mu=raveled_optimized, H_inv=H_inv, model=frozen_model
-    )
+    idata = add_fit_to_inference_data(idata=idata, mu=raveled_optimized, H_inv=H_inv, model=model)

     idata = add_optimizer_result_to_inference_data(
         idata=idata, result=optimizer_result, method=method, mu=raveled_optimized, model=model
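A minimal sketch of the new flag in use (the toy model and the explicit `method` argument are illustrative, not taken from this diff):

```python
import pymc as pm

from pymc_extras.inference.laplace_approx.find_map import find_MAP

with pm.Model() as model:
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("y", mu=mu, sigma=1.0, observed=[0.1, -0.3, 0.2])

# freeze_model=True (the default) applies freeze_dims_and_data before the
# loss functions are compiled; pass False when the caller has already
# frozen the model, as fit_laplace now does.
idata = find_MAP(model=model, method="BFGS", freeze_model=True)
```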

pymc_extras/inference/laplace_approx/laplace.py

Lines changed: 22 additions & 5 deletions
@@ -168,9 +168,13 @@ def _unconstrained_vector_to_constrained_rvs(model):
     unconstrained_vector.name = "unconstrained_vector"

     # Redo the names list to ensure it is sorted to match the return order
-    names = [*constrained_names, *unconstrained_names]
+    constrained_rvs_and_names = [(rv, name) for rv, name in zip(constrained_rvs, constrained_names)]
+    value_rvs_and_names = [
+        (rv, name) for rv, name in zip(value_rvs, unconstrained_names)
+    ]
+    # names = [*constrained_names, *unconstrained_names]

-    return names, constrained_rvs, value_rvs, unconstrained_vector
+    return constrained_rvs_and_names, value_rvs_and_names, unconstrained_vector


 def model_to_laplace_approx(
@@ -182,8 +186,11 @@

     # temp_chain and temp_draw are a hack to allow sampling from the Laplace approximation. We only have one mu and cov,
     # so we add batch dims (which correspond to chains and draws). But the names "chain" and "draw" are reserved.
-    names, constrained_rvs, value_rvs, unconstrained_vector = (
-        _unconstrained_vector_to_constrained_rvs(model)
+
+    # The model was frozen during the find_MAP procedure. To ensure we're operating on the same model, freeze it again.
+    frozen_model = freeze_dims_and_data(model)
+    constrained_rvs_and_names, _, unconstrained_vector = _unconstrained_vector_to_constrained_rvs(
+        frozen_model
     )

     coords = model.coords | {
@@ -204,12 +211,13 @@
     )

     cast_to_var = partial(type_cast, Variable)
+    constrained_rvs, constrained_names = zip(*constrained_rvs_and_names)
     batched_rvs = vectorize_graph(
         type_cast(list[Variable], constrained_rvs),
         replace={cast_to_var(unconstrained_vector): cast_to_var(laplace_approximation)},
     )

-    for name, batched_rv in zip(names, batched_rvs):
+    for name, batched_rv in zip(constrained_names, batched_rvs):
         batch_dims = ("temp_chain", "temp_draw")
         if batched_rv.ndim == 2:
             dims = batch_dims
@@ -285,6 +293,7 @@ def fit_laplace(
     jitter_rvs: list[pt.TensorVariable] | None = None,
     progressbar: bool = True,
     include_transformed: bool = True,
+    freeze_model: bool = True,
     gradient_backend: GradientBackend = "pytensor",
     chains: int = 2,
     draws: int = 500,
@@ -328,6 +337,10 @@
     include_transformed: bool, default True
         Whether to include transformed variables in the output. If True, transformed variables will be included in the
         output InferenceData object. If False, only the original variables will be included.
+    freeze_model: bool, optional
+        If True, freeze_dims_and_data will be called on the model before compiling the loss functions. This is
+        sometimes necessary for JAX, and can sometimes improve performance by allowing constant folding. Defaults to
+        True.
     gradient_backend: str, default "pytensor"
         The backend to use for gradient computations. Must be one of "pytensor" or "jax".
     chains: int, default: 2
@@ -376,6 +389,9 @@
     optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs
     model = pm.modelcontext(model) if model is None else model

+    if freeze_model:
+        model = freeze_dims_and_data(model)
+
     idata = find_MAP(
         method=optimize_method,
         model=model,
@@ -387,6 +403,7 @@
         jitter_rvs=jitter_rvs,
         progressbar=progressbar,
         include_transformed=include_transformed,
+        freeze_model=False,
         gradient_backend=gradient_backend,
         compile_kwargs=compile_kwargs,
         compute_hessian=True,
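The net effect is that freezing now happens exactly once per fit. A usage sketch under the same caveats (toy model, illustrative arguments):

```python
import pymc as pm

from pymc_extras.inference.laplace_approx.laplace import fit_laplace

with pm.Model() as model:
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("y", mu=mu, sigma=1.0, observed=[0.1, -0.3, 0.2])

# fit_laplace freezes dims and data up front (freeze_model=True by default)
# and forwards freeze_model=False to its internal find_MAP call, so the
# model is never frozen twice.
idata = fit_laplace(model=model, chains=2, draws=500)
```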

pymc_extras/inference/pathfinder/pathfinder.py

Lines changed: 2 additions & 5 deletions
@@ -22,7 +22,7 @@
 from collections.abc import Callable, Iterator
 from dataclasses import asdict, dataclass, field, replace
 from enum import Enum, auto
-from typing import Literal, TypeAlias
+from typing import Literal, Self, TypeAlias

 import arviz as az
 import filelock
@@ -60,9 +60,6 @@
 from rich.table import Table
 from rich.text import Text

-# TODO: change to typing.Self after Python versions greater than 3.10
-from typing_extensions import Self
-
 from pymc_extras.inference.laplace_approx.idata import add_data_to_inference_data
 from pymc_extras.inference.pathfinder.importance_sampling import (
     importance_sampling as _importance_sampling,
@@ -533,7 +530,7 @@ def bfgs_sample_sparse(

     # qr_input: (L, N, 2J)
     qr_input = inv_sqrt_alpha_diag @ beta
-    (Q, R), _ = pytensor.scan(fn=pt.nlinalg.qr, sequences=[qr_input], allow_gc=False)
+    (Q, R), _ = pytensor.scan(fn=pt.linalg.qr, sequences=[qr_input], allow_gc=False)

     IdN = pt.eye(R.shape[1])[None, ...]
     IdN += IdN * REGULARISATION_TERM
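The QR change is a pure namespace swap (`pt.nlinalg.qr` to `pt.linalg.qr`). For reference, the batched-decomposition pattern it sits in looks like this sketch, with shapes invented for illustration:

```python
import numpy as np
import pytensor
import pytensor.tensor as pt

# scan maps pt.linalg.qr over the leading axis: one QR per matrix in the stack
qr_input = pt.tensor3("qr_input")  # (batch, N, 2J)
(Q, R), _ = pytensor.scan(fn=pt.linalg.qr, sequences=[qr_input], allow_gc=False)

f = pytensor.function([qr_input], [Q, R])
q, r = f(np.random.default_rng(0).normal(size=(4, 5, 3)))
print(q.shape, r.shape)  # (4, 5, 3) (4, 3, 3)
```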

pymc_extras/model/marginal/graph_analysis.py

Lines changed: 2 additions & 2 deletions
@@ -6,8 +6,8 @@
 from pymc import SymbolicRandomVariable
 from pymc.model.fgraph import ModelVar
 from pymc.variational.minibatch_rv import MinibatchRandomVariable
-from pytensor.graph import Variable, ancestors
-from pytensor.graph.basic import io_toposort
+from pytensor.graph.basic import Variable
+from pytensor.graph.traversal import ancestors, io_toposort
 from pytensor.tensor import TensorType, TensorVariable
 from pytensor.tensor.blockwise import Blockwise
 from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise
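Both helpers still behave the same; only their home module changed in PyTensor 2.35. A small sketch with a hypothetical two-node graph:

```python
import pytensor.tensor as pt

from pytensor.graph.traversal import ancestors, io_toposort

x = pt.scalar("x")
y = pt.exp(x) + 1

# ancestors walks every variable reachable from the outputs
print([v for v in ancestors([y]) if v.name])  # [x]

# io_toposort returns the Apply nodes between inputs and outputs in
# dependency order (here: Exp first, then Add)
print(io_toposort([x], [y]))
```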

pymc_extras/statespace/core/compile.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ def compile_statespace(
         x0, P0, c, d, T, Z, R, H, Q, steps=steps, sequence_names=sequence_names
     )

-    inputs = list(pytensor.graph.basic.explicit_graph_inputs(outputs))
+    inputs = list(pytensor.graph.traversal.explicit_graph_inputs(outputs))

     _f = pm.compile(inputs, outputs, on_unused_input="ignore", **compile_kwargs)
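`explicit_graph_inputs` made the same move from `pytensor.graph.basic` to `pytensor.graph.traversal`. A minimal sketch on a hypothetical graph:

```python
import pytensor.tensor as pt

from pytensor.graph.traversal import explicit_graph_inputs

a = pt.scalar("a")
b = pt.scalar("b")
out = a * b + 1.0

# Yields the variables a compiled function would need as explicit inputs;
# constants and shared variables are excluded.
print(list(explicit_graph_inputs([out])))  # [a, b]
```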

pymc_extras/statespace/filters/kalman_filter.py

Lines changed: 2 additions & 2 deletions
@@ -200,7 +200,7 @@ def build_graph(
         self.n_endog = Z_shape[-2]

         data, a0, P0, *params = self.check_params(data, a0, P0, c, d, T, Z, R, H, Q)
-
+        data = pt.specify_shape(data, (data.type.shape[0], self.n_endog))
         sequences, non_sequences, seq_names, non_seq_names = split_vars_into_seq_and_nonseq(
             params, PARAM_NAMES
         )
@@ -658,7 +658,7 @@ def update(self, a, P, y, d, Z, H, all_nan_flag):
         # Construct upper-triangular block matrix A = [[chol(H), Z @ L_pred],
         #                                              [0,       L_pred]]
         # The Schur decomposition of this matrix will be B (upper triangular). We are
-        # more insterested in B^T:
+        # more interested in B^T:
         # Structure of B^T = [[chol(F),     0               ],
         #                     [K @ chol(F), chol(P_filtered)]
         zeros = pt.zeros((self.n_states, self.n_endog))
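The added `specify_shape` call pins the number of observed series on the data tensor, so downstream rewrites see a static trailing dimension. The same idea in isolation, with an invented `n_endog`:

```python
import pytensor.tensor as pt

n_endog = 2  # hypothetical: number of observed series, known at graph-build time

data = pt.matrix("data")  # static shape starts as (None, None)
data = pt.specify_shape(data, (data.type.shape[0], n_endog))

print(data.type.shape)  # (None, 2): the time dimension stays symbolic
```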

pymc_extras/statespace/filters/kalman_smoother.py

Lines changed: 1 addition & 3 deletions
@@ -1,8 +1,6 @@
 import pytensor
 import pytensor.tensor as pt

-from pytensor.tensor.nlinalg import matrix_dot
-
 from pymc_extras.statespace.filters.utilities import (
     quad_form_sym,
     split_vars_into_seq_and_nonseq,
@@ -105,7 +103,7 @@ def smoother_step(self, *args):
         a_hat, P_hat = self.predict(a, P, T, R, Q)

         # Use pinv, otherwise P_hat is singular when there is missing data
-        smoother_gain = matrix_dot(pt.linalg.pinv(P_hat, hermitian=True), T, P).T
+        smoother_gain = (pt.linalg.pinv(P_hat, hermitian=True) @ T @ P).mT
         a_smooth_next = a + smoother_gain @ (a_smooth - a_hat)

         P_smooth_next = P + quad_form_sym(smoother_gain, P_smooth - P_hat)
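`mT` is preferred because it transposes only the trailing two (matrix) axes, whereas `T` reverses every axis, which differs once batch dimensions appear. A quick sketch with an invented shape:

```python
import pytensor.tensor as pt

x = pt.tensor("x", shape=(4, 2, 3))  # a batch of 4 matrices

# .mT swaps only the last two axes; leading batch axes are untouched
print(x.mT.type.shape)  # (4, 3, 2)
```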

pymc_extras/utils/model_equivalence.py

Lines changed: 2 additions & 2 deletions
@@ -4,8 +4,8 @@
 from pymc.model.fgraph import fgraph_from_model
 from pytensor import Variable
 from pytensor.compile import SharedVariable
-from pytensor.graph import Constant, graph_inputs
-from pytensor.graph.basic import equal_computations
+from pytensor.graph.basic import Constant, equal_computations
+from pytensor.graph.traversal import graph_inputs
 from pytensor.tensor.random.type import RandomType
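After the move, `Constant` and `equal_computations` both come from `pytensor.graph.basic`, while `graph_inputs` lives in `pytensor.graph.traversal`. A sketch of the two helpers on hypothetical graphs:

```python
import pytensor.tensor as pt

from pytensor.graph.basic import equal_computations
from pytensor.graph.traversal import graph_inputs

x = pt.scalar("x")
y1 = x + 1.0
y2 = x + 1.0

# True: the two graphs are structurally identical
print(equal_computations([y1], [y2]))

# The roots of the graph: x plus the constant 1.0
print(list(graph_inputs([y1])))
```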
