from emcee import EnsembleSampler
from scipp.constants import k
from scipy.linalg import pinvh
-from scipy.optimize import minimize
-from scipy.stats import linregress, multivariate_normal
+from scipy.optimize import curve_fit
+from scipy.stats import gaussian_kde, linregress, multivariate_normal
from statsmodels.stats.correlation_tools import cov_nearest
+from statsmodels.stats.moment_helpers import corr2cov, cov2corr
from tqdm import tqdm

from kinisi import __version__
@@ -93,6 +94,7 @@ def bayesian_regression(
        self,
        start_dt: sc.Variable,
        cond_max: float = 1e16,
+        recondition: bool = False,
        fit_intercept: bool = True,
        n_samples: int = 1000,
        n_walkers: int = 32,
@@ -106,6 +108,7 @@ def bayesian_regression(

        :param start_dt: The time at which the diffusion regime begins.
        :param cond_max: The maximum condition number of the covariance matrix. Optional, default is :py:attr:`1e16`.
+        :param recondition: Whether to recondition the covariance matrix. Optional, default is :py:attr:`False`.
        :param fit_intercept: Whether to fit an intercept. Optional, default is :py:attr:`True`.
        :param n_samples: The number of MCMC samples to take. Optional, default is :py:attr:`1000`.
        :param n_walkers: The number of walkers to use in the MCMC. Optional, default is :py:attr:`32`.
@@ -119,6 +122,7 @@ def bayesian_regression(

        self._start_dt = start_dt
        self._cond_max = cond_max
+        self._recondition = recondition

        self.diff_regime = np.argwhere(self.dg['da'].coords['time interval'] >= self._start_dt)[0][0]
        self._covariance_matrix = self.compute_covariance_matrix()
@@ -149,17 +153,7 @@ def log_likelihood(theta: np.ndarray) -> float:
        if slope < 0:
            slope = 1e-20

-        def nll(*args) -> float:
-            """
-            General purpose negative log-likelihood.
-            :return: Negative log-likelihood
-            """
-            return -log_likelihood(*args)
-
-        if fit_intercept:
-            max_likelihood = minimize(nll, np.array([slope, intercept])).x
-        else:
-            max_likelihood = minimize(nll, np.array([slope])).x
+        max_likelihood = np.array([slope, intercept])

        pos = max_likelihood + max_likelihood * 1e-3 * np.random.randn(n_walkers, max_likelihood.size)
        sampler = EnsembleSampler(*pos.shape, log_likelihood)
@@ -169,7 +163,6 @@ def nll(*args) -> float:
        # sampler._random = random_state
        sampler.run_mcmc(pos, n_samples + n_burn, progress=progress, progress_kwargs={'desc': 'Likelihood Sampling'})
        self._flatchain = sampler.get_chain(flat=True, thin=n_thin, discard=n_burn)
-
        self.gradient = Samples(
            self._flatchain[:, 0], unit=(self.dg['da'].unit / self.dg['da'].coords['time interval'].unit)
        )
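For orientation, here is a minimal, self-contained sketch of the sampling pattern used above: the walkers are seeded in a small Gaussian ball around the ordinary least-squares estimate (which this change uses directly in place of a `minimize`-based refinement) and then handed to emcee. The data, toy covariance, and variable names below are illustrative only, not kinisi's API.

```python
import numpy as np
from emcee import EnsembleSampler
from scipy.stats import linregress, multivariate_normal

rng = np.random.default_rng(1)
x = np.linspace(1.0, 10.0, 20)
y = 0.6 * x + 0.1 + rng.normal(0.0, 0.05, x.size)
cov = np.eye(x.size) * 0.05 ** 2  # toy covariance; kinisi estimates this from the MSD variances

ols = linregress(x, y)


def log_likelihood(theta: np.ndarray) -> float:
    """Multivariate normal log-likelihood of a straight-line model."""
    model = theta[0] * x + theta[1]
    return multivariate_normal.logpdf(y, mean=model, cov=cov)


# Seed the walkers around the least-squares slope and intercept.
start = np.array([ols.slope, ols.intercept])
n_walkers = 32
pos = start + start * 1e-3 * rng.normal(size=(n_walkers, start.size))
sampler = EnsembleSampler(*pos.shape, log_likelihood)
sampler.run_mcmc(pos, 500, progress=False)
flatchain = sampler.get_chain(flat=True, discard=100)
```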
@@ -258,11 +251,20 @@ def compute_covariance_matrix(self) -> sc.Variable:
                value = ratio * self.dg['da'].data.variances[i]
                cov[i, j] = value
                cov[j, i] = np.copy(cov[i, j])
-        return sc.array(
-            dims=['time_interval1', 'time_interval2'],
-            values=cov_nearest(minimum_eigenvalue_method(cov[self.diff_regime:, self.diff_regime:], self._cond_max)),
-            unit=self.dg['da'].unit**2,
-        )
+        if self._recondition:
+            return sc.array(
+                dims=['time_interval1', 'time_interval2'],
+                values=cov_nearest(eigenvalue_clipping(cov_nearest(cov[self.diff_regime:, self.diff_regime:]))),
+                unit=self.dg['da'].unit**2,
+            )
+        else:
+            return sc.array(
+                dims=['time_interval1', 'time_interval2'],
+                values=cov_nearest(
+                    minimum_eigenvalue_method(cov[self.diff_regime:, self.diff_regime:], self._cond_max)
+                ),
+                unit=self.dg['da'].unit**2,
+            )

    def posterior_predictive(
        self, n_posterior_samples: int = None, n_predictive_samples: int = 256, progress: bool = True
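As a rough illustration of what the non-clipping branch is for (not kinisi's `minimum_eigenvalue_method` itself): floor the smallest eigenvalues so the condition number stays below a target, then project back to a valid covariance with `cov_nearest`. A sketch, assuming a nearly singular sample covariance:

```python
import numpy as np
from statsmodels.stats.correlation_tools import cov_nearest

rng = np.random.default_rng(0)
# Too few observations for the number of variables -> a nearly singular covariance.
samples = rng.normal(size=(6, 20))
cov = np.cov(samples, rowvar=False)

cond_max = 1e16
eigenvalues, eigenvectors = np.linalg.eigh(cov)
# Floor the spectrum so that max(eig) / min(eig) <= cond_max.
floor = eigenvalues.max() / cond_max
clipped = np.clip(eigenvalues, floor, None)
reconditioned = cov_nearest(eigenvectors @ np.diag(clipped) @ eigenvectors.T)

print(np.linalg.cond(cov), np.linalg.cond(reconditioned))
```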
@@ -338,6 +340,62 @@ def minimum_eigenvalue_method(cov: np.ndarray, cond_max=1e16) -> np.ndarray:
    return new_cov


+def eigenvalue_clipping(cov: np.ndarray) -> np.ndarray:
+    """
+    Eigenvalue clipping method for matrix reconditioning.
+
+    :param cov: Covariance matrix to recondition.
+
+    :return: Reconditioned covariance matrix.
+    """
+    corr = cov2corr(cov)
+    eigenthings = np.linalg.eig(corr)
+    eigenvalues = eigenthings.eigenvalues.real
+
+    kde = gaussian_kde(eigenvalues)
+    x = np.linspace(eigenvalues.min() - 0.5 * eigenvalues.max(), eigenvalues.max() + 0.5 * eigenvalues.max(), 10000)
+
+    popt, _ = curve_fit(marchenkopastur, x, kde.pdf(x), bounds=([0, 0], [np.inf, np.inf]), p0=[0.5, 1.0])
+
+    lambda_plus = (1 + popt[0] ** 0.5) ** 2
+    lambda_minus = (1 - popt[0] ** 0.5) ** 2
+
+    lambda_minus = np.max(eigenvalues[eigenvalues < lambda_plus])
+    new_eigenvalues = np.copy(eigenvalues)
+    new_eigenvalues[new_eigenvalues < lambda_plus] = lambda_minus
+
+    new_corr = (eigenthings.eigenvectors @ np.diag(new_eigenvalues) @ np.linalg.inv(eigenthings.eigenvectors)).real
+    S = np.diag(1 / (np.diag(new_corr)) ** 0.5)
+    new_corr = S @ new_corr @ S.T
+
+    new_cov = corr2cov(new_corr, np.sqrt(cov.diagonal()))
+    return new_cov
+
+
+def marchenkopastur(x: np.ndarray, lambda_: float, sigma: float) -> np.ndarray:
+    """
+    Marchenko-Pastur distribution.
+
+    :param x: Points at which to evaluate the distribution.
+    :param lambda_: Lambda parameter.
+    :param sigma: Standard deviation of the distribution.
+    """
+
+    def m0(a: np.ndarray) -> np.ndarray:
+        """
+        Element-wise maximum of (a, 0).
+
+        :param a: Input array.
+        :return: Element-wise maximum of (a, 0).
+        """
+        return np.maximum(a, np.zeros_like(a))
+
+    lambda_plus = (1 + lambda_ ** 0.5) ** 2
+    lambda_minus = (1 - lambda_ ** 0.5) ** 2
+
+    return np.sqrt(m0(lambda_plus - x) * m0(x - lambda_minus)) / (2 * np.pi * sigma ** 2 * lambda_ * x)
+
+
def _straight_line(abscissa: np.ndarray, gradient: float, intercept: float = 0.0) -> np.ndarray:
    """
    A one dimensional straight line function.
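To make the clipping threshold concrete, here is a small self-contained sketch of the random-matrix picture behind the new `eigenvalue_clipping` helper: for a pure-noise correlation matrix the eigenvalue spectrum is confined to the Marchenko-Pastur support [lambda_minus, lambda_plus], so eigenvalues below lambda_plus carry no signal and can safely be flattened. The edge is computed here from the textbook ratio lambda = N/T; the diff instead estimates lambda by fitting `marchenkopastur` to a KDE of the observed spectrum.

```python
import numpy as np

rng = np.random.default_rng(2)
# Pure-noise data: T observations of N uncorrelated variables.
T, N = 500, 50
data = rng.normal(size=(T, N))
corr = np.corrcoef(data, rowvar=False)
eigenvalues = np.linalg.eigvalsh(corr)

# Marchenko-Pastur support edges for a noise-only spectrum.
lam = N / T
lambda_plus = (1 + lam ** 0.5) ** 2
lambda_minus = (1 - lam ** 0.5) ** 2

# Essentially the whole spectrum sits inside [lambda_minus, lambda_plus];
# anything below lambda_plus is what eigenvalue_clipping flattens as noise.
print(f'noise band: [{lambda_minus:.3f}, {lambda_plus:.3f}]')
print(np.sum(eigenvalues > lambda_plus), 'eigenvalues above the noise edge')
```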