First linear model (#3)
* bump version to 0.0.1

* Added some tests for the new input system

* bump version to 0.0.2

* Updated zenodo badge

* towards 100% coverage

* Corrected lint

* 100% coverage

* Added version info to main file

* Corrected typo

* Thinned chains

* Added check to version
williamjameshandley authored Oct 17, 2023
1 parent 347a258 commit b1c5e39
Showing 8 changed files with 288 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -8,3 +8,4 @@ build
*~
.pytest_cache/*
.coverage
+venv
6 changes: 3 additions & 3 deletions README.rst
@@ -3,7 +3,7 @@ lsbi: Linear Simulation Based Inference
=======================================
:lsbi: Linear Simulation Based Inference
:Author: Will Handley
-:Version: 0.0.1
+:Version: 0.0.2
:Homepage: https://github.com/handley-lab/lsbi
:Documentation: http://lsbi.readthedocs.io/

@@ -19,8 +19,8 @@ lsbi: Linear Simulation Based Inference
.. image:: https://badge.fury.io/py/lsbi.svg
:target: https://badge.fury.io/py/lsbi
:alt: PyPi location
-.. image:: https://zenodo.org/badge/XXXXXXXXX.svg
-:target: https://zenodo.org/badge/latestdoi/XXXXXXXXX
+.. image:: https://zenodo.org/badge/705730277.svg
+:target: https://zenodo.org/doi/10.5281/zenodo.10009816
:alt: Permanent DOI for this release
.. image:: https://img.shields.io/badge/license-MIT-blue.svg
:target: https://github.com/handley-lab/lsbi/blob/master/LICENSE
1 change: 1 addition & 0 deletions lsbi/__init__.py
@@ -1 +1,2 @@
"""unimpeded: Universal model comparison & parameter estimation."""
+from lsbi._version import __version__  # noqa: F401
2 changes: 1 addition & 1 deletion lsbi/_version.py
@@ -1 +1 @@
-__version__ = '0.0.1'
+__version__ = '0.0.2'
138 changes: 138 additions & 0 deletions lsbi/model.py
@@ -0,0 +1,138 @@
"""Gaussian models for linear Bayesian inference."""
import numpy as np
from functools import cached_property
from scipy.stats import multivariate_normal


class LinearModel(object):
"""A linear model.
Defined by:
- Parameters: theta (n,)
- Data: D (d,)
- Prior mean: mu (n,)
- Prior covariance: Sigma (n, n)
- Data mean: m (d,)
- Data covariance: C (d, d)
- Model M: D = m + M theta +/- sqrt(C)
Parameters
----------
M : array_like, optional
Model matrix, defaults to identity matrix
m : array_like, optional
Data mean, defaults to zero vector
C : array_like, optional
Data covariance, defaults to identity matrix
mu : array_like, optional
Prior mean, defaults to zero vector
Sigma : array_like, optional
Prior covariance, defaults to identity matrix
The overall shape is inferred from the input parameters.
"""

def __init__(self, *args, **kwargs):

self.M = kwargs.pop('M', None)
self.m = kwargs.pop('m', None)
self.C = kwargs.pop('C', None)
self.mu = kwargs.pop('mu', None)
self.Sigma = kwargs.pop('Sigma', None)

n, d = None, None

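        # Infer (n, d) from whichever inputs are supplied; M is checked
        # last, so an explicit M takes precedence over the other shapes.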
if self.m is not None:
self.m = np.atleast_1d(self.m)
d, = self.m.shape
if self.C is not None:
self.C = np.atleast_2d(self.C)
d, d = self.C.shape
if self.Sigma is not None:
self.Sigma = np.atleast_2d(self.Sigma)
n, n = self.Sigma.shape
if self.mu is not None:
self.mu = np.atleast_1d(self.mu)
n, = self.mu.shape
if self.M is not None:
self.M = np.atleast_2d(self.M)
d, n = self.M.shape

if n is None:
raise ValueError('Unable to determine number of parameters n')
if d is None:
raise ValueError('Unable to determine data dimensions d')

if self.M is None:
self.M = np.eye(d, n)
if self.m is None:
self.m = np.zeros(d)
if self.C is None:
self.C = np.eye(d)
if self.mu is None:
self.mu = np.zeros(n)
if self.Sigma is None:
self.Sigma = np.eye(n)

@property
def n(self):
"""Dimensionality of parameter space len(theta)."""
return self.M.shape[1]

@property
def d(self):
"""Dimensionality of data space len(D)."""
return self.M.shape[0]

@cached_property
def invSigma(self):
"""Inverse of prior covariance."""
return np.linalg.inv(self.Sigma)

@cached_property
def invC(self):
"""Inverse of data covariance."""
return np.linalg.inv(self.C)

def likelihood(self, theta):
"""P(D|theta) as a scipy distribution object."""
return multivariate_normal(self.D(theta), self.C)

def prior(self):
"""P(theta) as a scipy distribution object."""
return multivariate_normal(self.mu, self.Sigma)

def posterior(self, D):
"""P(theta|D) as a scipy distribution object."""
Sigma = np.linalg.inv(self.invSigma + self.M.T @ self.invC @ self.M)
mu = Sigma @ (self.invSigma @ self.mu
+ self.M.T @ self.invC @ (D-self.m))
return multivariate_normal(mu, Sigma)

def evidence(self):
"""P(D) as a scipy distribution object."""
return multivariate_normal(self.D(self.mu),
self.C + self.M @ self.Sigma @ self.M.T)

def joint(self):
"""P(D, theta) as a scipy distribution object."""
mu = np.concatenate([self.D(self.mu), self.mu])
Sigma = np.block([[self.C+self.M @ self.Sigma @ self.M.T,
self.M @ self.Sigma],
[self.Sigma @ self.M.T, self.Sigma]])
return multivariate_normal(mu, Sigma)

def D(self, theta):
"""D(theta) as the underlying data model."""
return self.m + self.M @ theta

def DKL(self, D):
"""D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence."""
cov_p = self.posterior(D).cov
cov_q = self.prior().cov
mu_p = self.posterior(D).mean
mu_q = self.prior().mean
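        # np.trace(A - 1) subtracts 1 from every element of A, but only
        # the diagonal affects the trace, so it equals trace(A) - n.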
return 0.5 * (- np.linalg.slogdet(cov_p)[1]
+ np.linalg.slogdet(cov_q)[1]
+ np.trace(np.linalg.inv(cov_q) @ cov_p - 1)
+ (mu_q - mu_p) @ np.linalg.inv(cov_q) @ (mu_q - mu_p))
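A minimal usage sketch of the class above (illustrative only, not part of the commit; the shapes d=2, n=3 and the random inputs are made up):

import numpy as np
from lsbi.model import LinearModel

M = np.random.rand(2, 3)               # illustrative model matrix: d=2, n=3
model = LinearModel(M=M)               # m, C, mu, Sigma fall back to defaults

theta = model.prior().rvs()            # draw parameters from P(theta)
data = model.likelihood(theta).rvs()   # simulate data from P(D|theta)

posterior = model.posterior(data)      # conjugate Gaussian update P(theta|D)
print(posterior.mean, model.DKL(data))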
2 changes: 0 additions & 2 deletions tests/test_example.py

This file was deleted.

140 changes: 140 additions & 0 deletions tests/test_model.py
@@ -0,0 +1,140 @@
from lsbi.model import LinearModel
import numpy as np
import scipy.stats
from numpy.testing import assert_allclose
import pytest


def _test_shape(model, d, n):
assert model.n == n
assert model.d == d
assert model.M.shape == (d, n)
assert model.m.shape == (d,)
assert model.C.shape == (d, d)
assert model.mu.shape == (n,)
assert model.Sigma.shape == (n, n)


def test_M():
model = LinearModel(M=np.random.rand())
_test_shape(model, 1, 1)

model = LinearModel(M=np.random.rand(1))
_test_shape(model, 1, 1)

model = LinearModel(M=np.random.rand(1, 5))
_test_shape(model, 1, 5)

model = LinearModel(M=np.random.rand(3, 1))
_test_shape(model, 3, 1)

model = LinearModel(M=np.random.rand(3, 5))
_test_shape(model, 3, 5)


def test_m_mu():
model = LinearModel(m=np.random.rand(), mu=np.random.rand())
_test_shape(model, 1, 1)

model = LinearModel(m=np.random.rand(1), mu=np.random.rand(1))
_test_shape(model, 1, 1)

model = LinearModel(m=np.random.rand(1), mu=np.random.rand(5))
_test_shape(model, 1, 5)

model = LinearModel(m=np.random.rand(3), mu=np.random.rand(1))
_test_shape(model, 3, 1)

model = LinearModel(m=np.random.rand(3), mu=np.random.rand(5))
_test_shape(model, 3, 5)


def test_failure():
with pytest.raises(ValueError) as excinfo:
LinearModel(m=np.random.rand(5))
assert "Unable to determine number of parameters n" in str(excinfo.value)

with pytest.raises(ValueError) as excinfo:
LinearModel(mu=np.random.rand(3))
assert "Unable to determine data dimensions d" in str(excinfo.value)


def random_model(d, n):
M = np.random.rand(d, n)
m = np.random.rand(d)
C = scipy.stats.wishart(scale=np.eye(d)).rvs()
mu = np.random.rand(n)
Sigma = scipy.stats.wishart(scale=np.eye(n)).rvs()
return LinearModel(M=M, m=m, C=C, mu=mu, Sigma=Sigma)


def test_joint():
d = 5
n = 3
N = 100
model = random_model(d, n)
prior = model.prior()
evidence = model.evidence()
joint = model.joint()

samples_1 = prior.rvs(N)
samples_2 = joint.rvs(N)[:, -n:]

for i in range(n):
p = scipy.stats.kstest(samples_1[:, i], samples_2[:, i]).pvalue
assert p > 1e-5

p = scipy.stats.kstest(prior.logpdf(samples_2),
prior.logpdf(samples_1)).pvalue
assert p > 1e-5

samples_1 = evidence.rvs(N)
samples_2 = joint.rvs(N)[:, :d]

for i in range(d):
p = scipy.stats.kstest(samples_1[:, i], samples_2[:, i]).pvalue
assert p > 1e-5

p = scipy.stats.kstest(evidence.logpdf(samples_2),
evidence.logpdf(samples_1)).pvalue
assert p > 1e-5


def test_likelihood_posterior():
d = 5
n = 3
N = 1000
model = random_model(d, n)
joint = model.joint()

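    # Gibbs sampling: alternately drawing D ~ P(D|theta) and
    # theta ~ P(theta|D) targets the joint P(D, theta); keeping every
    # 100th sample below thins the correlated chain.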
samples = []
theta = model.prior().rvs()
for _ in range(N):
data = model.likelihood(theta).rvs()
theta = model.posterior(data).rvs()
samples.append(np.concatenate([data, theta]))
samples_1 = np.array(samples)[::100]
samples_2 = joint.rvs(len(samples_1))

for i in range(n+d):
p = scipy.stats.kstest(samples_1[:, i], samples_2[:, i]).pvalue
assert p > 1e-5

p = scipy.stats.kstest(joint.logpdf(samples_2),
joint.logpdf(samples_1)).pvalue
assert p > 1e-5


def test_DKL():
d = 5
n = 3
N = 1000
model = random_model(d, n)

data = model.evidence().rvs()
posterior = model.posterior(data)
prior = model.prior()

samples = posterior.rvs(N)
Info = (posterior.logpdf(samples) - prior.logpdf(samples))
assert_allclose(Info.mean(), model.DKL(data), atol=5*Info.std()/np.sqrt(N))
4 changes: 4 additions & 0 deletions tests/test_scaffolding.py
@@ -0,0 +1,4 @@
def test_version():
from lsbi import __version__ as v1
from lsbi._version import __version__ as v2
assert v1 == v2
