First linear model (#3)
* bump version to 0.0.1

* Added some tests for the new input system

* bump version to 0.0.2

* Updated zenodo badge

* towards 100% coverage

* Corrected lint

* 100% coverage

* Added version info to main file

* Corrected typo

* Thinned chains

* Added check to version
williamjameshandley authored Oct 17, 2023
1 parent 347a258 commit b1c5e39
Showing 8 changed files with 288 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -8,3 +8,4 @@ build
*~
.pytest_cache/*
.coverage
+venv
6 changes: 3 additions & 3 deletions README.rst
@@ -3,7 +3,7 @@ lsbi: Linear Simulation Based Inference
=======================================
:lsbi: Linear Simulation Based Inference
:Author: Will Handley
-:Version: 0.0.1
+:Version: 0.0.2
:Homepage: https://github.com/handley-lab/lsbi
:Documentation: http://lsbi.readthedocs.io/

@@ -19,8 +19,8 @@ lsbi: Linear Simulation Based Inference
.. image:: https://badge.fury.io/py/lsbi.svg
:target: https://badge.fury.io/py/lsbi
:alt: PyPi location
-.. image:: https://zenodo.org/badge/XXXXXXXXX.svg
-:target: https://zenodo.org/badge/latestdoi/XXXXXXXXX
+.. image:: https://zenodo.org/badge/705730277.svg
+:target: https://zenodo.org/doi/10.5281/zenodo.10009816
:alt: Permanent DOI for this release
.. image:: https://img.shields.io/badge/license-MIT-blue.svg
:target: https://github.com/handley-lab/lsbi/blob/master/LICENSE
1 change: 1 addition & 0 deletions lsbi/__init__.py
@@ -1 +1,2 @@
"""unimpeded: Universal model comparison & parameter estimation."""
+from lsbi._version import __version__  # noqa: F401
2 changes: 1 addition & 1 deletion lsbi/_version.py
@@ -1 +1 @@
-__version__ = '0.0.1'
+__version__ = '0.0.2'
138 changes: 138 additions & 0 deletions lsbi/model.py
@@ -0,0 +1,138 @@
"""Gaussian models for linear Bayesian inference."""
import numpy as np
from functools import cached_property
from scipy.stats import multivariate_normal


class LinearModel(object):
"""A linear model.
Defined by:
- Parameters: theta (n,)
- Data: D (d,)
- Prior mean: mu (n,)
- Prior covariance: Sigma (n, n)
- Data mean: m (d,)
- Data covariance: C (d, d)
- Model M: D = m + M theta +/- sqrt(C)
Parameters
----------
M : array_like, optional
Model matrix, defaults to identity matrix
m : array_like, optional
Data mean, defaults to zero vector
C : array_like, optional
Data covariance, defaults to identity matrix
mu : array_like, optional
Prior mean, defaults to zero vector
Sigma : array_like, optional
Prior covariance, defaults to identity matrix
The overall shape is inferred from the input parameters.
"""

def __init__(self, *args, **kwargs):

self.M = kwargs.pop('M', None)
self.m = kwargs.pop('m', None)
self.C = kwargs.pop('C', None)
self.mu = kwargs.pop('mu', None)
self.Sigma = kwargs.pop('Sigma', None)

n, d = None, None

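        # Infer (n, d) from whichever inputs are supplied; M is checked
        # last, so an explicit M takes precedence over the other shapes.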
if self.m is not None:
self.m = np.atleast_1d(self.m)
d, = self.m.shape
if self.C is not None:
self.C = np.atleast_2d(self.C)
d, d = self.C.shape
if self.Sigma is not None:
self.Sigma = np.atleast_2d(self.Sigma)
n, n = self.Sigma.shape
if self.mu is not None:
self.mu = np.atleast_1d(self.mu)
n, = self.mu.shape
if self.M is not None:
self.M = np.atleast_2d(self.M)
d, n = self.M.shape

if n is None:
raise ValueError('Unable to determine number of parameters n')
if d is None:
raise ValueError('Unable to determine data dimensions d')

if self.M is None:
self.M = np.eye(d, n)
if self.m is None:
self.m = np.zeros(d)
if self.C is None:
self.C = np.eye(d)
if self.mu is None:
self.mu = np.zeros(n)
if self.Sigma is None:
self.Sigma = np.eye(n)

@property
def n(self):
"""Dimensionality of parameter space len(theta)."""
return self.M.shape[1]

@property
def d(self):
"""Dimensionality of data space len(D)."""
return self.M.shape[0]

@cached_property
def invSigma(self):
"""Inverse of prior covariance."""
return np.linalg.inv(self.Sigma)

@cached_property
def invC(self):
"""Inverse of data covariance."""
return np.linalg.inv(self.C)

def likelihood(self, theta):
"""P(D|theta) as a scipy distribution object."""
return multivariate_normal(self.D(theta), self.C)

def prior(self):
"""P(theta) as a scipy distribution object."""
return multivariate_normal(self.mu, self.Sigma)

def posterior(self, D):
"""P(theta|D) as a scipy distribution object."""
Sigma = np.linalg.inv(self.invSigma + self.M.T @ self.invC @ self.M)
mu = Sigma @ (self.invSigma @ self.mu
+ self.M.T @ self.invC @ (D-self.m))
return multivariate_normal(mu, Sigma)

def evidence(self):
"""P(D) as a scipy distribution object."""
return multivariate_normal(self.D(self.mu),
self.C + self.M @ self.Sigma @ self.M.T)

def joint(self):
"""P(D, theta) as a scipy distribution object."""
mu = np.concatenate([self.D(self.mu), self.mu])
Sigma = np.block([[self.C+self.M @ self.Sigma @ self.M.T,
self.M @ self.Sigma],
[self.Sigma @ self.M.T, self.Sigma]])
return multivariate_normal(mu, Sigma)

def D(self, theta):
"""D(theta) as the underlying data model."""
return self.m + self.M @ theta

def DKL(self, D):
"""D_KL(P(theta|D)||P(theta)) the Kullback-Leibler divergence."""
cov_p = self.posterior(D).cov
cov_q = self.prior().cov
mu_p = self.posterior(D).mean
mu_q = self.prior().mean
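        # np.trace(A - 1) subtracts 1 from every element of A, but only
        # the diagonal affects the trace, so it equals trace(A) - n.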
return 0.5 * (- np.linalg.slogdet(cov_p)[1]
+ np.linalg.slogdet(cov_q)[1]
+ np.trace(np.linalg.inv(cov_q) @ cov_p - 1)
+ (mu_q - mu_p) @ np.linalg.inv(cov_q) @ (mu_q - mu_p))
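A minimal usage sketch of the class above (illustrative only, not part of the commit; the shapes d=2, n=3 and the random inputs are made up):

import numpy as np
from lsbi.model import LinearModel

M = np.random.rand(2, 3)               # illustrative model matrix: d=2, n=3
model = LinearModel(M=M)               # m, C, mu, Sigma fall back to defaults

theta = model.prior().rvs()            # draw parameters from P(theta)
data = model.likelihood(theta).rvs()   # simulate data from P(D|theta)

posterior = model.posterior(data)      # conjugate Gaussian update P(theta|D)
print(posterior.mean, model.DKL(data))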
2 changes: 0 additions & 2 deletions tests/test_example.py

This file was deleted.

140 changes: 140 additions & 0 deletions tests/test_model.py
@@ -0,0 +1,140 @@
from lsbi.model import LinearModel
import numpy as np
import scipy.stats
from numpy.testing import assert_allclose
import pytest


def _test_shape(model, d, n):
assert model.n == n
assert model.d == d
assert model.M.shape == (d, n)
assert model.m.shape == (d,)
assert model.C.shape == (d, d)
assert model.mu.shape == (n,)
assert model.Sigma.shape == (n, n)


def test_M():
model = LinearModel(M=np.random.rand())
_test_shape(model, 1, 1)

model = LinearModel(M=np.random.rand(1))
_test_shape(model, 1, 1)

model = LinearModel(M=np.random.rand(1, 5))
_test_shape(model, 1, 5)

model = LinearModel(M=np.random.rand(3, 1))
_test_shape(model, 3, 1)

model = LinearModel(M=np.random.rand(3, 5))
_test_shape(model, 3, 5)


def test_m_mu():
model = LinearModel(m=np.random.rand(), mu=np.random.rand())
_test_shape(model, 1, 1)

model = LinearModel(m=np.random.rand(1), mu=np.random.rand(1))
_test_shape(model, 1, 1)

model = LinearModel(m=np.random.rand(1), mu=np.random.rand(5))
_test_shape(model, 1, 5)

model = LinearModel(m=np.random.rand(3), mu=np.random.rand(1))
_test_shape(model, 3, 1)

model = LinearModel(m=np.random.rand(3), mu=np.random.rand(5))
_test_shape(model, 3, 5)


def test_failure():
with pytest.raises(ValueError) as excinfo:
LinearModel(m=np.random.rand(5))
assert "Unable to determine number of parameters n" in str(excinfo.value)

with pytest.raises(ValueError) as excinfo:
LinearModel(mu=np.random.rand(3))
assert "Unable to determine data dimensions d" in str(excinfo.value)


def random_model(d, n):
M = np.random.rand(d, n)
m = np.random.rand(d)
C = scipy.stats.wishart(scale=np.eye(d)).rvs()
mu = np.random.rand(n)
Sigma = scipy.stats.wishart(scale=np.eye(n)).rvs()
return LinearModel(M=M, m=m, C=C, mu=mu, Sigma=Sigma)


def test_joint():
d = 5
n = 3
N = 100
model = random_model(d, n)
prior = model.prior()
evidence = model.evidence()
joint = model.joint()

samples_1 = prior.rvs(N)
samples_2 = joint.rvs(N)[:, -n:]

for i in range(n):
p = scipy.stats.kstest(samples_1[:, i], samples_2[:, i]).pvalue
assert p > 1e-5

p = scipy.stats.kstest(prior.logpdf(samples_2),
prior.logpdf(samples_1)).pvalue
assert p > 1e-5

samples_1 = evidence.rvs(N)
samples_2 = joint.rvs(N)[:, :d]

for i in range(d):
p = scipy.stats.kstest(samples_1[:, i], samples_2[:, i]).pvalue
assert p > 1e-5

p = scipy.stats.kstest(evidence.logpdf(samples_2),
evidence.logpdf(samples_1)).pvalue
assert p > 1e-5


def test_likelihood_posterior():
d = 5
n = 3
N = 1000
model = random_model(d, n)
joint = model.joint()

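    # Gibbs sampling: alternately drawing D ~ P(D|theta) and
    # theta ~ P(theta|D) targets the joint P(D, theta); keeping every
    # 100th sample below thins the correlated chain.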
samples = []
theta = model.prior().rvs()
for _ in range(N):
data = model.likelihood(theta).rvs()
theta = model.posterior(data).rvs()
samples.append(np.concatenate([data, theta]))
samples_1 = np.array(samples)[::100]
samples_2 = joint.rvs(len(samples_1))

for i in range(n+d):
p = scipy.stats.kstest(samples_1[:, i], samples_2[:, i]).pvalue
assert p > 1e-5

p = scipy.stats.kstest(joint.logpdf(samples_2),
joint.logpdf(samples_1)).pvalue
assert p > 1e-5


def test_DKL():
d = 5
n = 3
N = 1000
model = random_model(d, n)

data = model.evidence().rvs()
posterior = model.posterior(data)
prior = model.prior()

samples = posterior.rvs(N)
Info = (posterior.logpdf(samples) - prior.logpdf(samples))
assert_allclose(Info.mean(), model.DKL(data), atol=5*Info.std()/np.sqrt(N))
4 changes: 4 additions & 0 deletions tests/test_scaffolding.py
@@ -0,0 +1,4 @@
def test_version():
from lsbi import __version__ as v1
from lsbi._version import __version__ as v2
assert v1 == v2
