Implemented credibility_interval() #188

Open pull request: wants to merge 89 commits into master from credibility-interval

Commits (89); the diff below shows changes from 35 commits.
00c61e5
Implemented Samples.credibility_interval
Mar 22, 2022
8098bfd
Improve documentation
Mar 22, 2022
f6afe0a
Formatting / make flake8 happy
Mar 22, 2022
6c816eb
pydocstyle compliance
Mar 22, 2022
e3c1f21
Fix typo made while flake8-ing
Mar 22, 2022
c22e304
Merge branch 'williamjameshandley:master' into credibility-interval
Stefan-Heimersheim Jun 8, 2022
3f8f2cb
Merge branch 'master' into credibility-interval
williamjameshandley Jul 12, 2022
a39c038
That will teach me to try and use github to manually merge
williamjameshandley Jul 12, 2022
be0eb50
Move credibility_interval to utils
Jul 19, 2022
3b20f1c
Formatting
Jul 19, 2022
4a66d74
flake8 wants def rather than lambda
Jul 19, 2022
0d53bce
Remove unneeded variable
Jul 19, 2022
d6b8bb9
Remove imports no longer needed
Jul 19, 2022
da7ebaf
docstring format
Jul 19, 2022
92f24be
Updated CI checks to be in line with github requirements
williamjameshandley Jul 27, 2022
19b3e1a
Moving to full coverage
williamjameshandley Jul 27, 2022
e97526e
Added a link to the fastCL repo
williamjameshandley Jul 27, 2022
12a6bc8
Unified quantiles, cdfs and credibility-intervals
williamjameshandley Jul 27, 2022
bcf2ae1
Added some CDF tests
williamjameshandley Jul 28, 2022
095a64a
Merge branch 'master' into credibility-interval
williamjameshandley Aug 3, 2022
cf95f46
Merge branch 'master' into credibility-interval
williamjameshandley Aug 5, 2022
7f56e78
Merge branch 'master' into credibility-interval
lukashergt Aug 6, 2022
d9c4076
Merge branch 'master' into credibility-interval
lukashergt Aug 9, 2022
25e3843
Merge branch 'master' into credibility-interval
lukashergt Aug 10, 2022
693abef
Implemented compress_weights-based credibility_interval with uncertai…
Nov 3, 2022
f6e9b40
Merge remote-tracking branch 'upstream/master' into credibility-interval
Apr 6, 2023
7d634b5
Remove cdf() function and quantile changes
Apr 6, 2023
f33decf
Implement covariance and clean up code
Apr 6, 2023
0b938e1
Fix indexing
Apr 10, 2023
3d26a41
Update tests
Apr 10, 2023
e5cb4b6
flake8 compliance
Apr 10, 2023
30c70bf
Merge branch 'master' into credibility-interval
Apr 10, 2023
36c58d2
Merge branch 'master' into credibility-interval
lukashergt Apr 11, 2023
4f79a09
version bump from 2.0.0-beta.28 to 2.0.0-beta.29
lukashergt Apr 11, 2023
dccfe4b
fix docstring formatting of `utils.credibility_interval`
lukashergt Apr 12, 2023
84ed5b6
Add tests for other methods
Apr 26, 2023
d1aaf02
Added tests for compress_samples
Apr 26, 2023
2527895
Updated docstring
Apr 26, 2023
b466d1a
Removed u kw argument
Apr 26, 2023
0d59b7c
n_iter to nsamples
Apr 26, 2023
2a0d3b8
Added CDF fill_value; added tests
Apr 26, 2023
ed4607f
Typo
Apr 26, 2023
b4b78e6
Renamed methods to full names
Apr 26, 2023
9911dd2
flake8 compatibility
Apr 26, 2023
5ac5fca
Implemented Samples.credibility_interval
Apr 26, 2023
86d3836
flake8 compatibility
Apr 26, 2023
dbd1303
Implemented suggestions
May 15, 2023
7ce8b59
Improved tests with Lukas' suggestions
May 15, 2023
f016fa5
Improve tests
May 15, 2023
fb42c59
bump version number to 2.0.0b30
lukashergt May 15, 2023
5c14599
Merge branch 'master' into credibility-interval
lukashergt May 15, 2023
1997ffd
Update _version.py
lukashergt May 15, 2023
b0862e1
Merge branch 'master' into credibility-interval
lukashergt May 25, 2023
a92f822
Update _version.py
lukashergt May 27, 2023
4d16340
Update README.rst
lukashergt May 27, 2023
1e78eb7
Merge branch 'master' into credibility-interval
lukashergt Jun 7, 2023
7fdd99a
remove `verbose` kwarg which is now unused
lukashergt Jun 7, 2023
8a44f17
remove also docstring of `verbose` kwarg
lukashergt Jun 7, 2023
1327055
rewrite `credibility_interval` _method_ to automatically do all DataF…
lukashergt Jun 9, 2023
16a0d42
version bump to 2.0.0-beta.35
lukashergt Jun 14, 2023
49a3d1b
version bump to 2.0.0-beta.36
lukashergt Jun 14, 2023
48444c4
Merge branch 'master' into credibility-interval
lukashergt Jun 14, 2023
d1cdd8c
fix flake8: blank line contains whitespace
lukashergt Jun 14, 2023
f641a40
Merge branch 'master' into credibility-interval
lukashergt Jun 14, 2023
5fb72f1
version bump to 2.0.0b37
lukashergt Jun 14, 2023
bad654c
update from `ncompress=-1` to `ncompress='equal'`
lukashergt Jun 14, 2023
26ce56a
Merge branch 'master' into credibility-interval
lukashergt Jun 14, 2023
39337dc
update another instance of `ncompress=-1` to `ncompress='equal'`
lukashergt Jun 14, 2023
010a02e
implement `return_covariance` option for lower-limit and upper-limit …
lukashergt Jun 15, 2023
f05f3fd
update `test_credibility_interval` to new dataframe return values
lukashergt Jun 15, 2023
f1e4d11
Merge branch 'master' into credibility-interval
williamjameshandley Jun 29, 2023
707376b
Merge branch 'master' into credibility-interval
lukashergt Jul 19, 2023
463ae88
Merge branch 'master' into credibility-interval
williamjameshandley Jul 26, 2023
49863cc
newline
williamjameshandley Jul 26, 2023
1ec5b8e
move `credibility_interval` method from `samples.py` to `weighted_pan…
lukashergt Jul 27, 2023
0c63e6e
add tests for Series method of `credibility_interval`
lukashergt Jul 27, 2023
54bf848
Merge branch 'master' into credibility-interval
lukashergt Jul 27, 2023
292aba5
Merge branch 'master' into credibility-interval
lukashergt Jul 31, 2023
b85b84d
Merge branch 'master' into credibility-interval
williamjameshandley Aug 1, 2023
525381c
Merge branch 'master' into credibility-interval
williamjameshandley Aug 4, 2023
9fabe8d
Remove unnecessary assertion
Aug 4, 2023
3da3115
Merge branch 'master' into credibility-interval
lukashergt Aug 15, 2023
56a0c6c
version bump to 2.3.0
lukashergt Aug 15, 2023
8aa2bb8
Merge branch 'master' into credibility-interval
lukashergt Aug 16, 2023
5832781
Merge branch 'master' into credibility-interval
lukashergt Aug 16, 2023
48b1a8b
version bump to 2.4.0
lukashergt Aug 16, 2023
202d754
Merge branch 'master' into credibility-interval
lukashergt Sep 30, 2023
fe25aeb
version bump to 2.5.0
lukashergt Sep 30, 2023
3e6bd4b
Merge branch 'master' into credibility-interval
williamjameshandley May 15, 2024
README.rst (2 changes: 1 addition & 1 deletion)

@@ -2,7 +2,7 @@
anesthetic: nested sampling post-processing
===========================================
:Authors: Will Handley and Lukas Hergt
-:Version: 2.0.0-beta.28
+:Version: 2.0.0-beta.29
:Homepage: https://github.com/handley-lab/anesthetic
:Documentation: http://anesthetic.readthedocs.io/

anesthetic/_version.py (2 changes: 1 addition & 1 deletion)

@@ -1 +1 @@
-__version__ = '2.0.0b28'
+__version__ = '2.0.0b29'
anesthetic/utils.py (143 changes: 143 additions & 0 deletions)

@@ -3,6 +3,7 @@
import pandas
from scipy import special
from scipy.interpolate import interp1d
from scipy.optimize import minimize_scalar
from scipy.stats import kstwobign
from matplotlib.tri import Triangulation
import contextlib
@@ -89,6 +90,148 @@ def quantile(a, q, w=None, interpolation='linear'):
    return quant


def sample_cdf(samples, inverse=False, interpolation='linear'):
    """Sample the empirical cdf for a 1d array."""
    samples = np.array(samples)
    samples.sort()
    ngaps = len(samples)-1
    gaps = np.random.dirichlet(np.ones(ngaps))
    cdf = np.array([0, *np.cumsum(gaps)])
    assert np.isclose(cdf[-1], 1, atol=1e-9, rtol=1e-9), \
        "Error: CDF does not reach 1 but "+str(cdf[-1])
    # Set the last element (tested to be approx 1)
    # to exactly 1 to avoid interpolation errors
    cdf[-1] = 1
Collaborator:

Would it be more appropriate to normalise as follows?

cdf /= cdf[-1]

Or would it be better to handle bound errors within interp1d? (see below)

Stefan-Heimersheim (Collaborator, Author), Apr 26, 2023:

Honestly it doesn't matter since atol=1e-9; this is just dealing with Python float precision. So let's stick to the simpler version to avoid looking like we're doing some actual normalisation.

Collaborator:

I would strongly prefer for there not to be an assert statement (which the normalisation would fix). This kind of thing would be infuriating as part of a large automated pipeline, where floating-point errors derail a larger workflow.
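A hedged sketch of the two alternatives raised in this thread (hypothetical as written here, though the commit log above does record a later "Added CDF fill_value" commit): normalise instead of asserting, and let interp1d clamp out-of-range queries to [0, 1].

    cdf /= cdf[-1]  # exact normalisation, removing the need for the assert
    cdf_interp = interp1d(samples, cdf, kind='linear',
                          bounds_error=False, fill_value=(0, 1))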

    if inverse:
        return interp1d(cdf, samples, kind=interpolation)
    else:
        return interp1d(samples, cdf, kind=interpolation)
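For orientation, a minimal usage sketch of sample_cdf (not part of the diff; it assumes numpy is imported as np, as in utils.py). Each call draws a fresh Dirichlet realisation of the empirical CDF, which is what the iteration loop in credibility_interval below exploits to put error bars on the interval boundaries.

    np.random.seed(0)
    x = np.random.normal(size=1000)
    cdf = sample_cdf(x)                # maps sample values to [0, 1]
    inv = sample_cdf(x, inverse=True)  # maps [0, 1] back to sample values
    print(cdf(0.0))  # close to 0.5 for a standard normal
    print(inv(0.5))  # close to 0.0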


def credibility_interval(samples, weights=None, level=0.68, method="hpd",
                         u=None, n_iter=12, return_covariance=False,
                         verbose=False):
Collaborator:

• Considering that this function might be called on its own (as opposed to as a method of MCMCSamples or NestedSamples), it would probably be good for all function arguments to appear in the docstring.
• Should we add **interpolation_kwargs, which we then pass on to sample_cdf, which uses {'kind': 'linear'} as the default but allows other interpolation kinds? Would it be possible to implement that in a way that lets the user dynamically opt for the discrete empirical distribution function instead, through one of 'nearest', 'previous', or 'next'?

Stefan-Heimersheim (Collaborator, Author):

Good point, updated the docstring!

I would rather not add interpolation_kwargs because the interpolation should not matter (thousands of data points). In any case where the interpolation does matter (10-ish points), the method is going to be wrong anyway, and there exists no good interpolation one could choose.

Collaborator:

I agree that for a lot of samples the interpolation method hardly matters. But a lot of the discussion above was specifically about exploring different interpolation options. So I would suggest picking your favourite as the default, but giving the user full flexibility. For one, this would allow the user to at least try different interpolation methods to get a sense of how much that might affect the results.
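A hypothetical sketch of what this suggestion could look like (not code from this PR): forward interpolation keyword arguments through to interp1d, so that e.g. kind='previous' or kind='next' recovers the discrete empirical distribution function.

    def sample_cdf(samples, inverse=False, **interpolation_kwargs):
        """Hypothetical variant of sample_cdf forwarding interp1d kwargs."""
        interpolation_kwargs.setdefault('kind', 'linear')
        samples = np.sort(np.array(samples))
        gaps = np.random.dirichlet(np.ones(len(samples)-1))
        cdf = np.array([0, *np.cumsum(gaps)])
        cdf[-1] = 1  # pin the endpoint to avoid interpolation errors
        if inverse:
            return interp1d(cdf, samples, **interpolation_kwargs)
        return interp1d(samples, cdf, **interpolation_kwargs)

    # e.g. a step-function inverse CDF instead of a linear one:
    # inv_step = sample_cdf(x, inverse=True, kind='previous')

credibility_interval would then accept **interpolation_kwargs of its own and pass them on when it constructs invCDF.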

"""Compute the credibility interval of weighted samples.

Based on linear interpolation of the cumulative density function, thus
expect discretization errors on the scale of distances between samples.

https://github.com/Stefan-Heimersheim/fastCI#readme

Parameters
----------
samples : array
Samples to compute the credibility interval of.
weights : array, default=np.ones_like(samples)
Weights corresponding to samples.
level : float, default=0.68
Credibility level (probability, <1).
method : str, default='hpd'
Which definition of interval to use:

* ``'hpd'``: Calculate highest (average) posterior density (HPD)
interval, equivalent to iso-pdf interval (interval with same
probability density at each end, also known as waterline-interval).
This only works if the distribution is unimodal.
* ``'ll'``/``'ul'``: Lower/upper limit. One-sided limits for which
``level`` fraction of the (equally weighted) samples lie above/below
the limit.
* ``'et'``: Equal-tailed interval with the same fraction of (equally
weighted) samples below and above the interval region.

u : array, optional
Random values to use for reproducible sample compression.
Default: ``np.random.rand(len(weights))`` if non-unit weights used.
Collaborator:

Should the random value array u be part of this function? I feel like this is combining two things (sample compression and computation of the C.I.) into one function, where it would make more sense to handle them separately...?

Also, note that the uncertainty here comes from multiple CDF-samples of the same data, and

• variance between multiple CDF-samples from the same data
• variance between CDFs computed from multiple data-samples of the same distribution

are not the same. Shouldn't we try and fold both of those uncertainties into our estimates? Currently we account for only the first point, right? If we were to use a different u for each iteration (i.e. for each of the n_iter samples), then that would contain the uncertainty from the sample compression...

Stefan-Heimersheim (Collaborator, Author):

> Should the random value array u be part of this function? I feel like this is combining two things (sample compression and computation of the C.I.) into one function, where it would make more sense to handle them separately...?

The only reason for this to be here was to allow reproducible outputs, i.e. to pass through the u argument from compress_samples. But I realize that np.random.seed(0) before the function call already does this, so the u is not necessary; deleted.

Stefan-Heimersheim (Collaborator, Author):

> variance between CDFs computed from multiple data-samples of the same distribution

We cannot account for that, because the user passes only a single data-sample. If that sample is just very unlucky, there's nothing we can do to know that. I think this is the best we can do?

Collaborator:

I agree that we don't have precise knowledge of the population. But I think we can make our results more robust to variability coming from the sample compression (now that we got rid of the u). If we move the line

weights = compress_weights(weights, ncompress=-1)

into our iteration loop, then each iteration will use a slightly different subsample, giving us a handle on the (sub-)sample variance (or maybe it is more accurate to call this the compression variance...?).
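A hedged sketch of this suggestion (not the code in this PR), reusing the surrounding names from the diff (samples, weights, level, n_iter) and showing only the equal-tailed branch:

    ci_samples = []
    for i in range(n_iter):
        # a fresh unit-weight subsample each iteration, so that the spread
        # of ci_samples also reflects the compression variance
        w = compress_weights(weights, ncompress=-1)
        x = samples[np.where(w)[0]]
        invCDF = sample_cdf(x, inverse=True)
        ci_samples.append(np.array([invCDF((1-level)/2),
                                    invCDF((1+level)/2)]))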

    n_iter : int, default=12
        Number of CDF samples used to improve the ``mean`` and ``std``
        estimates.

    Returns
    -------
    limit(s) : tuple or float
        The credibility interval boundary (or boundaries) and, if
        requested, the associated covariance (based on ``n_iter`` samples).
    """
    if level >= 1:
        raise ValueError('level must be <1, got {0:.2f}'.format(level))
    if len(np.shape(samples)) != 1:
        raise ValueError('Support only 1D arrays for samples')
    if weights is not None and np.shape(samples) != np.shape(weights):
        raise ValueError('Shape of samples and weights differs')

    # Convert to numpy to unify indexing
    samples = np.array(samples.copy())
    if weights is None:
        weights = np.ones(len(samples))
    else:
        weights = np.array(weights.copy())

    # Convert samples to unit weights if that is not already the case
    if not np.all(np.logical_or(weights == 0, weights == 1)):
        # compress_weights with ncompress<=0 assures weights are in {0, 1}.
        # Note that this must be done; we cannot handle weights != 1,
        # see this discussion for details:
        # https://github.com/williamjameshandley/anesthetic/pull/188#issuecomment-1274980982
        weights = compress_weights(weights, ncompress=-1, u=u)
        if verbose:
            print("Compressing weights to", np.sum(weights),
                  "unit weight samples.")
    assert np.all(np.logical_or(weights == 0, weights == 1)), \
        "Unexpected error in compress_weights, weights not binary"

    indices = np.where(weights)[0]
    x = samples[indices]

    # Sample the confidence interval multiple times
    # to get errorbars on the confidence interval boundaries
    ci_samples = []
    for i in range(n_iter):
        invCDF = sample_cdf(x, inverse=True)
        if method == "hpd":
            # Find the smallest interval
            def distance(Y, level=level):
                return invCDF(Y+level)-invCDF(Y)
            res = minimize_scalar(distance, bounds=(0, 1-level),
                                  method="Bounded")
            ci_samples.append(np.array([invCDF(res.x),
                                        invCDF(res.x+level)]))
        elif method == "ll":
            # Get the value above which we reach the desired level
            ci_samples.append(invCDF(1-level))
        elif method == "ul":
            # Get the value below which we reach the desired level
            ci_samples.append(invCDF(level))
        elif method == "et":
            ci_samples.append(np.array([invCDF((1-level)/2),
                                        invCDF((1+level)/2)]))
        else:
            raise ValueError("Method '{0:}' unknown".format(method))
    ci_samples = np.array(ci_samples)
    if np.shape(ci_samples) == (n_iter, ):
        if verbose:
            print(f"The {level:.0%} credibility interval is",
                  "{0:.2g} +/- {1:.1g}".format(np.mean(ci_samples),
                                               np.std(ci_samples, ddof=1)))
        if return_covariance:
            return np.mean(ci_samples), np.cov(ci_samples)
        else:
            return np.mean(ci_samples)
    elif np.shape(ci_samples) == (n_iter, 2):
        if verbose:
            print(f"The {level:.0%} credibility interval is",
                  "[{0:.2g} +/- {1:.1g}, {2:.2g} +/- {3:.1g}]".format(
                      np.mean(ci_samples[:, 0]),
                      np.std(ci_samples[:, 0], ddof=1),
                      np.mean(ci_samples[:, 1]),
                      np.std(ci_samples[:, 1], ddof=1)))
        if return_covariance:
            return np.mean(ci_samples, axis=0), \
                np.cov(ci_samples, rowvar=False)
        else:
            return np.mean(ci_samples, axis=0)
    else:
        raise ValueError('ci_samples has an unrecognized shape')


def mirror_1d(d, xmin=None, xmax=None):
"""If necessary apply reflecting boundary conditions."""
if xmin is not None and xmax is not None:
tests/test_utils.py (37 changes: 35 additions & 2 deletions)

@@ -2,14 +2,15 @@
import numpy as np
import pytest
from scipy import special as sp
-from numpy.testing import assert_array_equal
+from numpy.testing import (assert_array_equal, assert_allclose)
from anesthetic import read_chains
from anesthetic.utils import (nest_level, compute_nlive, unique, is_int,
                              iso_probability_contours,
                              iso_probability_contours_from_samples,
                              logsumexp, sample_compression_1d,
                              triangular_sample_compression_2d,
-                             insertion_p_value, compress_weights)
+                             insertion_p_value, compress_weights,
+                             credibility_interval)


def test_compress_weights():
@@ -172,3 +173,35 @@ def test_p_values_from_sample():

    ks_results = insertion_p_value(ns.insertion[nlive:-nlive], nlive, batch=1)
    assert ks_results['p-value'] > 0.05


def test_credibility_interval():
    np.random.seed(0)
    from anesthetic import read_chains
    normal_samples = np.random.normal(loc=2, scale=0.1, size=10000)
    mean, cov = credibility_interval(normal_samples, level=0.68,
                                     return_covariance=True)
    assert_allclose(mean[0], 1.9, atol=0.01)
    assert_allclose(mean[1], 2.1, atol=0.01)
    assert_allclose(cov, [[7e-6, 6e-6], [6e-6, 8e-6]], atol=1e-1)

    samples = read_chains('./tests/example_data/pc')
    mean2, cov2 = credibility_interval(samples["x0"], level=0.68,
                                       weights=samples.get_weights(),
                                       method="hpd",
                                       u=np.random.rand(len(samples)),
                                       return_covariance=True)
    assert_allclose(mean2[0], -0.1, atol=0.01)
    assert_allclose(mean2[1], 0.1, atol=0.01)
    assert_allclose(cov2, [[5e-5, 5e-5], [5e-5, 1e-4]], rtol=1e-1)
    with pytest.raises(ValueError):
        credibility_interval(samples.x0, level=1.1)

    with pytest.raises(ValueError):
        credibility_interval(samples[['x0', 'x1']])

    with pytest.raises(ValueError):
        credibility_interval(samples.x0, weights=[1, 2, 3])

    with pytest.raises(ValueError):
        credibility_interval(samples.x0, method='foo')
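For reference, a short usage sketch against utils.credibility_interval as it appears in this snapshot of the diff (later commits in this PR rename n_iter to nsamples and remove the u keyword, per the commit log above):

    import numpy as np
    from anesthetic.utils import credibility_interval

    np.random.seed(0)
    x = np.random.normal(loc=2, scale=0.1, size=10000)

    lo, hi = credibility_interval(x, level=0.68, method="hpd")  # two-sided HPD
    ul = credibility_interval(x, level=0.95, method="ul")       # one-sided upper limit
    mean, cov = credibility_interval(x, level=0.68,
                                     return_covariance=True)    # boundaries + covariance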