Skip to content

Commit

Permalink
make tension docstring more readable
Browse files Browse the repository at this point in the history
  • Loading branch information
lukashergt committed Sep 27, 2024
1 parent 9dae22f commit 8c70f2f
Showing 1 changed file with 36 additions and 42 deletions.
78 changes: 36 additions & 42 deletions anesthetic/tension.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,90 +4,84 @@


def _as_stats(run, nsamples, beta):
    """Return the nested sampling statistics belonging to ``run``.

    If ``run`` exposes a callable ``stats`` attribute, it is called with the
    given ``nsamples`` and ``beta`` and its result is returned. Otherwise
    ``run`` is assumed to already be a precomputed stats object and is
    returned unchanged.
    """
    compute = getattr(run, 'stats', None)
    if callable(compute):
        return compute(nsamples=nsamples, beta=beta)
    # NOTE(review): assumes a precomputed stats object has no callable
    # ``stats`` attribute of its own -- confirm against the Samples class.
    return run


def stats(A, B, AB, nsamples=None, beta=None):  # noqa: D301
    r"""Compute tension statistics between two samples.

    Using nested sampling we can compute:

    - ``logR``: R statistic for dataset consistency

      .. math::
        \log R = \log Z_{AB} - \log Z_{A} - \log Z_{B}

    - ``logI``: information ratio

      .. math::
        \log I = D_{KL}^{A} + D_{KL}^{B} - D_{KL}^{AB}

    - ``logS``: suspiciousness

      .. math::
        \log S = \log L_{AB} - \log L_{A} - \log L_{B}

    - ``d_G``: Gaussian model dimensionality of shared constrained parameters

      .. math::
        d = d_{A} + d_{B} - d_{AB}

    - ``p``: p-value for the tension between two samples

      .. math::
        p = \int_{d-2\log{S}}^{\infty} \chi^2_d(x) dx

    Parameters
    ----------
    A : :class:`anesthetic.samples.NestedSamples`
        NestedSamples object from a sampling run using only dataset A.
        Alternatively, you can pass the precomputed stats object returned from
        :meth:`anesthetic.samples.NestedSamples.stats`.

    B : :class:`anesthetic.samples.NestedSamples`
        NestedSamples object from a sampling run using only dataset B.
        Alternatively, you can pass the precomputed stats object returned from
        :meth:`anesthetic.samples.NestedSamples.stats`.

    AB : :class:`anesthetic.samples.NestedSamples`
        NestedSamples object from a sampling run using both datasets A and B
        jointly. A precomputed stats object is accepted here as well.

    nsamples : int, optional
        - If nsamples is not supplied, calculate mean value
        - If nsamples is integer, draw nsamples from the distribution of
          values inferred by nested sampling

        Only used for inputs whose ``stats`` method has to be called;
        precomputed stats objects are taken as they are.

    beta : float, array-like, optional
        Inverse temperature(s) beta=1/kT. Forwarded unchanged to the
        ``stats`` method of each input; the default ``None`` leaves the
        choice to that method (previously documented here as beta=1).

    Returns
    -------
    samples : :class:`anesthetic.samples.Samples`
        DataFrame containing the following tension statistics in columns:
        ['logR', 'logI', 'logS', 'd_G', 'p']
    """
    statsA = _as_stats(A, nsamples, beta)
    statsB = _as_stats(B, nsamples, beta)
    statsAB = _as_stats(AB, nsamples, beta)

    # The result shares its index with the statistics of run A.
    samples = Samples(index=statsA.index)

    samples['logR'] = statsAB['logZ'] - statsA['logZ'] - statsB['logZ']
    samples.set_label('logR', r'$\ln\mathcal{R}$')

    samples['logI'] = statsA['D_KL'] + statsB['D_KL'] - statsAB['D_KL']
    samples.set_label('logI', r'$\ln\mathcal{I}$')

    samples['logS'] = statsAB['logL_P'] - statsA['logL_P'] - statsB['logL_P']
    samples.set_label('logS', r'$\ln\mathcal{S}$')

    samples['d_G'] = statsA['d_G'] + statsB['d_G'] - statsAB['d_G']
    samples.set_label('d_G', r'$d_\mathrm{G}$')

    # Survival function of a chi-squared distribution with d_G degrees of
    # freedom, evaluated at d_G - 2 logS (the integral given in the docstring).
    p = chi2.sf(samples['d_G'] - 2 * samples['logS'], df=samples['d_G'])
    samples['p'] = p
    samples.set_label('p', '$p$')
    return samples

0 comments on commit 8c70f2f

Please sign in to comment.