make tension docstring more readable

handley-lab · Sep 27, 2024 · 8c70f2f · 8c70f2f
1 parent 9dae22f
commit 8c70f2f
Showing 1 changed file with 36 additions and 42 deletions.
diff --git a/anesthetic/tension.py b/anesthetic/tension.py
@@ -4,90 +4,84 @@
 
 
 def stats(A, B, AB, nsamples=None, beta=None):  # noqa: D301
-    """Compute tension statistics between two samples.
+    r"""Compute tension statistics between two samples.
 
     Using nested sampling we can compute:
 
-    - ``logR``: Logarithm of R statistic
+    - ``logR``: logarithm of the R statistic for dataset consistency
 
       .. math::
-        \\log\\mathcal{R} = \\log\\mathcal{Z}_\\mathrm{AB}
-                            - \\log\\mathcal{Z}_\\mathrm{A}
-                            - \\log\\mathcal{Z}_\\mathrm{B}
+        \log R = \log Z_{AB} - \log Z_{A} - \log Z_{B}
 
-    - ``logI``: Logarithm of information ratio
+    - ``logI``: logarithm of the information ratio
 
       .. math::
-        \\log\\mathcal{I} = \\mathcal{D}_\\mathrm{KL}^A
-                            + \\mathcal{D}_\\mathrm{KL}^B
-                            - \\mathcal{D}_\\mathrm{KL}^{AB}
+        \log I = D_{KL}^{A} + D_{KL}^{B} - D_{KL}^{AB}
 
-    - ``logS``: Logarithm of suspiciousness
+    - ``logS``: logarithm of the suspiciousness
 
       .. math::
-        \\log\\mathcal{S} = \\log\\mathcal{L}_\\mathrm{AB}
-                            - \\log\\mathcal{L}_\\mathrm{A}
-                            - \\log\\mathcal{L}_\\mathrm{B}
+        \log S = \log L_{AB} - \log L_{A} - \log L_{B}
 
-    - ``d_G``: Gaussian model dimensionality
-      (or posterior variance of the log-likelihood)
+    - ``d_G``: Gaussian model dimensionality of shared constrained parameters
 
       .. math::
-        d_\\mathrm{G}/2 = \\mathrm{var}(\\log\\mathcal{L})_\\mathcal{P}
+        d = d_{A} + d_{B} - d_{AB}
 
     - ``p``: p-value for the tension between two samples
 
       .. math::
-        p = \\int_{d_\\mathrm{G} - 2 \\log\\mathcal{S}}^{\\infty} \\chi^2 (x)dx
+        p = \int_{d-2\log{S}}^{\infty} \chi^2_d(x) dx
 
     Parameters
     ----------
     A : :class:`anesthetic.samples.NestedSamples`
-        Nested Samples from a sampling run using only dataset A.
+        NestedSamples object from a sampling run using only dataset A.
+        Alternatively, you can pass the precomputed stats object returned from
+        :meth:`anesthetic.samples.NestedSamples.stats`.
 
     B : :class:`anesthetic.samples.NestedSamples`
-        Nested Samples from a sampling run using only dataset B.
+        NestedSamples object from a sampling run using only dataset B.
+        Alternatively, you can pass the precomputed stats object returned from
+        :meth:`anesthetic.samples.NestedSamples.stats`.
 
     AB : :class:`anesthetic.samples.NestedSamples`
-        Nested Samples from a sampling run using datasets A and B jointly.
+        NestedSamples object from a sampling run using both datasets A and B
+        jointly.
 
     nsamples : int, optional
         - If nsamples is not supplied, calculate mean value
         - If nsamples is integer, draw nsamples from the distribution of
           values inferred by nested sampling
 
-    beta : float, array-like, optional
-        inverse temperature(s) beta=1/kT. Default 1
+    beta : float, array-like, default=1
+        Inverse temperature(s) beta=1/kT.
 
     Returns
     -------
-    samples_stats : :class:`anesthetic.samples.Samples`
-        DataFrame containing the following tension statistics:
-        logR, logI, logS, d_G, p
+    samples : :class:`anesthetic.samples.Samples`
+        DataFrame containing the following tension statistics in columns:
+        ['logR', 'logI', 'logS', 'd_G', 'p']
     """
     statsA = A.stats(nsamples=nsamples, beta=beta)
     statsB = B.stats(nsamples=nsamples, beta=beta)
     statsAB = AB.stats(nsamples=nsamples, beta=beta)
 
-    samples_stats = Samples(index=statsA.index)
+    samples = Samples(index=statsA.index)
 
-    logR = statsAB.logZ - statsA.logZ - statsB.logZ
-    samples_stats['logR'] = logR
-    samples_stats.set_label('logR', r'$\ln\mathcal{R}$')
+    samples['logR'] = statsAB['logZ'] - statsA['logZ'] - statsB['logZ']
+    samples.set_label('logR', r'$\ln\mathcal{R}$')
 
-    logI = statsA.D_KL + statsB.D_KL - statsAB.D_KL
-    samples_stats['logI'] = logI
-    samples_stats.set_label('logI', r'$\ln\mathcal{I}$')
+    samples['logI'] = statsA['D_KL'] + statsB['D_KL'] - statsAB['D_KL']
+    samples.set_label('logI', r'$\ln\mathcal{I}$')
 
-    logS = statsAB.logL_P - statsA.logL_P - statsB.logL_P
-    samples_stats['logS'] = logS
-    samples_stats.set_label('logS', r'$\ln\mathcal{S}$')
+    samples['logS'] = statsAB['logL_P'] - statsA['logL_P'] - statsB['logL_P']
+    samples.set_label('logS', r'$\ln\mathcal{S}$')
 
-    d_G = statsA.d_G + statsB.d_G - statsAB.d_G
-    samples_stats['d_G'] = d_G
-    samples_stats.set_label('d_G', r'$d_\mathrm{G}$')
+    samples['d_G'] = statsA['d_G'] + statsB['d_G'] - statsAB['d_G']
+    samples.set_label('d_G', r'$d_\mathrm{G}$')
 
-    p = chi2.sf(d_G-2*logS, d_G)
-    samples_stats['p'] = p
-    samples_stats.set_label('p', r'$p$')
-    return samples_stats
+    p = chi2.sf(samples['d_G'] - 2 * samples['logS'], df=samples['d_G'])
+    samples['p'] = p
+    samples.set_label('p', '$p$')
+    return samples