Nested sampling paper updates #14

Merged · 6 commits · Nov 28, 2019
9 changes: 9 additions & 0 deletions Paper/.gitignore
@@ -0,0 +1,9 @@
HighDimensionalSampling.aux
HighDimensionalSampling.bbl
HighDimensionalSampling.blg
HighDimensionalSampling.fdb_latexmk
HighDimensionalSampling.fls
HighDimensionalSampling.log
HighDimensionalSampling.pdf
HighDimensionalSampling.toc
HighDimensionalSampling.out
185 changes: 185 additions & 0 deletions Paper/HighDimensionalSampling.bib
@@ -3232,3 +3232,188 @@ @article{Beringer:1900zz
reportNumber = "SLAC-REPRINT-2014-001",
SLACcitation = "%%CITATION = PHRVA,D86,010001;%%",
}

@article{Skilling:2006gxv,
author = "Skilling, John",
title = "{Nested sampling for general Bayesian computation}",
journal = "Bayesian Analysis",
volume = "1",
year = "2006",
number = "4",
pages = "833-859",
doi = "10.1214/06-BA127",
SLACcitation = "%%CITATION = INSPIRE-1670681;%%"
}

@article{Handley:2015fda,
author = "Handley, W. J. and Hobson, M. P. and Lasenby, A. N.",
title = "{PolyChord: nested sampling for cosmology}",
journal = "Mon. Not. Roy. Astron. Soc.",
volume = "450",
year = "2015",
number = "1",
pages = "L61-L65",
doi = "10.1093/mnrasl/slv047",
eprint = "1502.01856",
archivePrefix = "arXiv",
primaryClass = "astro-ph.CO",
SLACcitation = "%%CITATION = ARXIV:1502.01856;%%"
}

@ARTICLE{2015MNRAS.453.4384H,
author = {{Handley}, W.~J. and {Hobson}, M.~P. and {Lasenby}, A.~N.},
title = "{POLYCHORD: next-generation nested sampling}",
journal = "Mon. Not. Roy. Astron. Soc.",
keywords = {methods: data analysis, methods: statistical, Astrophysics - Instrumentation and Methods for Astrophysics},
year = "2015",
month = "Nov",
volume = {453},
number = {4},
pages = {4384-4398},
doi = {10.1093/mnras/stv1911},
archivePrefix = {arXiv},
eprint = {1506.00171},
primaryClass = {astro-ph.IM},
adsurl = {https://ui.adsabs.harvard.edu/abs/2015MNRAS.453.4384H},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}

@article{Graff:2011gv,
author = "Graff, Philip and Feroz, Farhan and Hobson, Michael P.
and Lasenby, Anthony",
title = "{BAMBI: blind accelerated multimodal Bayesian inference}",
journal = "Mon. Not. Roy. Astron. Soc.",
volume = "421",
year = "2012",
pages = "169-180",
doi = "10.1111/j.1365-2966.2011.20288.x",
eprint = "1110.2997",
archivePrefix = "arXiv",
primaryClass = "astro-ph.IM",
SLACcitation = "%%CITATION = ARXIV:1110.2997;%%"
}

@article{Feroz:2007kg,
author = "Feroz, Farhan and Hobson, M. P.",
title = "{Multimodal nested sampling: an efficient and robust
alternative to MCMC methods for astronomical data
analysis}",
journal = "Mon. Not. Roy. Astron. Soc.",
volume = "384",
year = "2008",
pages = "449",
doi = "10.1111/j.1365-2966.2007.12353.x",
eprint = "0704.3704",
archivePrefix = "arXiv",
primaryClass = "astro-ph",
SLACcitation = "%%CITATION = ARXIV:0704.3704;%%"
}
@article{Feroz:2008xx,
author = "Feroz, F. and Hobson, M. P. and Bridges, M.",
title = "{MultiNest: an efficient and robust Bayesian inference
tool for cosmology and particle physics}",
journal = "Mon. Not. Roy. Astron. Soc.",
volume = "398",
year = "2009",
pages = "1601-1614",
doi = "10.1111/j.1365-2966.2009.14548.x",
eprint = "0809.3437",
archivePrefix = "arXiv",
primaryClass = "astro-ph",
SLACcitation = "%%CITATION = ARXIV:0809.3437;%%"
}
@article{Feroz:2013hea,
author = "Feroz, F. and Hobson, M. P. and Cameron, E. and Pettitt,
A. N.",
title = "{Importance Nested Sampling and the MultiNest Algorithm}",
year = "2013",
eprint = "1306.2144",
archivePrefix = "arXiv",
primaryClass = "astro-ph.IM",
SLACcitation = "%%CITATION = ARXIV:1306.2144;%%"
}
@article{Graff:2013cla,
author = "Graff, Philip and Feroz, Farhan and Hobson, Michael P.
and Lasenby, Anthony N.",
title = "{SKYNET: an efficient and robust neural network training
tool for machine learning in astronomy}",
journal = "Mon. Not. Roy. Astron. Soc.",
volume = "441",
year = "2014",
number = "2",
pages = "1741-1759",
doi = "10.1093/mnras/stu642",
eprint = "1309.0790",
archivePrefix = "arXiv",
primaryClass = "astro-ph.IM",
SLACcitation = "%%CITATION = ARXIV:1309.0790;%%"
}

@misc{pybambi,
author = {{Handley}, W.~J. and {Scott}, P. and {White}, M.~J.},
title = {pyBAMBI},
month = feb,
year = 2019,
doi = {10.5281/zenodo.2500877},
url = {https://doi.org/10.5281/zenodo.2500877}
}

@article{aeons,
author = {{Handley}, Will and others},
title = {AEONS: Approximate end of nested sampling},
journal = {In preparation},
year = {2019}
}

@article{neal2003,
author = "Neal, Radford M.",
doi = "10.1214/aos/1056562461",
fjournal = "The Annals of Statistics",
journal = "Ann. Statist.",
month = "06",
number = "3",
pages = "705--767",
publisher = "The Institute of Mathematical Statistics",
title = "Slice sampling",
url = "https://doi.org/10.1214/aos/1056562461",
volume = "31",
year = "2003"
}
@ARTICLE{Higson:2019,
author = {{Higson}, Edward and {Handley}, Will and {Hobson}, Mike and
{Lasenby}, Anthony},
title = "{Dynamic nested sampling: an improved algorithm for parameter estimation and evidence calculation}",
journal = {Statistics and Computing},
keywords = {Statistics - Computation, Astrophysics - Instrumentation and Methods for Astrophysics, Physics - Data Analysis, Statistics and Probability, Statistics - Methodology, Statistics - Computation, Astrophysics - Instrumentation and Methods for Astrophysics, Physics - Data Analysis, Statistics and Probability, Statistics - Methodology},
year = "2019",
month = "Sep",
volume = {29},
number = {5},
pages = {891-913},
doi = {10.1007/s11222-018-9844-0},
archivePrefix = {arXiv},
eprint = {1704.03459},
primaryClass = {stat.CO},
adsurl = {https://ui.adsabs.harvard.edu/abs/2019S&C....29..891H},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}


@Article{Brewer2011,
author="Brewer, Brendon J.
and P{\'a}rtay, Livia B.
and Cs{\'a}nyi, G{\'a}bor",
title="Diffusive nested sampling",
journal="Statistics and Computing",
year="2011",
month="Oct",
day="01",
volume="21",
number="4",
pages="649--656",
abstract="We introduce a general Monte Carlo method based on Nested Sampling (NS), for sampling complex probability distributions and estimating the normalising constant. The method uses one or more particles, which explore a mixture of nested probability distributions, each successive distribution occupying ∼e−1 times the enclosed prior mass of the previous distribution. While NS technically requires independent generation of particles, Markov Chain Monte Carlo (MCMC) exploration fits naturally into this technique. We illustrate the new method on a test problem and find that it can achieve four times the accuracy of classic MCMC-based Nested Sampling, for the same computational effort; equivalent to a factor of 16 speedup. An additional benefit is that more samples and a more accurate evidence value can be obtained simply by continuing the run for longer, as in standard MCMC.",
issn="1573-1375",
doi="10.1007/s11222-010-9198-8",
url="https://doi.org/10.1007/s11222-010-9198-8"
}

37 changes: 20 additions & 17 deletions Paper/HighDimensionalSampling.tex
@@ -1,28 +1,15 @@
\documentclass[12pt]{JHEP3}
\documentclass[11pt]{article}
\usepackage{jheppub}
\bibliographystyle{JHEP}
\usepackage{multicol}
\usepackage[dvips]{epsfig,graphics}
\usepackage{graphicx}
\usepackage{cite}

%% %%%%%%%%% Graphics macros %%%%%%%%%%%%%%
\newlength{\wth}
\setlength{\wth}{2.6in}

\newcommand{\twographst}[2]{%
\unitlength=1.1in
\begin{picture}(5.8,2.6)(-0.15,0)
\put(0,0){\epsfig{file=#1, width=\wth}}
\put(2.7,0){\epsfig{file=#2, width=\wth}}
\end{picture}
}

\title{A comparison of sampling algorithms for particle astrophysics applications}
\author{The DarkMachines High Dimensional Sampling group: ...}
\abstract{To be abstracted.}
\keywords{Sampling, Dark Matter}
\begin{document}
\section{Introduction}
\maketitle

The core of the scientific method in the physical sciences is the extraction of the parameters of theories that can explain a variety of observations. In the modern era, the increasing size of observed datasets, combined with the increasing complexity of physical theories, has introduced substantial computational complexity into the elucidation of correct physical theories. This is exacerbated by the fact that we have passed the era in which a single experiment was likely to unambiguously determine the next theory of particle physics or cosmology; we must instead combine clues from many different branches of observation. At the same time, one can argue that substantial increases in computing power have made scientists more ambitious in the scope of both the observations and the theoretical calculations that can be used to describe the physical world.

It is often true that one can define a likelihood for a set of model parameters by, for example, simulating the results of a variety of observations, and using standard statistics to define the likelihood terms that result from each individual experiment. These can then be combined into a composite likelihood, which can be used within a Bayesian or frequentist statistical framework to define and obtain the ``best'' set of parameters of a given model, along with the associated uncertainties. The same approach can be used to perform model comparison. The difficulty is that the likelihood function is rarely known analytically, and can only be explored by sampling. The simplest approach of naive random sampling is deficient for two reasons. Firstly, it is known to concentrate samples at the boundary of the space once the number of parameters is moderately large, leading to biased inferences. Secondly, it is highly inefficient in most physical examples, in which the high-likelihood regions of the parameter space usually occupy a very small fraction of the total multidimensional volume. The past decades have thus seen the development of a series of novel sampling procedures, many of which have been utilised in particle astrophysics applications {\bf (MJW: Need a citation strategy here. I suggest a very generous citation of both the original techniques and particle astrophysics global fits that have used them)}.
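For independent experiments this combination is simply a product of the individual likelihoods, i.e.\ a sum in log space,
\begin{equation}
\ln\mathcal{L}_\mathrm{tot}(\theta) = \sum_{e} \ln\mathcal{L}_{e}(\theta),
\end{equation}
so that each additional dataset contributes one further term to the composite log-likelihood.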
@@ -56,8 +43,24 @@ \subsection{Active learning}
\emph{Author: Bob Stienen}

\subsection{Nested sampling and pyBAMBI}
%\printinunitsof{in}\prntlen{\textwidth}

\emph{Author: Will Handley}
\begin{figure}
\centerline{%
\includegraphics{nested_sampling.pdf}
}
\caption{Nested sampling. Live points are shown in orange, dead points in blue. Posterior peaks are marked with crosses.\label{fig:nested_sampling}}
\end{figure}

Nested sampling~\cite{Skilling:2006gxv} is an alternative sampling strategy that computes the Bayesian evidence whilst simultaneously generating samples from the posterior distribution.
It achieves this by initially generating $n_\mathrm{live}\sim10^2$--$10^4$ samples from the prior. These points are then evolved by repeatedly deleting the point with the lowest likelihood and replacing it with a new point drawn from the prior, subject to the constraint that the new point's likelihood exceed that of the deleted point. The live points thus steadily contract into an exponentially shrinking volume of the prior, tightening around the peak(s) of the likelihood (Figure~\ref{fig:nested_sampling}). The set of discarded ``dead'' points can then be used as posterior samples when appropriately weighted, and the same weighting may be used to compute the Bayesian evidence. This interplay between prior sampling and a likelihood that enters only through a hard constraint is what allows nested sampling to compute evidences. Furthermore, the set of discarded points may be reweighted to produce posterior samples at any thermodynamic temperature, since nested sampling is an athermal algorithm~\cite{aeons}.
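Quantitatively, following~\cite{Skilling:2006gxv}, the prior volume enclosed by the likelihood contour of the $i$th dead point compresses on average as $X_i \approx e^{-i/n_\mathrm{live}}$, giving the evidence estimate and posterior weights
\begin{equation}
\mathcal{Z} = \int \mathcal{L}(\theta)\,\pi(\theta)\,\mathrm{d}\theta \approx \sum_i \mathcal{L}_i w_i, \qquad w_i = X_{i-1} - X_i, \qquad p_i = \mathcal{L}_i w_i / \mathcal{Z},
\end{equation}
where $\mathcal{L}_i$ is the likelihood of the $i$th dead point and $p_i$ the weight with which it enters the posterior samples.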

Historically, the procedure by which the set of live points is repopulated has been the main distinguishing feature between nested sampling implementations. For example,
\texttt{MultiNest}~\cite{Feroz:2007kg,Feroz:2008xx,Feroz:2013hea} constructs a hull of intersecting ellipsoids around the live points and rejection samples within it, whilst \texttt{PolyChord}~\cite{2015MNRAS.453.4384H,Handley:2015fda} utilises a slice-sampling~\cite{neal2003} approach in order to avoid the rejection-sampling inefficiency that scales exponentially with dimensionality. Adjustments to the meta-algorithm of nested sampling have also been suggested and implemented, for example dynamic~\cite{Higson:2019} and diffusive~\cite{Brewer2011} nested sampling.
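To make the repopulation step concrete, the following is a minimal sketch of a single slice-sampling update in the spirit of~\cite{neal2003}, subject to the nested sampling constraint that the new likelihood exceed $\mathcal{L}_\mathrm{min}$. This is an illustrative sketch only (the function name, fixed initial bracket width \texttt{w} and stepping-out scheme are our simplifications), not the \texttt{PolyChord} implementation:

import numpy

def slice_step(x, loglikelihood, logL_min, direction, w=0.1, rng=numpy.random):
    """One slice-sampling step along `direction`, restricted to the
    region where loglikelihood(x) > logL_min (illustrative sketch)."""
    # Place an initial bracket of width w at a random offset around x
    r = rng.uniform()
    left, right = -r*w, (1 - r)*w
    # Step out until both bracket ends violate the likelihood constraint
    while loglikelihood(x + left*direction) > logL_min:
        left -= w
    while loglikelihood(x + right*direction) > logL_min:
        right += w
    # Sample uniformly within the bracket, shrinking it after each rejection
    while True:
        s = rng.uniform(left, right)
        x_new = x + s*direction
        if loglikelihood(x_new) > logL_min:
            return x_new
        if s < 0:
            left = s
        else:
            right = s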

Another extension to nested sampling is the \texttt{BAMBI}~\cite{Graff:2011gv} algorithm (blind accelerated multimodal Bayesian inference). \texttt{BAMBI} was designed as a means to reduce the time spent on likelihood calls by training a neural network to act as a proxy for the true likelihood. Periodically, a neural network is trained and tested on the current set of live and dead points. Once the neural network forms a sufficiently accurate proxy, it is swapped in for the true (substantially slower) likelihood. At the end of this process one is also left with a set of trained neural networks that can be used for further analyses. \texttt{BAMBI}~\cite{Graff:2011gv} was originally implemented in an early version of \texttt{MultiNest}, and has since been updated for later versions and other nested sampling software in \texttt{pyBAMBI}~\cite{pybambi}. In this update the interface is in pure Python, the additional functionality is implemented via dumper function interfaces, and \texttt{TensorFlow} replaces \texttt{SkyNet}~\cite{Graff:2013cla} as the neural network backend.
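The core proxy-swap logic can be sketched as follows. This toy version uses a \texttt{scikit-learn} regressor rather than \texttt{TensorFlow}, and the function name, tolerance and train/test split are illustrative choices of ours rather than \texttt{pyBAMBI} defaults:

import numpy
from sklearn.neural_network import MLPRegressor

def maybe_swap_likelihood(points, logls, true_loglikelihood, tol=0.1):
    """Train a network on the current live and dead points and return it
    as a fast proxy if it is accurate enough (illustrative sketch)."""
    n_train = int(0.8 * len(points))
    net = MLPRegressor(hidden_layer_sizes=(50, 50), max_iter=2000)
    net.fit(points[:n_train], logls[:n_train])
    # Validate on the held-out points before trusting the proxy
    residuals = net.predict(points[n_train:]) - logls[n_train:]
    if numpy.sqrt(numpy.mean(residuals**2)) < tol:
        return lambda x: net.predict(x.reshape(1, -1))[0]  # fast proxy
    return true_loglikelihood  # proxy not yet accurate enough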


\subsection{Sequential Neural Likelihoods}
\emph{Author: Barry Dillon}
Binary file added Paper/nested_sampling.pdf
Binary file not shown.
80 changes: 80 additions & 0 deletions Paper/nested_sampling.py
@@ -0,0 +1,80 @@
from anesthetic import NestedSamples
import matplotlib.pyplot as plt
from scipy.special import logsumexp
import numpy
import tqdm

def loglikelihood(x):
    """Example non-trivial loglikelihood.

    An equal-weight mixture of two strongly correlated two-dimensional
    Gaussians: one centred at (0.7, 0.7) with correlation +0.9, the
    other at (0.3, 0.3) with correlation -0.9.
    """
    # First component: correlated Gaussian centred at (0.7, 0.7)
    x0, x1 = x[:]
    sigma0, sigma1 = 0.1, 0.1
    mu0, mu1 = 0.7, 0.7
    eps = 0.9
    x0 = (x0-mu0)/sigma0
    x1 = (x1-mu1)/sigma1

    logl_A = -numpy.log(2*numpy.pi*sigma0*sigma1*(1-eps**2)**0.5) - (x0**2 - 2*eps*x0*x1 + x1**2)/(1-eps**2)/2

    # Second component: anti-correlated Gaussian centred at (0.3, 0.3)
    x0, x1 = x[:]
    sigma0, sigma1 = 0.1, 0.1
    mu0, mu1 = 0.3, 0.3
    eps = -0.9
    x0 = (x0-mu0)/sigma0
    x1 = (x1-mu1)/sigma1

    logl_B = -numpy.log(2*numpy.pi*sigma0*sigma1*(1-eps**2)**0.5) - (x0**2 - 2*eps*x0*x1 + x1**2)/(1-eps**2)/2

    # Equal-weight mixture: log-mean of the two component densities
    return logsumexp([logl_B, logl_A]) - numpy.log(2)


def ns_sim(ndims=2, nlive=100, ndead=700):
    """Brute-force nested sampling run: replacement points are drawn by
    rejection sampling directly from the uniform prior."""
    numpy.random.seed(0)
    low = (0, 0)
    high = (1, 1)
    live_points = numpy.random.uniform(low=low, high=high, size=(nlive, ndims))
    live_likes = numpy.array([loglikelihood(x) for x in live_points])
    live_birth_likes = numpy.ones(nlive) * -numpy.inf

    dead_points = []
    dead_likes = []
    birth_likes = []
    for _ in tqdm.tqdm(range(ndead)):
        # Kill the lowest-likelihood live point and record it as dead
        i = numpy.argmin(live_likes)
        Lmin = live_likes[i]
        dead_points.append(live_points[i].copy())
        dead_likes.append(live_likes[i])
        birth_likes.append(live_birth_likes[i])
        live_birth_likes[i] = Lmin
        # Replace it by rejection sampling from the prior above Lmin
        while live_likes[i] <= Lmin:
            live_points[i, :] = numpy.random.uniform(low=low, high=high, size=ndims)
            live_likes[i] = loglikelihood(live_points[i])
    return dead_points, dead_likes, birth_likes, live_points, live_likes, live_birth_likes

# Run the sampler and wrap the results in an anesthetic NestedSamples object
data, logL, logL_birth, live, live_logL, live_logL_birth = ns_sim()

ns = NestedSamples(data=data, logL=logL, logL_birth=logL_birth)

fig, axes = plt.subplots(1, 4, sharex=True, sharey=True, figsize=(5.95, 5.95/3.8))

# Plot snapshots of the run after 0, 100, 200 and 300 iterations
for i, ax in enumerate(axes):
    j = i*100
    points = ns.iloc[:j]
    ax.plot(points[0], points[1], '.', ms=4, label='dead points')
    points = ns.live_points(ns.logL[j])
    ax.plot(points[0], points[1], '.', ms=4, label='live points')
    # Mark the two posterior peaks
    ax.plot(0.7, 0.7, 'k+')
    ax.plot(0.3, 0.3, 'k+')

    ax.xaxis.set_ticks([])
    ax.yaxis.set_ticks([])

fig.tight_layout()
fig.savefig('nested_sampling.pdf')