Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ultranest import #313

Merged
merged 30 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
a6286cd
add ultranest import
JohannesBuchner Jul 11, 2023
592eefe
add test and output data from ultranest and script that creates it; f…
JohannesBuchner Jul 11, 2023
4877bd7
typo
JohannesBuchner Jul 11, 2023
fcabe8e
minimize ultranest data included
JohannesBuchner Jul 11, 2023
caca2f2
move h5py import into function, to make it only run when needed
JohannesBuchner Jul 11, 2023
6283f88
version bump to 2.1.0
lukashergt Jul 11, 2023
bb1eebd
make ultranest part of gui tests
lukashergt Jul 11, 2023
b8cd8ab
fix `test_gui_params` to work for both `pc` and `un`
lukashergt Jul 11, 2023
adb1ebc
put `json` and `h5py` into optional dependencies
lukashergt Jul 11, 2023
8d02f60
remove `json` from optional dependencies as it is a built-in package
lukashergt Jul 11, 2023
070b75e
fix labelling of AxesDataFrame for the case where there are exact mat…
lukashergt Jul 11, 2023
7033eab
skip tests related to ultranest if `h5py` not in `sys.modules`
lukashergt Jul 11, 2023
441c1d7
remove trailing print statement
lukashergt Jul 11, 2023
639bb09
update docs for reading UltraNest samples
lukashergt Jul 11, 2023
ce41b63
rename `makeun.py` to `un_gen_data.py` such that it gets grouped with…
lukashergt Jul 11, 2023
58baf2c
add Johannes Buchner to list of contributors
lukashergt Jul 11, 2023
8b09789
revert commit 070b75e173698a2012563d038d2e66bb82b88774, which checked…
lukashergt Jul 19, 2023
8a060fd
pass `labels=self.get_labels_map()` rather than `labels=self.get_labe…
lukashergt Jul 19, 2023
63e264f
add test for axes labels when dropping labels or when only having par…
lukashergt Jul 19, 2023
c9891f9
correct `read_ultranest` docstring to refer to UltraNest files, not t…
lukashergt Jul 19, 2023
b894efb
add UltraNest to the list of supported file structures in `read_chains`
lukashergt Jul 19, 2023
f6c62cf
improve `read_cobaya`, `read_multinest` and `read_polychord` docstrings
lukashergt Jul 19, 2023
298d8d9
move `bin/gen_data.py` to `tests/example_data/gen_data.py` to have da…
lukashergt Jul 19, 2023
64e7349
bring `gen_data.py` to the anesthetic 2.0.0 century
lukashergt Jul 19, 2023
1acbf53
make `gen_data.py` flake8 conform
lukashergt Jul 19, 2023
71c8cf0
temporarily remove `un/info/results.json`
lukashergt Jul 19, 2023
0ab5210
add `un/info/results.json` back in, hopefully in the right format
lukashergt Jul 19, 2023
d6da757
add `mn` to the gui tests
lukashergt Jul 19, 2023
57b00e1
Merge branch 'master' into master
williamjameshandley Jul 19, 2023
76c982f
Added back in infuriating newline which github automerge fails to do …
williamjameshandley Jul 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion anesthetic/read/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from anesthetic.read.getdist import read_getdist
from anesthetic.read.cobaya import read_cobaya
from anesthetic.read.multinest import read_multinest
from anesthetic.read.ultranest import read_ultranest


def read_chains(root, *args, **kwargs):
Expand Down Expand Up @@ -45,7 +46,11 @@ def read_chains(root, *args, **kwargs):
"anesthetic.html#anesthetic.samples.MCMCSamples.remove_burn_in"
)
errors = []
for read in [read_polychord, read_multinest, read_cobaya, read_getdist]:
readers = [
read_polychord, read_multinest, read_cobaya,
read_ultranest, read_getdist
]
for read in readers:
try:
samples = read(root, *args, **kwargs)
samples.root = root
Expand Down
30 changes: 30 additions & 0 deletions anesthetic/read/ultranest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Read NestedSamples from ultranest results."""
import os
from anesthetic.samples import NestedSamples
import h5py
import json


def read_ultranest(root, *args, **kwargs):
"""Read <root>ev.dat and <root>phys_live.points in MultiNest format."""
lukashergt marked this conversation as resolved.
Show resolved Hide resolved
with open(os.path.join(root, 'info', 'results.json')) as infofile:
labels = json.load(infofile)['paramnames']
num_params = len(labels)

filepath = os.path.join(root, 'results', 'points.hdf5')
with h5py.File(filepath, 'r') as fileobj:
points = fileobj['points']
_, ncols = points.shape
x_dim = ncols - 3 - num_params
logL_birth = points[:, 0]
logL = points[:, 1]
# u_samples = points[:,3:x_dim]
samples = points[:, 3 + x_dim:3 + x_dim + num_params]

kwargs['label'] = kwargs.get('label', os.path.basename(root))
columns = kwargs.pop('columns', labels)
labels = kwargs.pop('labels', labels)
data = samples

return NestedSamples(data=data, logL=logL, logL_birth=logL_birth,
columns=columns, labels=labels, *args, **kwargs)
31 changes: 31 additions & 0 deletions tests/example_data/makeun.py
lukashergt marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np
from ultranest import ReactiveNestedSampler

ndim = 4
sigma = np.logspace(-1, -6, ndim)
width = 1 - 5 * sigma
width[width < 1e-20] = 1e-20
centers = (np.sin(np.arange(ndim) / 2.) * width + 1.) / 2.


def loglike(theta):
"""compute log-likelihood."""
like = - 0.5 * (((theta - centers) / sigma)**2).sum(axis=1) \
- 0.5 * np.log(2 * np.pi * sigma**2).sum()
return like


def transform(x):
"""transform according to prior."""
return x * 20 - 10


paramnames = ['a', 'b', 'c', 'd']

sampler = ReactiveNestedSampler(
paramnames, loglike, transform=transform,
log_dir='un', resume=True, vectorized=True)

sampler.run(frac_remain=0.5, min_num_live_points=400)
sampler.print_results()
sampler.plot()
16,651 changes: 16,651 additions & 0 deletions tests/example_data/un/chains/equal_weighted_post.txt

Large diffs are not rendered by default.

16,651 changes: 16,651 additions & 0 deletions tests/example_data/un/chains/run.txt

Large diffs are not rendered by default.

16,651 changes: 16,651 additions & 0 deletions tests/example_data/un/chains/weighted_post.txt

Large diffs are not rendered by default.

16,651 changes: 16,651 additions & 0 deletions tests/example_data/un/chains/weighted_post_untransformed.txt

Large diffs are not rendered by default.

540 changes: 540 additions & 0 deletions tests/example_data/un/debug.log

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions tests/example_data/un/info/post_summary.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"a_mean","a_stdev","a_median","a_errlo","a_errup","b_mean","b_stdev","b_median","b_errlo","b_errup","c_mean","c_stdev","c_median","c_errlo","c_errup","d_mean","d_stdev","d_median","d_errlo","d_errup"
5.025783329483370920e-01,9.929557150158441203e-02,5.066546971904539731e-01,4.016698354319299114e-01,6.012826682750720408e-01,7.370934893843076319e-01,2.131337783168813296e-03,7.370548529653415670e-01,7.350305173203235398e-01,7.393071000969335671e-01,9.206375311748793422e-01,4.603300006747625519e-05,9.206365431021019674e-01,9.205913816528905613e-01,9.206855255078583156e-01,9.987450408249887168e-01,1.018245828856401194e-06,9.987450545520015766e-01,9.987439938546689433e-01,9.987460211457328541e-01
77 changes: 77 additions & 0 deletions tests/example_data/un/info/results.json
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand correctly, the only thing we need from this is the paramnames entry, although it is nice that it has a maximum likelihood point. Presuming this is found by an optimisation procedure (like e.g. polychord's polishing settings.maximise=true gives) then this is could be useful in future iterations.

Copy link
Collaborator Author

@JohannesBuchner JohannesBuchner Jul 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the last live point discarded, but for the default frac_remain=0.01 this is good enough.

Copy link
Collaborator

@williamjameshandley williamjameshandley Jul 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Theoretically I disagree -- the rule of thumb is that logLmax ~ <logL>_P + d/2, and the width of the typical set in logL space is sqrt(d/2), so in even moderate dimensions the true likelihood peak lies some distance away from where nested sampling terminates, regardless of stopping criterion (see figure 2), but this isn't relevant to this PR!

Copy link
Collaborator

@lukashergt lukashergt Jul 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have been wondering if we could somehow make use of PolyChord's maximise=true output in anesthetic...

  • What happens when one appends the list of nested samples with the maximum likelihood point at the very end?
  • For safety we could do it with zero weight...?
  • Or is there a way of turning the maximum likelihood point into the final nested sampling point?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in practice all of these would work fine for a run that had reached convergence. I would worry if you also started trying to use it at different temperatures.

As an API, a better solution would be for the maximum to be appended with a logL_birth=logL=logL_max, so it's officially 'beyond the last live point'.

We'd then have to write the volume calculation to ensure anesthetic set these to zero or nan weight by default, since there's no way to determine the volume if there's a gap.

I could imagine gaps occurring naturally in the case of importance weighting.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue raised in #317.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Theoretically I disagree -- the rule of thumb is that logLmax ~ <logL>_P + d/2, and the width of the typical set in logL space is sqrt(d/2), so in even moderate dimensions the true likelihood peak lies some distance away from where nested sampling terminates, regardless of stopping criterion (see figure 2), but this isn't relevant to this PR!

As an aside, I have been working through this analytically and experimentally this afternoon, and this statement only holds in higher dimensions.

Here is an analytic result for f=0.01, n=1000 for how close you get in loglikelihood to the maximum as a function of dimension:

plot

Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
{
"niter": 16650,
"logz": -12.414914594805953,
"logzerr": 0.7649516021328628,
"logz_bs": -12.378097297348035,
"logz_single": -12.414914594805953,
"logzerr_tail": 0.405716490463611,
"logzerr_bs": 0.6484944741256662,
"ess": 988.8766189106832,
"H": 38.639675527033575,
"Herr": 0.22373181701346842,
"posterior": {
"mean": [
0.5025783329483371,
0.7370934893843076,
0.9206375311748793,
0.9987450408249887
],
"stdev": [
0.09929557150158441,
0.0021313377831688133,
4.6033000067476255e-05,
1.0182458288564012e-06
],
"median": [
0.506654697190454,
0.7370548529653416,
0.920636543102102,
0.9987450545520016
],
"errlo": [
0.4016698354319299,
0.7350305173203235,
0.9205913816528906,
0.9987439938546689
],
"errup": [
0.601282668275072,
0.7393071000969336,
0.9206855255078583,
0.9987460211457329
],
"information_gain_bits": [
3.7847253159407837,
4.14647919764959,
4.14647919764959,
4.14647919764959
]
},
"maximum_likelihood": {
"logl": 28.526361154649237,
"point": [
0.4912994020358319,
0.7373000808358334,
0.9206344043582959,
0.9987452206617764
],
"point_untransformed": [
0.5245649701017916,
0.5368650040417917,
0.5460317202179148,
0.5499372610330888
]
},
"ncall": 39885,
"paramnames": [
"a",
"b",
"c",
"d"
],
"logzerr_single": 0.31080410038734035,
"insertion_order_MWW_test": {
"independent_iterations": Infinity,
"converged": true
}
}
lukashergt marked this conversation as resolved.
Show resolved Hide resolved
Binary file added tests/example_data/un/plots/corner.pdf
Binary file not shown.
Binary file added tests/example_data/un/plots/run.pdf
Binary file not shown.
Binary file added tests/example_data/un/plots/trace.pdf
Binary file not shown.
Binary file added tests/example_data/un/results/points.hdf5
Binary file not shown.
20 changes: 20 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from anesthetic.read.getdist import read_getdist
from anesthetic.read.cobaya import read_cobaya
from anesthetic.read.multinest import read_multinest
from anesthetic.read.ultranest import read_ultranest
import pandas._testing as tm
from anesthetic.read.hdf import HDFStore, read_hdf
try:
Expand Down Expand Up @@ -184,6 +185,25 @@ def test_read_multinest():
ns.plot_1d(['x0', 'x1', 'x2', 'x3'])


def test_read_ultranest():
np.random.seed(3)
ns = read_ultranest('./tests/example_data/un')
params = ['a', 'b', 'c', 'd', 'logL', 'logL_birth', 'nlive']
assert_array_equal(ns.drop_labels().columns, params)
labels = ['a',
'b',
'c',
'd',
r'$\ln\mathcal{L}$',
r'$\ln\mathcal{L}_\mathrm{birth}$',
r'$n_\mathrm{live}$']
assert_array_equal(ns.get_labels(), labels)

assert isinstance(ns, NestedSamples)
ns.plot_2d(['a', 'b', 'c', 'd'])
ns.plot_1d(['a', 'b', 'c', 'd'])


def test_read_polychord():
np.random.seed(3)
ns = read_polychord('./tests/example_data/pc')
Expand Down