Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reading from csv files #364

Merged
merged 14 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
anesthetic: nested sampling post-processing
===========================================
:Authors: Will Handley and Lukas Hergt
:Version: 2.7.3
:Version: 2.8.0
:Homepage: https://github.com/handley-lab/anesthetic
:Documentation: http://anesthetic.readthedocs.io/

Expand Down
1 change: 1 addition & 0 deletions anesthetic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,4 @@ def wrapper(backend=None):

read_hdf = anesthetic.read.hdf.read_hdf
read_chains = anesthetic.read.chain.read_chains
read_csv = anesthetic.read.csv.read_csv
2 changes: 1 addition & 1 deletion anesthetic/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2.7.3'
__version__ = '2.8.0'
20 changes: 20 additions & 0 deletions anesthetic/labelled_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,26 @@
import numpy as np
from functools import cmp_to_key
from pandas.errors import IndexingError
import pandas as pd


def read_csv(filename, *args, **kwargs):
    """Read a CSV file into a ``LabelledDataFrame``.

    The file is parsed with progressively simpler layouts and the first
    one whose result is labelled on the expected axes is returned:

    1. two index columns and two header rows (labelled on both axes),
    2. two index columns (labelled on axis 0),
    3. two header rows (labelled on axis 1),
    4. one index column, returned unconditionally.

    Parameters
    ----------
    filename
        Passed as the first argument to :func:`pandas.read_csv`.
    *args, **kwargs
        Forwarded unchanged to every :func:`pandas.read_csv` call.

    Returns
    -------
    LabelledDataFrame
    """
    # Each entry: (layout keywords for pandas, axes that must be labelled).
    attempts = (
        ({'index_col': [0, 1], 'header': [0, 1]}, (0, 1)),
        ({'index_col': [0, 1]}, (0,)),
        ({'index_col': 0, 'header': [0, 1]}, (1,)),
    )
    for layout, axes in attempts:
        candidate = LabelledDataFrame(
            pd.read_csv(filename, *args, **layout, **kwargs)
        )
        if all(candidate.islabelled(axis) for axis in axes):
            return candidate
    # Fallback: plain frame with a single index column.
    plain = pd.read_csv(filename, index_col=0, *args, **kwargs)
    return LabelledDataFrame(plain)


def ac(funcs, *args):
Expand Down
10 changes: 6 additions & 4 deletions anesthetic/read/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from anesthetic.read.multinest import read_multinest
from anesthetic.read.ultranest import read_ultranest
from anesthetic.read.nestedfit import read_nestedfit
from anesthetic.read.csv import read_csv


def read_chains(root, *args, **kwargs):
Expand All @@ -18,8 +19,8 @@ def read_chains(root, *args, **kwargs):
* `Nested_fit <https://github.com/martinit18/Nested_Fit>`_,
* `CosmoMC <https://github.com/cmbant/CosmoMC>`_,
* `Cobaya <https://github.com/CobayaSampler/cobaya>`_,
* or anything `GetDist <https://github.com/cmbant/getdist>`_
compatible.
* anything `GetDist <https://github.com/cmbant/getdist>`_ compatible,
* files produced using ``DataFrame.to_csv()`` from anesthetic.

Note that in order to optimally read chains from Cobaya you need to have
`GetDist <https://getdist.readthedocs.io/en/latest/>`__ installed.
Expand All @@ -40,6 +41,7 @@ def read_chains(root, *args, **kwargs):

"""
root = str(root)
# TODO: remove this in version >= 2.1
if 'burn_in' in kwargs:
raise KeyError(
"This is anesthetic 1.0 syntax. The `burn_in` keyword is no "
Expand All @@ -51,8 +53,8 @@ def read_chains(root, *args, **kwargs):
)
errors = []
readers = [
read_polychord, read_multinest, read_cobaya,
read_ultranest, read_nestedfit, read_getdist
read_polychord, read_multinest, read_cobaya, read_ultranest,
read_nestedfit, read_getdist, read_csv
]
for read in readers:
try:
Expand Down
15 changes: 15 additions & 0 deletions anesthetic/read/csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Read and write CSV files for anesthetic."""
from anesthetic.weighted_labelled_pandas import read_csv as wl_read_csv
from anesthetic.samples import MCMCSamples, NestedSamples
from pathlib import Path


def read_csv(filename, *args, **kwargs):
    """Read a CSV file into a :class:`Samples` object.

    The file that is read is ``filename`` with its suffix replaced by
    ``.csv``. If no ``label`` keyword is supplied, the stem of
    ``filename`` is used as the label.

    Parameters
    ----------
    filename : str or path-like
        Location of the CSV file (with or without the ``.csv`` suffix).
    *args, **kwargs
        Forwarded to the constructor of the returned samples class.

    Returns
    -------
    NestedSamples or MCMCSamples
        ``NestedSamples`` if the data contain an ``'nlive'`` column,
        ``MCMCSamples`` otherwise.
    """
    path = Path(filename)
    kwargs.setdefault('label', path.stem)
    frame = wl_read_csv(path.with_suffix('.csv'))
    # The presence of an 'nlive' column selects the nested-sampling class.
    samples_class = NestedSamples if 'nlive' in frame.columns else MCMCSamples
    return samples_class(frame, *args, **kwargs)
79 changes: 1 addition & 78 deletions anesthetic/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,89 +14,12 @@
from anesthetic.utils import (compute_nlive, compute_insertion_indexes,
is_int, logsumexp)
from anesthetic.gui.plot import RunPlotter
from anesthetic.weighted_pandas import WeightedDataFrame, WeightedSeries
from anesthetic.labelled_pandas import LabelledDataFrame, LabelledSeries
from anesthetic.weighted_labelled_pandas import WeightedLabelledDataFrame
from anesthetic.plot import (make_1d_axes, make_2d_axes,
AxesSeries, AxesDataFrame)
from anesthetic.utils import adjust_docstrings


class WeightedLabelledDataFrame(WeightedDataFrame, LabelledDataFrame):
    """:class:`pandas.DataFrame` with weights and labels.

    The label-related methods are re-declared here so that their ``axis``
    argument defaults to 1; each one delegates to the parent
    implementation via ``super()``.
    """

    # Combine the metadata attribute names declared by both parents.
    _metadata = WeightedDataFrame._metadata + LabelledDataFrame._metadata

    def __init__(self, *args, **kwargs):
        """Build the frame, optionally attaching ``labels``.

        ``labels`` is removed from ``kwargs`` before the parent
        constructor is called. It may be a dict mapping entries of the
        frame to labels (missing entries get ``''``) or any value
        accepted by :meth:`set_labels`.
        """
        labels = kwargs.pop('labels', None)
        # NOTE(review): presumably the level names used for weights on
        # axis 0 and labels on axis 1 -- confirm in LabelledDataFrame.
        if not hasattr(self, '_labels'):
            self._labels = ('weights', 'labels')
        super().__init__(*args, **kwargs)
        if labels is None:
            return
        if isinstance(labels, dict):
            labels = [labels.get(param, '') for param in self]
        self.set_labels(labels, inplace=True)

    def islabelled(self, axis=1):
        """Search for existence of labels (``axis`` defaults to 1)."""
        return super().islabelled(axis=axis)

    def get_labels(self, axis=1):
        """Retrieve labels from an axis (``axis`` defaults to 1)."""
        return super().get_labels(axis=axis)

    def get_labels_map(self, axis=1, fill=True):
        """Retrieve mapping from paramnames to labels from an axis."""
        return super().get_labels_map(axis=axis, fill=fill)

    def get_label(self, param, axis=1):
        """Retrieve the label of ``param`` from an axis."""
        return super().get_label(param, axis=axis)

    def set_label(self, param, value, axis=1):
        """Set a specific label to a specific value on an axis.

        The parent method is always called with ``inplace=True``.
        """
        return super().set_label(param, value, axis=axis, inplace=True)

    def drop_labels(self, axis=1):
        """Drop the labels from an axis if present."""
        return super().drop_labels(axis)

    def set_labels(self, labels, axis=1, inplace=False, level=None):
        """Set labels along an axis (``axis`` defaults to 1)."""
        return super().set_labels(
            labels, axis=axis, inplace=inplace, level=level
        )

    @property
    def _constructor(self):
        # pandas hook: class used for frame-shaped results.
        return WeightedLabelledDataFrame

    @property
    def _constructor_sliced(self):
        # pandas hook: class used for series-shaped results.
        return WeightedLabelledSeries


class WeightedLabelledSeries(WeightedSeries, LabelledSeries):
    """Series with weights and labels.

    Combines ``WeightedSeries`` and ``LabelledSeries``; it is the sliced
    (one-dimensional) counterpart of ``WeightedLabelledDataFrame``.
    """

    # Combine the metadata attribute names declared by both parents.
    _metadata = WeightedSeries._metadata + LabelledSeries._metadata

    def __init__(self, *args, **kwargs):
        # Only set the default when no ``_labels`` attribute exists yet,
        # so a previously assigned value is not overwritten.
        if not hasattr(self, '_labels'):
            self._labels = ('weights', 'labels')
        super().__init__(*args, **kwargs)

    def set_label(self, param, value, axis=0):
        """Set a specific label to a specific value.

        The parent method is always called with ``inplace=True``.
        """
        return super().set_label(param, value, axis=axis, inplace=True)

    @property
    def _constructor(self):
        # pandas hook: class used for series-shaped results.
        return WeightedLabelledSeries

    @property
    def _constructor_expanddim(self):
        # pandas hook: class used when a series is expanded to a frame.
        return WeightedLabelledDataFrame


class Samples(WeightedLabelledDataFrame):
"""Storage and plotting tools for general samples.

Expand Down
100 changes: 100 additions & 0 deletions anesthetic/weighted_labelled_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""Pandas DataFrame with weights and labels."""
from anesthetic.weighted_pandas import WeightedDataFrame, WeightedSeries
from anesthetic.labelled_pandas import LabelledDataFrame, LabelledSeries
import pandas as pd


def read_csv(filename, *args, **kwargs):
    """Read a CSV file into a ``WeightedLabelledDataFrame``.

    The file is parsed with progressively simpler layouts and the first
    matching result is returned:

    1. two index columns and two header rows (weighted and labelled);
       the weights are cast to ``float`` before returning,
    2. two index columns (weighted),
    3. two header rows (labelled),
    4. one index column, returned unconditionally.

    Parameters
    ----------
    filename
        Passed as the first argument to :func:`pandas.read_csv`.
    *args, **kwargs
        Forwarded unchanged to every :func:`pandas.read_csv` call.

    Returns
    -------
    WeightedLabelledDataFrame
    """
    def load(**layout):
        # Parse the file with the given index/header layout and wrap it.
        raw = pd.read_csv(filename, *args, **layout, **kwargs)
        return WeightedLabelledDataFrame(raw)

    frame = load(index_col=[0, 1], header=[0, 1])
    if frame.isweighted() and frame.islabelled():
        # NOTE(review): the cast suggests the weights are not parsed as
        # floats with this layout -- confirm against pandas behaviour.
        weights = frame.get_weights().astype(float)
        frame.set_weights(weights, inplace=True)
        return frame

    frame = load(index_col=[0, 1])
    if frame.isweighted():
        return frame

    frame = load(index_col=0, header=[0, 1])
    if frame.islabelled():
        return frame

    return load(index_col=0)


class WeightedLabelledDataFrame(WeightedDataFrame, LabelledDataFrame):
    """:class:`pandas.DataFrame` with weights and labels.

    The label-related methods are re-declared here so that their ``axis``
    argument defaults to 1; each one delegates to the parent
    implementation via ``super()``.
    """

    # Combine the metadata attribute names declared by both parents.
    _metadata = WeightedDataFrame._metadata + LabelledDataFrame._metadata

    def __init__(self, *args, **kwargs):
        """Build the frame, optionally attaching ``labels``.

        ``labels`` is removed from ``kwargs`` before the parent
        constructor is called. It may be a dict mapping entries of the
        frame to labels (missing entries get ``''``) or any value
        accepted by :meth:`set_labels`.
        """
        labels = kwargs.pop('labels', None)
        # NOTE(review): presumably the level names used for weights on
        # axis 0 and labels on axis 1 -- confirm in LabelledDataFrame.
        if not hasattr(self, '_labels'):
            self._labels = ('weights', 'labels')
        super().__init__(*args, **kwargs)
        if labels is None:
            return
        if isinstance(labels, dict):
            labels = [labels.get(param, '') for param in self]
        self.set_labels(labels, inplace=True)

    def islabelled(self, axis=1):
        """Search for existence of labels (``axis`` defaults to 1)."""
        return super().islabelled(axis=axis)

    def get_labels(self, axis=1):
        """Retrieve labels from an axis (``axis`` defaults to 1)."""
        return super().get_labels(axis=axis)

    def get_labels_map(self, axis=1, fill=True):
        """Retrieve mapping from paramnames to labels from an axis."""
        return super().get_labels_map(axis=axis, fill=fill)

    def get_label(self, param, axis=1):
        """Retrieve the label of ``param`` from an axis."""
        return super().get_label(param, axis=axis)

    def set_label(self, param, value, axis=1):
        """Set a specific label to a specific value on an axis.

        The parent method is always called with ``inplace=True``.
        """
        return super().set_label(param, value, axis=axis, inplace=True)

    def drop_labels(self, axis=1):
        """Drop the labels from an axis if present."""
        return super().drop_labels(axis)

    def set_labels(self, labels, axis=1, inplace=False, level=None):
        """Set labels along an axis (``axis`` defaults to 1)."""
        return super().set_labels(
            labels, axis=axis, inplace=inplace, level=level
        )

    @property
    def _constructor(self):
        # pandas hook: class used for frame-shaped results.
        return WeightedLabelledDataFrame

    @property
    def _constructor_sliced(self):
        # pandas hook: class used for series-shaped results.
        return WeightedLabelledSeries


class WeightedLabelledSeries(WeightedSeries, LabelledSeries):
    """Series with weights and labels.

    Combines ``WeightedSeries`` and ``LabelledSeries``; it is the sliced
    (one-dimensional) counterpart of ``WeightedLabelledDataFrame``.
    """

    # Combine the metadata attribute names declared by both parents.
    _metadata = WeightedSeries._metadata + LabelledSeries._metadata

    def __init__(self, *args, **kwargs):
        # Only set the default when no ``_labels`` attribute exists yet,
        # so a previously assigned value is not overwritten.
        if not hasattr(self, '_labels'):
            self._labels = ('weights', 'labels')
        super().__init__(*args, **kwargs)

    def set_label(self, param, value, axis=0):
        """Set a specific label to a specific value.

        The parent method is always called with ``inplace=True``.
        """
        return super().set_label(param, value, axis=axis, inplace=True)

    @property
    def _constructor(self):
        # pandas hook: class used for series-shaped results.
        return WeightedLabelledSeries

    @property
    def _constructor_expanddim(self):
        # pandas hook: class used when a series is expanded to a frame.
        return WeightedLabelledDataFrame
24 changes: 24 additions & 0 deletions anesthetic/weighted_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,30 @@
from pandas.core.dtypes.missing import notna
from pandas.core.accessor import CachedAccessor
from anesthetic.plotting import PlotAccessor
import pandas as pd


def read_csv(filename, *args, **kwargs):
    """Read a CSV file into a ``WeightedDataFrame``.

    The file is parsed with progressively simpler layouts and the first
    one whose result is weighted on the expected axes is returned:

    1. two index columns and two header rows (weighted on both axes),
    2. two index columns (weighted on axis 0),
    3. two header rows (weighted on axis 1),
    4. one index column, returned unconditionally.

    Whenever the accepted layout carries weights on axis 1, those
    weights are cast to ``float`` before the frame is returned.

    Parameters
    ----------
    filename
        Passed as the first argument to :func:`pandas.read_csv`.
    *args, **kwargs
        Forwarded unchanged to every :func:`pandas.read_csv` call.

    Returns
    -------
    WeightedDataFrame
    """
    # Each entry: (layout keywords for pandas, axes that must be weighted).
    attempts = (
        ({'index_col': [0, 1], 'header': [0, 1]}, (0, 1)),
        ({'index_col': [0, 1]}, (0,)),
        ({'index_col': 0, 'header': [0, 1]}, (1,)),
    )
    for layout, weighted_axes in attempts:
        wdf = WeightedDataFrame(
            pd.read_csv(filename, *args, **layout, **kwargs)
        )
        if not all(wdf.isweighted(axis) for axis in weighted_axes):
            continue
        if 1 in weighted_axes:
            column_weights = wdf.get_weights(axis=1).astype(float)
            wdf.set_weights(column_weights, axis=1, inplace=True)
        return wdf
    # Fallback: plain frame with a single index column.
    plain = pd.read_csv(filename, index_col=0, *args, **kwargs)
    return WeightedDataFrame(plain)


class WeightedGroupBy(GroupBy):
Expand Down
10 changes: 9 additions & 1 deletion docs/source/anesthetic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ anesthetic.samples module
.. automodule:: anesthetic.samples
:members:
:undoc-members:
:show-inheritance:


anesthetic.scripts module
Expand Down Expand Up @@ -102,6 +101,15 @@ anesthetic.utils module
:show-inheritance:


anesthetic.weighted\_labelled\_pandas module
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. automodule:: anesthetic.weighted_labelled_pandas
:members:
:undoc-members:
:show-inheritance:


anesthetic.weighted\_pandas module
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
29 changes: 28 additions & 1 deletion tests/test_labelled_pandas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
from numpy.testing import assert_array_equal
from anesthetic.labelled_pandas import LabelledSeries, LabelledDataFrame
from anesthetic.labelled_pandas import (LabelledSeries, LabelledDataFrame,
read_csv)
from pandas import Series, DataFrame, MultiIndex
import pandas.testing
import pytest
Expand Down Expand Up @@ -469,3 +470,29 @@ def test_drop_labels(lframe_index):
assert_frame_equal_not_index(ldf, nolabels)
assert_frame_equal(nolabels.drop_labels(), nolabels)
assert nolabels.drop_labels() is not nolabels


def test_read_csv():
    """Round-trip a ``LabelledDataFrame`` through ``to_csv``/``read_csv``.

    Four layouts are checked in turn: no labels, labels on axis 0 only,
    labels on both axes, and labels on axis 1 only. The CSV file lives in
    a temporary directory so the test leaves nothing behind in the
    working directory and cannot collide with other tests.
    """
    import os
    import tempfile

    def assert_roundtrip(frame, path):
        # Write the frame and check that reading it back reproduces it.
        frame.to_csv(path)
        pandas.testing.assert_frame_equal(frame, read_csv(path))

    lframe = LabelledDataFrame(np.random.rand(3, 3),
                               index=['A', 'B', 'C'],
                               columns=['a', 'b', 'c'])

    with tempfile.TemporaryDirectory() as tmpdir:
        filename = os.path.join(tmpdir, 'mcmc_ldf.csv')

        # No labels on either axis.
        assert_roundtrip(lframe, filename)

        # Labels on axis 0 only.
        lframe.set_labels(['$A$', '$B$', '$C$'], axis=0, inplace=True)
        assert_roundtrip(lframe, filename)

        # Labels on both axes.
        lframe.set_labels(['$a$', '$b$', '$c$'], axis=1, inplace=True)
        assert_roundtrip(lframe, filename)

        # Labels on axis 1 only.
        lframe = lframe.drop_labels(axis=0)
        assert_roundtrip(lframe, filename)
Loading
Loading