Merge branch 'master' into pandas2.2

handley-lab · Mar 7, 2024 · 1ab5407 · 1ab5407
2 parents 91eb24e + 54170d8
commit 1ab5407
Show file tree

Hide file tree

Showing 16 changed files with 289 additions and 91 deletions.
diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
@@ -113,7 +113,7 @@ jobs:
         run: |
           conda config --append channels conda-forge
           conda install pytest pytest-cov
-          conda install scipy numpy 'matplotlib>=3.6.1' 'pandas>=2.0.0'
+          conda install scipy numpy 'matplotlib>=3.6.1,<3.9.0' 'pandas>=2.0.0,<2.2.0'
 
       - name: Test with pytest
         shell: bash -l {0}

diff --git a/README.rst b/README.rst
@@ -2,7 +2,7 @@
 anesthetic: nested sampling post-processing
 ===========================================
 :Authors: Will Handley and Lukas Hergt
-:Version: 2.7.4
+:Version: 2.8.2
 :Homepage: https://github.com/handley-lab/anesthetic
 :Documentation: http://anesthetic.readthedocs.io/
 

diff --git a/anesthetic/__init__.py b/anesthetic/__init__.py
@@ -49,3 +49,4 @@ def wrapper(backend=None):
 
 read_hdf = anesthetic.read.hdf.read_hdf
 read_chains = anesthetic.read.chain.read_chains
+read_csv = anesthetic.read.csv.read_csv
diff --git a/anesthetic/_version.py b/anesthetic/_version.py
@@ -1 +1 @@
-__version__ = '2.7.4'
+__version__ = '2.8.2'
diff --git a/anesthetic/labelled_pandas.py b/anesthetic/labelled_pandas.py
@@ -5,6 +5,26 @@
 import numpy as np
 from functools import cmp_to_key
 from pandas.errors import IndexingError
+import pandas as pd
+
+
+def read_csv(filename, *args, **kwargs):
+    """Read a CSV file into a ``LabelledDataFrame``."""
+    df = pd.read_csv(filename, index_col=[0, 1], header=[0, 1],
+                     *args, **kwargs)
+    ldf = LabelledDataFrame(df)
+    if ldf.islabelled(0) and ldf.islabelled(1):
+        return ldf
+    df = pd.read_csv(filename, index_col=[0, 1], *args, **kwargs)
+    ldf = LabelledDataFrame(df)
+    if ldf.islabelled(0):
+        return ldf
+    df = pd.read_csv(filename, index_col=0, header=[0, 1], *args, **kwargs)
+    ldf = LabelledDataFrame(df)
+    if ldf.islabelled(1):
+        return ldf
+    df = pd.read_csv(filename, index_col=0, *args, **kwargs)
+    return LabelledDataFrame(df)
 
 
 def ac(funcs, *args):

diff --git a/anesthetic/read/chain.py b/anesthetic/read/chain.py
@@ -5,6 +5,7 @@
 from anesthetic.read.multinest import read_multinest
 from anesthetic.read.ultranest import read_ultranest
 from anesthetic.read.nestedfit import read_nestedfit
+from anesthetic.read.csv import read_csv
 
 
 def read_chains(root, *args, **kwargs):
@@ -18,8 +19,8 @@ def read_chains(root, *args, **kwargs):
         * `Nested_fit <https://github.com/martinit18/Nested_Fit>`_,
         * `CosmoMC <https://github.com/cmbant/CosmoMC>`_,
         * `Cobaya <https://github.com/CobayaSampler/cobaya>`_,
-        * or anything `GetDist <https://github.com/cmbant/getdist>`_
-          compatible.
+        * anything `GetDist <https://github.com/cmbant/getdist>`_ compatible,
+        * files produced using ``DataFrame.to_csv()`` from anesthetic.
 
     Note that in order to optimally read chains from Cobaya you need to have
     `GetDist <https://getdist.readthedocs.io/en/latest/>`__ installed.
@@ -40,6 +41,7 @@ def read_chains(root, *args, **kwargs):
 
     """
     root = str(root)
+    # TODO: remove this in version >= 2.1
     if 'burn_in' in kwargs:
         raise KeyError(
             "This is anesthetic 1.0 syntax. The `burn_in` keyword is no "
@@ -51,8 +53,8 @@ def read_chains(root, *args, **kwargs):
         )
     errors = []
     readers = [
-        read_polychord, read_multinest, read_cobaya,
-        read_ultranest, read_nestedfit, read_getdist
+        read_polychord, read_multinest, read_cobaya, read_ultranest,
+        read_nestedfit, read_getdist, read_csv
     ]
     for read in readers:
         try:

diff --git a/anesthetic/read/csv.py b/anesthetic/read/csv.py
@@ -0,0 +1,15 @@
+"""Read CSV files into anesthetic samples."""
+from anesthetic.weighted_labelled_pandas import read_csv as wl_read_csv
+from anesthetic.samples import MCMCSamples, NestedSamples
+from pathlib import Path
+
+
+def read_csv(filename, *args, **kwargs):
+    """Read a CSV file into a :class:`Samples` object."""
+    filename = Path(filename)
+    kwargs['label'] = kwargs.get('label', filename.stem)
+    wldf = wl_read_csv(filename.with_suffix('.csv'))
+    if 'nlive' in wldf.columns:
+        return NestedSamples(wldf, *args, **kwargs)
+    else:
+        return MCMCSamples(wldf, *args, **kwargs)
diff --git a/anesthetic/samples.py b/anesthetic/samples.py
@@ -14,89 +14,12 @@
 from anesthetic.utils import (compute_nlive, compute_insertion_indexes,
                               is_int, logsumexp)
 from anesthetic.gui.plot import RunPlotter
-from anesthetic.weighted_pandas import WeightedDataFrame, WeightedSeries
-from anesthetic.labelled_pandas import LabelledDataFrame, LabelledSeries
+from anesthetic.weighted_labelled_pandas import WeightedLabelledDataFrame
 from anesthetic.plot import (make_1d_axes, make_2d_axes,
                              AxesSeries, AxesDataFrame)
 from anesthetic.utils import adjust_docstrings
 
 
-class WeightedLabelledDataFrame(WeightedDataFrame, LabelledDataFrame):
-    """:class:`pandas.DataFrame` with weights and labels."""
-
-    _metadata = WeightedDataFrame._metadata + LabelledDataFrame._metadata
-
-    def __init__(self, *args, **kwargs):
-        labels = kwargs.pop('labels', None)
-        if not hasattr(self, '_labels'):
-            self._labels = ('weights', 'labels')
-        super().__init__(*args, **kwargs)
-        if labels is not None:
-            if isinstance(labels, dict):
-                labels = [labels.get(p, '') for p in self]
-            self.set_labels(labels, inplace=True)
-
-    def islabelled(self, axis=1):
-        """Search for existence of labels."""
-        return super().islabelled(axis=axis)
-
-    def get_labels(self, axis=1):
-        """Retrieve labels from an axis."""
-        return super().get_labels(axis=axis)
-
-    def get_labels_map(self, axis=1, fill=True):
-        """Retrieve mapping from paramnames to labels from an axis."""
-        return super().get_labels_map(axis=axis, fill=fill)
-
-    def get_label(self, param, axis=1):
-        """Retrieve mapping from paramnames to labels from an axis."""
-        return super().get_label(param, axis=axis)
-
-    def set_label(self, param, value, axis=1):
-        """Set a specific label to a specific value on an axis."""
-        return super().set_label(param, value, axis=axis, inplace=True)
-
-    def drop_labels(self, axis=1):
-        """Drop the labels from an axis if present."""
-        return super().drop_labels(axis)
-
-    def set_labels(self, labels, axis=1, inplace=False, level=None):
-        """Set labels along an axis."""
-        return super().set_labels(labels, axis=axis,
-                                  inplace=inplace, level=level)
-
-    @property
-    def _constructor(self):
-        return WeightedLabelledDataFrame
-
-    @property
-    def _constructor_sliced(self):
-        return WeightedLabelledSeries
-
-
-class WeightedLabelledSeries(WeightedSeries, LabelledSeries):
-    """Series with weights and labels."""
-
-    _metadata = WeightedSeries._metadata + LabelledSeries._metadata
-
-    def __init__(self, *args, **kwargs):
-        if not hasattr(self, '_labels'):
-            self._labels = ('weights', 'labels')
-        super().__init__(*args, **kwargs)
-
-    def set_label(self, param, value, axis=0):
-        """Set a specific label to a specific value."""
-        return super().set_label(param, value, axis=axis, inplace=True)
-
-    @property
-    def _constructor(self):
-        return WeightedLabelledSeries
-
-    @property
-    def _constructor_expanddim(self):
-        return WeightedLabelledDataFrame
-
-
 class Samples(WeightedLabelledDataFrame):
     """Storage and plotting tools for general samples.
 

diff --git a/anesthetic/weighted_labelled_pandas.py b/anesthetic/weighted_labelled_pandas.py
@@ -0,0 +1,100 @@
+"""Pandas DataFrame with weights and labels."""
+from anesthetic.weighted_pandas import WeightedDataFrame, WeightedSeries
+from anesthetic.labelled_pandas import LabelledDataFrame, LabelledSeries
+import pandas as pd
+
+
+def read_csv(filename, *args, **kwargs):
+    """Read a CSV file into a ``WeightedLabelledDataFrame``."""
+    df = pd.read_csv(filename, index_col=[0, 1], header=[0, 1],
+                     *args, **kwargs)
+    wldf = WeightedLabelledDataFrame(df)
+    if wldf.isweighted() and wldf.islabelled():
+        wldf.set_weights(wldf.get_weights().astype(float), inplace=True)
+        return wldf
+    df = pd.read_csv(filename, index_col=[0, 1], *args, **kwargs)
+    wldf = WeightedLabelledDataFrame(df)
+    if wldf.isweighted():
+        return wldf
+    df = pd.read_csv(filename, index_col=0, header=[0, 1], *args, **kwargs)
+    wldf = WeightedLabelledDataFrame(df)
+    if wldf.islabelled():
+        return wldf
+    df = pd.read_csv(filename, index_col=0, *args, **kwargs)
+    return WeightedLabelledDataFrame(df)
+
+
+class WeightedLabelledDataFrame(WeightedDataFrame, LabelledDataFrame):
+    """:class:`pandas.DataFrame` with weights and labels."""
+
+    _metadata = WeightedDataFrame._metadata + LabelledDataFrame._metadata
+
+    def __init__(self, *args, **kwargs):
+        labels = kwargs.pop('labels', None)
+        if not hasattr(self, '_labels'):
+            self._labels = ('weights', 'labels')
+        super().__init__(*args, **kwargs)
+        if labels is not None:
+            if isinstance(labels, dict):
+                labels = [labels.get(p, '') for p in self]
+            self.set_labels(labels, inplace=True)
+
+    def islabelled(self, axis=1):
+        """Search for existence of labels."""
+        return super().islabelled(axis=axis)
+
+    def get_labels(self, axis=1):
+        """Retrieve labels from an axis."""
+        return super().get_labels(axis=axis)
+
+    def get_labels_map(self, axis=1, fill=True):
+        """Retrieve mapping from paramnames to labels from an axis."""
+        return super().get_labels_map(axis=axis, fill=fill)
+
+    def get_label(self, param, axis=1):
+        """Retrieve the label of a specific parameter from an axis."""
+        return super().get_label(param, axis=axis)
+
+    def set_label(self, param, value, axis=1):
+        """Set a specific label to a specific value on an axis."""
+        return super().set_label(param, value, axis=axis, inplace=True)
+
+    def drop_labels(self, axis=1):
+        """Drop the labels from an axis if present."""
+        return super().drop_labels(axis)
+
+    def set_labels(self, labels, axis=1, inplace=False, level=None):
+        """Set labels along an axis."""
+        return super().set_labels(labels, axis=axis,
+                                  inplace=inplace, level=level)
+
+    @property
+    def _constructor(self):
+        return WeightedLabelledDataFrame
+
+    @property
+    def _constructor_sliced(self):
+        return WeightedLabelledSeries
+
+
+class WeightedLabelledSeries(WeightedSeries, LabelledSeries):
+    """Series with weights and labels."""
+
+    _metadata = WeightedSeries._metadata + LabelledSeries._metadata
+
+    def __init__(self, *args, **kwargs):
+        if not hasattr(self, '_labels'):
+            self._labels = ('weights', 'labels')
+        super().__init__(*args, **kwargs)
+
+    def set_label(self, param, value, axis=0):
+        """Set a specific label to a specific value."""
+        return super().set_label(param, value, axis=axis, inplace=True)
+
+    @property
+    def _constructor(self):
+        return WeightedLabelledSeries
+
+    @property
+    def _constructor_expanddim(self):
+        return WeightedLabelledDataFrame
diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
@@ -15,6 +15,30 @@
 from pandas.core.dtypes.missing import notna
 from pandas.core.accessor import CachedAccessor
 from anesthetic.plotting import PlotAccessor
+import pandas as pd
+
+
+def read_csv(filename, *args, **kwargs):
+    """Read a CSV file into a ``WeightedDataFrame``."""
+    df = pd.read_csv(filename, index_col=[0, 1], header=[0, 1],
+                     *args, **kwargs)
+    wdf = WeightedDataFrame(df)
+    if wdf.isweighted(0) and wdf.isweighted(1):
+        wdf.set_weights(wdf.get_weights(axis=1).astype(float),
+                        axis=1, inplace=True)
+        return wdf
+    df = pd.read_csv(filename, index_col=[0, 1], *args, **kwargs)
+    wdf = WeightedDataFrame(df)
+    if wdf.isweighted(0):
+        return wdf
+    df = pd.read_csv(filename, index_col=0, header=[0, 1], *args, **kwargs)
+    wdf = WeightedDataFrame(df)
+    if wdf.isweighted(1):
+        wdf.set_weights(wdf.get_weights(axis=1).astype(float),
+                        axis=1, inplace=True)
+        return wdf
+    df = pd.read_csv(filename, index_col=0, *args, **kwargs)
+    return WeightedDataFrame(df)
 
 
 class WeightedGroupBy(GroupBy):

diff --git a/docs/source/anesthetic.rst b/docs/source/anesthetic.rst
@@ -72,7 +72,6 @@ anesthetic.samples module
 .. automodule:: anesthetic.samples
    :members:
    :undoc-members:
-   :show-inheritance:
 
 
 anesthetic.scripts module
@@ -102,6 +101,15 @@ anesthetic.utils module
    :show-inheritance:
 
 
+anesthetic.weighted\_labelled\_pandas module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: anesthetic.weighted_labelled_pandas
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 anesthetic.weighted\_pandas module
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/tests/test_labelled_pandas.py b/tests/test_labelled_pandas.py
@@ -1,6 +1,7 @@
 import numpy as np
 from numpy.testing import assert_array_equal
-from anesthetic.labelled_pandas import LabelledSeries, LabelledDataFrame
+from anesthetic.labelled_pandas import (LabelledSeries, LabelledDataFrame,
+                                        read_csv)
 from pandas import Series, DataFrame, MultiIndex
 import pandas.testing
 import pytest
@@ -469,3 +470,29 @@ def test_drop_labels(lframe_index):
     assert_frame_equal_not_index(ldf, nolabels)
     assert_frame_equal(nolabels.drop_labels(), nolabels)
     assert nolabels.drop_labels() is not nolabels
+
+
+def test_read_csv():
+    filename = 'mcmc_ldf.csv'
+
+    lframe = LabelledDataFrame(np.random.rand(3, 3),
+                               index=['A', 'B', 'C'],
+                               columns=['a', 'b', 'c'])
+    lframe.to_csv(filename)
+    lframe_ = read_csv(filename)
+    pandas.testing.assert_frame_equal(lframe, lframe_)
+
+    lframe.set_labels(['$A$', '$B$', '$C$'], axis=0, inplace=True)
+    lframe.to_csv(filename)
+    lframe_ = read_csv(filename)
+    pandas.testing.assert_frame_equal(lframe, lframe_)
+
+    lframe.set_labels(['$a$', '$b$', '$c$'], axis=1, inplace=True)
+    lframe.to_csv(filename)
+    lframe_ = read_csv(filename)
+    pandas.testing.assert_frame_equal(lframe, lframe_)
+
+    lframe = lframe.drop_labels(axis=0)
+    lframe.to_csv(filename)
+    lframe_ = read_csv(filename)
+    pandas.testing.assert_frame_equal(lframe, lframe_)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -49,3 +49,4 @@ def wrapper(backend=None):

		read_hdf = anesthetic.read.hdf.read_hdf
		read_chains = anesthetic.read.chain.read_chains
		read_csv = anesthetic.read.csv.read_csv