From cb60b9aedc52eaf08584554982d2859d8a8a28b0 Mon Sep 17 00:00:00 2001 From: Adam Ormondroyd <52655393+AdamOrmondroyd@users.noreply.github.com> Date: Sat, 16 Mar 2024 20:48:44 +0000 Subject: [PATCH] Pandas 2.2.0 (#359) * fix boxplot closures (maybe_color_bp changed scope) * _args_adjust no longer a thing (still need to deal with hists * remove convert_period from _get_xticks * add fig to PlanePlot2D._make_plot() * range is now self._bin_range, and convert data to numeric first (not quite happy with the latter). * update pandas requirement * bump version * fix MultiIndex.format deprecation * address warnings in plot.py (no idea why these specific ones are the problem???) * Revert "address warnings in plot.py (no idea why these specific ones are the problem???)" This reverts commit 1f9857bc84917d5d298cb2e47dfe2f8f0dbf475c. * fix chained assignment warnings * don't include groups in chains.apply in remove_burn_in * replace grouper with _grouper * more chained assignment warning fixes * fix Index.format deprecation * bump version to 2.7.1 * remove unused exception handling from HistPlot._calculate_bins * remember loc to fix covariance coverage (cov cov lol) * remove pandas 1.5-specific documentation fix * use sphinx-autodoc-typehints to fix PlotAccessor documentation * bump version to 2.7.4 * add test for `range=None` in `hist_plot_1d` as this is no longer covered after changes to defaults in pandas * consistent diagonal * try whether `auto-update-conda` makes conda CI work better * make conda install `pandas~=2.2.0` * Lukas' tidy suggestion for Hist1dPlot._calculate_bins() * remove unused import * I can only apologise, I'd forgotten to reinstall the pre-commit hooks after cloning a fresh repo --------- Co-authored-by: lukashergt --- .github/workflows/CI.yaml | 2 +- README.rst | 2 +- anesthetic/_format.py | 5 +- anesthetic/_version.py | 2 +- anesthetic/plot.py | 10 ++-- anesthetic/plotting/_matplotlib/boxplot.py | 7 +-- anesthetic/plotting/_matplotlib/core.py | 9 ++-- anesthetic/plotting/_matplotlib/hist.py | 57 +++++++++------------- anesthetic/samples.py | 3 +- anesthetic/weighted_pandas.py | 9 ++-- docs/source/conf.py | 3 +- pyproject.toml | 5 +- tests/test_plot.py | 5 ++ tests/test_weighted_pandas.py | 14 +++--- 14 files changed, 63 insertions(+), 70 deletions(-) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index 676562b6..f2139d94 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -113,7 +113,7 @@ jobs: run: | conda config --append channels conda-forge conda install pytest pytest-cov - conda install scipy numpy 'matplotlib>=3.6.1,<3.9.0' 'pandas>=2.0.0,<2.2.0' + conda install scipy numpy 'matplotlib>=3.6.1,<3.9.0' 'pandas~=2.2.0' - name: Test with pytest shell: bash -l {0} diff --git a/README.rst b/README.rst index 6a6ad009..72289ae4 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ anesthetic: nested sampling post-processing =========================================== :Authors: Will Handley and Lukas Hergt -:Version: 2.8.1 +:Version: 2.8.2 :Homepage: https://github.com/handley-lab/anesthetic :Documentation: http://anesthetic.readthedocs.io/ diff --git a/anesthetic/_format.py b/anesthetic/_format.py index 45f45757..e506a1ba 100644 --- a/anesthetic/_format.py +++ b/anesthetic/_format.py @@ -12,8 +12,7 @@ class _DataFrameFormatter(DataFrameFormatter): def _get_formatted_column_labels(self, frame): columns = frame.columns if isinstance(columns, MultiIndex): - fmt_columns = columns.format(sparsify=False, adjoin=False) - fmt_columns = list(zip(*fmt_columns)) + fmt_columns = [tuple(str(c) for c in column) for column in columns] dtypes = self.frame.dtypes._values # if we have a Float level, they don't use leading space at all @@ -37,7 +36,7 @@ def space_format(x, y): str_columns = [list(x) for x in zip(*str_columns)] str_columns = [_make_fixed_width(x) for x in str_columns] else: - fmt_columns = columns.format() + fmt_columns = [str(x) for x in columns] dtypes = self.frame.dtypes need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) str_columns = [ diff --git a/anesthetic/_version.py b/anesthetic/_version.py index 80e22f7a..964a32ab 100644 --- a/anesthetic/_version.py +++ b/anesthetic/_version.py @@ -1 +1 @@ -__version__ = '2.8.1' +__version__ = '2.8.2' diff --git a/anesthetic/plot.py b/anesthetic/plot.py index 1b1e065a..05f3fa27 100644 --- a/anesthetic/plot.py +++ b/anesthetic/plot.py @@ -247,12 +247,12 @@ def _position_frame(index, columns, lower, diagonal, upper): for i, x in enumerate(columns): if all_params.index(x) < all_params.index(y): if lower: - position[x][y] = -1 + position.loc[y, x] = -1 elif all_params.index(x) > all_params.index(y): if upper: - position[x][y] = +1 + position.loc[y, x] = +1 elif diagonal: - position[x][y] = 0 + position.loc[y, x] = 0 return position @classmethod @@ -277,7 +277,7 @@ def _axes_frame(cls, position, fig, gridspec_kw=None, subplot_spec=None): hspace=hspace, wspace=wspace, subplot_spec=subplot_spec, **gridspec_kw) - axes[:][:] = None + axes.loc[:, :] = None for j, y in enumerate(axes.index[::-1]): for i, x in enumerate(axes.columns): if position[x][y] is not None: @@ -285,7 +285,7 @@ def _axes_frame(cls, position, fig, gridspec_kw=None, subplot_spec=None): sx = sx[0] if sx else None sy = list(axes.T[y].dropna()) sy = sy[0] if sy else None - axes[x][y] = fig.add_subplot( + axes.loc[y, x] = fig.add_subplot( gs[axes.index.size - 1 - j, i], sharex=sx, sharey=sy ) if position[x][y] == 0: diff --git a/anesthetic/plotting/_matplotlib/boxplot.py b/anesthetic/plotting/_matplotlib/boxplot.py index 6505817d..10a303d5 100644 --- a/anesthetic/plotting/_matplotlib/boxplot.py +++ b/anesthetic/plotting/_matplotlib/boxplot.py @@ -1,5 +1,6 @@ import pandas.plotting._matplotlib.boxplot -from pandas.plotting._matplotlib.boxplot import BoxPlot as _BoxPlot +from pandas.plotting._matplotlib.boxplot import (BoxPlot as _BoxPlot, + maybe_color_bp) from anesthetic.plotting._matplotlib.core import _WeightedMPLPlot, _get_weights from anesthetic.utils import quantile from pandas.core.dtypes.missing import remove_na_arraylike @@ -56,9 +57,9 @@ def boxplot(data, *args, **kwds): def create_plot_group(): fontsize = None # pragma: no cover - maybe_color_bp = None # pragma: no cover return_type = None # pragma: no cover rot = None # pragma: no cover + colors = None # pragma: no cover def plot_group(keys, values, ax, **kwds): # pragma: no cover # GH 45465: xlabel/ylabel need to be popped out before plotting @@ -96,7 +97,7 @@ def plot_group(keys, values, ax, **kwds): # pragma: no cover ax.set_xticklabels(keys, rotation=rot) else: ax.set_yticklabels(keys, rotation=rot) - maybe_color_bp(bp, **kwds) + maybe_color_bp(bp, color_tup=colors, **kwds) # Return axes in multiplot case, maybe revisit later # 985 if return_type == "dict": diff --git a/anesthetic/plotting/_matplotlib/core.py b/anesthetic/plotting/_matplotlib/core.py index 0b156dac..8d5ca090 100644 --- a/anesthetic/plotting/_matplotlib/core.py +++ b/anesthetic/plotting/_matplotlib/core.py @@ -52,11 +52,11 @@ def _get_index_name(self): else: return super()._get_index_name() - def _get_xticks(self, convert_period: bool = False): + def _get_xticks(self): if isinstance(self.data, _WeightedObject): return self.data.drop_weights().index._mpl_repr() else: - return super()._get_xticks(convert_period) + return super()._get_xticks() def _compress_weights(kwargs, data): @@ -82,7 +82,7 @@ def __init__(self, data, x, y, s=None, c=None, **kwargs) -> None: class _PlanePlot2d(PlanePlot): - def _make_plot(self): + def _make_plot(self, fig): if self.colormap is not None: self.kwds['cmap'] = plt.get_cmap(self.colormap) colors = self._get_colors() @@ -109,9 +109,6 @@ def _make_plot(self): f"supported by {self._kind}") self._plot(ax, x.values, y.values, **kwds) - def _args_adjust(self): - pass - class ScatterPlot2d(_CompressedMPLPlot, _PlanePlot2d): # noqa: disable=D101 diff --git a/anesthetic/plotting/_matplotlib/hist.py b/anesthetic/plotting/_matplotlib/hist.py index 763591cb..98bae550 100644 --- a/anesthetic/plotting/_matplotlib/hist.py +++ b/anesthetic/plotting/_matplotlib/hist.py @@ -22,60 +22,56 @@ hist_plot_1d, quantile_plot_interval, ) -from anesthetic.utils import quantile, histogram_bin_edges +from anesthetic.utils import histogram_bin_edges class HistPlot(_WeightedMPLPlot, _HistPlot): # noqa: disable=D101 - def _args_adjust(self) -> None: + def _adjust_bins(self, bins) -> None: if ( - hasattr(self, 'bins') and - isinstance(self.bins, str) and - self.bins in ['fd', 'scott', 'sqrt'] + isinstance(bins, str) and + bins in ['fd', 'scott', 'sqrt'] ): - self.bins = self._calculate_bins(self.data) - super()._args_adjust() + bins = self._calculate_bins(self.data, bins) + return super()._adjust_bins(bins) # noqa: disable=D101 - def _calculate_bins(self, data): + def _calculate_bins(self, data, bins): if self.logx: data = np.log10(data) - if 'range' in self.kwds and self.kwds['range'] is not None: - xmin, xmax = self.kwds['range'] - self.kwds['range'] = (np.log10(xmin), np.log10(xmax)) + if self._bin_range is not None: + xmin, xmax = self._bin_range + self._bin_range = (np.log10(xmin), np.log10(xmax)) nd_values = data.infer_objects(copy=False)._get_numeric_data() values = np.ravel(nd_values) - weights = self.kwds.get("weights", None) + weights = self.weights if weights is not None: - try: - weights = np.broadcast_to(weights[:, None], nd_values.shape) - except ValueError: - pass + weights = np.broadcast_to(weights[:, None], nd_values.shape) weights = np.ravel(weights) weights = weights[~isna(values)] values = values[~isna(values)] - if isinstance(self.bins, str) and self.bins in ['fd', 'scott', 'sqrt']: + if isinstance(bins, str) and bins in ['fd', 'scott', 'sqrt']: bins = histogram_bin_edges( values, weights=weights, - bins=self.bins, + bins=bins, beta=self.kwds.pop('beta', 'equal'), - range=self.kwds.get('range', None) + range=self._bin_range ) else: bins = np.histogram_bin_edges( values, weights=weights, - bins=self.bins, - range=self.kwds.get('range', None) + bins=bins, + range=self._bin_range ) if self.logx: bins = 10**bins - if 'range' in self.kwds and self.kwds['range'] is not None: - self.kwds['range'] = (xmin, xmax) + if self._bin_range is not None: + self._bin_range = (xmin, xmax) return bins def _get_colors(self, num_colors=None, color_kwds='color'): @@ -180,18 +176,13 @@ def __init__( ) -> None: super().__init__(data, bins=bins, bottom=bottom, **kwargs) - def _calculate_bins(self, data): - if 'range' not in self.kwds or self.kwds['range'] is None: + def _calculate_bins(self, data, bins): + if self._bin_range is None: q = self.kwds.get('q', 5) q = quantile_plot_interval(q=q) - weights = self.kwds.get('weights', None) - xmin = quantile(data, q[0], weights) - xmax = quantile(data, q[-1], weights) - self.kwds['range'] = (xmin, xmax) - bins = super()._calculate_bins(data) - self.kwds.pop('range') - else: - bins = super()._calculate_bins(data) + xmin, xmax = data.quantile(q).to_numpy().ravel() + self._bin_range = (xmin, xmax) + bins = super()._calculate_bins(data, bins) return bins @classmethod diff --git a/anesthetic/samples.py b/anesthetic/samples.py index 182a75ff..d2c95317 100644 --- a/anesthetic/samples.py +++ b/anesthetic/samples.py @@ -518,7 +518,8 @@ def remove_burn_in(self, burn_in, reset_index=False, inplace=False): nsamples = chains.count().iloc[:, 0].to_numpy() ndrop = ndrop * nsamples ndrop = ndrop.astype(int) - data = self.drop(chains.apply(lambda g: g.head(ndrop[g.name-1])).index, + data = self.drop(chains.apply(lambda g: g.head(ndrop[g.name-1]), + include_groups=False).index, inplace=inplace) if reset_index: data = data.reset_index(drop=True, inplace=inplace) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 6783ee98..78f6b9d7 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -44,8 +44,7 @@ def read_csv(filename, *args, **kwargs): class WeightedGroupBy(GroupBy): """Weighted version of ``pandas.core.groupby.GroupBy``.""" - grouper: ops.BaseGrouper - """:meta private:""" + _grouper: ops.BaseGrouper def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -119,10 +118,10 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover subset = self.obj return WeightedDataFrameGroupBy( subset, - self.grouper, + self._grouper, axis=self.axis, level=self.level, - grouper=self.grouper, + grouper=self._grouper, exclusions=self.exclusions, selection=key, as_index=self.as_index, @@ -138,7 +137,7 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover return WeightedSeriesGroupBy( subset, level=self.level, - grouper=self.grouper, + grouper=self._grouper, selection=key, sort=self.sort, group_keys=self.group_keys, diff --git a/docs/source/conf.py b/docs/source/conf.py index 466a88e2..92921209 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -58,6 +58,7 @@ def get_version(short=False): 'sphinx.ext.githubpages', 'sphinx.ext.imgconverter', 'sphinx_copybutton', + 'sphinx_autodoc_typehints', 'matplotlib.sphinxext.plot_directive', 'numpydoc', ] @@ -98,8 +99,6 @@ def get_version(short=False): autosummary_generate = True -nitpick_ignore = [('py:obj', 'pandas.core.groupby.SeriesGroupBy.sample')] # not currently included in pandas 1.5, but will in future - # -- Options for autosectionlabel------------------------------------------ autosectionlabel_prefix_document = True diff --git a/pyproject.toml b/pyproject.toml index caa28267..d7807831 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ requires-python = ">=3.8" dependencies = [ "scipy", "numpy", - "pandas>=2.0.0,<2.2.0", + "pandas~=2.2.0", "matplotlib>=3.6.1,<3.9.0", ] classifiers = [ @@ -60,7 +60,8 @@ classifiers = [ "JOSS paper" = "https://joss.theoj.org/papers/10.21105/joss.01414" [project.optional-dependencies] -docs = ["sphinx>=4.2.0", "sphinx_rtd_theme>=1.2.2", "sphinx-copybutton", "numpydoc"] +docs = ["sphinx>=4.2.0", "sphinx_rtd_theme>=1.2.2", "sphinx-copybutton", + "sphinx-autodoc-typehints", "numpydoc"] test = ["pytest", "pytest-cov", "flake8", "pydocstyle", "packaging", "pre-commit"] ultranest = ["h5py"] astropy = ["astropy"] diff --git a/tests/test_plot.py b/tests/test_plot.py index 8864d905..ab09c3c9 100644 --- a/tests/test_plot.py +++ b/tests/test_plot.py @@ -831,6 +831,11 @@ def test_logscale_hist_kwargs(b): assert amax < 0.5 assert edges[0] < 1e-3 assert edges[-1] > 1e3 + h, edges, _ = hist_plot_1d(ax, data, bins=b, range=None) + amax = abs(np.log10(edges[np.argmax(h)])) + assert amax < 0.5 + assert edges[0] < 1e-3 + assert edges[-1] > 1e3 h, edges, _ = hist_plot_1d(ax, data, bins=b, range=(1e-3, 1e3)) amax = abs(np.log10(edges[np.argmax(h)])) assert amax < 0.5 diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py index 2bdb4272..b390eeb4 100644 --- a/tests/test_weighted_pandas.py +++ b/tests/test_weighted_pandas.py @@ -409,7 +409,7 @@ def test_WeightedDataFrame_compress(frame): def test_WeightedDataFrame_nan(frame): - frame['A'][0] = np.nan + frame.loc[0, 'A'] = np.nan assert ~frame.mean().isna().any() assert ~frame.mean(axis=1).isna().any() assert_array_equal(frame.mean(skipna=False).isna(), [True] + [False]*5) @@ -422,7 +422,7 @@ def test_WeightedDataFrame_nan(frame): assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6], [True, False, False, False, False, False]) - frame['B'][2] = np.nan + frame.loc[2, 'B'] = np.nan assert ~frame.mean().isna().any() assert_array_equal(frame.mean(skipna=False).isna(), [True, True] + [False]*4) @@ -435,10 +435,10 @@ def test_WeightedDataFrame_nan(frame): assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6], [True, False, True, False, False, False]) - frame['C'][4] = np.nan - frame['D'][5] = np.nan - frame['E'][6] = np.nan - frame['F'][7] = np.nan + frame.loc[4, 'C'] = np.nan + frame.loc[5, 'D'] = np.nan + frame.loc[6, 'E'] = np.nan + frame.loc[7, 'F'] = np.nan assert ~frame.mean().isna().any() assert frame.mean(skipna=False).isna().all() assert_array_equal(frame.mean(axis=1, skipna=False).isna()[0:6], @@ -493,7 +493,7 @@ def test_WeightedSeries_cov(frame): assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2) assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2) - frame['A'][0] = np.nan + frame.loc[0, 'A'] = np.nan assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2) assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2)