Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pandas 2.2.0 #359

Merged
merged 31 commits into from
Mar 16, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
42e27c4
fix boxplot closures (maybe_color_bp changed scope)
AdamOrmondroyd Jan 23, 2024
55dec12
_args_adjust no longer a thing (still need to deal with hists)
AdamOrmondroyd Jan 23, 2024
a2a1116
remove convert_period from _get_xticks
AdamOrmondroyd Jan 23, 2024
06050b5
add fig to PlanePlot2D._make_plot()
AdamOrmondroyd Jan 23, 2024
ef13f21
range is now self._bin_range, and convert data to numeric first (not …
AdamOrmondroyd Jan 23, 2024
672bc6c
update pandas requirement
AdamOrmondroyd Jan 23, 2024
5d851dc
bump version
AdamOrmondroyd Jan 23, 2024
6d42c9b
fix MultiIndex.format deprecation
AdamOrmondroyd Feb 17, 2024
1f9857b
address warnings in plot.py (no idea why these specific ones are the …
AdamOrmondroyd Feb 17, 2024
64c14c8
Revert "address warnings in plot.py (no idea why these specific ones …
AdamOrmondroyd Feb 17, 2024
e710dfc
fix chained assignment warnings
AdamOrmondroyd Feb 17, 2024
f3a6d8f
don't include groups in chains.apply in remove_burn_in
AdamOrmondroyd Feb 17, 2024
9610c7b
replace grouper with _grouper
AdamOrmondroyd Feb 17, 2024
99c652d
more chained assignment warning fixes
AdamOrmondroyd Feb 17, 2024
888c979
Merge branch 'master' into pandas2.2
AdamOrmondroyd Feb 17, 2024
b30ae7c
fix Index.format deprecation
AdamOrmondroyd Feb 17, 2024
8fff3bc
bump version to 2.7.1
AdamOrmondroyd Feb 17, 2024
8a16d12
remove unused exception handling from HistPlot._calculate_bins
AdamOrmondroyd Feb 17, 2024
c1e3b8f
remember loc to fix covariance coverage (cov cov lol)
AdamOrmondroyd Feb 17, 2024
ae4182a
remove pandas 1.5-specific documentation fix
AdamOrmondroyd Feb 18, 2024
1b5b2ad
use sphinx-autodoc-typehints to fix PlotAccessor documentation
AdamOrmondroyd Feb 18, 2024
5e68563
Merge branch 'master' into pandas2.2
AdamOrmondroyd Mar 2, 2024
0f6e45e
bump version to 2.7.4
AdamOrmondroyd Mar 2, 2024
91eb24e
add test for `range=None` in `hist_plot_1d` as this is no longer cove…
lukashergt Mar 7, 2024
1ab5407
Merge branch 'master' into pandas2.2
lukashergt Mar 7, 2024
9a57567
consistent diagonal
AdamOrmondroyd Mar 7, 2024
4a1bbf0
try whether `auto-update-conda` makes conda CI work better
lukashergt Mar 12, 2024
ae96746
make conda install `pandas~=2.2.0`
lukashergt Mar 12, 2024
71c7bac
Lukas' tidy suggestion for Hist1dPlot._calculate_bins()
AdamOrmondroyd Mar 16, 2024
6ed8648
remove unused import
AdamOrmondroyd Mar 16, 2024
a009988
I can only apologise, I'd forgotten to reinstall the pre-commit hooks…
AdamOrmondroyd Mar 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/CI.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ jobs:
run: |
conda config --append channels conda-forge
conda install pytest pytest-cov
conda install scipy numpy 'matplotlib>=3.6.1,<3.9.0' 'pandas>=2.0.0,<2.2.0'
conda install scipy numpy 'matplotlib>=3.6.1,<3.9.0' 'pandas~=2.2.0'

- name: Test with pytest
shell: bash -l {0}
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
anesthetic: nested sampling post-processing
===========================================
:Authors: Will Handley and Lukas Hergt
:Version: 2.8.1
:Version: 2.8.2
:Homepage: https://github.com/handley-lab/anesthetic
:Documentation: http://anesthetic.readthedocs.io/

Expand Down
5 changes: 2 additions & 3 deletions anesthetic/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ class _DataFrameFormatter(DataFrameFormatter):
def _get_formatted_column_labels(self, frame):
columns = frame.columns
if isinstance(columns, MultiIndex):
fmt_columns = columns.format(sparsify=False, adjoin=False)
fmt_columns = list(zip(*fmt_columns))
fmt_columns = [tuple(str(c) for c in column) for column in columns]
dtypes = self.frame.dtypes._values

# if we have a Float level, they don't use leading space at all
Expand All @@ -37,7 +36,7 @@ def space_format(x, y):
str_columns = [list(x) for x in zip(*str_columns)]
str_columns = [_make_fixed_width(x) for x in str_columns]
else:
fmt_columns = columns.format()
fmt_columns = [str(x) for x in columns]
dtypes = self.frame.dtypes
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
str_columns = [
Expand Down
2 changes: 1 addition & 1 deletion anesthetic/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2.8.1'
__version__ = '2.8.2'
10 changes: 5 additions & 5 deletions anesthetic/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,12 @@ def _position_frame(index, columns, lower, diagonal, upper):
for i, x in enumerate(columns):
if all_params.index(x) < all_params.index(y):
if lower:
position[x][y] = -1
position.loc[y, x] = -1
elif all_params.index(x) > all_params.index(y):
if upper:
position[x][y] = +1
position.loc[y, x] = +1
elif diagonal:
position[x][y] = 0
position.loc[y, x] = 0
return position

@classmethod
Expand All @@ -277,15 +277,15 @@ def _axes_frame(cls, position, fig, gridspec_kw=None, subplot_spec=None):
hspace=hspace, wspace=wspace,
subplot_spec=subplot_spec,
**gridspec_kw)
axes[:][:] = None
axes.loc[:, :] = None
for j, y in enumerate(axes.index[::-1]):
for i, x in enumerate(axes.columns):
if position[x][y] is not None:
sx = list(axes[x].dropna())
sx = sx[0] if sx else None
sy = list(axes.T[y].dropna())
sy = sy[0] if sy else None
axes[x][y] = fig.add_subplot(
axes.loc[y, x] = fig.add_subplot(
gs[axes.index.size - 1 - j, i], sharex=sx, sharey=sy
)
if position[x][y] == 0:
Expand Down
7 changes: 4 additions & 3 deletions anesthetic/plotting/_matplotlib/boxplot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas.plotting._matplotlib.boxplot
from pandas.plotting._matplotlib.boxplot import BoxPlot as _BoxPlot
from pandas.plotting._matplotlib.boxplot import (BoxPlot as _BoxPlot,
maybe_color_bp)
from anesthetic.plotting._matplotlib.core import _WeightedMPLPlot, _get_weights
from anesthetic.utils import quantile
from pandas.core.dtypes.missing import remove_na_arraylike
Expand Down Expand Up @@ -56,9 +57,9 @@ def boxplot(data, *args, **kwds):

def create_plot_group():
fontsize = None # pragma: no cover
maybe_color_bp = None # pragma: no cover
return_type = None # pragma: no cover
rot = None # pragma: no cover
colors = None # pragma: no cover

def plot_group(keys, values, ax, **kwds): # pragma: no cover
# GH 45465: xlabel/ylabel need to be popped out before plotting
Expand Down Expand Up @@ -96,7 +97,7 @@ def plot_group(keys, values, ax, **kwds): # pragma: no cover
ax.set_xticklabels(keys, rotation=rot)
else:
ax.set_yticklabels(keys, rotation=rot)
maybe_color_bp(bp, **kwds)
maybe_color_bp(bp, color_tup=colors, **kwds)

# Return axes in multiplot case, maybe revisit later # 985
if return_type == "dict":
Expand Down
9 changes: 3 additions & 6 deletions anesthetic/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ def _get_index_name(self):
else:
return super()._get_index_name()

def _get_xticks(self, convert_period: bool = False):
def _get_xticks(self):
if isinstance(self.data, _WeightedObject):
return self.data.drop_weights().index._mpl_repr()
else:
return super()._get_xticks(convert_period)
return super()._get_xticks()


def _compress_weights(kwargs, data):
Expand All @@ -82,7 +82,7 @@ def __init__(self, data, x, y, s=None, c=None, **kwargs) -> None:

class _PlanePlot2d(PlanePlot):

def _make_plot(self):
def _make_plot(self, fig):
if self.colormap is not None:
self.kwds['cmap'] = plt.get_cmap(self.colormap)
colors = self._get_colors()
Expand All @@ -109,9 +109,6 @@ def _make_plot(self):
f"supported by {self._kind}")
self._plot(ax, x.values, y.values, **kwds)

def _args_adjust(self):
pass


class ScatterPlot2d(_CompressedMPLPlot, _PlanePlot2d):
# noqa: disable=D101
Expand Down
60 changes: 28 additions & 32 deletions anesthetic/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,54 +28,50 @@
class HistPlot(_WeightedMPLPlot, _HistPlot):

# noqa: disable=D101
def _args_adjust(self) -> None:
def _adjust_bins(self, bins) -> None:
if (
hasattr(self, 'bins') and
isinstance(self.bins, str) and
self.bins in ['fd', 'scott', 'sqrt']
isinstance(bins, str) and
bins in ['fd', 'scott', 'sqrt']
):
self.bins = self._calculate_bins(self.data)
super()._args_adjust()
bins = self._calculate_bins(self.data, bins)
return super()._adjust_bins(bins)

# noqa: disable=D101
def _calculate_bins(self, data):
def _calculate_bins(self, data, bins):
if self.logx:
data = np.log10(data)
if 'range' in self.kwds and self.kwds['range'] is not None:
xmin, xmax = self.kwds['range']
self.kwds['range'] = (np.log10(xmin), np.log10(xmax))
if self._bin_range is not None:
xmin, xmax = self._bin_range
self._bin_range = (np.log10(xmin), np.log10(xmax))
nd_values = data.infer_objects(copy=False)._get_numeric_data()
values = np.ravel(nd_values)
weights = self.kwds.get("weights", None)
weights = self.weights
if weights is not None:
try:
weights = np.broadcast_to(weights[:, None], nd_values.shape)
except ValueError:
pass
weights = np.broadcast_to(weights[:, None], nd_values.shape)
weights = np.ravel(weights)
weights = weights[~isna(values)]

values = values[~isna(values)]

if isinstance(self.bins, str) and self.bins in ['fd', 'scott', 'sqrt']:
if isinstance(bins, str) and bins in ['fd', 'scott', 'sqrt']:
bins = histogram_bin_edges(
values,
weights=weights,
bins=self.bins,
bins=bins,
beta=self.kwds.pop('beta', 'equal'),
range=self.kwds.get('range', None)
range=self._bin_range
)
else:
bins = np.histogram_bin_edges(
values,
weights=weights,
bins=self.bins,
range=self.kwds.get('range', None)
bins=bins,
range=self._bin_range
)
if self.logx:
bins = 10**bins
if 'range' in self.kwds and self.kwds['range'] is not None:
self.kwds['range'] = (xmin, xmax)
if self._bin_range is not None:
self._bin_range = (xmin, xmax)
return bins

def _get_colors(self, num_colors=None, color_kwds='color'):
Expand Down Expand Up @@ -180,18 +176,18 @@ def __init__(
) -> None:
super().__init__(data, bins=bins, bottom=bottom, **kwargs)

def _calculate_bins(self, data):
if 'range' not in self.kwds or self.kwds['range'] is None:
def _calculate_bins(self, data, bins):
nd_values = data.infer_objects(copy=False)._get_numeric_data()
values = np.ravel(nd_values)
values = values[~isna(values)]
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not happy with repeating this a third time, and I haven't checked whether the weights definitely need checking for NaNs.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not work with `data.to_numpy().ravel()` in `quantile` below? That seems simpler to me...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Essentially because I'm attempting to replicate the pandas versions as closely as possible.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, considering that Hist1dPlot was created for a WeightedSeries, we could simplify things here.
Even simpler than my previous suggestion:

            xmin, xmax = data.quantile(q).to_numpy().ravel()

Not sure what is easier for maintenance in the long run, though, so happy with either approach.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's too tidy to resist!

if self._bin_range is None:
q = self.kwds.get('q', 5)
q = quantile_plot_interval(q=q)
weights = self.kwds.get('weights', None)
xmin = quantile(data, q[0], weights)
xmax = quantile(data, q[-1], weights)
self.kwds['range'] = (xmin, xmax)
bins = super()._calculate_bins(data)
self.kwds.pop('range')
else:
bins = super()._calculate_bins(data)
weights = self.weights
xmin = quantile(values, q[0], weights)
xmax = quantile(values, q[-1], weights)
self._bin_range = (xmin, xmax)
bins = super()._calculate_bins(data, bins)
return bins

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion anesthetic/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,8 @@ def remove_burn_in(self, burn_in, reset_index=False, inplace=False):
nsamples = chains.count().iloc[:, 0].to_numpy()
ndrop = ndrop * nsamples
ndrop = ndrop.astype(int)
data = self.drop(chains.apply(lambda g: g.head(ndrop[g.name-1])).index,
data = self.drop(chains.apply(lambda g: g.head(ndrop[g.name-1]),
include_groups=False).index,
inplace=inplace)
if reset_index:
data = data.reset_index(drop=True, inplace=inplace)
Expand Down
9 changes: 4 additions & 5 deletions anesthetic/weighted_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ def read_csv(filename, *args, **kwargs):
class WeightedGroupBy(GroupBy):
"""Weighted version of ``pandas.core.groupby.GroupBy``."""

grouper: ops.BaseGrouper
""":meta private:"""
_grouper: ops.BaseGrouper

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down Expand Up @@ -119,10 +118,10 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover
subset = self.obj
return WeightedDataFrameGroupBy(
subset,
self.grouper,
self._grouper,
axis=self.axis,
level=self.level,
grouper=self.grouper,
grouper=self._grouper,
exclusions=self.exclusions,
selection=key,
as_index=self.as_index,
Expand All @@ -138,7 +137,7 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover
return WeightedSeriesGroupBy(
subset,
level=self.level,
grouper=self.grouper,
grouper=self._grouper,
selection=key,
sort=self.sort,
group_keys=self.group_keys,
Expand Down
3 changes: 1 addition & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def get_version(short=False):
'sphinx.ext.githubpages',
'sphinx.ext.imgconverter',
'sphinx_copybutton',
'sphinx_autodoc_typehints',
'matplotlib.sphinxext.plot_directive',
'numpydoc',
]
Expand Down Expand Up @@ -98,8 +99,6 @@ def get_version(short=False):

autosummary_generate = True

nitpick_ignore = [('py:obj', 'pandas.core.groupby.SeriesGroupBy.sample')] # not currently included in pandas 1.5, but will in future

# -- Options for autosectionlabel------------------------------------------
autosectionlabel_prefix_document = True

Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ requires-python = ">=3.8"
dependencies = [
"scipy",
"numpy",
"pandas>=2.0.0,<2.2.0",
"pandas~=2.2.0",
"matplotlib>=3.6.1,<3.9.0",
]
classifiers = [
Expand All @@ -60,7 +60,8 @@ classifiers = [
"JOSS paper" = "https://joss.theoj.org/papers/10.21105/joss.01414"

[project.optional-dependencies]
docs = ["sphinx>=4.2.0", "sphinx_rtd_theme>=1.2.2", "sphinx-copybutton", "numpydoc"]
docs = ["sphinx>=4.2.0", "sphinx_rtd_theme>=1.2.2", "sphinx-copybutton",
"sphinx-autodoc-typehints", "numpydoc"]
test = ["pytest", "pytest-cov", "flake8", "pydocstyle", "packaging", "pre-commit"]
ultranest = ["h5py"]
astropy = ["astropy"]
Expand Down
5 changes: 5 additions & 0 deletions tests/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,11 @@ def test_logscale_hist_kwargs(b):
assert amax < 0.5
assert edges[0] < 1e-3
assert edges[-1] > 1e3
h, edges, _ = hist_plot_1d(ax, data, bins=b, range=None)
amax = abs(np.log10(edges[np.argmax(h)]))
assert amax < 0.5
assert edges[0] < 1e-3
assert edges[-1] > 1e3
h, edges, _ = hist_plot_1d(ax, data, bins=b, range=(1e-3, 1e3))
amax = abs(np.log10(edges[np.argmax(h)]))
assert amax < 0.5
Expand Down
14 changes: 7 additions & 7 deletions tests/test_weighted_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ def test_WeightedDataFrame_compress(frame):


def test_WeightedDataFrame_nan(frame):
frame['A'][0] = np.nan
frame.loc[0, 'A'] = np.nan
assert ~frame.mean().isna().any()
assert ~frame.mean(axis=1).isna().any()
assert_array_equal(frame.mean(skipna=False).isna(), [True] + [False]*5)
Expand All @@ -422,7 +422,7 @@ def test_WeightedDataFrame_nan(frame):
assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6],
[True, False, False, False, False, False])

frame['B'][2] = np.nan
frame.loc[2, 'B'] = np.nan
assert ~frame.mean().isna().any()
assert_array_equal(frame.mean(skipna=False).isna(),
[True, True] + [False]*4)
Expand All @@ -435,10 +435,10 @@ def test_WeightedDataFrame_nan(frame):
assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6],
[True, False, True, False, False, False])

frame['C'][4] = np.nan
frame['D'][5] = np.nan
frame['E'][6] = np.nan
frame['F'][7] = np.nan
frame.loc[4, 'C'] = np.nan
frame.loc[5, 'D'] = np.nan
frame.loc[6, 'E'] = np.nan
frame.loc[7, 'F'] = np.nan
assert ~frame.mean().isna().any()
assert frame.mean(skipna=False).isna().all()
assert_array_equal(frame.mean(axis=1, skipna=False).isna()[0:6],
Expand Down Expand Up @@ -493,7 +493,7 @@ def test_WeightedSeries_cov(frame):
assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2)
assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2)

frame['A'][0] = np.nan
frame.loc[0, 'A'] = np.nan
assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2)
assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2)

Expand Down
Loading