Commit 0c2c529

Merge branch 'main' into fix/#61636

2 parents 2b0d4ea + 7817cb2


57 files changed (+531, -529 lines)

.github/workflows/unit-tests.yml

Lines changed: 0 additions & 3 deletions
@@ -140,9 +140,6 @@ jobs:
 
       moto:
         image: motoserver/moto:5.0.27
-        env:
-          AWS_ACCESS_KEY_ID: foobar_key
-          AWS_SECRET_ACCESS_KEY: foobar_secret
         ports:
           - 5000:5000

AUTHORS.md

Lines changed: 6 additions & 6 deletions
@@ -7,12 +7,12 @@ About the Copyright Holders
   led by Wes McKinney. AQR released the source under this license in 2009.
 * Copyright (c) 2011-2012, Lambda Foundry, Inc.
 
-  Wes is now an employee of Lambda Foundry, and remains the pandas project
+  Wes became an employee of Lambda Foundry, and remained the pandas project
   lead.
 * Copyright (c) 2011-2012, PyData Development Team
 
   The PyData Development Team is the collection of developers of the PyData
-  project. This includes all of the PyData sub-projects, including pandas. The
+  project. This includes all of the PyData sub-projects, such as pandas. The
   core team that coordinates development on GitHub can be found here:
   https://github.com/pydata.
 
@@ -23,11 +23,11 @@ Our Copyright Policy
 
 PyData uses a shared copyright model. Each contributor maintains copyright
 over their contributions to PyData. However, it is important to note that
-these contributions are typically only changes to the repositories. Thus,
+these contributions are typically limited to changes to the repositories. Thus,
 the PyData source code, in its entirety, is not the copyright of any single
 person or institution. Instead, it is the collective copyright of the
 entire PyData Development Team. If individual contributors want to maintain
-a record of what changes/contributions they have specific copyright on,
+a record of the specific changes or contributions they hold copyright to,
 they should indicate their copyright in the commit message of the change
 when they commit the change to one of the PyData repositories.
 
@@ -50,7 +50,7 @@ Other licenses can be found in the LICENSES directory.
 License
 =======
 
-pandas is distributed under a 3-clause ("Simplified" or "New") BSD
+pandas is distributed under the 3-clause ("Simplified" or "New") BSD
 license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have
-BSD-compatible licenses, are included. Their licenses follow the pandas
+BSD-compatible licenses, are included. Their licenses are compatible with the pandas
 license.

ci/deps/actions-311-downstream_compat.yaml

Lines changed: 2 additions & 1 deletion
@@ -50,7 +50,8 @@ dependencies:
   - pytz>=2023.4
   - pyxlsb>=1.0.10
   - s3fs>=2023.12.2
-  - scipy>=1.12.0
+  # TEMP upper pin for scipy (https://github.com/statsmodels/statsmodels/issues/9584)
+  - scipy>=1.12.0,<1.16
   - sqlalchemy>=2.0.0
   - tabulate>=0.9.0
   - xarray>=2024.1.1

doc/source/reference/indexing.rst

Lines changed: 1 addition & 0 deletions
@@ -98,6 +98,7 @@ Conversion
    :toctree: api/
 
    Index.astype
+   Index.infer_objects
    Index.item
    Index.map
    Index.ravel

doc/source/whatsnew/v2.3.0.rst

Lines changed: 0 additions & 35 deletions
@@ -31,39 +31,6 @@ Other enhancements
 - The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for :class:`StringDtype` columns (:issue:`60633`)
 - The :meth:`~Series.sum` reduction is now implemented for :class:`StringDtype` columns (:issue:`59853`)
 
-.. ---------------------------------------------------------------------------
-.. _whatsnew_230.notable_bug_fixes:
-
-Notable bug fixes
-~~~~~~~~~~~~~~~~~
-
-These are bug fixes that might have notable behavior changes.
-
-.. _whatsnew_230.notable_bug_fixes.string_comparisons:
-
-Comparisons between different string dtypes
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-In previous versions, comparing :class:`Series` of different string dtypes (e.g. ``pd.StringDtype("pyarrow", na_value=pd.NA)`` against ``pd.StringDtype("python", na_value=np.nan)``) would result in inconsistent resulting dtype or incorrectly raise. pandas will now use the hierarchy
-
-    object < (python, NaN) < (pyarrow, NaN) < (python, NA) < (pyarrow, NA)
-
-in determining the result dtype when there are different string dtypes compared. Some examples:
-
-- When ``pd.StringDtype("pyarrow", na_value=pd.NA)`` is compared against any other string dtype, the result will always be ``boolean[pyarrow]``.
-- When ``pd.StringDtype("python", na_value=pd.NA)`` is compared against ``pd.StringDtype("pyarrow", na_value=np.nan)``, the result will be ``boolean``, the NumPy-backed nullable extension array.
-- When ``pd.StringDtype("python", na_value=pd.NA)`` is compared against ``pd.StringDtype("python", na_value=np.nan)``, the result will be ``boolean``, the NumPy-backed nullable extension array.
-
-.. _whatsnew_230.api_changes:
-
-API changes
-~~~~~~~~~~~
-
-- When enabling the ``future.infer_string`` option, :class:`Index` set operations (like
-  union or intersection) will now ignore the dtype of an empty :class:`RangeIndex` or
-  empty :class:`Index` with ``object`` dtype when determining the dtype of the resulting
-  Index (:issue:`60797`)
-
 .. ---------------------------------------------------------------------------
 .. _whatsnew_230.deprecations:
 
@@ -85,8 +52,6 @@ Numeric
 
 Strings
 ^^^^^^^
-- Bug in :meth:`.DataFrameGroupBy.min`, :meth:`.DataFrameGroupBy.max`, :meth:`.Resampler.min`, :meth:`.Resampler.max` where all NA values of string dtype would return float instead of string dtype (:issue:`60810`)
-- Bug in :meth:`DataFrame.sum` with ``axis=1``, :meth:`.DataFrameGroupBy.sum` or :meth:`.SeriesGroupBy.sum` with ``skipna=True``, and :meth:`.Resampler.sum` with all NA values of :class:`StringDtype` resulted in ``0`` instead of the empty string ``""`` (:issue:`60229`)
 - Bug in :meth:`Series.__pos__` and :meth:`DataFrame.__pos__` where an ``Exception`` was not raised for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`60710`)
 - Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` that incorrectly returned integer results with ``method="average"`` and raised an error if it would truncate results (:issue:`59768`)
 - Bug in :meth:`Series.replace` with :class:`StringDtype` when replacing with a non-string value was not upcasting to ``object`` dtype (:issue:`60282`)

doc/source/whatsnew/v2.3.1.rst

Lines changed: 51 additions & 5 deletions
@@ -9,11 +9,57 @@ including other versions of pandas.
 {{ header }}
 
 .. ---------------------------------------------------------------------------
-.. _whatsnew_231.enhancements:
+.. _whatsnew_231.string_fixes:
+
+Improvements and fixes for the StringDtype
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. _whatsnew_231.string_fixes.string_comparisons:
+
+Comparisons between different string dtypes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In previous versions, comparing :class:`Series` of different string dtypes (e.g. ``pd.StringDtype("pyarrow", na_value=pd.NA)`` against ``pd.StringDtype("python", na_value=np.nan)``) would result in inconsistent resulting dtype or incorrectly raise. pandas will now use the hierarchy
+
+    object < (python, NaN) < (pyarrow, NaN) < (python, NA) < (pyarrow, NA)
+
+in determining the result dtype when there are different string dtypes compared. Some examples:
+
+- When ``pd.StringDtype("pyarrow", na_value=pd.NA)`` is compared against any other string dtype, the result will always be ``boolean[pyarrow]``.
+- When ``pd.StringDtype("python", na_value=pd.NA)`` is compared against ``pd.StringDtype("pyarrow", na_value=np.nan)``, the result will be ``boolean``, the NumPy-backed nullable extension array.
+- When ``pd.StringDtype("python", na_value=pd.NA)`` is compared against ``pd.StringDtype("python", na_value=np.nan)``, the result will be ``boolean``, the NumPy-backed nullable extension array.
+
+.. _whatsnew_231.string_fixes.ignore_empty:
+
+Index set operations ignore empty RangeIndex and object dtype Index
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When enabling the ``future.infer_string`` option, :class:`Index` set operations (like
+union or intersection) will now ignore the dtype of an empty :class:`RangeIndex` or
+empty :class:`Index` with ``object`` dtype when determining the dtype of the resulting
+Index (:issue:`60797`).
+
+This ensures that combining such empty Index with strings will infer the string dtype
+correctly, rather than defaulting to ``object`` dtype. For example:
+
+.. code-block:: python
+
+    >>> pd.options.mode.infer_string = True
+    >>> df = pd.DataFrame()
+    >>> df.columns.dtype
+    dtype('int64')  # default RangeIndex for empty columns
+    >>> df["a"] = [1, 2, 3]
+    >>> df.columns.dtype
+    <StringDtype(na_value=nan)>  # new columns use string dtype instead of object dtype
+
+.. _whatsnew_231.string_fixes.bugs:
+
+Bug fixes
+^^^^^^^^^
+- Bug in :meth:`.DataFrameGroupBy.min`, :meth:`.DataFrameGroupBy.max`, :meth:`.Resampler.min`, :meth:`.Resampler.max` where all NA values of string dtype would return float instead of string dtype (:issue:`60810`)
+- Bug in :meth:`DataFrame.sum` with ``axis=1``, :meth:`.DataFrameGroupBy.sum` or :meth:`.SeriesGroupBy.sum` with ``skipna=True``, and :meth:`.Resampler.sum` with all NA values of :class:`StringDtype` resulted in ``0`` instead of the empty string ``""`` (:issue:`60229`)
+- Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`61623`)
 
-Enhancements
-~~~~~~~~~~~~
--
 
 .. _whatsnew_231.regressions:
 
@@ -26,7 +72,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
-- Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`61623`)
+-
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_231.other:
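The comparison-dtype hierarchy described in the whatsnew note above can be sketched as a simple precedence lookup. This is a hypothetical illustration of the stated rule, not pandas' internal implementation; `result_backing` and the tuple encoding of dtypes are invented for the example:

```python
# Hypothetical sketch of the precedence rule from the whatsnew note:
#   object < (python, NaN) < (pyarrow, NaN) < (python, NA) < (pyarrow, NA)
# Dtypes are encoded as "object" or (storage, na_value) tuples.
HIERARCHY = [
    "object",
    ("python", "NaN"),
    ("pyarrow", "NaN"),
    ("python", "NA"),
    ("pyarrow", "NA"),
]

def result_backing(left, right):
    """Pick the higher-ranked of two dtypes; it determines the result dtype."""
    return max(left, right, key=HIERARCHY.index)
```

So, for instance, comparing a pyarrow/NA-backed column against any other string dtype selects the pyarrow/NA backing, which matches the note's `boolean[pyarrow]` result.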

doc/source/whatsnew/v3.0.0.rst

Lines changed: 6 additions & 0 deletions
@@ -28,6 +28,9 @@ Enhancement2
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
+- :func:`pandas.merge` propagates the ``attrs`` attribute to the result if all
+  inputs have identical ``attrs``, as has so far already been the case for
+  :func:`pandas.concat`.
 - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
 - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
 - Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`)
@@ -745,9 +748,11 @@ Indexing
 - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
 - Bug in :meth:`DataFrame.__getitem__` when slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`)
 - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
+- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` returning incorrect dtype when selecting from a :class:`DataFrame` with mixed data types (:issue:`60600`)
 - Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
 - Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
 - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
+- Bug in :meth:`Series.__setitem__` when assigning boolean series with boolean indexer will raise ``LossySetitemError`` (:issue:`57338`)
 - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
 - Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
@@ -777,6 +782,7 @@ I/O
 - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
 - Bug in :meth:`DataFrame.to_excel` where the :class:`MultiIndex` index with a period level was not a date (:issue:`60099`)
 - Bug in :meth:`DataFrame.to_stata` when exporting a column containing both long strings (Stata strL) and :class:`pd.NA` values (:issue:`23633`)
+- Bug in :meth:`DataFrame.to_stata` when input encoded length and normal length are mismatched (:issue:`61583`)
 - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`)
 - Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`)
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)

environment.yml

Lines changed: 1 addition & 2 deletions
@@ -64,9 +64,8 @@ dependencies:
   - dask-core
   - seaborn-base
 
-  # local testing dependencies
+  # Mocking s3 tests
   - moto
-  - flask
 
   # benchmarks
   - asv>=0.6.1

pandas/_libs/src/datetime/pd_datetime.c

Lines changed: 4 additions & 0 deletions
@@ -192,6 +192,10 @@ static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) {
   return npy_dt;
 }
 
+/* Initializes and exposes a custom datetime C-API from the pandas library
+ * by creating a PyCapsule that stores function pointers, which can be accessed
+ * later by other C code or Cython code that imports the capsule.
+ */
 static int pandas_datetime_exec(PyObject *Py_UNUSED(module)) {
   PyDateTime_IMPORT;
   PandasDateTime_CAPI *capi = PyMem_Malloc(sizeof(PandasDateTime_CAPI));

pandas/compat/_optional.py

Lines changed: 2 additions & 2 deletions
@@ -152,8 +152,8 @@ def import_optional_dependency(
     install_name = package_name if package_name is not None else name
 
     msg = (
-        f"Missing optional dependency '{install_name}'. {extra} "
-        f"Use pip or conda to install {install_name}."
+        f"`Import {install_name}` failed. {extra} "
+        f"Use pip or conda to install the {install_name} package."
     )
     try:
         module = importlib.import_module(name)
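The effect of the reworded message can be seen in a stripped-down sketch of the helper. This is a simplified stand-in for pandas' `import_optional_dependency`, not the real function; the version checks and `errors=` modes are omitted, and the name `import_optional` is invented:

```python
import importlib


def import_optional(name: str, extra: str = ""):
    # Simplified stand-in mirroring the new error message above; the real
    # pandas helper also validates minimum versions and supports errors=.
    msg = (
        f"`Import {name}` failed. {extra} "
        f"Use pip or conda to install the {name} package."
    )
    try:
        return importlib.import_module(name)
    except ImportError as err:
        raise ImportError(msg) from err
```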

pandas/conftest.py

Lines changed: 6 additions & 0 deletions
@@ -2116,3 +2116,9 @@ def temp_file(tmp_path):
     file_path = tmp_path / str(uuid.uuid4())
     file_path.touch()
     return file_path
+
+
+@pytest.fixture(scope="session")
+def monkeysession():
+    with pytest.MonkeyPatch.context() as mp:
+        yield mp
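The new `monkeysession` fixture wraps `pytest.MonkeyPatch.context()` because pytest's built-in `monkeypatch` fixture is function-scoped and cannot be requested from session-scoped fixtures. A hypothetical usage sketch (the `aws_credentials` fixture and variable names are only illustrative, echoing the env vars dropped from the workflow file above):

```python
import pytest


@pytest.fixture(scope="session")
def monkeysession():
    # Session-scoped MonkeyPatch; everything set through it is undone
    # when the context exits at the end of the test session.
    with pytest.MonkeyPatch.context() as mp:
        yield mp


@pytest.fixture(scope="session")
def aws_credentials(monkeysession):
    # Hypothetical consumer: fake credentials for the whole session.
    monkeysession.setenv("AWS_ACCESS_KEY_ID", "foobar_key")
    monkeysession.setenv("AWS_SECRET_ACCESS_KEY", "foobar_secret")
```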

pandas/core/apply.py

Lines changed: 2 additions & 1 deletion
@@ -10,6 +10,7 @@
     TYPE_CHECKING,
     Any,
     Literal,
+    TypeAlias,
     cast,
 )
 
@@ -71,7 +72,7 @@
     from pandas.core.resample import Resampler
     from pandas.core.window.rolling import BaseWindow
 
-ResType = dict[int, Any]
+ResType: TypeAlias = dict[int, Any]
 
 
 class BaseExecutionEngine(abc.ABC):

pandas/core/arrays/datetimelike.py

Lines changed: 3 additions & 2 deletions
@@ -10,6 +10,7 @@
     TYPE_CHECKING,
     Any,
     Literal,
+    TypeAlias,
     Union,
     cast,
     final,
@@ -161,7 +162,7 @@
         TimedeltaArray,
     )
 
-DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType]
+DTScalarOrNaT: TypeAlias = DatetimeLikeScalar | NaTType
 
 
 def _make_unpacked_invalid_op(op_name: str):
@@ -386,7 +387,7 @@ def __getitem__(self, key: PositionalIndexer2D) -> Self | DTScalarOrNaT:
         # Use cast as we know we will get back a DatetimeLikeArray or DTScalar,
         # but skip evaluating the Union at runtime for performance
         # (see https://github.com/pandas-dev/pandas/pull/44624)
-        result = cast("Union[Self, DTScalarOrNaT]", super().__getitem__(key))
+        result = cast(Union[Self, DTScalarOrNaT], super().__getitem__(key))
         if lib.is_scalar(result):
             return result
         else:

pandas/core/arrays/interval.py

Lines changed: 3 additions & 3 deletions
@@ -9,7 +9,7 @@
 from typing import (
     TYPE_CHECKING,
     Literal,
-    Union,
+    TypeAlias,
     overload,
 )
 
@@ -109,8 +109,8 @@
 )
 
 
-IntervalSide = Union[TimeArrayLike, np.ndarray]
-IntervalOrNA = Union[Interval, float]
+IntervalSide: TypeAlias = TimeArrayLike | np.ndarray
+IntervalOrNA: TypeAlias = Interval | float
 
 _interval_shared_docs: dict[str, str] = {}

pandas/core/arrays/string_arrow.py

Lines changed: 0 additions & 4 deletions
@@ -4,7 +4,6 @@
 import re
 from typing import (
     TYPE_CHECKING,
-    Union,
 )
 import warnings
 
@@ -64,9 +63,6 @@
     from pandas import Series
 
 
-ArrowStringScalarOrNAT = Union[str, libmissing.NAType]
-
-
 def _chk_pyarrow_available() -> None:
     if pa_version_under10p1:
         msg = "pyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray."

pandas/core/dtypes/cast.py

Lines changed: 4 additions & 0 deletions
@@ -1926,6 +1926,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
                 # i.e. there are pd.NA elements
                 raise LossySetitemError
             return element
+        # GH 57338 check boolean array set as object type
+        if tipo.kind == "O" and isinstance(element, np.ndarray):
+            if lib.is_bool_array(element):
+                return element.astype("bool")
         raise LossySetitemError
     if lib.is_bool(element):
         return element
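The new branch in `np_can_hold_element` accepts an object-dtype ndarray whose elements are all booleans, casting it to `bool` instead of raising `LossySetitemError` (GH 57338). A standalone sketch of the check, where a plain `isinstance` scan stands in for pandas' C-level `lib.is_bool_array` and `coerce_bool_object_array` is an invented name:

```python
import numpy as np


def coerce_bool_object_array(arr: np.ndarray) -> np.ndarray:
    # Sketch of the GH 57338 check: an object-dtype array holding only
    # booleans can be stored losslessly in a bool array, so cast it.
    if arr.dtype.kind == "O" and all(isinstance(x, (bool, np.bool_)) for x in arr):
        return arr.astype("bool")
    # pandas raises LossySetitemError here; TypeError keeps the sketch simple.
    raise TypeError("lossy setitem")
```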

pandas/core/generic.py

Lines changed: 6 additions & 6 deletions
@@ -330,8 +330,8 @@ def attrs(self) -> dict[Hashable, Any]:
     -----
     Many operations that create new datasets will copy ``attrs``. Copies
     are always deep so that changing ``attrs`` will only affect the
-    present dataset. ``pandas.concat`` copies ``attrs`` only if all input
-    datasets have the same ``attrs``.
+    present dataset. :func:`pandas.concat` and :func:`pandas.merge` will
+    only copy ``attrs`` if all input datasets have the same ``attrs``.
 
     Examples
     --------
@@ -6090,11 +6090,11 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
             assert isinstance(name, str)
             object.__setattr__(self, name, getattr(other, name, None))
 
-        if method == "concat":
-            objs = other.objs
-            # propagate attrs only if all concat arguments have the same attrs
+        elif hasattr(other, "input_objs"):
+            objs = other.input_objs
+            # propagate attrs only if all inputs have the same attrs
             if all(bool(obj.attrs) for obj in objs):
-                # all concatenate arguments have non-empty attrs
+                # all inputs have non-empty attrs
                 attrs = objs[0].attrs
                 have_same_attrs = all(obj.attrs == attrs for obj in objs[1:])
                 if have_same_attrs:
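The generalized `__finalize__` branch above propagates `attrs` from any operation whose context object exposes `input_objs` (previously only `concat`), which is what lets `pandas.merge` share the behavior. The rule itself can be sketched independently of pandas; `Carrier` and `propagate_attrs` are made-up stand-ins for illustration:

```python
class Carrier:
    # Made-up stand-in for a pandas object carrying an ``attrs`` dict.
    def __init__(self, attrs):
        self.attrs = attrs


def propagate_attrs(objs):
    # Mirror of the rule in __finalize__: copy attrs to the result only
    # when every input has non-empty attrs and they are all identical.
    if objs and all(bool(o.attrs) for o in objs):
        attrs = objs[0].attrs
        if all(o.attrs == attrs for o in objs[1:]):
            return dict(attrs)
    return {}
```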

pandas/core/groupby/generic.py

Lines changed: 2 additions & 2 deletions
@@ -17,8 +17,8 @@
     Any,
     Literal,
     NamedTuple,
+    TypeAlias,
     TypeVar,
-    Union,
     cast,
 )
 import warnings
@@ -102,7 +102,7 @@
     from pandas.core.generic import NDFrame
 
 # TODO(typing) the return value on this callable should be any *scalar*.
-AggScalar = Union[str, Callable[..., Any]]
+AggScalar: TypeAlias = str | Callable[..., Any]
 # TODO: validate types on ScalarResult and move to _typing
 # Blocked from using by https://github.com/python/mypy/issues/1484
 # See note at _mangle_lambda_list
