From 31e65e0eec173ab6fcc48d0d686d0c2ace6d19af Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 3 Jul 2025 15:47:02 -0700
Subject: [PATCH 01/14] BUG: Decimal(NaN) incorrectly allowed in ArrowEA
 constructor with timestamp type

---
 doc/source/whatsnew/v3.0.0.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 10fb9503ffb3d..2c209f521b274 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -709,6 +709,8 @@ Datetimelike
 - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
 - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
+- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`??`)
+-
 
 Timedelta
 ^^^^^^^^^

From 9dcd8fbf7bc4d6bac039364bf9ad2da4d3502b0a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 3 Jul 2025 15:49:04 -0700
Subject: [PATCH 02/14] GH ref

---
 doc/source/whatsnew/v3.0.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 2c209f521b274..f159282717109 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -709,7 +709,6 @@ Datetimelike
 - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
 - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
-- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`??`)
 -
 
 Timedelta

From 3fb47c78fb54b0ebd34f58aedc11403458042802 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 4 Jul 2025 08:21:07 -0700
Subject: [PATCH 03/14] BUG: ArrowEA constructor with timestamp type

---
 doc/source/whatsnew/v3.0.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index f159282717109..10fb9503ffb3d 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -709,7 +709,6 @@ Datetimelike
 - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
 - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
--
 
 Timedelta
 ^^^^^^^^^

From c18ab05d9c01eeff2d588e1ed50c729819801824 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 28 Jun 2025 10:07:44 -0700
Subject: [PATCH 04/14] POC: consistent NaN treatment for pyarrow dtypes

---
 pandas/_libs/parsers.pyx                 |  2 +-
 pandas/core/arrays/arrow/array.py        | 54 ++++++++++++++++++------
 pandas/core/arrays/string_.py            |  8 +++-
 pandas/core/generic.py                   | 19 ++++++++-
 pandas/tests/extension/test_arrow.py     |  2 +-
 pandas/tests/groupby/test_reductions.py  |  6 ++-
 pandas/tests/series/methods/test_rank.py |  9 ++++
 7 files changed, 81 insertions(+), 19 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 43670abca2fac..e115fc67adc2a 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -1453,7 +1453,7 @@ def _maybe_upcast(
         if isinstance(arr, IntegerArray) and arr.isna().all():
             # use null instead of int64 in pyarrow
             arr = arr.to_numpy(na_value=None)
-        arr = ArrowExtensionArray(pa.array(arr, from_pandas=True))
+        arr = ArrowExtensionArray(pa.array(arr))
 
     return arr
 
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 919453b29b7f9..3f6559b98a7a4 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -16,6 +16,7 @@
 import numpy as np
 
 from pandas._libs import lib
+from pandas._libs.missing import NA
 from pandas._libs.tslibs import (
     Timedelta,
     Timestamp,
@@ -351,7 +352,7 @@ def _from_sequence_of_strings(
                 # duration to string casting behavior
                 mask = isna(scalars)
                 if not isinstance(strings, (pa.Array, pa.ChunkedArray)):
-                    strings = pa.array(strings, type=pa.string(), from_pandas=True)
+                    strings = pa.array(strings, type=pa.string())
                 strings = pc.if_else(mask, None, strings)
                 try:
                     scalars = strings.cast(pa.int64())
@@ -372,7 +373,7 @@ def _from_sequence_of_strings(
             if isinstance(strings, (pa.Array, pa.ChunkedArray)):
                 scalars = strings
             else:
-                scalars = pa.array(strings, type=pa.string(), from_pandas=True)
+                scalars = pa.array(strings, type=pa.string())
             scalars = pc.if_else(pc.equal(scalars, "1.0"), "1", scalars)
             scalars = pc.if_else(pc.equal(scalars, "0.0"), "0", scalars)
             scalars = scalars.cast(pa.bool_())
@@ -384,6 +385,13 @@ def _from_sequence_of_strings(
             from pandas.core.tools.numeric import to_numeric
 
             scalars = to_numeric(strings, errors="raise")
+            if not pa.types.is_decimal(pa_type):
+                # TODO: figure out why doing this cast breaks with decimal dtype
+                #  in test_from_sequence_of_strings_pa_array
+                mask = strings.is_null()
+                scalars = pa.array(scalars, mask=np.array(mask), type=pa_type)
+                # TODO: could we just do strings.cast(pa_type)?
+
         else:
             raise NotImplementedError(
                 f"Converting strings to {pa_type} is not implemented."
@@ -426,7 +434,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
         """
         if isinstance(value, pa.Scalar):
             pa_scalar = value
-        elif isna(value):
+        elif isna(value) and not lib.is_float(value):
             pa_scalar = pa.scalar(None, type=pa_type)
         else:
             # Workaround https://github.com/apache/arrow/issues/37291
@@ -443,7 +451,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
                     value = value.as_unit(pa_type.unit)
                 value = value._value
 
-            pa_scalar = pa.scalar(value, type=pa_type, from_pandas=True)
+            pa_scalar = pa.scalar(value, type=pa_type)
 
         if pa_type is not None and pa_scalar.type != pa_type:
             pa_scalar = pa_scalar.cast(pa_type)
@@ -475,6 +483,13 @@ def _box_pa_array(
             if copy:
                 value = value.copy()
             pa_array = value.__arrow_array__()
+
+        elif hasattr(value, "__arrow_array__"):
+            # e.g. StringArray
+            if copy:
+                value = value.copy()
+            pa_array = value.__arrow_array__()
+
         else:
             if (
                 isinstance(value, np.ndarray)
@@ -528,11 +543,24 @@ def _box_pa_array(
                 pa_array = pa.array(dta._ndarray, type=pa_type, mask=dta_mask)
                 return pa_array
 
+            mask = None
+            if getattr(value, "dtype", None) is None or value.dtype.kind not in "mfM":
+                # similar to isna(value) but exclude NaN
+                # TODO: cythonize!
+                mask = np.array([x is NA or x is None for x in value], dtype=bool)
+
+            from_pandas = False
+            if pa.types.is_integer(pa_type):
+                # If user specifically asks to cast a numpy float array with NaNs
+                #  to pyarrow integer, we'll treat those NaNs as NA
+                from_pandas = True
             try:
-                pa_array = pa.array(value, type=pa_type, from_pandas=True)
+                pa_array = pa.array(
+                    value, type=pa_type, mask=mask, from_pandas=from_pandas
+                )
             except (pa.ArrowInvalid, pa.ArrowTypeError):
                 # GH50430: let pyarrow infer type, then cast
-                pa_array = pa.array(value, from_pandas=True)
+                pa_array = pa.array(value, mask=mask, from_pandas=from_pandas)
 
             if pa_type is None and pa.types.is_duration(pa_array.type):
                 # Workaround https://github.com/apache/arrow/issues/37291
@@ -540,7 +568,7 @@ def _box_pa_array(
 
                 value = to_timedelta(value)
                 value = value.to_numpy()
-                pa_array = pa.array(value, type=pa_type, from_pandas=True)
+                pa_array = pa.array(value, type=pa_type)
 
             if pa.types.is_duration(pa_array.type) and pa_array.null_count > 0:
                 # GH52843: upstream bug for duration types when originally
@@ -1187,7 +1215,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
         if not len(values):
             return np.zeros(len(self), dtype=bool)
 
-        result = pc.is_in(self._pa_array, value_set=pa.array(values, from_pandas=True))
+        result = pc.is_in(self._pa_array, value_set=pa.array(values))
         # pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls
         # to False
         return np.array(result, dtype=np.bool_)
@@ -1994,7 +2022,7 @@ def __setitem__(self, key, value) -> None:
                 raise ValueError("Length of indexer and values mismatch")
             chunks = [
                 *self._pa_array[:key].chunks,
-                pa.array([value], type=self._pa_array.type, from_pandas=True),
+                pa.array([value], type=self._pa_array.type),
                 *self._pa_array[key + 1 :].chunks,
             ]
             data = pa.chunked_array(chunks).combine_chunks()
@@ -2048,7 +2076,7 @@ def _rank_calc(
                 pa_type = pa.float64()
             else:
                 pa_type = pa.uint64()
-            result = pa.array(ranked, type=pa_type, from_pandas=True)
+            result = pa.array(ranked, type=pa_type)
             return result
 
         data = self._pa_array.combine_chunks()
@@ -2300,7 +2328,7 @@ def _to_numpy_and_type(value) -> tuple[np.ndarray, pa.DataType | None]:
         right, right_type = _to_numpy_and_type(right)
         pa_type = left_type or right_type
         result = np.where(cond, left, right)
-        return pa.array(result, type=pa_type, from_pandas=True)
+        return pa.array(result, type=pa_type)
 
     @classmethod
     def _replace_with_mask(
@@ -2343,7 +2371,7 @@ def _replace_with_mask(
             replacements = replacements.as_py()
         result = np.array(values, dtype=object)
         result[mask] = replacements
-        return pa.array(result, type=values.type, from_pandas=True)
+        return pa.array(result, type=values.type)
 
     # ------------------------------------------------------------------
     # GroupBy Methods
@@ -2422,7 +2450,7 @@ def _groupby_op(
             return type(self)(pa_result)
         else:
             # DatetimeArray, TimedeltaArray
-            pa_result = pa.array(result, from_pandas=True)
+            pa_result = pa.array(result)
             return type(self)(pa_result)
 
     def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index f52b709a59de9..7f65463a97815 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -481,6 +481,12 @@ def _str_map_str_or_object(
             if self.dtype.storage == "pyarrow":
                 import pyarrow as pa
 
+                # TODO: shouldn't this already be caught by the passed mask?
+                #  it isn't in test_extract_expand_capture_groups_index
+                # mask = mask | np.array(
+                #    [x is libmissing.NA for x in result], dtype=bool
+                #    )
+
                 result = pa.array(
                     result, mask=mask, type=pa.large_string(), from_pandas=True
                 )
@@ -733,7 +739,7 @@ def __arrow_array__(self, type=None):
 
         values = self._ndarray.copy()
         values[self.isna()] = None
-        return pa.array(values, type=type, from_pandas=True)
+        return pa.array(values, type=type)
 
     def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]:  # type: ignore[override]
         arr = self._ndarray
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 7f1ccc482f70f..4a788638bae45 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -9873,7 +9873,7 @@ def where(
     def where(
         self,
         cond,
-        other=np.nan,
+        other=lib.no_default,
         *,
         inplace: bool = False,
         axis: Axis | None = None,
@@ -10031,6 +10031,23 @@ def where(
                         stacklevel=2,
                     )
 
+        if other is lib.no_default:
+            if self.ndim == 1:
+                if isinstance(self.dtype, ExtensionDtype):
+                    other = self.dtype.na_value
+                else:
+                    other = np.nan
+            else:
+                if self._mgr.nblocks == 1 and isinstance(
+                    self._mgr.blocks[0].values.dtype, ExtensionDtype
+                ):
+                    # FIXME: checking this is kludgy!
+                    other = self._mgr.blocks[0].values.dtype.na_value
+                else:
+                    # FIXME: the same problem we had with Series will now
+                    #  show up column-by-column!
+                    other = np.nan
+
         other = common.apply_if_callable(other, self)
         return self._where(cond, other, inplace=inplace, axis=axis, level=level)
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 7e7cd8fb13456..14b65a56f8c05 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -721,7 +721,7 @@ def test_EA_types(self, engine, data, dtype_backend, request):
                 pytest.mark.xfail(reason="CSV parsers don't correctly handle binary")
             )
         df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
-        csv_output = df.to_csv(index=False, na_rep=np.nan)
+        csv_output = df.to_csv(index=False, na_rep=np.nan)  # should be NA?
         if pa.types.is_binary(pa_dtype):
             csv_output = BytesIO(csv_output)
         else:
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 014558bbf4bba..08cf1047f316c 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -381,8 +381,10 @@ def test_first_last_skipna(any_real_nullable_dtype, sort, skipna, how):
     df = DataFrame(
         {
             "a": [2, 1, 1, 2, 3, 3],
-            "b": [na_value, 3.0, na_value, 4.0, np.nan, np.nan],
-            "c": [na_value, 3.0, na_value, 4.0, np.nan, np.nan],
+            # TODO: test that has mixed na_value and NaN either working for
+            #  float or raising for int?
+            "b": [na_value, 3.0, na_value, 4.0, na_value, na_value],
+            "c": [na_value, 3.0, na_value, 4.0, na_value, na_value],
         },
         dtype=any_real_nullable_dtype,
     )
diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py
index 7c6a7893ba3a0..8363ba118d4d3 100644
--- a/pandas/tests/series/methods/test_rank.py
+++ b/pandas/tests/series/methods/test_rank.py
@@ -276,6 +276,13 @@ def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
 
         ser = ser if dtype is None else ser.astype(dtype)
         result = ser.rank(method=method)
+        if dtype == "float64[pyarrow]":
+            # the NaNs are not treated as NA
+            exp = exp.copy()
+            if method == "average":
+                exp[np.isnan(ser)] = 9.5
+            elif method == "dense":
+                exp[np.isnan(ser)] = 6
         tm.assert_series_equal(result, Series(exp, dtype=expected_dtype(dtype, method)))
 
     @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
@@ -321,6 +328,8 @@ def test_rank_tie_methods_on_infs_nans(
             order = [ranks[1], ranks[0], ranks[2]]
         elif na_option == "bottom":
             order = [ranks[0], ranks[2], ranks[1]]
+        elif dtype == "float64[pyarrow]":
+            order = [ranks[0], [NA] * chunk, ranks[1]]
         else:
             order = [ranks[0], [np.nan] * chunk, ranks[1]]
         expected = order if ascending else order[::-1]

From 74a22486c394b1ba8de5f0705ffdaba67dd50e58 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 28 Jun 2025 10:23:00 -0700
Subject: [PATCH 05/14] comment

---
 pandas/tests/extension/base/setitem.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
index 185d6d750cace..99ab5d2f7e86f 100644
--- a/pandas/tests/extension/base/setitem.py
+++ b/pandas/tests/extension/base/setitem.py
@@ -422,6 +422,7 @@ def test_setitem_frame_2d_values(self, data):
         df.iloc[:-1] = df.iloc[:-1].copy()
         tm.assert_frame_equal(df, orig)
 
+        # FIXME: Breaks for pyarrow float dtype bc df.values changes NAs to NaN
         df.iloc[:] = df.values
         tm.assert_frame_equal(df, orig)
 

From 9d8fef493f599ff5342a4fa2c96ee1ad953828c1 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 5 Jul 2025 09:41:02 -0700
Subject: [PATCH 06/14] Down to 40 failing tests

---
 pandas/_config/__init__.py           |  5 +++
 pandas/_libs/missing.pyi             |  1 +
 pandas/_libs/missing.pyx             | 18 ++++++++
 pandas/core/arrays/_utils.py         | 15 ++++++-
 pandas/core/arrays/arrow/array.py    | 66 +++++++++++++++++++---------
 pandas/core/arrays/base.py           |  3 ++
 pandas/core/arrays/masked.py         |  4 +-
 pandas/core/config_init.py           |  9 ++++
 pandas/tests/extension/test_arrow.py | 21 ++++++---
 9 files changed, 114 insertions(+), 28 deletions(-)

diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
index 463e8af7cc561..fbf388224254f 100644
--- a/pandas/_config/__init__.py
+++ b/pandas/_config/__init__.py
@@ -33,3 +33,8 @@
 def using_string_dtype() -> bool:
     _mode_options = _global_config["future"]
     return _mode_options["infer_string"]
+
+
+def using_pyarrow_strict_nans() -> bool:
+    _mode_options = _global_config["mode"]
+    return _mode_options["pyarrow_strict_nans"]
diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi
index 6bf30a03cef32..6c76fe49330b6 100644
--- a/pandas/_libs/missing.pyi
+++ b/pandas/_libs/missing.pyi
@@ -14,3 +14,4 @@ def isneginf_scalar(val: object) -> bool: ...
 def checknull(val: object) -> bool: ...
 def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
 def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
+def is_pdna_or_none(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index c7f905c4d0be0..164a47cb5adb7 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -249,6 +249,24 @@ cdef bint checknull_with_nat_and_na(object obj):
     return checknull_with_nat(obj) or obj is C_NA
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def is_pdna_or_none(values: ndarray) -> ndarray:
+    cdef:
+        ndarray[uint8_t] result
+        Py_ssize_t i, N
+        object val
+
+    N = len(values)
+    result = np.zeros(N, dtype=np.uint8)  # uint8 buffer, viewed as bool on return
+
+    for i in range(N):
+        val = values[i]
+        if val is None or val is C_NA:  # identity only: NaN/NaT stay False
+            result[i] = True
+    return result.view(bool)
+
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def is_numeric_na(values: ndarray) -> ndarray:
diff --git a/pandas/core/arrays/_utils.py b/pandas/core/arrays/_utils.py
index 6b46396d5efdf..9adde3846ca03 100644
--- a/pandas/core/arrays/_utils.py
+++ b/pandas/core/arrays/_utils.py
@@ -7,7 +7,10 @@
 
 import numpy as np
 
+from pandas._config import using_pyarrow_strict_nans
+
 from pandas._libs import lib
+from pandas._libs.missing import NA
 from pandas.errors import LossySetitemError
 
 from pandas.core.dtypes.cast import np_can_hold_element
@@ -21,7 +24,11 @@
 
 
 def to_numpy_dtype_inference(
-    arr: ArrayLike, dtype: npt.DTypeLike | None, na_value, hasna: bool
+    arr: ArrayLike,
+    dtype: npt.DTypeLike | None,
+    na_value,
+    hasna: bool,
+    is_pyarrow: bool = True,
 ) -> tuple[npt.DTypeLike, Any]:
     if dtype is None and is_numeric_dtype(arr.dtype):
         dtype_given = False
@@ -34,7 +41,11 @@ def to_numpy_dtype_inference(
                 else:
                     dtype = arr.dtype.numpy_dtype  # type: ignore[union-attr]
                 if na_value is lib.no_default:
-                    na_value = np.nan
+                    if is_pyarrow and using_pyarrow_strict_nans():
+                        na_value = NA
+                        dtype = np.dtype(object)
+                    else:
+                        na_value = np.nan
         else:
             dtype = arr.dtype.numpy_dtype  # type: ignore[union-attr]
     elif dtype is not None:
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 3f6559b98a7a4..a81f69fc314aa 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -15,8 +15,10 @@
 
 import numpy as np
 
+from pandas._config import using_pyarrow_strict_nans
+
 from pandas._libs import lib
-from pandas._libs.missing import NA
+from pandas._libs.missing import is_pdna_or_none
 from pandas._libs.tslibs import (
     Timedelta,
     Timestamp,
@@ -324,6 +326,11 @@ def _from_sequence_of_strings(
         """
         Construct a new ExtensionArray from a sequence of strings.
         """
+        mask = isna(strings)
+
+        if isinstance(strings, cls):
+            strings = strings._pa_array
+
         pa_type = to_pyarrow_type(dtype)
         if (
             pa_type is None
@@ -342,22 +349,35 @@ def _from_sequence_of_strings(
             from pandas.core.tools.datetimes import to_datetime
 
             scalars = to_datetime(strings, errors="raise").date
+
+            if isinstance(strings, cls):
+                # Avoid an object path
+                # TODO: this assumes that pyarrow's str->date casting is the
+                # same as to_datetime. Is that a fair assumption?
+                scalars = strings._pa_array.cast(pa_type)
+            else:
+                scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
+
         elif pa.types.is_duration(pa_type):
             from pandas.core.tools.timedeltas import to_timedelta
 
             scalars = to_timedelta(strings, errors="raise")
+
             if pa_type.unit != "ns":
                 # GH51175: test_from_sequence_of_strings_pa_array
                 # attempt to parse as int64 reflecting pyarrow's
                 # duration to string casting behavior
                 mask = isna(scalars)
-                if not isinstance(strings, (pa.Array, pa.ChunkedArray)):
-                    strings = pa.array(strings, type=pa.string())
+                if isinstance(strings, cls):
+                    strings = strings._pa_array
+                elif not isinstance(strings, (pa.Array, pa.ChunkedArray)):
+                    strings = pa.array(strings, type=pa.string(), mask=mask)
                 strings = pc.if_else(mask, None, strings)
                 try:
                     scalars = strings.cast(pa.int64())
                 except pa.ArrowInvalid:
                     pass
+
         elif pa.types.is_time(pa_type):
             from pandas.core.tools.times import to_time
 
@@ -373,7 +393,7 @@ def _from_sequence_of_strings(
             if isinstance(strings, (pa.Array, pa.ChunkedArray)):
                 scalars = strings
             else:
-                scalars = pa.array(strings, type=pa.string())
+                scalars = pa.array(strings, type=pa.string(), mask=mask)
             scalars = pc.if_else(pc.equal(scalars, "1.0"), "1", scalars)
             scalars = pc.if_else(pc.equal(scalars, "0.0"), "0", scalars)
             scalars = scalars.cast(pa.bool_())
@@ -385,12 +405,16 @@ def _from_sequence_of_strings(
             from pandas.core.tools.numeric import to_numeric
 
             scalars = to_numeric(strings, errors="raise")
-            if not pa.types.is_decimal(pa_type):
+            if not pa.types.is_decimal(pa_type) and isinstance(
+                strings, (pa.Array, pa.ChunkedArray)
+            ):
                 # TODO: figure out why doing this cast breaks with decimal dtype
                 #  in test_from_sequence_of_strings_pa_array
                 mask = strings.is_null()
                 scalars = pa.array(scalars, mask=np.array(mask), type=pa_type)
                 # TODO: could we just do strings.cast(pa_type)?
+            elif mask is not None:
+                scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
 
         else:
             raise NotImplementedError(
@@ -544,23 +568,20 @@ def _box_pa_array(
                 return pa_array
 
             mask = None
-            if getattr(value, "dtype", None) is None or value.dtype.kind not in "mfM":
-                # similar to isna(value) but exclude NaN
-                # TODO: cythonize!
-                mask = np.array([x is NA or x is None for x in value], dtype=bool)
-
-            from_pandas = False
-            if pa.types.is_integer(pa_type):
-                # If user specifically asks to cast a numpy float array with NaNs
-                #  to pyarrow integer, we'll treat those NaNs as NA
-                from_pandas = True
+            if getattr(value, "dtype", None) is None or value.dtype.kind not in "mMf":
+                try:
+                    arr_value = np.asarray(value)
+                except ValueError:
+                    # e.g. list dtype with mixed-length lists
+                    arr_value = np.asarray(value, dtype=object)
+                # similar to isna(value) but exclude NaN, NaT, nat-like, nan-like
+                mask = is_pdna_or_none(arr_value)
+
             try:
-                pa_array = pa.array(
-                    value, type=pa_type, mask=mask, from_pandas=from_pandas
-                )
+                pa_array = pa.array(value, type=pa_type, mask=mask)
             except (pa.ArrowInvalid, pa.ArrowTypeError):
                 # GH50430: let pyarrow infer type, then cast
-                pa_array = pa.array(value, mask=mask, from_pandas=from_pandas)
+                pa_array = pa.array(value, mask=mask)
 
             if pa_type is None and pa.types.is_duration(pa_array.type):
                 # Workaround https://github.com/apache/arrow/issues/37291
@@ -1496,7 +1517,11 @@ def to_numpy(
             pa.types.is_floating(pa_type)
             and (
                 na_value is np.nan
-                or (original_na_value is lib.no_default and is_float_dtype(dtype))
+                or (
+                    original_na_value is lib.no_default
+                    and is_float_dtype(dtype)
+                    and not using_pyarrow_strict_nans()
+                )
             )
         ):
             result = data._pa_array.to_numpy()
@@ -2369,6 +2394,7 @@ def _replace_with_mask(
             replacements = np.array(replacements, dtype=object)
         elif isinstance(replacements, pa.Scalar):
             replacements = replacements.as_py()
+
         result = np.array(values, dtype=object)
         result[mask] = replacements
         return pa.array(result, type=values.type)
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index d0048e122051a..959a2acc8601f 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -778,6 +778,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
 
             return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
 
+        # if dtype.kind == "U":
+        #    dtype = np.dtype(object)
+        # return self.to_numpy(dtype=dtype, copy=copy)
         if not copy:
             return np.asarray(self, dtype=dtype)
         else:
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index e7a6b207363c3..6438a967eae42 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -484,7 +484,9 @@ def to_numpy(
         array([ True, False, False])
         """
         hasna = self._hasna
-        dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna)
+        dtype, na_value = to_numpy_dtype_inference(
+            self, dtype, na_value, hasna, is_pyarrow=False
+        )
         if dtype is None:
             dtype = object
 
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 20fe8cbab1c9f..6e2ecae593d99 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -427,6 +427,15 @@ def is_terminal() -> bool:
         validator=is_one_of_factory([True, False, "warn"]),
     )
 
+with cf.config_prefix("mode"):
+    cf.register_option(
+        "pyarrow_strict_nans",
+        True,
+        # TODO: Change this to False before merging
+        "Whether to make ArrowDtype arrays consistently treat NaN as distinct from NA",
+        validator=is_one_of_factory([True, False]),
+    )
+
 
 # user warnings
 chained_assignment = """
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 14b65a56f8c05..d16c11265e1ca 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -32,6 +32,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_strict_nans
+
 from pandas._libs import lib
 from pandas._libs.tslibs import timezones
 from pandas.compat import (
@@ -721,7 +723,10 @@ def test_EA_types(self, engine, data, dtype_backend, request):
                 pytest.mark.xfail(reason="CSV parsers don't correctly handle binary")
             )
         df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
-        csv_output = df.to_csv(index=False, na_rep=np.nan)  # should be NA?
+        if using_pyarrow_strict_nans():
+            csv_output = df.to_csv(index=False, na_rep="NA")
+        else:
+            csv_output = df.to_csv(index=False, na_rep=np.nan)
         if pa.types.is_binary(pa_dtype):
             csv_output = BytesIO(csv_output)
         else:
@@ -1512,7 +1517,8 @@ def test_pickle_roundtrip(data):
 
 def test_astype_from_non_pyarrow(data):
     # GH49795
-    pd_array = data._pa_array.to_pandas().array
+    np_arr = data.to_numpy()
+    pd_array = pd.array(np_arr, dtype=np_arr.dtype)
     result = pd_array.astype(data.dtype)
     assert not isinstance(pd_array.dtype, ArrowDtype)
     assert isinstance(result.dtype, ArrowDtype)
@@ -1546,7 +1552,9 @@ def test_to_numpy_with_defaults(data):
     else:
         expected = np.array(data._pa_array)
 
-    if data._hasna and not is_numeric_dtype(data.dtype):
+    if data._hasna and (
+        not is_numeric_dtype(data.dtype) or using_pyarrow_strict_nans()
+    ):
         expected = expected.astype(object)
         expected[pd.isna(data)] = pd.NA
 
@@ -2868,7 +2876,7 @@ def test_dt_components():
     )
     result = ser.dt.components
     expected = pd.DataFrame(
-        [[1, 0, 0, 2, 0, 3, 4], [None, None, None, None, None, None, None]],
+        [[1, 0, 0, 2, 0, 3, 4], [pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA]],
         columns=[
             "days",
             "hours",
@@ -2893,7 +2901,10 @@ def test_dt_components_large_values():
     )
     result = ser.dt.components
     expected = pd.DataFrame(
-        [[365, 23, 59, 59, 999, 0, 0], [None, None, None, None, None, None, None]],
+        [
+            [365, 23, 59, 59, 999, 0, 0],
+            [pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA],
+        ],
         columns=[
             "days",
             "hours",

From f47c746a770dbd2794801c4c90f2a5f2389e452a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 6 Jul 2025 10:17:51 -0700
Subject: [PATCH 07/14] Fix rank, json tests

---
 pandas/io/json/_json.py                  | 14 +++++++++++++
 pandas/tests/extension/test_arrow.py     |  5 ++++-
 pandas/tests/series/methods/test_rank.py | 25 ++++++++++++++++++++----
 3 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 6b4f6c05c3123..f8170dd843793 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -994,6 +994,13 @@ def _read_ujson(self) -> DataFrame | Series:
         else:
             obj = self._get_object_parser(self.data)
         if self.dtype_backend is not lib.no_default:
+            if self.dtype_backend == "pyarrow":
+                # The construction above takes "null" to NaN, which we want to
+                #  convert to NA. But .convert_dtypes to pyarrow doesn't allow
+                #  that, so we do a 2-step conversion through numpy-nullable.
+                obj = obj.convert_dtypes(
+                    infer_objects=False, dtype_backend="numpy_nullable"
+                )
             return obj.convert_dtypes(
                 infer_objects=False, dtype_backend=self.dtype_backend
             )
@@ -1071,6 +1078,13 @@ def __next__(self) -> DataFrame | Series:
             raise ex
 
         if self.dtype_backend is not lib.no_default:
+            if self.dtype_backend == "pyarrow":
+                # The construction above takes "null" to NaN, which we want to
+                #  convert to NA. But .convert_dtypes to pyarrow doesn't allow
+                #  that, so we do a 2-step conversion through numpy-nullable.
+                obj = obj.convert_dtypes(
+                    infer_objects=False, dtype_backend="numpy_nullable"
+                )
             return obj.convert_dtypes(
                 infer_objects=False, dtype_backend=self.dtype_backend
             )
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index d16c11265e1ca..482754a9b5f18 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -285,7 +285,10 @@ def test_map(self, data_missing, na_action):
             tm.assert_numpy_array_equal(result, expected)
         else:
             result = data_missing.map(lambda x: x, na_action=na_action)
-            if data_missing.dtype == "float32[pyarrow]":
+            if (
+                data_missing.dtype == "float32[pyarrow]"
+                and not using_pyarrow_strict_nans()
+            ):
                 # map roundtrips through objects, which converts to float64
                 expected = data_missing.to_numpy(dtype="float64", na_value=np.nan)
             else:
diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py
index 8363ba118d4d3..85b99c87e7cc8 100644
--- a/pandas/tests/series/methods/test_rank.py
+++ b/pandas/tests/series/methods/test_rank.py
@@ -271,7 +271,12 @@ def test_rank_signature(self):
 
     def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
         method, exp = results
-        if dtype == "int64" or (not using_infer_string and dtype == "str"):
+        if (
+            dtype == "int64"
+            or dtype == "int64[pyarrow]"
+            or dtype == "uint64[pyarrow]"
+            or (not using_infer_string and dtype == "str")
+        ):
             pytest.skip("int64/str does not support NaN")
 
         ser = ser if dtype is None else ser.astype(dtype)
@@ -283,7 +288,15 @@ def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
                 exp[np.isnan(ser)] = 9.5
             elif method == "dense":
                 exp[np.isnan(ser)] = 6
-        tm.assert_series_equal(result, Series(exp, dtype=expected_dtype(dtype, method)))
+            elif method == "max":
+                exp[np.isnan(ser)] = 10
+            elif method == "min":
+                exp[np.isnan(ser)] = 9
+            elif method == "first":
+                exp[np.isnan(ser)] = [9, 10]
+
+        expected = Series(exp, dtype=expected_dtype(dtype, method))
+        tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
     @pytest.mark.parametrize(
@@ -395,8 +408,12 @@ def test_rank_dense_method(self, dtype, ser, exp):
 
     def test_rank_descending(self, ser, results, dtype, using_infer_string):
         method, _ = results
-        if dtype == "int64" or (not using_infer_string and dtype == "str"):
-            s = ser.dropna()
+        if (
+            dtype == "int64"
+            or dtype == "int64[pyarrow]"
+            or (not using_infer_string and dtype == "str")
+        ):
+            s = ser.dropna().astype(dtype)
         else:
             s = ser.astype(dtype)
 

From 083f7057dd74a271c0c36dadd1c901bda22416f1 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 6 Jul 2025 10:33:51 -0700
Subject: [PATCH 08/14] CLN: remove outdated code and comments

---
 pandas/core/arrays/arrow/array.py      | 12 ++----------
 pandas/core/arrays/base.py             |  3 ---
 pandas/tests/extension/base/setitem.py |  1 -
 3 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index a81f69fc314aa..6c4aba95c0c04 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -350,13 +350,7 @@ def _from_sequence_of_strings(
 
             scalars = to_datetime(strings, errors="raise").date
 
-            if isinstance(strings, cls):
-                # Avoid an object path
-                # TODO: this assumes that pyarrows str->date casting is the
-                # same as to_datetime. Is that a fair assumption?
-                scalars = strings._pa_array.cast(pa_type)
-            else:
-                scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
+            scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
 
         elif pa.types.is_duration(pa_type):
             from pandas.core.tools.timedeltas import to_timedelta
@@ -368,9 +362,7 @@ def _from_sequence_of_strings(
                 # attempt to parse as int64 reflecting pyarrow's
                 # duration to string casting behavior
                 mask = isna(scalars)
-                if isinstance(strings, cls):
-                    strings = strings._pa_array
-                elif not isinstance(strings, (pa.Array, pa.ChunkedArray)):
+                if not isinstance(strings, (pa.Array, pa.ChunkedArray)):
                     strings = pa.array(strings, type=pa.string(), mask=mask)
                 strings = pc.if_else(mask, None, strings)
                 try:
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 959a2acc8601f..d0048e122051a 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -778,9 +778,6 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
 
             return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
 
-        # if dtype.kind == "U":
-        #    dtype = np.dtype(object)
-        # return self.to_numpy(dtype=dtype, copy=copy)
         if not copy:
             return np.asarray(self, dtype=dtype)
         else:
diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
index 99ab5d2f7e86f..185d6d750cace 100644
--- a/pandas/tests/extension/base/setitem.py
+++ b/pandas/tests/extension/base/setitem.py
@@ -422,7 +422,6 @@ def test_setitem_frame_2d_values(self, data):
         df.iloc[:-1] = df.iloc[:-1].copy()
         tm.assert_frame_equal(df, orig)
 
-        # FIXME: Breaks for pyarrow float dtype bc df.values changes NAs to NaN
         df.iloc[:] = df.values
         tm.assert_frame_equal(df, orig)
 

From a34020325cb8c90750cc7d7c7c0f7d8b331b0172 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 6 Jul 2025 14:12:03 -0700
Subject: [PATCH 09/14] Fix where kludge

---
 pandas/core/arrays/arrow/array.py    |  2 ++
 pandas/core/generic.py               | 17 -----------------
 pandas/tests/extension/test_arrow.py |  7 +++++--
 3 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 6c4aba95c0c04..568859d8416c7 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -405,6 +405,8 @@ def _from_sequence_of_strings(
                 mask = strings.is_null()
                 scalars = pa.array(scalars, mask=np.array(mask), type=pa_type)
                 # TODO: could we just do strings.cast(pa_type)?
+            elif isinstance(strings, (pa.Array, pa.ChunkedArray)):
+                scalars = strings.cast(pa_type)
             elif mask is not None:
                 scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4a788638bae45..8c471e0f5ece7 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -10031,23 +10031,6 @@ def where(
                         stacklevel=2,
                     )
 
-        if other is lib.no_default:
-            if self.ndim == 1:
-                if isinstance(self.dtype, ExtensionDtype):
-                    other = self.dtype.na_value
-                else:
-                    other = np.nan
-            else:
-                if self._mgr.nblocks == 1 and isinstance(
-                    self._mgr.blocks[0].values.dtype, ExtensionDtype
-                ):
-                    # FIXME: checking this is kludgy!
-                    other = self._mgr.blocks[0].values.dtype.na_value
-                else:
-                    # FIXME: the same problem we had with Series will now
-                    #  show up column-by-column!
-                    other = np.nan
-
         other = common.apply_if_callable(other, self)
         return self._where(cond, other, inplace=inplace, axis=axis, level=level)
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 482754a9b5f18..229c0c8070a4f 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1569,8 +1569,11 @@ def test_to_numpy_int_with_na():
     data = [1, None]
     arr = pd.array(data, dtype="int64[pyarrow]")
     result = arr.to_numpy()
-    expected = np.array([1, np.nan])
-    assert isinstance(result[0], float)
+    if using_pyarrow_strict_nans():
+        expected = np.array([1, pd.NA], dtype=object)
+    else:
+        expected = np.array([1, np.nan])
+        assert isinstance(result[0], float)
     tm.assert_numpy_array_equal(result, expected)
 
 

From 587e53f739d88267e6cafd7f4a9c37fde9d738d7 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 6 Jul 2025 14:21:09 -0700
Subject: [PATCH 10/14] update tests

---
 pandas/tests/extension/test_arrow.py              | 5 ++++-
 pandas/tests/frame/methods/test_convert_dtypes.py | 6 +++++-
 pandas/tests/groupby/methods/test_kurt.py         | 2 +-
 pandas/tests/tools/test_to_numeric.py             | 2 +-
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 229c0c8070a4f..49a60c917ece0 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3537,7 +3537,10 @@ def test_cast_dictionary_different_value_dtype(arrow_type):
 def test_map_numeric_na_action():
     ser = pd.Series([32, 40, None], dtype="int64[pyarrow]")
     result = ser.map(lambda x: 42, na_action="ignore")
-    expected = pd.Series([42.0, 42.0, np.nan], dtype="float64")
+    if using_pyarrow_strict_nans():
+        expected = pd.Series([42.0, 42.0, pd.NA], dtype="object")
+    else:
+        expected = pd.Series([42.0, 42.0, np.nan], dtype="float64")
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
index ab847e2f8e81e..21f7811100d43 100644
--- a/pandas/tests/frame/methods/test_convert_dtypes.py
+++ b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_strict_nans
+
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -73,6 +75,8 @@ def test_pyarrow_dtype_backend(self):
             }
         )
         result = df.convert_dtypes(dtype_backend="pyarrow")
+
+        item = None if not using_pyarrow_strict_nans() else np.nan
         expected = pd.DataFrame(
             {
                 "a": pd.arrays.ArrowExtensionArray(
@@ -80,7 +84,7 @@ def test_pyarrow_dtype_backend(self):
                 ),
                 "b": pd.arrays.ArrowExtensionArray(pa.array(["x", "y", None])),
                 "c": pd.arrays.ArrowExtensionArray(pa.array([True, False, None])),
-                "d": pd.arrays.ArrowExtensionArray(pa.array([None, 100.5, 200.0])),
+                "d": pd.arrays.ArrowExtensionArray(pa.array([item, 100.5, 200.0])),
                 "e": pd.arrays.ArrowExtensionArray(
                     pa.array(
                         [
diff --git a/pandas/tests/groupby/methods/test_kurt.py b/pandas/tests/groupby/methods/test_kurt.py
index 21b7c50c3c5aa..7aac23c2147fb 100644
--- a/pandas/tests/groupby/methods/test_kurt.py
+++ b/pandas/tests/groupby/methods/test_kurt.py
@@ -43,7 +43,7 @@ def test_groupby_kurt_arrow_float64(dtype):
     # Test groupby.kurt() with float64[pyarrow] and Float64 dtypes
     df = pd.DataFrame(
         {
-            "x": [1.0, np.nan, 3.2, 4.8, 2.3, 1.9, 8.9],
+            "x": [1.0, pd.NA, 3.2, 4.8, 2.3, 1.9, 8.9],
             "y": [1.6, 3.3, 3.2, 6.8, 1.3, 2.9, 9.0],
         },
         dtype=dtype,
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index 893f526fb3eb0..e3471c2e3ac0d 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -898,7 +898,7 @@ def test_to_numeric_dtype_backend_error(dtype_backend):
         dtype = "double[pyarrow]"
     else:
         dtype = "Float64"
-    expected = Series([np.nan, np.nan, np.nan], dtype=dtype)
+    expected = Series([pd.NA, pd.NA, pd.NA], dtype=dtype)
     tm.assert_series_equal(result, expected)
 
 

From 734465c3d59748b60be39d451d7e3d6d9c39b403 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 6 Jul 2025 14:32:52 -0700
Subject: [PATCH 11/14] Fix remaining tests

---
 pandas/core/arrays/base.py    | 8 ++++++++
 pandas/tests/io/test_stata.py | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index d0048e122051a..52be1a76363d6 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2539,6 +2539,14 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
             if result is not NotImplemented:
                 return result
 
+        # TODO: hacky; a float64[pyarrow] special case does not belong in base.py
+        if self.dtype == "float64[pyarrow]":
+            # e.g. test_log_arrow_backed_missing_value
+            new_inputs = [
+                x if x is not self else x.to_numpy(na_value=np.nan) for x in inputs
+            ]
+            return getattr(ufunc, method)(*new_inputs, **kwargs)
+
         return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
 
     def map(self, mapper, na_action: Literal["ignore"] | None = None):
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 90fda2c10962b..3ebf4416f7289 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -2056,9 +2056,10 @@ def test_writer_118_exceptions(self, temp_file):
         ["numpy_nullable", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))],
     )
     def test_read_write_ea_dtypes(self, dtype_backend, temp_file, tmp_path):
+        dtype = "Int64" if dtype_backend == "numpy_nullable" else "int64[pyarrow]"
         df = DataFrame(
             {
-                "a": [1, 2, None],
+                "a": pd.array([1, 2, None], dtype=dtype),
                 "b": ["a", "b", "c"],
                 "c": [True, False, None],
                 "d": [1.5, 2.5, 3.5],

From d2aeeff8b8ad29842de22ee6b661771cf23dd115 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 7 Jul 2025 07:49:33 -0700
Subject: [PATCH 12/14] mypy fixup

---
 pandas/_libs/missing.pyi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi
index 6c76fe49330b6..64256ae4b36ad 100644
--- a/pandas/_libs/missing.pyi
+++ b/pandas/_libs/missing.pyi
@@ -14,4 +14,4 @@ def isneginf_scalar(val: object) -> bool: ...
 def checknull(val: object) -> bool: ...
 def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
 def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
-def is_pdna_or_none(values: npt.ndarray) -> npt.NDArray[np.bool_]: ...
+def is_pdna_or_none(values: np.ndarray) -> npt.NDArray[np.bool_]: ...

From 73a95d2ce3ff4f7891389d85bc0c2496091855fa Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 7 Jul 2025 12:59:54 -0700
Subject: [PATCH 13/14] old-numpy compat

---
 pandas/core/arrays/arrow/array.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 568859d8416c7..83b4f6517a3a5 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -562,12 +562,8 @@ def _box_pa_array(
                 return pa_array
 
             mask = None
-            if getattr(value, "dtype", None) is None or value.dtype.kind not in "mMf":
-                try:
-                    arr_value = np.asarray(value)
-                except ValueError:
-                    # e.g. list dtype with mixed-length lists
-                    arr_value = np.asarray(value, dtype=object)
+            if getattr(value, "dtype", None) is None or value.dtype.kind not in "iumMf":
+                arr_value = np.asarray(value, dtype=object)
                 # similar to isna(value) but exclude NaN, NaT, nat-like, nan-like
                 mask = is_pdna_or_none(arr_value)
 

From ce28027b3b7eca6bae1f138e72257795860c63e4 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 7 Jul 2025 15:45:04 -0700
Subject: [PATCH 14/14] simplify

---
 pandas/core/arrays/arrow/array.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 83b4f6517a3a5..90c9a38b43b1d 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -369,7 +369,6 @@ def _from_sequence_of_strings(
                     scalars = strings.cast(pa.int64())
                 except pa.ArrowInvalid:
                     pass
-
         elif pa.types.is_time(pa_type):
             from pandas.core.tools.times import to_time
 
@@ -397,18 +396,10 @@ def _from_sequence_of_strings(
             from pandas.core.tools.numeric import to_numeric
 
             scalars = to_numeric(strings, errors="raise")
-            if not pa.types.is_decimal(pa_type) and isinstance(
-                strings, (pa.Array, pa.ChunkedArray)
-            ):
-                # TODO: figure out why doing this cast breaks with decimal dtype
-                #  in test_from_sequence_of_strings_pa_array
-                mask = strings.is_null()
-                scalars = pa.array(scalars, mask=np.array(mask), type=pa_type)
-                # TODO: could we just do strings.cast(pa_type)?
-            elif isinstance(strings, (pa.Array, pa.ChunkedArray)):
+            if isinstance(strings, (pa.Array, pa.ChunkedArray)):
                 scalars = strings.cast(pa_type)
             elif mask is not None:
-                scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
+                scalars = pa.array(scalars, mask=mask, type=pa_type)
 
         else:
             raise NotImplementedError(