Skip to content

Commit 9cddf75

Browse files
committed
BUG: ArrowEA constructor with timestamp type
1 parent 2b6c12d commit 9cddf75

File tree

3 files changed

+43
-7
lines changed

3 files changed

+43
-7
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -707,8 +707,8 @@ Datetimelike
707707
- Bug in :meth:`to_datetime` with ``format="ISO8601"`` and ``utc=True`` where naive timestamps incorrectly inherited timezone offset from previous timestamps in a series. (:issue:`61389`)
708708
- Bug in :meth:`to_datetime` wrongly converting when ``arg`` is a ``np.datetime64`` object with a unit of ``ps``. (:issue:`60341`)
709709
- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
710+
- Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of as wall times, which is how :class:`DatetimeTZDtype` treats them (:issue:`61775`)
710711
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
711-
-
712712

713713
Timedelta
714714
^^^^^^^^^

pandas/core/arrays/arrow/array.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
from pandas.core.arrays.masked import BaseMaskedArray
6464
from pandas.core.arrays.string_ import StringDtype
6565
import pandas.core.common as com
66+
from pandas.core.construction import extract_array
6667
from pandas.core.indexers import (
6768
check_array_indexer,
6869
unpack_tuple_and_ellipses,
@@ -501,10 +502,31 @@ def _box_pa_array(
501502
value = value.to_numpy()
502503

503504
if pa_type is not None and pa.types.is_timestamp(pa_type):
504-
# Use to_datetime to handle NaNs, disallow Decimal("NaN")
505-
from pandas import to_datetime
505+
# Use DatetimeArray to exclude Decimal(NaN) (GH#61774) and
506+
# ensure constructor treats tznaive the same as non-pyarrow
507+
# dtypes (GH#61775)
508+
from pandas.core.arrays.datetimes import (
509+
DatetimeArray,
510+
tz_to_dtype,
511+
)
506512

507-
value = to_datetime(value).as_unit(pa_type.unit)
513+
pass_dtype = tz_to_dtype(tz=pa_type.tz, unit=pa_type.unit)
514+
value = extract_array(value, extract_numpy=True)
515+
if isinstance(value, DatetimeArray):
516+
dta = value
517+
else:
518+
dta = DatetimeArray._from_sequence(
519+
value, copy=copy, dtype=pass_dtype
520+
)
521+
mask = dta.isna()
522+
value_i8 = dta.view("i8")
523+
if not value_i8.flags["WRITEABLE"]:
524+
# e.g. test_setitem_frame_2d_values
525+
value_i8 = value_i8.copy()
526+
dta = DatetimeArray._from_sequence(value_i8, dtype=dta.dtype)
527+
value_i8[mask] = 0 # GH#61776 avoid __sub__ overflow
528+
pa_array = pa.array(dta._ndarray, type=pa_type, mask=mask)
529+
return pa_array
508530

509531
try:
510532
pa_array = pa.array(value, type=pa_type, from_pandas=True)

pandas/tests/extension/test_arrow.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2682,14 +2682,15 @@ def test_dt_tz_localize_unsupported_tz_options():
26822682
ser.dt.tz_localize("UTC", nonexistent="NaT")
26832683

26842684

2685+
@pytest.mark.xfail(reason="Converts to UTC before localizing GH#61780")
26852686
def test_dt_tz_localize_none():
26862687
ser = pd.Series(
26872688
[datetime(year=2023, month=1, day=2, hour=3), None],
26882689
dtype=ArrowDtype(pa.timestamp("ns", tz="US/Pacific")),
26892690
)
26902691
result = ser.dt.tz_localize(None)
26912692
expected = pd.Series(
2692-
[datetime(year=2023, month=1, day=2, hour=3), None],
2693+
[ser[0].tz_localize(None), None],
26932694
dtype=ArrowDtype(pa.timestamp("ns")),
26942695
)
26952696
tm.assert_series_equal(result, expected)
@@ -2749,7 +2750,7 @@ def test_dt_tz_convert_none():
27492750
)
27502751
result = ser.dt.tz_convert(None)
27512752
expected = pd.Series(
2752-
[datetime(year=2023, month=1, day=2, hour=3), None],
2753+
[ser[0].tz_convert(None), None],
27532754
dtype=ArrowDtype(pa.timestamp("ns")),
27542755
)
27552756
tm.assert_series_equal(result, expected)
@@ -2763,7 +2764,7 @@ def test_dt_tz_convert(unit):
27632764
)
27642765
result = ser.dt.tz_convert("US/Eastern")
27652766
expected = pd.Series(
2766-
[datetime(year=2023, month=1, day=2, hour=3), None],
2767+
[ser[0].tz_convert("US/Eastern"), None],
27672768
dtype=ArrowDtype(pa.timestamp(unit, "US/Eastern")),
27682769
)
27692770
tm.assert_series_equal(result, expected)
@@ -3558,3 +3559,16 @@ def test_timestamp_dtype_disallows_decimal():
35583559

35593560
with pytest.raises(TypeError, match=msg):
35603561
pd.array(vals, dtype=ArrowDtype(pa.timestamp("us")))
3562+
3563+
3564+
def test_timestamp_dtype_matches_to_datetime():
3565+
# GH#61775
3566+
dtype1 = "datetime64[ns, US/Eastern]"
3567+
dtype2 = "timestamp[ns, US/Eastern][pyarrow]"
3568+
3569+
ts = pd.Timestamp("2025-07-03 18:10")
3570+
3571+
result = pd.Series([ts], dtype=dtype2)
3572+
expected = pd.Series([ts], dtype=dtype1).convert_dtypes(dtype_backend="pyarrow")
3573+
3574+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)