Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(datatypes): return pd.Timestamp or pd.Series[datetime64] for date.to_pandas() #8784

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/contribute/02_workflow.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,11 @@ you are going only up).
```bash
$ colima delete
```

### `x86_64` or `amd64` based containers

When starting containers built for `x86_64` / `amd64`, the architecture must be set in two places:

1. Add `platform: linux/amd64` to the service in `compose.yaml`.
2. Pass the `--arch` flag when starting the VM: `colima start --arch x86_64`.

For instance, both settings are necessary for the `oracle` service in `compose.yaml`; otherwise, the container will fail shortly after starting.
8 changes: 4 additions & 4 deletions ibis/backends/oracle/converter.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from __future__ import annotations

import datetime
import pandas as pd

from ibis.formats.pandas import PandasData


class OraclePandasData(PandasData):
@classmethod
def convert_Timestamp_element(cls, dtype):
return datetime.datetime.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Date_element(cls, dtype):
return datetime.date.fromisoformat
return pd.Timestamp.fromisoformat
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is sort of up for grabs given that pandas doesn't have a standard way to represent an array of dates (the `_element` suffix implies, perhaps not in an obvious way, that this function is being called once per element of an array).

I think it's fine to also change this to using pandas timestamps.


@classmethod
def convert_Time_element(cls, dtype):
return datetime.time.fromisoformat
return pd.Timestamp.fromisoformat
8 changes: 4 additions & 4 deletions ibis/backends/snowflake/converter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

import datetime
import json
from typing import TYPE_CHECKING

import pandas as pd
import pyarrow as pa

from ibis.formats.pandas import PandasData
Expand Down Expand Up @@ -52,15 +52,15 @@ def __arrow_ext_scalar_class__(self):
class SnowflakePandasData(PandasData):
@classmethod
def convert_Timestamp_element(cls, dtype):
return datetime.datetime.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Date_element(cls, dtype):
return datetime.date.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Time_element(cls, dtype):
return datetime.time.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_JSON(cls, s, dtype, pandas_type):
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/sqlite/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def test_type_map(db):
sol = pd.DataFrame(
{"str_col": ["a"], "date_col": pd.Series([date(2022, 1, 1)], dtype="object")}
)
sol["date_col"] = sol["date_col"].astype(res["date_col"].dtype)

assert res.equals(sol)


Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1253,7 +1253,7 @@ def test_string_quantile(alltypes, func):
)
def test_date_quantile(alltypes):
expr = alltypes.timestamp_col.date().quantile(0.5)
result = expr.execute()
result = expr.execute().to_pydatetime().date()
assert result == date(2009, 12, 31)


Expand Down
33 changes: 11 additions & 22 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,9 +646,7 @@ def convert_to_offset(x):
"ignore", category=(UserWarning, pd.errors.PerformanceWarning)
)
expected = (
pd.to_datetime(df.date_string_col)
.add(offset)
.map(lambda ts: ts.normalize().date(), na_action="ignore")
pd.to_datetime(df.date_string_col).add(offset).astype("datetime64[s]")
)

expected = backend.default_series_rename(expected)
Expand Down Expand Up @@ -723,12 +721,7 @@ def convert_to_offset(x):
),
param(
lambda t, _: t.timestamp_col.date() + ibis.interval(days=4),
lambda t, _: (
t.timestamp_col.dt.floor("d")
.add(pd.Timedelta(days=4))
.dt.normalize()
.dt.date
),
lambda t, _: t.timestamp_col.dt.floor("d").add(pd.Timedelta(days=4)),
id="date-add-interval",
marks=[
pytest.mark.notimpl(
Expand All @@ -739,12 +732,7 @@ def convert_to_offset(x):
),
param(
lambda t, _: t.timestamp_col.date() - ibis.interval(days=14),
lambda t, _: (
t.timestamp_col.dt.floor("d")
.sub(pd.Timedelta(days=14))
.dt.normalize()
.dt.date
),
lambda t, _: t.timestamp_col.dt.floor("d").sub(pd.Timedelta(days=14)),
id="date-subtract-interval",
marks=[
pytest.mark.notimpl(
Expand Down Expand Up @@ -999,14 +987,15 @@ def test_interval_add_cast_column(backend, alltypes, df):
delta = alltypes.bigint_col.cast("interval('D')")
expr = alltypes.select("id", (timestamp_date + delta).name("tmp"))
result = expr.execute().sort_values("id").reset_index().tmp

df = df.sort_values("id").reset_index(drop=True)
expected = (
df["timestamp_col"]
.dt.normalize()
.add(df.bigint_col.astype("timedelta64[D]"))
.rename("tmp")
.dt.date
)

backend.assert_series_equal(result, expected.astype(result.dtype))


Expand Down Expand Up @@ -2239,21 +2228,21 @@ def test_time_literal_sql(dialect, snapshot, micros):
)
def test_date_scalar(con, value, func):
expr = ibis.date(func(value)).name("tmp")

result = con.execute(expr)

assert not isinstance(result, datetime.datetime)
assert isinstance(result, datetime.date)

assert result == datetime.date.fromisoformat(value)
assert isinstance(result, pd.Timestamp)
assert result == pd.Timestamp.fromisoformat(value)


@pytest.mark.notyet(
["datafusion", "druid", "exasol"], raises=com.OperationNotDefinedError
)
def test_simple_unix_date_offset(con):
d = ibis.date("2023-04-07")
s = "2023-04-07"
d = ibis.date(s)
expr = d.epoch_days()
result = con.execute(expr)
delta = datetime.date(2023, 4, 7) - datetime.date(1970, 1, 1)
assert result == delta.days
assert isinstance(result, pd.Timestamp)
assert result == pd.Timestamp.fromisoformat(s)
13 changes: 8 additions & 5 deletions ibis/formats/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,17 +222,20 @@ def convert_Timestamp(cls, s, dtype, pandas_type):
def convert_Date(cls, s, dtype, pandas_type):
if isinstance(s.dtype, pd.DatetimeTZDtype):
s = s.dt.tz_convert("UTC").dt.tz_localize(None)

try:
return s.astype(pandas_type).dt.date
return s.astype(pandas_type)
except (ValueError, TypeError, pd._libs.tslibs.OutOfBoundsDatetime):

def try_date(v):
if isinstance(v, datetime.datetime):
return v.date()
if isinstance(v, datetime.date):
return pd.Timestamp(v)
elif isinstance(v, str):
if v.endswith("Z"):
return datetime.datetime.fromisoformat(v[:-1]).date()
return datetime.date.fromisoformat(v)
datetime_obj = datetime.datetime.fromisoformat(v[:-1])
else:
datetime_obj = datetime.datetime.fromisoformat(v)
return pd.Timestamp(datetime_obj)
else:
return v

Expand Down
Loading