Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support datetime objects in literal instantiation #1542

Merged
merged 4 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pyiceberg/expressions/literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import struct
from abc import ABC, abstractmethod
from datetime import datetime
from decimal import ROUND_HALF_UP, Decimal
from functools import singledispatchmethod
from math import isnan
Expand All @@ -49,6 +50,7 @@
)
from pyiceberg.utils.datetime import (
date_str_to_days,
datetime_to_micros,
micros_to_days,
time_str_to_micros,
timestamp_to_micros,
Expand Down Expand Up @@ -145,6 +147,8 @@ def literal(value: L) -> Literal[L]:
return BinaryLiteral(value)
elif isinstance(value, Decimal):
return DecimalLiteral(value)
elif isinstance(value, datetime):
return TimestampLiteral(datetime_to_micros(value))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tzinfo is handled here:

def datetime_to_micros(dt: datetime) -> int:
    """Return the number of microseconds between the epoch and *dt*.

    The reference epoch is chosen by whether *dt* carries a ``tzinfo``:
    ``EPOCH_TIMESTAMPTZ`` when it does, ``EPOCH_TIMESTAMP`` otherwise, so the
    subtraction never mixes the two kinds of datetime.
    """
    epoch = EPOCH_TIMESTAMPTZ if dt.tzinfo else EPOCH_TIMESTAMP
    elapsed = dt - epoch
    # timedelta stores days, seconds and microseconds separately; fold the
    # first two into whole seconds before scaling to microseconds.
    whole_seconds = elapsed.days * 86400 + elapsed.seconds
    return whole_seconds * 1_000_000 + elapsed.microseconds

else:
raise TypeError(f"Invalid literal value: {repr(value)}")

Expand Down
4 changes: 4 additions & 0 deletions tests/expressions/test_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,10 @@ def test_uuid_to_binary() -> None:
assert isinstance(binary_literal, BinaryLiteral) # type: ignore


def test_literal_from_datetime() -> None:
    """Passing a ``datetime`` to ``literal`` yields a ``TimestampLiteral``."""
    current_time = datetime.datetime.now()
    timestamp_literal = literal(current_time)
    assert isinstance(timestamp_literal, TimestampLiteral)


# __ __ ___
# | \/ |_ _| _ \_ _
# | |\/| | || | _/ || |
Expand Down
28 changes: 28 additions & 0 deletions tests/integration/test_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import math
import time
import uuid
from datetime import datetime, timedelta
from pathlib import PosixPath
from urllib.parse import urlparse

Expand Down Expand Up @@ -950,3 +951,30 @@ def test_read_from_s3_and_local_fs(catalog: Catalog, tmp_path: PosixPath) -> Non

result_table = tbl.scan().to_arrow()
assert result_table["colA"].to_pylist() == ["one", "one"]


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_scan_with_datetime(catalog: Catalog) -> None:
    """Scan row filters accept a plain ``datetime`` as the comparison value.

    A single row stamped with ``cutoff`` is appended, then the table is
    scanned twice using that same ``datetime`` as the filter bound.
    """
    table = create_table(catalog)

    cutoff = datetime.now() - timedelta(days=1)
    row = {
        "str": "foo",
        "int": 1,
        "bool": True,
        "datetime": cutoff,
    }
    arrow_rows = pa.Table.from_pylist([row], schema=table.schema().as_arrow())
    table.append(arrow_rows)

    # The only row sits exactly at the cutoff, so ">=" keeps it ...
    at_or_after = table.scan(row_filter=GreaterThanOrEqual("datetime", cutoff)).to_pandas()
    assert len(at_or_after) == 1

    # ... while a strict "<" excludes it.
    strictly_before = table.scan(row_filter=LessThan("datetime", cutoff)).to_pandas()
    assert len(strictly_before) == 0
jayceslesar marked this conversation as resolved.
Show resolved Hide resolved
Loading