From 200e6c860eeabbf04b16b4b46c9ae70df3c362ee Mon Sep 17 00:00:00 2001 From: Jayce Date: Mon, 20 Jan 2025 16:54:49 -0500 Subject: [PATCH 1/4] feat: support datetime objects in literal instantiation --- pyiceberg/expressions/literals.py | 4 ++++ tests/expressions/test_literals.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py index d1c170d0dd..d5c92206b2 100644 --- a/pyiceberg/expressions/literals.py +++ b/pyiceberg/expressions/literals.py @@ -23,6 +23,7 @@ import struct from abc import ABC, abstractmethod +from datetime import datetime from decimal import ROUND_HALF_UP, Decimal from functools import singledispatchmethod from math import isnan @@ -49,6 +50,7 @@ ) from pyiceberg.utils.datetime import ( date_str_to_days, + datetime_to_micros, micros_to_days, time_str_to_micros, timestamp_to_micros, @@ -145,6 +147,8 @@ def literal(value: L) -> Literal[L]: return BinaryLiteral(value) elif isinstance(value, Decimal): return DecimalLiteral(value) + elif isinstance(value, datetime): + return TimestampLiteral(datetime_to_micros(value)) else: raise TypeError(f"Invalid literal value: {repr(value)}") diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py index 59c2a3deaa..6a64f8a038 100644 --- a/tests/expressions/test_literals.py +++ b/tests/expressions/test_literals.py @@ -906,6 +906,10 @@ def test_uuid_to_binary() -> None: assert isinstance(binary_literal, BinaryLiteral) # type: ignore +def test_literal_from_datetime() -> None: + assert isinstance(literal(datetime.datetime.now()), TimestampLiteral) + + # __ __ ___ # | \/ |_ _| _ \_ _ # | |\/| | || | _/ || | From 8af4ca13e803122d4cd5a42517df5bcd06998ccf Mon Sep 17 00:00:00 2001 From: Jayce Date: Mon, 20 Jan 2025 17:38:21 -0500 Subject: [PATCH 2/4] add integration test --- tests/integration/test_reads.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index f2e79bae60..082360f7c7 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -19,6 +19,7 @@ import math import time import uuid +from datetime import datetime from pathlib import PosixPath from urllib.parse import urlparse @@ -950,3 +951,11 @@ def test_read_from_s3_and_local_fs(catalog: Catalog, tmp_path: PosixPath) -> Non result_table = tbl.scan().to_arrow() assert result_table["colA"].to_pylist() == ["one", "one"] + + +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_scan_with_datetime(catalog: Catalog) -> None: + table = create_table(catalog) + # test that this doesn't raise an exception... + table.scan(row_filter=GreaterThanOrEqual("datetime", datetime.now())).to_pandas() From 7e7654f92d7d269b6c0eed48b765064cab17231b Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Mon, 20 Jan 2025 19:30:42 -0500 Subject: [PATCH 3/4] proper tests --- tests/integration/test_reads.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 082360f7c7..ee5f8a2574 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -19,7 +19,7 @@ import math import time import uuid -from datetime import datetime +from datetime import datetime, timedelta from pathlib import PosixPath from urllib.parse import urlparse @@ -957,5 +957,24 @@ def test_read_from_s3_and_local_fs(catalog: Catalog, tmp_path: PosixPath) -> Non @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_scan_with_datetime(catalog: Catalog) -> None: table = create_table(catalog) - # test that this doesn't raise an exception... - table.scan(row_filter=GreaterThanOrEqual("datetime", datetime.now())).to_pandas() + + yesterday = datetime.now() - timedelta(days=1) + table.append( + pa.Table.from_pylist( + [ + { + "str": "foo", + "int": 1, + "bool": True, + "datetime": yesterday, + } + ], + schema=table.schema().as_arrow(), + ), + ) + + df = table.scan(row_filter=GreaterThanOrEqual("datetime", yesterday)).to_pandas() + assert len(df) == 1 + + df = table.scan(row_filter=LessThan("datetime", yesterday)).to_pandas() + assert len(df) == 0 From b0510afb33ea447ac8010963ad0d699e1b96e649 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Tue, 21 Jan 2025 16:39:48 -0500 Subject: [PATCH 4/4] fix typing --- pyiceberg/expressions/literals.py | 2 +- pyiceberg/typedef.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py index d5c92206b2..c91c759d0f 100644 --- a/pyiceberg/expressions/literals.py +++ b/pyiceberg/expressions/literals.py @@ -148,7 +148,7 @@ def literal(value: L) -> Literal[L]: elif isinstance(value, Decimal): return DecimalLiteral(value) elif isinstance(value, datetime): - return TimestampLiteral(datetime_to_micros(value)) + return TimestampLiteral(datetime_to_micros(value)) # type: ignore else: raise TypeError(f"Invalid literal value: {repr(value)}") diff --git a/pyiceberg/typedef.py b/pyiceberg/typedef.py index 01b8bea58c..6ca53bd61f 100644 --- a/pyiceberg/typedef.py +++ b/pyiceberg/typedef.py @@ -17,6 +17,7 @@ from __future__ import annotations from abc import abstractmethod +from datetime import datetime from decimal import Decimal from functools import lru_cache from typing import ( @@ -78,7 +79,7 @@ def __missing__(self, key: K) -> V: RecursiveDict = Dict[str, Union[str, "RecursiveDict"]] # Represents the literal value -L = TypeVar("L", str, bool, int, float, bytes, UUID, Decimal, covariant=True) +L = TypeVar("L", str, bool, int, float, bytes, UUID, Decimal, datetime, covariant=True) @runtime_checkable