From 2cd4e789d0fd7d6aab261037e8229ca94c218923 Mon Sep 17 00:00:00 2001 From: Jayce Slesar <47452474+jayceslesar@users.noreply.github.com> Date: Tue, 21 Jan 2025 21:52:57 -0500 Subject: [PATCH] feat: support datetime objects in literal instantiation (#1542) * feat: support datetime objects in literal instantiation * add integration test * proper tests * fix typing --- pyiceberg/expressions/literals.py | 4 ++++ pyiceberg/typedef.py | 3 ++- tests/expressions/test_literals.py | 4 ++++ tests/integration/test_reads.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py index d1c170d0dd..c91c759d0f 100644 --- a/pyiceberg/expressions/literals.py +++ b/pyiceberg/expressions/literals.py @@ -23,6 +23,7 @@ import struct from abc import ABC, abstractmethod +from datetime import datetime from decimal import ROUND_HALF_UP, Decimal from functools import singledispatchmethod from math import isnan @@ -49,6 +50,7 @@ ) from pyiceberg.utils.datetime import ( date_str_to_days, + datetime_to_micros, micros_to_days, time_str_to_micros, timestamp_to_micros, @@ -145,6 +147,8 @@ def literal(value: L) -> Literal[L]: return BinaryLiteral(value) elif isinstance(value, Decimal): return DecimalLiteral(value) + elif isinstance(value, datetime): + return TimestampLiteral(datetime_to_micros(value)) # type: ignore else: raise TypeError(f"Invalid literal value: {repr(value)}") diff --git a/pyiceberg/typedef.py b/pyiceberg/typedef.py index e3fc312801..9eacc752c1 100644 --- a/pyiceberg/typedef.py +++ b/pyiceberg/typedef.py @@ -17,6 +17,7 @@ from __future__ import annotations from abc import abstractmethod +from datetime import datetime from decimal import Decimal from functools import lru_cache from typing import ( @@ -78,7 +79,7 @@ def __missing__(self, key: K) -> V: RecursiveDict = Dict[str, Union[str, "RecursiveDict"]] # Represents the literal value -L = TypeVar("L", str, bool, int, float, bytes, UUID, Decimal, covariant=True) +L = TypeVar("L", str, bool, int, float, bytes, UUID, Decimal, datetime, covariant=True) @runtime_checkable diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py index 59c2a3deaa..6a64f8a038 100644 --- a/tests/expressions/test_literals.py +++ b/tests/expressions/test_literals.py @@ -906,6 +906,10 @@ def test_uuid_to_binary() -> None: assert isinstance(binary_literal, BinaryLiteral) # type: ignore +def test_literal_from_datetime() -> None: + assert isinstance(literal(datetime.datetime.now()), TimestampLiteral) + + # __ __ ___ # | \/ |_ _| _ \_ _ # | |\/| | || | _/ || | diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index f2e79bae60..ee5f8a2574 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -19,6 +19,7 @@ import math import time import uuid +from datetime import datetime, timedelta from pathlib import PosixPath from urllib.parse import urlparse @@ -950,3 +951,30 @@ def test_read_from_s3_and_local_fs(catalog: Catalog, tmp_path: PosixPath) -> Non result_table = tbl.scan().to_arrow() assert result_table["colA"].to_pylist() == ["one", "one"] + + +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_scan_with_datetime(catalog: Catalog) -> None: + table = create_table(catalog) + + yesterday = datetime.now() - timedelta(days=1) + table.append( + pa.Table.from_pylist( + [ + { + "str": "foo", + "int": 1, + "bool": True, + "datetime": yesterday, + } + ], + schema=table.schema().as_arrow(), + ), + ) + + df = table.scan(row_filter=GreaterThanOrEqual("datetime", yesterday)).to_pandas() + assert len(df) == 1 + + df = table.scan(row_filter=LessThan("datetime", yesterday)).to_pandas() + assert len(df) == 0