Skip to content

Commit

Permalink
Improve LocationProvider unit tests (#1511)
Browse files Browse the repository at this point in the history
* Improve `LocationProvider` unit tests

* Renamed `test_object_storage_injects_entropy` to test_object_storage_no_partition

---------

Co-authored-by: Sreesh Maheshwar <[email protected]>
  • Loading branch information
smaheshwar-pltr and Sreesh Maheshwar authored Jan 13, 2025
1 parent c409678 commit a09bcde
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions tests/table/test_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,18 @@ def new_data_location(self, data_file_name: str, partition_key: Optional[Partiti
return f"custom_location_provider/{data_file_name}"


def test_default_location_provider() -> None:
def test_simple_location_provider_no_partition() -> None:
provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"})

assert provider.new_data_location("my_file") == "table_location/data/my_file"


def test_simple_location_provider_with_partition() -> None:
provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"})

assert provider.new_data_location("my_file", PARTITION_KEY) == "table_location/data/string_field=example_string/my_file"


def test_custom_location_provider() -> None:
qualified_name = CustomLocationProvider.__module__ + "." + CustomLocationProvider.__name__
provider = load_location_provider(
Expand All @@ -65,7 +71,7 @@ def test_custom_location_provider_not_found() -> None:
)


def test_object_storage_injects_entropy() -> None:
def test_object_storage_no_partition() -> None:
provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)

location = provider.new_data_location("test.parquet")
Expand All @@ -82,19 +88,18 @@ def test_object_storage_injects_entropy() -> None:
assert all(c in "01" for c in dir_name)


@pytest.mark.parametrize("object_storage", [True, False])
def test_partition_value_in_path(object_storage: bool) -> None:
def test_object_storage_with_partition() -> None:
provider = load_location_provider(
table_location="table_location",
table_properties={
"write.object-storage.enabled": str(object_storage),
},
table_properties={"write.object-storage.enabled": "true"},
)

location = provider.new_data_location("test.parquet", PARTITION_KEY)
partition_segment = location.split("/")[-2]

assert partition_segment == "string_field=example_string"
# Partition values AND entropy included in the path. Entropy differs to that in the test below because the partition
# key AND the data file name are used as the hash input. This matches Java behaviour; the hash below is what the
# Java implementation produces for this input too.
assert location == "table_location/data/0001/0010/1001/00000011/string_field=example_string/test.parquet"


# NB: We test here with None partition key too because disabling partitioned paths still replaces final / with - even in
Expand Down

0 comments on commit a09bcde

Please sign in to comment.