From a88a484942a816007fe4a374a83aae7474e7040f Mon Sep 17 00:00:00 2001 From: Sreesh Maheshwar Date: Sun, 12 Jan 2025 18:09:04 +0000 Subject: [PATCH 1/2] Improve `LocationProvider` unit tests --- tests/table/test_locations.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py index bda2442aca..44136e8c78 100644 --- a/tests/table/test_locations.py +++ b/tests/table/test_locations.py @@ -38,12 +38,18 @@ def new_data_location(self, data_file_name: str, partition_key: Optional[Partiti return f"custom_location_provider/{data_file_name}" -def test_default_location_provider() -> None: +def test_simple_location_provider_no_partition() -> None: provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT) assert provider.new_data_location("my_file") == "table_location/data/my_file" +def test_simple_location_provider_with_partition() -> None: + provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT) + + assert provider.new_data_location("my_file", PARTITION_KEY) == "table_location/data/string_field=example_string/my_file" + + def test_custom_location_provider() -> None: qualified_name = CustomLocationProvider.__module__ + "." + CustomLocationProvider.__name__ provider = load_location_provider( @@ -82,19 +88,18 @@ def test_object_storage_injects_entropy() -> None: assert all(c in "01" for c in dir_name) -@pytest.mark.parametrize("object_storage", [True, False]) -def test_partition_value_in_path(object_storage: bool) -> None: +def test_object_storage_with_partition() -> None: provider = load_location_provider( table_location="table_location", - table_properties={ - "write.object-storage.enabled": str(object_storage), - }, + table_properties={"write.object-storage.enabled": "true"}, ) location = provider.new_data_location("test.parquet", PARTITION_KEY) - partition_segment = location.split("/")[-2] - assert partition_segment == "string_field=example_string" + # Partition values AND entropy included in the path. Entropy differs to that in the test below because the partition + # key AND the data file name are used as the hash input. This matches Java behaviour; the hash below is what the + # Java implementation produces for this input too. + assert location == "table_location/data/0001/0010/1001/00000011/string_field=example_string/test.parquet" # NB: We test here with None partition key too because disabling partitioned paths still replaces final / with - even in From b2bf54b7db4908aa5488b656e64fb40fe3ec2d37 Mon Sep 17 00:00:00 2001 From: Sreesh Maheshwar Date: Sun, 12 Jan 2025 19:56:22 +0000 Subject: [PATCH 2/2] Renamed `test_object_storage_injects_entropy` to test_object_storage_no_partition --- tests/table/test_locations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py index 44136e8c78..956eb517b1 100644 --- a/tests/table/test_locations.py +++ b/tests/table/test_locations.py @@ -71,7 +71,7 @@ def test_custom_location_provider_not_found() -> None: ) -def test_object_storage_injects_entropy() -> None: +def test_object_storage_no_partition() -> None: provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"}) location = provider.new_data_location("test.parquet")