Skip to content

Commit a09bcde

Browse files
smaheshwar-pltr and Sreesh Maheshwar authored
Improve LocationProvider unit tests (#1511)
* Improve `LocationProvider` unit tests
* Renamed `test_object_storage_injects_entropy` to `test_object_storage_no_partition`

---------

Co-authored-by: Sreesh Maheshwar <[email protected]>
1 parent c409678 commit a09bcde

File tree

1 file changed

+14
-9
lines changed

1 file changed

+14
-9
lines changed

tests/table/test_locations.py

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -38,12 +38,18 @@ def new_data_location(self, data_file_name: str, partition_key: Optional[Partiti
3838
return f"custom_location_provider/{data_file_name}"
3939

4040

41-
def test_default_location_provider() -> None:
41+
def test_simple_location_provider_no_partition() -> None:
4242
provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"})
4343

4444
assert provider.new_data_location("my_file") == "table_location/data/my_file"
4545

4646

47+
def test_simple_location_provider_with_partition() -> None:
48+
provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"})
49+
50+
assert provider.new_data_location("my_file", PARTITION_KEY) == "table_location/data/string_field=example_string/my_file"
51+
52+
4753
def test_custom_location_provider() -> None:
4854
qualified_name = CustomLocationProvider.__module__ + "." + CustomLocationProvider.__name__
4955
provider = load_location_provider(
@@ -65,7 +71,7 @@ def test_custom_location_provider_not_found() -> None:
6571
)
6672

6773

68-
def test_object_storage_injects_entropy() -> None:
74+
def test_object_storage_no_partition() -> None:
6975
provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)
7076

7177
location = provider.new_data_location("test.parquet")
@@ -82,19 +88,18 @@ def test_object_storage_injects_entropy() -> None:
8288
assert all(c in "01" for c in dir_name)
8389

8490

85-
@pytest.mark.parametrize("object_storage", [True, False])
86-
def test_partition_value_in_path(object_storage: bool) -> None:
91+
def test_object_storage_with_partition() -> None:
8792
provider = load_location_provider(
8893
table_location="table_location",
89-
table_properties={
90-
"write.object-storage.enabled": str(object_storage),
91-
},
94+
table_properties={"write.object-storage.enabled": "true"},
9295
)
9396

9497
location = provider.new_data_location("test.parquet", PARTITION_KEY)
95-
partition_segment = location.split("/")[-2]
9698

97-
assert partition_segment == "string_field=example_string"
99+
# Partition values AND entropy included in the path. Entropy differs to that in the test below because the partition
100+
# key AND the data file name are used as the hash input. This matches Java behaviour; the hash below is what the
101+
# Java implementation produces for this input too.
102+
assert location == "table_location/data/0001/0010/1001/00000011/string_field=example_string/test.parquet"
98103

99104

100105
# NB: We test here with None partition key too because disabling partitioned paths still replaces final / with - even in

0 commit comments

Comments
 (0)