@@ -38,12 +38,18 @@ def new_data_location(self, data_file_name: str, partition_key: Optional[Partiti
38
38
return f"custom_location_provider/{ data_file_name } "
39
39
40
40
41
- def test_default_location_provider () -> None :
41
+ def test_simple_location_provider_no_partition () -> None :
42
42
provider = load_location_provider (table_location = "table_location" , table_properties = {"write.object-storage.enabled" : "false" })
43
43
44
44
assert provider .new_data_location ("my_file" ) == "table_location/data/my_file"
45
45
46
46
47
+ def test_simple_location_provider_with_partition () -> None :
48
+ provider = load_location_provider (table_location = "table_location" , table_properties = {"write.object-storage.enabled" : "false" })
49
+
50
+ assert provider .new_data_location ("my_file" , PARTITION_KEY ) == "table_location/data/string_field=example_string/my_file"
51
+
52
+
47
53
def test_custom_location_provider () -> None :
48
54
qualified_name = CustomLocationProvider .__module__ + "." + CustomLocationProvider .__name__
49
55
provider = load_location_provider (
@@ -65,7 +71,7 @@ def test_custom_location_provider_not_found() -> None:
65
71
)
66
72
67
73
68
- def test_object_storage_injects_entropy () -> None :
74
+ def test_object_storage_no_partition () -> None :
69
75
provider = load_location_provider (table_location = "table_location" , table_properties = EMPTY_DICT )
70
76
71
77
location = provider .new_data_location ("test.parquet" )
@@ -82,19 +88,18 @@ def test_object_storage_injects_entropy() -> None:
82
88
assert all (c in "01" for c in dir_name )
83
89
84
90
85
- @pytest .mark .parametrize ("object_storage" , [True , False ])
86
- def test_partition_value_in_path (object_storage : bool ) -> None :
91
+ def test_object_storage_with_partition () -> None :
87
92
provider = load_location_provider (
88
93
table_location = "table_location" ,
89
- table_properties = {
90
- "write.object-storage.enabled" : str (object_storage ),
91
- },
94
+ table_properties = {"write.object-storage.enabled" : "true" },
92
95
)
93
96
94
97
location = provider .new_data_location ("test.parquet" , PARTITION_KEY )
95
- partition_segment = location .split ("/" )[- 2 ]
96
98
97
- assert partition_segment == "string_field=example_string"
99
+ # Partition values AND entropy included in the path. Entropy differs from that in the test below because the partition
100
+ # key AND the data file name are used as the hash input. This matches Java behaviour; the hash below is what the
101
+ # Java implementation produces for this input too.
102
+ assert location == "table_location/data/0001/0010/1001/00000011/string_field=example_string/test.parquet"
98
103
99
104
100
105
# NB: We test here with None partition key too because disabling partitioned paths still replaces final / with - even in
0 commit comments