@@ -100,10 +100,8 @@ def _inspect_files_asserts(df: pa.Table, spark_df: DataFrame) -> None:
         assert isinstance(value.as_py(), int)
 
     for split_offsets in df["split_offsets"]:
-        assert isinstance(split_offsets.as_py(), list)
-
-    for file_format in df["file_format"]:
-        assert file_format.as_py() == "PARQUET"
+        if split_offsets.as_py() is not None:
+            assert isinstance(split_offsets.as_py(), list)
 
     for file_path in df["file_path"]:
         assert file_path.as_py().startswith("s3://")
@@ -985,3 +983,49 @@ def test_inspect_all_files(
     _inspect_files_asserts(all_files_df, spark.table(f"{identifier}.all_files"))
     _inspect_files_asserts(all_data_files_df, spark.table(f"{identifier}.all_data_files"))
     _inspect_files_asserts(all_delete_files_df, spark.table(f"{identifier}.all_delete_files"))
+
+
+@pytest.mark.integration
+def test_inspect_files_format_version_3(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None:
+    identifier = "default.table_metadata_files"
+
+    tbl = _create_table(
+        session_catalog,
+        identifier,
+        properties={
+            "format-version": "3",
+            "write.delete.mode": "merge-on-read",
+            "write.update.mode": "merge-on-read",
+            "write.merge.mode": "merge-on-read",
+        },
+    )
+
+    insert_data_sql = f"""INSERT INTO {identifier} VALUES
+        (false, 'a', 'aaaaaaaaaaaaaaaaaaaaaa', 1, 1, 0.0, 0.0, TIMESTAMP('2023-01-01 19:25:00'), TIMESTAMP('2023-01-01 19:25:00+00:00'), DATE('2023-01-01'), X'01', X'00000000000000000000000000000000'),
+        (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL),
+        (true, 'z', 'zzzzzzzzzzzzzzzzzzzzzz', 9, 9, 0.9, 0.9, TIMESTAMP('2023-03-01 19:25:00'), TIMESTAMP('2023-03-01 19:25:00+00:00'), DATE('2023-03-01'), X'12', X'11111111111111111111111111111111');
+    """
+
+    spark.sql(insert_data_sql)
+    spark.sql(insert_data_sql)
+    spark.sql(f"UPDATE {identifier} SET int = 2 WHERE int = 1")
+    spark.sql(f"DELETE FROM {identifier} WHERE int = 9")
+    spark.table(identifier).show(20, False)
+
+    tbl.refresh()
+
+    files_df = tbl.inspect.files()
+    data_files_df = tbl.inspect.data_files()
+    delete_files_df = tbl.inspect.delete_files()
+
+    all_files_df = tbl.inspect.all_files()
+    all_data_files_df = tbl.inspect.all_data_files()
+    all_delete_files_df = tbl.inspect.all_delete_files()
+
+    _inspect_files_asserts(files_df, spark.table(f"{identifier}.files"))
+    _inspect_files_asserts(data_files_df, spark.table(f"{identifier}.data_files"))
+    _inspect_files_asserts(delete_files_df, spark.table(f"{identifier}.delete_files"))
+
+    _inspect_files_asserts(all_files_df, spark.table(f"{identifier}.all_files"))
+    _inspect_files_asserts(all_data_files_df, spark.table(f"{identifier}.all_data_files"))
+    _inspect_files_asserts(all_delete_files_df, spark.table(f"{identifier}.all_delete_files"))