Skip to content

Commit 8888093

Browse files
committed
1. Update docs
2. Don't use the latest schema and partition spec; use the ones in effect at the time of the snapshot instead
1 parent 1834dd8 commit 8888093

File tree

2 files changed

+359
-8
lines changed

2 files changed

+359
-8
lines changed

mkdocs/docs/api.md

+349-2
Original file line numberDiff line numberDiff line change
@@ -792,8 +792,6 @@ readable_metrics: [
792792
[6.0989]]
793793
```
794794

795-
To show all the table's current manifest entries for both data and delete files, use `table.inspect.all_entries()`.
796-
797795
### References
798796

799797
To show a table's known snapshot references:
@@ -1049,6 +1047,355 @@ readable_metrics: [
10491047

10501048
To show only data files or delete files in the current snapshot, use `table.inspect.data_files()` and `table.inspect.delete_files()` respectively.
10511049

1050+
### All Metadata Tables
1051+
1052+
These tables are unions of the corresponding metadata tables that are specific to the current snapshot; instead of covering only the current snapshot, they return metadata across all snapshots.
1053+
!!! danger
1054+
The "all" metadata tables may produce more than one row per data file or manifest file because metadata files may be part of more than one table snapshot.
1055+
1056+
#### All Entries
1057+
1058+
To show the table's manifest entries from all the snapshots for both data and delete files:
1059+
1060+
```python
1061+
table.inspect.all_entries()
1062+
```
1063+
1064+
```python
1065+
pyarrow.Table
1066+
status: int8 not null
1067+
snapshot_id: int64 not null
1068+
sequence_number: int64 not null
1069+
file_sequence_number: int64 not null
1070+
data_file: struct<content: int8 not null, file_path: string not null, file_format: string not null, spec_id: int32 not null, partition: struct<data: large_string> not null, record_count: int64 not null, file_size_in_bytes: int64 not null, column_sizes: map<int32, int64>, value_counts: map<int32, int64>, null_value_counts: map<int32, int64>, nan_value_counts: map<int32, int64>, lower_bounds: map<int32, binary>, upper_bounds: map<int32, binary>, key_metadata: binary, split_offsets: list<item: int64>, equality_ids: list<item: int32>, sort_order_id: int32> not null
1071+
child 0, content: int8 not null
1072+
child 1, file_path: string not null
1073+
child 2, file_format: string not null
1074+
child 3, spec_id: int32 not null
1075+
child 4, partition: struct<data: large_string> not null
1076+
child 0, data: large_string
1077+
child 5, record_count: int64 not null
1078+
child 6, file_size_in_bytes: int64 not null
1079+
child 7, column_sizes: map<int32, int64>
1080+
child 0, entries: struct<key: int32 not null, value: int64> not null
1081+
child 0, key: int32 not null
1082+
child 1, value: int64
1083+
child 8, value_counts: map<int32, int64>
1084+
child 0, entries: struct<key: int32 not null, value: int64> not null
1085+
child 0, key: int32 not null
1086+
child 1, value: int64
1087+
child 9, null_value_counts: map<int32, int64>
1088+
child 0, entries: struct<key: int32 not null, value: int64> not null
1089+
child 0, key: int32 not null
1090+
child 1, value: int64
1091+
child 10, nan_value_counts: map<int32, int64>
1092+
child 0, entries: struct<key: int32 not null, value: int64> not null
1093+
child 0, key: int32 not null
1094+
child 1, value: int64
1095+
child 11, lower_bounds: map<int32, binary>
1096+
child 0, entries: struct<key: int32 not null, value: binary> not null
1097+
child 0, key: int32 not null
1098+
child 1, value: binary
1099+
child 12, upper_bounds: map<int32, binary>
1100+
child 0, entries: struct<key: int32 not null, value: binary> not null
1101+
child 0, key: int32 not null
1102+
child 1, value: binary
1103+
child 13, key_metadata: binary
1104+
child 14, split_offsets: list<item: int64>
1105+
child 0, item: int64
1106+
child 15, equality_ids: list<item: int32>
1107+
child 0, item: int32
1108+
child 16, sort_order_id: int32
1109+
readable_metrics: struct<id: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: int32, upper_bound: int32> not null, data: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: large_string, upper_bound: large_string> not null>
1110+
child 0, id: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: int32, upper_bound: int32> not null
1111+
child 0, column_size: int64
1112+
child 1, value_count: int64
1113+
child 2, null_value_count: int64
1114+
child 3, nan_value_count: int64
1115+
child 4, lower_bound: int32
1116+
child 5, upper_bound: int32
1117+
child 1, data: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: large_string, upper_bound: large_string> not null
1118+
child 0, column_size: int64
1119+
child 1, value_count: int64
1120+
child 2, null_value_count: int64
1121+
child 3, nan_value_count: int64
1122+
child 4, lower_bound: large_string
1123+
child 5, upper_bound: large_string
1124+
----
1125+
status: [[1],[1],...,[],[]]
1126+
snapshot_id: [[6449946327458654223],[7507782590078860647],...,[],[]]
1127+
sequence_number: [[1],[2],...,[],[]]
1128+
file_sequence_number: [[1],[2],...,[],[]]
1129+
data_file: [
1130+
-- is_valid: all not null
1131+
-- child 0 type: int8
1132+
[0]
1133+
-- child 1 type: string
1134+
["s3://warehouse/default/table_metadata_all_entries/data/data=a/00000-1-20675924-1844-4414-aa3b-cbb033884013-0-00001.parquet"]
1135+
-- child 2 type: string
1136+
["PARQUET"]
1137+
-- child 3 type: int32
1138+
[0]
1139+
-- child 4 type: struct<data: large_string>
1140+
-- is_valid: all not null
1141+
-- child 0 type: large_string
1142+
["a"]
1143+
-- child 5 type: int64
1144+
[1]
1145+
-- child 6 type: int64
1146+
[636]
1147+
-- child 7 type: map<int32, int64>
1148+
[keys:[1,2]values:[39,40]]
1149+
-- child 8 type: map<int32, int64>
1150+
[keys:[1,2]values:[1,1]]
1151+
-- child 9 type: map<int32, int64>
1152+
[keys:[1,2]values:[0,0]]
1153+
-- child 10 type: map<int32, int64>
1154+
[keys:[]values:[]]
1155+
-- child 11 type: map<int32, binary>
1156+
[keys:[1,2]values:[01000000,61]]
1157+
-- child 12 type: map<int32, binary>
1158+
[keys:[1,2]values:[01000000,61]]
1159+
-- child 13 type: binary
1160+
[null]
1161+
-- child 14 type: list<item: int64>
1162+
[[4]]
1163+
-- child 15 type: list<item: int32>
1164+
[null]
1165+
-- child 16 type: int32
1166+
[0],
1167+
-- is_valid: all not null
1168+
-- child 0 type: int8
1169+
[0]
1170+
-- child 1 type: string
1171+
["s3://warehouse/default/table_metadata_all_entries/data/data=b/00000-3-c28af222-7039-435e-b2a9-a4dc698b75e5-0-00001.parquet"]
1172+
-- child 2 type: string
1173+
["PARQUET"]
1174+
-- child 3 type: int32
1175+
[0]
1176+
-- child 4 type: struct<data: large_string>
1177+
-- is_valid: all not null
1178+
-- child 0 type: large_string
1179+
["b"]
1180+
-- child 5 type: int64
1181+
[1]
1182+
-- child 6 type: int64
1183+
[636]
1184+
-- child 7 type: map<int32, int64>
1185+
[keys:[1,2]values:[39,40]]
1186+
-- child 8 type: map<int32, int64>
1187+
[keys:[1,2]values:[1,1]]
1188+
-- child 9 type: map<int32, int64>
1189+
[keys:[1,2]values:[0,0]]
1190+
-- child 10 type: map<int32, int64>
1191+
[keys:[]values:[]]
1192+
-- child 11 type: map<int32, binary>
1193+
[keys:[1,2]values:[02000000,62]]
1194+
-- child 12 type: map<int32, binary>
1195+
[keys:[1,2]values:[02000000,62]]
1196+
-- child 13 type: binary
1197+
[null]
1198+
-- child 14 type: list<item: int64>
1199+
[[4]]
1200+
-- child 15 type: list<item: int32>
1201+
[null]
1202+
-- child 16 type: int32
1203+
[0],
1204+
...,
1205+
-- is_valid: all not null
1206+
-- child 0 type: int8
1207+
[]
1208+
-- child 1 type: string
1209+
[]
1210+
-- child 2 type: string
1211+
[]
1212+
-- child 3 type: int32
1213+
[]
1214+
-- child 4 type: struct<data: large_string>
1215+
-- is_valid: all not null
1216+
-- child 0 type: large_string
1217+
[]
1218+
-- child 5 type: int64
1219+
[]
1220+
-- child 6 type: int64
1221+
[]
1222+
-- child 7 type: map<int32, int64>
1223+
[]
1224+
-- child 8 type: map<int32, int64>
1225+
[]
1226+
-- child 9 type: map<int32, int64>
1227+
[]
1228+
-- child 10 type: map<int32, int64>
1229+
[]
1230+
-- child 11 type: map<int32, binary>
1231+
[]
1232+
-- child 12 type: map<int32, binary>
1233+
[]
1234+
-- child 13 type: binary
1235+
[]
1236+
-- child 14 type: list<item: int64>
1237+
[]
1238+
-- child 15 type: list<item: int32>
1239+
[]
1240+
-- child 16 type: int32
1241+
[],
1242+
-- is_valid: all not null
1243+
-- child 0 type: int8
1244+
[]
1245+
-- child 1 type: string
1246+
[]
1247+
-- child 2 type: string
1248+
[]
1249+
-- child 3 type: int32
1250+
[]
1251+
-- child 4 type: struct<data: large_string>
1252+
-- is_valid: all not null
1253+
-- child 0 type: large_string
1254+
[]
1255+
-- child 5 type: int64
1256+
[]
1257+
-- child 6 type: int64
1258+
[]
1259+
-- child 7 type: map<int32, int64>
1260+
[]
1261+
-- child 8 type: map<int32, int64>
1262+
[]
1263+
-- child 9 type: map<int32, int64>
1264+
[]
1265+
-- child 10 type: map<int32, int64>
1266+
[]
1267+
-- child 11 type: map<int32, binary>
1268+
[]
1269+
-- child 12 type: map<int32, binary>
1270+
[]
1271+
-- child 13 type: binary
1272+
[]
1273+
-- child 14 type: list<item: int64>
1274+
[]
1275+
-- child 15 type: list<item: int32>
1276+
[]
1277+
-- child 16 type: int32
1278+
[]]
1279+
readable_metrics: [
1280+
-- is_valid: all not null
1281+
-- child 0 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: int32, upper_bound: int32>
1282+
-- is_valid: all not null
1283+
-- child 0 type: int64
1284+
[39]
1285+
-- child 1 type: int64
1286+
[1]
1287+
-- child 2 type: int64
1288+
[0]
1289+
-- child 3 type: int64
1290+
[null]
1291+
-- child 4 type: int32
1292+
[1]
1293+
-- child 5 type: int32
1294+
[1]
1295+
-- child 1 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: large_string, upper_bound: large_string>
1296+
-- is_valid: all not null
1297+
-- child 0 type: int64
1298+
[40]
1299+
-- child 1 type: int64
1300+
[1]
1301+
-- child 2 type: int64
1302+
[0]
1303+
-- child 3 type: int64
1304+
[null]
1305+
-- child 4 type: large_string
1306+
["a"]
1307+
-- child 5 type: large_string
1308+
["a"],
1309+
-- is_valid: all not null
1310+
-- child 0 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: int32, upper_bound: int32>
1311+
-- is_valid: all not null
1312+
-- child 0 type: int64
1313+
[39]
1314+
-- child 1 type: int64
1315+
[1]
1316+
-- child 2 type: int64
1317+
[0]
1318+
-- child 3 type: int64
1319+
[null]
1320+
-- child 4 type: int32
1321+
[2]
1322+
-- child 5 type: int32
1323+
[2]
1324+
-- child 1 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: large_string, upper_bound: large_string>
1325+
-- is_valid: all not null
1326+
-- child 0 type: int64
1327+
[40]
1328+
-- child 1 type: int64
1329+
[1]
1330+
-- child 2 type: int64
1331+
[0]
1332+
-- child 3 type: int64
1333+
[null]
1334+
-- child 4 type: large_string
1335+
["b"]
1336+
-- child 5 type: large_string
1337+
["b"],
1338+
...,
1339+
-- is_valid: all not null
1340+
-- child 0 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: int32, upper_bound: int32>
1341+
-- is_valid: all not null
1342+
-- child 0 type: int64
1343+
[]
1344+
-- child 1 type: int64
1345+
[]
1346+
-- child 2 type: int64
1347+
[]
1348+
-- child 3 type: int64
1349+
[]
1350+
-- child 4 type: int32
1351+
[]
1352+
-- child 5 type: int32
1353+
[]
1354+
-- child 1 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: large_string, upper_bound: large_string>
1355+
-- is_valid: all not null
1356+
-- child 0 type: int64
1357+
[]
1358+
-- child 1 type: int64
1359+
[]
1360+
-- child 2 type: int64
1361+
[]
1362+
-- child 3 type: int64
1363+
[]
1364+
-- child 4 type: large_string
1365+
[]
1366+
-- child 5 type: large_string
1367+
[],
1368+
-- is_valid: all not null
1369+
-- child 0 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: int32, upper_bound: int32>
1370+
-- is_valid: all not null
1371+
-- child 0 type: int64
1372+
[]
1373+
-- child 1 type: int64
1374+
[]
1375+
-- child 2 type: int64
1376+
[]
1377+
-- child 3 type: int64
1378+
[]
1379+
-- child 4 type: int32
1380+
[]
1381+
-- child 5 type: int32
1382+
[]
1383+
-- child 1 type: struct<column_size: int64, value_count: int64, null_value_count: int64, nan_value_count: int64, lower_bound: large_string, upper_bound: large_string>
1384+
-- is_valid: all not null
1385+
-- child 0 type: int64
1386+
[]
1387+
-- child 1 type: int64
1388+
[]
1389+
-- child 2 type: int64
1390+
[]
1391+
-- child 3 type: int64
1392+
[]
1393+
-- child 4 type: large_string
1394+
[]
1395+
-- child 5 type: large_string
1396+
[]]
1397+
```
1398+
10521399
## Add Files
10531400

10541401
Expert Iceberg users may choose to commit existing parquet files to the Iceberg table as data files, without rewriting them.

0 commit comments

Comments
 (0)