58
58
from pyiceberg .io import InputStream , OutputStream , load_file_io
59
59
from pyiceberg .io .pyarrow import (
60
60
ICEBERG_SCHEMA ,
61
+ ArrowScan ,
61
62
PyArrowFile ,
62
63
PyArrowFileIO ,
63
64
StatsAggregator ,
69
70
_to_requested_schema ,
70
71
bin_pack_arrow_table ,
71
72
expression_to_pyarrow ,
72
- project_table ,
73
73
schema_to_pyarrow ,
74
74
)
75
75
from pyiceberg .manifest import DataFile , DataFileContent , FileFormat
@@ -952,7 +952,19 @@ def file_map(schema_map: Schema, tmpdir: str) -> str:
952
952
def project (
953
953
schema : Schema , files : List [str ], expr : Optional [BooleanExpression ] = None , table_schema : Optional [Schema ] = None
954
954
) -> pa .Table :
955
- return project_table (
955
+ return ArrowScan (
956
+ table_metadata = TableMetadataV2 (
957
+ location = "file://a/b/" ,
958
+ last_column_id = 1 ,
959
+ format_version = 2 ,
960
+ schemas = [table_schema or schema ],
961
+ partition_specs = [PartitionSpec ()],
962
+ ),
963
+ io = PyArrowFileIO (),
964
+ projected_schema = schema ,
965
+ row_filter = expr or AlwaysTrue (),
966
+ case_sensitive = True ,
967
+ ).to_table (
956
968
tasks = [
957
969
FileScanTask (
958
970
DataFile (
@@ -965,18 +977,7 @@ def project(
965
977
)
966
978
)
967
979
for file in files
968
- ],
969
- table_metadata = TableMetadataV2 (
970
- location = "file://a/b/" ,
971
- last_column_id = 1 ,
972
- format_version = 2 ,
973
- schemas = [table_schema or schema ],
974
- partition_specs = [PartitionSpec ()],
975
- ),
976
- io = PyArrowFileIO (),
977
- row_filter = expr or AlwaysTrue (),
978
- projected_schema = schema ,
979
- case_sensitive = True ,
980
+ ]
980
981
)
981
982
982
983
@@ -1411,9 +1412,7 @@ def test_delete(deletes_file: str, example_task: FileScanTask, table_schema_simp
1411
1412
data_file = example_task .file ,
1412
1413
delete_files = {DataFile (content = DataFileContent .POSITION_DELETES , file_path = deletes_file , file_format = FileFormat .PARQUET )},
1413
1414
)
1414
-
1415
- with_deletes = project_table (
1416
- tasks = [example_task_with_delete ],
1415
+ with_deletes = ArrowScan (
1417
1416
table_metadata = TableMetadataV2 (
1418
1417
location = metadata_location ,
1419
1418
last_column_id = 1 ,
@@ -1423,9 +1422,9 @@ def test_delete(deletes_file: str, example_task: FileScanTask, table_schema_simp
1423
1422
partition_specs = [PartitionSpec ()],
1424
1423
),
1425
1424
io = load_file_io (),
1426
- row_filter = AlwaysTrue (),
1427
1425
projected_schema = table_schema_simple ,
1428
- )
1426
+ row_filter = AlwaysTrue (),
1427
+ ).to_table (tasks = [example_task_with_delete ])
1429
1428
1430
1429
assert (
1431
1430
str (with_deletes )
@@ -1450,8 +1449,7 @@ def test_delete_duplicates(deletes_file: str, example_task: FileScanTask, table_
1450
1449
},
1451
1450
)
1452
1451
1453
- with_deletes = project_table (
1454
- tasks = [example_task_with_delete ],
1452
+ with_deletes = ArrowScan (
1455
1453
table_metadata = TableMetadataV2 (
1456
1454
location = metadata_location ,
1457
1455
last_column_id = 1 ,
@@ -1461,9 +1459,9 @@ def test_delete_duplicates(deletes_file: str, example_task: FileScanTask, table_
1461
1459
partition_specs = [PartitionSpec ()],
1462
1460
),
1463
1461
io = load_file_io (),
1464
- row_filter = AlwaysTrue (),
1465
1462
projected_schema = table_schema_simple ,
1466
- )
1463
+ row_filter = AlwaysTrue (),
1464
+ ).to_table (tasks = [example_task_with_delete ])
1467
1465
1468
1466
assert (
1469
1467
str (with_deletes )
@@ -1480,8 +1478,8 @@ def test_delete_duplicates(deletes_file: str, example_task: FileScanTask, table_
1480
1478
1481
1479
def test_pyarrow_wrap_fsspec (example_task : FileScanTask , table_schema_simple : Schema ) -> None :
1482
1480
metadata_location = "file://a/b/c.json"
1483
- projection = project_table (
1484
- tasks = [ example_task ],
1481
+
1482
+ projection = ArrowScan (
1485
1483
table_metadata = TableMetadataV2 (
1486
1484
location = metadata_location ,
1487
1485
last_column_id = 1 ,
@@ -1494,7 +1492,7 @@ def test_pyarrow_wrap_fsspec(example_task: FileScanTask, table_schema_simple: Sc
1494
1492
case_sensitive = True ,
1495
1493
projected_schema = table_schema_simple ,
1496
1494
row_filter = AlwaysTrue (),
1497
- )
1495
+ ). to_table ( tasks = [ example_task ])
1498
1496
1499
1497
assert (
1500
1498
str (projection )
0 commit comments