@@ -3840,8 +3840,46 @@ def test_orc_schema_conversion_with_field_ids() -> None:
38403840 id_field_no_ids = arrow_schema_no_ids .field (0 )
38413841 name_field_no_ids = arrow_schema_no_ids .field (1 )
38423842
3843- assert not id_field_no_ids .metadata
3844- assert not name_field_no_ids .metadata
3843+ assert PYARROW_PARQUET_FIELD_ID_KEY not in id_field_no_ids .metadata
3844+ assert PYARROW_PARQUET_FIELD_ID_KEY not in name_field_no_ids .metadata
3845+ assert PYARROW_PARQUET_FIELD_ID_KEY not in id_field_no_ids .metadata
3846+ assert PYARROW_PARQUET_FIELD_ID_KEY not in name_field_no_ids .metadata
3847+
3848+
def test_orc_schema_conversion_with_required_attribute() -> None:
    """
    Test that schema_to_pyarrow adds the ORC iceberg.required attribute for ORC only.

    The same Iceberg schema is converted once for Parquet and once for ORC; the
    ORC required-flag metadata key must be absent in the former and present on
    every field in the latter.

    To run just this test:
        pytest tests/io/test_pyarrow.py -k test_orc_schema_conversion_with_required_attribute
    """
    from pyiceberg.io.pyarrow import ORC_FIELD_REQUIRED_KEY, schema_to_pyarrow
    from pyiceberg.manifest import FileFormat
    from pyiceberg.schema import Schema
    from pyiceberg.types import IntegerType, StringType

    # One required and one optional column so both flag values are exercised.
    # NOTE(review): NestedField is not imported here; presumably it comes from
    # the module-level imports of this test file.
    schema = Schema(
        NestedField(1, "id", IntegerType(), required=True),
        NestedField(2, "name", StringType(), required=False),
    )

    # Test 1: Specify Parquet format
    arrow_schema_default = schema_to_pyarrow(schema, file_format=FileFormat.PARQUET)

    id_field = arrow_schema_default.field(0)
    name_field = arrow_schema_default.field(1)

    # Field.metadata is None when a field carries no metadata at all, so fall
    # back to an empty mapping to keep the membership test from raising.
    assert ORC_FIELD_REQUIRED_KEY not in (id_field.metadata or {})
    assert ORC_FIELD_REQUIRED_KEY not in (name_field.metadata or {})

    # Test 2: Specify ORC format
    arrow_schema_orc = schema_to_pyarrow(schema, file_format=FileFormat.ORC)

    id_field_orc = arrow_schema_orc.field(0)
    name_field_orc = arrow_schema_orc.field(1)

    # PyArrow returns metadata values as bytes, so an identity check against
    # the Python booleans can never succeed; compare against byte strings.
    # NOTE(review): assumes the flag is serialized as lower-case "true"/"false"
    # -- confirm against the schema_to_pyarrow implementation.
    assert id_field_orc.metadata[ORC_FIELD_REQUIRED_KEY] == b"true"
    assert name_field_orc.metadata[ORC_FIELD_REQUIRED_KEY] == b"false"
38453883
38463884
38473885def test_orc_batching_behavior_documentation (tmp_path : Path ) -> None :
0 commit comments