Skip to content

Commit

Permalink
Allow Partition data to be nullable in ManifestEntry (#509)
Browse files Browse the repository at this point in the history
* fix

* use partition field nullability
  • Loading branch information
sungwy authored Mar 11, 2024
1 parent a222825 commit 0ab3262
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 3 deletions.
1 change: 1 addition & 0 deletions pyiceberg/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ def data_file_with_partition(partition_type: StructType, format_version: Literal
field_id=field.field_id,
name=field.name,
field_type=partition_field_to_data_file_partition_field(field.field_type),
+ required=field.required,
)
for field in partition_type.fields
])
Expand Down
3 changes: 2 additions & 1 deletion pyiceberg/partitioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ def partition_type(self, schema: Schema) -> StructType:
for field in self.fields:
source_type = schema.find_type(field.source_id)
result_type = field.transform.result_type(source_type)
- nested_fields.append(NestedField(field.field_id, field.name, result_type, required=False))
+ required = schema.find_field(field.source_id).required
+ nested_fields.append(NestedField(field.field_id, field.name, result_type, required=required))
return StructType(*nested_fields)

def partition_to_path(self, data: Record, schema: Schema) -> str:
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,7 +892,7 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
"data_file": {
"file_path": "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=1/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00002.parquet",
"file_format": "PARQUET",
- "partition": {"VendorID": 1, "tpep_pickup_datetime": 1925},
+ "partition": {"VendorID": 1, "tpep_pickup_datetime": None},
"record_count": 95050,
"file_size_in_bytes": 1265950,
"block_size_in_bytes": 67108864,
Expand Down
2 changes: 1 addition & 1 deletion tests/table/test_partitioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,5 +127,5 @@ def test_partition_type(table_schema_simple: Schema) -> None:

assert spec.partition_type(table_schema_simple) == StructType(
NestedField(field_id=1000, name="str_truncate", field_type=StringType(), required=False),
- NestedField(field_id=1001, name="int_bucket", field_type=IntegerType(), required=False),
+ NestedField(field_id=1001, name="int_bucket", field_type=IntegerType(), required=True),
)

0 comments on commit 0ab3262

Please sign in to comment.