apache · Fokko · May 22, 2025 · Jun 24, 2025 · Jun 24, 2025 · Jun 24, 2025
diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py
@@ -36,7 +36,7 @@
     SortOrder,
     assign_fresh_sort_order_ids,
 )
-from pyiceberg.table.statistics import StatisticsFile
+from pyiceberg.table.statistics import PartitionStatisticsFile, StatisticsFile
 from pyiceberg.typedef import (
     EMPTY_DICT,
     IcebergBaseModel,
@@ -222,6 +222,14 @@ class TableMetadataCommonFields(IcebergBaseModel):
     table correctly. A table can contain many statistics files
     associated with different table snapshots."""
 
+    partition_statistics: List[PartitionStatisticsFile] = Field(alias="partition-statistics", default_factory=list)
+    """A optional list of partition statistics files.
+    Partition statistics are not required for reading or planning
+    and readers may ignore them. Each table snapshot may be associated
+    with at most one partition statistics file. A writer can optionally
+    write the partition statistics file during each write operation,
+    or it can also be computed on demand."""
+
     # validators
     @field_validator("properties", mode="before")
     def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]:

diff --git a/pyiceberg/table/statistics.py b/pyiceberg/table/statistics.py
@@ -29,15 +29,24 @@ class BlobMetadata(IcebergBaseModel):
     properties: Optional[Dict[str, str]] = None
 
 
-class StatisticsFile(IcebergBaseModel):
+class StatisticsCommonFields(IcebergBaseModel):
+    """Common fields between table and partition statistics structs found on metadata."""
+
     snapshot_id: int = Field(alias="snapshot-id")
     statistics_path: str = Field(alias="statistics-path")
     file_size_in_bytes: int = Field(alias="file-size-in-bytes")
+
+
+class StatisticsFile(StatisticsCommonFields):
 class TableMetadataV2(TableMetadataCommonFields, IcebergBaseModel): 
 class TableMetadataCommonFields(IcebergBaseModel): 
 class TableMetadataV2(TableMetadataCommonFields, IcebergBaseModel): 
 class TableMetadataCommonFields(IcebergBaseModel): 
     file_footer_size_in_bytes: int = Field(alias="file-footer-size-in-bytes")
     key_metadata: Optional[str] = Field(alias="key-metadata", default=None)
     blob_metadata: List[BlobMetadata] = Field(alias="blob-metadata")
 
 
+class PartitionStatisticsFile(StatisticsCommonFields):
+    pass
+
+
 def filter_statistics_by_snapshot_id(
     statistics: List[StatisticsFile],
     reject_snapshot_id: int,

diff --git a/tests/table/test_metadata.py b/tests/table/test_metadata.py
@@ -173,13 +173,13 @@ def test_updating_metadata(example_table_metadata_v2: Dict[str, Any]) -> None:
 def test_serialize_v1(example_table_metadata_v1: Dict[str, Any]) -> None:
     table_metadata = TableMetadataV1(**example_table_metadata_v1)
     table_metadata_json = table_metadata.model_dump_json()
-    expected = """{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822,"manifest-list":"s3://bucket/test/manifest-list"}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"statistics":[],"format-version":1,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},"partition-spec":[{"name":"x","transform":"identity","source-id":1,"field-id":1000}]}"""
+    expected = """{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822,"manifest-list":"s3://bucket/test/manifest-list"}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"statistics":[],"partition-statistics":[],"format-version":1,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},"partition-spec":[{"name":"x","transform":"identity","source-id":1,"field-id":1000}]}"""
     assert table_metadata_json == expected
 
 
 def test_serialize_v2(example_table_metadata_v2: Dict[str, Any]) -> None:
     table_metadata = TableMetadataV2(**example_table_metadata_v2).model_dump_json()
-    expected = """{"location":"s3://bucket/test/location","table-uuid":"9c12d441-03fe-4693-9a96-a0705ddf69c1","last-updated-ms":1602638573590,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":1,"identifier-field-ids":[1,2]}],"current-schema-id":1,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{"read.split.target.size":"134217728"},"current-snapshot-id":3055729675574597004,"snapshots":[{"snapshot-id":3051729675574597004,"sequence-number":0,"timestamp-ms":1515100955770,"manifest-list":"s3://a/b/1.avro","summary":{"operation":"append"}},{"snapshot-id":3055729675574597004,"parent-snapshot-id":3051729675574597004,"sequence-number":1,"timestamp-ms":1555100955770,"manifest-list":"s3://a/b/2.avro","summary":{"operation":"append"},"schema-id":1}],"snapshot-log":[{"snapshot-id":3051729675574597004,"timestamp-ms":1515100955770},{"snapshot-id":3055729675574597004,"timestamp-ms":1555100955770}],"metadata-log":[{"metadata-file":"s3://bucket/.../v1.json","timestamp-ms":1515100}],"sort-orders":[{"order-id":3,"fields":[{"source-id":2,"transform":"identity","direction":"asc","null-order":"nulls-first"},{"source-id":3,"transform":"bucket[4]","direction":"desc","null-order":"nulls-last"}]}],"default-sort-order-id":3,"refs":{"test":{"snapshot-id":3051729675574597004,"type":"tag","max-ref-age-ms":10000000},"main":{"snapshot-id":3055729675574597004,"type":"branch"}},"statistics":[],"format-version":2,"last-sequence-number":34}"""
+    expected = """{"location":"s3://bucket/test/location","table-uuid":"9c12d441-03fe-4693-9a96-a0705ddf69c1","last-updated-ms":1602638573590,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":1,"identifier-field-ids":[1,2]}],"current-schema-id":1,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{"read.split.target.size":"134217728"},"current-snapshot-id":3055729675574597004,"snapshots":[{"snapshot-id":3051729675574597004,"sequence-number":0,"timestamp-ms":1515100955770,"manifest-list":"s3://a/b/1.avro","summary":{"operation":"append"}},{"snapshot-id":3055729675574597004,"parent-snapshot-id":3051729675574597004,"sequence-number":1,"timestamp-ms":1555100955770,"manifest-list":"s3://a/b/2.avro","summary":{"operation":"append"},"schema-id":1}],"snapshot-log":[{"snapshot-id":3051729675574597004,"timestamp-ms":1515100955770},{"snapshot-id":3055729675574597004,"timestamp-ms":1555100955770}],"metadata-log":[{"metadata-file":"s3://bucket/.../v1.json","timestamp-ms":1515100}],"sort-orders":[{"order-id":3,"fields":[{"source-id":2,"transform":"identity","direction":"asc","null-order":"nulls-first"},{"source-id":3,"transform":"bucket[4]","direction":"desc","null-order":"nulls-last"}]}],"default-sort-order-id":3,"refs":{"test":{"snapshot-id":3051729675574597004,"type":"tag","max-ref-age-ms":10000000},"main":{"snapshot-id":3055729675574597004,"type":"branch"}},"statistics":[],"partition-statistics":[],"format-version":2,"last-sequence-number":34}"""
     assert table_metadata == expected
 
 

diff --git a/tests/table/test_statistics.py b/tests/table/test_statistics.py
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from pyiceberg.table.statistics import BlobMetadata, PartitionStatisticsFile, StatisticsFile
+
+
+def test_partition_statistics_file() -> None:
+    partition_statistics_file_json = (
+        """{"snapshot-id":123,"statistics-path":"s3://bucket/statistics.parquet","file-size-in-bytes":345}"""
+    )
+    partition_statistics_file = PartitionStatisticsFile.model_validate_json(partition_statistics_file_json)
+
+    assert partition_statistics_file == PartitionStatisticsFile(
+        snapshot_id=123, statistics_path="s3://bucket/statistics.parquet", file_size_in_bytes=345
+    )
+
+    assert partition_statistics_file.model_dump_json() == partition_statistics_file_json
+
+
+def test_statistics_file() -> None:
+    statistics_file_json = """{"snapshot-id":123,"statistics-path":"s3://bucket/statistics.parquet","file-size-in-bytes":345,"file-footer-size-in-bytes":456,"blob-metadata":[{"type":"apache-datasketches-theta-v1","snapshot-id":567,"sequence-number":22,"fields":[1,2,3],"properties":{"foo":"bar"}}]}"""
+    statistics_file = StatisticsFile.model_validate_json(statistics_file_json)
+
+    assert statistics_file == StatisticsFile(
+        snapshot_id=123,
+        statistics_path="s3://bucket/statistics.parquet",
+        file_size_in_bytes=345,
+        file_footer_size_in_bytes=456,
+        key_metadata=None,
+        blob_metadata=[
+            BlobMetadata(
+                type="apache-datasketches-theta-v1",
+                snapshot_id=567,
+                sequence_number=22,
+                fields=[1, 2, 3],
+                properties={"foo": "bar"},
+            )
+        ],
+    )
+
+    assert statistics_file.model_dump_json() == statistics_file_json