From ecb5b9e6a1016c04a721602ff98d154a044bdfd2 Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Fri, 3 Jan 2025 21:52:00 +0800 Subject: [PATCH 1/4] add iceberg schema for manifests table --- crates/iceberg/src/metadata_scan.rs | 112 ++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 5 deletions(-) diff --git a/crates/iceberg/src/metadata_scan.rs b/crates/iceberg/src/metadata_scan.rs index 16604d781..7ba1b74ad 100644 --- a/crates/iceberg/src/metadata_scan.rs +++ b/crates/iceberg/src/metadata_scan.rs @@ -26,6 +26,7 @@ use arrow_array::types::{Int32Type, Int64Type, Int8Type, TimestampMillisecondTyp use arrow_array::RecordBatch; use arrow_schema::{DataType, Field, Fields, Schema, TimeUnit}; +use crate::spec::{ListType, NestedField, PrimitiveType, StructType, Type}; use crate::table::Table; use crate::Result; @@ -143,9 +144,9 @@ impl<'a> ManifestsTable<'a> { ] } - /// Returns the schema of the manifests table. - pub fn schema(&self) -> Schema { - Schema::new(vec![ + /// Returns the fields of the manifests table. + fn fields(&self) -> Vec { + vec![ Field::new("content", DataType::Int8, false), Field::new("path", DataType::Utf8, false), Field::new("length", DataType::Int64, false), @@ -166,7 +167,107 @@ impl<'a> ManifestsTable<'a> { ))), false, ), - ]) + ] + } + + /// Returns the iceberg schema of the manifests table. + pub fn schema(&self) -> crate::spec::Schema { + let fields = vec![ + NestedField::new(14, "content", Type::Primitive(PrimitiveType::Int), false), + NestedField::new(1, "path", Type::Primitive(PrimitiveType::String), false), + NestedField::new(2, "length", Type::Primitive(PrimitiveType::Long), false), + NestedField::new( + 3, + "partition_spec_id", + Type::Primitive(PrimitiveType::Int), + false, + ), + NestedField::new( + 4, + "added_snapshot_id", + Type::Primitive(PrimitiveType::Long), + false, + ), + NestedField::new( + 5, + "added_data_files_count", + Type::Primitive(PrimitiveType::Int), + false, + ), + NestedField::new( + 6, + "existing_data_files_count", + Type::Primitive(PrimitiveType::Int), + false, + ), + NestedField::new( + 7, + "deleted_data_files_count", + Type::Primitive(PrimitiveType::Int), + false, + ), + NestedField::new( + 15, + "added_delete_files_count", + Type::Primitive(PrimitiveType::Int), + false, + ), + NestedField::new( + 16, + "existing_delete_files_count", + Type::Primitive(PrimitiveType::Int), + false, + ), + NestedField::new( + 17, + "deleted_delete_files_count", + Type::Primitive(PrimitiveType::Int), + false, + ), + NestedField::new( + 8, + "partition_summaries", + Type::List(ListType { + element_field: Arc::new(NestedField::new( + 0, + "item", + Type::Struct(StructType::new(vec![ + Arc::new(NestedField::new( + 10, + "contains_null", + Type::Primitive(PrimitiveType::Boolean), + true, + )), + Arc::new(NestedField::new( + 11, + "contains_nan", + Type::Primitive(PrimitiveType::Boolean), + false, + )), + Arc::new(NestedField::new( + 12, + "lower_bound", + Type::Primitive(PrimitiveType::String), + false, + )), + Arc::new(NestedField::new( + 13, + "upper_bound", + Type::Primitive(PrimitiveType::String), + false, + )), + ])), + false, + )), + }), + false, + ), + ]; + + crate::spec::Schema::builder() + .with_fields(fields.into_iter().map(|f| f.into())) + .build() + .unwrap() } /// Scans the manifests table. @@ -238,7 +339,8 @@ impl<'a> ManifestsTable<'a> { } } - Ok(RecordBatch::try_new(Arc::new(self.schema()), vec![ + let schema = Schema::new(self.fields()); + Ok(RecordBatch::try_new(Arc::new(schema), vec![ Arc::new(content.finish()), Arc::new(path.finish()), Arc::new(length.finish()), From 569bb7af94b0b4dde5ad65d2556851c86e6ad21d Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Fri, 3 Jan 2025 22:21:29 +0800 Subject: [PATCH 2/4] pass test --- crates/iceberg/src/metadata_scan.rs | 123 ++++++++++++---------------- 1 file changed, 51 insertions(+), 72 deletions(-) diff --git a/crates/iceberg/src/metadata_scan.rs b/crates/iceberg/src/metadata_scan.rs index 7ba1b74ad..c19231998 100644 --- a/crates/iceberg/src/metadata_scan.rs +++ b/crates/iceberg/src/metadata_scan.rs @@ -17,15 +17,17 @@ //! Metadata table api. +use std::collections::HashMap; use std::sync::Arc; use arrow_array::builder::{ BooleanBuilder, ListBuilder, MapBuilder, PrimitiveBuilder, StringBuilder, StructBuilder, }; -use arrow_array::types::{Int32Type, Int64Type, Int8Type, TimestampMillisecondType}; +use arrow_array::types::{Int32Type, Int64Type, TimestampMillisecondType}; use arrow_array::RecordBatch; use arrow_schema::{DataType, Field, Fields, Schema, TimeUnit}; +use crate::arrow::schema_to_arrow_schema; use crate::spec::{ListType, NestedField, PrimitiveType, StructType, Type}; use crate::table::Table; use crate::Result; @@ -135,101 +137,66 @@ pub struct ManifestsTable<'a> { } impl<'a> ManifestsTable<'a> { - fn partition_summary_fields(&self) -> Vec { - vec![ - Field::new("contains_null", DataType::Boolean, false), - Field::new("contains_nan", DataType::Boolean, true), - Field::new("lower_bound", DataType::Utf8, true), - Field::new("upper_bound", DataType::Utf8, true), - ] - } - - /// Returns the fields of the manifests table. - fn fields(&self) -> Vec { - vec![ - Field::new("content", DataType::Int8, false), - Field::new("path", DataType::Utf8, false), - Field::new("length", DataType::Int64, false), - Field::new("partition_spec_id", DataType::Int32, false), - Field::new("added_snapshot_id", DataType::Int64, false), - Field::new("added_data_files_count", DataType::Int32, false), - Field::new("existing_data_files_count", DataType::Int32, false), - Field::new("deleted_data_files_count", DataType::Int32, false), - Field::new("added_delete_files_count", DataType::Int32, false), - Field::new("existing_delete_files_count", DataType::Int32, false), - Field::new("deleted_delete_files_count", DataType::Int32, false), - Field::new( - "partition_summaries", - DataType::List(Arc::new(Field::new_struct( - "item", - self.partition_summary_fields(), - false, - ))), - false, - ), - ] - } - /// Returns the iceberg schema of the manifests table. pub fn schema(&self) -> crate::spec::Schema { let fields = vec![ - NestedField::new(14, "content", Type::Primitive(PrimitiveType::Int), false), - NestedField::new(1, "path", Type::Primitive(PrimitiveType::String), false), - NestedField::new(2, "length", Type::Primitive(PrimitiveType::Long), false), + NestedField::new(14, "content", Type::Primitive(PrimitiveType::Int), true), + NestedField::new(1, "path", Type::Primitive(PrimitiveType::String), true), + NestedField::new(2, "length", Type::Primitive(PrimitiveType::Long), true), NestedField::new( 3, "partition_spec_id", Type::Primitive(PrimitiveType::Int), - false, + true, ), NestedField::new( 4, "added_snapshot_id", Type::Primitive(PrimitiveType::Long), - false, + true, ), NestedField::new( 5, "added_data_files_count", Type::Primitive(PrimitiveType::Int), - false, + true, ), NestedField::new( 6, "existing_data_files_count", Type::Primitive(PrimitiveType::Int), - false, + true, ), NestedField::new( 7, "deleted_data_files_count", Type::Primitive(PrimitiveType::Int), - false, + true, ), NestedField::new( 15, "added_delete_files_count", Type::Primitive(PrimitiveType::Int), - false, + true, ), NestedField::new( 16, "existing_delete_files_count", Type::Primitive(PrimitiveType::Int), - false, + true, ), NestedField::new( 17, "deleted_delete_files_count", Type::Primitive(PrimitiveType::Int), - false, + true, ), NestedField::new( 8, "partition_summaries", Type::List(ListType { element_field: Arc::new(NestedField::new( - 0, + 9, "item", Type::Struct(StructType::new(vec![ Arc::new(NestedField::new( @@ -257,10 +224,10 @@ impl<'a> ManifestsTable<'a> { false, )), ])), - false, + true, )), }), - false, + true, ), ]; @@ -272,7 +239,20 @@ impl<'a> ManifestsTable<'a> { /// Scans the manifests table. pub async fn scan(&self) -> Result { - let mut content = PrimitiveBuilder::::new(); + let schema = schema_to_arrow_schema(&self.schema())?; + let partition_summary_fields = if let DataType::List(list_type) = + schema.field_with_name("partition_summaries")?.data_type() + { + if let DataType::Struct(fields) = list_type.data_type() { + fields.to_vec() + } else { + unreachable!() + } + } else { + unreachable!() + }; + + let mut content = PrimitiveBuilder::::new(); let mut path = StringBuilder::new(); let mut length = PrimitiveBuilder::::new(); let mut partition_spec_id = PrimitiveBuilder::::new(); @@ -284,21 +264,21 @@ impl<'a> ManifestsTable<'a> { let mut existing_delete_files_count = PrimitiveBuilder::::new(); let mut deleted_delete_files_count = PrimitiveBuilder::::new(); let mut partition_summaries = ListBuilder::new(StructBuilder::from_fields( - Fields::from(self.partition_summary_fields()), + Fields::from(partition_summary_fields.clone()), 0, )) - .with_field(Arc::new(Field::new_struct( - "item", - self.partition_summary_fields(), - false, - ))); + .with_field(Arc::new( + Field::new_struct("item", partition_summary_fields, false).with_metadata( + HashMap::from([("PARQUET:field_id".to_string(), "9".to_string())]), + ), + )); if let Some(snapshot) = self.table.metadata().current_snapshot() { let manifest_list = snapshot .load_manifest_list(self.table.file_io(), &self.table.metadata_ref()) .await?; for manifest in manifest_list.entries() { - content.append_value(manifest.content as i8); + content.append_value(manifest.content as i32); path.append_value(manifest.manifest_path.clone()); length.append_value(manifest.manifest_length); partition_spec_id.append_value(manifest.partition_spec_id); @@ -339,7 +319,6 @@ impl<'a> ManifestsTable<'a> { } } - let schema = Schema::new(self.fields()); Ok(RecordBatch::try_new(Arc::new(schema), vec![ Arc::new(content.finish()), Arc::new(path.finish()), @@ -499,20 +478,20 @@ mod tests { check_record_batch( record_batch, expect![[r#" - Field { name: "content", data_type: Int8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "path", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "length", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "partition_spec_id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "added_snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "added_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "existing_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "deleted_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "added_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "existing_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "deleted_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, - Field { name: "partition_summaries", data_type: List(Field { name: "item", data_type: Struct([Field { name: "contains_null", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "contains_nan", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "lower_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "upper_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }"#]], + Field { name: "content", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "14"} }, + Field { name: "path", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, + Field { name: "length", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, + Field { name: "partition_spec_id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, + Field { name: "added_snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, + Field { name: "added_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, + Field { name: "existing_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, + Field { name: "deleted_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, + Field { name: "added_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "15"} }, + Field { name: "existing_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "16"} }, + Field { name: "deleted_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "17"} }, + Field { name: "partition_summaries", data_type: List(Field { name: "item", data_type: Struct([Field { name: "contains_null", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }, Field { name: "contains_nan", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "11"} }, Field { name: "lower_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "12"} }, Field { name: "upper_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "13"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }"#]], expect![[r#" - content: PrimitiveArray + content: PrimitiveArray [ 0, ], From a7b2c5cfe4640cfe1ac3d77b2a7ae7d736ff8387 Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Fri, 24 Jan 2025 18:59:54 +0800 Subject: [PATCH 3/4] refactor the partition summaries --- crates/iceberg/src/inspect/manifests.rs | 85 +++++++++++++++---------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/crates/iceberg/src/inspect/manifests.rs b/crates/iceberg/src/inspect/manifests.rs index 1e2783448..7b320dfb5 100644 --- a/crates/iceberg/src/inspect/manifests.rs +++ b/crates/iceberg/src/inspect/manifests.rs @@ -19,7 +19,7 @@ use std::collections::HashMap; use std::sync::Arc; use arrow_array::builder::{ - BooleanBuilder, ListBuilder, PrimitiveBuilder, StringBuilder, StructBuilder, + BooleanBuilder, GenericListBuilder, ListBuilder, PrimitiveBuilder, StringBuilder, StructBuilder, }; use arrow_array::types::{Int32Type, Int64Type}; use arrow_array::RecordBatch; @@ -28,7 +28,7 @@ use futures::{stream, StreamExt}; use crate::arrow::schema_to_arrow_schema; use crate::scan::ArrowRecordBatchStream; -use crate::spec::{ListType, NestedField, PrimitiveType, StructType, Type}; +use crate::spec::{FieldSummary, ListType, NestedField, PrimitiveType, StructType, Type}; use crate::table::Table; use crate::Result; @@ -143,8 +143,7 @@ impl<'a> ManifestsTable<'a> { .unwrap() } - /// Scans the manifests table. - pub async fn scan(&self) -> Result { + fn partition_summary_builder(&self) -> Result> { let schema = schema_to_arrow_schema(&self.schema())?; let partition_summary_fields = if let DataType::List(list_type) = schema.field_with_name("partition_summaries")?.data_type() @@ -158,6 +157,51 @@ impl<'a> ManifestsTable<'a> { unreachable!() }; + let partition_summaries = ListBuilder::new(StructBuilder::from_fields( + Fields::from(partition_summary_fields.clone()), + 0, + )) + .with_field(Arc::new( + Field::new_struct("item", partition_summary_fields, false).with_metadata( + HashMap::from([("PARQUET:field_id".to_string(), "9".to_string())]), + ), + )); + + Ok(partition_summaries) + } + + fn append_partition_summary( + &self, + builder: &mut GenericListBuilder, + partitions: &[FieldSummary], + ) { + let partition_summaries_builder = builder.values(); + for summary in partitions { + partition_summaries_builder + .field_builder::(0) + .unwrap() + .append_value(summary.contains_null); + partition_summaries_builder + .field_builder::(1) + .unwrap() + .append_option(summary.contains_nan); + partition_summaries_builder + .field_builder::(2) + .unwrap() + .append_option(summary.lower_bound.as_ref().map(|v| v.to_string())); + partition_summaries_builder + .field_builder::(3) + .unwrap() + .append_option(summary.upper_bound.as_ref().map(|v| v.to_string())); + partition_summaries_builder.append(true); + } + builder.append(true); + } + + /// Scans the manifests table. + pub async fn scan(&self) -> Result { + let schema = schema_to_arrow_schema(&self.schema())?; + let mut content = PrimitiveBuilder::::new(); let mut path = StringBuilder::new(); let mut length = PrimitiveBuilder::::new(); @@ -169,15 +213,7 @@ impl<'a> ManifestsTable<'a> { let mut added_delete_files_count = PrimitiveBuilder::::new(); let mut existing_delete_files_count = PrimitiveBuilder::::new(); let mut deleted_delete_files_count = PrimitiveBuilder::::new(); - let mut partition_summaries = ListBuilder::new(StructBuilder::from_fields( - Fields::from(partition_summary_fields.clone()), - 0, - )) - .with_field(Arc::new( - Field::new_struct("item", partition_summary_fields, false).with_metadata( - HashMap::from([("PARQUET:field_id".to_string(), "9".to_string())]), - ), - )); + let mut partition_summaries = self.partition_summary_builder()?; if let Some(snapshot) = self.table.metadata().current_snapshot() { let manifest_list = snapshot @@ -200,28 +236,7 @@ impl<'a> ManifestsTable<'a> { .append_value(manifest.existing_files_count.unwrap_or(0) as i32); deleted_delete_files_count .append_value(manifest.deleted_files_count.unwrap_or(0) as i32); - - let partition_summaries_builder = partition_summaries.values(); - for summary in &manifest.partitions { - partition_summaries_builder - .field_builder::(0) - .unwrap() - .append_value(summary.contains_null); - partition_summaries_builder - .field_builder::(1) - .unwrap() - .append_option(summary.contains_nan); - partition_summaries_builder - .field_builder::(2) - .unwrap() - .append_option(summary.lower_bound.as_ref().map(|v| v.to_string())); - partition_summaries_builder - .field_builder::(3) - .unwrap() - .append_option(summary.upper_bound.as_ref().map(|v| v.to_string())); - partition_summaries_builder.append(true); - } - partition_summaries.append(true); + self.append_partition_summary(&mut partition_summaries, &manifest.partitions); } } From fb8427897eb36bbc183760af7f7ce35183ffda96 Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Fri, 24 Jan 2025 19:06:28 +0800 Subject: [PATCH 4/4] refactor the code --- crates/iceberg/src/inspect/manifests.rs | 109 ++++++++++++------------ 1 file changed, 53 insertions(+), 56 deletions(-) diff --git a/crates/iceberg/src/inspect/manifests.rs b/crates/iceberg/src/inspect/manifests.rs index 7b320dfb5..e94e48a45 100644 --- a/crates/iceberg/src/inspect/manifests.rs +++ b/crates/iceberg/src/inspect/manifests.rs @@ -143,61 +143,6 @@ impl<'a> ManifestsTable<'a> { .unwrap() } - fn partition_summary_builder(&self) -> Result> { - let schema = schema_to_arrow_schema(&self.schema())?; - let partition_summary_fields = if let DataType::List(list_type) = - schema.field_with_name("partition_summaries")?.data_type() - { - if let DataType::Struct(fields) = list_type.data_type() { - fields.to_vec() - } else { - unreachable!() - } - } else { - unreachable!() - }; - - let partition_summaries = ListBuilder::new(StructBuilder::from_fields( - Fields::from(partition_summary_fields.clone()), - 0, - )) - .with_field(Arc::new( - Field::new_struct("item", partition_summary_fields, false).with_metadata( - HashMap::from([("PARQUET:field_id".to_string(), "9".to_string())]), - ), - )); - - Ok(partition_summaries) - } - - fn append_partition_summary( - &self, - builder: &mut GenericListBuilder, - partitions: &[FieldSummary], - ) { - let partition_summaries_builder = builder.values(); - for summary in partitions { - partition_summaries_builder - .field_builder::(0) - .unwrap() - .append_value(summary.contains_null); - partition_summaries_builder - .field_builder::(1) - .unwrap() - .append_option(summary.contains_nan); - partition_summaries_builder - .field_builder::(2) - .unwrap() - .append_option(summary.lower_bound.as_ref().map(|v| v.to_string())); - partition_summaries_builder - .field_builder::(3) - .unwrap() - .append_option(summary.upper_bound.as_ref().map(|v| v.to_string())); - partition_summaries_builder.append(true); - } - builder.append(true); - } - /// Scans the manifests table. pub async fn scan(&self) -> Result { let schema = schema_to_arrow_schema(&self.schema())?; @@ -236,7 +181,7 @@ impl<'a> ManifestsTable<'a> { .append_value(manifest.existing_files_count.unwrap_or(0) as i32); deleted_delete_files_count .append_value(manifest.deleted_files_count.unwrap_or(0) as i32); - self.append_partition_summary(&mut partition_summaries, &manifest.partitions); + self.append_partition_summaries(&mut partition_summaries, &manifest.partitions); } } @@ -256,6 +201,58 @@ impl<'a> ManifestsTable<'a> { ])?; Ok(stream::iter(vec![Ok(batch)]).boxed()) } + + fn partition_summary_builder(&self) -> Result> { + let schema = schema_to_arrow_schema(&self.schema())?; + let partition_summary_fields = + match schema.field_with_name("partition_summaries")?.data_type() { + DataType::List(list_type) => match list_type.data_type() { + DataType::Struct(fields) => fields.to_vec(), + _ => unreachable!(), + }, + _ => unreachable!(), + }; + + let partition_summaries = ListBuilder::new(StructBuilder::from_fields( + Fields::from(partition_summary_fields.clone()), + 0, + )) + .with_field(Arc::new( + Field::new_struct("item", partition_summary_fields, false).with_metadata( + HashMap::from([("PARQUET:field_id".to_string(), "9".to_string())]), + ), + )); + + Ok(partition_summaries) + } + + fn append_partition_summaries( + &self, + builder: &mut GenericListBuilder, + partitions: &[FieldSummary], + ) { + let partition_summaries_builder = builder.values(); + for summary in partitions { + partition_summaries_builder + .field_builder::(0) + .unwrap() + .append_value(summary.contains_null); + partition_summaries_builder + .field_builder::(1) + .unwrap() + .append_option(summary.contains_nan); + partition_summaries_builder + .field_builder::(2) + .unwrap() + .append_option(summary.lower_bound.as_ref().map(|v| v.to_string())); + partition_summaries_builder + .field_builder::(3) + .unwrap() + .append_option(summary.upper_bound.as_ref().map(|v| v.to_string())); + partition_summaries_builder.append(true); + } + builder.append(true); + } } #[cfg(test)]