From 82e129945bed150b6d9fbcb93ebb4d92f8150895 Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Thu, 14 Dec 2023 17:16:17 +0800 Subject: [PATCH] test: Remove binary manifest list avro file (#118) * Remove binary manifest list avro file * Remove binary manifest * Fix comments --- Makefile | 3 + crates/iceberg/src/spec/manifest.rs | 899 ++++++++---------- crates/iceberg/src/spec/manifest_list.rs | 141 +-- .../testdata/partition_manifest_v1.avro | Bin 6067 -> 0 bytes .../testdata/partition_manifest_v2.avro | Bin 3350 -> 0 bytes .../testdata/simple_manifest_list_v1.avro | Bin 3759 -> 0 bytes .../testdata/simple_manifest_list_v2.avro | Bin 1247 -> 0 bytes .../testdata/unpartition_manifest_v1.avro | Bin 6032 -> 0 bytes .../testdata/unpartition_manifest_v2.avro | Bin 3095 -> 0 bytes 9 files changed, 508 insertions(+), 535 deletions(-) delete mode 100644 crates/iceberg/testdata/partition_manifest_v1.avro delete mode 100644 crates/iceberg/testdata/partition_manifest_v2.avro delete mode 100644 crates/iceberg/testdata/simple_manifest_list_v1.avro delete mode 100644 crates/iceberg/testdata/simple_manifest_list_v2.avro delete mode 100644 crates/iceberg/testdata/unpartition_manifest_v1.avro delete mode 100644 crates/iceberg/testdata/unpartition_manifest_v2.avro diff --git a/Makefile b/Makefile index d846303ff..c34f6c97d 100644 --- a/Makefile +++ b/Makefile @@ -34,6 +34,9 @@ cargo-sort: check: check-fmt check-clippy cargo-sort +unit-test: + cargo test --no-fail-fast --lib --all-features --workspace + test: cargo test --no-fail-fast --all-targets --all-features --workspace cargo test --no-fail-fast --doc --all-features --workspace \ No newline at end of file diff --git a/crates/iceberg/src/spec/manifest.rs b/crates/iceberg/src/spec/manifest.rs index bc12edff7..b14b1295e 100644 --- a/crates/iceberg/src/spec/manifest.rs +++ b/crates/iceberg/src/spec/manifest.rs @@ -1311,511 +1311,452 @@ mod tests { use crate::spec::Type; use std::sync::Arc; - #[test] - fn test_parse_manifest_v2_unpartition() { - let path = format!( - "{}/testdata/unpartition_manifest_v2.avro", - env!("CARGO_MANIFEST_DIR") - ); - let bs = fs::read(path).expect("read_file must succeed"); - let manifest = Manifest::parse_avro(bs.as_slice()).unwrap(); - // test metadata - assert!(manifest.metadata.schema_id == 0); - assert_eq!(manifest.metadata.schema, { - let fields = vec![ - // id v_int v_long v_float v_double v_varchar v_bool v_date v_timestamp v_decimal v_ts_ntz - Arc::new(NestedField::optional( - 1, - "id", - Type::Primitive(PrimitiveType::Long), - )), - Arc::new(NestedField::optional( - 2, - "v_int", - Type::Primitive(PrimitiveType::Int), - )), - Arc::new(NestedField::optional( - 3, - "v_long", - Type::Primitive(PrimitiveType::Long), - )), - Arc::new(NestedField::optional( - 4, - "v_float", - Type::Primitive(PrimitiveType::Float), - )), - Arc::new(NestedField::optional( - 5, - "v_double", - Type::Primitive(PrimitiveType::Double), - )), - Arc::new(NestedField::optional( - 6, - "v_varchar", - Type::Primitive(PrimitiveType::String), - )), - Arc::new(NestedField::optional( - 7, - "v_bool", - Type::Primitive(PrimitiveType::Boolean), - )), - Arc::new(NestedField::optional( - 8, - "v_date", - Type::Primitive(PrimitiveType::Date), - )), - Arc::new(NestedField::optional( - 9, - "v_timestamp", - Type::Primitive(PrimitiveType::Timestamptz), - )), - Arc::new(NestedField::optional( - 10, - "v_decimal", - Type::Primitive(PrimitiveType::Decimal { - precision: 36, - scale: 10, - }), - )), - Arc::new(NestedField::optional( - 11, - "v_ts_ntz", - Type::Primitive(PrimitiveType::Timestamp), - )), - ]; - Schema::builder().with_fields(fields).build().unwrap() - }); - assert!(manifest.metadata.partition_spec.fields.is_empty()); - assert!(manifest.metadata.content == ManifestContentType::Data); - assert!(manifest.metadata.format_version == FormatVersion::V2); - // test entries - assert!(manifest.entries.len() == 1); - let entry = &manifest.entries[0]; - assert!(entry.status == ManifestStatus::Added); - assert!(entry.snapshot_id == Some(0)); - assert!(entry.sequence_number == Some(1)); - assert!(entry.file_sequence_number == Some(1)); - assert_eq!( - entry.data_file, - DataFile { - content: DataContentType::Data, - file_path: "s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(), - file_format: DataFileFormat::Parquet, - partition: Struct::empty(), - record_count: 1, - file_size_in_bytes: 5442, - column_sizes: HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]), - value_counts: HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]), - null_value_counts: HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]), - nan_value_counts: HashMap::new(), - lower_bounds: HashMap::new(), - upper_bounds: HashMap::new(), - key_metadata: Vec::new(), - split_offsets: vec![4], - equality_ids: Vec::new(), - sort_order_id: None, - } - ); + #[tokio::test] + async fn test_parse_manifest_v2_unpartition() { + let manifest = Manifest { + metadata: ManifestMetadata { + schema_id: 0, + schema: Schema::builder() + .with_fields(vec![ + // id v_int v_long v_float v_double v_varchar v_bool v_date v_timestamp v_decimal v_ts_ntz + Arc::new(NestedField::optional( + 1, + "id", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 2, + "v_int", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::optional( + 3, + "v_long", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 4, + "v_float", + Type::Primitive(PrimitiveType::Float), + )), + Arc::new(NestedField::optional( + 5, + "v_double", + Type::Primitive(PrimitiveType::Double), + )), + Arc::new(NestedField::optional( + 6, + "v_varchar", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::optional( + 7, + "v_bool", + Type::Primitive(PrimitiveType::Boolean), + )), + Arc::new(NestedField::optional( + 8, + "v_date", + Type::Primitive(PrimitiveType::Date), + )), + Arc::new(NestedField::optional( + 9, + "v_timestamp", + Type::Primitive(PrimitiveType::Timestamptz), + )), + Arc::new(NestedField::optional( + 10, + "v_decimal", + Type::Primitive(PrimitiveType::Decimal { + precision: 36, + scale: 10, + }), + )), + Arc::new(NestedField::optional( + 11, + "v_ts_ntz", + Type::Primitive(PrimitiveType::Timestamp), + )), + ]) + .build() + .unwrap(), + partition_spec: PartitionSpec { + spec_id: 0, + fields: vec![], + }, + content: ManifestContentType::Data, + format_version: FormatVersion::V2, + }, + entries: vec![ + ManifestEntry { + status: ManifestStatus::Added, + snapshot_id: None, + sequence_number: None, + file_sequence_number: None, + data_file: DataFile { + content: DataContentType::Data, + file_path: "s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(), + file_format: DataFileFormat::Parquet, + partition: Struct::empty(), + record_count: 1, + file_size_in_bytes: 5442, + column_sizes: HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]), + value_counts: HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]), + null_value_counts: HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]), + nan_value_counts: HashMap::new(), + lower_bounds: HashMap::new(), + upper_bounds: HashMap::new(), + key_metadata: Vec::new(), + split_offsets: vec![4], + equality_ids: Vec::new(), + sort_order_id: None, + } + } + ] + }; + + let writer = |output_file: OutputFile| ManifestWriter::new(output_file, 1, vec![]); + + test_manifest_read_write(manifest, writer).await; } - #[test] - fn test_parse_manifest_v2_partition() { - let path = format!( - "{}/testdata/partition_manifest_v2.avro", - env!("CARGO_MANIFEST_DIR") - ); - let bs = fs::read(path).expect("read_file must succeed"); - let manifest = Manifest::parse_avro(bs.as_slice()).unwrap(); - assert_eq!(manifest.metadata.schema_id, 0); - assert_eq!(manifest.metadata.schema, { - let fields = vec![ - Arc::new(NestedField::optional( - 1, - "id", - Type::Primitive(PrimitiveType::Long), - )), - Arc::new(NestedField::optional( - 2, - "v_int", - Type::Primitive(PrimitiveType::Int), - )), - Arc::new(NestedField::optional( - 3, - "v_long", - Type::Primitive(PrimitiveType::Long), - )), - Arc::new(NestedField::optional( - 4, - "v_float", - Type::Primitive(PrimitiveType::Float), - )), - Arc::new(NestedField::optional( - 5, - "v_double", - Type::Primitive(PrimitiveType::Double), - )), - Arc::new(NestedField::optional( - 6, - "v_varchar", - Type::Primitive(PrimitiveType::String), - )), - Arc::new(NestedField::optional( - 7, - "v_bool", - Type::Primitive(PrimitiveType::Boolean), - )), - Arc::new(NestedField::optional( - 8, - "v_date", - Type::Primitive(PrimitiveType::Date), - )), - Arc::new(NestedField::optional( - 9, - "v_timestamp", - Type::Primitive(PrimitiveType::Timestamptz), - )), - Arc::new(NestedField::optional( - 10, - "v_decimal", - Type::Primitive(PrimitiveType::Decimal { - precision: 36, - scale: 10, - }), - )), - Arc::new(NestedField::optional( - 11, - "v_ts_ntz", - Type::Primitive(PrimitiveType::Timestamp), - )), - ]; - Schema::builder().with_fields(fields).build().unwrap() - }); - assert_eq!(manifest.metadata.partition_spec, { - let fields = vec![ - PartitionField { - name: "v_int".to_string(), - transform: Transform::Identity, - source_id: 2, - field_id: 1000, + #[tokio::test] + async fn test_parse_manifest_v2_partition() { + let manifest = Manifest { + metadata: ManifestMetadata { + schema_id: 0, + schema: Schema::builder() + .with_fields(vec![ + Arc::new(NestedField::optional( + 1, + "id", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 2, + "v_int", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::optional( + 3, + "v_long", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 4, + "v_float", + Type::Primitive(PrimitiveType::Float), + )), + Arc::new(NestedField::optional( + 5, + "v_double", + Type::Primitive(PrimitiveType::Double), + )), + Arc::new(NestedField::optional( + 6, + "v_varchar", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::optional( + 7, + "v_bool", + Type::Primitive(PrimitiveType::Boolean), + )), + Arc::new(NestedField::optional( + 8, + "v_date", + Type::Primitive(PrimitiveType::Date), + )), + Arc::new(NestedField::optional( + 9, + "v_timestamp", + Type::Primitive(PrimitiveType::Timestamptz), + )), + Arc::new(NestedField::optional( + 10, + "v_decimal", + Type::Primitive(PrimitiveType::Decimal { + precision: 36, + scale: 10, + }), + )), + Arc::new(NestedField::optional( + 11, + "v_ts_ntz", + Type::Primitive(PrimitiveType::Timestamp), + )), + ]) + .build() + .unwrap(), + partition_spec: PartitionSpec { + spec_id: 0, + fields: vec![ + PartitionField { + name: "v_int".to_string(), + transform: Transform::Identity, + source_id: 2, + field_id: 1000, + }, + PartitionField { + name: "v_long".to_string(), + transform: Transform::Identity, + source_id: 3, + field_id: 1001, + }, + ], }, - PartitionField { - name: "v_long".to_string(), - transform: Transform::Identity, - source_id: 3, - field_id: 1001, + content: ManifestContentType::Data, + format_version: FormatVersion::V2, + }, + entries: vec![ManifestEntry { + status: ManifestStatus::Added, + snapshot_id: None, + sequence_number: None, + file_sequence_number: None, + data_file: DataFile { + content: DataContentType::Data, + file_format: DataFileFormat::Parquet, + file_path: "s3a://icebergdata/demo/s1/t1/data/00000-0-378b56f5-5c52-4102-a2c2-f05f8a7cbe4a-00000.parquet".to_string(), + partition: Struct::from_iter( + vec![ + (1000, Some(Literal::int(1)), "v_int".to_string()), + (1001, Some(Literal::long(1000)), "v_long".to_string()) + ] + .into_iter() + ), + record_count: 1, + file_size_in_bytes: 5442, + column_sizes: HashMap::from([ + (0, 73), + (6, 34), + (2, 73), + (7, 61), + (3, 61), + (5, 62), + (9, 79), + (10, 73), + (1, 61), + (4, 73), + (8, 73) + ]), + value_counts: HashMap::from([ + (4, 1), + (5, 1), + (2, 1), + (0, 1), + (3, 1), + (6, 1), + (8, 1), + (1, 1), + (10, 1), + (7, 1), + (9, 1) + ]), + null_value_counts: HashMap::from([ + (1, 0), + (6, 0), + (2, 0), + (8, 0), + (0, 0), + (3, 0), + (5, 0), + (9, 0), + (7, 0), + (4, 0), + (10, 0) + ]), + nan_value_counts: HashMap::new(), + lower_bounds: HashMap::new(), + upper_bounds: HashMap::new(), + key_metadata: vec![], + split_offsets: vec![4], + equality_ids: vec![], + sort_order_id: None, }, - ]; - PartitionSpec { spec_id: 0, fields } - }); - assert!(manifest.metadata.content == ManifestContentType::Data); - assert!(manifest.metadata.format_version == FormatVersion::V2); - assert_eq!(manifest.entries.len(), 1); - let entry = &manifest.entries[0]; - assert_eq!(entry.status, ManifestStatus::Added); - assert_eq!(entry.snapshot_id, Some(0)); - assert_eq!(entry.sequence_number, Some(1)); - assert_eq!(entry.file_sequence_number, Some(1)); - assert_eq!(entry.data_file.content, DataContentType::Data); - assert_eq!( - entry.data_file.file_path, - "s3a://icebergdata/demo/s1/t1/data/00000-0-378b56f5-5c52-4102-a2c2-f05f8a7cbe4a-00000.parquet" - ); - assert_eq!(entry.data_file.file_format, DataFileFormat::Parquet); - assert_eq!( - entry.data_file.partition, - Struct::from_iter( - vec![ - (1000, Some(Literal::int(1)), "v_int".to_string()), - (1001, Some(Literal::long(1000)), "v_long".to_string()) - ] - .into_iter() - ) - ); - assert_eq!(entry.data_file.record_count, 1); - assert_eq!(entry.data_file.file_size_in_bytes, 5442); - assert_eq!( - entry.data_file.column_sizes, - HashMap::from([ - (0, 73), - (6, 34), - (2, 73), - (7, 61), - (3, 61), - (5, 62), - (9, 79), - (10, 73), - (1, 61), - (4, 73), - (8, 73) - ]) - ); - assert_eq!( - entry.data_file.value_counts, - HashMap::from([ - (4, 1), - (5, 1), - (2, 1), - (0, 1), - (3, 1), - (6, 1), - (8, 1), - (1, 1), - (10, 1), - (7, 1), - (9, 1) - ]) - ); - assert_eq!( - entry.data_file.null_value_counts, - HashMap::from([ - (1, 0), - (6, 0), - (2, 0), - (8, 0), - (0, 0), - (3, 0), - (5, 0), - (9, 0), - (7, 0), - (4, 0), - (10, 0) - ]) - ); - assert!(entry.data_file.nan_value_counts.is_empty()); - assert!(entry.data_file.lower_bounds.is_empty()); - assert!(entry.data_file.upper_bounds.is_empty()); - assert!(entry.data_file.key_metadata.is_empty()); - assert_eq!(entry.data_file.split_offsets, vec![4]); - assert!(entry.data_file.equality_ids.is_empty()); - assert_eq!(entry.data_file.sort_order_id, None); - } + }], + }; - #[test] - fn test_parse_manifest_v1_unpartition() { - let path = format!( - "{}/testdata/unpartition_manifest_v1.avro", - env!("CARGO_MANIFEST_DIR") - ); - let bs = fs::read(path).expect("read_file must succeed"); - let manifest = Manifest::parse_avro(bs.as_slice()).unwrap(); - // test metadata - assert!(manifest.metadata.schema_id == 0); - assert_eq!(manifest.metadata.schema, { - let fields = vec![ - Arc::new(NestedField::optional( - 1, - "id", - Type::Primitive(PrimitiveType::Int), - )), - Arc::new(NestedField::optional( - 2, - "data", - Type::Primitive(PrimitiveType::String), - )), - Arc::new(NestedField::optional( - 3, - "comment", - Type::Primitive(PrimitiveType::String), - )), - ]; - Schema::builder() - .with_schema_id(1) - .with_fields(fields) - .build() - .unwrap() - }); - assert!(manifest.metadata.partition_spec.fields.is_empty()); - assert!(manifest.metadata.content == ManifestContentType::Data); - assert!(manifest.metadata.format_version == FormatVersion::V1); - assert_eq!(manifest.entries.len(), 4); - let entry = &manifest.entries[0]; - assert!(entry.status == ManifestStatus::Added); - assert!(entry.snapshot_id == Some(2966623707104393227)); - assert!(entry.sequence_number.is_none()); - assert!(entry.file_sequence_number.is_none()); - assert_eq!( - entry.data_file, - DataFile { - content: DataContentType::Data, - file_path: "s3://testbucket/iceberg_data/iceberg_ctl/iceberg_db/iceberg_tbl/data/00000-7-45268d71-54eb-476c-b42c-942d880c04a1-00001.parquet".to_string(), - file_format: DataFileFormat::Parquet, - partition: Struct::empty(), - record_count: 1, - file_size_in_bytes: 875, - column_sizes: HashMap::from([(1,47),(2,48),(3,52)]), - value_counts: HashMap::from([(1,1),(2,1),(3,1)]), - null_value_counts: HashMap::from([(1,0),(2,0),(3,0)]), - nan_value_counts: HashMap::new(), - lower_bounds: HashMap::from([(1,Literal::int(1)),(2,Literal::string("a")),(3,Literal::string("AC/DC"))]), - upper_bounds: HashMap::from([(1,Literal::int(1)),(2,Literal::string("a")),(3,Literal::string("AC/DC"))]), - key_metadata: vec![], - split_offsets: vec![4], - equality_ids: vec![], - sort_order_id: Some(0), - } - ); + let writer = |output_file: OutputFile| ManifestWriter::new(output_file, 1, vec![]); + + let res = test_manifest_read_write(manifest, writer).await; + + assert_eq!(res.sequence_number, UNASSIGNED_SEQUENCE_NUMBER); + assert_eq!(res.min_sequence_number, UNASSIGNED_SEQUENCE_NUMBER); } - #[test] - fn test_parse_manifest_v1_partition() { - let path = format!( - "{}/testdata/partition_manifest_v1.avro", - env!("CARGO_MANIFEST_DIR") - ); - let bs = fs::read(path).expect("read_file must succeed"); - let manifest = Manifest::parse_avro(bs.as_slice()).unwrap(); - // test metadata - assert!(manifest.metadata.schema_id == 0); - assert_eq!(manifest.metadata.schema, { - let fields = vec![ - Arc::new(NestedField::optional( - 1, - "id", - Type::Primitive(PrimitiveType::Long), - )), - Arc::new(NestedField::optional( - 2, - "data", - Type::Primitive(PrimitiveType::String), - )), - Arc::new(NestedField::optional( - 3, - "category", - Type::Primitive(PrimitiveType::String), - )), - ]; - Schema::builder().with_fields(fields).build().unwrap() - }); - assert_eq!(manifest.metadata.partition_spec, { - let fields = vec![PartitionField { - name: "category".to_string(), - transform: Transform::Identity, - source_id: 3, - field_id: 1000, - }]; - PartitionSpec { spec_id: 0, fields } - }); - assert!(manifest.metadata.content == ManifestContentType::Data); - assert!(manifest.metadata.format_version == FormatVersion::V1); - - // test entries - assert!(manifest.entries.len() == 1); - let entry = &manifest.entries[0]; - assert!(entry.status == ManifestStatus::Added); - assert!(entry.snapshot_id == Some(8205833995881562618)); - assert!(entry.sequence_number.is_none()); - assert!(entry.file_sequence_number.is_none()); - assert_eq!(entry.data_file.content, DataContentType::Data); - assert_eq!( - entry.data_file.file_path, - "s3://testbucket/prod/db/sample/data/category=x/00010-1-d5c93668-1e52-41ac-92a6-bba590cbf249-00001.parquet" - ); - assert_eq!(entry.data_file.file_format, DataFileFormat::Parquet); - assert_eq!( - entry.data_file.partition, - Struct::from_iter( - vec![( - 1000, - Some( - Literal::try_from_bytes(&[120], &Type::Primitive(PrimitiveType::String)) - .unwrap() - ), - "category".to_string() - )] - .into_iter() - ) - ); - assert_eq!(entry.data_file.record_count, 1); - assert_eq!(entry.data_file.file_size_in_bytes, 874); - assert_eq!( - entry.data_file.column_sizes, - HashMap::from([(1, 46), (2, 48), (3, 48)]) - ); - assert_eq!( - entry.data_file.value_counts, - HashMap::from([(1, 1), (2, 1), (3, 1)]) - ); - assert_eq!( - entry.data_file.null_value_counts, - HashMap::from([(1, 0), (2, 0), (3, 0)]) - ); - assert_eq!(entry.data_file.nan_value_counts, HashMap::new()); - assert_eq!( - entry.data_file.lower_bounds, - HashMap::from([ - (1, Literal::long(1)), - (2, Literal::string("a")), - (3, Literal::string("x")) - ]) - ); - assert_eq!( - entry.data_file.upper_bounds, - HashMap::from([ - (1, Literal::long(1)), - (2, Literal::string("a")), - (3, Literal::string("x")) - ]) - ); - assert!(entry.data_file.key_metadata.is_empty()); - assert_eq!(entry.data_file.split_offsets, vec![4]); - assert!(entry.data_file.equality_ids.is_empty()); - assert_eq!(entry.data_file.sort_order_id, Some(0)); + #[tokio::test] + async fn test_parse_manifest_v1_unpartition() { + let manifest = Manifest { + metadata: ManifestMetadata { + schema_id: 1, + schema: Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + Arc::new(NestedField::optional( + 1, + "id", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::optional( + 2, + "data", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::optional( + 3, + "comment", + Type::Primitive(PrimitiveType::String), + )), + ]) + .build() + .unwrap(), + partition_spec: PartitionSpec { + spec_id: 0, + fields: vec![], + }, + content: ManifestContentType::Data, + format_version: FormatVersion::V1, + }, + entries: vec![ManifestEntry { + status: ManifestStatus::Added, + snapshot_id: Some(0), + sequence_number: None, + file_sequence_number: None, + data_file: DataFile { + content: DataContentType::Data, + file_path: "s3://testbucket/iceberg_data/iceberg_ctl/iceberg_db/iceberg_tbl/data/00000-7-45268d71-54eb-476c-b42c-942d880c04a1-00001.parquet".to_string(), + file_format: DataFileFormat::Parquet, + partition: Struct::empty(), + record_count: 1, + file_size_in_bytes: 875, + column_sizes: HashMap::from([(1,47),(2,48),(3,52)]), + value_counts: HashMap::from([(1,1),(2,1),(3,1)]), + null_value_counts: HashMap::from([(1,0),(2,0),(3,0)]), + nan_value_counts: HashMap::new(), + lower_bounds: HashMap::from([(1,Literal::int(1)),(2,Literal::string("a")),(3,Literal::string("AC/DC"))]), + upper_bounds: HashMap::from([(1,Literal::int(1)),(2,Literal::string("a")),(3,Literal::string("AC/DC"))]), + key_metadata: vec![], + split_offsets: vec![4], + equality_ids: vec![], + sort_order_id: Some(0), + } + }], + }; + + let writer = + |output_file: OutputFile| ManifestWriter::new(output_file, 2966623707104393227, vec![]); + + test_manifest_read_write(manifest, writer).await; } #[tokio::test] - async fn test_writer_manifest_v1_partition() { - // Read manifest - let path = format!( - "{}/testdata/partition_manifest_v1.avro", - env!("CARGO_MANIFEST_DIR") - ); - let bs = fs::read(path).expect("read_file must succeed"); - let manifest = Manifest::parse_avro(bs.as_slice()).unwrap(); + async fn test_parse_manifest_v1_partition() { + let manifest = Manifest { + metadata: ManifestMetadata { + schema_id: 0, + schema: Schema::builder() + .with_fields(vec![ + Arc::new(NestedField::optional( + 1, + "id", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 2, + "data", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::optional( + 3, + "category", + Type::Primitive(PrimitiveType::String), + )), + ]) + .build() + .unwrap(), + partition_spec: PartitionSpec { + spec_id: 0, + fields: vec![PartitionField { + name: "category".to_string(), + transform: Transform::Identity, + source_id: 3, + field_id: 1000, + }], + }, + content: ManifestContentType::Data, + format_version: FormatVersion::V1, + }, + entries: vec![ + ManifestEntry { + status: ManifestStatus::Added, + snapshot_id: Some(0), + sequence_number: None, + file_sequence_number: None, + data_file: DataFile { + content: DataContentType::Data, + file_path: "s3://testbucket/prod/db/sample/data/category=x/00010-1-d5c93668-1e52-41ac-92a6-bba590cbf249-00001.parquet".to_string(), + file_format: DataFileFormat::Parquet, + partition: Struct::from_iter( + vec![( + 1000, + Some( + Literal::try_from_bytes(&[120], &Type::Primitive(PrimitiveType::String)) + .unwrap() + ), + "category".to_string() + )] + .into_iter() + ), + record_count: 1, + file_size_in_bytes: 874, + column_sizes: HashMap::from([(1, 46), (2, 48), (3, 48)]), + value_counts: HashMap::from([(1, 1), (2, 1), (3, 1)]), + null_value_counts: HashMap::from([(1, 0), (2, 0), (3, 0)]), + nan_value_counts: HashMap::new(), + lower_bounds: HashMap::from([ + (1, Literal::long(1)), + (2, Literal::string("a")), + (3, Literal::string("x")) + ]), + upper_bounds: HashMap::from([ + (1, Literal::long(1)), + (2, Literal::string("a")), + (3, Literal::string("x")) + ]), + key_metadata: vec![], + split_offsets: vec![4], + equality_ids: vec![], + sort_order_id: Some(0), + }, + } + ] + }; - // Write manifest - let temp_dir = TempDir::new().unwrap(); - let path = temp_dir.path().join("manifest_list_v1.avro"); - let io = FileIOBuilder::new_fs_io().build().unwrap(); - let output_file = io.new_output(path.to_str().unwrap()).unwrap(); - let writer = ManifestWriter::new(output_file, 1, vec![]); - let entry = writer.write(manifest.clone()).await.unwrap(); + let writer = |output_file: OutputFile| ManifestWriter::new(output_file, 1, vec![]); + + let entry = test_manifest_read_write(manifest, writer).await; - // Check partition summary assert_eq!(entry.partitions.len(), 1); assert_eq!(entry.partitions[0].lower_bound, Some(Literal::string("x"))); assert_eq!(entry.partitions[0].upper_bound, Some(Literal::string("x"))); - - // Verify manifest - let bs = fs::read(path).expect("read_file must succeed"); - let actual_manifest = Manifest::parse_avro(bs.as_slice()).unwrap(); - - assert_eq!(actual_manifest, manifest); } - #[tokio::test] - async fn test_writer_manifest_v2_partition() { - // Read manifest - let path = format!( - "{}/testdata/partition_manifest_v2.avro", - env!("CARGO_MANIFEST_DIR") - ); - let bs = fs::read(path).expect("read_file must succeed"); - let manifest = Manifest::parse_avro(bs.as_slice()).unwrap(); - - // Write manifest + async fn test_manifest_read_write( + manifest: Manifest, + writer_builder: impl FnOnce(OutputFile) -> ManifestWriter, + ) -> ManifestListEntry { let temp_dir = TempDir::new().unwrap(); - let path = temp_dir.path().join("manifest_list_v2.avro"); + let path = temp_dir.path().join("test_manifest.avro"); let io = FileIOBuilder::new_fs_io().build().unwrap(); let output_file = io.new_output(path.to_str().unwrap()).unwrap(); - let writer = ManifestWriter::new(output_file, 1, vec![]); + let writer = writer_builder(output_file); let res = writer.write(manifest.clone()).await.unwrap(); - assert_eq!(res.sequence_number, UNASSIGNED_SEQUENCE_NUMBER); - assert_eq!(res.min_sequence_number, 1); // Verify manifest let bs = fs::read(path).expect("read_file must succeed"); let actual_manifest = Manifest::parse_avro(bs.as_slice()).unwrap(); assert_eq!(actual_manifest, manifest); + res } } diff --git a/crates/iceberg/src/spec/manifest_list.rs b/crates/iceberg/src/spec/manifest_list.rs index db0c30c2c..a1ce291ea 100644 --- a/crates/iceberg/src/spec/manifest_list.rs +++ b/crates/iceberg/src/spec/manifest_list.rs @@ -1047,55 +1047,104 @@ mod test { use super::_serde::ManifestListV2; - #[test] - fn test_parse_manifest_list_v1() { - let path = format!( - "{}/testdata/simple_manifest_list_v1.avro", - env!("CARGO_MANIFEST_DIR") + #[tokio::test] + async fn test_parse_manifest_list_v1() { + let manifest_list = ManifestList { + entries: vec![ + ManifestListEntry { + manifest_path: "/opt/bitnami/spark/warehouse/db/table/metadata/10d28031-9739-484c-92db-cdf2975cead4-m0.avro".to_string(), + manifest_length: 5806, + partition_spec_id: 0, + content: ManifestContentType::Data, + sequence_number: 0, + min_sequence_number: 0, + added_snapshot_id: 1646658105718557341, + added_data_files_count: Some(3), + existing_data_files_count: Some(0), + deleted_data_files_count: Some(0), + added_rows_count: Some(3), + existing_rows_count: Some(0), + deleted_rows_count: Some(0), + partitions: vec![], + key_metadata: vec![], + } + ] + }; + + let file_io = FileIOBuilder::new_fs_io().build().unwrap(); + + let tmp_dir = TempDir::new().unwrap(); + let file_name = "simple_manifest_list_v1.avro"; + let full_path = format!("{}/{}", tmp_dir.path().to_str().unwrap(), file_name); + + let mut writer = ManifestListWriter::v1( + file_io.new_output(full_path.clone()).unwrap(), + 1646658105718557341, + 1646658105718557341, ); - let bs = fs::read(path).expect("read_file must succeed"); + writer + .add_manifest_entries(manifest_list.entries.clone().into_iter()) + .unwrap(); + writer.close().await.unwrap(); - let manifest_list = ManifestList::parse_with_version( + let bs = fs::read(full_path).expect("read_file must succeed"); + + let parsed_manifest_list = ManifestList::parse_with_version( &bs, crate::spec::FormatVersion::V1, &StructType::new(vec![]), ) .unwrap(); - assert_eq!(1, manifest_list.entries.len()); - assert_eq!( - manifest_list.entries[0], - ManifestListEntry { - manifest_path: "/opt/bitnami/spark/warehouse/db/table/metadata/10d28031-9739-484c-92db-cdf2975cead4-m0.avro".to_string(), - manifest_length: 5806, - partition_spec_id: 0, - content: ManifestContentType::Data, - sequence_number: 0, - min_sequence_number: 0, - added_snapshot_id: 1646658105718557341, - added_data_files_count: Some(3), - existing_data_files_count: Some(0), - deleted_data_files_count: Some(0), - added_rows_count: Some(3), - existing_rows_count: Some(0), - deleted_rows_count: Some(0), - partitions: vec![], - key_metadata: vec![], - } - ); + assert_eq!(manifest_list, parsed_manifest_list); } - #[test] - fn test_parse_manifest_list_v2() { - let path = format!( - "{}/testdata/simple_manifest_list_v2.avro", - env!("CARGO_MANIFEST_DIR") + #[tokio::test] + async fn test_parse_manifest_list_v2() { + let manifest_list = ManifestList { + entries: vec![ + ManifestListEntry { + manifest_path: "s3a://icebergdata/demo/s1/t1/metadata/05ffe08b-810f-49b3-a8f4-e88fc99b254a-m0.avro".to_string(), + manifest_length: 6926, + partition_spec_id: 1, + content: ManifestContentType::Data, + sequence_number: 1, + min_sequence_number: 1, + added_snapshot_id: 377075049360453639, + added_data_files_count: Some(1), + existing_data_files_count: Some(0), + deleted_data_files_count: Some(0), + added_rows_count: Some(3), + existing_rows_count: Some(0), + deleted_rows_count: Some(0), + partitions: vec![FieldSummary { contains_null: false, contains_nan: Some(false), lower_bound: Some(Literal::long(1)), upper_bound: Some(Literal::long(1))}], + key_metadata: vec![], + } + ] + }; + + let file_io = FileIOBuilder::new_fs_io().build().unwrap(); + + let tmp_dir = TempDir::new().unwrap(); + let file_name = "simple_manifest_list_v1.avro"; + let full_path = format!("{}/{}", tmp_dir.path().to_str().unwrap(), file_name); + + let mut writer = ManifestListWriter::v2( + file_io.new_output(full_path.clone()).unwrap(), + 1646658105718557341, + 1646658105718557341, + 1, ); - let bs = fs::read(path).expect("read_file must succeed"); + writer + .add_manifest_entries(manifest_list.entries.clone().into_iter()) + .unwrap(); + writer.close().await.unwrap(); - let manifest_list = ManifestList::parse_with_version( + let bs = fs::read(full_path).expect("read_file must succeed"); + + let parsed_manifest_list = ManifestList::parse_with_version( &bs, crate::spec::FormatVersion::V2, &StructType::new(vec![Arc::new(NestedField::required( @@ -1106,27 +1155,7 @@ mod test { ) .unwrap(); - assert_eq!(1, manifest_list.entries.len()); - assert_eq!( - manifest_list.entries[0], - ManifestListEntry { - manifest_path: "s3a://icebergdata/demo/s1/t1/metadata/05ffe08b-810f-49b3-a8f4-e88fc99b254a-m0.avro".to_string(), - manifest_length: 6926, - partition_spec_id: 1, - content: ManifestContentType::Data, - sequence_number: 1, - min_sequence_number: 1, - added_snapshot_id: 377075049360453639, - added_data_files_count: Some(1), - existing_data_files_count: Some(0), - deleted_data_files_count: Some(0), - added_rows_count: Some(3), - existing_rows_count: Some(0), - deleted_rows_count: Some(0), - partitions: vec![FieldSummary { contains_null: false, contains_nan: Some(false), lower_bound: Some(Literal::long(1)), upper_bound: Some(Literal::long(1))}], - key_metadata: vec![], - } - ); + assert_eq!(manifest_list, parsed_manifest_list); } #[test] diff --git a/crates/iceberg/testdata/partition_manifest_v1.avro b/crates/iceberg/testdata/partition_manifest_v1.avro deleted file mode 100644 index d1ada6188830738b625928ec1567eaef01534774..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6067 zcmb_gO=uid9B&fvqVyou2CX;@^ibI7&P>{T;L#QewU#zwXbh7#vv0C5J3F(TkCa%V z7F4JpmYBnmMq^5qDiv=E(t|BM^xTUc1;MY2cR}#~-psuBb}}>BYy(RmGw=WP`+vOO ze_pqih7LXz+D+U>w??k0(Mkua(`pz6u^p*3HF+~e8tQbtraA<>jZmGQyQ0!3T21v( zo1TaNYHENl#U#M+!a;6`Ynd|F_ZHRFS|X!!nFfkbg%r3zBsqi2*(kz`ez4Mq?ELDp zXgTm-N}{;)GLJ97wjWTRQ~%q@BMuHD6MGTUEJd0g2_qE6A&8_95s%`-&L!~B2(z;u z>V!={G8rz3z?|yEt_vazj`^C}zz&MtNS&t7Wlfr-iGpN0#KqijX$*m$`IBTQ^@R$X z9TYXW2AW+8Ex6#@ki0WKKuUa1dE@+P6y*O%hIes`15$YVbQ69fmFGfHrm|QASAS#nl zhp6XQdvVYu9FkHpZ9irOkpjVzoB@0`Zd*7|d`C$RhYInOs0q*{iIpKXB-IS!ki3me z;+fV;gr)N9F)-st$Yt6TdYr1yj?22oCEb?m+by$Nxh#8JlC^y|ZhK5vc$C#Q&kqU$ zM4OaExD6G_pXYn8rD+r9vZhUD#nw~4ElhQ!sOEbPBqg_&ksFIXN&2iOZ`vfwr0Xvd z8@V&dsB5E+x=Jg6^}=hYL-UkGsE~#d`S7FYGGL)e8YFCiN9EmHpUnG|6vzHRH3d-V z^+BMT%DW|SO^Zp&x1JRJU{JAMs6iFlZrp%*pwJ1YAg`gEMQ5wAlbxheCQIr*0;p>g zz~VtsU2D2Lmv*f+T_32{7*b3*)L=4E$}3Hi=dp*peFw{!1h9;$0jkZ|(F9f1W=stT zRyJ3mt`vqEAFW9TR~DNg4-ePBj04jG%z@x!1LrYjntp=T*~9?Q=_kq?wP33269Zw# z;#F9@l_2Tz;Ey|<{RN3mGh>ixrptpO*Ng=q8H0^87H>Zw$<+>1#BAdTrTjKWITGji zeRI;YgOv_rUm&Msatd6}R%Kocc^ZWsmqe!TI3bSuuios2#4c_F?`14jGYoMVUZ83t zWC4j2y35CrA=9GJ55S|K0rgTug6U1pLAU^Cfeyl1<@BqSQIF@j7@e|*7{|B$2DT4^ zcP{J)M-tLGwv2-i$m6i4aw0hvcCZbx4%d$m8v`#}OxJ_ok8+CA-}(a!QO5VP-9W5B zut13oh(=4Bx_4;`d$t52+V}R9Ee1kVwz@MKcE3yOTU|uJsAjq)6sz0I*1%(}ixrVs z;QK;|MVu#wye(;1%0TVaA?4olshNWX|-ipRk83VJFG^cMptqNKAR1|2GIe`2U+ z<2)!$lzI{I-eY7sG=5oR(4&tV*$$e_!!V#M(y3g)i1!$U0fkf&g#oF1{3v$iuRJbY zbYG6D?lC6)L}^SqV3);Y5f8C1VeG+}FtTgs$jnl>FWNpwB8BF@T1GE#8AEEB!QGcR zA9vpn_ZZZJOz%qESGfbph^gdx!i>1S@|3Q`4xf%L3F&vV9GBw{V8oa8(k~ zuR@_82R3FiAFf&H>jnE}ci`}EfBZ9Se*g22pB#$sj=pei_|IY6`uEX0&AaQ z!oouHi`MNM)`m-5$JuUmmzHj|tZw&Ci>x=ly0O0ATq4&u*PVv-b?dEVH) X+L>$anP+}^f3(}3eD9;7d>H=&fS%yT diff --git a/crates/iceberg/testdata/partition_manifest_v2.avro b/crates/iceberg/testdata/partition_manifest_v2.avro deleted file mode 100644 index 9d10a8711e98d44d1cc9249b8c01396c004f8882..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3350 zcmcgvzmMZY6n2KyDq5|=YPH(x3YZF{_+mSoO*S>uR8*(cXkw3V&Dk?%W*n3pDJA6Tw}T zv+xJ>>?KyBkRfxE2v37PjuVzf_QCf&ua7y+7@p!eNw~&+tR>|t7BYi(NyIofTj2Zt zD_(|MUi^laRPg7mZf>6#&s4i}CKYf*!6ltb@l=I(ETezC{}Ss(ZrDf`3K@Z)HM@-R zBxXtzAS4&?-bSJD6vA7ZC{1;t%+{p~6OQ3sEpwWyJD~}rp|W{~d6A~@Xi4Apal~R; zq#94n`zDxuUN9aq!i#LiXG?qq30u%<4|8joR7z-fq5iSK6A^^)jsYlxt3^E_@ZXyUe%ujo8Y@ zBcUjoS+c-}Rawqt$>7kC*+R2M-)-B5A}umb6j*Ppsz~aLP${YD>qN5*+PZ$feGp#w z{+P^te*(6zm-huTK6W2~IZcaA!Bsjr6DD3ns}7PEmA_&Iztf=EskvULMu|>G~qi56j(j{qWIU z{~+$rg(PE|nn_XL1l47OOBZ_%yvox=6A{OX>4(>?x=Y&UCk#Fco)<8D-@vV%P5ami z31npi)!H@GcMb`2;oakUG+B`ZVhDM?^=Ryx)LmFr5{ih0U2~J1mICG96Ochxs#yI5 z;;C^38+o-6LY9z(xD&}>A*QKf_03$52!jDhwk5&ZVAZTPD3D+^%DU@=W_rMGR}Ea8 z3TmW48?anAW6$h3Y9J#~%&ZD&gRTuK>~#Y>r!u^wMnkoM8;ZkhJZ^wzLKwZ)hBV8J zaw}F0bkcxAUCsDwgWBba?@a@zlMI9>&GJ3q+D0$yz5S-PhDJ&_E=| diff --git a/crates/iceberg/testdata/simple_manifest_list_v1.avro b/crates/iceberg/testdata/simple_manifest_list_v1.avro deleted file mode 100644 index bcdd126de418800e16f19bf36146462aaf68079d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3759 zcmbW3J!~9B6vvZP2t`yVMMyDPAyVL=+xYH$c88Q8MM5IN2AKqFGq*eE8$362%+A^z z(FxH|fP}(wQjBfIC<>Befzr^@MQJDzqM)OK7CI_s_G4!D_SWpVyN{XodvE6b-+$if zesK2u1-Q-GYoXsq1NiStcO5YrB4@?n$Y(ro790u(>iYmv+(V)8dYB;ja}OgDgwD$C zyQLLFDEe|m7AO=P(@pt3V7`p=Ci9`djDmOGz5#Zy=z}*_K|r0y z=%yW=Fe~;N6OaIvAPDit2w)1jBZ0y^-7K(c1s*~!u*$~^hsgINStpLE&{?m_Kf#S_ zAPP|c1Oq!9i|Kr?T47-jpuh_$9EN=+?69{T8WAD`Cz)^L$FiC&RS?mK0wfXSM+EYd zSIM9rjEHbnRE+EnvZ5AC^Z*K&+*auMEJ}(=uSBPyD%2Ugu2Kew3O)k)`V)9fi-$hK zp^%sC&5698?W<31S=0a#B;+a23B8tXtIup(Ox-T3r;4Ykl><44Bl)-NkbxA4{(V|e zyw&K)Wo04xj0%Wp=w+Rf38~8%L6E98NR+h3BiZ6T0O=_0`92H*1Zoq|d4g?7qUlau zwkk5PBy_PRCuiy|se(c}gIV1^ZpTW2upPv`uIy2xKHa0KfHnVSu2+!k0?97WV;n$P zwvxWOnG?YFREdVeQ-G6Q^Ml>-#(2Sz%{P!5%S785>%vx}!2n9>sZtR1`{p=Ltf#SL z9b`InINPaId~xWcl2_|%DBiW_n^t^%m==3oo;}fWt=GrpGOtT(%IkSCu|l5($bUh$ z5Py(^aX|pj)8l#&)P3Vea(qbXjZ6K5Wvg0NmXW3{zO2#4 z!?z80$G zRR&>ZPOj7W3AsdVz{dDxsYvw}$;YO2r03M@@ytJ8{`%mLFV3(1;x9k{{q)_> PPoF(K^Vyw?r8NEnIL8Rv diff --git a/crates/iceberg/testdata/simple_manifest_list_v2.avro b/crates/iceberg/testdata/simple_manifest_list_v2.avro deleted file mode 100644 index 852d773131ef50dbf82abdd19766b2e6db2dcce0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1247 zcmb7^KX21O7{(nB6=Ldu7=Y=7#1hvIO(Z8@f)7wdr}N$GUV_#)NETS^lf25 zjN^`Lg>3;8^BQO6%9Y!;q*^)frGGuQ@Tvqw0jpAGU^cq;l9Kzy_7xMxfMKghVdvW2 z;%oaZrHnX&b1uMQs>>a`@Y;W6|Jm@~9Sj6GPqTN!lE~=u4ojPT4YQB9;n+vJp08aG zE0?>+O7+QIdLDH)eg~8>FbTK z0YXi~w2v6%q!bQKYRj!)3w;g-XMVDqq;C2b4lLTyT^7ZEcTcNohvO+MumnfyOx*E8 zcLPnU2Y>$vR5PIW>Nk9nYa@w^&cIl|8H3>F=FyL*&#HIduiiZV^!w*&co}}^*<&(^ zV@|>EuUWlf%s}ec4q`Wm8;Up?cpJLBooXIwjNv_(X^l-3O`Dj2kfHowW2f^;e4p+k}=2X(r% zuAR1?D0~>5hyOgZCN@VUO@yN%Ue-mA?MzY(I_5D=SPyNP1bKr`V+0vU%0|bIbL<2) z92UW2>Ug$keQYM*qNxQYhOPn!ohAyZhBCHN3G<2+5_Q85ezi7u?!svu(GA{+>hP6C z5W-}g!{m4oK>Mu1NAw2O@MBg?gF0G?$}*V9Dxj+O&_OZ8M`eR9MbF)y7?&r+kv}BK zq7yhJ2934zG3#UHf{gtU-e_<IH+w(GrO_^sXlujvy%Hg3@O+bIB2(L9q#dsbWuNyy{FeH?m z6C*W##v8{#eJnnC)) z#=Pj#X3;Mx{y>`{x_TxqK(%V{GWJHH4I;8TN#-sk13^5FCL# zx-q^Os-&!LlmuR`Hn&rlkI*qRL%4yT<|C+1CPGRIQicj2#4aWlIy)%oO^{LQk@NZz zb26J_bf_u;s#tF(RFx#AMR>KE$yIOGI|8ckEO?*`1vwgoA*Rq;qIe}pRqz$*xpOCp zto*pBqevjjAOhp2+u610dyRE3lLz)1%VtC%W(qrp9yB*f>P}NQ74P@0a;v0 z2HnQpHw5XrjVsBJEPJm)Us0GfXSJFWoVM9G_wta{R#Ec@fOF6!_QDy#PGgHupA~07 z9b1%oP?KzRwm2j1FkTAdO-mB%Ui{HWWOPYF+l2 z_8U@^MI?evVa9RfWmtvW8hikh7xA5X9dT}1q^lZaqy?d0k||dtc^wj};4El3I7_W4 zON+YDtRs0*p)ETd3v)pYioztwPUa0U$5e{>EuyA_;>%)aNB!mN5m5lFS>7B;5J8u1 zv^4#|Tt}iLyRmpJDM4;5@WdABwxB1s7P#{yzqG*3$F_=-_QW*)!h_$<0S~N)U>31a(Ba#`gXb?z#X(Wf+O;W7Gv2EML&s02yzMz4V64v z8iML$#3i{cCP@9{UhSze^<;YKPl6Q2^OUH|hY~8~C4fYk??i$9l0^Qe3q!p494`qM z0b(B4!Vb?!YI|tdbLOgHit#__#*eQ~c#T_MK@4-Nm)d-q-;xlh)j`Y;OSTgXB@PDc zmk3M}U}zY?5t5BbFt9LPqk|#6o_0VnWSo;p=V2maVRTa=gDD#+vipRt1A~LMur_28 zO*>2!95hl26daUp2iu7qG0dvKqRca&*c>yJgvJ`Qg z?I=q+`uQqBjX4is#aP#L`vh?qSJ5P@%}KA2tcXN`M!dyU(b~->2k}A;q!)a0oEI%t zR^#jH$Mmg@bU*IDNN3GBHD&MZI|JF5CLF)KpyHf&*2G9(S>=wh9kbqO-?sbeu6y2T zv5%KdcbVU>^{1y@6Lyfb2@&z*MFSKJ-_(%1HFR}WglL8Yeh9x zK1W$mq)-E|y(_A^zd+UX# zmgH4z`yy}8_PpG)dGhIw+0}vKm7Ogo7M#1%v8Lq6um2tVdGO|;MbBovJoov3Pmfu3 z$TR84D^2q%$CNF9YYJVy{;Rch?{pn~?%=n%HNDsGT)K8z`gCV&tnP`ysawY-^8P<6 COY>d; diff --git a/crates/iceberg/testdata/unpartition_manifest_v2.avro b/crates/iceberg/testdata/unpartition_manifest_v2.avro deleted file mode 100644 index 7652159f3780b804831825a1c48ee040c68a4dcf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3095 zcmcgu%Wm5^6qVUDFoM7c(jco2T5#0@QEbVLlek@WKdYiZgMgqUs$naUDoMGBGuibE zyy|McV;23Ke!*Z?vuQ6SOI98-QpBhhi7DQ5d31TmbN>uJbb7r|3d4lyM%2(!|itrv5G_~WD3LZ0U2;kH?ypK6cB8|r<7gdNh)Z`kb<|()^ z_;PPCEoi3Cq#@jZId3QDWrU)SbG&pRX&&pU$Hn-%}aY>wMCIiX3u*vt7R!M86_VgV^@u^Q^f zK;0kw4(j$ykyZ2eJWBAM{hhdj3Q5WgwU_sey5lm%k^`@^gc~B`STl3?eXH)0 zc1r^b(a9uKZu%=V2}cNPUr1=Zk+!9+|G&8pL} zFd2*SYs>^Z##&JutTI)L8a7ac54FQ;o3gkU4P2ba!fM;rE9b4jS>!F=kq??SVYzZRUi4w0KIFQJ+UV8;}#)YynZnr*vB2X0X-PSK}_A)k7M`6AUt-1z&~^kMh7$+MWG+CBew|Nhm!;?uIbJH zqc3mJt~2XwzFwPkdeimk#$VficXnnSH0{nhTeFUXcDrZ;tpTfbw28KW+79ZWZ5Zz& eXVY18wj9SrbP#6AQ|Gb|P)9Fb7