@@ -21,7 +21,7 @@ use serde_derive::{Deserialize, Serialize};
2121use serde_with:: serde_as;
2222
2323use super :: { Datum , ManifestEntry , Schema , Struct } ;
24- use crate :: spec:: { Literal , RawLiteral , StructType , Type } ;
24+ use crate :: spec:: { FormatVersion , Literal , RawLiteral , StructType , Type } ;
2525use crate :: { Error , ErrorKind } ;
2626
2727#[ derive( Serialize , Deserialize ) ]
@@ -40,7 +40,7 @@ impl ManifestEntryV2 {
4040 snapshot_id : value. snapshot_id ,
4141 sequence_number : value. sequence_number ,
4242 file_sequence_number : value. file_sequence_number ,
43- data_file : DataFileSerde :: try_from ( value. data_file , partition_type, false ) ?,
43+ data_file : DataFileSerde :: try_from ( value. data_file , partition_type, FormatVersion :: V2 ) ?,
4444 } )
4545 }
4646
@@ -74,7 +74,7 @@ impl ManifestEntryV1 {
7474 Ok ( Self {
7575 status : value. status as i32 ,
7676 snapshot_id : value. snapshot_id . unwrap_or_default ( ) ,
77- data_file : DataFileSerde :: try_from ( value. data_file , partition_type, true ) ?,
77+ data_file : DataFileSerde :: try_from ( value. data_file , partition_type, FormatVersion :: V1 ) ?,
7878 } )
7979 }
8080
@@ -129,9 +129,13 @@ impl DataFileSerde {
129129 pub fn try_from (
130130 value : super :: DataFile ,
131131 partition_type : & StructType ,
132- is_version_1 : bool ,
132+ format_version : FormatVersion ,
133133 ) -> Result < Self , Error > {
134- let block_size_in_bytes = if is_version_1 { Some ( 0 ) } else { None } ;
134+ let block_size_in_bytes = if format_version == FormatVersion :: V1 {
135+ Some ( 0 )
136+ } else {
137+ None
138+ } ;
135139 Ok ( Self {
136140 content : value. content as i32 ,
137141 file_path : value. file_path ,
@@ -292,16 +296,23 @@ fn parse_i64_entry(v: Vec<I64Entry>) -> Result<HashMap<i32, u64>, Error> {
292296 Ok ( m)
293297}
294298
299+ #[ allow( unused_mut) ]
295300fn to_i64_entry ( entries : HashMap < i32 , u64 > ) -> Result < Vec < I64Entry > , Error > {
296- entries
301+ let mut i64_entries = entries
297302 . iter ( )
298303 . map ( |e| {
299304 Ok ( I64Entry {
300305 key : * e. 0 ,
301306 value : ( * e. 1 ) . try_into ( ) ?,
302307 } )
303308 } )
304- . collect ( )
309+ . collect :: < Result < Vec < _ > , Error > > ( ) ?;
310+
311+ // Ensure that the order is deterministic during testing
312+ #[ cfg( test) ]
313+ i64_entries. sort_by_key ( |e| e. key ) ;
314+
315+ Ok ( i64_entries)
305316}
306317
307318#[ cfg( test) ]
@@ -432,4 +443,154 @@ mod tests {
432443
433444 assert_eq ! ( actual_data_file[ 0 ] . content, DataContentType :: Data )
434445 }
446+
/// Converts a hand-built `ManifestEntryV1` through `ManifestEntryV1::try_into`
/// and checks the sequence-number fields, the defaults applied to the data
/// file, and that the remaining fields are carried over unchanged.
#[test]
fn test_manifest_entry_v1_to_v2_projection() {
    use crate::spec::manifest::_serde::{DataFileSerde, ManifestEntryV1};
    use crate::spec::{Literal, RawLiteral, Struct, StructType};

    // Partition value: an empty struct encoded against an empty struct type.
    let empty_partition = RawLiteral::try_from(
        Literal::Struct(Struct::empty()),
        &Type::Struct(StructType::new(vec![])),
    )
    .unwrap();

    // The serde data-file struct is shared by the V1 and V2 entry types; every
    // optional field is left unset except the block size.
    let serde_data_file = DataFileSerde {
        content: 0,
        file_path: "test/path.parquet".to_string(),
        file_format: "PARQUET".to_string(),
        partition: empty_partition,
        record_count: 100,
        file_size_in_bytes: 1024,
        block_size_in_bytes: Some(0),
        column_sizes: None,
        value_counts: None,
        null_value_counts: None,
        nan_value_counts: None,
        lower_bounds: None,
        upper_bounds: None,
        key_metadata: None,
        split_offsets: None,
        // Expected to surface as an empty vec after conversion (asserted below).
        equality_ids: None,
        sort_order_id: None,
        first_row_id: None,
        referenced_data_file: None,
        content_offset: None,
        content_size_in_bytes: None,
    };

    // A V1 entry carries only status, snapshot id and the data file.
    let entry_v1 = ManifestEntryV1 {
        status: 1, // Added
        snapshot_id: 12345,
        data_file: serde_data_file,
    };

    // Arguments: partition spec id, partition type, table schema.
    let partition_type = StructType::new(vec![]);
    let table_schema = schema();
    let converted = entry_v1
        .try_into(0, &partition_type, &table_schema)
        .unwrap();

    // Entry-level fields: both sequence numbers are filled in, snapshot id kept.
    assert_eq!(
        converted.sequence_number,
        Some(0),
        "ManifestEntryV1::try_into() should set sequence_number to 0"
    );
    assert_eq!(
        converted.file_sequence_number,
        Some(0),
        "ManifestEntryV1::try_into() should set file_sequence_number to 0"
    );
    assert_eq!(
        converted.snapshot_id,
        Some(12345),
        "snapshot_id should be preserved during conversion"
    );

    // Data-file defaults produced by the conversion.
    let converted_file = &converted.data_file;
    assert_eq!(
        converted_file.content,
        DataContentType::Data,
        "DataFileSerde should convert content 0 to DataContentType::Data"
    );
    assert_eq!(
        converted_file.equality_ids,
        Vec::<i32>::new(),
        "DataFileSerde should convert None equality_ids to empty vec"
    );

    // Plain pass-through fields.
    assert_eq!(converted_file.file_path, "test/path.parquet");
    assert_eq!(converted_file.record_count, 100);
    assert_eq!(converted_file.file_size_in_bytes, 1024);
}
530+
/// Builds a `DataFileSerde` shaped like a V1 record (content `0`, no equality
/// ids, block size present) and checks what `DataFileSerde::try_into` produces
/// for the defaulted and the pass-through fields.
#[test]
fn test_data_file_serde_v1_field_defaults() {
    use crate::spec::manifest::_serde::DataFileSerde;
    use crate::spec::{Literal, RawLiteral, Struct, StructType};

    // Partition value: an empty struct encoded against an empty struct type.
    let empty_partition = RawLiteral::try_from(
        Literal::Struct(Struct::empty()),
        &Type::Struct(StructType::new(vec![])),
    )
    .unwrap();

    // Mirrors what the test treats as a V1-style record: `content` and
    // `equality_ids` hold their default values, `block_size_in_bytes` is set.
    let serde_data_file = DataFileSerde {
        content: 0,
        file_path: "test/data.parquet".to_string(),
        file_format: "PARQUET".to_string(),
        partition: empty_partition,
        record_count: 500,
        file_size_in_bytes: 2048,
        block_size_in_bytes: Some(1024),
        column_sizes: None,
        value_counts: None,
        null_value_counts: None,
        nan_value_counts: None,
        lower_bounds: None,
        upper_bounds: None,
        key_metadata: None,
        split_offsets: None,
        equality_ids: None,
        sort_order_id: None,
        first_row_id: None,
        referenced_data_file: None,
        content_offset: None,
        content_size_in_bytes: None,
    };

    // Arguments: partition spec id, partition type, table schema.
    let partition_type = StructType::new(vec![]);
    let table_schema = schema();
    let converted = serde_data_file
        .try_into(0, &partition_type, &table_schema)
        .unwrap();

    // Defaulted fields.
    assert_eq!(
        converted.content,
        DataContentType::Data,
        "content 0 should convert to DataContentType::Data"
    );
    assert_eq!(
        converted.equality_ids,
        Vec::<i32>::new(),
        "None equality_ids should convert to empty vec via unwrap_or_default()"
    );

    // Pass-through fields, including the spec id supplied to the conversion.
    assert_eq!(converted.file_path, "test/data.parquet");
    assert_eq!(converted.file_format, DataFileFormat::Parquet);
    assert_eq!(converted.record_count, 500);
    assert_eq!(converted.file_size_in_bytes, 2048);
    assert_eq!(converted.partition_spec_id, 0);
}
435596}
0 commit comments