@@ -326,10 +326,11 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
326326mod tests {
327327 use super :: * ;
328328 use crate :: VariantArrayBuilder ;
329- use arrow:: array:: { Array , Float64Array , Int64Array } ;
329+ use arrow:: array:: { Array , FixedSizeBinaryArray , Float64Array , Int64Array } ;
330330 use arrow:: datatypes:: { DataType , Field , Fields } ;
331331 use parquet_variant:: { ObjectBuilder , ReadOnlyMetadataBuilder , Variant , VariantBuilder } ;
332332 use std:: sync:: Arc ;
333+ use uuid:: Uuid ;
333334
334335 #[ test]
335336 fn test_already_shredded_input_error ( ) {
@@ -369,6 +370,73 @@ mod tests {
369370 shred_variant ( & input, & list_schema) . expect_err ( "unsupported" ) ;
370371 }
371372
/// Requesting a `FixedSizeBinary` shredding type whose width is not 16 must be rejected with an
/// error message that names the offending type.
#[test]
fn test_invalid_fixed_size_binary_shredding() {
    let uuid = Uuid::new_v4();
    let input = VariantArray::from_iter([Some(Variant::from(uuid)), None]);

    // Width 17 is deliberately wrong: the expected message below states that only
    // FixedSizeBinary(16) (UUID) is a supported shredding type.
    let unsupported_type = DataType::FixedSizeBinary(17);
    let err = shred_variant(&input, &unsupported_type).unwrap_err();
    let message = err.to_string();

    assert_eq!(
        message,
        "Invalid argument error: FixedSizeBinary(17) is not a valid variant shredding type. Only FixedSizeBinary(16) for UUID is supported."
    );
}
387+
/// Shreds a mix of UUID, null and boolean variants as `FixedSizeBinary(16)` and checks that the
/// `typed_value` column holds the raw UUID bytes for the UUID rows and nulls everywhere else.
#[test]
fn test_uuid_shredding() {
    let first_uuid = Uuid::new_v4();
    let second_uuid = Uuid::new_v4();

    // Row layout: 0 = UUID, 1 = null, 2 = boolean (not a UUID), 3 = UUID.
    let input = VariantArray::from_iter([
        Some(Variant::from(first_uuid)),
        None,
        Some(Variant::from(false)),
        Some(Variant::from(second_uuid)),
    ]);

    let uuid_type = DataType::FixedSizeBinary(16);
    let variant_array = shred_variant(&input, &uuid_type).unwrap();

    // NOTE(review): a check that the `typed_value` field carries the canonical Uuid extension
    // type was stubbed out here as commented-out code — TODO confirm whether it should be
    // reinstated once that metadata is available on the field.

    // Downcast the typed_value column so the shredded bytes can be inspected directly.
    let typed_value = variant_array.typed_value_field().unwrap();
    let shredded = typed_value
        .as_any()
        .downcast_ref::<FixedSizeBinaryArray>()
        .unwrap();

    assert_eq!(shredded.len(), 4);

    // A successfully shredded row is non-null and holds exactly the UUID's bytes.
    let expect_shredded = |row: usize, expected: &Uuid| {
        assert!(!shredded.is_null(row));

        let bytes: &[u8] = shredded.value(row);
        assert_eq!(bytes, expected.as_bytes());
    };

    expect_shredded(0, &first_uuid);

    // Neither the null row nor the boolean row has a typed value.
    for row in [1, 2] {
        assert!(shredded.is_null(row));
    }

    expect_shredded(3, &second_uuid);
}
439+
372440 #[ test]
373441 fn test_primitive_shredding_comprehensive ( ) {
374442 // Test mixed scenarios in a single array
@@ -869,6 +937,187 @@ mod tests {
869937 assert ! ( value_field3. is_null( 0 ) ) ; // fully shredded, no remaining fields
870938 }
871939
/// Shreds objects against a struct schema with two `FixedSizeBinary(16)` fields (`id` and
/// `session_id`) and checks, row by row, which values end up in each field's `typed_value`
/// column and which remain in a `value` column.
#[test]
fn test_uuid_shredding_in_objects() {
    let uuid_a = Uuid::new_v4();
    let uuid_b = Uuid::new_v4();
    let uuid_c = Uuid::new_v4();

    let mut rows = VariantArrayBuilder::new(6);

    // Row 0: both UUID fields present, nothing else
    rows.new_object()
        .with_field("id", uuid_a)
        .with_field("session_id", uuid_b)
        .finish();

    // Row 1: both UUID fields plus an extra field that is not in the schema
    rows.new_object()
        .with_field("id", uuid_b)
        .with_field("session_id", uuid_c)
        .with_field("name", "test_user")
        .finish();

    // Row 2: session_id is absent
    rows.new_object().with_field("id", uuid_a).finish();

    // Row 3: id is a UUID but session_id is a string
    rows.new_object()
        .with_field("id", uuid_c)
        .with_field("session_id", "not-a-uuid")
        .finish();

    // Row 4: id is an int64 but session_id is a UUID
    rows.new_object()
        .with_field("id", 12345i64)
        .with_field("session_id", uuid_a)
        .finish();

    // Row 5: null
    rows.append_null();

    let input = rows.build();

    // Both shredded fields are nullable FixedSizeBinary(16) columns.
    let shredded_fields: Vec<Field> = ["id", "session_id"]
        .into_iter()
        .map(|name| Field::new(name, DataType::FixedSizeBinary(16), true))
        .collect();
    let target_schema = DataType::Struct(Fields::from(shredded_fields));

    let result = shred_variant(&input, &target_schema).unwrap();

    assert!(result.value_field().is_some());
    assert!(result.typed_value_field().is_some());
    assert_eq!(result.len(), 6);

    let metadata = result.metadata_field();
    let residual = result.value_field().unwrap();
    let typed_struct = result
        .typed_value_field()
        .unwrap()
        .as_any()
        .downcast_ref::<arrow::array::StructArray>()
        .unwrap();

    // Pull the per-field shredded columns out of the typed_value struct.
    let id_column = typed_struct.column_by_name("id").unwrap();
    let id_field = ShreddedVariantFieldArray::try_new(id_column).unwrap();
    let session_column = typed_struct.column_by_name("session_id").unwrap();
    let session_field = ShreddedVariantFieldArray::try_new(session_column).unwrap();

    let id_residual = id_field
        .value_field()
        .unwrap()
        .as_any()
        .downcast_ref::<BinaryViewArray>()
        .unwrap();
    let id_typed = id_field
        .typed_value_field()
        .unwrap()
        .as_any()
        .downcast_ref::<FixedSizeBinaryArray>()
        .unwrap();
    let session_residual = session_field
        .value_field()
        .unwrap()
        .as_any()
        .downcast_ref::<BinaryViewArray>()
        .unwrap();
    let session_typed = session_field
        .typed_value_field()
        .unwrap()
        .as_any()
        .downcast_ref::<FixedSizeBinaryArray>()
        .unwrap();

    // A field that shredded as a UUID has a null `value`, a valid `typed_value`, and the
    // UUID's bytes stored in `typed_value`.
    let expect_uuid = |field_residual: &BinaryViewArray,
                       field_typed: &FixedSizeBinaryArray,
                       row: usize,
                       expected: &Uuid| {
        assert!(field_residual.is_null(row));
        assert!(field_typed.is_valid(row));
        assert_eq!(field_typed.value(row), expected.as_bytes());
    };

    // Row 0: fully shredded - both UUID fields shred successfully
    assert!(result.is_valid(0));

    assert!(residual.is_null(0)); // nothing left over outside the schema
    assert!(id_residual.is_null(0));
    assert!(session_residual.is_null(0));

    assert!(typed_struct.is_valid(0));
    assert!(id_typed.is_valid(0));
    assert!(session_typed.is_valid(0));

    assert_eq!(id_typed.value(0), uuid_a.as_bytes());
    assert_eq!(session_typed.value(0), uuid_b.as_bytes());

    // Row 1: partially shredded - the extra "name" field stays in the top-level value
    assert!(result.is_valid(1));

    assert!(residual.is_valid(1));
    assert!(typed_struct.is_valid(1));

    expect_uuid(id_residual, id_typed, 1, &uuid_b);
    expect_uuid(session_residual, session_typed, 1, &uuid_c);

    // The top-level value must decode to an object that still carries "name".
    let Variant::Object(leftover) = Variant::new(metadata.value(1), residual.value(1)) else {
        panic!("Expected object");
    };

    assert_eq!(leftover.get("name"), Some(Variant::from("test_user")));

    // Row 2: session_id is missing
    assert!(result.is_valid(2));

    assert!(residual.is_null(2)); // no extra fields
    assert!(typed_struct.is_valid(2));

    expect_uuid(id_residual, id_typed, 2, &uuid_a);

    assert!(session_residual.is_null(2));
    assert!(session_typed.is_null(2)); // missing field: null in both columns

    // Row 3: type mismatch - session_id is a string, not a UUID
    assert!(result.is_valid(3));

    assert!(residual.is_null(3)); // no extra fields
    assert!(typed_struct.is_valid(3));

    expect_uuid(id_residual, id_typed, 3, &uuid_c);

    assert!(session_residual.is_valid(3)); // mismatched value is kept in `value`
    assert!(session_typed.is_null(3));
    let session_fallback = Variant::new(metadata.value(3), session_residual.value(3));
    assert_eq!(session_fallback, Variant::from("not-a-uuid"));

    // Row 4: type mismatch - id is an int64, not a UUID
    assert!(result.is_valid(4));

    assert!(residual.is_null(4)); // no extra fields
    assert!(typed_struct.is_valid(4));

    assert!(id_residual.is_valid(4)); // mismatched value is kept in `value`
    assert!(id_typed.is_null(4));
    let id_fallback = Variant::new(metadata.value(4), id_residual.value(4));
    assert_eq!(id_fallback, Variant::from(12345i64));

    expect_uuid(session_residual, session_typed, 4, &uuid_a);

    // Row 5: null
    assert!(result.is_null(5));
}
1120+
8721121 #[ test]
8731122 fn test_spec_compliance ( ) {
8741123 let input = VariantArray :: from_iter ( vec ! [ Variant :: from( 42i64 ) , Variant :: from( "hello" ) ] ) ;
0 commit comments