33import pytest
44from nested_pandas import NestedDtype
55from nested_pandas .series .utils import (
6+ align_chunked_struct_list_offsets ,
7+ align_struct_list_offsets ,
68 nested_types_mapper ,
79 struct_field_names ,
810 transpose_list_struct_array ,
911 transpose_list_struct_scalar ,
1012 transpose_list_struct_type ,
1113 transpose_struct_list_array ,
1214 transpose_struct_list_type ,
13- validate_struct_list_array_for_equal_lengths ,
15+ validate_struct_list_type ,
1416)
1517
1618
17- def test_validate_struct_list_array_for_equal_lengths ():
18- """Test validate_struct_list_array_for_equal_lengths function."""
19+ def test_align_struct_list_offsets ():
20+ """Test align_struct_list_offsets function."""
1921 # Raises for wrong types
2022 with pytest .raises (ValueError ):
21- validate_struct_list_array_for_equal_lengths (pa .array ([], type = pa .int64 ()))
23+ align_struct_list_offsets (pa .array ([], type = pa .int64 ()))
2224 with pytest .raises (ValueError ):
23- validate_struct_list_array_for_equal_lengths (pa .array ([], type = pa .list_ (pa .int64 ())))
25+ align_struct_list_offsets (pa .array ([], type = pa .list_ (pa .int64 ())))
2426
2527 # Raises if one of the fields is not a ListArray
2628 with pytest .raises (ValueError ):
27- validate_struct_list_array_for_equal_lengths (
29+ align_struct_list_offsets (
2830 pa .StructArray .from_arrays ([pa .array ([[1 , 2 ], [3 , 4 , 5 ]]), pa .array ([1 , 2 ])], ["a" , "b" ])
2931 )
3032
3133 # Raises for mismatched lengths
3234 with pytest .raises (ValueError ):
33- validate_struct_list_array_for_equal_lengths (
35+ align_struct_list_offsets (
3436 pa .StructArray .from_arrays (
3537 [pa .array ([[1 , 2 ], [3 , 4 , 5 ]]), pa .array ([[1 , 2 , 3 ], [4 , 5 ]])], ["a" , "b" ]
3638 )
@@ -43,7 +45,96 @@ def test_validate_struct_list_array_for_equal_lengths():
4345 ],
4446 names = ["a" , "b" ],
4547 )
46- assert validate_struct_list_array_for_equal_lengths (input_array ) is None
48+ assert align_struct_list_offsets (input_array ) is input_array
49+
50+ a = pa .array ([[0 , 0 , 0 ], [1 , 2 ], [3 , 4 ], [], [5 , 6 , 7 ]])[1 :]
51+ assert a .offsets [0 ].as_py () == 3
52+ b = pa .array ([["x" , "y" ], ["y" , "x" ], [], ["d" , "e" , "f" ]])
53+ assert b .offsets [0 ].as_py () == 0
54+ input_array = pa .StructArray .from_arrays (
55+ arrays = [a , b ],
56+ names = ["a" , "b" ],
57+ )
58+ aligned_array = align_struct_list_offsets (input_array )
59+ assert aligned_array is not input_array
60+ assert aligned_array .equals (input_array )
61+
62+
def test_align_chunked_struct_list_offsets():
    """Exercise align_chunked_struct_list_offsets on plain and chunked inputs.

    Every scenario asserts that the returned array compares equal to the
    input; the first one additionally checks that a plain array is returned
    as a ``pa.ChunkedArray``.
    """

    def make_struct(first, second):
        # Bundle two list arrays into a struct array with fields "a" and "b".
        return pa.StructArray.from_arrays(arrays=[first, second], names=["a", "b"])

    # A plain (non-chunked) array goes in, a chunked array comes out.
    ints = pa.array([[1, 2], [3, 4], [], [5, 6, 7]])
    strs = pa.array([["x", "y"], ["y", "x"], [], ["d", "e", "f"]])
    plain = make_struct(ints, strs)
    result = align_chunked_struct_list_offsets(plain)
    assert isinstance(result, pa.ChunkedArray)
    assert result.equals(pa.chunked_array([plain]))

    # An "aligned" chunked array: the same chunk repeated twice.
    chunked = pa.chunked_array([make_struct(ints, strs)] * 2)
    assert align_chunked_struct_list_offsets(chunked).equals(chunked)

    # An "aligned" chunked array whose offsets do not start with zero:
    # both fields are slices that drop a leading element.
    ints = pa.array([[0, 0, 0], [1, 2], [3, 4], [], [5, 6, 7]])[1:]
    strs = pa.array([["a", "a", "a", "a"], ["x", "y"], ["y", "x"], [], ["d", "e", "f"]])[1:]
    chunked = pa.chunked_array([make_struct(ints, strs)] * 3)
    assert align_chunked_struct_list_offsets(chunked).equals(chunked)

    # A "non-aligned" chunked array: only the first field is a slice.
    ints = pa.array([[0, 0, 0], [1, 2], [3, 4], [], [5, 6, 7]])[1:]
    strs = pa.array([["x", "y"], ["y", "x"], [], ["d", "e", "f"]])
    chunked = pa.chunked_array([make_struct(ints, strs)] * 4)
    assert align_chunked_struct_list_offsets(chunked).equals(chunked)
118+
119+
def test_validate_struct_list_type():
    """Check which pyarrow types validate_struct_list_type rejects and accepts.

    Each rejected type must raise ``ValueError``; the struct whose fields are
    all lists must pass, with the function returning ``None``.
    """
    rejected_types = [
        # Not a struct at all.
        pa.float64(),
        # A list of structs rather than a struct of lists.
        pa.list_(pa.struct({"a": pa.int64()})),
        # A struct whose only field is not a list.
        pa.struct({"a": pa.float64()}),
        # A struct mixing a list field with a non-list field.
        pa.struct({"a": pa.list_(pa.float64()), "b": pa.float64()}),
    ]
    for bad_type in rejected_types:
        with pytest.raises(ValueError):
            validate_struct_list_type(bad_type)

    accepted_type = pa.struct({"a": pa.list_(pa.float64()), "b": pa.list_(pa.float64())})
    assert validate_struct_list_type(accepted_type) is None
47138
48139
49140def test_transpose_struct_list_type ():
0 commit comments