1010from numpy .typing import ArrayLike
1111from pandas .api .extensions import register_series_accessor
1212
13- from nested_pandas .nestedframe .core import NestedFrame
1413from nested_pandas .series .dtype import NestedDtype
14+ from nested_pandas .series .nestedseries import NestedSeries
1515from nested_pandas .series .packer import pack_flat , pack_sorted_df_into_struct
1616from nested_pandas .series .utils import nested_types_mapper
1717
@@ -152,7 +152,7 @@ def fields(self) -> list[str]:
152152 """Names of the nested columns"""
153153 return self ._series .array .field_names
154154
155- def with_field (self , field : str , value : ArrayLike ) -> pd . Series :
155+ def with_field (self , field : str , value : ArrayLike ) -> NestedSeries :
156156 """Set the field from flat-array of values and return a new series
157157
158158 It is an alias for `.nest.with_flat_field`.
@@ -167,7 +167,7 @@ def with_field(self, field: str, value: ArrayLike) -> pd.Series:
167167
168168 Returns
169169 -------
170- pd.Series
170+ NestedSeries
171171 The new series with the field set.
172172
173173 Examples
@@ -185,7 +185,7 @@ def with_field(self, field: str, value: ArrayLike) -> pd.Series:
185185 """
186186 return self .with_flat_field (field , value )
187187
188- def with_flat_field (self , field : str , value : ArrayLike ) -> pd . Series :
188+ def with_flat_field (self , field : str , value : ArrayLike ) -> NestedSeries :
189189 """Set the field from flat-array of values and return a new series
190190
191191 Parameters
@@ -198,7 +198,7 @@ def with_flat_field(self, field: str, value: ArrayLike) -> pd.Series:
198198
199199 Returns
200200 -------
201- pd.Series
201+ NestedSeries
202202 The new series with the field set.
203203
204204 Examples
@@ -217,9 +217,9 @@ def with_flat_field(self, field: str, value: ArrayLike) -> pd.Series:
217217 """
218218 new_array = self ._series .array .copy ()
219219 new_array .set_flat_field (field , value )
220- return pd . Series (new_array , copy = False , index = self ._series .index , name = self ._series .name )
220+ return NestedSeries (new_array , copy = False , index = self ._series .index , name = self ._series .name )
221221
222- def with_list_field (self , field : str , value : ArrayLike ) -> pd . Series :
222+ def with_list_field (self , field : str , value : ArrayLike ) -> NestedSeries :
223223 """Set the field from list-array of values and return a new series
224224
225225 Parameters
@@ -232,7 +232,7 @@ def with_list_field(self, field: str, value: ArrayLike) -> pd.Series:
232232
233233 Returns
234234 -------
235- pd.Series
235+ NestedSeries
236236 The new series with the field set.
237237
238238 Examples
@@ -253,9 +253,9 @@ def with_list_field(self, field: str, value: ArrayLike) -> pd.Series:
253253 """
254254 new_array = self ._series .array .copy ()
255255 new_array .set_list_field (field , value )
256- return pd . Series (new_array , copy = False , index = self ._series .index , name = self ._series .name )
256+ return NestedSeries (new_array , copy = False , index = self ._series .index , name = self ._series .name )
257257
258- def with_filled_field (self , field : str , value : ArrayLike ) -> pd . Series :
258+ def with_filled_field (self , field : str , value : ArrayLike ) -> NestedSeries :
259259 """Set the field by repeating values and return a new series
260260
261261 The input value array must have as many elements as the Series,
@@ -273,7 +273,7 @@ def with_filled_field(self, field: str, value: ArrayLike) -> pd.Series:
273273
274274 Returns
275275 -------
276- pd.Series
276+ NestedSeries
277277 The new series with the field set.
278278
279279 Examples
@@ -292,9 +292,9 @@ def with_filled_field(self, field: str, value: ArrayLike) -> pd.Series:
292292 """
293293 new_array = self ._series .array .copy ()
294294 new_array .fill_field_lists (field , value )
295- return pd . Series (new_array , copy = False , index = self ._series .index , name = self ._series .name )
295+ return NestedSeries (new_array , copy = False , index = self ._series .index , name = self ._series .name )
296296
297- def without_field (self , field : str | list [str ]) -> pd . Series :
297+ def without_field (self , field : str | list [str ]) -> NestedSeries :
298298 """Remove the field(s) from the series and return a new series
299299
300300 Note, that at least one field must be left in the series.
@@ -306,7 +306,7 @@ def without_field(self, field: str | list[str]) -> pd.Series:
306306
307307 Returns
308308 -------
309- pd.Series
309+ NestedSeries
310310 The new series without the field(s).
311311
312312 Examples
@@ -328,9 +328,9 @@ def without_field(self, field: str | list[str]) -> pd.Series:
328328
329329 new_array = self ._series .array .copy ()
330330 new_array .pop_fields (field )
331- return pd . Series (new_array , copy = False , index = self ._series .index , name = self ._series .name )
331+ return NestedSeries (new_array , copy = False , index = self ._series .index , name = self ._series .name )
332332
333- def query_flat (self , query : str ) -> pd . Series :
333+ def query_flat (self , query : str ) -> NestedSeries :
334334 """Query the flat arrays with a boolean expression
335335
336336 Currently, it will remove empty rows from the output series.
@@ -343,7 +343,7 @@ def query_flat(self, query: str) -> pd.Series:
343343
344344 Returns
345345 -------
346- pd.Series
346+ NestedSeries
347347 The filtered series.
348348
349349 Examples
@@ -363,8 +363,10 @@ def query_flat(self, query: str) -> pd.Series:
363363 flat = self .to_flat ().query (query )
364364
365365 if len (flat ) == 0 :
366- return pd .Series (
367- [], dtype = self ._series .dtype , index = pd .Index ([], dtype = flat .index .dtype , name = flat .index .name )
366+ return NestedSeries (
367+ [],
368+ dtype = self ._series .dtype ,
369+ index = pd .Index ([], dtype = flat .index .dtype , name = flat .index .name ),
368370 )
369371 return pack_sorted_df_into_struct (flat )
370372
@@ -393,7 +395,7 @@ def get_flat_index(self) -> pd.Index:
393395 return flat_index
394396
395397 def get_flat_series (self , field : str ) -> pd .Series :
396- """Get the flat-array field as a Series
398+ """Get the flat-array field as a pd. Series
397399
398400 Parameters
399401 ----------
@@ -434,13 +436,16 @@ def get_flat_series(self, field: str) -> pd.Series:
434436
435437 flat_chunked_array = pa .chunked_array (flat_chunks , type = self ._series .dtype .fields [field ])
436438
437- return pd .Series (
439+ flat_series = pd .Series (
438440 flat_chunked_array ,
439441 dtype = self ._series .dtype .field_dtype (field ),
440442 index = self .get_flat_index (),
441443 name = field ,
442444 copy = False ,
443445 )
446+ if isinstance (self ._series .dtype .field_dtype (field ), NestedDtype ):
447+ return NestedSeries (flat_series , copy = False )
448+ return flat_series
444449
445450 def get_list_series (self , field : str ) -> pd .Series :
446451 """Get the list-array field as a Series
@@ -479,20 +484,26 @@ def get_list_series(self, field: str) -> pd.Series:
479484 copy = False ,
480485 )
481486
482- def __getitem__ (self , key : str | list [str ]) -> pd . Series :
487+ def __getitem__ (self , key : str | list [str ]) -> NestedSeries :
483488 # Allow boolean masking given a Series of booleans
484489 if isinstance (key , pd .Series ) and pd .api .types .is_bool_dtype (key .dtype ):
485490 flat_df = self .to_flat () # Use the flat representation
486491 if not key .index .equals (flat_df .index ):
487492 raise ValueError ("Boolean mask must have the same index as the flattened nested dataframe." )
488- # Apply the mask to the series, return a new NestedFrame
489- return NestedFrame (index = self ._series .index ).add_nested (flat_df [key ], name = self ._series .name )
493+ # Apply the mask to the series
494+ return NestedSeries (
495+ pack_flat (flat_df [key ]),
496+ index = self ._series .index ,
497+ name = self ._series .name ,
498+ )
490499
491- # If the key is a single string, return the flat series for that field
500+ # A list of fields may return a pd.Series or a NestedSeries depending
501+ # on the number of fields requested and their dtypes
492502 if isinstance (key , list ):
493503 new_array = self ._series .array .view_fields (key )
494- return pd . Series (new_array , index = self ._series .index , name = self ._series .name )
504+ return NestedSeries (new_array , index = self ._series .index , name = self ._series .name )
495505
506+ # If the key is a single string, return the flat series for that field
496507 return self .get_flat_series (key )
497508
498509 def __setitem__ (self , key : str , value : ArrayLike ) -> None :
@@ -551,8 +562,8 @@ def clear(self) -> None:
551562 """
552563 raise NotImplementedError ("Cannot delete fields from nested series" )
553564
554- def to_flatten_inner (self , field : str ) -> pd . Series :
555- """Explode the nested inner field and return as a pd.Series
565+ def to_flatten_inner (self , field : str ) -> NestedSeries :
566+ """Explode the nested inner field and return as a NestedSeries
556567
557568 Works for the case of multiple nesting only, the field must represent
558569 a nested series.
@@ -576,7 +587,7 @@ def to_flatten_inner(self, field: str) -> pd.Series:
576587
577588 Returns
578589 -------
579- pd.Series
590+ NestedSeries
580591 This series object, but with the inner field exploded.
581592
582593 Examples
@@ -585,17 +596,18 @@ def to_flatten_inner(self, field: str) -> pd.Series:
585596 >>> from nested_pandas import NestedFrame
586597 >>> from nested_pandas.datasets import generate_data
587598 >>> nf = generate_data(5, 2, seed=1).rename(columns={"nested": "inner"})
599+ >>> nf["b"] = "b" # Shorten width of example output
588600
589601 Assign a repeated ID to double-nest on
590602
591603 >>> nf["id"] = [0, 0, 0, 1, 1]
592604 >>> nf
593- a b inner id
594- 0 0.417022 0.184677 [{t: 8.38389, flux: 80.074457, band: 'r'}; …] ... 0
595- 1 0.720324 0.372520 [{t: 13.70439, flux: 96.826158, band: 'g'}; …]... 0
596- 2 0.000114 0.691121 [{t: 4.089045, flux: 31.342418, band: 'g'}; …]... 0
597- 3 0.302333 0.793535 [{t: 17.562349, flux: 69.232262, band: 'r'}; …... 1
598- 4 0.146756 1.077633 [{t: 0.547752, flux: 87.638915, band: 'g'}; …]... 1
605+ a b inner id
606+ 0 0.417022 b [{t: 8.38389, flux: 80.074457, band: 'r'}; …] ... 0
607+ 1 0.720324 b [{t: 13.70439, flux: 96.826158, band: 'g'}; …]... 0
608+ 2 0.000114 b [{t: 4.089045, flux: 31.342418, band: 'g'}; …]... 0
609+ 3 0.302333 b [{t: 17.562349, flux: 69.232262, band: 'r'}; …... 1
610+ 4 0.146756 b [{t: 0.547752, flux: 87.638915, band: 'g'}; …]... 1
599611
600612 >>> nf.inner.nest.to_flat()
601613 t flux band
@@ -620,23 +632,23 @@ def to_flatten_inner(self, field: str) -> pd.Series:
620632 >>> concated_nf_series = dnf["outer"].nest.to_flatten_inner("inner")
621633 >>> concated_nf_series
622634 id
623- 0 [{a: 0.417022, b: 0.184677 , t: 8.38389, flux: ...
624- 1 [{a: 0.302333, b: 0.793535 , t: 17.562349, flux...
625- Name: outer, dtype: nested<a: [double], b: [double ], t: [double], flux: [double], band: [string]>
635+ 0 [{a: 0.417022, b: 'b' , t: 8.38389, flux: 80.07 ...
636+ 1 [{a: 0.302333, b: 'b' , t: 17.562349, flux: 69. ...
637+ Name: outer, dtype: nested<a: [double], b: [string ], t: [double], flux: [double], band: [string]>
626638
627639 >>> concated_nf_series.nest.to_flat() # doctest: +NORMALIZE_WHITESPACE
628- a b t flux band
640+ a b t flux band
629641 id
630- 0 0.417022 0.184677 8.38389 80.074457 r
631- 0 0.417022 0.184677 13.40935 89.460666 g
632- 0 0.720324 0.37252 13.70439 96.826158 g
633- 0 0.720324 0.37252 8.346096 8.504421 g
634- 0 0.000114 0.691121 4.089045 31.342418 g
635- 0 0.000114 0.691121 11.173797 3.905478 g
636- 1 0.302333 0.793535 17.562349 69.232262 r
637- 1 0.302333 0.793535 2.807739 16.983042 r
638- 1 0.146756 1.077633 0.547752 87.638915 g
639- 1 0.146756 1.077633 3.96203 87.81425 r
642+ 0 0.417022 b 8.38389 80.074457 r
643+ 0 0.417022 b 13.40935 89.460666 g
644+ 0 0.720324 b 13.70439 96.826158 g
645+ 0 0.720324 b 8.346096 8.504421 g
646+ 0 0.000114 b 4.089045 31.342418 g
647+ 0 0.000114 b 11.173797 3.905478 g
648+ 1 0.302333 b 17.562349 69.232262 r
649+ 1 0.302333 b 2.807739 16.983042 r
650+ 1 0.146756 b 0.547752 87.638915 g
651+ 1 0.146756 b 3.96203 87.81425 r
640652 """
641653 if not isinstance (self ._series .dtype .field_dtype (field ), NestedDtype ):
642654 raise ValueError (
@@ -669,7 +681,7 @@ def to_flatten_inner(self, field: str) -> pd.Series:
669681
670682 # Some indexes may be missed if the original series had some NULLs
671683 if len (result ) < len (series ):
672- nulls = pd . Series (None , index = series .index , dtype = result .dtype )
684+ nulls = NestedSeries (None , index = series .index , dtype = result .dtype )
673685 nulls [result .index ] = result
674686 result = nulls
675687
0 commit comments