@@ -129,7 +129,7 @@ def repack_row(chunk, header=True):
129129 # Add a row that shows the number of additional rows not shown
130130 len_row = pd .DataFrame (
131131 {
132- col : [f"<i>+{ n_rows - 1 } rows</i>" ] if i == 0 else ["..." ]
132+ col : [f"<i>+{ n_rows - 1 } rows</i>" ] if i == 0 else ["..." ]
133133 for i , col in enumerate (chunk .columns )
134134 }
135135 )
@@ -983,31 +983,32 @@ def describe(self, exclude_nest: bool = False, percentiles=None, include=None, e
983983 def explode (self , column : IndexLabel , ignore_index : bool = False ):
984984 """
985985
986- Transform each element of a list-like base column to a row, replicating index value.
987- Or unnest a specified nested column with the other columns being replicated as part
988- of the unnest. The exploded columns will be added to the right of the rest of the frame.
986+ Transform each element of a list-like base column or of a nested column to a row, replicating index values.
989987
990988 Parameters
991989 ----------
992990 column : IndexLabel
993- Base column(s) or nested column to explode.
994- For multiple base columns, specify a non-empty list with each element being a string or tuple.
995- For all specified base columns, their list-like data on same row of the frame
996- must have matching length.
997- Only a single nested column can be exploded at a time. Indicate the nested column as a string.
991+ Column(s) to explode.
992+ For multiple columns, specify a non-empty list with each element
993+ being a str or tuple; for all specified columns, the list-like data
994+ on the same row of the frame must have matching length.
998995 ignore_index : bool, default False
999996 If True, the resulting index will be labeled 0, 1, ..., n - 1.
1000997
1001998 Returns
1002999 -------
10031000 NestedFrame
1004- A new NestedFrame with the specified column(s) exploded.
1001+ Exploded lists to rows of the subset columns;
1002+ index will be duplicated for these rows.
10051003
10061004 Raises
10071005 ------
10081006 ValueError
1009- If specified columns to explode have more than one nested column,
1010- or contain a mix of nested and base columns.
1007+ It raises if:
1008+ 1) columns of the frame are not unique,
1009+ 2) specified columns to explode is an empty list,
1010+ 3) specified columns to explode do not have matching counts of
1011+ elements rowwise in the frame.
10111012
10121013 See Also
10131014 --------
@@ -1033,40 +1034,82 @@ def explode(self, column: IndexLabel, ignore_index: bool = False):
10331034
10341035 """
10351036
1036- if isinstance (column , list ):
1037- nested_in_list = [col for col in column if col in self .nested_columns ]
1038- # list contains more than 1 nested columns
1039- if len (nested_in_list ) > 1 :
1037+ if isinstance (column , str ):
1038+ columns = [column ]
1039+ elif isinstance (column , list ):
1040+ columns = column
1041+ if len (columns ) == 0 :
1042+ raise ValueError ("`column` must not be empty" )
1043+ if len (set (columns )) != len (columns ):
1044+ raise ValueError ("`column` must have unique elements" )
1045+ else :
1046+ raise ValueError ("`column` must be str or list" )
1047+ if len (extra_cols := set (columns ) - set (self .columns )) > 0 :
1048+ if len (extra_cols ) == 1 :
10401049 raise ValueError (
1041- f"Exploding multiple nested columns at once is not supported.\n "
1042- f"Nested columns: { nested_in_list } "
1050+ f"column { extra_cols .pop ()} not found, available columns: { list (self .columns )} "
10431051 )
1052+ raise ValueError (
1053+ f"columns { sorted (extra_cols )} not found, available columns: { list (self .columns )} "
1054+ )
10441055
1045- # list contains mixing nested & base columns
1046- if len (nested_in_list ) == 1 and len (column ) > 1 :
1056+ nested_columns = [col for col in columns if col in self .nested_columns ]
1057+ base_columns = [col for col in columns if col not in nested_columns ]
1058+
1059+ # Shortcut for the base-column-only case
1060+ if len (nested_columns ) == 0 :
1061+ return NestedFrame (super ().explode (columns , ignore_index = ignore_index ))
1062+
1063+ # Handle duplicated index use-case: use "ordinal" index, but keep the original one as a column to
1064+ # restore it later.
1065+ default_index_name = "__index_"
1066+ index_col_name = self .index .name or default_index_name
1067+ w_ordinal_idx = self .reset_index (drop = False , names = index_col_name )
1068+
1069+ # Call pandas.DataFrame.explode for non-nested columns
1070+ all_but_requested_nested_columns = [col for col in w_ordinal_idx .columns if col not in nested_columns ]
1071+ base_exploded = w_ordinal_idx [all_but_requested_nested_columns ]
1072+ if len (all_but_requested_nested_columns ) > 0 and len (base_columns ) > 0 :
1073+ base_exploded = super (NestedFrame , base_exploded ).explode (base_columns , ignore_index = False )
1074+ base_exploded = NestedFrame (base_exploded )
1075+
1076+ # Check if it was actually exploded, or no list-columns were there.
1077+ # This could fail in the case when all lists had one element only, we ignore that edge-case here.
1078+ is_base_exploded = not w_ordinal_idx .index .equals (base_exploded .index )
1079+
1080+ # Unnest each requested nested column and store as a "flat" dataframe.
1081+ flat_frames : list [Self ] = [] # type: ignore[name-defined] # noqa: F821
1082+ for nested_col in nested_columns :
1083+ # Check if counts (lengths) in nested columns mismatch
1084+ if len (flat_frames ) > 0 and np .any (
1085+ w_ordinal_idx [nested_col ].nest .list_lengths
1086+ != w_ordinal_idx [nested_columns [0 ]].nest .list_lengths
1087+ ):
10471088 raise ValueError (
1048- f"Exploding nested column together with base columns is not supported.\n "
1049- f"Nested column: { nested_in_list [0 ]} "
1089+ f"One or few rows of { nested_col } have different element counts from { nested_columns [0 ]} "
10501090 )
1091+ flat = w_ordinal_idx [nested_col ].nest .to_flat ()
1092+ # Check if counts (lengths) of this nested column mismatch with one of the list columns.
1093+ if is_base_exploded and not base_exploded .index .equals (flat .index ):
1094+ raise ValueError (
1095+ f"One or few rows of { nested_col } have different element counts "
1096+ f"from one or few of these columns: { base_columns } "
1097+ )
1098+ flat_frames .append (flat )
10511099
1052- # normalize a single-element list to string
1053- if isinstance (column , list ) and len (column ) == 1 :
1054- column = column [0 ]
1055-
1056- # handle single nested column explode
1057- if isinstance (column , str ) and column in self .nested_columns :
1058- selected_nested_df = self [column ].nest .to_flat ()
1059- other_col = [col for col in self .columns if col != column ]
1060- other_col_df = self [other_col ]
1061- result = other_col_df .join (selected_nested_df )
1062-
1063- if ignore_index :
1064- result = result .reset_index (drop = True )
1065-
1066- return NestedFrame (result )
1100+ if is_base_exploded :
1101+ result = pd .concat ([base_exploded ] + flat_frames , axis = 1 )
1102+ else :
1103+ # Join works here, because we used the ordinal index before exploding
1104+ result = base_exploded .join (pd .concat (flat_frames , axis = 1 ))
10671105
1068- # otherwise just use pandas' explode
1069- return NestedFrame (super ().explode (column = column , ignore_index = ignore_index ))
1106+ if ignore_index :
1107+ return result .drop (index_col_name , axis = 1 ).reset_index (drop = True )
1108+ # Restore original index
1109+ result = result .set_index (index_col_name , drop = True )
1110+ if result .index .name == default_index_name :
1111+ result .index .name = None
1112+ return result
10701113
10711114 def eval (self , expr : str , * , inplace : bool = False , ** kwargs ) -> Any | None :
10721115 """Evaluate a string describing operations on NestedFrame columns.
0 commit comments