-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcount_categorical.py
45 lines (33 loc) · 1.51 KB
/
count_categorical.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def count_categorical(df, group_var, df_name):
"""Computes counts and normalized counts for each observation
of `group_var` of each unique category in every categorical variable
Parameters
--------
df : dataframe
The dataframe to calculate the value counts for.
group_var : string
The variable by which to group the dataframe. For each unique
value of this variable, the final dataframe will have one row
df_name : string
Variable added to the front of column names to keep track of columns
Return
--------
categorical : dataframe
A dataframe with counts and normalized counts of each unique category in every categorical variable
with one row for every unique value of the `group_var`.
"""
# Select the categorical columns
categorical = pd.get_dummies(df.select_dtypes('object'))
# Make sure to put the identifying id on the column
categorical[group_var] = df[group_var]
# Groupby the group var and calculate the sum and mean
categorical = categorical.groupby(group_var).agg(['sum', 'mean'])
column_names = []
# Iterate through the columns in level 0
for var in categorical.columns.levels[0]:
# Iterate through the stats in level 1
for stat in ['count', 'count_norm']:
# Make a new column name
column_names.append('%s_%s_%s' % (df_name, var, stat))
categorical.columns = column_names
return categorical