1
+ {
2
+ "metadata" : {
3
+ "name" : " " ,
4
+ "signature" : " sha256:f31de46477b2663a07c56bdac9684ccc42094e16ec9c87b5caaea8cf66eaf43d"
5
+ },
6
+ "nbformat" : 3 ,
7
+ "nbformat_minor" : 0 ,
8
+ "worksheets" : [
9
+ {
10
+ "cells" : [
11
+ {
12
+ "cell_type" : " markdown" ,
13
+ "metadata" : {},
14
+ "source" : [
15
+ " # Expand Cells Containing Lists Into Their Own Variables In Pandas\n " ,
16
+ " \n " ,
17
+ " This snippet was written by [Chris R. Albon](http://www.chrisralbon.com/) and is part of his collection of [well-documented Python snippets](https://github.com/chrisalbon/code_py). All code is written in Python 3 in an IPython notebook and is offered under the [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/)."
18
+ ]
19
+ },
20
+ {
21
+ "cell_type" : " code" ,
22
+ "collapsed" : false ,
23
+ "input" : [
24
+ " # import pandas\n " ,
25
+ " import pandas as pd"
26
+ ],
27
+ "language" : " python" ,
28
+ "metadata" : {},
29
+ "outputs" : [],
30
+ "prompt_number" : 20
31
+ },
32
+ {
33
+ "cell_type" : " code" ,
34
+ "collapsed" : false ,
35
+ "input" : [
36
+ " # create a dataset\n " ,
37
+ " raw_data = {'score': [1,2,3], \n " ,
38
+ " 'tags': [['apple','pear','guava'],['truck','car','plane'],['cat','dog','mouse']]}\n " ,
39
+ " df = pd.DataFrame(raw_data, columns = ['score', 'tags'])\n " ,
40
+ " \n " ,
41
+ " # view the dataset\n " ,
42
+ " df"
43
+ ],
44
+ "language" : " python" ,
45
+ "metadata" : {},
46
+ "outputs" : [
47
+ {
48
+ "html" : [
49
+ " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " ,
50
+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
51
+ " <thead>\n " ,
52
+ " <tr style=\" text-align: right;\" >\n " ,
53
+ " <th></th>\n " ,
54
+ " <th>score</th>\n " ,
55
+ " <th>tags</th>\n " ,
56
+ " </tr>\n " ,
57
+ " </thead>\n " ,
58
+ " <tbody>\n " ,
59
+ " <tr>\n " ,
60
+ " <th>0</th>\n " ,
61
+ " <td> 1</td>\n " ,
62
+ " <td> [apple, pear, guava]</td>\n " ,
63
+ " </tr>\n " ,
64
+ " <tr>\n " ,
65
+ " <th>1</th>\n " ,
66
+ " <td> 2</td>\n " ,
67
+ " <td> [truck, car, plane]</td>\n " ,
68
+ " </tr>\n " ,
69
+ " <tr>\n " ,
70
+ " <th>2</th>\n " ,
71
+ " <td> 3</td>\n " ,
72
+ " <td> [cat, dog, mouse]</td>\n " ,
73
+ " </tr>\n " ,
74
+ " </tbody>\n " ,
75
+ " </table>\n " ,
76
+ " </div>"
77
+ ],
78
+ "metadata" : {},
79
+ "output_type" : " pyout" ,
80
+ "prompt_number" : 24 ,
81
+ "text" : [
82
+ " score tags\n " ,
83
+ " 0 1 [apple, pear, guava]\n " ,
84
+ " 1 2 [truck, car, plane]\n " ,
85
+ " 2 3 [cat, dog, mouse]"
86
+ ]
87
+ }
88
+ ],
89
+ "prompt_number" : 24
90
+ },
91
+ {
92
+ "cell_type" : " code" ,
93
+ "collapsed" : false ,
94
+ "input" : [
95
+ " # expand df.tags into its own dataframe\n " ,
96
+ " tags = df['tags'].apply(pd.Series)\n " ,
97
+ " \n " ,
98
+ " # rename each variable is tags\n " ,
99
+ " tags = tags.rename(columns = lambda x : 'tag_' + str(x))\n " ,
100
+ " \n " ,
101
+ " # view the tags dataframe\n " ,
102
+ " tags"
103
+ ],
104
+ "language" : " python" ,
105
+ "metadata" : {},
106
+ "outputs" : [
107
+ {
108
+ "html" : [
109
+ " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " ,
110
+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
111
+ " <thead>\n " ,
112
+ " <tr style=\" text-align: right;\" >\n " ,
113
+ " <th></th>\n " ,
114
+ " <th>tag_0</th>\n " ,
115
+ " <th>tag_1</th>\n " ,
116
+ " <th>tag_2</th>\n " ,
117
+ " </tr>\n " ,
118
+ " </thead>\n " ,
119
+ " <tbody>\n " ,
120
+ " <tr>\n " ,
121
+ " <th>0</th>\n " ,
122
+ " <td> apple</td>\n " ,
123
+ " <td> pear</td>\n " ,
124
+ " <td> guava</td>\n " ,
125
+ " </tr>\n " ,
126
+ " <tr>\n " ,
127
+ " <th>1</th>\n " ,
128
+ " <td> truck</td>\n " ,
129
+ " <td> car</td>\n " ,
130
+ " <td> plane</td>\n " ,
131
+ " </tr>\n " ,
132
+ " <tr>\n " ,
133
+ " <th>2</th>\n " ,
134
+ " <td> cat</td>\n " ,
135
+ " <td> dog</td>\n " ,
136
+ " <td> mouse</td>\n " ,
137
+ " </tr>\n " ,
138
+ " </tbody>\n " ,
139
+ " </table>\n " ,
140
+ " </div>"
141
+ ],
142
+ "metadata" : {},
143
+ "output_type" : " pyout" ,
144
+ "prompt_number" : 25 ,
145
+ "text" : [
146
+ " tag_0 tag_1 tag_2\n " ,
147
+ " 0 apple pear guava\n " ,
148
+ " 1 truck car plane\n " ,
149
+ " 2 cat dog mouse"
150
+ ]
151
+ }
152
+ ],
153
+ "prompt_number" : 25
154
+ },
155
+ {
156
+ "cell_type" : " code" ,
157
+ "collapsed" : false ,
158
+ "input" : [
159
+ " # join the tags dataframe back to the original dataframe\n " ,
160
+ " pd.concat([df[:], tags[:]], axis=1)"
161
+ ],
162
+ "language" : " python" ,
163
+ "metadata" : {},
164
+ "outputs" : [
165
+ {
166
+ "html" : [
167
+ " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " ,
168
+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
169
+ " <thead>\n " ,
170
+ " <tr style=\" text-align: right;\" >\n " ,
171
+ " <th></th>\n " ,
172
+ " <th>score</th>\n " ,
173
+ " <th>tags</th>\n " ,
174
+ " <th>tag_0</th>\n " ,
175
+ " <th>tag_1</th>\n " ,
176
+ " <th>tag_2</th>\n " ,
177
+ " </tr>\n " ,
178
+ " </thead>\n " ,
179
+ " <tbody>\n " ,
180
+ " <tr>\n " ,
181
+ " <th>0</th>\n " ,
182
+ " <td> 1</td>\n " ,
183
+ " <td> [apple, pear, guava]</td>\n " ,
184
+ " <td> apple</td>\n " ,
185
+ " <td> pear</td>\n " ,
186
+ " <td> guava</td>\n " ,
187
+ " </tr>\n " ,
188
+ " <tr>\n " ,
189
+ " <th>1</th>\n " ,
190
+ " <td> 2</td>\n " ,
191
+ " <td> [truck, car, plane]</td>\n " ,
192
+ " <td> truck</td>\n " ,
193
+ " <td> car</td>\n " ,
194
+ " <td> plane</td>\n " ,
195
+ " </tr>\n " ,
196
+ " <tr>\n " ,
197
+ " <th>2</th>\n " ,
198
+ " <td> 3</td>\n " ,
199
+ " <td> [cat, dog, mouse]</td>\n " ,
200
+ " <td> cat</td>\n " ,
201
+ " <td> dog</td>\n " ,
202
+ " <td> mouse</td>\n " ,
203
+ " </tr>\n " ,
204
+ " </tbody>\n " ,
205
+ " </table>\n " ,
206
+ " </div>"
207
+ ],
208
+ "metadata" : {},
209
+ "output_type" : " pyout" ,
210
+ "prompt_number" : 26 ,
211
+ "text" : [
212
+ " score tags tag_0 tag_1 tag_2\n " ,
213
+ " 0 1 [apple, pear, guava] apple pear guava\n " ,
214
+ " 1 2 [truck, car, plane] truck car plane\n " ,
215
+ " 2 3 [cat, dog, mouse] cat dog mouse"
216
+ ]
217
+ }
218
+ ],
219
+ "prompt_number" : 26
220
+ }
221
+ ],
222
+ "metadata" : {}
223
+ }
224
+ ]
225
+ }
0 commit comments