1+ {
2+  "metadata" : {
3+   "name" : " " 
4+   "signature" : " sha256:f31de46477b2663a07c56bdac9684ccc42094e16ec9c87b5caaea8cf66eaf43d" 
5+  },
6+  "nbformat" : 3 ,
7+  "nbformat_minor" : 0 ,
8+  "worksheets" : [
9+   {
10+    "cells" : [
11+     {
12+      "cell_type" : " markdown" 
13+      "metadata" : {},
14+      "source" : [
15+       " # Expand Cells Containing Lists Into Their Own Variables In Pandas\n " 
16+       " \n " 
17+       " This snippet was written by [Chris R. Albon](http://www.chrisralbon.com/) and is part of his collection of [well-documented Python snippets](https://github.com/chrisalbon/code_py). All code is written in Python 3 in an IPython notebook and offered under the [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/)." 
18+      ]
19+     },
20+     {
21+      "cell_type" : " code" 
22+      "collapsed" : false ,
23+      "input" : [
24+       " # import pandas\n " 
25+       " import pandas as pd" 
26+      ],
27+      "language" : " python" 
28+      "metadata" : {},
29+      "outputs" : [],
30+      "prompt_number" : 20 
31+     },
32+     {
33+      "cell_type" : " code" 
34+      "collapsed" : false ,
35+      "input" : [
36+       " # create a dataset\n " 
37+       " raw_data = {'score': [1,2,3], \n " 
38+       "         'tags': [['apple','pear','guava'],['truck','car','plane'],['cat','dog','mouse']]}\n " 
39+       " df = pd.DataFrame(raw_data, columns = ['score', 'tags'])\n " 
40+       " \n " 
41+       " # view the dataset\n " 
42+       " df" 
43+      ],
44+      "language" : " python" 
45+      "metadata" : {},
46+      "outputs" : [
47+       {
48+        "html" : [
49+         " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " 
50+         " <table border=\" 1\"  class=\" dataframe\" >\n " 
51+         "   <thead>\n " 
52+         "     <tr style=\" text-align: right;\" >\n " 
53+         "       <th></th>\n " 
54+         "       <th>score</th>\n " 
55+         "       <th>tags</th>\n " 
56+         "     </tr>\n " 
57+         "   </thead>\n " 
58+         "   <tbody>\n " 
59+         "     <tr>\n " 
60+         "       <th>0</th>\n " 
61+         "       <td> 1</td>\n " 
62+         "       <td> [apple, pear, guava]</td>\n " 
63+         "     </tr>\n " 
64+         "     <tr>\n " 
65+         "       <th>1</th>\n " 
66+         "       <td> 2</td>\n " 
67+         "       <td>  [truck, car, plane]</td>\n " 
68+         "     </tr>\n " 
69+         "     <tr>\n " 
70+         "       <th>2</th>\n " 
71+         "       <td> 3</td>\n " 
72+         "       <td>    [cat, dog, mouse]</td>\n " 
73+         "     </tr>\n " 
74+         "   </tbody>\n " 
75+         " </table>\n " 
76+         " </div>" 
77+        ],
78+        "metadata" : {},
79+        "output_type" : " pyout" 
80+        "prompt_number" : 24 ,
81+        "text" : [
82+         "    score                  tags\n " 
83+         " 0      1  [apple, pear, guava]\n " 
84+         " 1      2   [truck, car, plane]\n " 
85+         " 2      3     [cat, dog, mouse]" 
86+        ]
87+       }
88+      ],
89+      "prompt_number" : 24 
90+     },
91+     {
92+      "cell_type" : " code" 
93+      "collapsed" : false ,
94+      "input" : [
95+       " # expand df.tags into its own dataframe\n " 
96+       " tags = df['tags'].apply(pd.Series)\n " 
97+       " \n " 
98+       " # rename each variable in tags\n " 
99+       " tags = tags.rename(columns = lambda x : 'tag_' + str(x))\n " 
100+       " \n " 
101+       " # view the tags dataframe\n " 
102+       " tags" 
103+      ],
104+      "language" : " python" 
105+      "metadata" : {},
106+      "outputs" : [
107+       {
108+        "html" : [
109+         " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " 
110+         " <table border=\" 1\"  class=\" dataframe\" >\n " 
111+         "   <thead>\n " 
112+         "     <tr style=\" text-align: right;\" >\n " 
113+         "       <th></th>\n " 
114+         "       <th>tag_0</th>\n " 
115+         "       <th>tag_1</th>\n " 
116+         "       <th>tag_2</th>\n " 
117+         "     </tr>\n " 
118+         "   </thead>\n " 
119+         "   <tbody>\n " 
120+         "     <tr>\n " 
121+         "       <th>0</th>\n " 
122+         "       <td> apple</td>\n " 
123+         "       <td> pear</td>\n " 
124+         "       <td> guava</td>\n " 
125+         "     </tr>\n " 
126+         "     <tr>\n " 
127+         "       <th>1</th>\n " 
128+         "       <td> truck</td>\n " 
129+         "       <td>  car</td>\n " 
130+         "       <td> plane</td>\n " 
131+         "     </tr>\n " 
132+         "     <tr>\n " 
133+         "       <th>2</th>\n " 
134+         "       <td>   cat</td>\n " 
135+         "       <td>  dog</td>\n " 
136+         "       <td> mouse</td>\n " 
137+         "     </tr>\n " 
138+         "   </tbody>\n " 
139+         " </table>\n " 
140+         " </div>" 
141+        ],
142+        "metadata" : {},
143+        "output_type" : " pyout" 
144+        "prompt_number" : 25 ,
145+        "text" : [
146+         "    tag_0 tag_1  tag_2\n " 
147+         " 0  apple  pear  guava\n " 
148+         " 1  truck   car  plane\n " 
149+         " 2    cat   dog  mouse" 
150+        ]
151+       }
152+      ],
153+      "prompt_number" : 25 
154+     },
155+     {
156+      "cell_type" : " code" 
157+      "collapsed" : false ,
158+      "input" : [
159+       " # join the tags dataframe back to the original dataframe\n " 
160+       " pd.concat([df[:], tags[:]], axis=1)" 
161+      ],
162+      "language" : " python" 
163+      "metadata" : {},
164+      "outputs" : [
165+       {
166+        "html" : [
167+         " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " 
168+         " <table border=\" 1\"  class=\" dataframe\" >\n " 
169+         "   <thead>\n " 
170+         "     <tr style=\" text-align: right;\" >\n " 
171+         "       <th></th>\n " 
172+         "       <th>score</th>\n " 
173+         "       <th>tags</th>\n " 
174+         "       <th>tag_0</th>\n " 
175+         "       <th>tag_1</th>\n " 
176+         "       <th>tag_2</th>\n " 
177+         "     </tr>\n " 
178+         "   </thead>\n " 
179+         "   <tbody>\n " 
180+         "     <tr>\n " 
181+         "       <th>0</th>\n " 
182+         "       <td> 1</td>\n " 
183+         "       <td> [apple, pear, guava]</td>\n " 
184+         "       <td> apple</td>\n " 
185+         "       <td> pear</td>\n " 
186+         "       <td> guava</td>\n " 
187+         "     </tr>\n " 
188+         "     <tr>\n " 
189+         "       <th>1</th>\n " 
190+         "       <td> 2</td>\n " 
191+         "       <td>  [truck, car, plane]</td>\n " 
192+         "       <td> truck</td>\n " 
193+         "       <td>  car</td>\n " 
194+         "       <td> plane</td>\n " 
195+         "     </tr>\n " 
196+         "     <tr>\n " 
197+         "       <th>2</th>\n " 
198+         "       <td> 3</td>\n " 
199+         "       <td>    [cat, dog, mouse]</td>\n " 
200+         "       <td>   cat</td>\n " 
201+         "       <td>  dog</td>\n " 
202+         "       <td> mouse</td>\n " 
203+         "     </tr>\n " 
204+         "   </tbody>\n " 
205+         " </table>\n " 
206+         " </div>" 
207+        ],
208+        "metadata" : {},
209+        "output_type" : " pyout" 
210+        "prompt_number" : 26 ,
211+        "text" : [
212+         "    score                  tags  tag_0 tag_1  tag_2\n " 
213+         " 0      1  [apple, pear, guava]  apple  pear  guava\n " 
214+         " 1      2   [truck, car, plane]  truck   car  plane\n " 
215+         " 2      3     [cat, dog, mouse]    cat   dog  mouse" 
216+        ]
217+       }
218+      ],
219+      "prompt_number" : 26 
220+     }
221+    ],
222+    "metadata" : {}
223+   }
224+  ]
225+ }
0 commit comments