1+ {
2+ "metadata" : {
3+ "name" : " " ,
4+ "signature" : " sha256:f31de46477b2663a07c56bdac9684ccc42094e16ec9c87b5caaea8cf66eaf43d"
5+ },
6+ "nbformat" : 3 ,
7+ "nbformat_minor" : 0 ,
8+ "worksheets" : [
9+ {
10+ "cells" : [
11+ {
12+ "cell_type" : " markdown" ,
13+ "metadata" : {},
14+ "source" : [
15+ " # Expand Cells Containing Lists Into Their Own Variables In Pandas\n " ,
16+ " \n " ,
17+ " This snippet was written by [Chris R. Albon](http://www.chrisralbon.com/) and is part of his collection of [well-documented Python snippets](https://github.com/chrisalbon/code_py). All code is written in Python 3 in an IPython notebook and offered under the [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/)."
18+ ]
19+ },
20+ {
21+ "cell_type" : " code" ,
22+ "collapsed" : false ,
23+ "input" : [
24+ " # import pandas\n " ,
25+ " import pandas as pd"
26+ ],
27+ "language" : " python" ,
28+ "metadata" : {},
29+ "outputs" : [],
30+ "prompt_number" : 20
31+ },
32+ {
33+ "cell_type" : " code" ,
34+ "collapsed" : false ,
35+ "input" : [
36+ " # create a dataset\n " ,
37+ " raw_data = {'score': [1,2,3], \n " ,
38+ " 'tags': [['apple','pear','guava'],['truck','car','plane'],['cat','dog','mouse']]}\n " ,
39+ " df = pd.DataFrame(raw_data, columns = ['score', 'tags'])\n " ,
40+ " \n " ,
41+ " # view the dataset\n " ,
42+ " df"
43+ ],
44+ "language" : " python" ,
45+ "metadata" : {},
46+ "outputs" : [
47+ {
48+ "html" : [
49+ " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " ,
50+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
51+ " <thead>\n " ,
52+ " <tr style=\" text-align: right;\" >\n " ,
53+ " <th></th>\n " ,
54+ " <th>score</th>\n " ,
55+ " <th>tags</th>\n " ,
56+ " </tr>\n " ,
57+ " </thead>\n " ,
58+ " <tbody>\n " ,
59+ " <tr>\n " ,
60+ " <th>0</th>\n " ,
61+ " <td> 1</td>\n " ,
62+ " <td> [apple, pear, guava]</td>\n " ,
63+ " </tr>\n " ,
64+ " <tr>\n " ,
65+ " <th>1</th>\n " ,
66+ " <td> 2</td>\n " ,
67+ " <td> [truck, car, plane]</td>\n " ,
68+ " </tr>\n " ,
69+ " <tr>\n " ,
70+ " <th>2</th>\n " ,
71+ " <td> 3</td>\n " ,
72+ " <td> [cat, dog, mouse]</td>\n " ,
73+ " </tr>\n " ,
74+ " </tbody>\n " ,
75+ " </table>\n " ,
76+ " </div>"
77+ ],
78+ "metadata" : {},
79+ "output_type" : " pyout" ,
80+ "prompt_number" : 24 ,
81+ "text" : [
82+ " score tags\n " ,
83+ " 0 1 [apple, pear, guava]\n " ,
84+ " 1 2 [truck, car, plane]\n " ,
85+ " 2 3 [cat, dog, mouse]"
86+ ]
87+ }
88+ ],
89+ "prompt_number" : 24
90+ },
91+ {
92+ "cell_type" : " code" ,
93+ "collapsed" : false ,
94+ "input" : [
95+ " # expand df.tags into its own dataframe\n " ,
96+ " tags = df['tags'].apply(pd.Series)\n " ,
97+ " \n " ,
98+ " # rename each variable in tags\n " ,
99+ " tags = tags.rename(columns = lambda x : 'tag_' + str(x))\n " ,
100+ " \n " ,
101+ " # view the tags dataframe\n " ,
102+ " tags"
103+ ],
104+ "language" : " python" ,
105+ "metadata" : {},
106+ "outputs" : [
107+ {
108+ "html" : [
109+ " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " ,
110+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
111+ " <thead>\n " ,
112+ " <tr style=\" text-align: right;\" >\n " ,
113+ " <th></th>\n " ,
114+ " <th>tag_0</th>\n " ,
115+ " <th>tag_1</th>\n " ,
116+ " <th>tag_2</th>\n " ,
117+ " </tr>\n " ,
118+ " </thead>\n " ,
119+ " <tbody>\n " ,
120+ " <tr>\n " ,
121+ " <th>0</th>\n " ,
122+ " <td> apple</td>\n " ,
123+ " <td> pear</td>\n " ,
124+ " <td> guava</td>\n " ,
125+ " </tr>\n " ,
126+ " <tr>\n " ,
127+ " <th>1</th>\n " ,
128+ " <td> truck</td>\n " ,
129+ " <td> car</td>\n " ,
130+ " <td> plane</td>\n " ,
131+ " </tr>\n " ,
132+ " <tr>\n " ,
133+ " <th>2</th>\n " ,
134+ " <td> cat</td>\n " ,
135+ " <td> dog</td>\n " ,
136+ " <td> mouse</td>\n " ,
137+ " </tr>\n " ,
138+ " </tbody>\n " ,
139+ " </table>\n " ,
140+ " </div>"
141+ ],
142+ "metadata" : {},
143+ "output_type" : " pyout" ,
144+ "prompt_number" : 25 ,
145+ "text" : [
146+ " tag_0 tag_1 tag_2\n " ,
147+ " 0 apple pear guava\n " ,
148+ " 1 truck car plane\n " ,
149+ " 2 cat dog mouse"
150+ ]
151+ }
152+ ],
153+ "prompt_number" : 25
154+ },
155+ {
156+ "cell_type" : " code" ,
157+ "collapsed" : false ,
158+ "input" : [
159+ " # join the tags dataframe back to the original dataframe\n " ,
160+ " pd.concat([df[:], tags[:]], axis=1)"
161+ ],
162+ "language" : " python" ,
163+ "metadata" : {},
164+ "outputs" : [
165+ {
166+ "html" : [
167+ " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " ,
168+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
169+ " <thead>\n " ,
170+ " <tr style=\" text-align: right;\" >\n " ,
171+ " <th></th>\n " ,
172+ " <th>score</th>\n " ,
173+ " <th>tags</th>\n " ,
174+ " <th>tag_0</th>\n " ,
175+ " <th>tag_1</th>\n " ,
176+ " <th>tag_2</th>\n " ,
177+ " </tr>\n " ,
178+ " </thead>\n " ,
179+ " <tbody>\n " ,
180+ " <tr>\n " ,
181+ " <th>0</th>\n " ,
182+ " <td> 1</td>\n " ,
183+ " <td> [apple, pear, guava]</td>\n " ,
184+ " <td> apple</td>\n " ,
185+ " <td> pear</td>\n " ,
186+ " <td> guava</td>\n " ,
187+ " </tr>\n " ,
188+ " <tr>\n " ,
189+ " <th>1</th>\n " ,
190+ " <td> 2</td>\n " ,
191+ " <td> [truck, car, plane]</td>\n " ,
192+ " <td> truck</td>\n " ,
193+ " <td> car</td>\n " ,
194+ " <td> plane</td>\n " ,
195+ " </tr>\n " ,
196+ " <tr>\n " ,
197+ " <th>2</th>\n " ,
198+ " <td> 3</td>\n " ,
199+ " <td> [cat, dog, mouse]</td>\n " ,
200+ " <td> cat</td>\n " ,
201+ " <td> dog</td>\n " ,
202+ " <td> mouse</td>\n " ,
203+ " </tr>\n " ,
204+ " </tbody>\n " ,
205+ " </table>\n " ,
206+ " </div>"
207+ ],
208+ "metadata" : {},
209+ "output_type" : " pyout" ,
210+ "prompt_number" : 26 ,
211+ "text" : [
212+ " score tags tag_0 tag_1 tag_2\n " ,
213+ " 0 1 [apple, pear, guava] apple pear guava\n " ,
214+ " 1 2 [truck, car, plane] truck car plane\n " ,
215+ " 2 3 [cat, dog, mouse] cat dog mouse"
216+ ]
217+ }
218+ ],
219+ "prompt_number" : 26
220+ }
221+ ],
222+ "metadata" : {}
223+ }
224+ ]
225+ }
0 commit comments