Skip to content

Commit 288358e

Browse files
committed
added a tags snippet
1 parent c0c994b commit 288358e

3 files changed

+862
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
{
2+
"metadata": {
3+
"name": "",
4+
"signature": "sha256:f31de46477b2663a07c56bdac9684ccc42094e16ec9c87b5caaea8cf66eaf43d"
5+
},
6+
"nbformat": 3,
7+
"nbformat_minor": 0,
8+
"worksheets": [
9+
{
10+
"cells": [
11+
{
12+
"cell_type": "markdown",
13+
"metadata": {},
14+
"source": [
15+
"# Expand Cells Containing Lists Into Their Own Variables In Pandas\n",
16+
"\n",
17+
"This snippet was written by [Chris R. Albon](http://www.chrisralbon.com/) and is part of his collection of [well-documented Python snippets](https://github.com/chrisalbon/code_py). All code is written in Python 3 in an IPython notebook and offered under the [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/)."
18+
]
19+
},
20+
{
21+
"cell_type": "code",
22+
"collapsed": false,
23+
"input": [
24+
"# import pandas\n",
25+
"import pandas as pd"
26+
],
27+
"language": "python",
28+
"metadata": {},
29+
"outputs": [],
30+
"prompt_number": 20
31+
},
32+
{
33+
"cell_type": "code",
34+
"collapsed": false,
35+
"input": [
36+
"# create a dataset\n",
37+
"raw_data = {'score': [1,2,3], \n",
38+
" 'tags': [['apple','pear','guava'],['truck','car','plane'],['cat','dog','mouse']]}\n",
39+
"df = pd.DataFrame(raw_data, columns = ['score', 'tags'])\n",
40+
"\n",
41+
"# view the dataset\n",
42+
"df"
43+
],
44+
"language": "python",
45+
"metadata": {},
46+
"outputs": [
47+
{
48+
"html": [
49+
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
50+
"<table border=\"1\" class=\"dataframe\">\n",
51+
" <thead>\n",
52+
" <tr style=\"text-align: right;\">\n",
53+
" <th></th>\n",
54+
" <th>score</th>\n",
55+
" <th>tags</th>\n",
56+
" </tr>\n",
57+
" </thead>\n",
58+
" <tbody>\n",
59+
" <tr>\n",
60+
" <th>0</th>\n",
61+
" <td> 1</td>\n",
62+
" <td> [apple, pear, guava]</td>\n",
63+
" </tr>\n",
64+
" <tr>\n",
65+
" <th>1</th>\n",
66+
" <td> 2</td>\n",
67+
" <td> [truck, car, plane]</td>\n",
68+
" </tr>\n",
69+
" <tr>\n",
70+
" <th>2</th>\n",
71+
" <td> 3</td>\n",
72+
" <td> [cat, dog, mouse]</td>\n",
73+
" </tr>\n",
74+
" </tbody>\n",
75+
"</table>\n",
76+
"</div>"
77+
],
78+
"metadata": {},
79+
"output_type": "pyout",
80+
"prompt_number": 24,
81+
"text": [
82+
" score tags\n",
83+
"0 1 [apple, pear, guava]\n",
84+
"1 2 [truck, car, plane]\n",
85+
"2 3 [cat, dog, mouse]"
86+
]
87+
}
88+
],
89+
"prompt_number": 24
90+
},
91+
{
92+
"cell_type": "code",
93+
"collapsed": false,
94+
"input": [
95+
"# expand df.tags into its own dataframe\n",
96+
"tags = df['tags'].apply(pd.Series)\n",
97+
"\n",
98+
"# rename each variable in tags\n",
99+
"tags = tags.rename(columns = lambda x : 'tag_' + str(x))\n",
100+
"\n",
101+
"# view the tags dataframe\n",
102+
"tags"
103+
],
104+
"language": "python",
105+
"metadata": {},
106+
"outputs": [
107+
{
108+
"html": [
109+
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
110+
"<table border=\"1\" class=\"dataframe\">\n",
111+
" <thead>\n",
112+
" <tr style=\"text-align: right;\">\n",
113+
" <th></th>\n",
114+
" <th>tag_0</th>\n",
115+
" <th>tag_1</th>\n",
116+
" <th>tag_2</th>\n",
117+
" </tr>\n",
118+
" </thead>\n",
119+
" <tbody>\n",
120+
" <tr>\n",
121+
" <th>0</th>\n",
122+
" <td> apple</td>\n",
123+
" <td> pear</td>\n",
124+
" <td> guava</td>\n",
125+
" </tr>\n",
126+
" <tr>\n",
127+
" <th>1</th>\n",
128+
" <td> truck</td>\n",
129+
" <td> car</td>\n",
130+
" <td> plane</td>\n",
131+
" </tr>\n",
132+
" <tr>\n",
133+
" <th>2</th>\n",
134+
" <td> cat</td>\n",
135+
" <td> dog</td>\n",
136+
" <td> mouse</td>\n",
137+
" </tr>\n",
138+
" </tbody>\n",
139+
"</table>\n",
140+
"</div>"
141+
],
142+
"metadata": {},
143+
"output_type": "pyout",
144+
"prompt_number": 25,
145+
"text": [
146+
" tag_0 tag_1 tag_2\n",
147+
"0 apple pear guava\n",
148+
"1 truck car plane\n",
149+
"2 cat dog mouse"
150+
]
151+
}
152+
],
153+
"prompt_number": 25
154+
},
155+
{
156+
"cell_type": "code",
157+
"collapsed": false,
158+
"input": [
159+
"# join the tags dataframe back to the original dataframe\n",
160+
"pd.concat([df[:], tags[:]], axis=1)"
161+
],
162+
"language": "python",
163+
"metadata": {},
164+
"outputs": [
165+
{
166+
"html": [
167+
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
168+
"<table border=\"1\" class=\"dataframe\">\n",
169+
" <thead>\n",
170+
" <tr style=\"text-align: right;\">\n",
171+
" <th></th>\n",
172+
" <th>score</th>\n",
173+
" <th>tags</th>\n",
174+
" <th>tag_0</th>\n",
175+
" <th>tag_1</th>\n",
176+
" <th>tag_2</th>\n",
177+
" </tr>\n",
178+
" </thead>\n",
179+
" <tbody>\n",
180+
" <tr>\n",
181+
" <th>0</th>\n",
182+
" <td> 1</td>\n",
183+
" <td> [apple, pear, guava]</td>\n",
184+
" <td> apple</td>\n",
185+
" <td> pear</td>\n",
186+
" <td> guava</td>\n",
187+
" </tr>\n",
188+
" <tr>\n",
189+
" <th>1</th>\n",
190+
" <td> 2</td>\n",
191+
" <td> [truck, car, plane]</td>\n",
192+
" <td> truck</td>\n",
193+
" <td> car</td>\n",
194+
" <td> plane</td>\n",
195+
" </tr>\n",
196+
" <tr>\n",
197+
" <th>2</th>\n",
198+
" <td> 3</td>\n",
199+
" <td> [cat, dog, mouse]</td>\n",
200+
" <td> cat</td>\n",
201+
" <td> dog</td>\n",
202+
" <td> mouse</td>\n",
203+
" </tr>\n",
204+
" </tbody>\n",
205+
"</table>\n",
206+
"</div>"
207+
],
208+
"metadata": {},
209+
"output_type": "pyout",
210+
"prompt_number": 26,
211+
"text": [
212+
" score tags tag_0 tag_1 tag_2\n",
213+
"0 1 [apple, pear, guava] apple pear guava\n",
214+
"1 2 [truck, car, plane] truck car plane\n",
215+
"2 3 [cat, dog, mouse] cat dog mouse"
216+
]
217+
}
218+
],
219+
"prompt_number": 26
220+
}
221+
],
222+
"metadata": {}
223+
}
224+
]
225+
}

0 commit comments

Comments
 (0)