Skip to content

Commit 14d9c9f

Browse files
committed
More snippits
1 parent 2354b0f commit 14d9c9f

11 files changed

+2331
-6
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
{
2+
"metadata": {
3+
"name": "",
4+
"signature": "sha256:d3d126284dab16155e99aca0e6e096469bfc319d5d5a33d5c0566e685cf6c24a"
5+
},
6+
"nbformat": 3,
7+
"nbformat_minor": 0,
8+
"worksheets": [
9+
{
10+
"cells": [
11+
{
12+
"cell_type": "markdown",
13+
"metadata": {},
14+
"source": [
15+
"# Convert A String Categorical Variable To A Numeric Variable Naively\n",
16+
"\n",
17+
"This snippit was written by [Chris R. Albon](http://www.chrisralbon.com/) and is part of his collection of [well-documented Python snippits](https://github.com/chrisalbon/code_py). All code is written in Python 3 in iPython notebook and offered under the [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/).\n",
18+
"\n",
19+
"Originally from: Data Origami."
20+
]
21+
},
22+
{
23+
"cell_type": "markdown",
24+
"metadata": {},
25+
"source": [
26+
"### import modules"
27+
]
28+
},
29+
{
30+
"cell_type": "code",
31+
"collapsed": false,
32+
"input": [
33+
"import pandas as pd"
34+
],
35+
"language": "python",
36+
"metadata": {},
37+
"outputs": [],
38+
"prompt_number": 2
39+
},
40+
{
41+
"cell_type": "markdown",
42+
"metadata": {},
43+
"source": [
44+
"### Create dataframe"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"collapsed": false,
50+
"input": [
51+
"raw_data = {'patient': [1, 1, 1, 2, 2], \n",
52+
" 'obs': [1, 2, 3, 1, 2], \n",
53+
" 'treatment': [0, 1, 0, 1, 0],\n",
54+
" 'score': ['strong', 'weak', 'normal', 'weak', 'strong']} \n",
55+
"df = pd.DataFrame(raw_data, columns = ['patient', 'obs', 'treatment', 'score'])\n",
56+
"df"
57+
],
58+
"language": "python",
59+
"metadata": {},
60+
"outputs": [
61+
{
62+
"html": [
63+
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
64+
"<table border=\"1\" class=\"dataframe\">\n",
65+
" <thead>\n",
66+
" <tr style=\"text-align: right;\">\n",
67+
" <th></th>\n",
68+
" <th>patient</th>\n",
69+
" <th>obs</th>\n",
70+
" <th>treatment</th>\n",
71+
" <th>score</th>\n",
72+
" </tr>\n",
73+
" </thead>\n",
74+
" <tbody>\n",
75+
" <tr>\n",
76+
" <th>0</th>\n",
77+
" <td> 1</td>\n",
78+
" <td> 1</td>\n",
79+
" <td> 0</td>\n",
80+
" <td> strong</td>\n",
81+
" </tr>\n",
82+
" <tr>\n",
83+
" <th>1</th>\n",
84+
" <td> 1</td>\n",
85+
" <td> 2</td>\n",
86+
" <td> 1</td>\n",
87+
" <td> weak</td>\n",
88+
" </tr>\n",
89+
" <tr>\n",
90+
" <th>2</th>\n",
91+
" <td> 1</td>\n",
92+
" <td> 3</td>\n",
93+
" <td> 0</td>\n",
94+
" <td> normal</td>\n",
95+
" </tr>\n",
96+
" <tr>\n",
97+
" <th>3</th>\n",
98+
" <td> 2</td>\n",
99+
" <td> 1</td>\n",
100+
" <td> 1</td>\n",
101+
" <td> weak</td>\n",
102+
" </tr>\n",
103+
" <tr>\n",
104+
" <th>4</th>\n",
105+
" <td> 2</td>\n",
106+
" <td> 2</td>\n",
107+
" <td> 0</td>\n",
108+
" <td> strong</td>\n",
109+
" </tr>\n",
110+
" </tbody>\n",
111+
"</table>\n",
112+
"</div>"
113+
],
114+
"metadata": {},
115+
"output_type": "pyout",
116+
"prompt_number": 3,
117+
"text": [
118+
" patient obs treatment score\n",
119+
"0 1 1 0 strong\n",
120+
"1 1 2 1 weak\n",
121+
"2 1 3 0 normal\n",
122+
"3 2 1 1 weak\n",
123+
"4 2 2 0 strong"
124+
]
125+
}
126+
],
127+
"prompt_number": 3
128+
},
129+
{
130+
"cell_type": "markdown",
131+
"metadata": {},
132+
"source": [
133+
"### Create a function that converts all values of df['score'] into numbers"
134+
]
135+
},
136+
{
137+
"cell_type": "code",
138+
"collapsed": false,
139+
"input": [
140+
"def score_to_numeric(x):\n",
141+
" if x=='strong':\n",
142+
" return 3\n",
143+
" if x=='normal':\n",
144+
" return 2\n",
145+
" if x=='weak':\n",
146+
" return 1"
147+
],
148+
"language": "python",
149+
"metadata": {},
150+
"outputs": [],
151+
"prompt_number": 5
152+
},
153+
{
154+
"cell_type": "markdown",
155+
"metadata": {},
156+
"source": [
157+
"### Apply the function to the score variable"
158+
]
159+
},
160+
{
161+
"cell_type": "code",
162+
"collapsed": false,
163+
"input": [
164+
"df['score_num'] = df['score'].apply(score_to_numeric)\n",
165+
"df"
166+
],
167+
"language": "python",
168+
"metadata": {},
169+
"outputs": [
170+
{
171+
"html": [
172+
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
173+
"<table border=\"1\" class=\"dataframe\">\n",
174+
" <thead>\n",
175+
" <tr style=\"text-align: right;\">\n",
176+
" <th></th>\n",
177+
" <th>patient</th>\n",
178+
" <th>obs</th>\n",
179+
" <th>treatment</th>\n",
180+
" <th>score</th>\n",
181+
" <th>score_num</th>\n",
182+
" </tr>\n",
183+
" </thead>\n",
184+
" <tbody>\n",
185+
" <tr>\n",
186+
" <th>0</th>\n",
187+
" <td> 1</td>\n",
188+
" <td> 1</td>\n",
189+
" <td> 0</td>\n",
190+
" <td> strong</td>\n",
191+
" <td> 3</td>\n",
192+
" </tr>\n",
193+
" <tr>\n",
194+
" <th>1</th>\n",
195+
" <td> 1</td>\n",
196+
" <td> 2</td>\n",
197+
" <td> 1</td>\n",
198+
" <td> weak</td>\n",
199+
" <td> 1</td>\n",
200+
" </tr>\n",
201+
" <tr>\n",
202+
" <th>2</th>\n",
203+
" <td> 1</td>\n",
204+
" <td> 3</td>\n",
205+
" <td> 0</td>\n",
206+
" <td> normal</td>\n",
207+
" <td> 2</td>\n",
208+
" </tr>\n",
209+
" <tr>\n",
210+
" <th>3</th>\n",
211+
" <td> 2</td>\n",
212+
" <td> 1</td>\n",
213+
" <td> 1</td>\n",
214+
" <td> weak</td>\n",
215+
" <td> 1</td>\n",
216+
" </tr>\n",
217+
" <tr>\n",
218+
" <th>4</th>\n",
219+
" <td> 2</td>\n",
220+
" <td> 2</td>\n",
221+
" <td> 0</td>\n",
222+
" <td> strong</td>\n",
223+
" <td> 3</td>\n",
224+
" </tr>\n",
225+
" </tbody>\n",
226+
"</table>\n",
227+
"</div>"
228+
],
229+
"metadata": {},
230+
"output_type": "pyout",
231+
"prompt_number": 7,
232+
"text": [
233+
" patient obs treatment score score_num\n",
234+
"0 1 1 0 strong 3\n",
235+
"1 1 2 1 weak 1\n",
236+
"2 1 3 0 normal 2\n",
237+
"3 2 1 1 weak 1\n",
238+
"4 2 2 0 strong 3"
239+
]
240+
}
241+
],
242+
"prompt_number": 7
243+
},
244+
{
245+
"cell_type": "code",
246+
"collapsed": false,
247+
"input": [],
248+
"language": "python",
249+
"metadata": {},
250+
"outputs": []
251+
}
252+
],
253+
"metadata": {}
254+
}
255+
]
256+
}

0 commit comments

Comments
 (0)