1
+ {
2
+ "metadata" : {
3
+ "name" : " " ,
4
+ "signature" : " sha256:d3d126284dab16155e99aca0e6e096469bfc319d5d5a33d5c0566e685cf6c24a"
5
+ },
6
+ "nbformat" : 3 ,
7
+ "nbformat_minor" : 0 ,
8
+ "worksheets" : [
9
+ {
10
+ "cells" : [
11
+ {
12
+ "cell_type" : " markdown" ,
13
+ "metadata" : {},
14
+ "source" : [
15
+ " # Convert A String Categorical Variable To A Numeric Variable Naively\n " ,
16
+ " \n " ,
17
+ " This snippet was written by [Chris R. Albon](http://www.chrisralbon.com/) and is part of his collection of [well-documented Python snippets](https://github.com/chrisalbon/code_py). All code is written in Python 3 in an IPython notebook and offered under the [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/).\n " ,
18
+ " \n " ,
19
+ " Originally from: Data Origami."
20
+ ]
21
+ },
22
+ {
23
+ "cell_type" : " markdown" ,
24
+ "metadata" : {},
25
+ "source" : [
26
+ " ### Import modules"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type" : " code" ,
31
+ "collapsed" : false ,
32
+ "input" : [
33
+ " import pandas as pd"
34
+ ],
35
+ "language" : " python" ,
36
+ "metadata" : {},
37
+ "outputs" : [],
38
+ "prompt_number" : 2
39
+ },
40
+ {
41
+ "cell_type" : " markdown" ,
42
+ "metadata" : {},
43
+ "source" : [
44
+ " ### Create dataframe"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type" : " code" ,
49
+ "collapsed" : false ,
50
+ "input" : [
51
+ " raw_data = {'patient': [1, 1, 1, 2, 2], \n " ,
52
+ " 'obs': [1, 2, 3, 1, 2], \n " ,
53
+ " 'treatment': [0, 1, 0, 1, 0],\n " ,
54
+ " 'score': ['strong', 'weak', 'normal', 'weak', 'strong']} \n " ,
55
+ " df = pd.DataFrame(raw_data, columns = ['patient', 'obs', 'treatment', 'score'])\n " ,
56
+ " df"
57
+ ],
58
+ "language" : " python" ,
59
+ "metadata" : {},
60
+ "outputs" : [
61
+ {
62
+ "html" : [
63
+ " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " ,
64
+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
65
+ " <thead>\n " ,
66
+ " <tr style=\" text-align: right;\" >\n " ,
67
+ " <th></th>\n " ,
68
+ " <th>patient</th>\n " ,
69
+ " <th>obs</th>\n " ,
70
+ " <th>treatment</th>\n " ,
71
+ " <th>score</th>\n " ,
72
+ " </tr>\n " ,
73
+ " </thead>\n " ,
74
+ " <tbody>\n " ,
75
+ " <tr>\n " ,
76
+ " <th>0</th>\n " ,
77
+ " <td> 1</td>\n " ,
78
+ " <td> 1</td>\n " ,
79
+ " <td> 0</td>\n " ,
80
+ " <td> strong</td>\n " ,
81
+ " </tr>\n " ,
82
+ " <tr>\n " ,
83
+ " <th>1</th>\n " ,
84
+ " <td> 1</td>\n " ,
85
+ " <td> 2</td>\n " ,
86
+ " <td> 1</td>\n " ,
87
+ " <td> weak</td>\n " ,
88
+ " </tr>\n " ,
89
+ " <tr>\n " ,
90
+ " <th>2</th>\n " ,
91
+ " <td> 1</td>\n " ,
92
+ " <td> 3</td>\n " ,
93
+ " <td> 0</td>\n " ,
94
+ " <td> normal</td>\n " ,
95
+ " </tr>\n " ,
96
+ " <tr>\n " ,
97
+ " <th>3</th>\n " ,
98
+ " <td> 2</td>\n " ,
99
+ " <td> 1</td>\n " ,
100
+ " <td> 1</td>\n " ,
101
+ " <td> weak</td>\n " ,
102
+ " </tr>\n " ,
103
+ " <tr>\n " ,
104
+ " <th>4</th>\n " ,
105
+ " <td> 2</td>\n " ,
106
+ " <td> 2</td>\n " ,
107
+ " <td> 0</td>\n " ,
108
+ " <td> strong</td>\n " ,
109
+ " </tr>\n " ,
110
+ " </tbody>\n " ,
111
+ " </table>\n " ,
112
+ " </div>"
113
+ ],
114
+ "metadata" : {},
115
+ "output_type" : " pyout" ,
116
+ "prompt_number" : 3 ,
117
+ "text" : [
118
+ " patient obs treatment score\n " ,
119
+ " 0 1 1 0 strong\n " ,
120
+ " 1 1 2 1 weak\n " ,
121
+ " 2 1 3 0 normal\n " ,
122
+ " 3 2 1 1 weak\n " ,
123
+ " 4 2 2 0 strong"
124
+ ]
125
+ }
126
+ ],
127
+ "prompt_number" : 3
128
+ },
129
+ {
130
+ "cell_type" : " markdown" ,
131
+ "metadata" : {},
132
+ "source" : [
133
+ " ### Create a function that converts all values of df['score'] into numbers"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type" : " code" ,
138
+ "collapsed" : false ,
139
+ "input" : [
140
+ " def score_to_numeric(x):\n " ,
141
+ " if x=='strong':\n " ,
142
+ " return 3\n " ,
143
+ " if x=='normal':\n " ,
144
+ " return 2\n " ,
145
+ " if x=='weak':\n " ,
146
+ " return 1"
147
+ ],
148
+ "language" : " python" ,
149
+ "metadata" : {},
150
+ "outputs" : [],
151
+ "prompt_number" : 5
152
+ },
153
+ {
154
+ "cell_type" : " markdown" ,
155
+ "metadata" : {},
156
+ "source" : [
157
+ " ### Apply the function to the score variable"
158
+ ]
159
+ },
160
+ {
161
+ "cell_type" : " code" ,
162
+ "collapsed" : false ,
163
+ "input" : [
164
+ " df['score_num'] = df['score'].apply(score_to_numeric)\n " ,
165
+ " df"
166
+ ],
167
+ "language" : " python" ,
168
+ "metadata" : {},
169
+ "outputs" : [
170
+ {
171
+ "html" : [
172
+ " <div style=\" max-height:1000px;max-width:1500px;overflow:auto;\" >\n " ,
173
+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
174
+ " <thead>\n " ,
175
+ " <tr style=\" text-align: right;\" >\n " ,
176
+ " <th></th>\n " ,
177
+ " <th>patient</th>\n " ,
178
+ " <th>obs</th>\n " ,
179
+ " <th>treatment</th>\n " ,
180
+ " <th>score</th>\n " ,
181
+ " <th>score_num</th>\n " ,
182
+ " </tr>\n " ,
183
+ " </thead>\n " ,
184
+ " <tbody>\n " ,
185
+ " <tr>\n " ,
186
+ " <th>0</th>\n " ,
187
+ " <td> 1</td>\n " ,
188
+ " <td> 1</td>\n " ,
189
+ " <td> 0</td>\n " ,
190
+ " <td> strong</td>\n " ,
191
+ " <td> 3</td>\n " ,
192
+ " </tr>\n " ,
193
+ " <tr>\n " ,
194
+ " <th>1</th>\n " ,
195
+ " <td> 1</td>\n " ,
196
+ " <td> 2</td>\n " ,
197
+ " <td> 1</td>\n " ,
198
+ " <td> weak</td>\n " ,
199
+ " <td> 1</td>\n " ,
200
+ " </tr>\n " ,
201
+ " <tr>\n " ,
202
+ " <th>2</th>\n " ,
203
+ " <td> 1</td>\n " ,
204
+ " <td> 3</td>\n " ,
205
+ " <td> 0</td>\n " ,
206
+ " <td> normal</td>\n " ,
207
+ " <td> 2</td>\n " ,
208
+ " </tr>\n " ,
209
+ " <tr>\n " ,
210
+ " <th>3</th>\n " ,
211
+ " <td> 2</td>\n " ,
212
+ " <td> 1</td>\n " ,
213
+ " <td> 1</td>\n " ,
214
+ " <td> weak</td>\n " ,
215
+ " <td> 1</td>\n " ,
216
+ " </tr>\n " ,
217
+ " <tr>\n " ,
218
+ " <th>4</th>\n " ,
219
+ " <td> 2</td>\n " ,
220
+ " <td> 2</td>\n " ,
221
+ " <td> 0</td>\n " ,
222
+ " <td> strong</td>\n " ,
223
+ " <td> 3</td>\n " ,
224
+ " </tr>\n " ,
225
+ " </tbody>\n " ,
226
+ " </table>\n " ,
227
+ " </div>"
228
+ ],
229
+ "metadata" : {},
230
+ "output_type" : " pyout" ,
231
+ "prompt_number" : 7 ,
232
+ "text" : [
233
+ " patient obs treatment score score_num\n " ,
234
+ " 0 1 1 0 strong 3\n " ,
235
+ " 1 1 2 1 weak 1\n " ,
236
+ " 2 1 3 0 normal 2\n " ,
237
+ " 3 2 1 1 weak 1\n " ,
238
+ " 4 2 2 0 strong 3"
239
+ ]
240
+ }
241
+ ],
242
+ "prompt_number" : 7
243
+ },
244
+ {
245
+ "cell_type" : " code" ,
246
+ "collapsed" : false ,
247
+ "input" : [],
248
+ "language" : " python" ,
249
+ "metadata" : {},
250
+ "outputs" : []
251
+ }
252
+ ],
253
+ "metadata" : {}
254
+ }
255
+ ]
256
+ }
0 commit comments