Skip to content

Commit aad3324

Browse files
committed
name update
1 parent 153f628 commit aad3324

10 files changed

+28537
-63833
lines changed

.ipynb_checkpoints/example_data_wrangling_baby_names-checkpoint.ipynb

+193-27
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"metadata": {
33
"name": "",
4-
"signature": "sha256:d039f7a050a26f08d91e279de296855d750849e86e2ce423af76319d25eabcfc"
4+
"signature": "sha256:a894bc887c29aeaca41270a3822bbb75952f1f89879d187e0d78aeead85fcf05"
55
},
66
"nbformat": 3,
77
"nbformat_minor": 0,
@@ -45,7 +45,7 @@
4545
"language": "python",
4646
"metadata": {},
4747
"outputs": [],
48-
"prompt_number": 1
48+
"prompt_number": 41
4949
},
5050
{
5151
"cell_type": "markdown",
@@ -73,13 +73,13 @@
7373
{
7474
"metadata": {},
7575
"output_type": "pyout",
76-
"prompt_number": 2,
76+
"prompt_number": 42,
7777
"text": [
7878
"['1880', '1881', '1882', '1883', '1884']"
7979
]
8080
}
8181
],
82-
"prompt_number": 2
82+
"prompt_number": 42
8383
},
8484
{
8585
"cell_type": "code",
@@ -101,7 +101,7 @@
101101
"language": "python",
102102
"metadata": {},
103103
"outputs": [],
104-
"prompt_number": 3
104+
"prompt_number": 43
105105
},
106106
{
107107
"cell_type": "code",
@@ -116,13 +116,13 @@
116116
{
117117
"metadata": {},
118118
"output_type": "pyout",
119-
"prompt_number": 4,
119+
"prompt_number": 44,
120120
"text": [
121121
"['df_1880', 'df_1881', 'df_1882', 'df_1883', 'df_1884']"
122122
]
123123
}
124124
],
125-
"prompt_number": 4
125+
"prompt_number": 44
126126
},
127127
{
128128
"cell_type": "code",
@@ -144,7 +144,7 @@
144144
"language": "python",
145145
"metadata": {},
146146
"outputs": [],
147-
"prompt_number": 5
147+
"prompt_number": 45
148148
},
149149
{
150150
"cell_type": "code",
@@ -159,13 +159,13 @@
159159
{
160160
"metadata": {},
161161
"output_type": "pyout",
162-
"prompt_number": 6,
162+
"prompt_number": 46,
163163
"text": [
164164
"['yob1880.txt', 'yob1881.txt', 'yob1882.txt', 'yob1883.txt', 'yob1884.txt']"
165165
]
166166
}
167167
],
168-
"prompt_number": 6
168+
"prompt_number": 46
169169
},
170170
{
171171
"cell_type": "code",
@@ -177,7 +177,7 @@
177177
"language": "python",
178178
"metadata": {},
179179
"outputs": [],
180-
"prompt_number": 7
180+
"prompt_number": 47
181181
},
182182
{
183183
"cell_type": "code",
@@ -189,7 +189,7 @@
189189
"language": "python",
190190
"metadata": {},
191191
"outputs": [],
192-
"prompt_number": 8
192+
"prompt_number": 48
193193
},
194194
{
195195
"cell_type": "code",
@@ -212,7 +212,7 @@
212212
"language": "python",
213213
"metadata": {},
214214
"outputs": [],
215-
"prompt_number": 9
215+
"prompt_number": 49
216216
},
217217
{
218218
"cell_type": "code",
@@ -227,13 +227,13 @@
227227
{
228228
"metadata": {},
229229
"output_type": "pyout",
230-
"prompt_number": 10,
230+
"prompt_number": 50,
231231
"text": [
232232
"1759019"
233233
]
234234
}
235235
],
236-
"prompt_number": 10
236+
"prompt_number": 50
237237
},
238238
{
239239
"cell_type": "markdown",
@@ -252,7 +252,7 @@
252252
"language": "python",
253253
"metadata": {},
254254
"outputs": [],
255-
"prompt_number": 12
255+
"prompt_number": 51
256256
},
257257
{
258258
"cell_type": "code",
@@ -267,13 +267,13 @@
267267
{
268268
"metadata": {},
269269
"output_type": "pyout",
270-
"prompt_number": 13,
270+
"prompt_number": 52,
271271
"text": [
272272
"1043318"
273273
]
274274
}
275275
],
276-
"prompt_number": 13
276+
"prompt_number": 52
277277
},
278278
{
279279
"cell_type": "code",
@@ -285,7 +285,7 @@
285285
"language": "python",
286286
"metadata": {},
287287
"outputs": [],
288-
"prompt_number": 14
288+
"prompt_number": 53
289289
},
290290
{
291291
"cell_type": "code",
@@ -297,26 +297,96 @@
297297
"language": "python",
298298
"metadata": {},
299299
"outputs": [],
300-
"prompt_number": 15
300+
"prompt_number": 54
301+
},
302+
{
303+
"cell_type": "code",
304+
"collapsed": false,
305+
"input": [
306+
"df.head(3)"
307+
],
308+
"language": "python",
309+
"metadata": {},
310+
"outputs": [
311+
{
312+
"html": [
313+
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
314+
"<table border=\"1\" class=\"dataframe\">\n",
315+
" <thead>\n",
316+
" <tr style=\"text-align: right;\">\n",
317+
" <th></th>\n",
318+
" <th>name</th>\n",
319+
" <th>sex</th>\n",
320+
" <th>count</th>\n",
321+
" <th>year</th>\n",
322+
" <th>2012</th>\n",
323+
" <th>count_2012</th>\n",
324+
" </tr>\n",
325+
" </thead>\n",
326+
" <tbody>\n",
327+
" <tr>\n",
328+
" <th>0</th>\n",
329+
" <td> Mary</td>\n",
330+
" <td> F</td>\n",
331+
" <td> 7065</td>\n",
332+
" <td> 1880</td>\n",
333+
" <td> False</td>\n",
334+
" <td> 22245</td>\n",
335+
" </tr>\n",
336+
" <tr>\n",
337+
" <th>1</th>\n",
338+
" <td> Anna</td>\n",
339+
" <td> F</td>\n",
340+
" <td> 2604</td>\n",
341+
" <td> 1880</td>\n",
342+
" <td> False</td>\n",
343+
" <td> 20871</td>\n",
344+
" </tr>\n",
345+
" <tr>\n",
346+
" <th>2</th>\n",
347+
" <td> Emma</td>\n",
348+
" <td> F</td>\n",
349+
" <td> 2003</td>\n",
350+
" <td> 1880</td>\n",
351+
" <td> False</td>\n",
352+
" <td> 19026</td>\n",
353+
" </tr>\n",
354+
" </tbody>\n",
355+
"</table>\n",
356+
"</div>"
357+
],
358+
"metadata": {},
359+
"output_type": "pyout",
360+
"prompt_number": 55,
361+
"text": [
362+
" name sex count year 2012 count_2012\n",
363+
"0 Mary F 7065 1880 False 22245\n",
364+
"1 Anna F 2604 1880 False 20871\n",
365+
"2 Emma F 2003 1880 False 19026"
366+
]
367+
}
368+
],
369+
"prompt_number": 55
301370
},
302371
{
303372
"cell_type": "markdown",
304373
"metadata": {},
305374
"source": [
306-
"## Reshape the data into the force we want"
375+
"## Reshape the data into the format we want"
307376
]
308377
},
309378
{
310379
"cell_type": "code",
311380
"collapsed": false,
312381
"input": [
313-
"# Create a variable that is a pivot table, totally the number of times a name is registered\n",
382+
"# Create a variable that is a pivot table, \n",
383+
"# totalling the number of times a name is registered\n",
314384
"names = df.pivot_table(index=['name'], aggfunc=np.sum)"
315385
],
316386
"language": "python",
317387
"metadata": {},
318388
"outputs": [],
319-
"prompt_number": 20
389+
"prompt_number": 56
320390
},
321391
{
322392
"cell_type": "code",
@@ -328,7 +398,7 @@
328398
"language": "python",
329399
"metadata": {},
330400
"outputs": [],
331-
"prompt_number": 21
401+
"prompt_number": 57
332402
},
333403
{
334404
"cell_type": "code",
@@ -340,7 +410,103 @@
340410
"language": "python",
341411
"metadata": {},
342412
"outputs": [],
343-
"prompt_number": 22
413+
"prompt_number": 58
414+
},
415+
{
416+
"cell_type": "code",
417+
"collapsed": false,
418+
"input": [
419+
"# Turn the index into its own column\n",
420+
"names['names'] = names.index"
421+
],
422+
"language": "python",
423+
"metadata": {},
424+
"outputs": [],
425+
"prompt_number": 59
426+
},
427+
{
428+
"cell_type": "code",
429+
"collapsed": false,
430+
"input": [
431+
"# create a dataframe with all names ending in a\n",
432+
"a_names = names[names['names'].str.endswith('a')]"
433+
],
434+
"language": "python",
435+
"metadata": {},
436+
"outputs": [],
437+
"prompt_number": 62
438+
},
439+
{
440+
"cell_type": "code",
441+
"collapsed": false,
442+
"input": [
443+
"# How many names in a_names?\n",
444+
"len(a_names)"
445+
],
446+
"language": "python",
447+
"metadata": {},
448+
"outputs": [
449+
{
450+
"metadata": {},
451+
"output_type": "pyout",
452+
"prompt_number": 75,
453+
"text": [
454+
"26687"
455+
]
456+
}
457+
],
458+
"prompt_number": 75
459+
},
460+
{
461+
"cell_type": "code",
462+
"collapsed": false,
463+
"input": [
464+
"# Let's find Zaria\n",
465+
"a_names[a_names['names'] == 'Zaria']"
466+
],
467+
"language": "python",
468+
"metadata": {},
469+
"outputs": [
470+
{
471+
"html": [
472+
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
473+
"<table border=\"1\" class=\"dataframe\">\n",
474+
" <thead>\n",
475+
" <tr style=\"text-align: right;\">\n",
476+
" <th></th>\n",
477+
" <th>count</th>\n",
478+
" <th>count_2012</th>\n",
479+
" <th>names</th>\n",
480+
" </tr>\n",
481+
" <tr>\n",
482+
" <th>name</th>\n",
483+
" <th></th>\n",
484+
" <th></th>\n",
485+
" <th></th>\n",
486+
" </tr>\n",
487+
" </thead>\n",
488+
" <tbody>\n",
489+
" <tr>\n",
490+
" <th>Zaria</th>\n",
491+
" <td> 6892</td>\n",
492+
" <td> 7449</td>\n",
493+
" <td> Zaria</td>\n",
494+
" </tr>\n",
495+
" </tbody>\n",
496+
"</table>\n",
497+
"</div>"
498+
],
499+
"metadata": {},
500+
"output_type": "pyout",
501+
"prompt_number": 76,
502+
"text": [
503+
" count count_2012 names\n",
504+
"name \n",
505+
"Zaria 6892 7449 Zaria"
506+
]
507+
}
508+
],
509+
"prompt_number": 76
344510
},
345511
{
346512
"cell_type": "markdown",
@@ -354,12 +520,12 @@
354520
"collapsed": false,
355521
"input": [
356522
"# Export the data to csv\n",
357-
"names.to_csv('names.csv')"
523+
"a_names.to_csv('names.csv')"
358524
],
359525
"language": "python",
360526
"metadata": {},
361527
"outputs": [],
362-
"prompt_number": 23
528+
"prompt_number": 61
363529
}
364530
],
365531
"metadata": {}

0 commit comments

Comments
 (0)