|
1 | 1 | {
|
2 | 2 | "metadata": {
|
3 | 3 | "name": "",
|
4 |
| - "signature": "sha256:d039f7a050a26f08d91e279de296855d750849e86e2ce423af76319d25eabcfc" |
| 4 | + "signature": "sha256:a894bc887c29aeaca41270a3822bbb75952f1f89879d187e0d78aeead85fcf05" |
5 | 5 | },
|
6 | 6 | "nbformat": 3,
|
7 | 7 | "nbformat_minor": 0,
|
|
45 | 45 | "language": "python",
|
46 | 46 | "metadata": {},
|
47 | 47 | "outputs": [],
|
48 |
| - "prompt_number": 1 |
| 48 | + "prompt_number": 41 |
49 | 49 | },
|
50 | 50 | {
|
51 | 51 | "cell_type": "markdown",
|
|
73 | 73 | {
|
74 | 74 | "metadata": {},
|
75 | 75 | "output_type": "pyout",
|
76 |
| - "prompt_number": 2, |
| 76 | + "prompt_number": 42, |
77 | 77 | "text": [
|
78 | 78 | "['1880', '1881', '1882', '1883', '1884']"
|
79 | 79 | ]
|
80 | 80 | }
|
81 | 81 | ],
|
82 |
| - "prompt_number": 2 |
| 82 | + "prompt_number": 42 |
83 | 83 | },
|
84 | 84 | {
|
85 | 85 | "cell_type": "code",
|
|
101 | 101 | "language": "python",
|
102 | 102 | "metadata": {},
|
103 | 103 | "outputs": [],
|
104 |
| - "prompt_number": 3 |
| 104 | + "prompt_number": 43 |
105 | 105 | },
|
106 | 106 | {
|
107 | 107 | "cell_type": "code",
|
|
116 | 116 | {
|
117 | 117 | "metadata": {},
|
118 | 118 | "output_type": "pyout",
|
119 |
| - "prompt_number": 4, |
| 119 | + "prompt_number": 44, |
120 | 120 | "text": [
|
121 | 121 | "['df_1880', 'df_1881', 'df_1882', 'df_1883', 'df_1884']"
|
122 | 122 | ]
|
123 | 123 | }
|
124 | 124 | ],
|
125 |
| - "prompt_number": 4 |
| 125 | + "prompt_number": 44 |
126 | 126 | },
|
127 | 127 | {
|
128 | 128 | "cell_type": "code",
|
|
144 | 144 | "language": "python",
|
145 | 145 | "metadata": {},
|
146 | 146 | "outputs": [],
|
147 |
| - "prompt_number": 5 |
| 147 | + "prompt_number": 45 |
148 | 148 | },
|
149 | 149 | {
|
150 | 150 | "cell_type": "code",
|
|
159 | 159 | {
|
160 | 160 | "metadata": {},
|
161 | 161 | "output_type": "pyout",
|
162 |
| - "prompt_number": 6, |
| 162 | + "prompt_number": 46, |
163 | 163 | "text": [
|
164 | 164 | "['yob1880.txt', 'yob1881.txt', 'yob1882.txt', 'yob1883.txt', 'yob1884.txt']"
|
165 | 165 | ]
|
166 | 166 | }
|
167 | 167 | ],
|
168 |
| - "prompt_number": 6 |
| 168 | + "prompt_number": 46 |
169 | 169 | },
|
170 | 170 | {
|
171 | 171 | "cell_type": "code",
|
|
177 | 177 | "language": "python",
|
178 | 178 | "metadata": {},
|
179 | 179 | "outputs": [],
|
180 |
| - "prompt_number": 7 |
| 180 | + "prompt_number": 47 |
181 | 181 | },
|
182 | 182 | {
|
183 | 183 | "cell_type": "code",
|
|
189 | 189 | "language": "python",
|
190 | 190 | "metadata": {},
|
191 | 191 | "outputs": [],
|
192 |
| - "prompt_number": 8 |
| 192 | + "prompt_number": 48 |
193 | 193 | },
|
194 | 194 | {
|
195 | 195 | "cell_type": "code",
|
|
212 | 212 | "language": "python",
|
213 | 213 | "metadata": {},
|
214 | 214 | "outputs": [],
|
215 |
| - "prompt_number": 9 |
| 215 | + "prompt_number": 49 |
216 | 216 | },
|
217 | 217 | {
|
218 | 218 | "cell_type": "code",
|
|
227 | 227 | {
|
228 | 228 | "metadata": {},
|
229 | 229 | "output_type": "pyout",
|
230 |
| - "prompt_number": 10, |
| 230 | + "prompt_number": 50, |
231 | 231 | "text": [
|
232 | 232 | "1759019"
|
233 | 233 | ]
|
234 | 234 | }
|
235 | 235 | ],
|
236 |
| - "prompt_number": 10 |
| 236 | + "prompt_number": 50 |
237 | 237 | },
|
238 | 238 | {
|
239 | 239 | "cell_type": "markdown",
|
|
252 | 252 | "language": "python",
|
253 | 253 | "metadata": {},
|
254 | 254 | "outputs": [],
|
255 |
| - "prompt_number": 12 |
| 255 | + "prompt_number": 51 |
256 | 256 | },
|
257 | 257 | {
|
258 | 258 | "cell_type": "code",
|
|
267 | 267 | {
|
268 | 268 | "metadata": {},
|
269 | 269 | "output_type": "pyout",
|
270 |
| - "prompt_number": 13, |
| 270 | + "prompt_number": 52, |
271 | 271 | "text": [
|
272 | 272 | "1043318"
|
273 | 273 | ]
|
274 | 274 | }
|
275 | 275 | ],
|
276 |
| - "prompt_number": 13 |
| 276 | + "prompt_number": 52 |
277 | 277 | },
|
278 | 278 | {
|
279 | 279 | "cell_type": "code",
|
|
285 | 285 | "language": "python",
|
286 | 286 | "metadata": {},
|
287 | 287 | "outputs": [],
|
288 |
| - "prompt_number": 14 |
| 288 | + "prompt_number": 53 |
289 | 289 | },
|
290 | 290 | {
|
291 | 291 | "cell_type": "code",
|
|
297 | 297 | "language": "python",
|
298 | 298 | "metadata": {},
|
299 | 299 | "outputs": [],
|
300 |
| - "prompt_number": 15 |
| 300 | + "prompt_number": 54 |
| 301 | + }, |
| 302 | + { |
| 303 | + "cell_type": "code", |
| 304 | + "collapsed": false, |
| 305 | + "input": [ |
| 306 | + "df.head(3)" |
| 307 | + ], |
| 308 | + "language": "python", |
| 309 | + "metadata": {}, |
| 310 | + "outputs": [ |
| 311 | + { |
| 312 | + "html": [ |
| 313 | + "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", |
| 314 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 315 | + " <thead>\n", |
| 316 | + " <tr style=\"text-align: right;\">\n", |
| 317 | + " <th></th>\n", |
| 318 | + " <th>name</th>\n", |
| 319 | + " <th>sex</th>\n", |
| 320 | + " <th>count</th>\n", |
| 321 | + " <th>year</th>\n", |
| 322 | + " <th>2012</th>\n", |
| 323 | + " <th>count_2012</th>\n", |
| 324 | + " </tr>\n", |
| 325 | + " </thead>\n", |
| 326 | + " <tbody>\n", |
| 327 | + " <tr>\n", |
| 328 | + " <th>0</th>\n", |
| 329 | + " <td> Mary</td>\n", |
| 330 | + " <td> F</td>\n", |
| 331 | + " <td> 7065</td>\n", |
| 332 | + " <td> 1880</td>\n", |
| 333 | + " <td> False</td>\n", |
| 334 | + " <td> 22245</td>\n", |
| 335 | + " </tr>\n", |
| 336 | + " <tr>\n", |
| 337 | + " <th>1</th>\n", |
| 338 | + " <td> Anna</td>\n", |
| 339 | + " <td> F</td>\n", |
| 340 | + " <td> 2604</td>\n", |
| 341 | + " <td> 1880</td>\n", |
| 342 | + " <td> False</td>\n", |
| 343 | + " <td> 20871</td>\n", |
| 344 | + " </tr>\n", |
| 345 | + " <tr>\n", |
| 346 | + " <th>2</th>\n", |
| 347 | + " <td> Emma</td>\n", |
| 348 | + " <td> F</td>\n", |
| 349 | + " <td> 2003</td>\n", |
| 350 | + " <td> 1880</td>\n", |
| 351 | + " <td> False</td>\n", |
| 352 | + " <td> 19026</td>\n", |
| 353 | + " </tr>\n", |
| 354 | + " </tbody>\n", |
| 355 | + "</table>\n", |
| 356 | + "</div>" |
| 357 | + ], |
| 358 | + "metadata": {}, |
| 359 | + "output_type": "pyout", |
| 360 | + "prompt_number": 55, |
| 361 | + "text": [ |
| 362 | + " name sex count year 2012 count_2012\n", |
| 363 | + "0 Mary F 7065 1880 False 22245\n", |
| 364 | + "1 Anna F 2604 1880 False 20871\n", |
| 365 | + "2 Emma F 2003 1880 False 19026" |
| 366 | + ] |
| 367 | + } |
| 368 | + ], |
| 369 | + "prompt_number": 55 |
301 | 370 | },
|
302 | 371 | {
|
303 | 372 | "cell_type": "markdown",
|
304 | 373 | "metadata": {},
|
305 | 374 | "source": [
|
306 |
| - "## Reshape the data into the force we want" |
| 375 | + "## Reshape the data into the format we want" |
307 | 376 | ]
|
308 | 377 | },
|
309 | 378 | {
|
310 | 379 | "cell_type": "code",
|
311 | 380 | "collapsed": false,
|
312 | 381 | "input": [
|
313 |
| - "# Create a variable that is a pivot table, totally the number of times a name is registered\n", |
| 382 | + "# Create a variable that is a pivot table, \n", |
| 383 | + "# totalling the number of times a name is registered\n", |
314 | 384 | "names = df.pivot_table(index=['name'], aggfunc=np.sum)"
|
315 | 385 | ],
|
316 | 386 | "language": "python",
|
317 | 387 | "metadata": {},
|
318 | 388 | "outputs": [],
|
319 |
| - "prompt_number": 20 |
| 389 | + "prompt_number": 56 |
320 | 390 | },
|
321 | 391 | {
|
322 | 392 | "cell_type": "code",
|
|
328 | 398 | "language": "python",
|
329 | 399 | "metadata": {},
|
330 | 400 | "outputs": [],
|
331 |
| - "prompt_number": 21 |
| 401 | + "prompt_number": 57 |
332 | 402 | },
|
333 | 403 | {
|
334 | 404 | "cell_type": "code",
|
|
340 | 410 | "language": "python",
|
341 | 411 | "metadata": {},
|
342 | 412 | "outputs": [],
|
343 |
| - "prompt_number": 22 |
| 413 | + "prompt_number": 58 |
| 414 | + }, |
| 415 | + { |
| 416 | + "cell_type": "code", |
| 417 | + "collapsed": false, |
| 418 | + "input": [ |
| 419 | + "# Turn the index into its own column\n", |
| 420 | + "names['names'] = names.index" |
| 421 | + ], |
| 422 | + "language": "python", |
| 423 | + "metadata": {}, |
| 424 | + "outputs": [], |
| 425 | + "prompt_number": 59 |
| 426 | + }, |
| 427 | + { |
| 428 | + "cell_type": "code", |
| 429 | + "collapsed": false, |
| 430 | + "input": [ |
| 431 | + "# create a dataframe with all names ending in a\n", |
| 432 | + "a_names = names[names['names'].str.endswith('a')]" |
| 433 | + ], |
| 434 | + "language": "python", |
| 435 | + "metadata": {}, |
| 436 | + "outputs": [], |
| 437 | + "prompt_number": 62 |
| 438 | + }, |
| 439 | + { |
| 440 | + "cell_type": "code", |
| 441 | + "collapsed": false, |
| 442 | + "input": [ |
| 443 | + "# How many names in a_names?\n", |
| 444 | + "len(a_names)" |
| 445 | + ], |
| 446 | + "language": "python", |
| 447 | + "metadata": {}, |
| 448 | + "outputs": [ |
| 449 | + { |
| 450 | + "metadata": {}, |
| 451 | + "output_type": "pyout", |
| 452 | + "prompt_number": 75, |
| 453 | + "text": [ |
| 454 | + "26687" |
| 455 | + ] |
| 456 | + } |
| 457 | + ], |
| 458 | + "prompt_number": 75 |
| 459 | + }, |
| 460 | + { |
| 461 | + "cell_type": "code", |
| 462 | + "collapsed": false, |
| 463 | + "input": [ |
| 464 | + "# Let's find Zaria\n", |
| 465 | + "a_names[a_names['names'] == 'Zaria']" |
| 466 | + ], |
| 467 | + "language": "python", |
| 468 | + "metadata": {}, |
| 469 | + "outputs": [ |
| 470 | + { |
| 471 | + "html": [ |
| 472 | + "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", |
| 473 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 474 | + " <thead>\n", |
| 475 | + " <tr style=\"text-align: right;\">\n", |
| 476 | + " <th></th>\n", |
| 477 | + " <th>count</th>\n", |
| 478 | + " <th>count_2012</th>\n", |
| 479 | + " <th>names</th>\n", |
| 480 | + " </tr>\n", |
| 481 | + " <tr>\n", |
| 482 | + " <th>name</th>\n", |
| 483 | + " <th></th>\n", |
| 484 | + " <th></th>\n", |
| 485 | + " <th></th>\n", |
| 486 | + " </tr>\n", |
| 487 | + " </thead>\n", |
| 488 | + " <tbody>\n", |
| 489 | + " <tr>\n", |
| 490 | + " <th>Zaria</th>\n", |
| 491 | + " <td> 6892</td>\n", |
| 492 | + " <td> 7449</td>\n", |
| 493 | + " <td> Zaria</td>\n", |
| 494 | + " </tr>\n", |
| 495 | + " </tbody>\n", |
| 496 | + "</table>\n", |
| 497 | + "</div>" |
| 498 | + ], |
| 499 | + "metadata": {}, |
| 500 | + "output_type": "pyout", |
| 501 | + "prompt_number": 76, |
| 502 | + "text": [ |
| 503 | + " count count_2012 names\n", |
| 504 | + "name \n", |
| 505 | + "Zaria 6892 7449 Zaria" |
| 506 | + ] |
| 507 | + } |
| 508 | + ], |
| 509 | + "prompt_number": 76 |
344 | 510 | },
|
345 | 511 | {
|
346 | 512 | "cell_type": "markdown",
|
|
354 | 520 | "collapsed": false,
|
355 | 521 | "input": [
|
356 | 522 | "# Export the data to csv\n",
|
357 |
| - "names.to_csv('names.csv')" |
| 523 | + "a_names.to_csv('names.csv')" |
358 | 524 | ],
|
359 | 525 | "language": "python",
|
360 | 526 | "metadata": {},
|
361 | 527 | "outputs": [],
|
362 |
| - "prompt_number": 23 |
| 528 | + "prompt_number": 61 |
363 | 529 | }
|
364 | 530 | ],
|
365 | 531 | "metadata": {}
|
|
0 commit comments