Skip to content

Commit cf1ed8d

Browse files
committed
added a new snippet
1 parent de097b3 commit cf1ed8d

12 files changed

+3019
-76
lines changed

.ipynb_checkpoints/beautiful_soup_html_basics-checkpoint.ipynb

+69-27
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"metadata": {
33
"name": "",
4-
"signature": "sha256:3d71f03c99b034e4a28c13a4ab2fdc87812a96d4e925c7face4a085060852d8e"
4+
"signature": "sha256:e9ecaccea72674cf0609651f4b1328b2c2374ae43730b4bf93b47fa871b273ee"
55
},
66
"nbformat": 3,
77
"nbformat_minor": 0,
@@ -180,7 +180,7 @@
180180
"cell_type": "markdown",
181181
"metadata": {},
182182
"source": [
183-
"### The parent"
183+
"### The parent of the title tag"
184184
]
185185
},
186186
{
@@ -203,6 +203,13 @@
203203
],
204204
"prompt_number": 68
205205
},
206+
{
207+
"cell_type": "markdown",
208+
"metadata": {},
209+
"source": [
210+
"### The class of the first paragraph tag"
211+
]
212+
},
206213
{
207214
"cell_type": "code",
208215
"collapsed": false,
@@ -223,6 +230,13 @@
223230
],
224231
"prompt_number": 69
225232
},
233+
{
234+
"cell_type": "markdown",
235+
"metadata": {},
236+
"source": [
237+
"### The first link tag"
238+
]
239+
},
226240
{
227241
"cell_type": "code",
228242
"collapsed": false,
@@ -243,6 +257,13 @@
243257
],
244258
"prompt_number": 70
245259
},
260+
{
261+
"cell_type": "markdown",
262+
"metadata": {},
263+
"source": [
264+
"### Find all the link tags"
265+
]
266+
},
246267
{
247268
"cell_type": "code",
248269
"collapsed": false,
@@ -270,6 +291,13 @@
270291
],
271292
"prompt_number": 71
272293
},
294+
{
295+
"cell_type": "markdown",
296+
"metadata": {},
297+
"source": [
298+
"### Get all the text on the page"
299+
]
300+
},
273301
{
274302
"cell_type": "code",
275303
"collapsed": false,
@@ -290,6 +318,13 @@
290318
],
291319
"prompt_number": 79
292320
},
321+
{
322+
"cell_type": "markdown",
323+
"metadata": {},
324+
"source": [
325+
"### The string inside the first paragraph tag"
326+
]
327+
},
293328
{
294329
"cell_type": "code",
295330
"collapsed": false,
@@ -310,6 +345,13 @@
310345
],
311346
"prompt_number": 88
312347
},
348+
{
349+
"cell_type": "markdown",
350+
"metadata": {},
351+
"source": [
352+
"### Find all the h2 tags"
353+
]
354+
},
313355
{
314356
"cell_type": "code",
315357
"collapsed": false,
@@ -334,6 +376,13 @@
334376
],
335377
"prompt_number": 121
336378
},
379+
{
380+
"cell_type": "markdown",
381+
"metadata": {},
382+
"source": [
383+
"### Find all the links on the page"
384+
]
385+
},
337386
{
338387
"cell_type": "code",
339388
"collapsed": false,
@@ -361,6 +410,13 @@
361410
],
362411
"prompt_number": 97
363412
},
413+
{
414+
"cell_type": "markdown",
415+
"metadata": {},
416+
"source": [
417+
"### Find all the tag pairs with class=logo"
418+
]
419+
},
364420
{
365421
"cell_type": "code",
366422
"collapsed": false,
@@ -382,24 +438,11 @@
382438
"prompt_number": 100
383439
},
384440
{
385-
"cell_type": "code",
386-
"collapsed": false,
387-
"input": [
388-
"soup.find_all(text='chris')"
389-
],
390-
"language": "python",
441+
"cell_type": "markdown",
391442
"metadata": {},
392-
"outputs": [
393-
{
394-
"metadata": {},
395-
"output_type": "pyout",
396-
"prompt_number": 103,
397-
"text": [
398-
"[]"
399-
]
400-
}
401-
],
402-
"prompt_number": 103
443+
"source": [
444+
"### Select the string in front of the link nested inside the h2 tag pair"
445+
]
403446
},
404447
{
405448
"cell_type": "code",
@@ -422,6 +465,13 @@
422465
],
423466
"prompt_number": 120
424467
},
468+
{
469+
"cell_type": "markdown",
470+
"metadata": {},
471+
"source": [
472+
"### Print the pretty, nested version of the Beautiful Soup object"
473+
]
474+
},
425475
{
426476
"cell_type": "code",
427477
"collapsed": false,
@@ -549,14 +599,6 @@
549599
}
550600
],
551601
"prompt_number": 114
552-
},
553-
{
554-
"cell_type": "code",
555-
"collapsed": false,
556-
"input": [],
557-
"language": "python",
558-
"metadata": {},
559-
"outputs": []
560602
}
561603
],
562604
"metadata": {}

.ipynb_checkpoints/beautiful_soup_scrape_table-checkpoint.ipynb

+11-11
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"metadata": {
33
"name": "",
4-
"signature": "sha256:d7bee2df7a6656262842a249799bdab970b9dc3f2a93fab72055703649ff613e"
4+
"signature": "sha256:ccfbc6e59ff033c9cebbcbd45224b27b280b9487226bc719772a84909f12f03c"
55
},
66
"nbformat": 3,
77
"nbformat_minor": 0,
@@ -29,7 +29,7 @@
2929
"language": "python",
3030
"metadata": {},
3131
"outputs": [],
32-
"prompt_number": 95
32+
"prompt_number": 105
3333
},
3434
{
3535
"cell_type": "markdown",
@@ -120,7 +120,7 @@
120120
],
121121
"metadata": {},
122122
"output_type": "pyout",
123-
"prompt_number": 97,
123+
"prompt_number": 106,
124124
"text": [
125125
" first_name last_name age preTestScore postTestScore\n",
126126
"0 Jason Miller 42 4 25\n",
@@ -133,7 +133,7 @@
133133
]
134134
}
135135
],
136-
"prompt_number": 97
136+
"prompt_number": 106
137137
},
138138
{
139139
"cell_type": "markdown",
@@ -158,7 +158,7 @@
158158
"language": "python",
159159
"metadata": {},
160160
"outputs": [],
161-
"prompt_number": 96
161+
"prompt_number": 107
162162
},
163163
{
164164
"cell_type": "markdown",
@@ -171,16 +171,16 @@
171171
"cell_type": "code",
172172
"collapsed": false,
173173
"input": [
174-
"# Create an object of the first object that is class=dataframe\n",
175-
"table = soup.find(class_='dataframe')\n",
176-
"\n",
177174
"# Create five variables to store the scraped data in\n",
178175
"first_name = []\n",
179176
"last_name = []\n",
180177
"age = []\n",
181178
"preTestScore = []\n",
182179
"postTestScore = []\n",
183180
"\n",
181+
"# Create an object of the first object that is class=dataframe\n",
182+
"table = soup.find(class_='dataframe')\n",
183+
"\n",
184184
"# Find all the <tr> tag pairs, skip the first one, then for each.\n",
185185
"for row in table.find_all('tr')[1:]:\n",
186186
" # Create a variable of all the <td> tag pairs in each <tr> tag pair,\n",
@@ -220,7 +220,7 @@
220220
"language": "python",
221221
"metadata": {},
222222
"outputs": [],
223-
"prompt_number": 98
223+
"prompt_number": 108
224224
},
225225
{
226226
"cell_type": "code",
@@ -294,7 +294,7 @@
294294
],
295295
"metadata": {},
296296
"output_type": "pyout",
297-
"prompt_number": 99,
297+
"prompt_number": 109,
298298
"text": [
299299
" age first_name last_name postTestScore preTestScore\n",
300300
"0 42 Jason Miller 25 4\n",
@@ -307,7 +307,7 @@
307307
]
308308
}
309309
],
310-
"prompt_number": 99
310+
"prompt_number": 109
311311
}
312312
],
313313
"metadata": {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
{
2+
"metadata": {
3+
"name": "",
4+
"signature": "sha256:6f65c854788d4cbf9604b003be0d65dbdd064b6f4c7077afb4696803529fd84d"
5+
},
6+
"nbformat": 3,
7+
"nbformat_minor": 0,
8+
"worksheets": [
9+
{
10+
"cells": [
11+
{
12+
"cell_type": "markdown",
13+
"metadata": {},
14+
"source": [
15+
"# Lambda Functions\n",
16+
"\n",
17+
"This snippet was written by [Chris R. Albon](http://www.chrisralbon.com/) and is part of his collection of [well-documented Python snippets](https://github.com/chrisalbon/code_py). All code is written in Python 3 in iPython notebook and offered under the [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/).\n",
18+
"\n",
19+
"From: Learning Python"
20+
]
21+
},
22+
{
23+
"cell_type": "markdown",
24+
"metadata": {},
25+
"source": [
26+
"Lambda functions can act as mini-functions, allowing you to embed small bits of code in things like series."
27+
]
28+
},
29+
{
30+
"cell_type": "markdown",
31+
"metadata": {},
32+
"source": [
33+
"### Create a series, called pipeline, that contains three mini functions"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"collapsed": true,
39+
"input": [
40+
"pipeline = [lambda x: x **2 - 1 + 5,\n",
41+
" lambda x: x **20 - 2 + 3,\n",
42+
" lambda x: x **200 - 1 + 4]"
43+
],
44+
"language": "python",
45+
"metadata": {},
46+
"outputs": [],
47+
"prompt_number": 3
48+
},
49+
{
50+
"cell_type": "markdown",
51+
"metadata": {},
52+
"source": [
53+
"### For each item in pipeline, run the lambda function with x = 3"
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"collapsed": false,
59+
"input": [
60+
"for f in pipeline:\n",
61+
" print(f(3))"
62+
],
63+
"language": "python",
64+
"metadata": {},
65+
"outputs": []
66+
}
67+
],
68+
"metadata": {}
69+
}
70+
]
71+
}

0 commit comments

Comments
 (0)