-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_extractor_json_creater_2000.py
83 lines (67 loc) · 1.94 KB
/
data_extractor_json_creater_2000.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %%
from bs4 import BeautifulSoup
import json
import requests
# %%
# Download the Herrenberg parking overview page. The timeout keeps the script
# from hanging forever on a stalled connection, and raise_for_status() stops
# an HTTP error page from being parsed as if it were the real listing.
req = requests.get(
    "https://www.herrenberg.de/de/Stadtleben/Erlebnis-Herrenberg/Service/Parkplaetze",
    timeout=30,
)
req.raise_for_status()
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results can differ between machines.
soup = BeautifulSoup(req.text, "html.parser")
# %%
# Collect the link targets found inside the "mainColArea" element, skipping
# every link whose href contains "#" (in-page anchors / placeholder links).
#
# href=True restricts the search to anchors that actually carry an href, so
# an <a> without one no longer raises KeyError.
#
# NOTE(review): the replace() here previously mapped '&' to '&', which does
# nothing. The intent is presumably to undo a still-escaped '&amp;' in the
# URL, so that is what is normalised now - confirm against the live page.
parsedLinks = [
    anchor['href'].replace('&amp;', '&')
    for anchor in soup.find(id="mainColArea").find_all('a', href=True)
    if "#" not in anchor['href']
]
# %%
def _fetch_content_area(url):
    """Download *url* and return its ``mainContentArea`` element.

    Raises ``requests.HTTPError`` for a non-success status so that an error
    page is never scraped as if it were a parking-lot page.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser").find(id="mainContentArea")


def _build_feature(content, coordinates, capacity):
    """Return one GeoJSON ``Feature`` dict for a parking-lot detail page.

    content     -- the page's ``mainContentArea`` element
    coordinates -- two-element list of numbers, used as the Point position
    capacity    -- capacity as an int (parsed by the caller, because the
                   heading layout differs between pages)

    The name is the <h2> text before the first comma; the address is the
    piece of the address block that follows its first ``<br/>``.
    """
    address_block = content.find("div", {"class": "cCore_addressBlock_address"})
    return {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": coordinates,
        },
        "properties": {
            "name": content.h2.text.split(",")[0],
            "address": str(address_block).split("<br/>")[1],
            "capacity": capacity,
        },
    }


# The link at index 13 is scraped separately below: its coordinates are
# hard-coded and its heading is parsed differently from the other pages.
exception = parsedLinks.pop(13)

# Grow the list as pages are scraped instead of pre-allocating 16 slots and
# indexing by a counter. The fixed size left empty {} "features" in the output
# when the site listed fewer pages and raised IndexError when it listed more.
dictlist = []
for url in parsedLinks:
    dataDiv = _fetch_content_area(url)
    # Look the maps link up once. The coordinate pair is the text after the
    # first "=" in its href, with the two values separated by an encoded
    # comma ("%2C").
    maps_href = str(dataDiv.find("a", {"target": "googleMaps"})['href'])
    pair = maps_href.split("=")[1].split("%2C")
    dictlist.append(
        _build_feature(
            dataDiv,
            # The second value of the pair goes first, matching the order of
            # the hard-coded pair below (presumably the link is "lat,lon" and
            # GeoJSON wants [lon, lat]). GeoJSON positions must be numbers,
            # hence float() rather than the raw strings.
            [float(pair[1]), float(pair[0])],
            # Heading looks like "<name>, <capacity> ..." on these pages.
            int(dataDiv.h2.text.split(", ")[1].split(" ")[0]),
        )
    )
# %%
# The special-cased page: fixed coordinates, and the capacity is the second
# space-separated token of the heading.
dataDiv = _fetch_content_area(exception)
dictlist.append(
    _build_feature(
        dataDiv,
        [8.86822, 48.59946],
        int(dataDiv.h2.text.split(" ")[1]),
    )
)
# %%
# Wrap every scraped feature in a single GeoJSON FeatureCollection object.
park = {"type": "FeatureCollection"}
park["features"] = dictlist
# %%
# Write the collection to disk. The encoding is given explicitly so the file
# is UTF-8 regardless of the platform's default locale encoding.
with open('parking_lots.geojson', 'w', encoding='utf-8') as fp:
    json.dump(park, fp)