|
85 | 85 | "import folium\n",
|
86 | 86 | "import mapclassify\n",
|
87 | 87 | "import matplotlib as mpl\n",
|
88 |
| - "import matplotlib.pyplot as plt\n", |
89 | 88 | "import pandas as pd\n",
|
90 | 89 | "import geopandas as gpd\n",
|
91 | 90 | "import plotly.express as px\n",
|
92 | 91 | "import plotly.graph_objects as go\n",
|
93 |
| - "from plotly.subplots import make_subplots\n", |
94 | 92 | "\n",
|
95 | 93 | "from graphly.api_client import SparqlClient\n",
|
| 94 | + "from geopy.extra.rate_limiter import RateLimiter\n", |
| 95 | + "from geopy.geocoders import Nominatim\n", |
| 96 | + "from shapely.geometry import Point\n", |
96 | 97 | "\n",
|
97 | 98 | "%matplotlib inline"
|
98 | 99 | ]
|
|
501 | 502 | "metadata": {},
|
502 | 503 | "outputs": [],
|
503 | 504 | "source": [
|
| 505 | + "geolocator = Nominatim(user_agent=\"zefix\")\n", |
| 506 | + "find_location = RateLimiter(geolocator.geocode, min_delay_seconds=1)\n", |
| 507 | + "\n", |
504 | 508 | "def geocode(df: pd.DataFrame, cache_file: str=\"addresses.json\"):\n",
|
505 | 509 | "\n",
|
506 | 510 | " if os.path.isfile(cache_file): \n",
|
|
513 | 517 | " is_new_address = ~df.address.isin(cached_addresses)\n",
|
514 | 518 | "\n",
|
515 | 519 | " if any(is_new_address):\n",
|
516 |
| - " new_locations = gpd.tools.geocode(df.address[is_new_address], user_agent=\"zazuko\", provider=\"nominatim\")\n", |
517 |
| - " new_locations[\"address\"] = df.address[is_new_address]\n", |
| 520 | + " \n", |
| 521 | + " coordinates = []\n", |
| 522 | + " addresses = []\n", |
| 523 | + " for address in df.address[is_new_address]:\n", |
| 524 | + " location = find_location(address)\n", |
| 525 | + " if coordinates:\n", |
| 526 | + " coordinates.append(Point(location))\n", |
| 527 | + " addresses.append(address)\n", |
| 528 | + "\n", |
| 529 | + " new_locations = gpd.GeoDataFrame.from_dict({\"address\": addresses, \"geometry\": coordinates})\n", |
518 | 530 | " new_locations = new_locations.set_crs(epsg=4326)\n",
|
519 | 531 | " new_locations = new_locations[~new_locations.geometry.is_empty]\n",
|
520 | 532 | " all_locations = gpd.GeoDataFrame(pd.concat([new_locations, cached_locations], ignore_index=True), crs=new_locations.crs)\n",
|
|
609 | 621 | "name": "python",
|
610 | 622 | "nbconvert_exporter": "python",
|
611 | 623 | "pygments_lexer": "ipython3",
|
612 |
| - "version": "3.9.0" |
| 624 | + "version": "3.9.12" |
613 | 625 | },
|
614 | 626 | "title": "Central Business Name Index - Zefix"
|
615 | 627 | },
|
|
0 commit comments