Skip to content

Commit 389563e

Browse files
authored
Merge pull request #29 from zazuko/geocoding-limits
geocoding limits
2 parents dd63da9 + d418e86 commit 389563e

File tree

1 file changed

+17
-5
lines changed

1 file changed

+17
-5
lines changed

notebooks/zefix/zefix.ipynb

+17-5
Original file line number | Diff line number | Diff line change
@@ -85,14 +85,15 @@
8585
"import folium\n",
8686
"import mapclassify\n",
8787
"import matplotlib as mpl\n",
88-
"import matplotlib.pyplot as plt\n",
8988
"import pandas as pd\n",
9089
"import geopandas as gpd\n",
9190
"import plotly.express as px\n",
9291
"import plotly.graph_objects as go\n",
93-
"from plotly.subplots import make_subplots\n",
9492
"\n",
9593
"from graphly.api_client import SparqlClient\n",
94+
"from geopy.extra.rate_limiter import RateLimiter\n",
95+
"from geopy.geocoders import Nominatim\n",
96+
"from shapely.geometry import Point\n",
9697
"\n",
9798
"%matplotlib inline"
9899
]
@@ -501,6 +502,9 @@
501502
"metadata": {},
502503
"outputs": [],
503504
"source": [
505+
"geolocator = Nominatim(user_agent=\"zefix\")\n",
506+
"find_location = RateLimiter(geolocator.geocode, min_delay_seconds=1)\n",
507+
"\n",
504508
"def geocode(df: pd.DataFrame, cache_file: str=\"addresses.json\"):\n",
505509
"\n",
506510
" if os.path.isfile(cache_file): \n",
@@ -513,8 +517,16 @@
513517
" is_new_address = ~df.address.isin(cached_addresses)\n",
514518
"\n",
515519
" if any(is_new_address):\n",
516-
" new_locations = gpd.tools.geocode(df.address[is_new_address], user_agent=\"zazuko\", provider=\"nominatim\")\n",
517-
" new_locations[\"address\"] = df.address[is_new_address]\n",
520+
" \n",
521+
" coordinates = []\n",
522+
" addresses = []\n",
523+
" for address in df.address[is_new_address]:\n",
524+
" location = find_location(address)\n",
525+
" if coordinates:\n",
526+
" coordinates.append(Point(location))\n",
527+
" addresses.append(address)\n",
528+
"\n",
529+
" new_locations = gpd.GeoDataFrame.from_dict({\"address\": addresses, \"geometry\": coordinates})\n",
518530
" new_locations = new_locations.set_crs(epsg=4326)\n",
519531
" new_locations = new_locations[~new_locations.geometry.is_empty]\n",
520532
" all_locations = gpd.GeoDataFrame(pd.concat([new_locations, cached_locations], ignore_index=True), crs=new_locations.crs)\n",
@@ -609,7 +621,7 @@
609621
"name": "python",
610622
"nbconvert_exporter": "python",
611623
"pygments_lexer": "ipython3",
612-
"version": "3.9.0"
624+
"version": "3.9.12"
613625
},
614626
"title": "Central Business Name Index - Zefix"
615627
},

0 commit comments

Comments
 (0)