From 4af9cf3e0be05a5a31bdb882b3e9dc3a8a33c450 Mon Sep 17 00:00:00 2001 From: Francia Csaba Date: Thu, 1 Aug 2024 13:14:26 +0200 Subject: [PATCH] Moved stop importer tests to amarillo-stops --- amarillo/services/stop_importer/__init__.py | 8 -- amarillo/services/stop_importer/base.py | 79 ------------------ amarillo/services/stop_importer/gtfs.py | 29 ------- amarillo/services/stop_importer/overpass.py | 39 --------- amarillo/tests/fixtures/stops.gtfs.zip | Bin 777 -> 0 bytes .../tests/fixtures/stops_overpass_result.csv | 40 --------- amarillo/tests/test_stops_gtfs_importer.py | 15 ---- .../tests/test_stops_overpass_importer.py | 14 ---- 8 files changed, 224 deletions(-) delete mode 100644 amarillo/services/stop_importer/__init__.py delete mode 100644 amarillo/services/stop_importer/base.py delete mode 100644 amarillo/services/stop_importer/gtfs.py delete mode 100644 amarillo/services/stop_importer/overpass.py delete mode 100644 amarillo/tests/fixtures/stops.gtfs.zip delete mode 100644 amarillo/tests/fixtures/stops_overpass_result.csv delete mode 100644 amarillo/tests/test_stops_gtfs_importer.py delete mode 100644 amarillo/tests/test_stops_overpass_importer.py diff --git a/amarillo/services/stop_importer/__init__.py b/amarillo/services/stop_importer/__init__.py deleted file mode 100644 index 0c9769a..0000000 --- a/amarillo/services/stop_importer/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from .base import ( - CsvStopsImporter as CsvStopsImporter, -) -from .base import ( - GeojsonStopsImporter as GeojsonStopsImporter, -) -from .gtfs import GtfsStopsImporter as GtfsStopsImporter -from .overpass import OverpassStopsImporter as OverpassStopsImporter diff --git a/amarillo/services/stop_importer/base.py b/amarillo/services/stop_importer/base.py deleted file mode 100644 index b80c99b..0000000 --- a/amarillo/services/stop_importer/base.py +++ /dev/null @@ -1,79 +0,0 @@ -import codecs -import csv -import logging -import re - -import geopandas as gpd -import requests - -logger = logging.getLogger(__name__) - - -class BaseStopsImporter: - def _normalize_stop_name(self, stop_name): - # if the name is empty, we set P+R as a fall back. However, it should be named at the source - default_name = 'P+R' - if stop_name in ('', 'Park&Ride'): - return default_name - return re.sub(r'P(ark)?\s?[\+&]\s?R(ail|ide)?', 'P+R', stop_name) - - def _as_dataframe(self, id, lat, lon, stop_name): - df = gpd.GeoDataFrame(data={'x': lon, 'y': lat, 'stop_name': stop_name, 'id': id}) - return gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y, crs='EPSG:4326')) - - -class CsvStopsImporter(BaseStopsImporter): - DEFAULT_COLUMN_MAPPING = { - 'stop_id': 'stop_id', - 'stop_lat': 'stop_lat', - 'stop_lon': 'stop_lon', - 'stop_name': 'stop_name', - } - - def load_stops(self, source, timeout=15): - if source.startswith('http'): - with requests.get(source, timeout=timeout) as csv_source: - return self._load_stops_from_csv_source( - codecs.iterdecode(csv_source.iter_lines(), 'utf-8'), delimiter=';' - ) - else: - with open(source, encoding='utf-8') as csv_source: - return self._load_stops_from_csv_source(csv_source, delimiter=';') - - def _load_stops_from_csv_source(self, csv_source, delimiter: str = ',', column_mapping=None): - if column_mapping is None: - column_mapping = self.DEFAULT_COLUMN_MAPPING - id = [] - lat = [] - lon = [] - stop_name = [] - reader = csv.DictReader(csv_source, delimiter=delimiter) - for row in reader: - id.append(row[column_mapping['stop_id']]) - lat.append(float(row[column_mapping['stop_lat']].replace(',', '.'))) - lon.append(float(row[column_mapping['stop_lon']].replace(',', '.'))) - stop_name.append(self._normalize_stop_name(row[column_mapping['stop_name']])) - - return self._as_dataframe(id, lat, lon, stop_name) - - -class GeojsonStopsImporter(BaseStopsImporter): - def load_stops(self, source, timeout=15): - with requests.get(source, timeout=timeout) as json_source: - geojson_source = json_source.json() - id = [] - lat = [] - lon = [] - stop_name = [] - for row in geojson_source['features']: - coord = row['geometry']['coordinates'] - if not coord or not row['properties'].get('name'): - logger.error('Stop feature {} has null coord or name'.format(row['id'])) - continue - - id.append(row['id']) - lon.append(coord[0]) - lat.append(coord[1]) - stop_name.append(self._normalize_stop_name(row['properties']['name'])) - - return self._as_dataframe(id, lat, lon, stop_name) diff --git a/amarillo/services/stop_importer/gtfs.py b/amarillo/services/stop_importer/gtfs.py deleted file mode 100644 index b895b5f..0000000 --- a/amarillo/services/stop_importer/gtfs.py +++ /dev/null @@ -1,29 +0,0 @@ -import io -import zipfile -from pathlib import Path - -import requests - -from .base import CsvStopsImporter - - -class GtfsStopsImporter(CsvStopsImporter): - def load_stops(self, id, url, timeout=15, **kwargs): - if url.startswith('http'): - # TODO: only reload if file is older than x - gtfs_file = Path(f'data/{id}.gtfs.zip') - with requests.get(url, timeout=timeout) as response: - if response.ok: - self._store_response(gtfs_file, response) - else: - gtfs_file = url - - with zipfile.ZipFile(gtfs_file) as gtfs: - with gtfs.open('stops.txt', 'r') as stops_file: - return self._load_stops_from_csv_source(io.TextIOWrapper(stops_file, 'utf-8-sig')) - - def _store_response(self, filename, response): - with filename.open('wb') as file: - for chunk in response.iter_content(chunk_size=1024 * 1024): - if chunk: - file.write(chunk) diff --git a/amarillo/services/stop_importer/overpass.py b/amarillo/services/stop_importer/overpass.py deleted file mode 100644 index 8067471..0000000 --- a/amarillo/services/stop_importer/overpass.py +++ /dev/null @@ -1,39 +0,0 @@ -import csv -import io -import logging - -import requests - -from .base import BaseStopsImporter - -logger = logging.getLogger(__name__) - - -class OverpassStopsImporter(BaseStopsImporter): - def load_stops(self, area_selector, timeout=15, **kwargs): - query = f''' - [out:csv(::"type", ::"id", ::"lat", ::"lon", name,parking,park_ride,operator,access,lit,fee,capacity,"capacity:disabled",supervised,surface,covered,maxstay,opening_hours)][timeout:60]; - area{area_selector}->.a; - nwr(area.a)[park_ride][park_ride!=no][access!=customers]; - out center; - ''' - - response = requests.post('https://overpass-api.de/api/interpreter', data=query, timeout=timeout) - if not response.ok: - logger.error(f'Error retrieving stops from overpass: {response.text}') - - return self._parse_overpass_csv_response(response.text.splitlines()) - - def _parse_overpass_csv_response(self, csv_source): - id = [] - lat = [] - lon = [] - stop_name = [] - reader = csv.DictReader(csv_source, delimiter='\t') - for row in reader: - id.append(f'osm:{row["@type"][0]}{row["@id"]}') - lat.append(float(row['@lat'])) - lon.append(float(row['@lon'])) - stop_name.append(self._normalize_stop_name(row['name'])) - - return self._as_dataframe(id, lat, lon, stop_name) diff --git a/amarillo/tests/fixtures/stops.gtfs.zip b/amarillo/tests/fixtures/stops.gtfs.zip deleted file mode 100644 index a1a2d891f2b42027b92d93a16b12042487bdbefc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 777 zcmWIWW@Zs#-~hrO9>oz1P~gqTz`)6%z))P0Ur?-9Qc)5b!pp#Z)%|BG43}1LGcdAz z1*!vTXJE)Z@11qnK%n7atwOm*+-!rYb0<#gc$r%^3vOMYcJ9Ott)<^z>h5}>#C}TD zsCTZbv*Ut?@ikJh`|6)fI==1wtJ`NkPMMb3kt%C+-MOxJ>G?a;&Kq7$o+C89OmF+P zyCJbz4OIY+T?-gf zgExkSX@6KTB|1Lp#@lCH0p9E!9xq`eZ6?> z!Sx6jY-quioFJN)L5)l$nvu7@wDWN7kE803; 0 - assert stopsDataFrames.loc[0, ['x', 'y', 'stop_name', 'id']].values.tolist() == [ - 8.75033716398694, - 48.7891850492262, - 'Monakam Brunnenstr.', - 'de:08235:4060:0:3', - ] diff --git a/amarillo/tests/test_stops_overpass_importer.py b/amarillo/tests/test_stops_overpass_importer.py deleted file mode 100644 index 7e6a4b4..0000000 --- a/amarillo/tests/test_stops_overpass_importer.py +++ /dev/null @@ -1,14 +0,0 @@ -from amarillo.services.stop_importer.overpass import OverpassStopsImporter - - -def test_load_geojson_stops_from_web_(): - with open('amarillo/tests/fixtures/stops_overpass_result.csv') as f: - stopsDataFrames = OverpassStopsImporter()._parse_overpass_csv_response(f) - - assert len(stopsDataFrames) > 0 - assert stopsDataFrames.loc[0, ['x', 'y', 'stop_name', 'id']].values.tolist() == [ - 11.7621206, - 46.7470278, - 'P+R', - 'osm:n5206558994', - ]