Skip to content

Commit 12c5efb

Browse files
committed
Converter for Bavaria, Germany
1 parent 9276422 commit 12c5efb

File tree

8 files changed

+83
-22
lines changed

8 files changed

+83
-22
lines changed

fiboa_cli/convert_utils.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def add_asset_to_collection(collection, output_file, rows=None, columns=None):
125125

126126

127127
def stream_file(fs, src_uri, dst_file, chunk_size=10 * 1024 * 1024):
128-
with fs.open(src_uri, mode="rb") as f:
128+
with fs.open(src_uri, mode="rb", block_size=0) as f:
129129
while True:
130130
chunk = f.read(chunk_size)
131131
if not chunk:
@@ -172,6 +172,7 @@ class BaseConverter:
172172
source_variants: Optional[dict[dict[str, str] | str]] = None
173173
variant: str = None
174174
open_options = {}
175+
avoid_range_request = False
175176
years: Optional[dict[dict[int, str] | str]] = None
176177
year: str = None
177178

@@ -218,7 +219,7 @@ def layer_filter(self, layer: str, uri: str) -> bool:
218219
def post_migrate(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
219220
return gdf
220221

221-
def get_cache(self, cache_folder=None, force=False):
222+
def get_cache(self, cache_folder=None, force=False, **kwargs):
222223
if cache_folder is None:
223224
if not force:
224225
return None, None
@@ -228,12 +229,12 @@ def get_cache(self, cache_folder=None, force=False):
228229
with TemporaryDirectory(**_kwargs) as tmp_folder:
229230
cache_folder = tmp_folder
230231

231-
cache_fs = get_fs(cache_folder)
232+
cache_fs = get_fs(cache_folder, **kwargs)
232233
if not cache_fs.exists(cache_folder):
233234
cache_fs.makedirs(cache_folder)
234235
return cache_fs, cache_folder
235236

236-
def download_files(self, uris, cache_folder=None):
237+
def download_files(self, uris, cache_folder=None, **kwargs):
237238
"""Download (and cache) files from various sources"""
238239
if isinstance(uris, str):
239240
uris = {uris: name_from_uri(uris)}
@@ -249,7 +250,7 @@ def download_files(self, uris, cache_folder=None):
249250
else:
250251
name = target
251252

252-
source_fs = get_fs(uri)
253+
source_fs = get_fs(uri, **kwargs)
253254
cache_fs, cache_folder = self.get_cache(cache_folder, force=True)
254255

255256
if isinstance(source_fs, LocalFileSystem):
@@ -501,7 +502,10 @@ def convert(
501502
raise ValueError("No input files provided")
502503

503504
log("Getting file(s) if not cached yet")
504-
paths = self.download_files(urls, cache)
505+
request_args = {}
506+
if self.avoid_range_request:
507+
request_args["block_size"] = 0
508+
paths = self.download_files(urls, cache, **request_args)
505509

506510
kwargs.update(self.open_options)
507511
gdf = self.read_data(paths, **kwargs)

fiboa_cli/converter_rest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def get_data(self, paths, **kwargs):
4040
return super().get_data(paths, **kwargs)
4141

4242
base_url = paths[0] # loop over paths to support more than 1 source
43-
source_fs = get_fs(base_url)
43+
source_fs = get_fs(base_url, **kwargs)
4444
cache_fs, cache_folder = self.get_cache(self.cache_folder)
4545

4646
service_metadata = requests.get(base_url, {"f": "pjson"}).json()

fiboa_cli/datasets/be_wal.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@
88

99
class Converter(AdminConverterMixin, BaseConverter):
1010
sources = {
11-
"https://geoservices.wallonie.be/geotraitement/spwdatadownload/get/2a0d9be0-ac3d-443e-9db0-a7cfb0f128e2/LU_ExistingLandUse_SIGEC2022.gml.zip?blocksize=0": [
11+
"https://geoservices.wallonie.be/geotraitement/spwdatadownload/get/2a0d9be0-ac3d-443e-9db0-a7cfb0f128e2/LU_ExistingLandUse_SIGEC2022.gml.zip": [
1212
"LU_ExistingLandUse_SIGEC2022.gml"
1313
]
1414
}
15+
avoid_range_request = True
1516
id = "be_wal"
1617
admin_region_code = "WAL"
1718
short_name = "Belgium, Wallonia"

fiboa_cli/datasets/de_by.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from ..convert_utils import BaseConverter
2+
from .commons.admin import AdminConverterMixin
3+
4+
5+
class Converter(AdminConverterMixin, BaseConverter):
6+
sources = "https://geodaten.bayern.de/odd/m/3/daten/ln/landnutzung.gpkg"
7+
avoid_range_request = True
8+
9+
id = "de_by"
10+
admin_subdivision_code = "BY"
11+
short_name = "Germany, Bavaria"
12+
title = "Field boundaries for Bavaria, Germany"
13+
description = """A field block (German: "Feldblock") is a contiguous agricultural area surrounded by permanent boundaries, which is cultivated by one or more farmers with one or more crops, is fully or partially set aside or is fully or partially taken out of production."""
14+
license = "CC-BY-4.0"
15+
attribution = "Datenquelle: Bayerische Vermessungsverwaltung – www.geodaten.bayern.de"
16+
providers = [
17+
{
18+
"name": "Bayerische Vermessungsverwaltung",
19+
"url": "https://www.ldbv.bayern.de",
20+
"roles": ["producer", "licensor"],
21+
}
22+
]
23+
extensions = {"https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml"}
24+
25+
columns = {
26+
"geometry": "geometry",
27+
"objid": ["id", "flik"],
28+
"datumderletztenueberpruefung": "determination_datetime",
29+
"beginnt": "datetime:first_determination",
30+
"bewirtschaftung": "cultivation",
31+
# "artderbetriebsflaeche": "artderbetriebsflaeche",
32+
# "name": "name",
33+
# "istweiterenutzung": "istweiterenutzung",
34+
# "mappingannahme": "mappingannahme",
35+
"quellobjektid": "source_id",
36+
}
37+
missing_schemas = {
38+
"properties": {
39+
"datetime:first_determination": {"type": "date-time"},
40+
"cultivation": {"type": "string"},
41+
# "artderbetriebsflaeche": {"type": "string"},
42+
# "name": {"type": "string"},
43+
# "istweiterenutzung": {"type": "string"},
44+
# "mappingannahme": {"type": "boolean"},
45+
"source_id": {"type": "string"},
46+
}
47+
}
48+
49+
column_filters = {
50+
# see https://www.adv-online.de/GeoInfoDok/Aktuelle-Anwendungsschemata/Landnutzung-1.0.2/binarywriterservlet?imgUid=be12989a-7b60-5819-393b-216067bef8a0&uBasVariant=11111111-1111-1111-1111-111111111111#_C10573-_A10573_44376
51+
"bewirtschaftung": lambda col: col.isin(["1010", "1011", "1012", "1013", "1014", "1030", "1040", "1050"])
52+
}
53+
54+
column_migrations = {
55+
"flik": lambda col: col # todo cut id?
56+
}
57+
58+
def layer_filter(self, layer: str, uri: str) -> bool:
59+
return layer == "ln_landwirtschaft"

fiboa_cli/datasets/es_ar.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ class ARConverter(ESBaseConverter):
88
# These files can be annoying to download (web server failures, no HTTP range support for resuming a download)
99
# An alternative is to download the files per municipality; check the atom.xml
1010
sources = {
11-
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec22_sigpac.shp.zip&blocksize=0": "rec22_sigpac.shp.zip",
12-
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec44_sigpac.shp.zip&blocksize=0": "rec44_sigpac.shp.zip",
13-
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec50_sigpac.shp.zip&blocksize=0": "rec50_sigpac.shp.zip",
11+
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec22_sigpac.shp.zip": "rec22_sigpac.shp.zip",
12+
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec44_sigpac.shp.zip": "rec44_sigpac.shp.zip",
13+
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec50_sigpac.shp.zip": "rec50_sigpac.shp.zip",
1414
}
15+
avoid_range_request = True
1516

1617
id = "es_ar"
1718
short_name = "Spain Aragon"

fiboa_cli/datasets/es_nc.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,4 @@ def download_files(self, uris, cache_folder=None):
7272
# The hostname has invalid SSL; prefill the cache to avoid SSL errors
7373
self.prefill_cache(uris, cache_folder)
7474

75-
return super().download_files(uris, cache_folder=cache_folder)
75+
return super().download_files(uris, cache_folder)

fiboa_cli/datasets/sk.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
class Converter(AdminConverterMixin, BaseConverter):
66
sources = {
7-
"https://data.slovensko.sk/download?id=e39ad227-1899-4cff-b7c8-734f90aa0b59&blocksize=0": [
7+
"https://data.slovensko.sk/download?id=e39ad227-1899-4cff-b7c8-734f90aa0b59": [
88
"HU2024_20240917shp/HU2024_20240917.shp"
99
]
1010
}

fiboa_cli/util.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -103,28 +103,24 @@ def load_datatypes(version):
103103
return response["$defs"]
104104

105105

106-
def get_fs(url_or_path: str) -> AbstractFileSystem:
106+
def get_fs(url_or_path: str, **kwargs) -> AbstractFileSystem:
107107
"""Choose fsspec filesystem by sniffing input url"""
108108
parsed = urlparse(url_or_path)
109109

110110
if parsed.scheme in ("http", "https"):
111-
if re.search(r"[?&]blocksize=0", url_or_path):
112-
# We read in chunks. Some origin-server don't support http-range request
113-
# Add an additional blocksize=0 parameter to your url for a workaround
114-
return HTTPFileSystem(block_size=0)
115-
return HTTPFileSystem()
111+
return HTTPFileSystem(**kwargs)
116112

117113
if parsed.scheme == "s3":
118114
from s3fs import S3FileSystem
119115

120-
return S3FileSystem()
116+
return S3FileSystem(**kwargs)
121117

122118
if parsed.scheme == "gs":
123119
from gcsfs import GCSFileSystem
124120

125-
return GCSFileSystem()
121+
return GCSFileSystem(**kwargs)
126122

127-
return LocalFileSystem()
123+
return LocalFileSystem(**kwargs)
128124

129125

130126
def is_valid_file_uri(uri, extensions=[]):

0 commit comments

Comments
 (0)