Skip to content
8 changes: 6 additions & 2 deletions hydromt/data_catalog/data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ def export_data(
metadata: Optional[Dict[str, Any]] = None,
force_overwrite: bool = False,
append: bool = False,
handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE,
handle_nodata: NoDataStrategy = NoDataStrategy.WARN,
) -> None:
"""Export a data slice of each dataset and a data_catalog.yml file to disk.

Expand Down Expand Up @@ -1035,6 +1035,9 @@ def export_data(
override any existing files if True. False by default.
append: bool, optional
If True, append to existing data catalog, by default False.
handle_nodata: NoDataStrategy, optional
Strategy to handle no data situations when exporting data. By default
it will log a warning message.
"""
if time_range is not None:
time_range = TimeRange.create(time_range)
Expand Down Expand Up @@ -1180,7 +1183,8 @@ def export_data(
for key, available_variants in sources_out.items():
for _provider, available_versions in available_variants.items():
for _version, adapter in available_versions.items():
data_catalog_out.add_source(key, adapter)
if adapter is not None:
data_catalog_out.add_source(key, adapter)

data_catalog_out.to_yml(path, root="auto", meta=metadata)

Expand Down
7 changes: 5 additions & 2 deletions hydromt/data_catalog/drivers/dataframe/pandas_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,11 @@ def read(
df = pd.read_fwf(uri, **self.options.get_kwargs())
else:
raise IOError(f"DataFrame: extension {extension} unknown.")
if df.index.size == 0:
exec_nodata_strat(f"No data from driver {self}'.", strategy=handle_nodata)
if df.empty:
exec_nodata_strat(
f"No data from {self.name} driver for file uris: {', '.join(uris)}.",
strategy=handle_nodata,
)
return df

def write(
Expand Down
5 changes: 4 additions & 1 deletion hydromt/data_catalog/drivers/geodataframe/pyogrio_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,10 @@ def read(
raise IOError(f"DataFrame from uri: '{_uri}' contains no geometry column.")

if gdf.index.size == 0:
exec_nodata_strat(f"No data from driver {self}'.", strategy=handle_nodata)
exec_nodata_strat(
f"No data from {self.name} driver for file uris: {', '.join(uris)}.",
strategy=handle_nodata,
)
return gdf

def write(
Expand Down
5 changes: 4 additions & 1 deletion hydromt/data_catalog/drivers/geodataframe/table_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ def read(
**self.options.get_kwargs(),
)
if gdf.index.size == 0:
exec_nodata_strat(f"No data from driver {self}'.", strategy=handle_nodata)
exec_nodata_strat(
f"No data from {self.name} driver for file uris: {', '.join(uris)}.",
strategy=handle_nodata,
)
return gdf

def write(
Expand Down
5 changes: 3 additions & 2 deletions hydromt/data_catalog/drivers/geodataset/vector_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,15 @@ def read(
if isinstance(out, xr.DataArray):
if out.size == 0:
exec_nodata_strat(
f"No data from driver {self}'.", strategy=handle_nodata
f"No data from {self.name} driver for file uris: {', '.join(uris)}.",
strategy=handle_nodata,
)
return out.to_dataset()
else:
for variable in out.data_vars:
if out[variable].size == 0:
exec_nodata_strat(
f"No data from driver {self}' for variable {variable}.",
f"No data from {self.name} driver for file uris: {', '.join(uris)}.",
strategy=handle_nodata,
)
return out
Expand Down
6 changes: 5 additions & 1 deletion hydromt/data_catalog/sources/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from hydromt.data_catalog.adapters import DataFrameAdapter
from hydromt.data_catalog.drivers import DataFrameDriver
from hydromt.data_catalog.sources import DataSource
from hydromt.error import NoDataStrategy
from hydromt.error import NoDataStrategy, exec_nodata_strat
from hydromt.typing import TimeRange
from hydromt.typing.fsspec_types import FSSpecFileSystem

Expand Down Expand Up @@ -97,6 +97,10 @@ def to_file(
variables=variables, time_range=time_range, handle_nodata=handle_nodata
)
if df is None:
exec_nodata_strat(
f"Reading file(s) for {self.name} returned no data.",
handle_nodata,
)
return None

# driver can return different path if file ext changes
Expand Down
6 changes: 5 additions & 1 deletion hydromt/data_catalog/sources/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from hydromt.data_catalog.adapters.dataset import DatasetAdapter
from hydromt.data_catalog.drivers import DatasetDriver
from hydromt.data_catalog.sources.data_source import DataSource
from hydromt.error import NoDataStrategy
from hydromt.error import NoDataStrategy, exec_nodata_strat
from hydromt.typing import (
TimeRange,
)
Expand Down Expand Up @@ -120,6 +120,10 @@ def to_file(
time_range=time_range, handle_nodata=handle_nodata
)
if ds is None:
exec_nodata_strat(
handle_nodata,
f"Reading file(s) for {self.name} returned no data.",
)
return None

# driver can return different path if file ext changes
Expand Down
6 changes: 5 additions & 1 deletion hydromt/data_catalog/sources/geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from hydromt.data_catalog.adapters.geodataframe import GeoDataFrameAdapter
from hydromt.data_catalog.drivers import GeoDataFrameDriver
from hydromt.data_catalog.sources.data_source import DataSource
from hydromt.error import NoDataStrategy
from hydromt.error import NoDataStrategy, exec_nodata_strat
from hydromt.gis.gis_utils import _parse_geom_bbox_buffer
from hydromt.typing import (
Bbox,
Expand Down Expand Up @@ -127,6 +127,10 @@ def to_file(
handle_nodata=handle_nodata,
)
if gdf is None: # handle_nodata == ignore
exec_nodata_strat(
handle_nodata,
f"Reading file(s) for {self.name} returned no data.",
)
return None

dest_path = driver.write(file_path, gdf, write_kwargs=write_kwargs)
Expand Down
6 changes: 5 additions & 1 deletion hydromt/data_catalog/sources/geodataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from hydromt.data_catalog.adapters.geodataset import GeoDatasetAdapter
from hydromt.data_catalog.drivers.geodataset.geodataset_driver import GeoDatasetDriver
from hydromt.data_catalog.sources.data_source import DataSource
from hydromt.error import NoDataStrategy
from hydromt.error import NoDataStrategy, exec_nodata_strat
from hydromt.gis.gis_utils import _parse_geom_bbox_buffer
from hydromt.typing import (
Bbox,
Expand Down Expand Up @@ -134,6 +134,10 @@ def to_file(
handle_nodata=handle_nodata,
)
if ds is None: # handle_nodata == ignore
exec_nodata_strat(
handle_nodata,
f"Reading file(s) for {self.name} returned no data.",
)
Comment on lines +137 to +140
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you check the log? Feels like we might be warning twice? Already in read_data just above and now here again?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that is possible; it depends on how the drivers are implemented. For geodataset the warning is indeed logged twice if handle_nodata is set to WARN; by default it raises an exception.

We could remove this code block for this data source. I just saw that the to_file method of every data source contains this code block and changed them because some drivers can return None instead of a dataset or dataframe.

return None

dest_path = driver.write(file_path, ds, write_kwargs=write_kwargs)
Expand Down
8 changes: 6 additions & 2 deletions hydromt/data_catalog/sources/rasterdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from hydromt.data_catalog.adapters.rasterdataset import RasterDatasetAdapter
from hydromt.data_catalog.drivers import RasterDatasetDriver
from hydromt.data_catalog.sources.data_source import DataSource
from hydromt.error import NoDataStrategy
from hydromt.error import NoDataStrategy, exec_nodata_strat
from hydromt.gis.gis_utils import _parse_geom_bbox_buffer
from hydromt.typing import (
Bbox,
Expand Down Expand Up @@ -139,7 +139,11 @@ def to_file(
zoom=zoom,
handle_nodata=handle_nodata,
)
if ds is None: # handle_nodata == ignore
if ds is None:
exec_nodata_strat(
f"Reading file(s) for {self.name} returned no data.",
handle_nodata,
)
return None

dest_path = driver.write(file_path, ds, write_kwargs=write_kwargs)
Expand Down
12 changes: 12 additions & 0 deletions tests/data_catalog/drivers/geodataframe/test_table_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from hydromt.data_catalog.drivers.geodataframe.table_driver import (
GeoDataFrameTableDriver,
)
from hydromt.error import NoDataException


class TestGeoDataFrameTableDriver:
Expand Down Expand Up @@ -79,3 +80,14 @@ def test_header_case_insensitive(
driver = GeoDataFrameTableDriver()
gdf = driver.read(uris=[uri])
pd.testing.assert_frame_equal(gdf, geodf)

def test_read_no_data(self, mocker):
    """Check that an empty table read raises ``NoDataException``.

    ``open_vector_from_table`` is patched to return an empty GeoDataFrame,
    so no file is actually opened.
    """
    empty_gdf = gpd.GeoDataFrame()
    mocker.patch(
        "hydromt.data_catalog.drivers.geodataframe.table_driver.open_vector_from_table",
        return_value=empty_gdf,
    )
    expected = "No data from geodataframe_table driver for file uris: some_path.csv."
    with pytest.raises(NoDataException, match=expected):
        GeoDataFrameTableDriver().read(uris=["some_path.csv"])
8 changes: 8 additions & 0 deletions tests/data_catalog/drivers/test_pandas_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from hydromt._compat import HAS_OPENPYXL
from hydromt.data_catalog.drivers.dataframe import PandasDriver
from hydromt.error import NoDataException


class TestPandasDriver:
Expand Down Expand Up @@ -101,6 +102,13 @@ def test_read_with_filters(
marks=pytest.mark.skipif(not HAS_OPENPYXL, reason="openpyxl is not installed"),
)

def test_read_no_data(self, driver: PandasDriver, tmp_path):
    """Check that reading an empty uri list raises ``NoDataException``."""
    expected = "No data from pandas driver for file uris"
    with pytest.raises(NoDataException, match=expected):
        driver.read([])

@pytest.mark.parametrize(
"filename", ["temp.csv", "temp.parquet", temp_xls_param, temp_xlsx_param]
)
Expand Down
30 changes: 26 additions & 4 deletions tests/data_catalog/sources/test_dataframe_source.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
import re
from pathlib import Path
from typing import Type

import pandas as pd
import pytest

from hydromt.data_catalog.adapters import DataFrameAdapter
from hydromt.data_catalog.drivers import DataFrameDriver
from hydromt.data_catalog.sources import DataFrameSource
from hydromt.data_catalog.uri_resolvers import URIResolver
from hydromt.error import NoDataException


class TestDataFrameSource:
def test_read_data(
@pytest.fixture
def MockDataFrameSource(
self,
MockDataFrameDriver: Type[DataFrameDriver],
mock_resolver: URIResolver,
mock_df_adapter: DataFrameAdapter,
df: pd.DataFrame,
managed_tmp_path: Path,
):
) -> DataFrameSource:
managed_tmp_path.touch("test.xls")
source = DataFrameSource(
root=".",
Expand All @@ -27,4 +30,23 @@ def test_read_data(
data_adapter=mock_df_adapter,
uri=str(managed_tmp_path / "test.xls"),
)
pd.testing.assert_frame_equal(df, source.read_data())
return source

def test_read_data(
    self,
    MockDataFrameSource: DataFrameSource,
    df: pd.DataFrame,
):
    """Check that the mocked source returns a frame equal to ``df``."""
    result = MockDataFrameSource.read_data()
    pd.testing.assert_frame_equal(df, result)

def test_to_file_nodata(self, mocker, MockDataFrameSource: DataFrameSource):
    """Check that ``to_file`` raises when ``read_data`` returns ``None``."""
    mocker.patch.object(DataFrameSource, "read_data", return_value=None)
    # The message contains parentheses, so escape it before using it as a regex.
    expected = re.escape("Reading file(s) for example_source returned no data.")
    with pytest.raises(NoDataException, match=expected):
        MockDataFrameSource.to_file("file.csv")
10 changes: 10 additions & 0 deletions tests/data_catalog/sources/test_raster_dataset_source.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from pathlib import Path
from typing import Type

Expand All @@ -9,6 +10,7 @@
from hydromt.data_catalog.drivers import RasterDatasetDriver
from hydromt.data_catalog.sources import RasterDatasetSource
from hydromt.data_catalog.uri_resolvers import URIResolver
from hydromt.error import NoDataException, NoDataStrategy
from hydromt.gis.gis_utils import _to_geographic_bbox
from hydromt.typing import SourceMetadata

Expand Down Expand Up @@ -64,3 +66,11 @@ def test_detect_extent(
)
def test_infer_default_driver(self, uri, expected_driver):
assert RasterDatasetSource._infer_default_driver(uri) == expected_driver

def test_to_file_nodata(self, writable_source: RasterDatasetSource, mocker):
    """Check that ``to_file`` raises under RAISE when ``read_data`` returns ``None``."""
    mocker.patch.object(RasterDatasetSource, "read_data", return_value=None)
    # The message contains parentheses, so escape it before using it as a regex.
    expected = re.escape("Reading file(s) for test returned no data.")
    with pytest.raises(NoDataException, match=expected):
        writable_source.to_file(
            "output.zarr",
            handle_nodata=NoDataStrategy.RAISE,
        )
22 changes: 22 additions & 0 deletions tests/data_catalog/test_data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,28 @@ def test_export_dataframe(tmp_path: Path, df, df_time):
assert isinstance(obj, dtypes), key


@pytest.mark.integration
def test_export_data_bulk(tmp_path: Path, caplog: pytest.LogCaptureFixture):
    """Export the artifact catalog for a small bbox and re-read the result.

    The first export uses the default no-data handling and must produce a
    readable catalog. Repeating the export with ``NoDataStrategy.RAISE``
    must raise ``NoDataException``.
    """
    catalog = DataCatalog(data_libs=["artifact_data"])
    export_dir = tmp_path / "bulk_exported"
    export_dir.mkdir(exist_ok=True)
    # Small bounding box in the Piave basin.
    bbox = [11.989, 46.02, 12.253, 46.166]
    caplog.set_level(WARNING)
    catalog.export_data(export_dir, bbox=bbox, force_overwrite=True)

    # The exported catalog file must be loadable as a data catalog again.
    exported_yml = export_dir / "data_catalog.yml"
    reread_catalog = DataCatalog(data_libs=[str(exported_yml)])
    # Number of exported sources; not all sources are exported because some
    # have no data for this bbox.
    assert len(reread_catalog) == 45

    with pytest.raises(NoDataException):
        catalog.export_data(
            export_dir,
            bbox=bbox,
            handle_nodata=NoDataStrategy.RAISE,
        )


@pytest.mark.skip("flakey test due to external http issues")
@pytest.mark.integration
def test_http_data():
Expand Down
Loading