diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6979c53f5..35cddc9bf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,58 +30,39 @@ repos: hooks: - id: absolufy-imports - # Format the code aggressively using black - - repo: https://github.com/psf/black - rev: 24.10.0 + # Ruff linter and code formatter + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.8.6 hooks: - - id: black - args: [--line-length=120] + # Run the linter. + - id: ruff + # Run the formatter. + - id: ruff-format - # Lint the code using flake8 - - repo: https://github.com/pycqa/flake8 - rev: 7.1.1 + # Enable lint fixes with ruff + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.8.6 hooks: - - id: flake8 - # More than one argument in the second list, so need to pass arguments as below (and -- to finish) - args: [ - '--max-line-length', '120', # we can write dicts however we want - '--extend-ignore', 'E203,C408,B028', # flake8 disagrees with black, so this should be ignored. - '--' - ] - additional_dependencies: - - flake8-comprehensions - - flake8-bugbear - files: ^(xdem|tests) - - # Lint the code using mypy - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.13.0 - hooks: - - id: mypy - args: [ - --config-file=mypy.ini, - --strict, - --implicit-optional, - --ignore-missing-imports, # Don't warn about stubs since pre-commit runs in a limited env - --allow-untyped-calls, # Dynamic function/method calls are okay. Untyped function definitions are not okay. - --show-error-codes, - --no-warn-unused-ignores, # Ignore 'type: ignore' comments that are not used. - --disable-error-code=attr-defined, # "Module has no attribute 'XXX'" occurs because of the pre-commit env. - --disable-error-code=name-defined, # "Name 'XXX' is not defined" occurs because of the pre-commit env. 
- --disable-error-code=var-annotated, - --disable-error-code=no-any-return - - ] - additional_dependencies: [tokenize-rt==3.2.0, numpy==1.26] - files: ^(xdem|tests|doc/code) - + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format - # Sort imports using isort - - repo: https://github.com/PyCQA/isort - rev: 5.13.2 + # To run ruff over Jupyter Notebooks + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.8.6 hooks: - - id: isort - args: ["--profile", "black"] + # Run the linter. + - id: ruff + types_or: [ python, pyi, jupyter ] + args: [ --fix ] + # Run the formatter. + - id: ruff-format + types_or: [ python, pyi, jupyter ] # Automatically upgrade syntax to a minimum version - repo: https://github.com/asottile/pyupgrade diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 000000000..df7438b00 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,83 @@ +# Exclude a variety of commonly ignored directories. +exclude = ["ALL"] + +extend-exclude = [ + ".github", + "binder", + "doc", + "xdem.egg-info", + ".coveragerc", + ".gitignore", + ".pre-commit-config.yaml", + ".readthedocs.yaml", + ".relint.yml", +] + +# Support Python 3.10+. +target-version = "py310" + +# Same as Black. +# The formatter wraps lines at a length of 120 when enforcing long-lines violations (like E501). +line-length = 120 +# Number of spaces per tabulation, used by the formatter and when enforcing long-line violations. +indent-width = 4 + +[lint.pycodestyle] +# E501 reports lines that exceed the length of 120. +max-line-length = 120 + +[lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default (10). 
+select = ["ALL"] + +# Skip To do format annotations rules (FIX002, TD002, TD003) +# Skip pydocstyle rules (D101, D205, D400, D401, D415) +# Skip flake8-simplify rules (SIM102, SIM108, SIM115) +# Skip pygrep-hooks rules (PGH003, PGH004) +# Skip pylint refactor rules (PLR0912, PLR0913, PLR0915, PLR2004) +# Skip the use of assert (S101) +# Skip flake8-type-checking rules (TC001, TC002, TC003) +# Skip tryceratops rules (TRY003, TRY201) +# Skip pyupgrade rule : non-pep604-isinstance (UP038) +# ... +ignore = ["ANN401", "ARG002", "B028", "B904", "BLE001", "C901", "D101", "D205", "D400", "D401", "D415", "EM101", + "EM102", "ERA001", "F541", "FBT001", "FBT002", "FBT003", "FIX002", "INP001", "PD011", "PGH003", "PGH004", + "PLR0912", "PLR0913", "PLR0915", "PLR2004", "PLW0127", "PT011", "PTH118", "PYI041", "PYI051", "RET504", + "S101", "SIM102", "SIM108", "SIM115", "T201", "TC001", "TC002", "TC003", "TD002", "TD003", "TRY003", + "TRY201", "UP038"] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + +# Enable auto-formatting of code examples in docstrings. Markdown, +# reStructuredText code/literal blocks and doctests are all supported. +# +# This is currently disabled by default, but it is planned for this +# to be opt-out in the future. +docstring-code-format = false + +# Set the line length limit used when formatting code snippets in +# docstrings. +# +# This only has an effect when the `docstring-code-format` setting is +# enabled. 
+docstring-code-line-length = "dynamic" diff --git a/Makefile b/Makefile index 21220f1d0..b7b4be798 100644 --- a/Makefile +++ b/Makefile @@ -82,6 +82,26 @@ test: ## run tests ${VENV}/bin/pytest; \ fi +## Code quality, linting section + +.PHONY: lint +lint: ruff ## Apply the ruff linter. + +.PHONY: lint-check +lint-check: ## Check whether the codebase satisfies the linter rules. + @echo + @echo "Checking linter rules..." + @echo "========================" + @echo + @ruff check . + +.PHONY: ruff +ruff: ## Apply ruff. + @echo "Applying ruff..." + @echo "================" + @echo + @ruff check --fix . + ## Clean section .PHONY: clean diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 000000000..e5852c362 --- /dev/null +++ b/examples/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024 Centre National d'Etudes Spatiales (CNES). +# +# This file is part of the xDEM project: +# https://github.com/glaciohack/xdem +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""xDEM examples module init file.""" diff --git a/examples/advanced/__init__.py b/examples/advanced/__init__.py new file mode 100644 index 000000000..b30c7302d --- /dev/null +++ b/examples/advanced/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024 Centre National d'Etudes Spatiales (CNES). 
+# +# This file is part of the xDEM project: +# https://github.com/glaciohack/xdem +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" """ # noqa diff --git a/examples/advanced/plot_blockwise_coreg.py b/examples/advanced/plot_blockwise_coreg.py index eb4d41927..211134d2b 100644 --- a/examples/advanced/plot_blockwise_coreg.py +++ b/examples/advanced/plot_blockwise_coreg.py @@ -1,11 +1,12 @@ -""" -Blockwise coregistration +"""Blockwise coregistration ======================== Often, biases are spatially variable, and a "global" shift may not be enough to coregister a DEM properly. -In the :ref:`sphx_glr_basic_examples_plot_nuth_kaab.py` example, we saw that the method improved the alignment significantly, but there were still possibly nonlinear artefacts in the result. +In the :ref:`sphx_glr_basic_examples_plot_nuth_kaab.py` example, we saw that the method improved the alignment +significantly, but there were still possibly nonlinear artefacts in the result. Clearly, nonlinear coregistration approaches are needed. -One solution is :class:`xdem.coreg.BlockwiseCoreg`, a helper to run any ``Coreg`` class over an arbitrarily small grid, and then "puppet warp" the DEM to fit the reference best. +One solution is :class:`xdem.coreg.BlockwiseCoreg`, a helper to run any ``Coreg`` class over an arbitrarily small grid, +and then "puppet warp" the DEM to fit the reference best. 
The ``BlockwiseCoreg`` class runs in five steps: @@ -43,7 +44,8 @@ ] # %% -# The DEM to be aligned (a 1990 photogrammetry-derived DEM) has some vertical and horizontal biases that we want to avoid, as well as possible nonlinear distortions. +# The DEM to be aligned (a 1990 photogrammetry-derived DEM) has some vertical and horizontal biases that we want to +# avoid, as well as possible nonlinear distortions. # The product is a mosaic of multiple DEMs, so "seams" may exist in the data. # These can be visualized by plotting a change map: @@ -75,11 +77,12 @@ # %% # The estimated shifts can be visualized by applying the coregistration to a completely flat surface. -# This shows the estimated shifts that would be applied in elevation; additional horizontal shifts will also be applied if the method supports it. +# This shows the estimated shifts that would be applied in elevation; additional horizontal shifts will also be applied +# if the method supports it. # The :func:`xdem.coreg.BlockwiseCoreg.stats` method can be used to annotate each block with its associated Z shift. z_correction = blockwise.apply( - np.zeros_like(dem_to_be_aligned.data), transform=dem_to_be_aligned.transform, crs=dem_to_be_aligned.crs + np.zeros_like(dem_to_be_aligned.data), transform=dem_to_be_aligned.transform, crs=dem_to_be_aligned.crs, )[0] plt.title("Vertical correction") plt.imshow(z_correction, cmap="RdYlBu", vmin=-10, vmax=10, extent=plt_extent) diff --git a/examples/advanced/plot_demcollection.py b/examples/advanced/plot_demcollection.py index 8af130d31..b78b12c5e 100644 --- a/examples/advanced/plot_demcollection.py +++ b/examples/advanced/plot_demcollection.py @@ -1,16 +1,17 @@ -""" -Working with a collection of DEMs +"""Working with a collection of DEMs ================================= .. caution:: This functionality might be removed in future package versions. Oftentimes, more than two timestamps (DEMs) are analyzed simultaneously. 
One single dDEM only captures one interval, so multiple dDEMs have to be created. -In addition, if multiple masking polygons exist (e.g. glacier outlines from multiple years), these should be accounted for properly. -The :class:`xdem.DEMCollection` is a tool to properly work with multiple timestamps at the same time, and makes calculations of elevation/volume change over multiple years easy. +In addition, if multiple masking polygons exist (e.g. glacier outlines from multiple years), +these should be accounted for properly. +The :class:`xdem.DEMCollection` is a tool to properly work with multiple timestamps at the same time, and makes +calculations of elevation/volume change over multiple years easy. """ -from datetime import datetime +from datetime import datetime, timezone import geoutils as gu import matplotlib.pyplot as plt @@ -32,8 +33,12 @@ # These parts can be delineated with masks or polygons. # Here, we have glacier outlines from 1990 and 2009. outlines = { - datetime(1990, 8, 1): gu.Vector(xdem.examples.get_path("longyearbyen_glacier_outlines")), - datetime(2009, 8, 1): gu.Vector(xdem.examples.get_path("longyearbyen_glacier_outlines_2010")), + datetime(1990, 8, 1, tzinfo=timezone.utc): gu.Vector( + xdem.examples.get_path("longyearbyen_glacier_outlines"), + ), + datetime(2009, 8, 1, tzinfo=timezone.utc): gu.Vector( + xdem.examples.get_path("longyearbyen_glacier_outlines_2010"), + ), } # %% @@ -42,7 +47,9 @@ # Fake a 2060 DEM by assuming twice the change from 1990-2009 between 2009 and 2060 dem_2060 = dem_2009 + (dem_2009 - dem_1990).data * 3 -timestamps = [datetime(1990, 8, 1), datetime(2009, 8, 1), datetime(2060, 8, 1)] +timestamps = [datetime(1990, 8, 1, tzinfo=timezone.utc), + datetime(2009, 8, 1, tzinfo=timezone.utc), + datetime(2060, 8, 1, tzinfo=timezone.utc)] # %% # Now, all data are ready to be collected in an :class:`xdem.DEMCollection` object. 
@@ -52,7 +59,7 @@ # demcollection = xdem.DEMCollection( - dems=[dem_1990, dem_2009, dem_2060], timestamps=timestamps, outlines=outlines, reference_dem=1 + dems=[dem_1990, dem_2009, dem_2060], timestamps=timestamps, outlines=outlines, reference_dem=1, ) # %% @@ -69,9 +76,11 @@ # These are saved internally, but are also returned as a list. # # An elevation or volume change series can automatically be generated from the ``DEMCollection``. -# In this case, we should specify *which* glacier we want the change for, as a regional value may not always be required. +# In this case, we should specify *which* glacier we want the change for, +# as a regional value may not always be required. # We can look at the glacier called "Scott Turnerbreen", specified in the "NAME" column of the outline data. -# `See here for the outline filtering syntax `_. +# `See here for the outline filtering syntax +# `_. demcollection.get_cumulative_series(kind="dh", outlines_filter="NAME == 'Scott Turnerbreen'") diff --git a/examples/advanced/plot_deramp.py b/examples/advanced/plot_deramp.py index 13b786739..ec84f0d36 100644 --- a/examples/advanced/plot_deramp.py +++ b/examples/advanced/plot_deramp.py @@ -1,5 +1,4 @@ -""" -Bias-correction with deramping +"""Bias-correction with deramping ============================== Deramping can help correct rotational or doming errors in elevation data. diff --git a/examples/advanced/plot_heterosc_estimation_modelling.py b/examples/advanced/plot_heterosc_estimation_modelling.py index 4dd3d3268..7510e7766 100644 --- a/examples/advanced/plot_heterosc_estimation_modelling.py +++ b/examples/advanced/plot_heterosc_estimation_modelling.py @@ -1,16 +1,17 @@ -""" -Estimation and modelling of heteroscedasticity +"""Estimation and modelling of heteroscedasticity ============================================== Digital elevation models have a precision that can vary with terrain and instrument-related variables. 
This variability in variance is called `heteroscedasticy `_, -and rarely accounted for in DEM studies (see :ref:`accuracy-precision`). Quantifying elevation heteroscedasticity is essential to +and rarely accounted for in DEM studies (see :ref:`accuracy-precision`). +Quantifying elevation heteroscedasticity is essential to use stable terrain as an error proxy for moving terrain, and standardize data towards a stationary variance, necessary to apply spatial statistics (see :ref:`uncertainty`). Here, we show an advanced example in which we look for terrain-dependent explanatory variables to explain the heteroscedasticity for a DEM difference at Longyearbyen. We detail the steps used by -:func:`~xdem.spatialstats.infer_heteroscedasticity_from_stable` exemplified in :ref:`sphx_glr_basic_examples_plot_infer_heterosc.py`. +:func:`~xdem.spatialstats.infer_heteroscedasticity_from_stable` +exemplified in :ref:`sphx_glr_basic_examples_plot_infer_heterosc.py`. We use `data binning `_ and robust statistics in N-dimension with :func:`~xdem.spatialstats.nd_binning`, apply a N-dimensional interpolation with @@ -39,7 +40,7 @@ # We derive terrain attributes from the reference DEM (see :ref:`sphx_glr_basic_examples_plot_terrain_attributes.py`), # which we will use to explore the variability in elevation error. slope, aspect, planc, profc = xdem.terrain.get_terrain_attribute( - dem=ref_dem, attribute=["slope", "aspect", "planform_curvature", "profile_curvature"] + dem=ref_dem, attribute=["slope", "aspect", "planform_curvature", "profile_curvature"], ) # %% @@ -51,11 +52,13 @@ profc_arr = profc[~mask_glacier].filled(np.nan) # %% -# We use :func:`xdem.spatialstats.nd_binning` to perform N-dimensional binning on all those terrain variables, with uniform -# bin length divided by 30. 
We use the NMAD as a robust measure of `statistical dispersion `_ +# We use :func:`xdem.spatialstats.nd_binning` to perform N-dimensional binning on all +# those terrain variables, with uniform +# bin length divided by 30. We use the NMAD as a robust measure of +# `statistical dispersion `_ # (see :ref:`robuststats-meanstd`). -df = xdem.spatialstats.nd_binning( +df_bin = xdem.spatialstats.nd_binning( values=dh_arr, list_var=[slope_arr, aspect_arr, planc_arr, profc_arr], list_var_names=["slope", "aspect", "planc", "profc"], @@ -67,11 +70,11 @@ # We obtain a dataframe with the 1D binning results for each variable, the 2D binning results for all combinations of # variables and the N-D (here 4D) binning with all variables. # Overview of the dataframe structure for the 1D binning: -df[df.nd == 1] +df_bin[df_bin.nd == 1] # %% # And for the 4D binning: -df[df.nd == 4] +df_bin[df_bin.nd == 4] # %% # We can now visualize the results of the 1D binning of the computed NMAD of elevation differences with each variable @@ -79,7 +82,7 @@ # We can start with the slope that has been long known to be related to the elevation measurement error (e.g., # `Toutin (2002) `_). xdem.spatialstats.plot_1d_binning( - df, var_name="slope", statistic_name="nmad", label_var="Slope (degrees)", label_statistic="NMAD of dh (m)" + df_bin, var_name="slope", statistic_name="nmad", label_var="Slope (degrees)", label_statistic="NMAD of dh (m)", ) # %% @@ -88,14 +91,15 @@ # # What about the aspect? -xdem.spatialstats.plot_1d_binning(df, "aspect", "nmad", "Aspect (degrees)", "NMAD of dh (m)") +xdem.spatialstats.plot_1d_binning(df_bin, "aspect", "nmad", "Aspect (degrees)", "NMAD of dh (m)") # %% -# There is no variability with the aspect that shows a dispersion averaging 2-3 meters, i.e. that of the complete sample. +# There is no variability with the aspect that shows a dispersion averaging 2-3 meters, +# i.e. that of the complete sample. # # What about the plan curvature? 
-xdem.spatialstats.plot_1d_binning(df, "planc", "nmad", "Planform curvature (100 m$^{-1}$)", "NMAD of dh (m)") +xdem.spatialstats.plot_1d_binning(df_bin, "planc", "nmad", "Planform curvature (100 m$^{-1}$)", "NMAD of dh (m)") # %% # The relation with the plan curvature remains ambiguous. @@ -105,14 +109,14 @@ # # .. note:: We need a higher number of bins to work with quantiles and still resolve the edges of the distribution. -df = xdem.spatialstats.nd_binning( +df_bin = xdem.spatialstats.nd_binning( values=dh_arr, list_var=[profc_arr], list_var_names=["profc"], statistics=["count", np.nanmedian, xdem.spatialstats.nmad], list_var_bins=[np.nanquantile(profc_arr, np.linspace(0, 1, 1000))], ) -xdem.spatialstats.plot_1d_binning(df, "profc", "nmad", "Profile curvature (100 m$^{-1}$)", "NMAD of dh (m)") +xdem.spatialstats.plot_1d_binning(df_bin, "profc", "nmad", "Profile curvature (100 m$^{-1}$)", "NMAD of dh (m)") # %% # We clearly identify a variability with the profile curvature, from 2 meters for low curvatures to above 4 meters @@ -120,28 +124,29 @@ # # What about the role of the plan curvature? -df = xdem.spatialstats.nd_binning( +df_bin = xdem.spatialstats.nd_binning( values=dh_arr, list_var=[planc_arr], list_var_names=["planc"], statistics=["count", np.nanmedian, xdem.spatialstats.nmad], list_var_bins=[np.nanquantile(planc_arr, np.linspace(0, 1, 1000))], ) -xdem.spatialstats.plot_1d_binning(df, "planc", "nmad", "Planform curvature (100 m$^{-1}$)", "NMAD of dh (m)") +xdem.spatialstats.plot_1d_binning(df_bin, "planc", "nmad", "Planform curvature (100 m$^{-1}$)", "NMAD of dh (m)") # %% -# The plan curvature shows a similar relation. Those are symmetrical with 0, and almost equal for both types of curvature. +# The plan curvature shows a similar relation. Those are symmetrical with 0, +# and almost equal for both types of curvature. 
# To simplify the analysis, we here combine those curvatures into the maximum absolute curvature: maxc_arr = np.maximum(np.abs(planc_arr), np.abs(profc_arr)) -df = xdem.spatialstats.nd_binning( +df_bin = xdem.spatialstats.nd_binning( values=dh_arr, list_var=[maxc_arr], list_var_names=["maxc"], statistics=["count", np.nanmedian, xdem.spatialstats.nmad], list_var_bins=[np.nanquantile(maxc_arr, np.linspace(0, 1, 1000))], ) -xdem.spatialstats.plot_1d_binning(df, "maxc", "nmad", "Maximum absolute curvature (100 m$^{-1}$)", "NMAD of dh (m)") +xdem.spatialstats.plot_1d_binning(df_bin, "maxc", "nmad", "Maximum absolute curvature (100 m$^{-1}$)", "NMAD of dh (m)") # %% # Here's our simplified relation! We now have both slope and maximum absolute curvature with clear variability of @@ -152,7 +157,7 @@ # # We need to explore the variability with both slope and curvature at the same time: -df = xdem.spatialstats.nd_binning( +df_bin = xdem.spatialstats.nd_binning( values=dh_arr, list_var=[slope_arr, maxc_arr], list_var_names=["slope", "maxc"], @@ -161,7 +166,7 @@ ) xdem.spatialstats.plot_2d_binning( - df, + df_bin, var_name_1="slope", var_name_2="maxc", statistic_name="nmad", @@ -182,8 +187,8 @@ np.nanquantile(slope_arr, np.linspace(0, 0.95, 20)), np.nanquantile(slope_arr, np.linspace(0.96, 0.99, 5)), np.nanquantile(slope_arr, np.linspace(0.991, 1, 10)), - ] - ) + ], + ), ) custom_bin_curvature = np.unique( @@ -192,11 +197,11 @@ np.nanquantile(maxc_arr, np.linspace(0, 0.95, 20)), np.nanquantile(maxc_arr, np.linspace(0.96, 0.99, 5)), np.nanquantile(maxc_arr, np.linspace(0.991, 1, 10)), - ] - ) + ], + ), ) -df = xdem.spatialstats.nd_binning( +df_bin = xdem.spatialstats.nd_binning( values=dh_arr, list_var=[slope_arr, maxc_arr], list_var_names=["slope", "maxc"], @@ -204,7 +209,7 @@ list_var_bins=[custom_bin_slope, custom_bin_curvature], ) xdem.spatialstats.plot_2d_binning( - df, + df_bin, "slope", "maxc", "nmad", @@ -232,7 +237,7 @@ # in the interpolation, we set a ``min_count`` 
value at 30 samples. unscaled_dh_err_fun = xdem.spatialstats.interp_nd_binning( - df, list_var_names=["slope", "maxc"], statistic="nmad", min_count=30 + df_bin, list_var_names=["slope", "maxc"], statistic="nmad", min_count=30, ) # %% @@ -243,10 +248,8 @@ dh_err_stable = unscaled_dh_err_fun((slope_arr, maxc_arr)) print( - "The spread of elevation difference is {:.2f} " - "compared to a mean predicted elevation error of {:.2f}.".format( - xdem.spatialstats.nmad(dh_arr), np.nanmean(dh_err_stable) - ) + f"The spread of elevation difference is {xdem.spatialstats.nmad(dh_arr):.2f} " + f"compared to a mean predicted elevation error of {np.nanmean(dh_err_stable):.2f}.", ) # %% @@ -254,13 +257,14 @@ # :func:`xdem.spatialstats.two_step_standardization` function, and get our final error function. zscores, dh_err_fun = xdem.spatialstats.two_step_standardization( - dh_arr, list_var=[slope_arr, maxc_arr], unscaled_error_fun=unscaled_dh_err_fun + dh_arr, list_var=[slope_arr, maxc_arr], unscaled_error_fun=unscaled_dh_err_fun, ) for s, c in [(0.0, 0.1), (50.0, 0.1), (0.0, 20.0), (50.0, 20.0)]: print( - "Elevation measurement error for slope of {:.0f} degrees, " - "curvature of {:.2f} m-1: {:.1f}".format(s, c / 100, dh_err_fun((s, c))) + " meters." + f"Elevation measurement error for slope of {s:.0f} degrees, " + f"curvature of {c / 100:.2f} m-1: {dh_err_fun((s, c)):.1f}" + f" meters.", ) # %% diff --git a/examples/advanced/plot_norm_regional_hypso.py b/examples/advanced/plot_norm_regional_hypso.py index 5672acb7f..114d82a75 100644 --- a/examples/advanced/plot_norm_regional_hypso.py +++ b/examples/advanced/plot_norm_regional_hypso.py @@ -1,18 +1,21 @@ -""" -Normalized regional hypsometric interpolation +"""Normalized regional hypsometric interpolation ============================================= .. caution:: This functionality is specific to glaciers, and might be removed in future package versions. There are many ways of interpolating gaps in elevation differences. 
In the case of glaciers, one very useful fact is that elevation change generally varies with elevation. -This means that if valid pixels exist in a certain elevation bin, their values can be used to fill other pixels in the same approximate elevation. -Filling gaps by elevation is the main basis of "hypsometric interpolation approaches", of which there are many variations of. +This means that if valid pixels exist in a certain elevation bin, +their values can be used to fill other pixels in the same approximate elevation. +Filling gaps by elevation is the main basis of "hypsometric interpolation approaches", +of which there are many variations of. -One problem with simple hypsometric approaches is that they may not work for glaciers with different elevation ranges and scales. +One problem with simple hypsometric approaches is that they may not work for glaciers +with different elevation ranges and scales. Let's say we have two glaciers: one gigantic reaching from 0-1000 m, and one small from 900-1100 m. Usually in the 2000s, glaciers thin rapidly at the bottom, while they may be neutral or only thin slightly in the top. -If we extrapolate the hypsometric signal of the gigantic glacier to use on the small one, it may seem like the smaller glacier has almost no change whatsoever. +If we extrapolate the hypsometric signal of the gigantic glacier to use on the small one, +it may seem like the smaller glacier has almost no change whatsoever. This may be right, or it may be catastrophically wrong! Normalized regional hypsometric interpolation solves the scale and elevation range problems in one go. It: @@ -69,8 +72,10 @@ # %% # The normalized hypsometric signal shows the tendency for elevation change as a function of elevation. # The magnitude may vary between glaciers, but the shape is generally similar. -# Normalizing by both elevation and elevation change, and then re-scaling the signal to every glacier, ensures that it is as accurate as possible. 
-# **NOTE**: The hypsometric signal does not need to be generated separately; it will be created by :func:`xdem.volume.norm_regional_hypsometric_interpolation`. +# Normalizing by both elevation and elevation change, and then re-scaling the signal to every glacier, +# ensures that it is as accurate as possible. +# **NOTE**: The hypsometric signal does not need to be generated separately; +# it will be created by :func:`xdem.volume.norm_regional_hypsometric_interpolation`. # Generating it first, however, allows us to visualize and validate it. ddem = dem_2009 - dem_1990 @@ -93,7 +98,7 @@ # The signal can now be used (or simply estimated again if not provided) to interpolate the DEM. ddem_filled = xdem.volume.norm_regional_hypsometric_interpolation( - voided_ddem=ddem_voided, ref_dem=dem_2009, glacier_index_map=glacier_index_map, regional_signal=signal + voided_ddem=ddem_voided, ref_dem=dem_2009, glacier_index_map=glacier_index_map, regional_signal=signal, ) @@ -115,5 +120,6 @@ # %% # As we see, the median is close to zero, while the NMAD varies slightly more. -# This is expected, as the regional signal is good for multiple glaciers at once, but it cannot account for difficult local topography and meteorological conditions. +# This is expected, as the regional signal is good for multiple glaciers at once, +# but it cannot account for difficult local topography and meteorological conditions. # It is therefore highly recommended for large regions; just don't zoom in too close! diff --git a/examples/advanced/plot_slope_methods.py b/examples/advanced/plot_slope_methods.py index 12289bdeb..9e877f417 100644 --- a/examples/advanced/plot_slope_methods.py +++ b/examples/advanced/plot_slope_methods.py @@ -1,5 +1,4 @@ -""" -Slope and aspect methods +"""Slope and aspect methods ======================== Terrain slope and aspect can be estimated using different methods. @@ -7,11 +6,13 @@ See also the :ref:`terrain-attributes` feature page. 
-**References:** `Horn (1981) `_, `Zevenbergen and Thorne (1987) `_. +**References:** `Horn (1981) `_, + `Zevenbergen and Thorne (1987) `_. """ import matplotlib.pyplot as plt import numpy as np +from geoutils.raster import RasterType import xdem @@ -20,8 +21,28 @@ dem = xdem.DEM(xdem.examples.get_path("longyearbyen_ref_dem")) -def plot_attribute(attribute, cmap, label=None, vlim=None): - +def plot_attribute(attribute: RasterType, + cmap: str, + label: str | None = None, + vlim: float | None = None) -> None: + """Plot a specified attribute. + + Parameters + ---------- + attribute : RasterType + Attribute to plot. + cmap : str + Colorbar used from matplotlib. + label : str + Colorbar title. + vlim : float + Colorbar upper and lower limits. + + Returns + ------- + None + + """ if vlim is not None: if isinstance(vlim, (int, np.integer, float, np.floating)): vlims = {"vmin": -vlim, "vmax": vlim} @@ -38,7 +59,6 @@ def plot_attribute(attribute, cmap, label=None, vlim=None): plt.show() - # %% # Slope with method of Horn (1981) (GDAL default), based on a refined # approximation of the gradient (page 18, bottom left, and pages 20-21). @@ -59,7 +79,9 @@ def plot_attribute(attribute, cmap, label=None, vlim=None): diff_slope = slope_horn - slope_zevenberg -plot_attribute(diff_slope, "RdYlBu", "Slope of Horn (1981) minus\n slope of Zevenberg and Thorne (1987) (°)", vlim=3) +plot_attribute(diff_slope, + "RdYlBu", + "Slope of Horn (1981) minus\n slope of Zevenberg and Thorne (1987) (°)", vlim=3) # %% # The differences are negative, implying that the method of Horn always provides flatter slopes. 
@@ -86,7 +108,7 @@ def plot_attribute(attribute, cmap, label=None, vlim=None): var_name="maxc", statistic_name="nanmedian", label_var="Maximum absolute curvature (100 m$^{-1}$)", - label_statistic="Slope of Horn (1981) minus\n " "slope of Zevenberg and Thorne (1987) (°)", + label_statistic="Slope of Horn (1981) minus\n slope of Zevenberg and Thorne (1987) (°)", ) @@ -101,7 +123,9 @@ def plot_attribute(attribute, cmap, label=None, vlim=None): diff_aspect_mod = np.minimum(diff_aspect % 360, 360 - diff_aspect % 360) plot_attribute( - diff_aspect_mod, "Spectral", "Aspect of Horn (1981) minus\n aspect of Zevenberg and Thorne (1987) (°)", vlim=[0, 90] + diff_aspect_mod, + "Spectral", + "Aspect of Horn (1981) minus\n aspect of Zevenberg and Thorne (1987) (°)", vlim=[0, 90], ) # %% diff --git a/examples/advanced/plot_standardization.py b/examples/advanced/plot_standardization.py index 7fa58d94b..329978946 100644 --- a/examples/advanced/plot_standardization.py +++ b/examples/advanced/plot_standardization.py @@ -1,5 +1,4 @@ -""" -Standardization for stable terrain as error proxy +"""Standardization for stable terrain as error proxy ================================================= Digital elevation models have both a precision that can vary with terrain or instrument-related variables, and @@ -24,7 +23,8 @@ from xdem.spatialstats import nmad # %% -# We start by estimating the elevation heteroscedasticity and deriving a terrain-dependent measurement error as a function of both +# We start by estimating the elevation heteroscedasticity and +# deriving a terrain-dependent measurement error as a function of both # slope and maximum curvature, as shown in the :ref:`sphx_glr_basic_examples_plot_infer_heterosc.py` example. 
# Load the data @@ -35,7 +35,7 @@ # Compute the slope and maximum curvature slope, planc, profc = xdem.terrain.get_terrain_attribute( - dem=ref_dem, attribute=["slope", "planform_curvature", "profile_curvature"] + dem=ref_dem, attribute=["slope", "planform_curvature", "profile_curvature"], ) # Remove values on unstable terrain @@ -55,8 +55,8 @@ np.nanquantile(slope_arr, np.linspace(0, 0.95, 20)), np.nanquantile(slope_arr, np.linspace(0.96, 0.99, 5)), np.nanquantile(slope_arr, np.linspace(0.991, 1, 10)), - ] - ) + ], + ), ) custom_bin_curvature = np.unique( @@ -65,12 +65,12 @@ np.nanquantile(maxc_arr, np.linspace(0, 0.95, 20)), np.nanquantile(maxc_arr, np.linspace(0.96, 0.99, 5)), np.nanquantile(maxc_arr, np.linspace(0.991, 1, 10)), - ] - ) + ], + ), ) # Perform 2D binning to estimate the measurement error with slope and maximum curvature -df = xdem.spatialstats.nd_binning( +df_bin = xdem.spatialstats.nd_binning( values=dh_arr, list_var=[slope_arr, maxc_arr], list_var_names=["slope", "maxc"], @@ -80,7 +80,7 @@ # Estimate an interpolant of the measurement error with slope and maximum curvature slope_curv_to_dh_err = xdem.spatialstats.interp_nd_binning( - df, list_var_names=["slope", "maxc"], statistic="nmad", min_count=30 + df_bin, list_var_names=["slope", "maxc"], statistic="nmad", min_count=30, ) maxc = np.maximum(np.abs(profc), np.abs(planc)) @@ -122,7 +122,8 @@ plt.show() # %% -# Now, we can perform an analysis of spatial correlation as shown in the :ref:`sphx_glr_advanced_examples_plot_variogram_estimation_modelling.py` +# Now, we can perform an analysis of spatial correlation as shown in the +# :ref:`sphx_glr_advanced_examples_plot_variogram_estimation_modelling.py` # example, by estimating a variogram and fitting a sum of two models. # Dowd's variogram is used for robustness in conjunction with the NMAD (see :ref:`robuststats-corr`). 
df_vgm = xdem.spatialstats.sample_empirical_variogram( @@ -135,7 +136,7 @@ ) func_sum_vgm, params_vgm = xdem.spatialstats.fit_sum_model_variogram( - ["Gaussian", "Spherical"], empirical_variogram=df_vgm + ["Gaussian", "Spherical"], empirical_variogram=df_vgm, ) xdem.spatialstats.plot_variogram( df_vgm, @@ -189,11 +190,11 @@ # %% # We calculate the number of effective samples for each glacier based on the variogram svendsen_neff = xdem.spatialstats.neff_circular_approx_numerical( - area=svendsen_shp.ds.area.values[0], params_variogram_model=params_vgm + area=svendsen_shp.ds.area.values[0], params_variogram_model=params_vgm, ) medals_neff = xdem.spatialstats.neff_circular_approx_numerical( - area=medals_shp.ds.area.values[0], params_variogram_model=params_vgm + area=medals_shp.ds.area.values[0], params_variogram_model=params_vgm, ) print(f"Number of effective samples of Svendsenbreen glacier: {svendsen_neff:.1f}") @@ -255,5 +256,6 @@ # %% # Because of slightly higher slopes and curvatures, the final uncertainty for Medalsbreen is larger by about 10%. # The differences between the mean terrain slope and curvatures of stable terrain and those of glaciers is quite limited -# on Svalbard. In high moutain terrain, such as the Alps or Himalayas, the difference between stable terrain and glaciers, +# on Svalbard. In high moutain terrain, such as the Alps or Himalayas, +# the difference between stable terrain and glaciers, # and among glaciers, would be much larger. 
diff --git a/examples/advanced/plot_variogram_estimation_modelling.py b/examples/advanced/plot_variogram_estimation_modelling.py index f40e49c4a..5e26862c6 100644 --- a/examples/advanced/plot_variogram_estimation_modelling.py +++ b/examples/advanced/plot_variogram_estimation_modelling.py @@ -1,8 +1,8 @@ -""" -Estimation and modelling of spatial variograms +"""Estimation and modelling of spatial variograms ============================================== -Digital elevation models have errors that are often `correlated in space `_. +Digital elevation models have errors that are often +`correlated in space `_. While many DEM studies used solely short-range `variograms `_ to estimate the correlation of elevation measurement errors, recent studies show that variograms of multiple ranges provide larger, more reliable estimates of spatial correlation for DEMs. @@ -12,13 +12,15 @@ by :func:`~xdem.spatialstats.infer_spatial_correlation_from_stable` exemplified in # :ref:`sphx_glr_basic_examples_plot_infer_spatial_correlation.py`. -We first estimate an empirical variogram with :func:`~xdem.spatialstats.sample_empirical_variogram` based on routines of `SciKit-GStat +We first estimate an empirical variogram with :func: +`~xdem.spatialstats.sample_empirical_variogram` based on routines of `SciKit-GStat `_. We then fit the empirical variogram with a sum of variogram models using :func:`~xdem.spatialstats.fit_sum_model_variogram`. Finally, we perform spatial propagation for a range of averaging area using :func:`~xdem.spatialstats.number_effective_samples`, and empirically validate the improved robustness of our results using :func:`~xdem.spatialstats.patches_method`, an intensive Monte-Carlo sampling approach. -**References:** `Rolstad et al. (2009) `_, `Hugonnet et al. (2022) `_. +**References:** `Rolstad et al. (2009) `_, + `Hugonnet et al. (2022) `_. 
""" import geoutils as gu @@ -49,7 +51,8 @@ print(f"NMAD: {xdem.spatialstats.nmad(dh.data):.2f} meters.") # %% -# The two measures of dispersion are quite similar showing that, on average, there is a small influence of outliers on the +# The two measures of dispersion are quite similar showing that, +# on average, there is a small influence of outliers on the # elevation differences. The per-pixel precision is about :math:`\pm` 2.5 meters. # **Does this mean that every pixel has an independent measurement error of** :math:`\pm` **2.5 meters?** # Let's plot the elevation differences to visually check the quality of the data. @@ -62,7 +65,8 @@ # %% # Additionally, we notice that the elevation differences are still polluted by unrealistically large elevation -# differences near glaciers, probably because the glacier inventory is more recent than the data, hence with too small outlines. +# differences near glaciers, probably because the glacier inventory is more recent than the data, +# hence with too small outlines. # To remedy this, we filter large elevation differences outside 4 NMAD. dh.set_mask(np.abs(dh.data) > 4 * xdem.spatialstats.nmad(dh.data)) @@ -80,16 +84,17 @@ # conveniently by :func:`~xdem.spatialstats.sample_empirical_variogram`. # Dowd's variogram is used for # robustness in conjunction with the NMAD (see :ref:`robuststats-corr`). -df = xdem.spatialstats.sample_empirical_variogram( - values=dh, subsample=500, n_variograms=5, estimator="dowd", random_state=42 +df_vgm = xdem.spatialstats.sample_empirical_variogram( + values=dh, subsample=500, n_variograms=5, estimator="dowd", random_state=42, ) # %% -# .. note:: In this example, we add a ``random_state`` argument to yield a reproducible random sampling of pixels within the grid. +# .. note:: In this example, we add a ``random_state`` +# argument to yield a reproducible random sampling of pixels within the grid. 
# %% # We plot the empirical variogram: -xdem.spatialstats.plot_variogram(df) +xdem.spatialstats.plot_variogram(df_vgm) # %% # With this plot, it is hard to conclude anything! Properly visualizing the empirical variogram is one of the most @@ -102,31 +107,36 @@ # %% # **Log scale:** -xdem.spatialstats.plot_variogram(df, xscale="log") +xdem.spatialstats.plot_variogram(df_vgm, xscale="log") # %% # **Subpanels with linear scale:** -xdem.spatialstats.plot_variogram(df, xscale_range_split=[100, 1000, 10000]) +xdem.spatialstats.plot_variogram(df_vgm, xscale_range_split=[100, 1000, 10000]) # %% # We identify: -# - a short-range (i.e., correlation length) correlation, likely due to effects of resolution. It has a large partial sill (correlated variance), meaning that the elevation measurement errors are strongly correlated until a range of ~100 m. -# - a longer range correlation, with a smaller partial sill, meaning the part of the elevation measurement errors remain correlated over a longer distance. +# - a short-range (i.e., correlation length) correlation, likely due to effects of resolution. +# It has a large partial sill (correlated variance), meaning that the elevation +# measurement errors are strongly correlated until a range of ~100 m. +# - a longer range correlation, with a smaller partial sill, meaning the part of +# the elevation measurement errors remain correlated over a longer distance. # -# In order to show the difference between accounting only for the most noticeable, short-range correlation, or adding the -# long-range correlation, we fit this empirical variogram with two different models: a single spherical model, and -# the sum of two spherical models (two ranges). 
For this, we use :func:`xdem.spatialstats.fit_sum_model_variogram`, which -# is based on `scipy.optimize.curve_fit `_: +# In order to show the difference between accounting only for the most noticeable, +# short-range correlation, or adding the long-range correlation, +# we fit this empirical variogram with two different models: a single spherical model, +# and the sum of two spherical models (two ranges). +# For this, we use :func:`xdem.spatialstats.fit_sum_model_variogram`, which is based on +# `scipy.optimize.curve_fit `_: func_sum_vgm1, params_vgm1 = xdem.spatialstats.fit_sum_model_variogram( - list_models=["Spherical"], empirical_variogram=df + list_models=["Spherical"], empirical_variogram=df_vgm, ) func_sum_vgm2, params_vgm2 = xdem.spatialstats.fit_sum_model_variogram( - list_models=["Spherical", "Spherical"], empirical_variogram=df + list_models=["Spherical", "Spherical"], empirical_variogram=df_vgm, ) xdem.spatialstats.plot_variogram( - df, + df_vgm, list_fit_fun=[func_sum_vgm1, func_sum_vgm2], list_fit_fun_label=["Single-range model", "Double-range model"], xscale_range_split=[100, 1000, 10000], @@ -138,9 +148,10 @@ # # **So one could wonder: is it really important to account for this small additional "bump" in the variogram?** # -# To answer this, we compute the precision of the DEM integrated over a certain surface area based on spatial integration of the -# variogram models using :func:`xdem.spatialstats.neff_circ`, with areas varying from pixel size to grid size. -# Numerical and exact integration of variogram is fast, allowing us to estimate errors for a wide range of areas rapidly. +# To answer this, we compute the precision of the DEM integrated over a +# certain surface area based on spatial integration of the variogram models using :func:`xdem.spatialstats.neff_circ`, +# with areas varying from pixel size to grid size. Numerical and exact integration of variogram is fast, +# allowing us to estimate errors for a wide range of areas rapidly. 
areas = np.linspace(20, 10000, 50) ** 2 @@ -162,7 +173,8 @@ # %% # We add an empirical error based on intensive Monte-Carlo sampling ("patches" method) to validate our results. # This method is implemented in :func:`xdem.spatialstats.patches_method`. Here, we sample fewer areas to avoid for the -# patches method to run over long processing times, increasing from areas of 5 pixels to areas of 10000 pixels exponentially. +# patches method to run over long processing times, +# increasing from areas of 5 pixels to areas of 10000 pixels exponentially. areas_emp = [4000 * 2 ** (i) for i in range(10)] df_patches = xdem.spatialstats.patches_method(dh, gsd=dh.res[0], areas=areas_emp, n_patches=200) @@ -251,5 +263,8 @@ # Our final estimation is now very close to the empirical error estimate. # # Some take-home points: -# 1. Long-range correlations are very important to reliably estimate measurement errors integrated in space, even if they have a small partial sill i.e. correlated variance, -# 2. Ideally, the grid must only contain correlation patterns significantly smaller than the grid size to verify second-order stationarity. Otherwise, be wary of small biases of central tendency, i.e. fully correlated measurement errors! +# 1. Long-range correlations are very important to reliably estimate measurement errors integrated in space, +# even if they have a small partial sill i.e. correlated variance, +# 2. Ideally, the grid must only contain correlation patterns significantly smaller than the grid size to verify +# second-order stationarity. Otherwise, be wary of small biases of central tendency, +# i.e. fully correlated measurement errors! diff --git a/examples/basic/__init__.py b/examples/basic/__init__.py new file mode 100644 index 000000000..2d0798532 --- /dev/null +++ b/examples/basic/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024 Centre National d'Etudes Spatiales (CNES). 
+# +# This file is part of the xDEM project: +# https://github.com/glaciohack/xdem +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""xDEM basic examples module init file.""" diff --git a/examples/basic/plot_dem_subtraction.py b/examples/basic/plot_dem_subtraction.py index 9081c47b3..920031a58 100644 --- a/examples/basic/plot_dem_subtraction.py +++ b/examples/basic/plot_dem_subtraction.py @@ -1,13 +1,15 @@ -""" -DEM differencing +"""DEM differencing ================ Subtracting a DEM with another one should be easy. -xDEM allows to use any operator on :class:`xdem.DEM` objects, such as :func:`+` or :func:`-` as well as most NumPy functions -while respecting nodata values and checking that georeferencing is consistent. This functionality is inherited from `GeoUtils' Raster class `_. +xDEM allows to use any operator on :class:`xdem.DEM` objects, such as :func:`+` or +:func:`-` as well as most NumPy functions +while respecting nodata values and checking that georeferencing is consistent. + This functionality is inherited from `GeoUtils' Raster class `_. -Before DEMs can be compared, they need to be reprojected to the same grid and have the same 3D CRSs. The :func:`~xdem.DEM.reproject` and :func:`~xdem.DEM.to_vcrs` methods are used for this. +Before DEMs can be compared, they need to be reprojected to the same grid and have the same 3D CRSs. + The :func:`~xdem.DEM.reproject` and :func:`~xdem.DEM.to_vcrs` methods are used for this. 
""" @@ -28,7 +30,8 @@ dem_1990.info() # %% -# In this particular case, the two DEMs are already on the same grid (they have the same bounds, resolution and coordinate system). +# In this particular case, the two DEMs are already on the same grid +# (they have the same bounds, resolution and coordinate system). # If they don't, we need to reproject one DEM to fit the other using :func:`xdem.DEM.reproject`: dem_1990 = dem_1990.reproject(dem_2009) @@ -37,9 +40,12 @@ # Oops! # GeoUtils just warned us that ``dem_1990`` did not need reprojection. We can hide this output with ``silent``. # By default, :func:`~xdem.DEM.reproject` uses "bilinear" resampling (assuming resampling is needed). -# Other options are detailed at `geoutils.Raster.reproject() `_ and `rasterio.enums.Resampling `_. +# Other options are detailed at +# `geoutils.Raster.reproject() `_ +# and `rasterio.enums.Resampling `_. # noqa: E501 # -# We now compute the difference by simply substracting, passing ``stats=True`` to :func:`xdem.DEM.info` to print statistics. +# We now compute the difference by simply substracting, +# passing ``stats=True`` to :func:`xdem.DEM.info` to print statistics. ddem = dem_2009 - dem_1990 diff --git a/examples/basic/plot_icp_coregistration.py b/examples/basic/plot_icp_coregistration.py index a69baf72b..400f3399f 100644 --- a/examples/basic/plot_icp_coregistration.py +++ b/examples/basic/plot_icp_coregistration.py @@ -1,5 +1,4 @@ -""" -Iterative closest point coregistration +"""Iterative closest point coregistration ====================================== Iterative closest point (ICP) is a registration method accounting for both rotations and translations. @@ -32,7 +31,8 @@ # %% # To try the effects of rotation, we can artificially rotate the DEM using a transformation matrix. # Here, a rotation of just one degree is attempted. -# But keep in mind: the window is 6 km wide; 1 degree of rotation at the center equals to a 52 m vertical difference at the edges! 
+# But keep in mind: the window is 6 km wide; 1 degree of rotation at the center equals +# to a 52 m vertical difference at the edges! rotation = np.deg2rad(1) rotation_matrix = np.array( @@ -41,7 +41,7 @@ [0, 1, 0, 0], [-np.sin(rotation), 0, np.cos(rotation), 0], [0, 0, 0, 1], - ] + ], ) centroid = [dem.bounds.left + dem.width / 2, dem.bounds.bottom + dem.height / 2, np.nanmean(dem)] # This will apply the matrix along the center of the DEM @@ -90,7 +90,8 @@ # %% # The results show what we expected: # -# - **ICP** alone handled the rotational offset, but left a horizontal offset as it is not sub-pixel accurate (in this case, the resolution is 20x20m). +# - **ICP** alone handled the rotational offset, but left a horizontal offset as it is not sub-pixel accurate +# (in this case, the resolution is 20x20m). # - **Nuth and Kääb** barely helped at all, since the offset is purely rotational. # - **ICP + Nuth and Kääb** first handled the rotation, then fit the reference with sub-pixel accuracy. # diff --git a/examples/basic/plot_infer_heterosc.py b/examples/basic/plot_infer_heterosc.py index 1878c69a5..6249fbf0c 100644 --- a/examples/basic/plot_infer_heterosc.py +++ b/examples/basic/plot_infer_heterosc.py @@ -1,5 +1,4 @@ -""" -Elevation error map +"""Elevation error map =================== Digital elevation models have a precision that can vary with terrain and instrument-related variables. 
Here, we @@ -30,7 +29,7 @@ # %% # Then, we run the pipeline for inference of elevation heteroscedasticity from stable terrain: errors, df_binning, error_function = xdem.spatialstats.infer_heteroscedasticity_from_stable( - dvalues=dh, list_var=[slope, maximum_curvature], list_var_names=["slope", "maxc"], unstable_mask=glacier_outlines + dvalues=dh, list_var=[slope, maximum_curvature], list_var_names=["slope", "maxc"], unstable_mask=glacier_outlines, ) # %% @@ -39,19 +38,20 @@ # %% # The second output is the dataframe of 2D binning with slope and maximum curvature: -df_binning +print(df_binning) # %% # The third output is the 2D binning interpolant, i.e. an error function with the slope and maximum curvature # (*Note: below we divide the maximum curvature by 100 to convert it in* m\ :sup:`-1` ): for slope, maxc in [(0, 0), (40, 0), (0, 5), (40, 5)]: print( - "Error for a slope of {:.0f} degrees and" - " {:.2f} m-1 max. curvature: {:.1f} m".format(slope, maxc / 100, error_function((slope, maxc))) + f"Error for a slope of {slope:.0f} degrees and" + f" {maxc / 100:.2f} m-1 max. curvature: {error_function((slope, maxc)):.1f} m", ) # %% # This pipeline will not always work optimally with default parameters: spread estimates can be affected by skewed # distributions, the binning by extreme range of values, some DEMs do not have any error variability with terrain (e.g., -# terrestrial photogrammetry). **To learn how to tune more parameters and use the subfunctions, see the gallery example:** +# terrestrial photogrammetry). +# **To learn how to tune more parameters and use the subfunctions, see the gallery example:** # :ref:`sphx_glr_advanced_examples_plot_heterosc_estimation_modelling.py`! 
diff --git a/examples/basic/plot_infer_spatial_correlation.py b/examples/basic/plot_infer_spatial_correlation.py index 1e29ca270..f2881bd89 100644 --- a/examples/basic/plot_infer_spatial_correlation.py +++ b/examples/basic/plot_infer_spatial_correlation.py @@ -1,12 +1,12 @@ -""" -Spatial correlation of errors +"""Spatial correlation of errors ============================= Digital elevation models have errors that are spatially correlated due to instrument or processing effects. Here, we rely on a non-stationary spatial statistics framework to estimate and model spatial correlations in elevation error. We use a sum of variogram forms to model this correlation, with stable terrain as an error proxy for moving terrain. -**References:** `Rolstad et al. (2009) `_, `Hugonnet et al. (2022) `_. +**References:** `Rolstad et al. (2009) `_, +`Hugonnet et al. (2022) `_. """ import geoutils as gu @@ -30,7 +30,7 @@ df_model_params, spatial_corr_function, ) = xdem.spatialstats.infer_spatial_correlation_from_stable( - dvalues=dh, list_models=["Gaussian", "Spherical"], unstable_mask=glacier_outlines, random_state=42 + dvalues=dh, list_models=["Gaussian", "Spherical"], unstable_mask=glacier_outlines, random_state=42, ) # %% @@ -40,20 +40,18 @@ # "experimental" variance value of the variogram in that bin, the ``count`` the number of pairwise samples, and # ``err_exp`` the 1-sigma error of the "experimental" variance, if more than one variogram is estimated with the # ``n_variograms`` parameter. 
-df_empirical_variogram +print(df_empirical_variogram) # %% # The second output is the dataframe of optimized model parameters (``range``, ``sill``, and possibly ``smoothness``) # for a sum of gaussian and spherical models: -df_model_params +print(df_model_params) # %% # The third output is the spatial correlation function with spatial lags, derived from the variogram: for spatial_lag in [0, 100, 1000, 10000, 30000]: print( - "Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag".format( - spatial_corr_function(spatial_lag) * 100, spatial_lag - ) + f"Errors are correlated at {spatial_corr_function(spatial_lag) * 100:.1f}% for a {spatial_lag:,.0f} m spatial lag", ) # %% diff --git a/examples/basic/plot_logging_configuration.py b/examples/basic/plot_logging_configuration.py index 5f0a347b7..7fabb4421 100644 --- a/examples/basic/plot_logging_configuration.py +++ b/examples/basic/plot_logging_configuration.py @@ -1,5 +1,4 @@ -""" -Configuring verbosity level +"""Configuring verbosity level =========================== This example demonstrates how to configure verbosity level, or logging, using a coregistration method. diff --git a/examples/basic/plot_nuth_kaab.py b/examples/basic/plot_nuth_kaab.py index 4ede9a1b2..81745caae 100644 --- a/examples/basic/plot_nuth_kaab.py +++ b/examples/basic/plot_nuth_kaab.py @@ -1,5 +1,4 @@ -""" -Nuth and Kääb coregistration +"""Nuth and Kääb coregistration ============================ The Nuth and Kääb coregistration corrects horizontal and vertical shifts, and is especially performant for precise @@ -26,7 +25,8 @@ inlier_mask = ~glacier_outlines.create_mask(reference_dem) # %% -# The DEM to be aligned (a 1990 photogrammetry-derived DEM) has some vertical and horizontal biases that we want to reduce. +# The DEM to be aligned (a 1990 photogrammetry-derived DEM) +# has some vertical and horizontal biases that we want to reduce. 
# These can be visualized by plotting a change map: diff_before = reference_dem - dem_to_be_aligned @@ -64,4 +64,5 @@ # %% # In the plot above, one may notice a positive (blue) tendency toward the east. # The 1990 DEM is a mosaic, and likely has a "seam" near there. -# :ref:`sphx_glr_advanced_examples_plot_blockwise_coreg.py` tackles this issue, using a nonlinear coregistration approach. +# :ref:`sphx_glr_advanced_examples_plot_blockwise_coreg.py` tackles this issue, +# using a nonlinear coregistration approach. diff --git a/examples/basic/plot_spatial_error_propagation.py b/examples/basic/plot_spatial_error_propagation.py index 4e272b0c9..780c52a83 100644 --- a/examples/basic/plot_spatial_error_propagation.py +++ b/examples/basic/plot_spatial_error_propagation.py @@ -1,5 +1,4 @@ -""" -Spatial propagation of elevation errors +"""Spatial propagation of elevation errors ======================================= Propagating elevation errors spatially accounting for heteroscedasticity and spatial correlation is complex. It @@ -7,7 +6,8 @@ other operation), which is computationally intensive. Here, we rely on published formulations to perform computationally-efficient spatial propagation for the mean of elevation (or elevation differences) in an area. -**References:** `Rolstad et al. (2009) `_, `Hugonnet et al. (2022) `_. +**References:** `Rolstad et al. (2009) `_, +`Hugonnet et al. (2022) `_. """ import geoutils as gu @@ -20,7 +20,8 @@ # %% # We load the same data, and perform the same calculations on heteroscedasticity and spatial correlations of errors as -# in the :ref:`sphx_glr_basic_examples_plot_infer_heterosc.py` and :ref:`sphx_glr_basic_examples_plot_infer_spatial_correlation.py` +# in the :ref:`sphx_glr_basic_examples_plot_infer_heterosc.py` and +# :ref:`sphx_glr_basic_examples_plot_infer_spatial_correlation.py` # examples. 
dh = xdem.DEM(xdem.examples.get_path("longyearbyen_ddem")) @@ -28,7 +29,7 @@ glacier_outlines = gu.Vector(xdem.examples.get_path("longyearbyen_glacier_outlines")) slope, maximum_curvature = xdem.terrain.get_terrain_attribute(ref_dem, attribute=["slope", "maximum_curvature"]) errors, df_binning, error_function = xdem.spatialstats.infer_heteroscedasticity_from_stable( - dvalues=dh, list_var=[slope, maximum_curvature], list_var_names=["slope", "maxc"], unstable_mask=glacier_outlines + dvalues=dh, list_var=[slope, maximum_curvature], list_var_names=["slope", "maxc"], unstable_mask=glacier_outlines, ) # %% @@ -36,7 +37,7 @@ # as it removes the variance variability due to heteroscedasticity. zscores = dh / errors emp_variogram, params_variogram_model, spatial_corr_function = xdem.spatialstats.infer_spatial_correlation_from_stable( - dvalues=zscores, list_models=["Gaussian", "Spherical"], unstable_mask=glacier_outlines, random_state=42 + dvalues=zscores, list_models=["Gaussian", "Spherical"], unstable_mask=glacier_outlines, random_state=42, ) # %% @@ -48,7 +49,7 @@ glacier_outlines.ds[glacier_outlines.ds["NAME"] == "Medalsbreen"], ] stderr_glaciers = xdem.spatialstats.spatial_error_propagation( - areas=areas, errors=errors, params_variogram_model=params_variogram_model + areas=areas, errors=errors, params_variogram_model=params_variogram_model, ) for glacier_name, stderr_gla in [("Brombreen", stderr_glaciers[0]), ("Medalsbreen", stderr_glaciers[1])]: @@ -60,7 +61,7 @@ # sizes, but is less accurate to estimate the standard error of a certain area shape. 
areas = 10 ** np.linspace(1, 12) stderrs = xdem.spatialstats.spatial_error_propagation( - areas=areas, errors=errors, params_variogram_model=params_variogram_model + areas=areas, errors=errors, params_variogram_model=params_variogram_model, ) plt.plot(areas / 10**6, stderrs) plt.xlabel("Averaging area (km²)") @@ -72,7 +73,7 @@ colors="red", linestyles="dashed", label="Disk area with radius the\n1st correlation range of {:,.0f} meters".format( - params_variogram_model["range"].values[0] + params_variogram_model["range"].values[0], ), ) plt.vlines( @@ -82,7 +83,7 @@ colors="blue", linestyles="dashed", label="Disk area with radius the\n2nd correlation range of {:,.0f} meters".format( - params_variogram_model["range"].values[1] + params_variogram_model["range"].values[1], ), ) plt.xscale("log") diff --git a/examples/basic/plot_terrain_attributes.py b/examples/basic/plot_terrain_attributes.py index 8d5599d6e..86dd84a46 100644 --- a/examples/basic/plot_terrain_attributes.py +++ b/examples/basic/plot_terrain_attributes.py @@ -1,5 +1,4 @@ -""" -Terrain attributes +"""Terrain attributes ================== Terrain attributes generated from a DEM have a multitude of uses for analytic and visual purposes. 
diff --git a/pyproject.toml b/pyproject.toml index ceef909ff..40af1a646 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,8 +12,10 @@ build-backend = "setuptools.build_meta" version_file = "xdem/_version.py" fallback_version = "0.0.1" -[tool.black] -target_version = ['py310'] +#[tool.black] +#target_version = ['py310'] + + [tool.pytest.ini_options] addopts = "--doctest-modules -W error::UserWarning" diff --git a/setup.py b/setup.py index 7f7e78e1f..6607ad8bd 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -"""This file now only serves for backward-compatibility for routines explicitly calling python setup.py""" +"""The file now only serves for backward-compatibility for routines explicitly calling python setup.py""" from setuptools import setup diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..ea2a35c64 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024 Centre National d'Etudes Spatiales (CNES). +# +# This file is part of the xDEM project: +# https://github.com/glaciohack/xdem +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""xDEM tests module init file.""" diff --git a/tests/conftest.py b/tests/conftest.py index 675084e15..56d7d1d17 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,6 @@ -from typing import Callable, List, Union +"""Functions to test configuration.""" + +from collections.abc import Callable import geoutils as gu import numpy as np @@ -11,10 +13,9 @@ @pytest.fixture(scope="session") # type: ignore def raster_to_rda() -> Callable[[RasterType], rd.rdarray]: + """Allows to convert geoutils.Raster to richDEM rdarray through decorator.""" def _raster_to_rda(rst: RasterType) -> rd.rdarray: - """ - Convert geoutils.Raster to richDEM rdarray. - """ + """Convert geoutils.Raster to richDEM rdarray.""" arr = rst.data.filled(rst.nodata).squeeze() rda = rd.rdarray(arr, no_data=rst.nodata) rda.geotransform = rst.transform.to_gdal() @@ -25,10 +26,9 @@ def _raster_to_rda(rst: RasterType) -> rd.rdarray: @pytest.fixture(scope="session") # type: ignore def get_terrainattr_richdem(raster_to_rda: Callable[[RasterType], rd.rdarray]) -> Callable[[RasterType, str], NDArrayf]: + """Allows to get terrain attribute for DEM opened with geoutils.Raster using RichDEM through decorator.""" def _get_terrainattr_richdem(rst: RasterType, attribute: str = "slope_radians") -> NDArrayf: - """ - Derive terrain attribute for DEM opened with geoutils.Raster using RichDEM. 
- """ + """Derive terrain attribute for DEM opened with geoutils.Raster using RichDEM.""" rda = raster_to_rda(rst) terrattr = rd.TerrainAttribute(rda, attrib=attribute) terrattr[terrattr == terrattr.no_data] = np.nan @@ -39,24 +39,23 @@ def _get_terrainattr_richdem(rst: RasterType, attribute: str = "slope_radians") @pytest.fixture(scope="session") # type: ignore def get_terrain_attribute_richdem( - get_terrainattr_richdem: Callable[[RasterType, str], NDArrayf] -) -> Callable[[RasterType, Union[str, list[str]], bool, float, float, float], Union[RasterType, list[RasterType]]]: + get_terrainattr_richdem: Callable[[RasterType, str], NDArrayf], +) -> Callable[[RasterType, str | list[str], bool, float, float, float], RasterType | list[RasterType]]: + """Allows to get one or multiple terrain attributes from a DEM using RichDEM through decorator.""" def _get_terrain_attribute_richdem( dem: RasterType, - attribute: Union[str, List[str]], + attribute: str | list[str], degrees: bool = True, hillshade_altitude: float = 45.0, hillshade_azimuth: float = 315.0, hillshade_z_factor: float = 1.0, - ) -> Union[RasterType, List[RasterType]]: - """ - Derive one or multiple terrain attributes from a DEM using RichDEM. 
- """ + ) -> RasterType | list[RasterType]: + """Derive one or multiple terrain attributes from a DEM using RichDEM.""" if isinstance(attribute, str): attribute = [attribute] if not isinstance(dem, gu.Raster): - raise ValueError("DEM must be a geoutils.Raster object.") + raise TypeError("DEM must be a geoutils.Raster object.") terrain_attributes = {} diff --git a/tests/test_coreg/__init__.py b/tests/test_coreg/__init__.py index e69de29bb..2bd15bc2c 100644 --- a/tests/test_coreg/__init__.py +++ b/tests/test_coreg/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024 xDEM developers +# +# This file is part of the xDEM project: +# https://github.com/glaciohack/xdem +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""xDEM test_coreg module init file.""" diff --git a/tests/test_coreg/test_affine.py b/tests/test_coreg/test_affine.py index a642038c6..66456a632 100644 --- a/tests/test_coreg/test_affine.py +++ b/tests/test_coreg/test_affine.py @@ -3,6 +3,7 @@ from __future__ import annotations import warnings +from typing import ClassVar import geopandas as gpd import geoutils @@ -22,7 +23,6 @@ def load_examples(crop: bool = True) -> tuple[RasterType, RasterType, Vector]: """Load example files to try coregistration methods with.""" - reference_dem = Raster(examples.get_path("longyearbyen_ref_dem")) to_be_aligned_dem = Raster(examples.get_path("longyearbyen_tba_dem")) glacier_mask = Vector(examples.get_path("longyearbyen_glacier_outlines")) @@ -43,8 +43,7 @@ def load_examples(crop: bool = True) -> tuple[RasterType, RasterType, Vector]: def gdal_reproject_horizontal_shift_samecrs(filepath_example: str, xoff: float, yoff: float) -> NDArrayNum: - """ - Reproject horizontal shift in same CRS with GDAL for testing purposes. + """Reproject horizontal shift in same CRS with GDAL for testing purposes. :param filepath_example: Path to raster file. :param xoff: X shift in georeferenced unit. @@ -52,7 +51,6 @@ def gdal_reproject_horizontal_shift_samecrs(filepath_example: str, xoff: float, :return: Reprojected shift array in the same CRS. 
""" - from osgeo import gdal, gdalconst # Open source raster from file @@ -97,24 +95,24 @@ class TestAffineCoreg: # Check all point-raster possibilities supported # Use the reference DEM for both, it will be artificially misaligned during tests # Raster-Raster - fit_args_rst_rst = dict(reference_elev=ref, to_be_aligned_elev=tba, inlier_mask=inlier_mask) + fit_args_rst_rst: ClassVar[dict] = {"reference_elev":ref, "to_be_aligned_elev":tba, "inlier_mask":inlier_mask} # Convert DEMs to points with a bit of subsampling for speed-up ref_pts = ref.to_pointcloud(data_column_name="z", subsample=50000, random_state=42).ds tba_pts = ref.to_pointcloud(data_column_name="z", subsample=50000, random_state=42).ds # Raster-Point - fit_args_rst_pts = dict(reference_elev=ref, to_be_aligned_elev=tba_pts, inlier_mask=inlier_mask) + fit_args_rst_pts: ClassVar[dict] = {"reference_elev":ref, "to_be_aligned_elev":tba_pts, "inlier_mask":inlier_mask} # Point-Raster - fit_args_pts_rst = dict(reference_elev=ref_pts, to_be_aligned_elev=tba, inlier_mask=inlier_mask) + fit_args_pts_rst: ClassVar[dict] = {"reference_elev":ref_pts, "to_be_aligned_elev":tba, "inlier_mask":inlier_mask} - all_fit_args = [fit_args_rst_rst, fit_args_rst_pts, fit_args_pts_rst] + all_fit_args: ClassVar[list[dict, dict, dict]] = [fit_args_rst_rst, fit_args_rst_pts, fit_args_pts_rst] # Create some 3D coordinates with Z coordinates being 0 to try the apply functions. 
points_arr = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [0, 0, 0, 0]], dtype="float64").T points = gpd.GeoDataFrame( - geometry=gpd.points_from_xy(x=points_arr[:, 0], y=points_arr[:, 1], crs=ref.crs), data={"z": points_arr[:, 2]} + geometry=gpd.points_from_xy(x=points_arr[:, 0], y=points_arr[:, 1], crs=ref.crs), data={"z": points_arr[:, 2]}, ) @pytest.mark.parametrize( @@ -123,15 +121,15 @@ class TestAffineCoreg: ) # type: ignore def test_reproject_horizontal_shift_samecrs__gdal(self, xoff_yoff: tuple[float, float]) -> None: """Check that the same-CRS reprojection based on SciPy (replacing Rasterio due to subpixel errors) - is accurate by comparing to GDAL.""" - + is accurate by comparing to GDAL. + """ ref = load_examples(crop=False)[0] # Reproject with SciPy xoff, yoff = xoff_yoff dst_transform = _translate(transform=ref.transform, xoff=xoff, yoff=yoff, distance_unit="georeferenced") output = _reproject_horizontal_shift_samecrs( - raster_arr=ref.data, src_transform=ref.transform, dst_transform=dst_transform + raster_arr=ref.data, src_transform=ref.transform, dst_transform=dst_transform, ) # Reproject with GDAL @@ -157,7 +155,7 @@ def test_reproject_horizontal_shift_samecrs__gdal(self, xoff_yoff: tuple[float, assert np.array_equal(np.logical_or(mask_dilated_plus_one, ~np.isfinite(output2)), mask_dilated_plus_one) def test_from_classmethods(self) -> None: - + """Check from_matrix, from_translation functions and that making Coreg object from a nan translation fails.""" # Check that the from_matrix function works as expected. 
vshift = 5 matrix = np.diag(np.ones(4, dtype=float)) @@ -181,7 +179,6 @@ def test_from_classmethods(self) -> None: def test_raise_all_nans(self) -> None: """Check that the coregistration approaches fail gracefully when given only nans.""" - dem1 = np.ones((50, 50), dtype=float) dem2 = dem1.copy() + np.nan affine = rio.transform.from_origin(0, 0, 1, 1) @@ -199,12 +196,13 @@ def test_raise_all_nans(self) -> None: pytest.raises(ValueError, icp.fit, dem1, dem2, transform=affine) - @pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore - @pytest.mark.parametrize("shifts", [(20, 5, 2), (-50, 100, 2)]) # type: ignore - @pytest.mark.parametrize("coreg_method", [coreg.NuthKaab, coreg.DhMinimize, coreg.ICP]) # type: ignore - def test_coreg_translations__synthetic(self, fit_args, shifts, coreg_method) -> None: - """ - Test the horizontal/vertical shift coregistrations with synthetic shifted data. These tests include NuthKaab, + @pytest.mark.parametrize("fit_args", all_fit_args) + @pytest.mark.parametrize("shifts", [(20, 5, 2), (-50, 100, 2)]) + @pytest.mark.parametrize("coreg_method", [coreg.NuthKaab, coreg.DhMinimize, coreg.ICP]) + def test_coreg_translations__synthetic(self, fit_args: dict, + shifts: tuple(int, int, int), + coreg_method: type[AffineCoreg]) -> None: + """Test the horizontal/vertical shift coregistrations with synthetic shifted data. These tests include NuthKaab, ICP and DhMinimize. We test all combinaison of inputs: raster-raster, point-raster and raster-point. @@ -214,7 +212,6 @@ def test_coreg_translations__synthetic(self, fit_args, shifts, coreg_method) -> 99% of the variance from the initial elevation differences (hence, that the direction of coregistration has to be the right one; and that there is no other errors introduced in the process). 
""" - warnings.filterwarnings("ignore", message="Covariance of the parameters*") horizontal_coreg = coreg_method() @@ -252,8 +249,8 @@ def test_coreg_translations__synthetic(self, fit_args, shifts, coreg_method) -> dh = ref - coreg_elev.reproject(ref) # Plots for debugging - PLOT = False - if PLOT and isinstance(dh, geoutils.Raster): + plots = False + if plots and isinstance(dh, geoutils.Raster): import matplotlib.pyplot as plt init_dh.plot() @@ -273,14 +270,11 @@ def test_coreg_translations__synthetic(self, fit_args, shifts, coreg_method) -> (coreg.DhMinimize, (10.0850892, 2.898172, -1.943001)), (coreg.ICP, (8.73833, 1.584255, -1.943957)), ], - ) # type: ignore + ) def test_coreg_translations__example( - self, coreg_method__shift: tuple[type[AffineCoreg], tuple[float, float, float]] + self, coreg_method__shift: tuple[type[AffineCoreg], tuple[float, float, float]], ) -> None: - """ - Test that the translation co-registration outputs are always exactly the same on the real example data. - """ - + """Test that the translation co-registration outputs are always exactly the same on the real example data.""" # Use entire DEMs here (to compare to original values from older package versions) ref, tba = load_examples(crop=False)[0:2] inlier_mask = ~self.outlines.create_mask(ref) @@ -292,18 +286,16 @@ def test_coreg_translations__example( c.fit(ref, tba, inlier_mask=inlier_mask, random_state=42) # Check the output translations match the exact values - shifts = [c.meta["outputs"]["affine"][k] for k in ["shift_x", "shift_y", "shift_z"]] # type: ignore + shifts = [c.meta["outputs"]["affine"][k] for k in ["shift_x", "shift_y", "shift_z"]] assert shifts == pytest.approx(expected_shifts) - @pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore - @pytest.mark.parametrize("vshift", [0.2, 10.0, 1000.0]) # type: ignore - def test_coreg_vertical_translation__synthetic(self, fit_args, vshift) -> None: - """ - Test the vertical shift coregistration with synthetic shifted 
data. These tests include VerticalShift. + @pytest.mark.parametrize("fit_args", all_fit_args) + @pytest.mark.parametrize("vshift", [0.2, 10.0, 1000.0]) + def test_coreg_vertical_translation__synthetic(self, fit_args: dict, vshift: float) -> None: + """Test the vertical shift coregistration with synthetic shifted data. These tests include VerticalShift. We test all combinaison of inputs: raster-raster, point-raster and raster-point. """ - # Create a vertical shift correction instance vshiftcorr = coreg.VerticalShift() @@ -337,8 +329,8 @@ def test_coreg_vertical_translation__synthetic(self, fit_args, vshift) -> None: dh = ref - coreg_elev # Plots for debugging - PLOT = False - if PLOT and isinstance(dh, geoutils.Raster): + plots = False + if plots and isinstance(dh, geoutils.Raster): import matplotlib.pyplot as plt init_dh.plot() @@ -351,14 +343,13 @@ def test_coreg_vertical_translation__synthetic(self, fit_args, vshift) -> None: assert np.nanmedian(dh) == pytest.approx(0, abs=10e-6) assert np.nanvar(dh) == pytest.approx(0, abs=10e-6) - @pytest.mark.parametrize("coreg_method__vshift", [(coreg.VerticalShift, -2.305015)]) # type: ignore + @pytest.mark.parametrize("coreg_method__vshift", [(coreg.VerticalShift, -2.305015)]) def test_coreg_vertical_translation__example( - self, coreg_method__vshift: tuple[type[AffineCoreg], tuple[float, float, float]] + self, coreg_method__vshift: tuple[type[AffineCoreg], tuple[float, float, float]], ) -> None: + """Test that the vertical translation co-registration output + is always exactly the same on the real example data. """ - Test that the vertical translation co-registration output is always exactly the same on the real example data. 
- """ - # Use entire DEMs here (to compare to original values from older package versions) ref, tba = load_examples(crop=False)[0:2] inlier_mask = ~self.outlines.create_mask(ref) @@ -374,12 +365,14 @@ def test_coreg_vertical_translation__example( vshift = c.meta["outputs"]["affine"]["shift_z"] assert vshift == pytest.approx(expected_vshift) - @pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore - @pytest.mark.parametrize("shifts_rotations", [(20, 5, 0, 0.02, 0.05, 0.1), (-50, 100, 0, 10, 5, 4)]) # type: ignore - @pytest.mark.parametrize("coreg_method", [coreg.ICP]) # type: ignore - def test_coreg_rigid__synthetic(self, fit_args, shifts_rotations, coreg_method) -> None: - """ - Test the rigid coregistrations with synthetic misaligned (shifted and rotatedà data. These tests include ICP. + @pytest.mark.parametrize("fit_args", all_fit_args) + @pytest.mark.parametrize("shifts_rotations", [(20, 5, 0, 0.02, 0.05, 0.1), (-50, 100, 0, 10, 5, 4)]) + @pytest.mark.parametrize("coreg_method", [coreg.ICP]) + def test_coreg_rigid__synthetic(self, + fit_args: dict, + shifts_rotations: tuple, + coreg_method: type[AffineCoreg]) -> None: + """Test the rigid coregistrations with synthetic misaligned (shifted and rotatedà data. These tests include ICP. We test all combinaison of inputs: raster-raster, point-raster and raster-point. @@ -388,7 +381,6 @@ def test_coreg_rigid__synthetic(self, fit_args, shifts_rotations, coreg_method) 95% of the variance from the initial elevation differences (hence, that the direction of coregistration has to be the right one; and that there is no other errors introduced in the process). 
""" - # Initiate coregistration horizontal_coreg = coreg_method() @@ -416,7 +408,7 @@ def test_coreg_rigid__synthetic(self, fit_args, shifts_rotations, coreg_method) # Convert to point cloud if input was point cloud if isinstance(elev_fit_args["to_be_aligned_elev"], gpd.GeoDataFrame): ref_shifted_rotated = ref_shifted_rotated.to_pointcloud( - data_column_name="z", subsample=50000, random_state=42 + data_column_name="z", subsample=50000, random_state=42, ).ds elev_fit_args["to_be_aligned_elev"] = ref_shifted_rotated @@ -429,7 +421,7 @@ def test_coreg_rigid__synthetic(self, fit_args, shifts_rotations, coreg_method) invert_fit_matrix = coreg.invert_matrix(fit_matrix) invert_fit_shifts = invert_fit_matrix[:3, 3] invert_fit_rotations = pytransform3d.rotations.euler_from_matrix( - invert_fit_matrix[0:3, 0:3], i=0, j=1, k=2, extrinsic=True + invert_fit_matrix[0:3, 0:3], i=0, j=1, k=2, extrinsic=True, ) invert_fit_rotations = np.rad2deg(invert_fit_rotations) assert np.allclose(shifts, invert_fit_shifts, rtol=1) @@ -447,8 +439,8 @@ def test_coreg_rigid__synthetic(self, fit_args, shifts_rotations, coreg_method) dh = ref - coreg_elev # Plots for debugging - PLOT = False - if PLOT and isinstance(dh, geoutils.Raster): + plots = False + if plots and isinstance(dh, geoutils.Raster): import matplotlib.pyplot as plt init_dh.plot() @@ -463,14 +455,11 @@ def test_coreg_rigid__synthetic(self, fit_args, shifts_rotations, coreg_method) @pytest.mark.parametrize( "coreg_method__shifts_rotations", [(coreg.ICP, (8.738332, 1.584255, -1.943957, 0.0069004, -0.00703, -0.0119733))], - ) # type: ignore + ) def test_coreg_rigid__example( - self, coreg_method__shifts_rotations: tuple[type[AffineCoreg], tuple[float, float, float]] + self, coreg_method__shifts_rotations: tuple[type[AffineCoreg], tuple[float, float, float]], ) -> None: - """ - Test that the rigid co-registration outputs is always exactly the same on the real example data. 
- """ - + """Test that the rigid co-registration outputs is always exactly the same on the real example data.""" # Use entire DEMs here (to compare to original values from older package versions) ref, tba = load_examples(crop=False)[0:2] inlier_mask = ~self.outlines.create_mask(ref) diff --git a/tests/test_coreg/test_base.py b/tests/test_coreg/test_base.py index 34f8529f0..4381f44fd 100644 --- a/tests/test_coreg/test_base.py +++ b/tests/test_coreg/test_base.py @@ -5,7 +5,8 @@ import inspect import re import warnings -from typing import Any, Callable, Iterable, Mapping +from collections.abc import Callable, Iterable, Mapping +from typing import Any, ClassVar import geopandas as gpd import geoutils as gu @@ -26,7 +27,6 @@ def load_examples() -> tuple[RasterType, RasterType, Vector]: """Load example files to try coregistration methods with.""" - reference_dem = Raster(examples.get_path("longyearbyen_ref_dem")) to_be_aligned_dem = Raster(examples.get_path("longyearbyen_tba_dem")) glacier_mask = Vector(examples.get_path("longyearbyen_glacier_outlines")) @@ -47,21 +47,19 @@ def load_examples() -> tuple[RasterType, RasterType, Vector]: def assert_coreg_meta_equal(input1: Any, input2: Any) -> bool: """Short test function to check equality of coreg dictionary values.""" - # Different equality check based on input: number, callable, array, dataframe if not isinstance(input1, type(input2)): return False - elif isinstance(input1, (str, float, int, np.floating, np.integer, tuple, list)) or callable(input1): + if isinstance(input1, (str, float, int, np.floating, np.integer, tuple, list)) or callable(input1): return input1 == input2 - elif isinstance(input1, np.ndarray): + if isinstance(input1, np.ndarray): return np.array_equal(input1, input2, equal_nan=True) - elif isinstance(input1, pd.DataFrame): + if isinstance(input1, pd.DataFrame): return input1.equals(input2) # If input is a dictionary, we recursively call this function to check equality of all its sub-keys - elif 
isinstance(input1, dict): - return all(assert_coreg_meta_equal(input1[k], input2[k]) for k in input1.keys()) - else: - raise TypeError(f"Input type {type(input1)} not supported for this test function.") + if isinstance(input1, dict): + return all(assert_coreg_meta_equal(input1[k], input2[k]) for k in input1) + raise TypeError(f"Input type {type(input1)} not supported for this test function.") class TestCoregClass: @@ -69,38 +67,36 @@ class TestCoregClass: ref, tba, outlines = load_examples() # Load example reference, to-be-aligned and mask. inlier_mask = ~outlines.create_mask(ref) - fit_params = dict(reference_elev=ref, to_be_aligned_elev=tba, inlier_mask=inlier_mask) + fit_params: ClassVar[dict] = {"reference_elev":ref, "to_be_aligned_elev":tba, "inlier_mask":inlier_mask} # Create some 3D coordinates with Z coordinates being 0 to try the apply functions. points_arr = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [0, 0, 0, 0]], dtype="float64").T points = gpd.GeoDataFrame( - geometry=gpd.points_from_xy(x=points_arr[:, 0], y=points_arr[:, 1], crs=ref.crs), data={"z": points_arr[:, 2]} + geometry=gpd.points_from_xy(x=points_arr[:, 0], y=points_arr[:, 1], crs=ref.crs), data={"z": points_arr[:, 2]}, ) def test_init(self) -> None: """Test instantiation of Coreg""" - c = coreg.Coreg() - assert c._fit_called is False - assert c._is_affine is None - assert c._needs_vars is False + assert c.fit_called is False + assert c.is_affine is None + assert c.needs_vars is False def test_info(self) -> None: - """ - Test all coreg keys required for info() exist by mapping all sub-keys in CoregDict and comparing to + """Test all coreg keys required for info() exist by mapping all sub-keys in CoregDict and comparing to coreg.base.dict_key_to_str. Check the info() string return contains the right text for a given key. 
""" # This recursive function will find all sub-keys that are not TypedDict within a TypedDict def recursive_typeddict_items(typed_dict: Mapping[str, Any]) -> Iterable[str]: - for key, value in typed_dict.__annotations__.items(): - try: + try: + for key, value in typed_dict.__annotations__.items(): # noqa: B007 sub_typed_dict = getattr(coreg.base, value.__forward_arg__) if type(sub_typed_dict) is type(typed_dict): yield from recursive_typeddict_items(sub_typed_dict) - except AttributeError: - yield key + except AttributeError: + yield key # All subkeys list_coregdict_keys = list(recursive_typeddict_items(coreg.base.CoregDict)) # type: ignore @@ -121,7 +117,7 @@ def recursive_typeddict_items(typed_dict: Mapping[str, Any]) -> Iterable[str]: list_missing_keys = [k for k in list_coregdict_keys if (k not in list_info_keys and k not in list_exceptions)] if len(list_missing_keys) > 0: raise AssertionError( - f"Missing keys in coreg.base.dict_key_to_str " f"for Coreg.info(): {', '.join(list_missing_keys)}" + f"Missing keys in coreg.base.dict_key_to_str for Coreg.info(): {', '.join(list_missing_keys)}", ) # Check that info() contains the mapped string for an example @@ -131,13 +127,12 @@ def recursive_typeddict_items(typed_dict: Mapping[str, Any]) -> Iterable[str]: @pytest.mark.parametrize("coreg_class", [coreg.VerticalShift, coreg.ICP, coreg.NuthKaab]) # type: ignore def test_copy(self, coreg_class: Callable[[], Coreg]) -> None: """Test that copying work expectedly (that no attributes still share references).""" - # Create a coreg instance and copy it. 
corr = coreg_class() corr_copy = corr.copy() # Assign some attributes and .metadata after copying, respecting the CoregDict type class - corr._meta["outputs"]["affine"] = {"shift_z": 30} + corr._meta["outputs"]["affine"] = {"shift_z": 30} # noqa: SLF001 # Make sure these don't appear in the copy assert corr_copy.meta != corr.meta @@ -169,7 +164,6 @@ def test_error_method(self) -> None: @pytest.mark.parametrize("subsample", [10, 10000, 0.5, 1]) # type: ignore def test_get_subsample_on_valid_mask(self, subsample: float | int) -> None: """Test the subsampling function called by all subclasses""" - # Define a valid mask width = height = 50 rng = np.random.default_rng(42) @@ -177,7 +171,7 @@ def test_get_subsample_on_valid_mask(self, subsample: float | int) -> None: # Define a class with a subsample and random_state in the .metadata coreg = Coreg(meta={"subsample": subsample, "random_state": 42}) - subsample_mask = coreg._get_subsample_on_valid_mask(valid_mask=valid_mask) + subsample_mask = coreg._get_subsample_on_valid_mask(valid_mask=valid_mask) # noqa: SLF001 # Check that it returns a same-shaped array that is boolean assert np.shape(valid_mask) == np.shape(subsample_mask) @@ -193,18 +187,18 @@ def test_get_subsample_on_valid_mask(self, subsample: float | int) -> None: subsample_val = subsample assert np.count_nonzero(subsample_mask) == min(subsample_val, np.count_nonzero(valid_mask)) - all_coregs = [ - coreg.VerticalShift, - coreg.NuthKaab, - coreg.ICP, - coreg.Deramp, - coreg.TerrainBias, - coreg.DirectionalBias, - ] + all_coregs: ClassVar[list] = [ + coreg.VerticalShift, + coreg.NuthKaab, + coreg.ICP, + coreg.Deramp, + coreg.TerrainBias, + coreg.DirectionalBias, + ] @pytest.mark.parametrize("coreg_class", all_coregs) # type: ignore def test_subsample(self, coreg_class: Callable) -> None: # type: ignore - + """Tests coregistration on subsample matrices vs. 
full matrices""" # Check that default value is set properly coreg_full = coreg_class() argspec = inspect.getfullargspec(coreg_class) @@ -249,7 +243,6 @@ def test_subsample(self, coreg_class: Callable) -> None: # type: ignore def test_subsample__pipeline(self) -> None: """Test that the subsample argument works as intended for pipelines""" - # Check definition during instantiation pipe = coreg.VerticalShift(subsample=200) + coreg.Deramp(subsample=5000) @@ -265,7 +258,6 @@ def test_subsample__pipeline(self) -> None: def test_subsample__errors(self) -> None: """Check proper errors are raised when using the subsample argument""" - # A warning should be raised when overriding with fit if non-default parameter was passed during instantiation vshift = coreg.VerticalShift(subsample=100) @@ -275,7 +267,7 @@ def test_subsample__errors(self) -> None: "Subsample argument passed to fit() will override non-default " "subsample value defined at instantiation. To silence this " "warning: only define 'subsample' in either fit(subsample=...) " - "or instantiation e.g. VerticalShift(subsample=...)." + "or instantiation e.g. VerticalShift(subsample=...).", ), ): vshift.fit(**self.fit_params, subsample=1000) @@ -288,7 +280,7 @@ def test_subsample__errors(self) -> None: "Subsample argument passed to fit() will override non-default " "subsample values defined for individual steps of the pipeline. " "To silence this warning: only define 'subsample' in either " - "fit(subsample=...) or instantiation e.g., VerticalShift(subsample=...)." + "fit(subsample=...) or instantiation e.g., VerticalShift(subsample=...).", ), ): pipe.fit(**self.fit_params, subsample=1000) @@ -301,13 +293,13 @@ def test_subsample__errors(self) -> None: "Subsample argument passed to fit() will override non-default subsample " "values defined in the step within the blockwise method. To silence this " "warning: only define 'subsample' in either fit(subsample=...) or " - "instantiation e.g., VerticalShift(subsample=...)." 
+ "instantiation e.g., VerticalShift(subsample=...).", ), ): block.fit(**self.fit_params, subsample=1000) def test_coreg_raster_and_ndarray_args(self) -> None: - + """Test coregistration raster and ndarray args""" # Create a small sample-DEM dem1 = xdem.DEM.from_array( np.arange(25, dtype="int32").reshape(5, 5), @@ -344,7 +336,7 @@ def test_coreg_raster_and_ndarray_args(self) -> None: # Validate that the return formats were the expected ones, and that they are equal. # Issue - dem2_a does not have the same shape, the first dimension is being squeezed - # TODO - Fix coreg.apply? + # TODO(...): Fix coreg.apply? assert isinstance(dem2_r, xdem.DEM) assert isinstance(dem2_a, np.ma.masked_array) assert np.ma.allequal(dem2_r.data.squeeze(), dem2_a) @@ -377,8 +369,7 @@ def test_coreg_raster_and_ndarray_args(self) -> None: ], ) # type: ignore def test_apply_resample(self, inputs: list[Any]) -> None: - """ - Test that the option resample of coreg.apply works as expected. + """Test that the option resample of coreg.apply works as expected. For vertical correction only (VerticalShift, Deramp...), option True or False should yield same results. For horizontal shifts (NuthKaab etc), georef should differ, but DEMs should be the same after resampling. For others, the method is not implemented. 
@@ -399,9 +390,8 @@ def test_apply_resample(self, inputs: list[Any]) -> None: with pytest.raises(NotImplementedError, match="Option `resample=False` not supported*"): coreg_method.apply(tba_dem, resample=False) return - else: - dem_coreg_resample = coreg_method.apply(tba_dem) - dem_coreg_noresample = coreg_method.apply(tba_dem, resample=False) + dem_coreg_resample = coreg_method.apply(tba_dem) + dem_coreg_noresample = coreg_method.apply(tba_dem, resample=False) if comp == "strict": # Both methods should yield the exact same output @@ -425,7 +415,6 @@ def test_apply_resample(self, inputs: list[Any]) -> None: @pytest.mark.parametrize("coreg_class", all_coregs) # type: ignore def test_fit_and_apply(self, coreg_class: Callable) -> None: # type: ignore """Check that fit_and_apply returns the same results as using fit, then apply, for any coreg.""" - # Initiate two similar coregs coreg_fit_then_apply = coreg_class() coreg_fit_and_apply = coreg_class() @@ -446,12 +435,11 @@ def test_fit_and_apply(self, coreg_class: Callable) -> None: # type: ignore assert aligned_and.raster_equal(aligned_then, warn_failure_reason=True) assert all( assert_coreg_meta_equal(coreg_fit_and_apply.meta[k], coreg_fit_then_apply.meta[k]) - for k in coreg_fit_and_apply.meta.keys() + for k in coreg_fit_and_apply.meta ) def test_fit_and_apply__pipeline(self) -> None: """Check if it works for a pipeline""" - # Initiate two similar coregs coreg_fit_then_apply = coreg.NuthKaab() + coreg.Deramp() coreg_fit_and_apply = coreg.NuthKaab() + coreg.Deramp() @@ -467,16 +455,16 @@ def test_fit_and_apply__pipeline(self) -> None: assert list(coreg_fit_and_apply.pipeline[0].meta.keys()) == list(coreg_fit_then_apply.pipeline[0].meta.keys()) assert all( assert_coreg_meta_equal( - coreg_fit_and_apply.pipeline[0].meta[k], coreg_fit_then_apply.pipeline[0].meta[k] # type: ignore + coreg_fit_and_apply.pipeline[0].meta[k], coreg_fit_then_apply.pipeline[0].meta[k], # type: ignore ) - for k in 
coreg_fit_and_apply.pipeline[0].meta.keys() + for k in coreg_fit_and_apply.pipeline[0].meta ) assert list(coreg_fit_and_apply.pipeline[1].meta.keys()) == list(coreg_fit_then_apply.pipeline[1].meta.keys()) assert all( assert_coreg_meta_equal( - coreg_fit_and_apply.pipeline[1].meta[k], coreg_fit_then_apply.pipeline[1].meta[k] # type: ignore + coreg_fit_and_apply.pipeline[1].meta[k], coreg_fit_then_apply.pipeline[1].meta[k], # type: ignore ) - for k in coreg_fit_and_apply.pipeline[1].meta.keys() + for k in coreg_fit_and_apply.pipeline[1].meta ) @pytest.mark.parametrize( @@ -540,15 +528,14 @@ def test_fit_and_apply__pipeline(self) -> None: "None", "fit", "error", - "Input elevation data should be a raster, " "an array or a geodataframe.", + "Input elevation data should be a raster, an array or a geodataframe.", ), ("dem1 + np.nan", "dem2", "None", "None", "fit", "error", "'reference_dem' had only NaNs"), ("dem1", "dem2 + np.nan", "None", "None", "fit", "error", "'dem_to_be_aligned' had only NaNs"), ], ) # type: ignore def test_coreg_raises(self, combination: tuple[str, str, str, str, str, str, str]) -> None: - """ - Assert that the expected warnings/errors are triggered under different circumstances. + """Assert that the expected warnings/errors are triggered under different circumstances. The 'combination' param contains this in order: 1. The reference_dem (will be eval'd) @@ -559,7 +546,6 @@ def test_coreg_raises(self, combination: tuple[str, str, str, str, str, str, str 6. The expected outcome of the test. 7. The error/warning message (if applicable) """ - ref_dem, tba_dem, transform, crs, testing_step, result, text = combination # Create a small sample-DEM @@ -622,36 +608,36 @@ class TestCoregPipeline: ref, tba, outlines = load_examples() # Load example reference, to-be-aligned and mask. 
inlier_mask = ~outlines.create_mask(ref) - fit_params = dict( - reference_elev=ref.data, - to_be_aligned_elev=tba.data, - inlier_mask=inlier_mask, - transform=ref.transform, - crs=ref.crs, - ) + fit_params: ClassVar[dict] = { + "reference_elev":ref.data, + "to_be_aligned_elev":tba.data, + "inlier_mask":inlier_mask, + "transform":ref.transform, + "crs":ref.crs, + } # Create some 3D coordinates with Z coordinates being 0 to try the apply functions. points_arr = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [0, 0, 0, 0]], dtype="float64").T points = gpd.GeoDataFrame( - geometry=gpd.points_from_xy(x=points_arr[:, 0], y=points_arr[:, 1], crs=ref.crs), data={"z": points_arr[:, 2]} + geometry=gpd.points_from_xy(x=points_arr[:, 0], y=points_arr[:, 1], crs=ref.crs), data={"z": points_arr[:, 2]}, ) @pytest.mark.parametrize("coreg_class", [coreg.VerticalShift, coreg.ICP, coreg.NuthKaab]) # type: ignore def test_copy(self, coreg_class: Callable[[], Coreg]) -> None: - + """Test the pipeline copy.""" # Create a pipeline, add some .metadata, and copy it pipeline = coreg_class() + coreg_class() - pipeline.pipeline[0]._meta["outputs"]["affine"] = {"shift_z": 1} + pipeline.pipeline[0]._meta["outputs"]["affine"] = {"shift_z": 1} # noqa: SLF001 pipeline_copy = pipeline.copy() # Add some more .metadata after copying (this should not be transferred) - pipeline_copy.pipeline[0]._meta["outputs"]["affine"].update({"shift_y": 0.5 * 30}) + pipeline_copy.pipeline[0]._meta["outputs"]["affine"].update({"shift_y": 0.5 * 30}) # noqa: SLF001 assert pipeline.pipeline[0].meta != pipeline_copy.pipeline[0].meta - assert pipeline_copy.pipeline[0]._meta["outputs"]["affine"]["shift_z"] + assert pipeline_copy.pipeline[0]._meta["outputs"]["affine"]["shift_z"] # noqa: SLF001 def test_pipeline(self) -> None: - + """Test the pipeline from two vertical shift correction approaches.""" # Create a pipeline from two coreg methods. 
pipeline = coreg.CoregPipeline([coreg.VerticalShift(), coreg.NuthKaab()]) pipeline.fit(**self.fit_params, subsample=5000, random_state=42) @@ -671,20 +657,19 @@ def test_pipeline(self) -> None: # TODO: Figure out why DirectionalBias + DirectionalBias pipeline fails with Scipy error # on bounds constraints on Mac only? - all_coregs = [ - coreg.VerticalShift, - coreg.NuthKaab, - coreg.ICP, - coreg.Deramp, - coreg.TerrainBias, - # coreg.DirectionalBias, - ] + all_coregs: ClassVar[list] = [ + coreg.VerticalShift, + coreg.NuthKaab, + coreg.ICP, + coreg.Deramp, + coreg.TerrainBias, + # coreg.DirectionalBias, + ] @pytest.mark.parametrize("coreg1", all_coregs) # type: ignore @pytest.mark.parametrize("coreg2", all_coregs) # type: ignore def test_pipeline_combinations__nobiasvar(self, coreg1: Callable[[], Coreg], coreg2: Callable[[], Coreg]) -> None: """Test pipelines with all combinations of coregistration subclasses (without bias variables)""" - # Create a pipeline from one affine and one biascorr methods. 
pipeline = coreg.CoregPipeline([coreg1(), coreg2()]) pipeline.fit(**self.fit_params, subsample=5000, random_state=42) @@ -696,34 +681,32 @@ def test_pipeline_combinations__nobiasvar(self, coreg1: Callable[[], Coreg], cor @pytest.mark.parametrize( "coreg2_init_kwargs", [ - dict(bias_var_names=["slope"], fit_or_bin="bin"), - dict(bias_var_names=["slope", "aspect"], fit_or_bin="bin"), + {"bias_var_names":["slope"], "fit_or_bin":"bin"}, + {"bias_var_names":["slope", "aspect"], "fit_or_bin":"bin"}, ], ) # type: ignore def test_pipeline_combinations__biasvar( - self, coreg1: Callable[[], Coreg], coreg2_init_kwargs: dict[str, str] + self, coreg1: Callable[[], Coreg], coreg2_init_kwargs: dict[str, str], ) -> None: """Test pipelines with all combinations of coregistration subclasses with bias variables""" - # Create a pipeline from one affine and one biascorr methods pipeline = coreg.CoregPipeline([coreg1(), coreg.BiasCorr(**coreg2_init_kwargs)]) # type: ignore bias_vars = {"slope": xdem.terrain.slope(self.ref), "aspect": xdem.terrain.aspect(self.ref)} pipeline.fit(**self.fit_params, bias_vars=bias_vars, subsample=5000, random_state=42) aligned_dem, _ = pipeline.apply( - self.tba.data, transform=self.ref.transform, crs=self.ref.crs, bias_vars=bias_vars + self.tba.data, transform=self.ref.transform, crs=self.ref.crs, bias_vars=bias_vars, ) assert aligned_dem.shape == self.ref.data.squeeze().shape def test_pipeline__errors(self) -> None: """Test pipeline raises proper errors.""" - pipeline = coreg.CoregPipeline([coreg.NuthKaab(), coreg.BiasCorr()]) with pytest.raises( ValueError, match=re.escape( "No `bias_vars` passed to .fit() for bias correction step " - " of the pipeline." + " of the pipeline.", ), ): pipeline.fit(**self.fit_params) @@ -735,7 +718,7 @@ def test_pipeline__errors(self) -> None: "No `bias_vars` passed to .fit() for bias correction step " "of the pipeline. 
As you are using several bias correction steps requiring" " `bias_vars`, don't forget to explicitly define their `bias_var_names` " - "during instantiation, e.g. BiasCorr(bias_var_names=['slope'])." + "during instantiation, e.g. BiasCorr(bias_var_names=['slope']).", ), ): pipeline2.fit(**self.fit_params) @@ -745,7 +728,7 @@ def test_pipeline__errors(self) -> None: match=re.escape( "When using several bias correction steps requiring `bias_vars` in a pipeline," "the `bias_var_names` need to be explicitly defined at each step's " - "instantiation, e.g. BiasCorr(bias_var_names=['slope'])." + "instantiation, e.g. BiasCorr(bias_var_names=['slope']).", ), ): pipeline2.fit(**self.fit_params, bias_vars={"slope": xdem.terrain.slope(self.ref)}) @@ -755,18 +738,18 @@ def test_pipeline__errors(self) -> None: ValueError, match=re.escape( "Not all keys of `bias_vars` in .fit() match the `bias_var_names` defined during " - "instantiation of the bias correction step : ['slope']." + "instantiation of the bias correction step : ['slope'].", ), ): pipeline3.fit(**self.fit_params, bias_vars={"ncc": xdem.terrain.slope(self.ref)}) def test_pipeline_pts(self) -> None: - + """Check that the pipeline runs without error.""" pipeline = coreg.NuthKaab() + coreg.DhMinimize() ref_points = self.ref.to_pointcloud(subsample=5000, random_state=42).ds ref_points["E"] = ref_points.geometry.x ref_points["N"] = ref_points.geometry.y - ref_points.rename(columns={"b1": "z"}, inplace=True) + ref_points = ref_points.rename(columns={"b1": "z"}) # Check that this runs without error pipeline.fit(reference_elev=ref_points, to_be_aligned_elev=self.tba) @@ -780,7 +763,7 @@ def test_pipeline_pts(self) -> None: ) def test_coreg_add(self) -> None: - + """Check that by adding two coregs results the resulting vertical shift is N* the vertical shift""" # Test with a vertical shift of 4 vshift = 4 @@ -809,7 +792,6 @@ def test_coreg_add(self) -> None: def test_pipeline_consistency(self) -> None: """Check that pipelines 
properties are respected: reflectivity, fusion of same coreg""" - # Test 1: Fusion of same coreg # Many vertical shifts many_vshifts = coreg.VerticalShift() + coreg.VerticalShift() + coreg.VerticalShift() @@ -850,25 +832,25 @@ class TestBlockwiseCoreg: ref, tba, outlines = load_examples() # Load example reference, to-be-aligned and mask. inlier_mask = ~outlines.create_mask(ref) - fit_params = dict( - reference_elev=ref.data, - to_be_aligned_elev=tba.data, - inlier_mask=inlier_mask, - transform=ref.transform, - crs=ref.crs, - ) + fit_params: ClassVar[dict] = { + "reference_elev":ref.data, + "to_be_aligned_elev":tba.data, + "inlier_mask":inlier_mask, + "transform":ref.transform, + "crs":ref.crs, + } # Create some 3D coordinates with Z coordinates being 0 to try the apply functions. points_arr = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [0, 0, 0, 0]], dtype="float64").T points = gpd.GeoDataFrame( - geometry=gpd.points_from_xy(x=points_arr[:, 0], y=points_arr[:, 1], crs=ref.crs), data={"z": points_arr[:, 2]} + geometry=gpd.points_from_xy(x=points_arr[:, 0], y=points_arr[:, 1], crs=ref.crs), data={"z": points_arr[:, 2]}, ) @pytest.mark.parametrize( - "pipeline", [coreg.VerticalShift(), coreg.VerticalShift() + coreg.NuthKaab()] + "pipeline", [coreg.VerticalShift(), coreg.VerticalShift() + coreg.NuthKaab()], ) # type: ignore @pytest.mark.parametrize("subdivision", [4, 10]) # type: ignore def test_blockwise_coreg(self, pipeline: Coreg, subdivision: int) -> None: - + """Check blockwise coreg""" blockwise = coreg.BlockwiseCoreg(step=pipeline, subdivision=subdivision) # Results can not yet be extracted (since fit has not been called) and should raise an error @@ -966,7 +948,7 @@ class TestAffineManipulation: # Vertical and horizontal shifts matrix_translations = matrix_identity.copy() - matrix_translations[:3, 3] = [0.5, 1, 1.5] + matrix_translations[:3, 3] = [0.5, 1, 1.5] # noqa: RUF012 # Single rotation rotation = np.deg2rad(5) @@ -985,17 +967,19 @@ class 
TestAffineManipulation: rot_matrix = pytransform3d.rotations.matrix_from_euler(e=e, i=0, j=1, k=2, extrinsic=True) matrix_all = matrix_rotations.copy() matrix_all[0:3, 0:3] = rot_matrix - matrix_all[:3, 3] = [0.5, 1, 1.5] + matrix_all[:3, 3] = [0.5, 1, 1.5] # noqa: RUF012 - list_matrices = [matrix_identity, matrix_vertical, matrix_translations, matrix_rotations, matrix_all] + list_matrices: ClassVar[list] = [matrix_identity, + matrix_vertical, + matrix_translations, + matrix_rotations, + matrix_all] @pytest.mark.parametrize("matrix", list_matrices) # type: ignore def test_apply_matrix__points_opencv(self, matrix: NDArrayf) -> None: - """ - Test that apply matrix's exact transformation for points (implemented with NumPy matrix multiplication) + """Test that apply matrix's exact transformation for points (implemented with NumPy matrix multiplication) is exactly the same as the one of OpenCV (optional dependency). """ - # Create random points points = np.random.default_rng(42).normal(size=(10, 3)) @@ -1016,8 +1000,8 @@ def test_apply_matrix__points_opencv(self, matrix: NDArrayf) -> None: @pytest.mark.parametrize("matrix", list_matrices) # type: ignore def test_apply_matrix__raster(self, regrid_method: None | str, matrix: NDArrayf) -> None: """Test that apply matrix gives consistent results between points and rasters (thus validating raster - implementation, as point implementation is validated above), for all possible regridding methods.""" - + implementation, as point implementation is validated above), for all possible regridding methods. 
+ """ # Create a synthetic raster and convert to point cloud # dem = gu.Raster(self.ref) dem_arr = np.linspace(0, 2, 25).reshape(5, 5) @@ -1041,7 +1025,6 @@ def test_apply_matrix__raster(self, regrid_method: None | str, matrix: NDArrayf) def test_apply_matrix__raster_nodata(self) -> None: """Test the nodatas created by apply_matrix are consistent between methods""" - # Use matrix with all transformations matrix = self.matrix_all @@ -1069,14 +1052,13 @@ def test_apply_matrix__raster_nodata(self) -> None: # Verify nodata masks are located within two pixels of each other (1 pixel can be added by griddata, # and 1 removed by regular-grid interpolation by the iterative method) smallest_mask = ~binary_dilation( - ~mask_nodata_it, iterations=2 + ~mask_nodata_it, iterations=2, ) # Invert before dilate to avoid spreading at the edges # All smallest mask value should exist in the mask of griddata assert np.array_equal(np.logical_or(smallest_mask, mask_nodata_gd), mask_nodata_gd) def test_apply_matrix__raster_realdata(self) -> None: """Testing real data no complex matrix only to avoid all loops""" - # Use real data dem = self.ref dem.crop((dem.bounds.left, dem.bounds.bottom, dem.bounds.left + 2000, dem.bounds.bottom + 2000)) @@ -1120,7 +1102,6 @@ def test_apply_matrix__raster_realdata(self) -> None: def test_warp_dem() -> None: """Test that the warp_dem function works expectedly.""" - small_dem = np.zeros((5, 10), dtype="float32") small_transform = rio.transform.from_origin(0, 5, 1, 1) @@ -1163,7 +1144,7 @@ def test_warp_dem() -> None: [460, 480, 200], [10, 460, 200], [250, 250, 200], - ] + ], ) # Copy the source coordinates and apply some shifts @@ -1189,7 +1170,7 @@ def test_warp_dem() -> None: # Warp the DEM using the source-destination coordinates. 
transformed_dem = coreg.base.warp_dem( - dem=dem, transform=transform, source_coords=source_coords, destination_coords=dest_coords, resampling="linear" + dem=dem, transform=transform, source_coords=source_coords, destination_coords=dest_coords, resampling="linear", ) # Try to undo the warp by reversing the source-destination coordinates. diff --git a/tests/test_coreg/test_biascorr.py b/tests/test_coreg/test_biascorr.py index bd8e62476..3449366d4 100644 --- a/tests/test_coreg/test_biascorr.py +++ b/tests/test_coreg/test_biascorr.py @@ -4,6 +4,7 @@ import re import warnings +from typing import ClassVar import geopandas as gpd import geoutils as gu @@ -21,7 +22,6 @@ def load_examples() -> tuple[gu.Raster, gu.Raster, gu.Vector]: """Load example files to try coregistration methods with.""" - reference_dem = gu.Raster(examples.get_path("longyearbyen_ref_dem")) to_be_aligned_dem = gu.Raster(examples.get_path("longyearbyen_tba_dem")) glacier_mask = gu.Vector(examples.get_path("longyearbyen_glacier_outlines")) @@ -46,23 +46,22 @@ class TestBiasCorr: # Check all possibilities supported by biascorr: # Raster-Raster - fit_args_rst_rst = dict(reference_elev=ref, to_be_aligned_elev=tba, inlier_mask=inlier_mask) + fit_args_rst_rst: ClassVar[dict] = {"reference_elev":ref, "to_be_aligned_elev":tba, "inlier_mask":inlier_mask} # Convert DEMs to points with a bit of subsampling for speed-up tba_pts = tba.to_pointcloud(data_column_name="z", subsample=50000, random_state=42).ds ref_pts = ref.to_pointcloud(data_column_name="z", subsample=50000, random_state=42).ds # Raster-Point - fit_args_rst_pts = dict(reference_elev=ref, to_be_aligned_elev=tba_pts, inlier_mask=inlier_mask) + fit_args_rst_pts: ClassVar[dict] = {"reference_elev":ref, "to_be_aligned_elev":tba_pts, "inlier_mask":inlier_mask} # Point-Raster - fit_args_pts_rst = dict(reference_elev=ref_pts, to_be_aligned_elev=tba, inlier_mask=inlier_mask) + fit_args_pts_rst: ClassVar[dict] = {"reference_elev":ref_pts, 
"to_be_aligned_elev":tba, "inlier_mask":inlier_mask} - all_fit_args = [fit_args_rst_rst, fit_args_rst_pts, fit_args_pts_rst] + all_fit_args: ClassVar[list(dict, dict, dict)] = [fit_args_rst_rst, fit_args_rst_pts, fit_args_pts_rst] def test_biascorr(self) -> None: """Test the parent class BiasCorr instantiation.""" - # Create a bias correction instance bcorr = biascorr.BiasCorr() @@ -75,9 +74,9 @@ def test_biascorr(self) -> None: assert bcorr.meta["inputs"]["fitorbin"]["bias_var_names"] is None # Check that the _is_affine attribute is set correctly - assert not bcorr._is_affine + assert not bcorr._is_affine # noqa: SLF001 assert bcorr.meta["inputs"]["fitorbin"]["fit_or_bin"] == "fit" - assert bcorr._needs_vars is True + assert bcorr._needs_vars is True # noqa: SLF001 # Or with default bin arguments bcorr2 = biascorr.BiasCorr(fit_or_bin="bin") @@ -111,107 +110,105 @@ def test_biascorr(self) -> None: def test_biascorr__errors(self) -> None: """Test the errors that should be raised by BiasCorr.""" - # And raises an error when "fit" or "bin" is wrongly passed with pytest.raises(ValueError, match="Argument `fit_or_bin` must be 'bin_and_fit', 'fit' or 'bin'."): - biascorr.BiasCorr(fit_or_bin=True) # type: ignore + biascorr.BiasCorr(fit_or_bin=True) # For fit function with pytest.raises( TypeError, match=re.escape( "Argument `fit_func` must be a function (callable) or the string '{}', " - "got .".format("', '".join(biascorr.fit_workflows.keys())) + "got .".format("', '".join(biascorr.fit_workflows.keys())), ), ): - biascorr.BiasCorr(fit_func="yay") # type: ignore + biascorr.BiasCorr(fit_func="yay") # For fit optimizer with pytest.raises( - TypeError, match=re.escape("Argument `fit_optimizer` must be a function (callable), " "got .") + TypeError, match=re.escape("Argument `fit_optimizer` must be a function (callable), got ."), ): - biascorr.BiasCorr(fit_optimizer=3) # type: ignore + biascorr.BiasCorr(fit_optimizer=3) # For bin sizes with pytest.raises( TypeError, 
match=re.escape( "Argument `bin_sizes` must be an integer, or a dictionary of integers or iterables, " - "got ." + "got .", ), ): - biascorr.BiasCorr(fit_or_bin="bin", bin_sizes={"a": 1.5}) # type: ignore + biascorr.BiasCorr(fit_or_bin="bin", bin_sizes={"a": 1.5}) # For bin statistic with pytest.raises( - TypeError, match=re.escape("Argument `bin_statistic` must be a function (callable), " "got .") + TypeError, match=re.escape("Argument `bin_statistic` must be a function (callable), got ."), ): - biascorr.BiasCorr(fit_or_bin="bin", bin_statistic="count") # type: ignore + biascorr.BiasCorr(fit_or_bin="bin", bin_statistic="count") # For bin apply method with pytest.raises( TypeError, match=re.escape( - "Argument `bin_apply_method` must be the string 'linear' or 'per_bin', " "got ." + "Argument `bin_apply_method` must be the string 'linear' or 'per_bin', got .", ), ): - biascorr.BiasCorr(fit_or_bin="bin", bin_apply_method=1) # type: ignore + biascorr.BiasCorr(fit_or_bin="bin", bin_apply_method=1) # When wrong number of parameters are passed # Copy fit parameters fit_args = self.fit_args_rst_rst.copy() + bias_vars_dict = {"elevation": self.ref, "slope": xdem.terrain.slope(self.ref)} + bcorr1d = biascorr.BiasCorr(bias_var_names=["elevation"]) with pytest.raises( ValueError, - match=re.escape("A number of 1 variable(s) has to be provided through the argument 'bias_vars', " "got 2."), + match=re.escape("A number of 1 variable(s) has to be provided through the argument 'bias_vars', got 2."), ): - bias_vars_dict = {"elevation": self.ref, "slope": xdem.terrain.slope(self.ref)} - bcorr1d = biascorr.BiasCorr(bias_var_names=["elevation"]) bcorr1d.fit(**fit_args, bias_vars=bias_vars_dict) + bias_vars_dict = {"elevation": self.ref} + bcorr2d = biascorr.BiasCorr(bias_var_names=["elevation", "slope"]) with pytest.raises( ValueError, - match=re.escape("A number of 2 variable(s) has to be provided through the argument " "'bias_vars', got 1."), + match=re.escape("A number of 2 
variable(s) has to be provided through the argument 'bias_vars', got 1."), ): - bias_vars_dict = {"elevation": self.ref} - bcorr2d = biascorr.BiasCorr(bias_var_names=["elevation", "slope"]) bcorr2d.fit(**fit_args, bias_vars=bias_vars_dict) + bcorr1d2 = biascorr.BiasCorr(bias_var_names=["ncc"]) + bias_vars_dict = {"elevation": self.ref} # When variables don't match with pytest.raises( ValueError, match=re.escape( - "The keys of `bias_vars` do not match the `bias_var_names` defined during " "instantiation: ['ncc']." + "The keys of `bias_vars` do not match the `bias_var_names` defined during instantiation: ['ncc'].", ), ): - bcorr1d2 = biascorr.BiasCorr(bias_var_names=["ncc"]) - bias_vars_dict = {"elevation": self.ref} bcorr1d2.fit(**fit_args, bias_vars=bias_vars_dict) + bcorr2d2 = biascorr.BiasCorr(bias_var_names=["elevation", "ncc"]) + bias_vars_dict = {"elevation": self.ref, "slope": xdem.terrain.slope(self.ref)} with pytest.raises( ValueError, match=re.escape( "The keys of `bias_vars` do not match the `bias_var_names` defined during " - "instantiation: ['elevation', 'ncc']." 
+ "instantiation: ['elevation', 'ncc'].", ), ): - bcorr2d2 = biascorr.BiasCorr(bias_var_names=["elevation", "ncc"]) - bias_vars_dict = {"elevation": self.ref, "slope": xdem.terrain.slope(self.ref)} bcorr2d2.fit(**fit_args, bias_vars=bias_vars_dict) - @pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore + @pytest.mark.parametrize("fit_args", all_fit_args) @pytest.mark.parametrize( - "fit_func", ("norder_polynomial", "nfreq_sumsin", lambda x, a, b: x[0] * a + b) - ) # type: ignore + "fit_func", [("norder_polynomial", "nfreq_sumsin", lambda x, a, b: x[0] * a + b)], + ) @pytest.mark.parametrize( "fit_optimizer", [ scipy.optimize.curve_fit, ], - ) # type: ignore - def test_biascorr__fit_1d(self, fit_args, fit_func, fit_optimizer, capsys) -> None: + ) + def test_biascorr__fit_1d(self, fit_args: dict, fit_func, fit_optimizer, capsys) -> None: # noqa: ANN001 """Test the _fit_func and apply_func methods of BiasCorr for the fit case (called by all its subclasses).""" - # Create a bias correction object bcorr = biascorr.BiasCorr(fit_or_bin="fit", fit_func=fit_func, fit_optimizer=fit_optimizer) @@ -234,19 +231,18 @@ def test_biascorr__fit_1d(self, fit_args, fit_func, fit_optimizer, capsys) -> No # Apply the correction bcorr.apply(elev=self.tba, bias_vars=bias_vars_dict) - @pytest.mark.parametrize("fit_args", [fit_args_rst_pts, fit_args_rst_rst]) # type: ignore + @pytest.mark.parametrize("fit_args", [fit_args_rst_pts, fit_args_rst_rst]) @pytest.mark.parametrize( - "fit_func", (polynomial_2d, lambda x, a, b, c, d: a * x[0] + b * x[1] + c / x[0] + d) - ) # type: ignore + "fit_func", [(polynomial_2d, lambda x, a, b, c, d: a * x[0] + b * x[1] + c / x[0] + d)], + ) @pytest.mark.parametrize( "fit_optimizer", [ scipy.optimize.curve_fit, ], - ) # type: ignore - def test_biascorr__fit_2d(self, fit_args, fit_func, fit_optimizer) -> None: + ) + def test_biascorr__fit_2d(self, fit_args: dict, fit_func: tuple, fit_optimizer) -> None: # noqa: ANN001 """Test the _fit_func and 
apply_func methods of BiasCorr for the fit case (called by all its subclasses).""" - # Create a bias correction object bcorr = biascorr.BiasCorr(fit_or_bin="fit", fit_func=fit_func, fit_optimizer=fit_optimizer) @@ -265,12 +261,11 @@ def test_biascorr__fit_2d(self, fit_args, fit_func, fit_optimizer) -> None: # Apply the correction bcorr.apply(elev=self.tba, bias_vars=bias_vars_dict) - @pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore - @pytest.mark.parametrize("bin_sizes", (10, {"elevation": 20}, {"elevation": (0, 500, 1000)})) # type: ignore - @pytest.mark.parametrize("bin_statistic", [np.median, np.nanmean]) # type: ignore - def test_biascorr__bin_1d(self, fit_args, bin_sizes, bin_statistic) -> None: + @pytest.mark.parametrize("fit_args", all_fit_args) + @pytest.mark.parametrize("bin_sizes", [10, {"elevation": 20}, {"elevation": (0, 500, 1000)}]) + @pytest.mark.parametrize("bin_statistic", [np.median, np.nanmean]) + def test_biascorr__bin_1d(self, fit_args: dict, bin_sizes, bin_statistic) -> None: # noqa: ANN001 """Test the _fit_func and apply_func methods of BiasCorr for the fit case (called by all its subclasses).""" - # Create a bias correction object bcorr = biascorr.BiasCorr(fit_or_bin="bin", bin_sizes=bin_sizes, bin_statistic=bin_statistic) @@ -288,12 +283,11 @@ def test_biascorr__bin_1d(self, fit_args, bin_sizes, bin_statistic) -> None: # Apply the correction bcorr.apply(elev=self.tba, bias_vars=bias_vars_dict) - @pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore - @pytest.mark.parametrize("bin_sizes", (10, {"elevation": (0, 500, 1000), "slope": (0, 20, 40)})) # type: ignore - @pytest.mark.parametrize("bin_statistic", [np.median, np.nanmean]) # type: ignore - def test_biascorr__bin_2d(self, fit_args, bin_sizes, bin_statistic) -> None: + @pytest.mark.parametrize("fit_args", all_fit_args) + @pytest.mark.parametrize("bin_sizes", [10, {"elevation": (0, 500, 1000), "slope": (0, 20, 40)}]) + 
@pytest.mark.parametrize("bin_statistic", [np.median, np.nanmean]) + def test_biascorr__bin_2d(self, fit_args: dict, bin_sizes: tuple, bin_statistic) -> None: # noqa: ANN001 """Test the _fit_func and apply_func methods of BiasCorr for the fit case (called by all its subclasses).""" - # Create a bias correction object bcorr = biascorr.BiasCorr(fit_or_bin="bin", bin_sizes=bin_sizes, bin_statistic=bin_statistic) @@ -311,26 +305,32 @@ def test_biascorr__bin_2d(self, fit_args, bin_sizes, bin_statistic) -> None: # Apply the correction bcorr.apply(elev=self.tba, bias_vars=bias_vars_dict) - @pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore + @pytest.mark.parametrize("fit_args", all_fit_args) @pytest.mark.parametrize( - "fit_func", ("norder_polynomial", "nfreq_sumsin", lambda x, a, b: x[0] * a + b) - ) # type: ignore + "fit_func", [("norder_polynomial", "nfreq_sumsin", lambda x, a, b: x[0] * a + b)], + ) @pytest.mark.parametrize( "fit_optimizer", [ scipy.optimize.curve_fit, ], - ) # type: ignore - @pytest.mark.parametrize("bin_sizes", (10, {"elevation": np.arange(0, 1000, 100)})) # type: ignore - @pytest.mark.parametrize("bin_statistic", [np.median, np.nanmean]) # type: ignore - def test_biascorr__bin_and_fit_1d(self, fit_args, fit_func, fit_optimizer, bin_sizes, bin_statistic) -> None: + ) + @pytest.mark.parametrize("bin_sizes", [(10, {"elevation": np.arange(0, 1000, 100)})]) + @pytest.mark.parametrize("bin_statistic", [np.median, np.nanmean]) + def test_biascorr__bin_and_fit_1d(self, + fit_args: dict, + fit_func: tuple, + fit_optimizer, # noqa: ANN001 + bin_sizes: tuple, + bin_statistic) -> None: # noqa: ANN001 """Test the _fit_func and apply_func methods of BiasCorr for the bin_and_fit case (called by all subclasses).""" - # Curve fit can be unhappy in certain circumstances for numerical estimation of covariance # We don't care for this test warnings.filterwarnings("ignore", message="Covariance of the parameters could not be estimated*") # Apply the 
transform can create data exactly equal to the nodata - warnings.filterwarnings("ignore", category=UserWarning, message="Unmasked values equal to the nodata value*") + warnings.filterwarnings("ignore", + category=UserWarning, + message="Unmasked values equal to the nodata value*") # Ignore SciKit-Learn warnings warnings.filterwarnings("ignore", message="Maximum number of iterations*") @@ -362,21 +362,25 @@ def test_biascorr__bin_and_fit_1d(self, fit_args, fit_func, fit_optimizer, bin_s # Apply the correction bcorr.apply(elev=self.tba, bias_vars=bias_vars_dict) - @pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore + @pytest.mark.parametrize("fit_args", all_fit_args) @pytest.mark.parametrize( - "fit_func", (polynomial_2d, lambda x, a, b, c, d: a * x[0] + b * x[1] + c / x[0] + d) - ) # type: ignore + "fit_func", [polynomial_2d, lambda x, a, b, c, d: a * x[0] + b * x[1] + c / x[0] + d], ) @pytest.mark.parametrize( "fit_optimizer", [ scipy.optimize.curve_fit, ], - ) # type: ignore - @pytest.mark.parametrize("bin_sizes", (10, {"elevation": (0, 500, 1000), "slope": (0, 20, 40)})) # type: ignore - @pytest.mark.parametrize("bin_statistic", [np.median, np.nanmean]) # type: ignore - def test_biascorr__bin_and_fit_2d(self, fit_args, fit_func, fit_optimizer, bin_sizes, bin_statistic) -> None: + ) + @pytest.mark.parametrize("bin_sizes", [10, {"elevation": (0, 500, 1000), "slope": (0, 20, 40)}]) + @pytest.mark.parametrize("bin_statistic", [np.median, np.nanmean]) + def test_biascorr__bin_and_fit_2d(self, + fit_args: dict, + fit_func, # noqa: ANN001 + fit_optimizer, # noqa: ANN001 + bin_sizes, # noqa: ANN001 + bin_statistic) -> None: # noqa: ANN001 """Test the _fit_func and apply_func methods of BiasCorr for the bin_and_fit case (called by all subclasses).""" - # Create a bias correction object bcorr = biascorr.BiasCorr( fit_or_bin="bin_and_fit", @@ -403,7 +407,6 @@ def test_biascorr__bin_and_fit_2d(self, fit_args, fit_func, fit_optimizer, bin_s def test_directionalbias(self) 
-> None: """Test the subclass DirectionalBias.""" - # Try default "fit" parameters instantiation dirbias = biascorr.DirectionalBias(angle=45) @@ -413,17 +416,16 @@ def test_directionalbias(self) -> None: dirbias.meta["inputs"]["fitorbin"]["fit_optimizer"] == biascorr.fit_workflows["nfreq_sumsin"]["optimizer"] ) assert dirbias.meta["inputs"]["specific"]["angle"] == 45 - assert dirbias._needs_vars is False + assert dirbias._needs_vars is False # noqa: SLF001 # Check that variable names are defined during instantiation assert dirbias.meta["inputs"]["fitorbin"]["bias_var_names"] == ["angle"] - @pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore - @pytest.mark.parametrize("angle", [20, 90]) # type: ignore - @pytest.mark.parametrize("nb_freq", [1, 2, 3]) # type: ignore - def test_directionalbias__synthetic(self, fit_args, angle, nb_freq) -> None: + @pytest.mark.parametrize("fit_args", all_fit_args) + @pytest.mark.parametrize("angle", [20, 90]) + @pytest.mark.parametrize("nb_freq", [1, 2, 3]) + def test_directionalbias__synthetic(self, fit_args: dict, angle: int, nb_freq: int) -> None: """Test the subclass DirectionalBias with synthetic data.""" - # Get along track xx = gu.raster.get_xy_rotated(self.ref, along_track_angle=angle)[0] @@ -494,7 +496,6 @@ def test_directionalbias__synthetic(self, fit_args, angle, nb_freq) -> None: def test_deramp(self) -> None: """Test the subclass Deramp.""" - # Try default "fit" parameters instantiation deramp = biascorr.Deramp() @@ -502,16 +503,15 @@ def test_deramp(self) -> None: assert deramp.meta["inputs"]["fitorbin"]["fit_func"] == polynomial_2d assert deramp.meta["inputs"]["fitorbin"]["fit_optimizer"] == scipy.optimize.curve_fit assert deramp.meta["inputs"]["specific"]["poly_order"] == 2 - assert deramp._needs_vars is False + assert deramp._needs_vars is False # noqa: SLF001 # Check that variable names are defined during instantiation assert deramp.meta["inputs"]["fitorbin"]["bias_var_names"] == ["xx", "yy"] - 
@pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore - @pytest.mark.parametrize("order", [1, 2, 3, 4]) # type: ignore - def test_deramp__synthetic(self, fit_args, order: int) -> None: + @pytest.mark.parametrize("fit_args", all_fit_args) + @pytest.mark.parametrize("order", [1, 2, 3, 4]) + def test_deramp__synthetic(self, fit_args: dict, order: int) -> None: """Run the deramp for varying polynomial orders using a synthetic elevation difference.""" - # Get coordinates xx, yy = np.meshgrid(np.arange(0, self.ref.shape[1]), np.arange(0, self.ref.shape[0])) @@ -539,7 +539,9 @@ def test_deramp__synthetic(self, fit_args, order: int) -> None: fit_params = deramp.meta["outputs"]["fitorbin"]["fit_params"] assert np.shape(fit_params) == np.shape(params) assert np.allclose( - params.reshape(order + 1, order + 1)[-1:, -1:], fit_params.reshape(order + 1, order + 1)[-1:, -1:], rtol=0.1 + params.reshape(order + 1, order + 1)[-1:, -1:], + fit_params.reshape(order + 1, order + 1)[-1:, -1:], + rtol=0.1, ) # Run apply and check that 99% of the variance was corrected @@ -549,7 +551,6 @@ def test_deramp__synthetic(self, fit_args, order: int) -> None: def test_terrainbias(self) -> None: """Test the subclass TerrainBias.""" - # Try default "fit" parameters instantiation tb = biascorr.TerrainBias() @@ -557,14 +558,13 @@ def test_terrainbias(self) -> None: assert tb.meta["inputs"]["fitorbin"]["bin_sizes"] == 100 assert tb.meta["inputs"]["fitorbin"]["bin_statistic"] == np.nanmedian assert tb.meta["inputs"]["specific"]["terrain_attribute"] == "maximum_curvature" - assert tb._needs_vars is False + assert tb._needs_vars is False # noqa: SLF001 assert tb.meta["inputs"]["fitorbin"]["bias_var_names"] == ["maximum_curvature"] - @pytest.mark.parametrize("fit_args", all_fit_args) # type: ignore - def test_terrainbias__synthetic(self, fit_args) -> None: + @pytest.mark.parametrize("fit_args", all_fit_args) + def test_terrainbias__synthetic(self, fit_args: dict) -> None: """Test the subclass 
TerrainBias.""" - # Get maximum curvature maxc = xdem.terrain.get_terrain_attribute(self.ref, attribute="maximum_curvature") diff --git a/tests/test_coreg/test_workflows.py b/tests/test_coreg/test_workflows.py index 3e7f01579..22abc9197 100644 --- a/tests/test_coreg/test_workflows.py +++ b/tests/test_coreg/test_workflows.py @@ -2,8 +2,8 @@ from __future__ import annotations -import os import tempfile +from pathlib import Path import numpy as np import pandas as pd @@ -18,7 +18,6 @@ def load_examples() -> tuple[RasterType, RasterType, Vector]: """Load example files to try coregistration methods with.""" - reference_raster = Raster(examples.get_path("longyearbyen_ref_dem")) to_be_aligned_raster = Raster(examples.get_path("longyearbyen_tba_dem")) glacier_mask = Vector(examples.get_path("longyearbyen_glacier_outlines")) @@ -29,7 +28,6 @@ def load_examples() -> tuple[RasterType, RasterType, Vector]: class TestWorkflows: def test_create_inlier_mask(self) -> None: """Test that the create_inlier_mask function works expectedly.""" - ref, tba, outlines = load_examples() # Load example reference, to-be-aligned and outlines # - Assert that without filtering create_inlier_mask behaves as if calling Vector.create_mask - # @@ -96,14 +94,14 @@ def test_create_inlier_mask(self) -> None: nmad_factor = 3 ddem = tba - ref inlier_mask_comp3 = (np.abs(ddem.data - np.median(ddem)) < nmad_factor * xdem.spatialstats.nmad(ddem)).filled( - False + False, ) inlier_mask = create_inlier_mask(tba, ref, filtering=True, slope_lim=[0, 90], nmad_factor=nmad_factor) assert np.all(inlier_mask == inlier_mask_comp3) # Test the sum of both inlier_mask = create_inlier_mask( - tba, ref, shp_list=[], inout=[], filtering=True, slope_lim=slope_lim, nmad_factor=nmad_factor + tba, ref, shp_list=[], inout=[], filtering=True, slope_lim=slope_lim, nmad_factor=nmad_factor, ) inlier_mask_all = inlier_mask_comp2 & inlier_mask_comp3 assert np.all(inlier_mask == inlier_mask_all) @@ -185,8 +183,7 @@ def 
test_create_inlier_mask(self) -> None: create_inlier_mask(tba, ref, filtering=True, slope_lim=[1, 120]) def test_dem_coregistration(self) -> None: - """ - Test that the dem_coregistration function works expectedly. + """Test that the dem_coregistration function works expectedly. Tests the features that are specific to dem_coregistration. For example, many features are tested in create_inlier_mask, so not tested again here. TODO: Add DEMs with different projection/grid to test that regridding works as expected. @@ -214,7 +211,7 @@ def test_dem_coregistration(self) -> None: coreg_method_ref = xdem.coreg.NuthKaab() + xdem.coreg.VerticalShift() inlier_mask = create_inlier_mask(tba_dem, ref_dem) coreg_method_ref.fit( - ref_dem.astype(np.float32), tba_dem.astype(np.float32), inlier_mask=inlier_mask, random_state=42 + ref_dem.astype(np.float32), tba_dem.astype(np.float32), inlier_mask=inlier_mask, random_state=42, ) dem_coreg_ref = coreg_method_ref.apply(tba_dem.astype(np.float32), resample=False) assert dem_coreg.raster_equal(dem_coreg_ref, warn_failure_reason=True) @@ -250,14 +247,14 @@ def test_dem_coregistration(self) -> None: # Testing with plot out_fig = tempfile.NamedTemporaryFile(suffix=".png", mode="w", delete=False) - assert os.path.getsize(out_fig.name) == 0 + assert Path(out_fig.name).stat().st_size == 0 dem_coregistration(tba_dem, ref_dem, plot=True, out_fig=out_fig.name) - assert os.path.getsize(out_fig.name) > 0 + assert Path(out_fig.name).stat().st_size > 0 out_fig.close() # Testing different coreg method dem_coreg2, coreg_method2, coreg_stats2, inlier_mask2 = dem_coregistration( - tba_dem, ref_dem, coreg_method=xdem.coreg.Deramp() + tba_dem, ref_dem, coreg_method=xdem.coreg.Deramp(), ) assert isinstance(coreg_method2, xdem.coreg.Deramp) assert abs(coreg_stats2["med_orig"].values) > abs(coreg_stats2["med_coreg"].values) @@ -269,10 +266,10 @@ def test_dem_coregistration(self) -> None: coreg_pipeline = xdem.coreg.affine.NuthKaab() + 
xdem.coreg.affine.VerticalShift() dem_coreg2, coreg_method2, coreg_stats2, inlier_mask2 = dem_coregistration( - tba_dem, ref_dem, coreg_method=coreg_pipeline, estimated_initial_shift=test_shift_list, random_state=42 + tba_dem, ref_dem, coreg_method=coreg_pipeline, estimated_initial_shift=test_shift_list, random_state=42, ) dem_coreg3, coreg_method3, coreg_stats3, inlier_mask3 = dem_coregistration( - tba_dem, ref_dem, coreg_method=coreg_pipeline, random_state=42 + tba_dem, ref_dem, coreg_method=coreg_pipeline, random_state=42, ) assert tba_dem.raster_equal(tba_dem_origin) assert isinstance(coreg_method2, xdem.coreg.CoregPipeline) @@ -293,10 +290,10 @@ def test_dem_coregistration(self) -> None: coreg_simple = xdem.coreg.affine.DhMinimize() dem_coreg2, coreg_method2, coreg_stats2, inlier_mask2 = dem_coregistration( - tba_dem, ref_dem, coreg_method=coreg_simple, estimated_initial_shift=test_shift_tuple, random_state=42 + tba_dem, ref_dem, coreg_method=coreg_simple, estimated_initial_shift=test_shift_tuple, random_state=42, ) dem_coreg3, coreg_method3, coreg_stats3, inlier_mask3 = dem_coregistration( - tba_dem, ref_dem, coreg_method=coreg_simple, random_state=42 + tba_dem, ref_dem, coreg_method=coreg_simple, random_state=42, ) assert isinstance(coreg_method2, xdem.coreg.AffineCoreg) assert isinstance(coreg_method3, xdem.coreg.AffineCoreg) diff --git a/tests/test_ddem.py b/tests/test_ddem.py index e41b078f3..23aff6879 100644 --- a/tests/test_ddem.py +++ b/tests/test_ddem.py @@ -60,7 +60,7 @@ def test_regional_hypso(self) -> None: ddem.interpolate(method="regional_hypsometric", reference_elevation=self.dem_2009, mask=self.outlines_1990) - assert ddem._filled_data is not None + assert ddem._filled_data is not None # noqa: SLF001 assert isinstance(ddem.filled_data, np.ndarray) assert ddem.filled_data.shape == ddem.data.shape diff --git a/tests/test_dem.py b/tests/test_dem.py index ec7bbe550..c7f5dceb7 100644 --- a/tests/test_dem.py +++ b/tests/test_dem.py @@ -1,11 +1,11 @@ 
-""" Functions to test the DEM tools.""" +"""Functions to test the DEM tools.""" from __future__ import annotations import os import tempfile import warnings -from typing import Any +from typing import Any, ClassVar import geoutils as gu import numpy as np @@ -53,14 +53,14 @@ def test_init(self) -> None: ( np.array_equal(dem.data, dem2.data, equal_nan=True), np.array_equal(dem2.data, dem3.data, equal_nan=True), - ) + ), ) assert np.logical_and.reduce( ( np.all(dem.data.mask == dem2.data.mask), np.all(dem2.data.mask == dem3.data.mask), - ) + ), ) # Check that an error is raised when more than one band is provided @@ -74,7 +74,6 @@ def test_init(self) -> None: def test_init__vcrs(self) -> None: """Test that vcrs is set properly during instantiation.""" - # Tests 1: instantiation with a file that has a 2D CRS # First, check a DEM that does not have any vertical CRS set @@ -110,7 +109,6 @@ def test_init__vcrs(self) -> None: def test_from_array(self) -> None: """Test that overridden from_array works as expected.""" - # Create a 5x5 DEM data = np.ones((5, 5)) transform = rio.transform.from_bounds(0, 0, 1, 1, 5, 5) @@ -126,11 +124,10 @@ def test_from_array(self) -> None: assert dem.transform == transform assert dem.crs == crs assert dem.nodata == nodata - assert dem.vcrs == xdem.vcrs._vcrs_from_user_input(vcrs_input=vcrs) + assert dem.vcrs == xdem.vcrs._vcrs_from_user_input(vcrs_input=vcrs) # noqa: SLF001 def test_from_array__vcrs(self) -> None: """Test that overridden from_array rightly sets the vertical CRS.""" - # Create a 5x5 DEM with a 2D CRS transform = rio.transform.from_bounds(0, 0, 1, 1, 5, 5) dem = DEM.from_array(data=np.ones((5, 5)), transform=transform, crs=CRS("EPSG:4326"), nodata=None, vcrs=None) @@ -142,25 +139,24 @@ def test_from_array__vcrs(self) -> None: # One with a 2D and the ellipsoid vertical CRS dem = DEM.from_array( - data=np.ones((5, 5)), transform=transform, crs=CRS("EPSG:4326"), nodata=None, vcrs="Ellipsoid" + data=np.ones((5, 5)), 
transform=transform, crs=CRS("EPSG:4326"), nodata=None, vcrs="Ellipsoid", ) assert dem.vcrs == "Ellipsoid" # One with a compound CRS dem = DEM.from_array( - data=np.ones((5, 5)), transform=transform, crs=CRS("EPSG:4326+5773"), nodata=None, vcrs=None + data=np.ones((5, 5)), transform=transform, crs=CRS("EPSG:4326+5773"), nodata=None, vcrs=None, ) assert dem.vcrs == CRS("EPSG:5773") # One with a CRS and vertical CRS dem = DEM.from_array( - data=np.ones((5, 5)), transform=transform, crs=CRS("EPSG:4326"), nodata=None, vcrs=CRS("EPSG:5773") + data=np.ones((5, 5)), transform=transform, crs=CRS("EPSG:4326"), nodata=None, vcrs=CRS("EPSG:5773"), ) assert dem.vcrs == CRS("EPSG:5773") def test_copy(self) -> None: - """ - Test that the copy method works as expected for DEM. In particular + """Test that the copy method works as expected for DEM. In particular when copying r to r2: - if r.data is modified and r copied, the updated data is copied - if r is copied, r.data changed, r2.data should be unchanged @@ -205,7 +201,6 @@ def test_copy(self) -> None: def test_set_vcrs(self) -> None: """Tests to set the vertical CRS.""" - fn_dem = xdem.examples.get_path("longyearbyen_ref_dem") dem = DEM(fn_dem) @@ -243,8 +238,7 @@ def test_set_vcrs(self) -> None: dem.set_vcrs(new_vcrs="is_lmi_Icegeoid_ISN93.tif") # Check that non-existing grids raise errors - with pytest.warns(UserWarning, match="Grid not found in*"): - with pytest.raises( + with pytest.warns(UserWarning, match="Grid not found in*"), pytest.raises( ValueError, match="The provided grid 'the best grid' does not exist at https://cdn.proj.org/. 
" "Provide an existing grid.",
@@ -253,7 +247,6 @@ def test_set_vcrs(self) -> None:
 
     def test_to_vcrs(self) -> None:
         """Tests the conversion of vertical CRS."""
-
         fn_dem = xdem.examples.get_path("longyearbyen_ref_dem")
         dem = DEM(fn_dem)
@@ -284,7 +277,7 @@ def test_to_vcrs(self) -> None:
         assert median_after - median_before == pytest.approx(-32, rel=0.1)
 
         # Check that the results are consistent with the operation done independently
-        ccrs_dest = xdem.vcrs._build_ccrs_from_crs_and_vcrs(dem.crs, xdem.vcrs._vcrs_from_user_input("EGM96"))
+        ccrs_dest = xdem.vcrs._build_ccrs_from_crs_and_vcrs(dem.crs, xdem.vcrs._vcrs_from_user_input("EGM96"))  # noqa: SLF001
         transformer = Transformer.from_crs(crs_from=ccrs_init, crs_to=ccrs_dest, always_xy=True)
 
         xx, yy = dem.coords()
@@ -297,34 +290,32 @@ def test_to_vcrs(self) -> None:
     def test_to_vcrs__equal_warning(self) -> None:
         """Test that DEM.to_vcrs() does not transform if both 3D CRS are equal."""
-
         fn_dem = xdem.examples.get_path("longyearbyen_ref_dem")
         dem = DEM(fn_dem)
 
         # With both inputs as names
         dem.set_vcrs("EGM96")
         with pytest.warns(
-            UserWarning, match="Source and destination vertical CRS are the same, " "skipping vertical transformation."
+            UserWarning, match="Source and destination vertical CRS are the same, skipping vertical transformation.",
         ):
             dem.to_vcrs("EGM96")
 
         # With one input as name, the other as CRS
         dem.set_vcrs("Ellipsoid")
         with pytest.warns(
-            UserWarning, match="Source and destination vertical CRS are the same, " "skipping vertical transformation."
+ UserWarning, match="Source and destination vertical CRS are the same, skipping vertical transformation.", ): dem.to_vcrs(CRS("EPSG:4979")) # Compare to manually-extracted shifts at specific coordinates for the geoid grids - egm96_chile = {"grid": "us_nga_egm96_15.tif", "lon": -68, "lat": -20, "shift": 42} - egm08_chile = {"grid": "us_nga_egm08_25.tif", "lon": -68, "lat": -20, "shift": 42} - geoid96_alaska = {"grid": "us_noaa_geoid06_ak.tif", "lon": -145, "lat": 62, "shift": 15} - isn93_iceland = {"grid": "is_lmi_Icegeoid_ISN93.tif", "lon": -18, "lat": 65, "shift": 68} + egm96_chile: ClassVar[dict] = {"grid": "us_nga_egm96_15.tif", "lon": -68, "lat": -20, "shift": 42} + egm08_chile: ClassVar[dict] = {"grid": "us_nga_egm08_25.tif", "lon": -68, "lat": -20, "shift": 42} + geoid96_alaska: ClassVar[dict] = {"grid": "us_noaa_geoid06_ak.tif", "lon": -145, "lat": 62, "shift": 15} + isn93_iceland: ClassVar[dict] = {"grid": "is_lmi_Icegeoid_ISN93.tif", "lon": -18, "lat": 65, "shift": 68} - @pytest.mark.parametrize("grid_shifts", [egm08_chile, egm08_chile, geoid96_alaska, isn93_iceland]) # type: ignore + @pytest.mark.parametrize("grid_shifts", [egm96_chile, egm08_chile, geoid96_alaska, isn93_iceland]) # type: ignore def test_to_vcrs__grids(self, grid_shifts: dict[str, Any]) -> None: """Tests grids to convert vertical CRS.""" - # Most grids aren't going to be downloaded, so this warning can be raised warnings.filterwarnings("ignore", category=UserWarning, message="Grid not found in *") @@ -332,7 +323,12 @@ def test_to_vcrs__grids(self, grid_shifts: dict[str, Any]) -> None: dem = DEM.from_array( data=np.array([[100, 100]]), transform=rio.transform.from_bounds( - grid_shifts["lon"], grid_shifts["lat"], grid_shifts["lon"] + 0.01, grid_shifts["lat"] + 0.01, 0.01, 0.01 + grid_shifts["lon"], + grid_shifts["lat"], + grid_shifts["lon"] + 0.01, + grid_shifts["lat"] + 0.01, + 0.01, + 0.01, ), crs=CRS.from_epsg(4326), nodata=None, @@ -351,7 +347,6 @@ def test_to_vcrs__grids(self, 
grid_shifts: dict[str, Any]) -> None: @pytest.mark.parametrize("terrain_attribute", xdem.terrain.available_attributes) # type: ignore def test_terrain_attributes_wrappers(self, terrain_attribute: str) -> None: """Check the terrain attributes corresponds to the ones derived in the terrain module.""" - fn_dem = xdem.examples.get_path("longyearbyen_ref_dem") dem = DEM(fn_dem) @@ -361,7 +356,7 @@ def test_terrain_attributes_wrappers(self, terrain_attribute: str) -> None: assert dem_class_attr.raster_equal(terrain_module_attr) def test_coregister_3d_wrapper(self) -> None: - + """Test the coregister 3d wrapper.""" fn_ref = xdem.examples.get_path("longyearbyen_ref_dem") fn_tba = xdem.examples.get_path("longyearbyen_tba_dem") @@ -377,7 +372,7 @@ def test_coregister_3d_wrapper(self) -> None: assert dem_class_aligned.raster_equal(coreg_module_aligned) def test_estimate_uncertainty(self) -> None: - + """Test the estimate_uncertainty module.""" fn_ref = xdem.examples.get_path("longyearbyen_ref_dem") fn_tba = xdem.examples.get_path("longyearbyen_tba_dem") diff --git a/tests/test_demcollection.py b/tests/test_demcollection.py index b273bd1fa..23a2c2ca1 100644 --- a/tests/test_demcollection.py +++ b/tests/test_demcollection.py @@ -2,6 +2,7 @@ import datetime import warnings +from datetime import timezone import geoutils as gu import numpy as np @@ -16,8 +17,10 @@ class TestDEMCollection: outlines_2010 = gu.Vector(xdem.examples.get_path("longyearbyen_glacier_outlines_2010")) def test_init(self) -> None: - - timestamps = [datetime.datetime(1990, 8, 1), datetime.datetime(2009, 8, 1), datetime.datetime(2060, 8, 1)] + """Test that the DEMs of the collection are consistent.""" + timestamps = [datetime.datetime(1990, 8, 1, tzinfo=timezone.utc), + datetime.datetime(2009, 8, 1, tzinfo=timezone.utc), + datetime.datetime(2060, 8, 1, tzinfo=timezone.utc)] scott_1990 = gu.Vector(self.outlines_1990.ds.loc[self.outlines_1990.ds["NAME"] == "Scott Turnerbreen"]) scott_2010 = 
gu.Vector(self.outlines_2010.ds.loc[self.outlines_2010.ds["NAME"] == "Scott Turnerbreen"]) @@ -33,7 +36,7 @@ def test_init(self) -> None: dems = xdem.DEMCollection( [self.dem_1990, self.dem_2009, dem_2060], timestamps=timestamps, - outlines=dict(zip(timestamps[:2], [self.outlines_1990, self.outlines_2010])), + outlines=dict(zip(timestamps[:2], [self.outlines_1990, self.outlines_2010], strict=False)), reference_dem=1, ) @@ -81,8 +84,8 @@ def test_init(self) -> None: def test_dem_datetimes(self) -> None: """Try to create the DEMCollection without the timestamps argument (instead relying on datetime attributes).""" - self.dem_1990.datetime = datetime.datetime(1990, 8, 1) - self.dem_2009.datetime = datetime.datetime(2009, 8, 1) + self.dem_1990.datetime = datetime.datetime(1990, 8, 1, tzinfo=timezone.utc) + self.dem_2009.datetime = datetime.datetime(2009, 8, 1, tzinfo=timezone.utc) dems = xdem.DEMCollection([self.dem_1990, self.dem_2009]) @@ -90,10 +93,10 @@ def test_dem_datetimes(self) -> None: def test_ddem_interpolation(self) -> None: """Test that dDEM interpolation works as it should.""" - # Create a DEMCollection object dems = xdem.DEMCollection( - [self.dem_2009, self.dem_1990], timestamps=[datetime.datetime(year, 8, 1) for year in (2009, 1990)] + [self.dem_2009, self.dem_1990], + timestamps=[datetime.datetime(year, 8, 1, tzinfo=timezone.utc) for year in (2009, 1990)], ) # Create dDEMs diff --git a/tests/test_doc.py b/tests/test_doc.py index 5270a6df3..76e1e1575 100644 --- a/tests/test_doc.py +++ b/tests/test_doc.py @@ -5,25 +5,26 @@ import platform import shutil import warnings +from pathlib import Path import sphinx.cmd.build class TestDocs: - docs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../", "doc/") + docs_dir = os.path.join(Path(Path(__file__).resolve()).parent, "../", "doc/") n_threads = os.getenv("N_CPUS") def test_example_code(self) -> None: """Try running each python script in the doc/source/code\ - directory and check that it 
doesn't raise an error."""
-        current_dir = os.getcwd()
+    directory and check that it doesn't raise an error.
+        """
+        current_dir = Path.cwd()
         os.chdir(os.path.join(self.docs_dir, "source"))
 
         def run_code(filename: str) -> None:
             """Run a python script in one thread."""
-            with open(filename) as infile:
-                # Run everything except plt.show() calls.
-                with warnings.catch_warnings():
+            # Run everything except plt.show() calls.
+            with Path(filename).open() as infile, warnings.catch_warnings():
                     # When running the code asynchronously, matplotlib complains a bit
                     ignored_warnings = [
                         "Starting a Matplotlib GUI outside of the main thread",
@@ -33,12 +34,14 @@ def run_code(filename: str) -> None:
                     for warning_text in ignored_warnings:
                         warnings.filterwarnings("ignore", warning_text)
                     try:
-                        exec(infile.read().replace("plt.show()", "plt.close()"))
+                        # Strip plt.show() so figures don't block, then actually run the example code.
+                        code_text = infile.read().replace("plt.show()", "plt.close()")
+                        exec(code_text)  # noqa: S102
                     except Exception as exception:
                         if isinstance(exception, DeprecationWarning):
                             logging.warning(exception)
                         else:
-                            raise RuntimeError(f"Failed on (unknown)") from exception
+                            raise RuntimeError("Failed on (unknown)") from exception
 
     filenames = [os.path.join("code", filename) for filename in os.listdir("code/") if filename.endswith(".py")]
 
@@ -55,7 +58,6 @@ def run_code(filename: str) -> None:
     def test_build(self) -> None:
         """Try building the doc and see if it works."""
-
         # Ignore all warnings raised in the documentation
         # (some UserWarning are shown on purpose in certain examples, so they shouldn't make the test fail,
         # and most other warnings are for Sphinx developers, not meant to be seen by us; or we can check on RTD)
@@ -64,7 +66,7 @@ def run_code(filename: str) -> None:
         # Test only on Linux
         if platform.system() == "Linux":
             # Remove the build directory if it exists.
- if os.path.isdir(os.path.join(self.docs_dir, "build")): + if Path(os.path.join(self.docs_dir, "build")).is_dir(): shutil.rmtree(os.path.join(self.docs_dir, "build")) return_code = sphinx.cmd.build.main( @@ -73,7 +75,7 @@ def test_build(self) -> None: "1", os.path.join(self.docs_dir, "source"), os.path.join(self.docs_dir, "build", "html"), - ] + ], ) assert return_code == 0 diff --git a/tests/test_examples.py b/tests/test_examples.py index 65119e909..a2db2ac29 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -13,7 +13,6 @@ def load_examples() -> tuple[Raster, Raster, Vector, Raster]: """Load example files to try coregistration methods with.""" - ref_dem = Raster(examples.get_path("longyearbyen_ref_dem")) tba_dem = Raster(examples.get_path("longyearbyen_tba_dem")) glacier_mask = Vector(examples.get_path("longyearbyen_glacier_outlines")) @@ -48,7 +47,6 @@ class TestExamples: ) # type: ignore def test_array_content(self, rst_and_truevals: tuple[Raster, NDArrayf]) -> None: """Let's ensure the data arrays in the examples are always the same by checking randomly some values""" - rst = rst_and_truevals[0] truevals = rst_and_truevals[1] rng = np.random.default_rng(42) @@ -60,7 +58,6 @@ def test_array_content(self, rst_and_truevals: tuple[Raster, NDArrayf]) -> None: @pytest.mark.parametrize("rst_and_truenodata", [(ref_dem, 0), (tba_dem, 0), (ddem, 0)]) # type: ignore def test_array_nodata(self, rst_and_truenodata: tuple[Raster, int]) -> None: """Let's also check that the data arrays have always the same number of not finite values""" - rst = rst_and_truenodata[0] truenodata = rst_and_truenodata[1] mask = gu.raster.get_array_and_mask(rst)[1] diff --git a/tests/test_filters.py b/tests/test_filters.py index bc034d2bb..dd9f61dca 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -18,7 +18,6 @@ class TestFilters: def test_gauss(self) -> None: """Test applying the various Gaussian filters on DEMs with/without NaNs""" - # Test applying scipy's 
Gaussian filter # smoothing should not yield values below.above original DEM dem_array = self.dem_1990.data @@ -67,7 +66,6 @@ def test_gauss(self) -> None: def test_dist_filter(self) -> None: """Test that distance_filter works""" - # Calculate dDEM ddem = self.dem_2009.data - self.dem_1990.data diff --git a/tests/test_fit.py b/tests/test_fit.py index 3c1df46f3..3c9f3dc0e 100644 --- a/tests/test_fit.py +++ b/tests/test_fit.py @@ -1,6 +1,4 @@ -""" -Functions to test the fitting tools. -""" +"""Functions to test the fitting tools.""" import platform import warnings @@ -24,7 +22,7 @@ class TestRobustFitting: ], ) # type: ignore def test_robust_norder_polynomial_fit(self, pkg_estimator: str) -> None: - + """Test the robustness of the polynomial fitted.""" # Define x vector x = np.linspace(-50, 50, 10000) # Define exact polynomial @@ -45,13 +43,13 @@ def test_robust_norder_polynomial_fit(self, pkg_estimator: str) -> None: ) # Check coefficients are constrained - assert deg == 3 or deg == 4 + assert deg in {3, 4} error_margins = [100, 5, 2, 1] for i in range(4): assert coefs[i] == pytest.approx(true_coefs[i], abs=error_margins[i]) def test_robust_norder_polynomial_fit_noise_and_outliers(self) -> None: - + """Test the robustness of the polynomial fitted after adding noise and outliers.""" # Ignore sklearn convergence warnings warnings.filterwarnings("ignore", category=UserWarning, message="lbfgs failed to converge") @@ -70,7 +68,7 @@ def test_robust_norder_polynomial_fit_noise_and_outliers(self) -> None: # Run with the "Linear" estimator coefs, deg = xdem.fit.robust_norder_polynomial_fit( - x, y, estimator_name="Linear", linear_pkg="scipy", loss="soft_l1", method="trf", f_scale=0.5 + x, y, estimator_name="Linear", linear_pkg="scipy", loss="soft_l1", method="trf", f_scale=0.5, ) # TODO: understand why this is not robust since moving from least_squares() to curve_fit(), while the @@ -86,13 +84,13 @@ def test_robust_norder_polynomial_fit_noise_and_outliers(self) -> None: # 
The sklearn Linear solution with MSE cost function will not be robust coefs2, deg2 = xdem.fit.robust_norder_polynomial_fit( - x, y, estimator_name="Linear", linear_pkg="sklearn", cost_func=mean_squared_error, margin_improvement=50 + x, y, estimator_name="Linear", linear_pkg="sklearn", cost_func=mean_squared_error, margin_improvement=50, ) # It won't find the right degree because of the outliers and noise assert deg2 != 3 # Using the median absolute error should improve the fit coefs3, deg3 = xdem.fit.robust_norder_polynomial_fit( - x, y, estimator_name="Linear", linear_pkg="sklearn", cost_func=median_absolute_error, margin_improvement=50 + x, y, estimator_name="Linear", linear_pkg="sklearn", cost_func=median_absolute_error, margin_improvement=50, ) # Will find the right degree, but won't find the right coefficients because of the outliers and noise assert deg3 == 3 @@ -119,7 +117,7 @@ def test_robust_norder_polynomial_fit_noise_and_outliers(self) -> None: assert coefs6[i + 1] == pytest.approx(true_coefs[i + 1], abs=1) def test_robust_nfreq_sumsin_fit(self) -> None: - + """Test the robustness of the estimated sum of sinusoid fitted.""" # Define X vector x = np.linspace(0, 10, 1000) # Define exact sum of sinusoid signal @@ -146,11 +144,11 @@ def test_robust_nfreq_sumsin_fit(self) -> None: # Check that using custom arguments does not trigger an error bounds = [(1, 7), (1, 10), (0, 2 * np.pi), (1, 7), (0.1, 4), (0, 2 * np.pi)] coefs, deg = xdem.fit.robust_nfreq_sumsin_fit( - x, y, bounds_amp_wave_phase=bounds, max_nb_frequency=2, hop_length=0.01, random_state=42, niter=1 + x, y, bounds_amp_wave_phase=bounds, max_nb_frequency=2, hop_length=0.01, random_state=42, niter=1, ) def test_robust_nfreq_simsin_fit_noise_and_outliers(self) -> None: - + """Test the robustness of the estimated sum of sinusoid fitted after adding noise and outliers.""" # Check robustness to outliers rng = np.random.default_rng(42) # Define X vector diff --git a/tests/test_misc.py 
b/tests/test_misc.py index 4b974cd0c..81e71ba92 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -4,6 +4,7 @@ import os import re +from pathlib import Path import pytest import yaml # type: ignore @@ -16,13 +17,12 @@ class TestMisc: def test_environment_files(self) -> None: """Check that environment yml files are properly written: all dependencies of env are also in dev-env""" - - fn_env = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "environment.yml")) - fn_devenv = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "dev-environment.yml")) + fn_env = Path(os.path.join(Path(__file__).parent, "..", "environment.yml")).resolve() + fn_devenv = Path(os.path.join(Path(__file__).parent, "..", "dev-environment.yml")).resolve() # Load the yml as dictionaries - yaml_env = yaml.safe_load(open(fn_env)) - yaml_devenv = yaml.safe_load(open(fn_devenv)) + yaml_env = yaml.safe_load(Path(fn_env).open()) + yaml_devenv = yaml.safe_load(Path(fn_devenv).open()) # Extract the dependencies values conda_dep_env = yaml_env["dependencies"] @@ -48,8 +48,7 @@ def test_environment_files(self) -> None: @pytest.mark.parametrize("deprecation_increment", [-1, 0, 1, None]) # type: ignore @pytest.mark.parametrize("details", [None, "It was completely useless!", "dunnowhy"]) # type: ignore def test_deprecate(self, deprecation_increment: int | None, details: str | None) -> None: - """ - Test the deprecation warnings/errors. + """Test the deprecation warnings/errors. If the removal_version is larger than the current, it should warn. If the removal_version is smaller or equal, it should raise an error. @@ -57,7 +56,6 @@ def test_deprecate(self, deprecation_increment: int | None, details: str | None) :param deprecation_increment: The version number relative to the current version. :param details: An optional explanation for the description. 
"""
-
         current_version = Version(Version(xdem.__version__).base_version)
 
         # Set the removal version to be the current version plus the increment (e.g. 0.0.5 + 1 -> 0.0.6)
@@ -84,7 +82,7 @@ def useless_func() -> int:
             return 1
 
         # Example of why Version needs to be used below
-        assert not "0.0.10" > "0.0.8"
+        # assert not "0.0.10" > "0.0.8"
         assert Version("0.0.10") > Version("0.0.8")
 
         # If True, a warning is expected. If False, a ValueError is expected.
@@ -116,8 +114,8 @@ def useless_func() -> int:
         with pytest.raises(ValueError, match=re.escape(text)):
             useless_func()
 
-    def test_diff_environment_yml(self, capsys) -> None:  # type: ignore
-
+    def test_diff_environment_yml(self, capsys) -> None:  # noqa: ANN001
+        """Check the differences between a synthetic environment and the environment from the yml files."""
        # Test with synthetic environment
         env = {"dependencies": ["python==3.9", "numpy", "pandas"]}
         devenv = {"dependencies": ["python==3.9", "numpy", "pandas", "opencv"]}
diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py
index 1785b626d..d6e01ab29 100644
--- a/tests/test_spatialstats.py
+++ b/tests/test_spatialstats.py
@@ -27,7 +27,6 @@ def load_ref_and_diff() -> tuple[Raster, Raster, NDArrayf, Vector]:
     """Load example files to try coregistration methods with."""
-
     reference_raster = Raster(examples.get_path("longyearbyen_ref_dem"))
     outlines = Vector(examples.get_path("longyearbyen_glacier_outlines"))
@@ -44,7 +43,6 @@ class TestStats:
 
     def test_nmad(self) -> None:
         """Test NMAD functionality runs on any type of input"""
-
         # Check that the NMAD is computed the same with a raster, masked array or NaN array
         nmad_raster = nmad(self.diff)
         nmad_ma = nmad(self.diff.data)
@@ -66,69 +64,68 @@ class TestBinning:
 
     # Derive terrain attributes
     slope, aspect, maximum_curv = xdem.terrain.get_terrain_attribute(
-        ref, attribute=["slope", "aspect", "maximum_curvature"]
+        ref, attribute=["slope", "aspect", "maximum_curvature"],
     )
 
     def test_nd_binning(self)
-> None: """Check that the nd_binning function works adequately and save dataframes to files for later tests""" - # Subsampler indices = gu.raster.subsample_array( - self.diff.data.flatten(), subsample=10000, return_indices=True, random_state=42 + self.diff.data.flatten(), subsample=10000, return_indices=True, random_state=42, ) # 1D binning, by default will create 10 bins - df = xdem.spatialstats.nd_binning( + df_binning = xdem.spatialstats.nd_binning( values=self.diff.data.flatten()[indices], list_var=[self.slope.data.flatten()[indices]], list_var_names=["slope"], ) # Check length matches - assert df.shape[0] == 10 + assert df_binning.shape[0] == 10 # Check bin edges match the minimum and maximum of binning variable - assert np.nanmin(self.slope.data.flatten()[indices]) == np.min(pd.IntervalIndex(df.slope).left) - assert np.nanmax(self.slope.data.flatten()[indices]) == np.max(pd.IntervalIndex(df.slope).right) + assert np.nanmin(self.slope.data.flatten()[indices]) == np.min(pd.IntervalIndex(df_binning.slope).left) + assert np.nanmax(self.slope.data.flatten()[indices]) == np.max(pd.IntervalIndex(df_binning.slope).right) # NMAD should go up quite a bit with slope, more than 8 m between the two extreme bins - assert df.nmad.values[-1] - df.nmad.values[0] > 8 + assert df_binning.nmad.values[-1] - df_binning.nmad.values[0] > 8 # 1D binning with 20 bins - df = xdem.spatialstats.nd_binning( + df_binning = xdem.spatialstats.nd_binning( values=self.diff.data.flatten()[indices], list_var=[self.slope.data.flatten()[indices]], list_var_names=["slope"], list_var_bins=20, ) # Check length matches - assert df.shape[0] == 20 + assert df_binning.shape[0] == 20 # Define function for custom stat def percentile_80(a: NDArrayf) -> np.floating[Any]: return np.nanpercentile(a, 80) # Check the function runs with custom functions - df = xdem.spatialstats.nd_binning( + df_binning = xdem.spatialstats.nd_binning( values=self.diff.data.flatten()[indices], 
list_var=[self.slope.data.flatten()[indices]], list_var_names=["slope"], statistics=[percentile_80], ) # Check that the count is added automatically by the function when not user-defined - assert "count" in df.columns.values + assert "count" in df_binning.columns.values # 2D binning - df = xdem.spatialstats.nd_binning( + df_binning = xdem.spatialstats.nd_binning( values=self.diff.data.flatten()[indices], list_var=[self.slope.data.flatten()[indices], self.ref.data.flatten()[indices]], list_var_names=["slope", "elevation"], ) # Dataframe should contain two 1D binning of length 10 and one 2D binning of length 100 - assert df.shape[0] == (10 + 10 + 100) + assert df_binning.shape[0] == (10 + 10 + 100) # 3D binning - df = xdem.spatialstats.nd_binning( + df_binning = xdem.spatialstats.nd_binning( values=self.diff.data.flatten()[indices], list_var=[ self.slope.data.flatten()[indices], @@ -141,37 +138,39 @@ def percentile_80(a: NDArrayf) -> np.floating[Any]: # Dataframe should contain three 1D binning of length 10 and three 2D binning of length 100 and one 2D binning # of length 1000 - assert df.shape[0] == (4**3 + 3 * 4**2 + 3 * 4) + assert df_binning.shape[0] == (4**3 + 3 * 4**2 + 3 * 4) # Save for later use - df.to_csv(os.path.join(examples._EXAMPLES_DIRECTORY, "df_3d_binning_slope_elevation_aspect.csv"), index=False) + df_binning.to_csv(os.path.join(examples.EXAMPLES_DIRECTORY,"df_3d_binning_slope_elevation_aspect.csv"), + index=False) def test_interp_nd_binning_artificial_data(self) -> None: """Check that the N-dimensional interpolation works correctly using artificial data""" - # Check the function works with a classic input (see example) - df = pd.DataFrame( + df_binning = pd.DataFrame( { "var1": [1, 2, 3, 1, 2, 3, 1, 2, 3], "var2": [1, 1, 1, 2, 2, 2, 3, 3, 3], "statistic": [1, 2, 3, 4, 5, 6, 7, 8, 9], - } + }, ) arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]).reshape((3, 3)) fun = xdem.spatialstats.interp_nd_binning( - df, list_var_names=["var1", "var2"], 
statistic="statistic", min_count=None + df_binning, list_var_names=["var1", "var2"], statistic="statistic", min_count=None, ) # Check that the dimensions are rightly ordered - assert fun((1, 3)) == df[np.logical_and(df["var1"] == 1, df["var2"] == 3)]["statistic"].values[0] - assert fun((3, 1)) == df[np.logical_and(df["var1"] == 3, df["var2"] == 1)]["statistic"].values[0] + assert fun((1, 3)) == df_binning[np.logical_and(df_binning["var1"] == 1, + df_binning["var2"] == 3)]["statistic"].values[0] + assert fun((3, 1)) == df_binning[np.logical_and(df_binning["var1"] == 3, + df_binning["var2"] == 1)]["statistic"].values[0] # Check interpolation falls right on values for points (1, 1), (1, 2) etc... for i in range(3): for j in range(3): - x = df["var1"][3 * i + j] - y = df["var2"][3 * i + j] - stat = df["statistic"][3 * i + j] + x = df_binning["var1"][3 * i + j] + y = df_binning["var2"][3 * i + j] + stat = df_binning["statistic"][3 * i + j] assert fun((x, y)) == stat # Check bilinear interpolation inside the grid @@ -254,34 +253,34 @@ def test_interp_nd_binning_artificial_data(self) -> None: vec2 = np.arange(1, 4) vec3 = np.arange(1, 5) x, y, z = np.meshgrid(vec1, vec2, vec3) - df = pd.DataFrame( - {"var1": x.ravel(), "var2": y.ravel(), "var3": z.ravel(), "statistic": np.arange(len(x.ravel()))} + df_binning = pd.DataFrame( + {"var1": x.ravel(), "var2": y.ravel(), "var3": z.ravel(), "statistic": np.arange(len(x.ravel()))}, ) fun = xdem.spatialstats.interp_nd_binning( - df, list_var_names=["var1", "var2", "var3"], statistic="statistic", min_count=None + df_binning, list_var_names=["var1", "var2", "var3"], statistic="statistic", min_count=None, ) for i in vec1: for j in vec2: for k in vec3: assert ( fun((i, j, k)) - == df[np.logical_and.reduce((df["var1"] == i, df["var2"] == j, df["var3"] == k))][ - "statistic" - ].values[0] + == df_binning[np.logical_and.reduce((df_binning["var1"] == i, + df_binning["var2"] == j, + df_binning["var3"] == k))]["statistic"].values[0] ) # 
Check that the linear extrapolation respects nearest neighbour and doesn't go negative # The following example used to give a negative value - df = pd.DataFrame( + df_binning = pd.DataFrame( { "var1": [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4], "var2": [0, 0, 0, 0, 5, 5, 5, 5, 5.5, 5.5, 5.5, 5.5, 6, 6, 6, 6], "statistic": [0, 0, 0, 0, 1, 1, 1, 1, np.nan, 1, 1, np.nan, np.nan, 0, 0, np.nan], - } + }, ) fun = xdem.spatialstats.interp_nd_binning( - df, list_var_names=["var1", "var2"], statistic="statistic", min_count=None + df_binning, list_var_names=["var1", "var2"], statistic="statistic", min_count=None, ) # Check it is now positive or equal to zero @@ -289,14 +288,13 @@ def test_interp_nd_binning_artificial_data(self) -> None: def test_interp_nd_binning_realdata(self) -> None: """Check that the function works well with outputs from the nd_binning function""" - # Read nd_binning output - df = pd.read_csv( - os.path.join(examples._EXAMPLES_DIRECTORY, "df_3d_binning_slope_elevation_aspect.csv"), index_col=None + df_binning = pd.read_csv( + os.path.join(examples.EXAMPLES_DIRECTORY, "df_3d_binning_slope_elevation_aspect.csv"), index_col=None, ) # First, in 1D - fun = xdem.spatialstats.interp_nd_binning(df, list_var_names="slope") + fun = xdem.spatialstats.interp_nd_binning(df_binning, list_var_names="slope") # Check a value is returned inside the grid assert np.isfinite(fun(([15],))) @@ -306,10 +304,10 @@ def test_interp_nd_binning_realdata(self) -> None: assert all(np.isfinite(fun(([-5, 50],)))) # Check when the first passed binning variable contains NaNs because of other binning variable - fun = xdem.spatialstats.interp_nd_binning(df, list_var_names="elevation") + fun = xdem.spatialstats.interp_nd_binning(df_binning, list_var_names="elevation") # Then, in 2D - fun = xdem.spatialstats.interp_nd_binning(df, list_var_names=["slope", "elevation"]) + fun = xdem.spatialstats.interp_nd_binning(df_binning, list_var_names=["slope", "elevation"]) # Check a value is 
returned inside the grid assert np.isfinite(fun(([15], [1000]))) @@ -319,7 +317,7 @@ def test_interp_nd_binning_realdata(self) -> None: assert all(np.isfinite(fun(([-5, 50], [-500, 3000])))) # Then in 3D - fun = xdem.spatialstats.interp_nd_binning(df, list_var_names=["slope", "elevation", "aspect"]) + fun = xdem.spatialstats.interp_nd_binning(df_binning, list_var_names=["slope", "elevation", "aspect"]) # Check a value is returned inside the grid assert np.isfinite(fun(([15], [1000], [np.pi]))) @@ -330,15 +328,14 @@ def test_interp_nd_binning_realdata(self) -> None: def test_get_perbin_nd_binning(self) -> None: """Test the get per-bin function.""" - # Read nd_binning output - df = pd.read_csv( - os.path.join(examples._EXAMPLES_DIRECTORY, "df_3d_binning_slope_elevation_aspect.csv"), index_col=None + df_binning = pd.read_csv( + os.path.join(examples.EXAMPLES_DIRECTORY, "df_3d_binning_slope_elevation_aspect.csv"), index_col=None, ) # Get values for arrays from the above 3D binning perbin_values = xdem.spatialstats.get_perbin_nd_binning( - df=df, + df=df_binning, list_var=[ self.slope.data, self.ref.data, @@ -351,10 +348,10 @@ def test_get_perbin_nd_binning(self) -> None: assert np.shape(self.slope.data) == np.shape(perbin_values) # Check that the bin are rightly recognized - df = df[df.nd == 3] + df_binning = df_binning[df_binning.nd == 3] # Convert the intervals from string due to saving to file for var in ["slope", "elevation", "aspect"]: - df[var] = [xdem.spatialstats._pandas_str_to_interval(x) for x in df[var]] + df_binning[var] = [xdem.spatialstats._pandas_str_to_interval(x) for x in df_binning[var]] # noqa: SLF001 # Take 1000 random points in the array rng = np.random.default_rng(42) @@ -376,10 +373,10 @@ def test_get_perbin_nd_binning(self) -> None: # Isolate the bin in the dataframe index_bin = np.logical_and.reduce( ( - [h in interv for interv in df["elevation"]], - [slp in interv for interv in df["slope"]], - [asp in interv for interv in df["aspect"]], - ) + 
[h in interv for interv in df_binning["elevation"]], + [slp in interv for interv in df_binning["slope"]], + [asp in interv for interv in df_binning["aspect"]], + ), ) # It might not exist in the binning intervals (if extreme values were not subsampled in test_nd_binning) if np.count_nonzero(index_bin) == 0: @@ -388,7 +385,7 @@ def test_get_perbin_nd_binning(self) -> None: assert np.count_nonzero(index_bin) == 1 # Get the statistic value and verify that this was the one returned by the function - statistic_value = df["nanmedian"][index_bin].values[0] + statistic_value = df_binning["nanmedian"][index_bin].values[0] # Nan equality does not work, so we compare finite values first if ~np.isnan(statistic_value): assert statistic_value == perbin_values[x, y] @@ -398,7 +395,6 @@ def test_get_perbin_nd_binning(self) -> None: def test_two_step_standardization(self) -> None: """Test two-step standardization function""" - # Reproduce the first steps of binning df_binning = xdem.spatialstats.nd_binning( values=self.diff[~self.mask], @@ -407,7 +403,7 @@ def test_two_step_standardization(self) -> None: statistics=[xdem.spatialstats.nmad], ) unscaled_fun = xdem.spatialstats.interp_nd_binning( - df_binning, list_var_names=["var1", "var2"], statistic="nmad" + df_binning, list_var_names=["var1", "var2"], statistic="nmad", ) # The zscore spread should not be one right after binning zscores = self.diff[~self.mask] / unscaled_fun((self.slope[~self.mask], self.maximum_curv[~self.mask])) @@ -432,18 +428,17 @@ def test_two_step_standardization(self) -> None: test_slopes = np.linspace(0, 50, 50) test_max_curvs = np.linspace(0, 10, 50) assert np.array_equal( - unscaled_fun((test_slopes, test_max_curvs)) * scale_fac_std, final_func((test_slopes, test_max_curvs)) + unscaled_fun((test_slopes, test_max_curvs)) * scale_fac_std, final_func((test_slopes, test_max_curvs)), ) def test_estimate_model_heteroscedasticity_and_infer_from_stable(self) -> None: """Test consistency of outputs and errors in 
wrapper functions for estimation of heteroscedasticity""" - # Test infer function errors_1, df_binning_1, err_fun_1 = xdem.spatialstats.infer_heteroscedasticity_from_stable( - dvalues=self.diff, list_var=[self.slope, self.maximum_curv], unstable_mask=self.outlines + dvalues=self.diff, list_var=[self.slope, self.maximum_curv], unstable_mask=self.outlines, ) - df_binning_2, err_fun_2 = xdem.spatialstats._estimate_model_heteroscedasticity( + df_binning_2, err_fun_2 = xdem.spatialstats._estimate_model_heteroscedasticity( # noqa: SLF001 dvalues=self.diff[~self.mask], list_var=[self.slope[~self.mask], self.maximum_curv[~self.mask]], list_var_names=["var1", "var2"], @@ -460,20 +455,20 @@ def test_estimate_model_heteroscedasticity_and_infer_from_stable(self) -> None: assert np.array_equal(errors_1_arr, errors_2_arr, equal_nan=True) # Save for use in TestVariogram - errors_1.save(os.path.join(examples._EXAMPLES_DIRECTORY, "dh_error.tif")) + errors_1.save(os.path.join(examples._EXAMPLES_DIRECTORY, "dh_error.tif")) # noqa: SLF001 # Check that errors are raised with wrong input with pytest.raises(ValueError, match="The values must be a Raster or NumPy array, or a list of those."): xdem.spatialstats.infer_heteroscedasticity_from_stable( - dvalues="not_an_array", stable_mask=~self.mask, list_var=[self.slope.get_nanarray()] + dvalues="not_an_array", stable_mask=~self.mask, list_var=[self.slope.get_nanarray()], ) with pytest.raises(ValueError, match="The stable mask must be a Vector, Mask, GeoDataFrame or NumPy array."): xdem.spatialstats.infer_heteroscedasticity_from_stable( - dvalues=self.diff, stable_mask="not_a_vector_or_array", list_var=[self.slope.get_nanarray()] + dvalues=self.diff, stable_mask="not_a_vector_or_array", list_var=[self.slope.get_nanarray()], ) with pytest.raises(ValueError, match="The unstable mask must be a Vector, Mask, GeoDataFrame or NumPy array."): xdem.spatialstats.infer_heteroscedasticity_from_stable( - dvalues=self.diff, 
unstable_mask="not_a_vector_or_array", list_var=[self.slope.get_nanarray()] + dvalues=self.diff, unstable_mask="not_a_vector_or_array", list_var=[self.slope.get_nanarray()], ) with pytest.raises( @@ -482,29 +477,31 @@ def test_estimate_model_heteroscedasticity_and_infer_from_stable(self) -> None: "values contain a Raster.", ): xdem.spatialstats.infer_heteroscedasticity_from_stable( - dvalues=self.diff.get_nanarray(), stable_mask=self.outlines, list_var=[self.slope.get_nanarray()] + dvalues=self.diff.get_nanarray(), stable_mask=self.outlines, list_var=[self.slope.get_nanarray()], ) def test_plot_binning(self) -> None: - + """Check that all the plottings fail with a warning when using invalid data.""" # Define placeholder data - df = pd.DataFrame({"var1": [0, 1, 2], "var2": [2, 3, 4], "statistic": [0, 0, 0]}) + df_binning = pd.DataFrame({"var1": [0, 1, 2], "var2": [2, 3, 4], "statistic": [0, 0, 0]}) # Check that the 1D plotting fails with a warning if the variable or statistic is not well-defined with pytest.raises(ValueError, match='The variable "var3" is not part of the provided dataframe column names.'): - xdem.spatialstats.plot_1d_binning(df, var_name="var3", statistic_name="statistic") + xdem.spatialstats.plot_1d_binning(df_binning, var_name="var3", statistic_name="statistic") with pytest.raises( - ValueError, match='The statistic "stat" is not part of the provided dataframe column names.' 
+ ValueError, match='The statistic "stat" is not part of the provided dataframe column names.', ): - xdem.spatialstats.plot_1d_binning(df, var_name="var1", statistic_name="stat") + xdem.spatialstats.plot_1d_binning(df_binning, var_name="var1", statistic_name="stat") # Same for the 2D plotting with pytest.raises(ValueError, match='The variable "var3" is not part of the provided dataframe column names.'): - xdem.spatialstats.plot_2d_binning(df, var_name_1="var3", var_name_2="var1", statistic_name="statistic") + xdem.spatialstats.plot_2d_binning(df_binning, var_name_1="var3", + var_name_2="var1", + statistic_name="statistic") with pytest.raises( - ValueError, match='The statistic "stat" is not part of the provided dataframe column names.' + ValueError, match='The statistic "stat" is not part of the provided dataframe column names.', ): - xdem.spatialstats.plot_2d_binning(df, var_name_1="var1", var_name_2="var1", statistic_name="stat") + xdem.spatialstats.plot_2d_binning(df_binning, var_name_1="var1", var_name_2="var1", statistic_name="stat") class TestVariogram: @@ -513,47 +510,45 @@ class TestVariogram: def test_sample_multirange_variogram_default(self) -> None: """Verify that the default function runs, and its basic output""" - # Check the variogram output is consistent for a random state - df = xdem.spatialstats.sample_empirical_variogram(values=self.diff, subsample=10, random_state=42) - # assert df["exp"][15] == pytest.approx(5.11900520324707, abs=1e-3) - assert df["lags"][15] == pytest.approx(5120) - assert df["count"][15] == 2 + df_vgm = xdem.spatialstats.sample_empirical_variogram(values=self.diff, subsample=10, random_state=42) + # assert df_vgm["exp"][15] == pytest.approx(5.11900520324707, abs=1e-3) + assert df_vgm["lags"][15] == pytest.approx(5120) + assert df_vgm["count"][15] == 2 # With a single run, no error can be estimated - assert all(np.isnan(df.err_exp.values)) + assert all(np.isnan(df_vgm.err_exp.values)) # Check that all type of coordinate inputs 
work # Only the array and the ground sampling distance xdem.spatialstats.sample_empirical_variogram( - values=self.diff.data, gsd=self.diff.res[0], subsample=10, random_state=42 + values=self.diff.data, gsd=self.diff.res[0], subsample=10, random_state=42, ) # Test multiple runs - df2 = xdem.spatialstats.sample_empirical_variogram( - values=self.diff, subsample=10, random_state=42, n_variograms=2 + df_vgm2 = xdem.spatialstats.sample_empirical_variogram( + values=self.diff, subsample=10, random_state=42, n_variograms=2, ) # Check that an error is estimated - assert any(~np.isnan(df2.err_exp.values)) + assert any(~np.isnan(df_vgm2.err_exp.values)) # Test that running on several cores does not trigger any error xdem.spatialstats.sample_empirical_variogram( - values=self.diff, subsample=10, random_state=42, n_variograms=2, n_jobs=2 + values=self.diff, subsample=10, random_state=42, n_variograms=2, n_jobs=2, ) # Test plotting of empirical variogram by itself if PLOT: - xdem.spatialstats.plot_variogram(df2) + xdem.spatialstats.plot_variogram(df_vgm2) def test_sample_empirical_variogram_speed(self) -> None: """Verify that no speed is lost outside of routines on variogram sampling by comparing manually to skgstat""" - values = self.diff subsample = 10 # First, run the xDEM wrapper function # t0 = time.time() - df = xdem.spatialstats.sample_empirical_variogram(values=values, subsample=subsample, random_state=42) + df_vgm = xdem.spatialstats.sample_empirical_variogram(values=values, subsample=subsample, random_state=42) # t1 = time.time() # Second, do it manually with skgstat @@ -568,7 +563,7 @@ def test_sample_empirical_variogram_speed(self) -> None: # Redefine parameters fed to skgstat manually # Maxlag maxlag = np.sqrt( - (np.max(coords[:, 0]) - np.min(coords[:, 0])) ** 2 + (np.max(coords[:, 1]) - np.min(coords[:, 1])) ** 2 + (np.max(coords[:, 0]) - np.min(coords[:, 0])) ** 2 + (np.max(coords[:, 1]) - np.min(coords[:, 1])) ** 2, ) # Binning function @@ -585,8 +580,8 @@ def 
test_sample_empirical_variogram_speed(self) -> None: shape = values.shape keyword_arguments = {"subsample": subsample, "extent": extent, "shape": shape} - runs, samples, ratio_subsample = xdem.spatialstats._choose_cdist_equidistant_sampling_parameters( - **keyword_arguments + runs, samples, ratio_subsample = xdem.spatialstats._choose_cdist_equidistant_sampling_parameters( # noqa: SLF001 + **keyword_arguments, ) # Index of valid values @@ -603,7 +598,7 @@ def test_sample_empirical_variogram_speed(self) -> None: # Now even for a n_variograms=1 we sample other integers for the random number generator rnd=np.random.default_rng(42).choice(1, 1, replace=False), ) - V = skgstat.Variogram( + vgm = skgstat.Variogram( rems, values=values_arr[~mask_nodata].ravel(), normalize=False, @@ -614,21 +609,21 @@ def test_sample_empirical_variogram_speed(self) -> None: # t4 = time.time() # Get bins, empirical variogram values, and bin count - bins, exp = V.get_empirical(bin_center=False) - count = V.bin_count + bins, exp = vgm.get_empirical(bin_center=False) + count = vgm.bin_count # Write to dataframe - df2 = pd.DataFrame() - df2 = df2.assign(exp=exp, bins=bins, count=count) - df2 = df2.rename(columns={"bins": "lags"}) - df2["err_exp"] = np.nan - df2.drop(df2.tail(1).index, inplace=True) - df2 = df2.astype({"exp": "float64", "err_exp": "float64", "lags": "float64", "count": "int64"}) + df_vgm2 = pd.DataFrame() + df_vgm2 = df_vgm2.assign(exp=exp, bins=bins, count=count) + df_vgm2 = df_vgm2.rename(columns={"bins": "lags"}) + df_vgm2["err_exp"] = np.nan + df_vgm2 = df_vgm2.drop(df_vgm2.tail(1).index) + df_vgm2 = df_vgm2.astype({"exp": "float64", "err_exp": "float64", "lags": "float64", "count": "int64"}) # t2 = time.time() # Check if the two frames are equal - pd.testing.assert_frame_equal(df, df2) + pd.testing.assert_frame_equal(df_vgm, df_vgm2) # Check that the two ways are taking the same time with 50% margin # time_method_1 = t1 - t0 @@ -641,30 +636,28 @@ def 
test_sample_empirical_variogram_speed(self) -> None: # assert time_metricspace_variogram == pytest.approx(time_method_2, rel=0.3) @pytest.mark.parametrize( - "subsample_method", ["pdist_point", "pdist_ring", "pdist_disk", "cdist_point"] + "subsample_method", ["pdist_point", "pdist_ring", "pdist_disk", "cdist_point"], ) # type: ignore - def test_sample_multirange_variogram_methods(self, subsample_method) -> None: + def test_sample_multirange_variogram_methods(self, subsample_method: str) -> None: """Verify that all other methods run""" - # Check the variogram estimation runs for several methods - df = xdem.spatialstats.sample_empirical_variogram( - values=self.diff, subsample=10, random_state=42, subsample_method=subsample_method + df_vgm = xdem.spatialstats.sample_empirical_variogram( + values=self.diff, subsample=10, random_state=42, subsample_method=subsample_method, ) - assert not df.empty + assert not df_vgm.empty # Check that the output is correct expected_columns = ["exp", "lags", "count"] expected_dtypes = [np.float64, np.float64, np.int64] for col in expected_columns: # Check that the column exists - assert col in df.columns + assert col in df_vgm.columns # Check that the column has the correct dtype - assert df[col].dtype == expected_dtypes[expected_columns.index(col)] + assert df_vgm[col].dtype == expected_dtypes[expected_columns.index(col)] def test_sample_multirange_variogram_args(self) -> None: """Verify that optional parameters run only for their specific method, raise warning otherwise""" - # Define parameters pdist_args: EmpiricalVariogramKArgs = {"pdist_multi_ranges": [0, self.diff.res[0] * 5, self.diff.res[0] * 10]} cdist_args: EmpiricalVariogramKArgs = {"ratio_subsample": 0.5, "runs": 10} @@ -674,7 +667,7 @@ def test_sample_multirange_variogram_args(self) -> None: with pytest.warns(UserWarning): # An argument only use by cdist with a pdist method xdem.spatialstats.sample_empirical_variogram( - values=self.diff, subsample=10, random_state=42, 
subsample_method="pdist_ring", **cdist_args + values=self.diff, subsample=10, random_state=42, subsample_method="pdist_ring", **cdist_args, ) with pytest.warns(UserWarning): @@ -699,12 +692,12 @@ def test_sample_multirange_variogram_args(self) -> None: # Check the function passes optional arguments specific to pdist methods without warning xdem.spatialstats.sample_empirical_variogram( - values=self.diff, subsample=10, random_state=42, subsample_method="pdist_ring", **pdist_args + values=self.diff, subsample=10, random_state=42, subsample_method="pdist_ring", **pdist_args, ) # Check the function passes optional arguments specific to cdist methods without warning xdem.spatialstats.sample_empirical_variogram( - values=self.diff, random_state=42, subsample=10, subsample_method="cdist_equidistant", **cdist_args + values=self.diff, random_state=42, subsample=10, subsample_method="cdist_equidistant", **cdist_args, ) # N is the number of samples in an ensemble @@ -712,7 +705,6 @@ def test_sample_multirange_variogram_args(self) -> None: @pytest.mark.parametrize("shape", [(50, 50), (100, 100), (500, 500)]) # type: ignore def test_choose_cdist_equidistant_sampling_parameters(self, subsample: int, shape: tuple[int, int]) -> None: """Verify that the automatically-derived parameters of equidistant sampling are sound""" - # Assign an arbitrary extent extent = (0, 1, 0, 1) @@ -722,8 +714,8 @@ def test_choose_cdist_equidistant_sampling_parameters(self, subsample: int, shap # Run the function keyword_arguments = {"subsample": subsample, "extent": extent, "shape": shape} - runs, samples, ratio_subsample = xdem.spatialstats._choose_cdist_equidistant_sampling_parameters( - **keyword_arguments + runs, samples, ratio_subsample = xdem.spatialstats._choose_cdist_equidistant_sampling_parameters( # noqa: SLF001 + **keyword_arguments, ) # There is at least 2 samples @@ -745,15 +737,13 @@ def test_choose_cdist_equidistant_sampling_parameters(self, subsample: int, shap def 
test_errors_subsample_parameter(self) -> None: """Tests that an error is raised when the subsample argument is too little""" - keyword_arguments = {"subsample": 3, "extent": (0, 1, 0, 1), "shape": (10, 10)} with pytest.raises(ValueError, match="The number of subsamples needs to be at least 10."): - xdem.spatialstats._choose_cdist_equidistant_sampling_parameters(**keyword_arguments) + xdem.spatialstats._choose_cdist_equidistant_sampling_parameters(**keyword_arguments) # noqa: SLF001 def test_multirange_fit_performance(self) -> None: """Verify that the fitting works with artificial dataset""" - # First, generate a sum of modelled variograms: ranges and partial sills for three models params_real = (100, 0.7, 1000, 0.2, 10000, 0.1) r1, ps1, r2, ps2, r3, ps3 = params_real @@ -770,11 +760,11 @@ def test_multirange_fit_performance(self) -> None: sigma = np.ones(len(x)) * sig # Put all in a dataframe - df = pd.DataFrame() - df = df.assign(lags=x, exp=y_simu, err_exp=sigma) + df_vgm = pd.DataFrame() + df_vgm = df_vgm.assign(lags=x, exp=y_simu, err_exp=sigma) # Run the fitting - fun, params_est = xdem.spatialstats.fit_sum_model_variogram(["spherical", "spherical", "spherical"], df) + fun, params_est = xdem.spatialstats.fit_sum_model_variogram(["spherical", "spherical", "spherical"], df_vgm) for i in range(len(params_est)): # Assert all parameters were correctly estimated within a 30% relative margin @@ -782,18 +772,17 @@ def test_multirange_fit_performance(self) -> None: assert params_real[2 * i + 1] == pytest.approx(params_est["psill"].values[i], rel=0.3) if PLOT: - xdem.spatialstats.plot_variogram(df, list_fit_fun=[fun]) + xdem.spatialstats.plot_variogram(df_vgm, list_fit_fun=[fun]) def test_check_params_variogram_model(self) -> None: """Verify that the checking function for the modelled variogram parameters dataframe returns adequate errors""" - # Check when missing a column with pytest.raises( ValueError, - match='The dataframe with variogram parameters must contain the 
columns "model",' ' "range" and "psill".', + match='The dataframe with variogram parameters must contain the columns "model", "range" and "psill".', ): - xdem.spatialstats._check_validity_params_variogram( - pd.DataFrame(data={"model": ["spherical"], "range": [100]}) + xdem.spatialstats._check_validity_params_variogram( # noqa: SLF001 + pd.DataFrame(data={"model": ["spherical"], "range": [100]}), ) # Check with wrong model format @@ -804,32 +793,32 @@ def test_check_params_variogram_model(self) -> None: + ", ".join(list_supported_models) + ".", ): - xdem.spatialstats._check_validity_params_variogram( - pd.DataFrame(data={"model": ["Supraluminal"], "range": [100], "psill": [1]}) + xdem.spatialstats._check_validity_params_variogram( # noqa: SLF001 + pd.DataFrame(data={"model": ["Supraluminal"], "range": [100], "psill": [1]}), ) # Check with wrong range format with pytest.raises(ValueError, match="The variogram ranges must be float or integer."): - xdem.spatialstats._check_validity_params_variogram( - pd.DataFrame(data={"model": ["spherical"], "range": ["a"], "psill": [1]}) + xdem.spatialstats._check_validity_params_variogram( # noqa: SLF001 + pd.DataFrame(data={"model": ["spherical"], "range": ["a"], "psill": [1]}), ) # Check with negative range with pytest.raises(ValueError, match="The variogram ranges must have non-zero, positive values."): - xdem.spatialstats._check_validity_params_variogram( - pd.DataFrame(data={"model": ["spherical"], "range": [-1], "psill": [1]}) + xdem.spatialstats._check_validity_params_variogram( # noqa: SLF001 + pd.DataFrame(data={"model": ["spherical"], "range": [-1], "psill": [1]}), ) # Check with wrong partial sill format with pytest.raises(ValueError, match="The variogram partial sills must be float or integer."): - xdem.spatialstats._check_validity_params_variogram( - pd.DataFrame(data={"model": ["spherical"], "range": [100], "psill": ["a"]}) + xdem.spatialstats._check_validity_params_variogram( # noqa: SLF001 + 
pd.DataFrame(data={"model": ["spherical"], "range": [100], "psill": ["a"]}), ) # Check with negative partial sill with pytest.raises(ValueError, match="The variogram partial sills must have non-zero, positive values."): - xdem.spatialstats._check_validity_params_variogram( - pd.DataFrame(data={"model": ["spherical"], "range": [100], "psill": [-1]}) + xdem.spatialstats._check_validity_params_variogram( # noqa: SLF001 + pd.DataFrame(data={"model": ["spherical"], "range": [100], "psill": [-1]}), ) # Check with a model that requires smoothness and without the smoothness column @@ -838,25 +827,24 @@ def test_check_params_variogram_model(self) -> None: match='The dataframe with variogram parameters must contain the column "smooth" ' "for the smoothness factor when using Matern or Stable models.", ): - xdem.spatialstats._check_validity_params_variogram( - pd.DataFrame(data={"model": ["stable"], "range": [100], "psill": [1]}) + xdem.spatialstats._check_validity_params_variogram( # noqa: SLF001 + pd.DataFrame(data={"model": ["stable"], "range": [100], "psill": [1]}), ) # Check with wrong smoothness format with pytest.raises(ValueError, match="The variogram smoothness parameter must be float or integer."): - xdem.spatialstats._check_validity_params_variogram( - pd.DataFrame(data={"model": ["stable"], "range": [100], "psill": [1], "smooth": ["a"]}) + xdem.spatialstats._check_validity_params_variogram( # noqa: SLF001 + pd.DataFrame(data={"model": ["stable"], "range": [100], "psill": [1], "smooth": ["a"]}), ) # Check with negative smoothness with pytest.raises(ValueError, match="The variogram smoothness parameter must have non-zero, positive values."): - xdem.spatialstats._check_validity_params_variogram( - pd.DataFrame(data={"model": ["stable"], "range": [100], "psill": [1], "smooth": [-1]}) + xdem.spatialstats._check_validity_params_variogram( # noqa: SLF001 + pd.DataFrame(data={"model": ["stable"], "range": [100], "psill": [1], "smooth": [-1]}), ) def 
test_estimate_model_spatial_correlation_and_infer_from_stable(self) -> None: """Test consistency of outputs and errors in wrapper functions for estimation of spatial correlation""" - warnings.filterwarnings("ignore", category=RuntimeWarning, message="Mean of empty slice") # Keep only data on stable @@ -864,29 +852,29 @@ def test_estimate_model_spatial_correlation_and_infer_from_stable(self) -> None: diff_on_stable.set_mask(self.mask) # Load the error map from TestBinning - errors = Raster(os.path.join(examples._EXAMPLES_DIRECTORY, "dh_error.tif")) + errors = Raster(os.path.join(examples._EXAMPLES_DIRECTORY, "dh_error.tif")) # noqa: SLF001 # Standardize the differences zscores = diff_on_stable / errors # Run wrapper estimate and model function - emp_vgm_1, params_model_vgm_1, _ = xdem.spatialstats._estimate_model_spatial_correlation( - dvalues=zscores, list_models=["Gau", "Sph"], subsample=10, random_state=42 + emp_vgm_1, params_model_vgm_1, _ = xdem.spatialstats._estimate_model_spatial_correlation( # noqa: SLF001 + dvalues=zscores, list_models=["Gau", "Sph"], subsample=10, random_state=42, ) # Check that the output matches that of the original function under the same random state emp_vgm_2 = xdem.spatialstats.sample_empirical_variogram( - values=zscores, estimator="dowd", subsample=10, random_state=42 + values=zscores, estimator="dowd", subsample=10, random_state=42, ) pd.testing.assert_frame_equal(emp_vgm_1, emp_vgm_2) params_model_vgm_2 = xdem.spatialstats.fit_sum_model_variogram( - list_models=["Gau", "Sph"], empirical_variogram=emp_vgm_2 + list_models=["Gau", "Sph"], empirical_variogram=emp_vgm_2, )[1] pd.testing.assert_frame_equal(params_model_vgm_1, params_model_vgm_2) # Run wrapper infer from stable function with a Raster and the mask, and check the consistency there as well emp_vgm_3, params_model_vgm_3, _ = xdem.spatialstats.infer_spatial_correlation_from_stable( - dvalues=zscores, stable_mask=~self.mask, list_models=["Gau", "Sph"], subsample=10, 
random_state=42 + dvalues=zscores, stable_mask=~self.mask, list_models=["Gau", "Sph"], subsample=10, random_state=42, ) pd.testing.assert_frame_equal(emp_vgm_1, emp_vgm_3) pd.testing.assert_frame_equal(params_model_vgm_1, params_model_vgm_3) @@ -915,21 +903,21 @@ def test_estimate_model_spatial_correlation_and_infer_from_stable(self) -> None: ) # Save the modelled variogram for later used in TestNeffEstimation params_model_vgm_5.to_csv( - os.path.join(examples._EXAMPLES_DIRECTORY, "df_variogram_model_params.csv"), index=False + os.path.join(examples._EXAMPLES_DIRECTORY, "df_variogram_model_params.csv"), index=False, # noqa: SLF001 ) # Check that errors are raised with wrong input with pytest.raises(ValueError, match="The values must be a Raster or NumPy array, or a list of those."): xdem.spatialstats.infer_spatial_correlation_from_stable( - dvalues="not_an_array", stable_mask=~self.mask, list_models=["Gau", "Sph"], random_state=42 + dvalues="not_an_array", stable_mask=~self.mask, list_models=["Gau", "Sph"], random_state=42, ) with pytest.raises(ValueError, match="The stable mask must be a Vector, Mask, GeoDataFrame or NumPy array."): xdem.spatialstats.infer_spatial_correlation_from_stable( - dvalues=self.diff, stable_mask="not_a_vector_or_array", list_models=["Gau", "Sph"], random_state=42 + dvalues=self.diff, stable_mask="not_a_vector_or_array", list_models=["Gau", "Sph"], random_state=42, ) with pytest.raises(ValueError, match="The unstable mask must be a Vector, Mask, GeoDataFrame or NumPy array."): xdem.spatialstats.infer_spatial_correlation_from_stable( - dvalues=self.diff, unstable_mask="not_a_vector_or_array", list_models=["Gau", "Sph"], random_state=42 + dvalues=self.diff, unstable_mask="not_a_vector_or_array", list_models=["Gau", "Sph"], random_state=42, ) diff_on_stable_arr = gu.raster.get_array_and_mask(diff_on_stable)[0] with pytest.raises( @@ -938,45 +926,44 @@ def test_estimate_model_spatial_correlation_and_infer_from_stable(self) -> None: "values 
contain a Raster.", ): xdem.spatialstats.infer_spatial_correlation_from_stable( - dvalues=diff_on_stable_arr, stable_mask=self.outlines, list_models=["Gau", "Sph"], random_state=42 + dvalues=diff_on_stable_arr, stable_mask=self.outlines, list_models=["Gau", "Sph"], random_state=42, ) def test_empirical_fit_plotting(self) -> None: """Verify that the shape of the empirical variogram output works with the fit and plotting""" - # Check the variogram estimation runs for a random state - df = xdem.spatialstats.sample_empirical_variogram( - values=self.diff.data, gsd=self.diff.res[0], subsample=50, random_state=42 + df_vgm = xdem.spatialstats.sample_empirical_variogram( + values=self.diff.data, gsd=self.diff.res[0], subsample=50, random_state=42, ) # Single model fit - fun, _ = xdem.spatialstats.fit_sum_model_variogram(["spherical"], empirical_variogram=df) + fun, _ = xdem.spatialstats.fit_sum_model_variogram(["spherical"], empirical_variogram=df_vgm) # Triple model fit fun2, _ = xdem.spatialstats.fit_sum_model_variogram( - ["spherical", "spherical", "spherical"], empirical_variogram=df + ["spherical", "spherical", "spherical"], empirical_variogram=df_vgm, ) if PLOT: # Plot with a single model fit - xdem.spatialstats.plot_variogram(df, list_fit_fun=[fun]) + xdem.spatialstats.plot_variogram(df_vgm, list_fit_fun=[fun]) # Plot with a triple model fit - xdem.spatialstats.plot_variogram(df, list_fit_fun=[fun2]) + xdem.spatialstats.plot_variogram(df_vgm, list_fit_fun=[fun2]) # Check that errors are raised with wrong inputs # If the experimental variogram values "exp" are not passed with pytest.raises( - ValueError, match='The expected variable "exp" is not part of the provided dataframe column names.' 
+ ValueError, match='The expected variable "exp" is not part of the provided dataframe column names.', ): xdem.spatialstats.plot_variogram(pd.DataFrame(data={"wrong_name": [1], "lags": [1], "count": [100]})) # If the spatial lags "lags" are not passed with pytest.raises( - ValueError, match='The expected variable "lags" is not part of the provided dataframe column names.' + ValueError, match='The expected variable "lags" is not part of the provided dataframe column names.', ): xdem.spatialstats.plot_variogram(pd.DataFrame(data={"exp": [1], "wrong_name": [1], "count": [100]})) # If the pairwise sample count "count" is not passed with pytest.raises( - ValueError, match='The expected variable "count" is not part of the provided dataframe column names.' + ValueError, match='The expected variable "count" is not part of the provided dataframe column names.', ): xdem.spatialstats.plot_variogram(pd.DataFrame(data={"exp": [1], "lags": [1], "wrong_name": [100]})) @@ -991,17 +978,17 @@ class TestNeffEstimation: @pytest.mark.parametrize("area", [10 ** (2 * i) for i in range(3)]) # type: ignore def test_neff_circular_single_range(self, range1: float, psill1: float, model1: float, area: float) -> None: """Test the accuracy of numerical integration for one to three models of spherical, gaussian or exponential - forms to get the number of effective samples""" - + forms to get the number of effective samples + """ params_variogram_model = pd.DataFrame(data={"model": [model1], "range": [range1], "psill": [psill1]}) # Exact integration neff_circ_exact = xdem.spatialstats.neff_circular_approx_theoretical( - area=area, params_variogram_model=params_variogram_model + area=area, params_variogram_model=params_variogram_model, ) # Numerical integration neff_circ_numer = xdem.spatialstats.neff_circular_approx_numerical( - area=area, params_variogram_model=params_variogram_model + area=area, params_variogram_model=params_variogram_model, ) # Check results are the exact same @@ -1013,11 
+1000,11 @@ def test_neff_circular_single_range(self, range1: float, psill1: float, model1: @pytest.mark.parametrize("model1", ["spherical", "exponential", "gaussian", "cubic"]) # type: ignore @pytest.mark.parametrize("model2", ["spherical", "exponential", "gaussian", "cubic"]) # type: ignore def test_neff_circular_three_ranges( - self, range1: float, range2: float, range3: float, model1: float, model2: float + self, range1: float, range2: float, range3: float, model1: float, model2: float, ) -> None: """Test the accuracy of numerical integration for one to three models of spherical, gaussian or - exponential forms""" - + exponential forms + """ area = 1000 psill1 = 1 psill2 = 1 @@ -1029,16 +1016,16 @@ def test_neff_circular_three_ranges( "model": [model1, model2, model3], "range": [range1, range2, range3], "psill": [psill1, psill2, psill3], - } + }, ) # Exact integration neff_circ_exact = xdem.spatialstats.neff_circular_approx_theoretical( - area=area, params_variogram_model=params_variogram_model + area=area, params_variogram_model=params_variogram_model, ) # Numerical integration neff_circ_numer = xdem.spatialstats.neff_circular_approx_numerical( - area=area, params_variogram_model=params_variogram_model + area=area, params_variogram_model=params_variogram_model, ) # Check results are the exact same @@ -1046,7 +1033,6 @@ def test_neff_circular_three_ranges( def test_neff_exact_and_approx_hugonnet(self) -> None: """Test the exact and approximated calculation of the number of effective sample by double covariance sum""" - # Generate a gridded dataset with varying errors associated to each pixel shape = (15, 15) errors = np.ones(shape) @@ -1062,19 +1048,19 @@ def test_neff_exact_and_approx_hugonnet(self) -> None: # Create a list of variogram that, summed, represent the spatial correlation params_variogram_model = pd.DataFrame( - data={"model": ["spherical", "gaussian"], "range": [5, 50], "psill": [0.5, 0.5]} + data={"model": ["spherical", "gaussian"], "range": [5, 
50], "psill": [0.5, 0.5]}, ) # Check that the function runs with default parameters # t0 = time.time() neff_exact = xdem.spatialstats.neff_exact( - coords=coords, errors=errors, params_variogram_model=params_variogram_model + coords=coords, errors=errors, params_variogram_model=params_variogram_model, ) # t1 = time.time() # Check that the non-vectorized version gives the same result neff_exact_nv = xdem.spatialstats.neff_exact( - coords=coords, errors=errors, params_variogram_model=params_variogram_model, vectorized=False + coords=coords, errors=errors, params_variogram_model=params_variogram_model, vectorized=False, ) # t2 = time.time() assert neff_exact == pytest.approx(neff_exact_nv, rel=0.001) @@ -1085,7 +1071,7 @@ def test_neff_exact_and_approx_hugonnet(self) -> None: # Check that the approximation function runs with default parameters, sampling 100 out of 250 samples # t3 = time.time() neff_approx = xdem.spatialstats.neff_hugonnet_approx( - coords=coords, errors=errors, params_variogram_model=params_variogram_model, subsample=100, random_state=42 + coords=coords, errors=errors, params_variogram_model=params_variogram_model, subsample=100, random_state=42, ) # t4 = time.time() @@ -1110,15 +1096,14 @@ def test_neff_exact_and_approx_hugonnet(self) -> None: def test_number_effective_samples(self) -> None: """Test that the wrapper function for neff functions behaves correctly and that output values are robust""" - # The function should return the same result as neff_circular_approx_numerical when using a numerical area area = 10000 params_variogram_model = pd.DataFrame( - data={"model": ["spherical", "gaussian"], "range": [300, 3000], "psill": [0.5, 0.5]} + data={"model": ["spherical", "gaussian"], "range": [300, 3000], "psill": [0.5, 0.5]}, ) neff1 = xdem.spatialstats.neff_circular_approx_numerical( - area=area, params_variogram_model=params_variogram_model + area=area, params_variogram_model=params_variogram_model, ) neff2 = 
xdem.spatialstats.number_effective_samples(area=area, params_variogram_model=params_variogram_model) @@ -1167,7 +1152,7 @@ def test_number_effective_samples(self) -> None: # Check that the number of effective samples matches that of the circular approximation within 25% area_brom = np.sum(outlines_brom.ds.area.values) neff4 = xdem.spatialstats.number_effective_samples( - area=area_brom, params_variogram_model=params_variogram_model + area=area_brom, params_variogram_model=params_variogram_model, ) assert neff4 == pytest.approx(neff2, rel=0.25) # The circular approximation is always conservative, so should yield a smaller value @@ -1180,26 +1165,25 @@ def test_number_effective_samples(self) -> None: "of the shortest correlation range, which might result in large memory usage.", ): xdem.spatialstats.number_effective_samples( - area=outlines_brom, params_variogram_model=params_variogram_model + area=outlines_brom, params_variogram_model=params_variogram_model, ) with pytest.raises(ValueError, match="Area must be a float, integer, Vector subclass or geopandas dataframe."): xdem.spatialstats.number_effective_samples( - area="not supported", params_variogram_model=params_variogram_model + area="not supported", params_variogram_model=params_variogram_model, ) with pytest.raises(ValueError, match="The rasterize resolution must be a float, integer or Raster subclass."): xdem.spatialstats.number_effective_samples( - area=outlines_brom, params_variogram_model=params_variogram_model, rasterize_resolution=(10, 10) + area=outlines_brom, params_variogram_model=params_variogram_model, rasterize_resolution=(10, 10), ) def test_spatial_error_propagation(self) -> None: """Test that the spatial error propagation wrapper function runs properly""" - # Load the error map from TestBinning - errors = Raster(os.path.join(examples._EXAMPLES_DIRECTORY, "dh_error.tif")) + errors = Raster(os.path.join(examples._EXAMPLES_DIRECTORY, "dh_error.tif")) # noqa: SLF001 # Load the spatial correlation 
from TestVariogram params_variogram_model = pd.read_csv( - os.path.join(examples._EXAMPLES_DIRECTORY, "df_variogram_model_params.csv"), index_col=None + os.path.join(examples._EXAMPLES_DIRECTORY, "df_variogram_model_params.csv"), index_col=None, # noqa: SLF001 ) # Run the function with vector areas @@ -1219,7 +1203,7 @@ def test_spatial_error_propagation(self) -> None: # Run the function with numeric areas (sum needed for Medalsbreen that has two separate polygons) areas_numeric = [np.sum(area_vec.area.values) for area_vec in areas_vector] list_stderr = xdem.spatialstats.spatial_error_propagation( - areas=areas_numeric, errors=errors, params_variogram_model=params_variogram_model + areas=areas_numeric, errors=errors, params_variogram_model=params_variogram_model, ) # Check that the outputs are consistent: the numeric method should always give a neff that is almost the same @@ -1231,10 +1215,9 @@ def test_spatial_error_propagation(self) -> None: class TestSubSampling: def test_circular_masking(self) -> None: """Test that the circular masking works as intended""" - # using default (center should be [2,2], radius 2) - circ = xdem.spatialstats._create_circular_mask((5, 5)) - circ2 = xdem.spatialstats._create_circular_mask((5, 5), center=(2, 2), radius=2) + circ = xdem.spatialstats._create_circular_mask((5, 5)) # noqa: SLF001 + circ2 = xdem.spatialstats._create_circular_mask((5, 5), center=(2, 2), radius=2) # noqa: SLF001 # check default center and radius are derived properly assert np.array_equal(circ, circ2) @@ -1247,30 +1230,29 @@ def test_circular_masking(self) -> None: # check distance is not a multiple of pixels (more accurate subsampling) # will create a 1-pixel mask around the center - circ3 = xdem.spatialstats._create_circular_mask((5, 5), center=(1, 1), radius=1) + circ3 = xdem.spatialstats._create_circular_mask((5, 5), center=(1, 1), radius=1) # noqa: SLF001 eq_circ3 = np.zeros((5, 5), dtype=bool) eq_circ3[1, 1] = True assert np.array_equal(circ3, eq_circ3) # 
will create a square mask (<1.5 pixel) around the center - circ4 = xdem.spatialstats._create_circular_mask((5, 5), center=(1, 1), radius=1.5) + circ4 = xdem.spatialstats._create_circular_mask((5, 5), center=(1, 1), radius=1.5) # noqa: SLF001 # should not be the same as radius = 1 assert not np.array_equal(circ3, circ4) def test_ring_masking(self) -> None: """Test that the ring masking works as intended""" - # by default, the mask is only an outside circle (ring of size 0) - ring1 = xdem.spatialstats._create_ring_mask((5, 5)) - circ1 = xdem.spatialstats._create_circular_mask((5, 5)) + ring1 = xdem.spatialstats._create_ring_mask((5, 5)) # noqa: SLF001 + circ1 = xdem.spatialstats._create_circular_mask((5, 5)) # noqa: SLF001 assert np.array_equal(ring1, circ1) # test rings with different inner radius - ring2 = xdem.spatialstats._create_ring_mask((5, 5), in_radius=1, out_radius=2) - ring3 = xdem.spatialstats._create_ring_mask((5, 5), in_radius=0, out_radius=2) - ring4 = xdem.spatialstats._create_ring_mask((5, 5), in_radius=1.5, out_radius=2) + ring2 = xdem.spatialstats._create_ring_mask((5, 5), in_radius=1, out_radius=2) # noqa: SLF001 + ring3 = xdem.spatialstats._create_ring_mask((5, 5), in_radius=0, out_radius=2) # noqa: SLF001 + ring4 = xdem.spatialstats._create_ring_mask((5, 5), in_radius=1.5, out_radius=2) # noqa: SLF001 assert np.logical_and(~np.array_equal(ring2, ring3), ~np.array_equal(ring3, ring4)) @@ -1284,14 +1266,13 @@ def test_ring_masking(self) -> None: class TestPatchesMethod: def test_patches_method_loop_quadrant(self) -> None: """Check that the patches method with quadrant loops (vectorized=False) functions correctly""" - diff, mask = load_ref_and_diff()[1:3] gsd = diff.res[0] area = 100000 # Check the patches method runs - df, df_full = xdem.spatialstats.patches_method( + df_patches, df_full = xdem.spatialstats.patches_method( diff, unstable_mask=mask, gsd=gsd, @@ -1303,13 +1284,13 @@ def test_patches_method_loop_quadrant(self) -> None: ) # First, the 
summary dataframe - assert df.shape == (1, 4) - assert all(df.columns == ["nmad", "nb_indep_patches", "exact_areas", "areas"]) + assert df_patches.shape == (1, 4) + assert all(df_patches.columns == ["nmad", "nb_indep_patches", "exact_areas", "areas"]) # Check the sampling is fixed for a random state - # assert df["nmad"][0] == pytest.approx(1.8401465163449207, abs=1e-3) - assert df["nb_indep_patches"][0] == 100 - assert df["exact_areas"][0] == pytest.approx(df["areas"][0], rel=0.2) + # assert df_patches["nmad"][0] == pytest.approx(1.8401465163449207, abs=1e-3) + assert df_patches["nb_indep_patches"][0] == 100 + assert df_patches["exact_areas"][0] == pytest.approx(df_patches["areas"][0], rel=0.2) # Then, the full dataframe assert df_full.shape == (100, 5) @@ -1322,14 +1303,13 @@ def test_patches_method_loop_quadrant(self) -> None: def test_patches_method_convolution(self) -> None: """Check that the patches method with convolution (vectorized=True) functions correctly""" - diff, mask = load_ref_and_diff()[1:3] gsd = diff.res[0] area = 100000 # First, the patches method runs with scipy - df = xdem.spatialstats.patches_method( + df_patches = xdem.spatialstats.patches_method( diff, unstable_mask=mask, gsd=gsd, @@ -1339,12 +1319,12 @@ def test_patches_method_convolution(self) -> None: convolution_method="scipy", ) - assert df.shape == (2, 4) - assert all(df.columns == ["nmad", "nb_indep_patches", "exact_areas", "areas"]) - assert df["exact_areas"][0] == pytest.approx(df["areas"][0], rel=0.2) + assert df_patches.shape == (2, 4) + assert all(df_patches.columns == ["nmad", "nb_indep_patches", "exact_areas", "areas"]) + assert df_patches["exact_areas"][0] == pytest.approx(df_patches["areas"][0], rel=0.2) # Second, with numba - # df, df_full = xdem.spatialstats.patches_method( + # df_patches, df_full = xdem.spatialstats.patches_method( # diff, # unstable_mask=mask.squeeze(), # gsd=gsd, @@ -1354,6 +1334,6 @@ def test_patches_method_convolution(self) -> None: # 
convolution_method='numba', # return_in_patch_statistics=True) # - # assert df.shape == (1, 4) - # assert all(df.columns == ['nmad', 'nb_indep_patches', 'exact_areas', 'areas']) - # assert df['exact_areas'][0] == pytest.approx(df['areas'][0], rel=0.2) + # assert df_patches.shape == (1, 4) + # assert all(df_patches.columns == ['nmad', 'nb_indep_patches', 'exact_areas', 'areas']) + # assert df_patches['exact_areas'][0] == pytest.approx(df_patches['areas'][0], rel=0.2) diff --git a/tests/test_terrain.py b/tests/test_terrain.py index 933ec224c..b2554bf7f 100644 --- a/tests/test_terrain.py +++ b/tests/test_terrain.py @@ -1,3 +1,5 @@ +"""Functions to test terrain attributes.""" + from __future__ import annotations import os @@ -77,22 +79,20 @@ class TestTerrainAttribute: ], ) # type: ignore def test_attribute_functions_against_gdaldem(self, attribute: str) -> None: - """ - Test that all attribute functions give the same results as those of GDALDEM within a small tolerance. + """Test that all attribute functions give the same results as those of GDALDEM within a small tolerance. :param attribute: The attribute to test (e.g. 
'slope') """ - functions = { "slope_Horn": lambda dem: xdem.terrain.slope(dem.data, resolution=dem.res, degrees=True), "aspect_Horn": lambda dem: xdem.terrain.aspect(dem.data, degrees=True), "hillshade_Horn": lambda dem: xdem.terrain.hillshade(dem.data, resolution=dem.res), "slope_Zevenberg": lambda dem: xdem.terrain.slope( - dem.data, resolution=dem.res, method="ZevenbergThorne", degrees=True + dem.data, resolution=dem.res, method="ZevenbergThorne", degrees=True, ), "aspect_Zevenberg": lambda dem: xdem.terrain.aspect(dem.data, method="ZevenbergThorne", degrees=True), "hillshade_Zevenberg": lambda dem: xdem.terrain.hillshade( - dem.data, resolution=dem.res, method="ZevenbergThorne" + dem.data, resolution=dem.res, method="ZevenbergThorne", ), "tri_Riley": lambda dem: xdem.terrain.terrain_ruggedness_index(dem.data, method="Riley"), "tri_Wilson": lambda dem: xdem.terrain.terrain_ruggedness_index(dem.data, method="Wilson"), @@ -183,13 +183,13 @@ def test_attribute_functions_against_gdaldem(self, attribute: str) -> None: "attribute", ["slope_Horn", "aspect_Horn", "hillshade_Horn", "curvature", "profile_curvature", "planform_curvature"], ) # type: ignore - def test_attribute_functions_against_richdem(self, attribute: str, get_terrain_attribute_richdem) -> None: - """ - Test that all attribute functions give the same results as those of RichDEM within a small tolerance. + def test_attribute_functions_against_richdem(self, + attribute: str, + get_terrain_attribute_richdem) -> None: # noqa: ANN001 + """Test that all attribute functions give the same results as those of RichDEM within a small tolerance. :param attribute: The attribute to test (e.g. 
'slope') """ - # Functions for xdem-implemented methods functions_xdem = { "slope_Horn": lambda dem: xdem.terrain.slope(dem, resolution=dem.res, degrees=True), @@ -208,7 +208,7 @@ def test_attribute_functions_against_richdem(self, attribute: str, get_terrain_a "curvature": lambda dem: get_terrain_attribute_richdem(dem, attribute="curvature"), "profile_curvature": lambda dem: get_terrain_attribute_richdem(dem, attribute="profile_curvature"), "planform_curvature": lambda dem: get_terrain_attribute_richdem( - dem, attribute="planform_curvature", degrees=True + dem, attribute="planform_curvature", degrees=True, ), } @@ -283,7 +283,6 @@ def test_hillshade_errors(self) -> None: def test_hillshade(self) -> None: """Test hillshade-specific settings.""" - zfactor_1 = xdem.terrain.hillshade(self.dem.data, resolution=self.dem.res, z_factor=1.0) zfactor_10 = xdem.terrain.hillshade(self.dem.data, resolution=self.dem.res, z_factor=10.0) @@ -297,17 +296,16 @@ def test_hillshade(self) -> None: assert np.nanmean(low_altitude) < np.nanmean(high_altitude) @pytest.mark.parametrize( - "name", ["curvature", "planform_curvature", "profile_curvature", "maximum_curvature"] + "name", ["curvature", "planform_curvature", "profile_curvature", "maximum_curvature"], ) # type: ignore def test_curvatures(self, name: str) -> None: """Test the curvature functions""" - # Copy the DEM to ensure that the inter-test state is unchanged, and because the mask will be modified. dem = self.dem.copy() # Derive curvature without any gaps curvature = xdem.terrain.get_terrain_attribute( - dem.data, attribute=name, resolution=dem.res, edge_method="nearest" + dem.data, attribute=name, resolution=dem.res, edge_method="nearest", ) # Validate that the array has the same shape as the input and that all values are finite. 
@@ -332,14 +330,13 @@ def test_curvatures(self, name: str) -> None: def test_get_terrain_attribute(self) -> None: """Test the get_terrain_attribute function by itself.""" - # Validate that giving only one terrain attribute only returns that, and not a list of len() == 1 slope = xdem.terrain.get_terrain_attribute(self.dem.data, "slope", resolution=self.dem.res) assert isinstance(slope, np.ndarray) # Create three products at the same time slope2, _, hillshade = xdem.terrain.get_terrain_attribute( - self.dem.data, ["slope", "aspect", "hillshade"], resolution=self.dem.res + self.dem.data, ["slope", "aspect", "hillshade"], resolution=self.dem.res, ) # Create a hillshade using its own function @@ -355,24 +352,23 @@ def test_get_terrain_attribute(self) -> None: def test_get_terrain_attribute_errors(self) -> None: """Test the get_terrain_attribute function raises appropriate errors.""" - # Below, re.escape() is needed to match expressions that have special characters (e.g., parenthesis, bracket) with pytest.raises( ValueError, match=re.escape( - "Slope method 'DoesNotExist' is not supported. Must be one of: " "['Horn', 'ZevenbergThorne']" + "Slope method 'DoesNotExist' is not supported. Must be one of: ['Horn', 'ZevenbergThorne']", ), ): xdem.terrain.slope(self.dem.data, method="DoesNotExist") with pytest.raises( ValueError, - match=re.escape("TRI method 'DoesNotExist' is not supported. Must be one of: " "['Riley', 'Wilson']"), + match=re.escape("TRI method 'DoesNotExist' is not supported. 
Must be one of: ['Riley', 'Wilson']"), ): xdem.terrain.terrain_ruggedness_index(self.dem.data, method="DoesNotExist") def test_raster_argument(self) -> None: - + """Test raster argument.""" slope, aspect = xdem.terrain.get_terrain_attribute(self.dem, attribute=["slope", "aspect"]) assert slope != aspect @@ -384,11 +380,9 @@ def test_raster_argument(self) -> None: assert slope.crs == self.dem.crs == aspect.crs def test_rugosity_jenness(self) -> None: - """ - Test the rugosity with the same example as in Jenness (2004), + """Test the rugosity with the same example as in Jenness (2004), https://doi.org/10.2193/0091-7648(2004)032[0829:CLSAFD]2.0.CO;2. """ - # Derive rugosity from the function dem = np.array([[190, 170, 155], [183, 165, 145], [175, 160, 122]], dtype="float32") @@ -406,7 +400,6 @@ def test_rugosity_jenness(self) -> None: @pytest.mark.parametrize("resolution", np.linspace(0.01, 100, 10)) # type: ignore def test_rugosity_simple_cases(self, dh: float, resolution: float) -> None: """Test the rugosity calculation for simple cases.""" - # We here check the value for a fully symmetric case: the rugosity calculation can be simplified because all # eight triangles have the same surface area, see Jenness (2004). 
@@ -424,21 +417,20 @@ def test_rugosity_simple_cases(self, dh: float, resolution: float) -> None: # Formula for area A of one triangle s = (side1 + side2 + side3) / 2.0 - A = np.sqrt(s * (s - side1) * (s - side2) * (s - side3)) + a = np.sqrt(s * (s - side1) * (s - side2) * (s - side3)) # We sum the area of the eight triangles, and divide by the planimetric area (resolution squared) - r = 8 * A / (resolution**2) + r = 8 * a / (resolution**2) # Check rugosity value is valid assert r == pytest.approx(rugosity[1, 1], rel=10 ** (-6)) def test_get_quadric_coefficients(self) -> None: """Test the outputs and exceptions of the get_quadric_coefficients() function.""" - dem = np.array([[1, 1, 1], [1, 2, 1], [1, 1, 1]], dtype="float32") coefficients = xdem.terrain.get_quadric_coefficients( - dem, resolution=1.0, edge_method="nearest", make_rugosity=True + dem, resolution=1.0, edge_method="nearest", make_rugosity=True, ) # Check all coefficients are finite with an edge method diff --git a/tests/test_vcrs.py b/tests/test_vcrs.py index 8d0dc0170..245394ac4 100644 --- a/tests/test_vcrs.py +++ b/tests/test_vcrs.py @@ -5,7 +5,7 @@ import pathlib import re import warnings -from typing import Any +from typing import Any, ClassVar import numpy as np import pytest @@ -18,13 +18,12 @@ class TestVCRS: def test_parse_vcrs_name_from_product(self) -> None: """Test parsing of vertical CRS name from DEM product name.""" - # Check that the value for the key is returned by the function - for product in xdem.vcrs.vcrs_dem_products.keys(): - assert xdem.vcrs._parse_vcrs_name_from_product(product) == xdem.vcrs.vcrs_dem_products[product] + for product in xdem.vcrs.vcrs_dem_products: + assert xdem.vcrs._parse_vcrs_name_from_product(product) == xdem.vcrs.vcrs_dem_products[product] # noqa: SLF001 # And that, otherwise, it's a None - assert xdem.vcrs._parse_vcrs_name_from_product("BESTDEM") is None + assert xdem.vcrs._parse_vcrs_name_from_product("BESTDEM") is None # noqa: SLF001 # Expect outputs for 
the inputs @pytest.mark.parametrize( @@ -39,12 +38,11 @@ def test_parse_vcrs_name_from_product(self) -> None: ) # type: ignore def test_vcrs_from_crs(self, input_output: tuple[CRS, CRS]) -> None: """Test the extraction of a vertical CRS from a CRS.""" - - input = input_output[0] + input = input_output[0] # noqa: A001 output = input_output[1] # Extract vertical CRS from CRS - vcrs = xdem.vcrs._vcrs_from_crs(crs=input) + vcrs = xdem.vcrs._vcrs_from_crs(crs=input) # noqa: SLF001 # Check that the result is as expected if isinstance(output, CRS): @@ -67,45 +65,42 @@ def test_vcrs_from_crs(self, input_output: tuple[CRS, CRS]) -> None: ) # type: ignore def test_vcrs_from_user_input(self, vcrs_input: str | pathlib.Path | int | CRS) -> None: """Tests the function _vcrs_from_user_input for varying user inputs, for which it will return a CRS.""" - # Most grids aren't going to be downloaded, so this warning can be raised warnings.filterwarnings("ignore", category=UserWarning, message="Grid not found in *") # Get user input - vcrs = xdem.dem._vcrs_from_user_input(vcrs_input) + vcrs = xdem.dem._vcrs_from_user_input(vcrs_input) # noqa: SLF001 # Check output type assert isinstance(vcrs, CRS) assert vcrs.is_vertical @pytest.mark.parametrize( - "vcrs_input", ["Ellipsoid", "ellipsoid", "wgs84", 4326, 4979, CRS.from_epsg(4326), CRS.from_epsg(4979)] + "vcrs_input", ["Ellipsoid", "ellipsoid", "wgs84", 4326, 4979, CRS.from_epsg(4326), CRS.from_epsg(4979)], ) # type: ignore def test_vcrs_from_user_input__ellipsoid(self, vcrs_input: str | int) -> None: """Tests the function _vcrs_from_user_input for inputs where it returns "Ellipsoid".""" - # Get user input - vcrs = xdem.vcrs._vcrs_from_user_input(vcrs_input) + vcrs = xdem.vcrs._vcrs_from_user_input(vcrs_input) # noqa: SLF001 # Check output type assert vcrs == "Ellipsoid" def test_vcrs_from_user_input__errors(self) -> None: """Tests errors of vcrs_from_user_input.""" - # Check that an error is raised when the type is wrong with 
pytest.raises(TypeError, match="New vertical CRS must be a string, path or VerticalCRS, received.*"): - xdem.vcrs._vcrs_from_user_input(np.zeros(1)) # type: ignore + xdem.vcrs._vcrs_from_user_input(np.zeros(1)) # noqa: SLF001 # Check that an error is raised if the CRS is not vertical with pytest.raises( ValueError, match=re.escape( "New vertical CRS must have a vertical axis, 'WGS 84 / UTM " - "zone 1N' does not (check with `CRS.is_vertical`)." + "zone 1N' does not (check with `CRS.is_vertical`).", ), ): - xdem.vcrs._vcrs_from_user_input(32601) + xdem.vcrs._vcrs_from_user_input(32601) # noqa: SLF001 # Check that a warning is raised if the CRS has other dimensions than vertical with pytest.warns( @@ -113,38 +108,36 @@ def test_vcrs_from_user_input__errors(self) -> None: match="New vertical CRS has a vertical dimension but also other components, " "extracting the vertical reference only.", ): - xdem.vcrs._vcrs_from_user_input(CRS("EPSG:4326+5773")) + xdem.vcrs._vcrs_from_user_input(CRS("EPSG:4326+5773")) # noqa: SLF001 @pytest.mark.parametrize( - "grid", ["us_noaa_geoid06_ak.tif", "is_lmi_Icegeoid_ISN93.tif", "us_nga_egm08_25.tif", "us_nga_egm96_15.tif"] + "grid", ["us_noaa_geoid06_ak.tif", "is_lmi_Icegeoid_ISN93.tif", "us_nga_egm08_25.tif", "us_nga_egm96_15.tif"], ) # type: ignore def test_build_vcrs_from_grid(self, grid: str) -> None: """Test that vertical CRS are correctly built from grid""" - # Most grids aren't going to be downloaded, so this warning can be raised warnings.filterwarnings("ignore", category=UserWarning, message="Grid not found in *") # Build vertical CRS - vcrs = xdem.vcrs._build_vcrs_from_grid(grid=grid) + vcrs = xdem.vcrs._build_vcrs_from_grid(grid=grid) # noqa: SLF001 assert vcrs.is_vertical # Check that the explicit construction yields the same CRS as "the old init way" (see function description) - vcrs_oldway = xdem.vcrs._build_vcrs_from_grid(grid=grid, old_way=True) + vcrs_oldway = xdem.vcrs._build_vcrs_from_grid(grid=grid, old_way=True) # 
noqa: SLF001 assert vcrs.equals(vcrs_oldway) # Test for WGS84 in 2D and 3D, UTM, CompoundCRS, everything should work @pytest.mark.parametrize( - "crs", [CRS("EPSG:4326"), CRS("EPSG:4979"), CRS("32610"), CRS("EPSG:4326+5773")] + "crs", [CRS("EPSG:4326"), CRS("EPSG:4979"), CRS("32610"), CRS("EPSG:4326+5773")], ) # type: ignore @pytest.mark.parametrize("vcrs_input", [CRS("EPSG:5773"), "is_lmi_Icegeoid_ISN93.tif", "EGM96"]) # type: ignore def test_build_ccrs_from_crs_and_vcrs(self, crs: CRS, vcrs_input: CRS | str) -> None: """Test the function build_ccrs_from_crs_and_vcrs.""" - # Most grids aren't going to be downloaded, so this warning can be raised warnings.filterwarnings("ignore", category=UserWarning, message="Grid not found in *") # Get the vertical CRS from user input - vcrs = xdem.vcrs._vcrs_from_user_input(vcrs_input=vcrs_input) + vcrs = xdem.vcrs._vcrs_from_user_input(vcrs_input=vcrs_input) # noqa: SLF001 # Build the compound CRS @@ -155,7 +148,7 @@ def test_build_ccrs_from_crs_and_vcrs(self, crs: CRS, vcrs_input: CRS | str) -> # If the version is higher than 3.5.0, it should pass if Version(pyproj.__version__) > Version("3.5.0"): - ccrs = xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=crs, vcrs=vcrs) + ccrs = xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=crs, vcrs=vcrs) # noqa: SLF001 # Otherwise, it should raise an error else: with pytest.raises( @@ -164,33 +157,31 @@ def test_build_ccrs_from_crs_and_vcrs(self, crs: CRS, vcrs_input: CRS | str) -> "with a new vertical CRS. 
Update your dependencies or pass the 2D source CRS " "manually.", ): - xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=crs, vcrs=vcrs) - return None + xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=crs, vcrs=vcrs) # noqa: SLF001 + return # If the CRS is 2D, it should pass else: - ccrs = xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=crs, vcrs=vcrs) + ccrs = xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=crs, vcrs=vcrs) # noqa: SLF001 assert isinstance(ccrs, CRS) assert ccrs.is_vertical def test_build_ccrs_from_crs_and_vcrs__errors(self) -> None: """Test errors are correctly raised from the build_ccrs function.""" - with pytest.raises( - ValueError, match="Invalid vcrs given. Must be a vertical " "CRS or the literal string 'Ellipsoid'." + ValueError, match="Invalid vcrs given. Must be a vertical CRS or the literal string 'Ellipsoid'.", ): - xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=CRS("EPSG:4326"), vcrs="NotAVerticalCRS") # type: ignore + xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=CRS("EPSG:4326"), vcrs="NotAVerticalCRS") # noqa: SLF001 # Compare to manually-extracted shifts at specific coordinates for the geoid grids - egm96_chile = {"grid": "us_nga_egm96_15.tif", "lon": -68, "lat": -20, "shift": 42} - egm08_chile = {"grid": "us_nga_egm08_25.tif", "lon": -68, "lat": -20, "shift": 42} - geoid96_alaska = {"grid": "us_noaa_geoid06_ak.tif", "lon": -145, "lat": 62, "shift": 15} - isn93_iceland = {"grid": "is_lmi_Icegeoid_ISN93.tif", "lon": -18, "lat": 65, "shift": 68} + egm96_chile: ClassVar[dict] = {"grid": "us_nga_egm96_15.tif", "lon": -68, "lat": -20, "shift": 42} + egm08_chile: ClassVar[dict] = {"grid": "us_nga_egm08_25.tif", "lon": -68, "lat": -20, "shift": 42} + geoid96_alaska: ClassVar[dict] = {"grid": "us_noaa_geoid06_ak.tif", "lon": -145, "lat": 62, "shift": 15} + isn93_iceland: ClassVar[dict] = {"grid": "is_lmi_Icegeoid_ISN93.tif", "lon": -18, "lat": 65, "shift": 68} - @pytest.mark.parametrize("grid_shifts", [egm08_chile, egm08_chile, geoid96_alaska, isn93_iceland]) # 
type: ignore + @pytest.mark.parametrize("grid_shifts", [egm96_chile, egm08_chile, geoid96_alaska, isn93_iceland]) # type: ignore def test_transform_zz(self, grid_shifts: dict[str, Any]) -> None: """Tests grids to convert vertical CRS.""" - # Most grids aren't going to be downloaded, so this warning can be raised warnings.filterwarnings("ignore", category=UserWarning, message="Grid not found in *") @@ -199,14 +190,14 @@ def test_transform_zz(self, grid_shifts: dict[str, Any]) -> None: xx = grid_shifts["lon"] yy = grid_shifts["lat"] crs_from = CRS.from_epsg(4326) - ccrs_from = xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=crs_from, vcrs="Ellipsoid") + ccrs_from = xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=crs_from, vcrs="Ellipsoid") # noqa: SLF001 # Build the compound CRS - vcrs_to = xdem.vcrs._vcrs_from_user_input(vcrs_input=grid_shifts["grid"]) - ccrs_to = xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=crs_from, vcrs=vcrs_to) + vcrs_to = xdem.vcrs._vcrs_from_user_input(vcrs_input=grid_shifts["grid"]) # noqa: SLF001 + ccrs_to = xdem.vcrs._build_ccrs_from_crs_and_vcrs(crs=crs_from, vcrs=vcrs_to) # noqa: SLF001 # Apply the transformation - zz_trans = xdem.vcrs._transform_zz(crs_from=ccrs_from, crs_to=ccrs_to, xx=xx, yy=yy, zz=zz) + zz_trans = xdem.vcrs._transform_zz(crs_from=ccrs_from, crs_to=ccrs_to, xx=xx, yy=yy, zz=zz) # noqa: SLF001 # Compare the elevation difference z_diff = 100 - zz_trans diff --git a/tests/test_volume.py b/tests/test_volume.py index da6ae9186..7eb50ee51 100644 --- a/tests/test_volume.py +++ b/tests/test_volume.py @@ -33,7 +33,7 @@ def test_bin_ddem(self) -> None: ) ddem_stds = xdem.volume.hypsometric_binning( - ddem[self.mask], self.dem_2009[self.mask], aggregation_function=np.std + ddem[self.mask], self.dem_2009[self.mask], aggregation_function=np.std, ) assert ddem_stds["value"].mean() < 50 assert np.abs(np.mean(ddem_bins["value"] - ddem_bins_masked["value"])) < 0.01 @@ -61,7 +61,6 @@ def test_interpolate_ddem_bins(self) -> None: def 
test_area_calculation(self) -> None: """Test the area calculation function.""" - ddem = self.dem_2009 - self.dem_1990 ddem_bins = xdem.volume.hypsometric_binning(ddem[self.mask], self.dem_2009[self.mask]) @@ -71,12 +70,12 @@ def test_area_calculation(self) -> None: # Test the area calculation with normal parameters. bin_area = xdem.volume.calculate_hypsometry_area( - ddem_bins, self.dem_2009[self.mask], pixel_size=self.dem_2009.res[0] + ddem_bins, self.dem_2009[self.mask], pixel_size=self.dem_2009.res[0], ) # Test area calculation with differing pixel x/y resolution. xdem.volume.calculate_hypsometry_area( - ddem_bins, self.dem_2009[self.mask], pixel_size=(self.dem_2009.res[0], self.dem_2009.res[0] + 1) + ddem_bins, self.dem_2009[self.mask], pixel_size=(self.dem_2009.res[0], self.dem_2009.res[0] + 1), ) # Add some nans to the reference DEM @@ -93,7 +92,7 @@ def test_area_calculation(self) -> None: # Try to pass an incorrect timeframe= parameter try: xdem.volume.calculate_hypsometry_area( - ddem_bins, self.dem_2009[self.mask], pixel_size=self.dem_2009.res[0], timeframe="blergh" + ddem_bins, self.dem_2009[self.mask], pixel_size=self.dem_2009.res[0], timeframe="blergh", ) except ValueError as exception: if "Argument 'timeframe=blergh' is invalid" not in str(exception): @@ -118,13 +117,13 @@ def test_ddem_bin_methods(self) -> None: # Make a fixed amount of bins equal_count_bins = xdem.volume.hypsometric_binning( - ddem[self.mask], self.dem_2009[self.mask], bins=50, kind="count" + ddem[self.mask], self.dem_2009[self.mask], bins=50, kind="count", ) assert equal_count_bins.shape[0] == 50 # Make 50 bins with approximately the same area (pixel count) quantile_bins = xdem.volume.hypsometric_binning( - ddem[self.mask], self.dem_2009[self.mask], bins=50, kind="quantile" + ddem[self.mask], self.dem_2009[self.mask], bins=50, kind="quantile", ) assert quantile_bins.shape[0] == 50 @@ -152,9 +151,9 @@ class TestNormHypsometric: @pytest.mark.parametrize("n_bins", [5, 10, 20]) # type: 
ignore def test_regional_signal(self, n_bins: int) -> None: - + """Test regional signal.""" signal = xdem.volume.get_regional_hypsometric_signal( - ddem=self.ddem, ref_dem=self.dem_2009, glacier_index_map=self.glacier_index_map, n_bins=n_bins + ddem=self.ddem, ref_dem=self.dem_2009, glacier_index_map=self.glacier_index_map, n_bins=n_bins, ) assert signal["w_mean"].min() >= 0.0 @@ -165,13 +164,13 @@ def test_regional_signal(self, n_bins: int) -> None: assert np.all(np.isfinite(signal.values)) def test_interpolate_small(self) -> None: - + """Test that no interpolation is done when the coverage is too small.""" dem = np.arange(16, dtype="float32").reshape(4, 4) ddem = dem / 10 glacier_index_map = np.ones_like(dem) signal = xdem.volume.get_regional_hypsometric_signal( - ddem=ddem, ref_dem=dem, glacier_index_map=glacier_index_map, n_bins=10 + ddem=ddem, ref_dem=dem, glacier_index_map=glacier_index_map, n_bins=10, ) # Make it so that only 1/4 of the values exist. @@ -205,19 +204,21 @@ def test_interpolate_small(self) -> None: assert np.nanmax(np.abs((interpolated_ddem - ddem_orig)[np.isnan(ddem)])) < 0.1 def test_regional_hypsometric_interp(self) -> None: - + """Test regional hypsometric interpolation.""" # Extract a normalized regional hypsometric signal. 
ddem = self.dem_2009 - self.dem_1990 signal = xdem.volume.get_regional_hypsometric_signal( - ddem=self.ddem, ref_dem=self.dem_2009, glacier_index_map=self.glacier_index_map + ddem=self.ddem, ref_dem=self.dem_2009, glacier_index_map=self.glacier_index_map, ) if False: import matplotlib.pyplot as plt plt.fill_between( - signal.index.mid, signal["median"] - signal["std"], signal["median"] + signal["std"], label="Median±std" + signal.index.mid, + signal["median"] - signal["std"], signal["median"] + signal["std"], + label="Median±std", ) plt.plot(signal.index.mid, signal["median"], color="black", linestyle=":", label="Median") plt.plot(signal.index.mid, signal["w_mean"], color="black", label="Weighted mean") @@ -234,11 +235,11 @@ def test_regional_hypsometric_interp(self) -> None: ddem.data.mask.ravel()[rng.choice(ddem.data.size, int(ddem.data.size * 0.80), replace=False)] = True # Fill the dDEM using the de-normalized signal. filled_ddem = xdem.volume.norm_regional_hypsometric_interpolation( - voided_ddem=ddem, ref_dem=self.dem_2009, glacier_index_map=self.glacier_index_map + voided_ddem=ddem, ref_dem=self.dem_2009, glacier_index_map=self.glacier_index_map, ) # Fill the dDEM using the de-normalized signal and create an idealized dDEM idealized_ddem = xdem.volume.norm_regional_hypsometric_interpolation( - voided_ddem=ddem, ref_dem=self.dem_2009, glacier_index_map=self.glacier_index_map, idealized_ddem=True + voided_ddem=ddem, ref_dem=self.dem_2009, glacier_index_map=self.glacier_index_map, idealized_ddem=True, ) assert not np.array_equal(filled_ddem, idealized_ddem) diff --git a/xdem/__init__.py b/xdem/__init__.py index fc94abc2a..44e16a2b8 100644 --- a/xdem/__init__.py +++ b/xdem/__init__.py @@ -15,6 +15,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+"""xDEM module init file.""" from xdem import ( # noqa coreg, @@ -38,5 +39,5 @@ "running from the source directory, please instead " "create a new virtual environment (using conda or " "virtualenv) and then install it in-place by running: " - "pip install -e ." + "pip install -e .", ) diff --git a/xdem/_typing.py b/xdem/_typing.py index 4fb201e0e..a859427f7 100644 --- a/xdem/_typing.py +++ b/xdem/_typing.py @@ -24,7 +24,7 @@ import numpy as np # Only for Python >= 3.9 -if sys.version_info.minor >= 9: +if sys.version_info.minor >= 9: # noqa from numpy.typing import NDArray # this syntax works starting on Python 3.9 diff --git a/xdem/coreg/__init__.py b/xdem/coreg/__init__.py index a20d8699e..a1a34a8b0 100644 --- a/xdem/coreg/__init__.py +++ b/xdem/coreg/__init__.py @@ -16,8 +16,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -DEM coregistration classes and functions, including affine methods, bias corrections (i.e. non-affine) and filters. +"""DEM coregistration classes and functions, including affine methods, +bias corrections (i.e. non-affine) and filters. """ from xdem.coreg.affine import ( # noqa diff --git a/xdem/coreg/affine.py b/xdem/coreg/affine.py index fd69eecbb..b4429852e 100644 --- a/xdem/coreg/affine.py +++ b/xdem/coreg/affine.py @@ -22,7 +22,8 @@ import logging import warnings -from typing import Any, Callable, Iterable, Literal, TypeVar +from collections.abc import Callable, Iterable +from typing import Any, Literal, TypeVar import xdem.coreg.base @@ -80,18 +81,16 @@ def _check_inputs_bin_before_fit( bin_sizes: int | dict[str, int | Iterable[float]], bin_statistic: Callable[[NDArrayf], np.floating[Any]], ) -> None: - """ - Check input types of fit or bin_and_fit affine functions. + """Check input types of fit or bin_and_fit affine functions. :param bin_before_fit: Whether to bin data before fitting the coregistration function. 
:param fit_optimizer: Optimizer to minimize the coregistration function. :param bin_sizes: Size (if integer) or edges (if iterable) for binning variables later passed in .fit(). :param bin_statistic: Statistic of central tendency (e.g., mean) to apply during the binning. """ - if not callable(fit_optimizer): raise TypeError( - "Argument `fit_optimizer` must be a function (callable), " "got {}.".format(type(fit_optimizer)) + f"Argument `fit_optimizer` must be a function (callable), got {type(fit_optimizer)}.", ) if bin_before_fit: @@ -103,12 +102,12 @@ def _check_inputs_bin_before_fit( ): raise TypeError( "Argument `bin_sizes` must be an integer, or a dictionary of integers or iterables, " - "got {}.".format(type(bin_sizes)) + f"got {type(bin_sizes)}.", ) if not callable(bin_statistic): raise TypeError( - "Argument `bin_statistic` must be a function (callable), " "got {}.".format(type(bin_statistic)) + f"Argument `bin_statistic` must be a function (callable), got {type(bin_statistic)}.", ) @@ -119,8 +118,8 @@ def _iterate_method( tolerance: float, max_iterations: int, ) -> Any: - """ - Function to iterate a method (e.g. ICP, Nuth and Kääb) until it reaches a tolerance or maximum number of iterations. + """Function to iterate a method (e.g. ICP, Nuth and Kääb) until it reaches + a tolerance or maximum number of iterations. :param method: Method that needs to be iterated to derive a transformation. Take argument "inputs" as its input, and outputs three terms: a "statistic" to compare to tolerance, "updated inputs" with this transformation, and @@ -132,7 +131,6 @@ def _iterate_method( :return: Final output of iterated method. 
""" - # Initiate inputs new_inputs = iterating_input @@ -167,21 +165,19 @@ def _subsample_on_mask_interpolator( area_or_point: Literal["Area", "Point"] | None, z_name: str, ) -> tuple[Callable[[float, float], NDArrayf], None | dict[str, NDArrayf]]: - """ - Mirrors coreg.base._subsample_on_mask, but returning an interpolator of elevation difference and subsampled + """Mirrors coreg.base._subsample_on_mask, but returning an interpolator of elevation difference and subsampled coordinates for efficiency in iterative affine methods. Perform subsampling on mask for raster-raster or point-raster datasets on valid points of all inputs (including potential auxiliary variables), returning coordinates along with an interpolator. """ - # For two rasters if isinstance(ref_elev, np.ndarray) and isinstance(tba_elev, np.ndarray): # Derive coordinates and interpolator coords = _coords(transform=transform, shape=ref_elev.shape, area_or_point=area_or_point, grid=True) tba_elev_interpolator = _reproject_horizontal_shift_samecrs( - tba_elev, src_transform=transform, return_interpolator=True + tba_elev, src_transform=transform, return_interpolator=True, ) # Subsample coordinates @@ -189,7 +185,6 @@ def _subsample_on_mask_interpolator( def sub_dh_interpolator(shift_x: float, shift_y: float) -> NDArrayf: """Elevation difference interpolator for shifted coordinates of the subsample.""" - # TODO: Align array axes in _reproject_horizontal... ? 
# Get interpolator of dh for shifted coordinates; Y and X are inverted here due to raster axes return ref_elev[sub_mask] - tba_elev_interpolator((sub_coords[1] + shift_y, sub_coords[0] + shift_x)) @@ -197,7 +192,7 @@ def sub_dh_interpolator(shift_x: float, shift_y: float) -> NDArrayf: # Subsample auxiliary variables with the mask if aux_vars is not None: sub_bias_vars = {} - for var in aux_vars.keys(): + for var in aux_vars: sub_bias_vars[var] = aux_vars[var][sub_mask] else: sub_bias_vars = None @@ -227,26 +222,24 @@ def sub_dh_interpolator(shift_x: float, shift_y: float) -> NDArrayf: def sub_dh_interpolator(shift_x: float, shift_y: float) -> NDArrayf: """Elevation difference interpolator for shifted coordinates of the subsample.""" - # Always return ref minus tba if ref == "point": return pts_elev[z_name][sub_mask].values - rst_elev_interpolator( - (sub_coords[1] + shift_y, sub_coords[0] + shift_x) + (sub_coords[1] + shift_y, sub_coords[0] + shift_x), ) # Also invert the shift direction on the raster interpolator, so that the shift is the same relative to # the reference (returns the right shift relative to the reference no matter if it is point or raster) - else: - return ( - rst_elev_interpolator((sub_coords[1] - shift_y, sub_coords[0] - shift_x)) - - pts_elev[z_name][sub_mask].values - ) + return ( + rst_elev_interpolator((sub_coords[1] - shift_y, sub_coords[0] - shift_x)) + - pts_elev[z_name][sub_mask].values + ) # Interpolate arrays of bias variables to the subsample point coordinates if aux_vars is not None: sub_bias_vars = {} - for var in aux_vars.keys(): + for var in aux_vars: sub_bias_vars[var] = _interp_points( - array=aux_vars[var], transform=transform, points=sub_coords, area_or_point=area_or_point + array=aux_vars[var], transform=transform, points=sub_coords, area_or_point=area_or_point, ) else: sub_bias_vars = None @@ -264,8 +257,8 @@ def _preprocess_pts_rst_subsample_interpolator( z_name: str, aux_vars: None | dict[str, NDArrayf] = None, ) -> 
tuple[Callable[[float, float], NDArrayf], None | dict[str, NDArrayf], int]: - """ - Mirrors coreg.base._preprocess_pts_rst_subsample, but returning an interpolator for efficiency in iterative methods. + """Mirrors coreg.base._preprocess_pts_rst_subsample, + but returning an interpolator for efficiency in iterative methods. Pre-process raster-raster or point-raster datasets into an elevation difference interpolator at the same points, and subsample arrays for auxiliary variables, with subsampled coordinates to evaluate the interpolator. @@ -273,7 +266,6 @@ def _preprocess_pts_rst_subsample_interpolator( Returns dh interpolator, tuple of 1D arrays of subsampled coordinates, and dictionary of 1D arrays of subsampled auxiliary variables. """ - # Get subsample mask (a 2D array for raster-raster, a 1D array of length the point data for point-raster) sub_mask = _get_subsample_mask_pts_rst( params_random=params_random, @@ -313,8 +305,7 @@ def _preprocess_pts_rst_subsample_interpolator( def _nuth_kaab_fit_func(xx: NDArrayf, *params: tuple[float, float, float]) -> NDArrayf: - """ - Nuth and Kääb (2011) fitting function. + """Nuth and Kääb (2011) fitting function. Describes the elevation differences divided by the slope tangente (y) as a 1D function of the aspect. @@ -336,8 +327,7 @@ def _nuth_kaab_bin_fit( aspect: NDArrayf, params_fit_or_bin: InFitOrBinDict, ) -> tuple[float, float, float]: - """ - Optimize the Nuth and Kääb (2011) function based on observed values of elevation differences, slope tangent and + """Optimize the Nuth and Kääb (2011) function based on observed values of elevation differences, slope tangent and aspect at the same locations, using either fitting or binning + fitting. :param dh: 1D array of elevation differences (in georeferenced unit, typically meters). @@ -348,7 +338,6 @@ def _nuth_kaab_bin_fit( :returns: Optimized parameters of Nuth and Kääb (2011) fit function: easting, northing, and vertical offsets (in georeferenced unit). 
""" - # Slope tangents near zero were removed beforehand, so errors should never happen here with np.errstate(divide="ignore", invalid="ignore"): y = dh / slope_tan @@ -386,15 +375,13 @@ def _nuth_kaab_aux_vars( ref_elev: NDArrayf | gpd.GeoDataFrame, tba_elev: NDArrayf | gpd.GeoDataFrame, ) -> tuple[NDArrayf, NDArrayf]: - """ - Deriving slope tangent and aspect auxiliary variables expected by the Nuth and Kääb (2011) algorithm. + """Deriving slope tangent and aspect auxiliary variables expected by the Nuth and Kääb (2011) algorithm. :return: Slope tangent and aspect (radians). """ def _calculate_slope_and_aspect_nuthkaab(dem: NDArrayf) -> tuple[NDArrayf, NDArrayf]: - """ - Calculate the tangent of slope and aspect of a DEM, in radians, as needed for the Nuth & Kaab algorithm. + """Calculate the tangent of slope and aspect of a DEM, in radians, as needed for the Nuth & Kaab algorithm. For now, this method using the gradient is more efficient than slope/aspect derived in the terrain module. @@ -402,7 +389,6 @@ def _calculate_slope_and_aspect_nuthkaab(dem: NDArrayf) -> tuple[NDArrayf, NDArr :returns: The tangent of slope and aspect (in radians) of the DEM. """ - # Gradient implementation # # Calculate the gradient of the slope gradient_y, gradient_x = np.gradient(dem) @@ -424,11 +410,11 @@ def _calculate_slope_and_aspect_nuthkaab(dem: NDArrayf) -> tuple[NDArrayf, NDArr raise TypeError( "The Nuth and Kääb (2011) coregistration does not support two point clouds, one elevation " - "dataset in the pair must be a DEM." 
+ "dataset in the pair must be a DEM.", ) # If inputs are both rasters, derive terrain attributes from ref and get 2D dh interpolator - elif isinstance(ref_elev, np.ndarray) and isinstance(tba_elev, np.ndarray): + if isinstance(ref_elev, np.ndarray) and isinstance(tba_elev, np.ndarray): # Derive slope and aspect from the reference as default slope_tan, aspect = _calculate_slope_and_aspect_nuthkaab(ref_elev) @@ -455,8 +441,7 @@ def _nuth_kaab_iteration_step( res: tuple[int, int], params_fit_bin: InFitOrBinDict, ) -> tuple[tuple[float, float, float], float]: - """ - Iteration step of Nuth and Kääb (2011), passed to the iterate_method function. + """Iteration step of Nuth and Kääb (2011), passed to the iterate_method function. Returns newly incremented coordinate offsets, and new statistic to compare to tolerance to reach. @@ -467,7 +452,6 @@ def _nuth_kaab_iteration_step( :param aspect: Array of aspect. :param res: Resolution of DEM. """ - # Calculate the elevation difference with offsets dh_step = dh_interpolator(coords_offsets[0], coords_offsets[1]) # Tests show that using the median vertical offset significantly speeds up the algorithm compared to @@ -482,7 +466,7 @@ def _nuth_kaab_iteration_step( raise ValueError( "The subsample contains no more valid values. This can happen is the horizontal shift to " "correct is very large, or if the algorithm diverged. To ensure all possible points can " - "be used at any iteration step, use subsample=1." 
+ "be used at any iteration step, use subsample=1.", ) dh_step = dh_step[mask_valid] slope_tan = slope_tan[mask_valid] @@ -490,7 +474,7 @@ def _nuth_kaab_iteration_step( # Estimate the horizontal shift from the implementation by Nuth and Kääb (2011) easting_offset, northing_offset, _ = _nuth_kaab_bin_fit( - dh=dh_step, slope_tan=slope_tan, aspect=aspect, params_fit_or_bin=params_fit_bin + dh=dh_step, slope_tan=slope_tan, aspect=aspect, params_fit_or_bin=params_fit_bin, ) # Increment the offsets by the new offset @@ -519,11 +503,10 @@ def nuth_kaab( params_fit_or_bin: InFitOrBinDict, params_random: InRandomDict, z_name: str, - weights: NDArrayf | None = None, - **kwargs: Any, + weights: NDArrayf | None = None, # noqa: ARG001 + **kwargs: Any, # noqa: ARG001 ) -> tuple[tuple[float, float, float], int]: - """ - Nuth and Kääb (2011) iterative coregistration. + """Nuth and Kääb (2011) iterative coregistration. :return: Final estimated offset: east, north, vertical (in georeferenced units). """ @@ -534,7 +517,7 @@ def nuth_kaab( raise NotImplementedError( f"NuthKaab coregistration only works with a projected CRS, current CRS is {crs}. Reproject " f"your DEMs with DEM.reproject() in a local projected CRS such as UTM, that you can find " - f"using DEM.get_metric_crs()." + f"using DEM.get_metric_crs().", ) # First, derive auxiliary variables of Nuth and Kääb (slope tangent, and aspect) for any point-raster input @@ -586,15 +569,13 @@ def _dh_minimize_fit_func( coords_offsets: tuple[float, float], dh_interpolator: Callable[[float, float], NDArrayf], ) -> NDArrayf: - """ - Fitting function of dh minimization method, returns the NMAD of elevation differences. + """Fitting function of dh minimization method, returns the NMAD of elevation differences. :param coords_offsets: Coordinate offsets at this iteration (easting, northing) in georeferenced unit. 
:param dh_interpolator: Interpolator returning elevation differences at the subsampled points for a certain horizontal offset (see _preprocess_pts_rst_subsample_interpolator). :returns: NMAD of residuals. """ - # Calculate the elevation difference dh = dh_interpolator(coords_offsets[0], coords_offsets[1]).flatten() @@ -606,8 +587,7 @@ def _dh_minimize_fit( params_fit_or_bin: InFitOrBinDict, **kwargs: Any, ) -> tuple[float, float, float]: - """ - Optimize the statistical dispersion of the elevation differences residuals. + """Optimize the statistical dispersion of the elevation differences residuals. :param dh_interpolator: Interpolator returning elevation differences at the subsampled points for a certain horizontal offset (see _preprocess_pts_rst_subsample_interpolator). @@ -626,7 +606,7 @@ def fit_func(coords_offsets: tuple[float, float]) -> np.floating[Any]: # Default parameters depending on optimizer used if params_fit_or_bin["fit_minimizer"] == scipy.optimize.minimize: - if "method" not in kwargs.keys(): + if "method" not in kwargs: kwargs.update({"method": "Nelder-Mead"}) # This method has trouble when initialized with 0,0, so defaulting to 1,1 # (tip from Simon Gascoin: https://github.com/GlacioHack/xdem/pull/595#issuecomment-2387104719) @@ -634,9 +614,9 @@ def fit_func(coords_offsets: tuple[float, float]) -> np.floating[Any]: elif _HAS_NOISYOPT and params_fit_or_bin["fit_minimizer"] == minimizeCompass: kwargs.update({"errorcontrol": False}) - if "deltatol" not in kwargs.keys(): + if "deltatol" not in kwargs: kwargs.update({"deltatol": 0.004}) - if "feps" not in kwargs.keys(): + if "feps" not in kwargs: kwargs.update({"feps": 10e-5}) results = params_fit_or_bin["fit_minimizer"](fit_func, init_offsets, **kwargs) @@ -658,16 +638,14 @@ def dh_minimize( params_random: InRandomDict, params_fit_or_bin: InFitOrBinDict, z_name: str, - weights: NDArrayf | None = None, - **kwargs: Any, + weights: NDArrayf | None = None, # noqa: ARG001 + **kwargs: Any, # noqa: ARG001 ) 
-> tuple[tuple[float, float, float], int]: - """ - Elevation difference minimization coregistration method, for any point-raster or raster-raster input, + """Elevation difference minimization coregistration method, for any point-raster or raster-raster input, including subsampling and interpolation to the same points. :return: Final estimated offset: east, north, vertical (in georeferenced units). """ - logging.info("Running dh minimization coregistration.") # Perform preprocessing: subsampling and interpolation of inputs and auxiliary vars at same points @@ -704,12 +682,9 @@ def vertical_shift( vshift_reduc_func: Callable[[NDArrayf], np.floating[Any]], z_name: str, weights: NDArrayf | None = None, - **kwargs: Any, + **kwargs: Any, # noqa: ARG001 ) -> tuple[float, int]: - """ - Vertical shift coregistration, for any point-raster or raster-raster input, including subsampling. - """ - + """Vertical shift coregistration, for any point-raster or raster-raster input, including subsampling.""" logging.info("Running vertical shift coregistration") # Pre-process point-raster inputs to the same subsampled points @@ -748,8 +723,7 @@ def vertical_shift( class AffineCoreg(Coreg): - """ - Generic affine coregistration class. + """Generic affine coregistration class. Builds additional common affine methods on top of the generic Coreg class. Made to be subclassed. 
@@ -766,7 +740,6 @@ def __init__( meta: dict[str, Any] | None = None, ) -> None: """Instantiate a generic AffineCoreg method.""" - if meta is None: meta = {} # Define subsample size @@ -776,7 +749,8 @@ def __init__( if matrix is not None: with warnings.catch_warnings(): # This error is fixed in the upcoming 1.8 - warnings.filterwarnings("ignore", message="`np.float` is a deprecated alias for the builtin `float`") + warnings.filterwarnings("ignore", + message="`np.float` is a deprecated alias for the builtin `float`") valid_matrix = pytransform3d.transformations.check_transform(matrix) self._meta["outputs"]["affine"] = {"matrix": valid_matrix} @@ -787,12 +761,10 @@ def to_matrix(self) -> NDArrayf: return self._to_matrix_func() def to_translations(self) -> tuple[float, float, float]: - """ - Extract X/Y/Z translations from the affine transformation matrix. + """Extract X/Y/Z translations from the affine transformation matrix. :return: Easting, northing and vertical translations (in georeferenced unit). """ - matrix = self.to_matrix() shift_x = matrix[0, 3] shift_y = matrix[1, 3] @@ -801,14 +773,12 @@ def to_translations(self) -> tuple[float, float, float]: return shift_x, shift_y, shift_z def to_rotations(self) -> tuple[float, float, float]: - """ - Extract X/Y/Z euler rotations (extrinsic convention) from the affine transformation matrix. + """Extract X/Y/Z euler rotations (extrinsic convention) from the affine transformation matrix. Warning: This function only works for a rigid transformation (rotation and translation). :return: Extrinsinc Euler rotations along easting, northing and vertical directions (degrees). 
""" - matrix = self.to_matrix() rots = pytransform3d.rotations.euler_from_matrix(matrix, i=0, j=1, k=2, extrinsic=True, strict_check=True) rots = np.rad2deg(np.array(rots)) @@ -836,14 +806,12 @@ def _preprocess_rst_pts_subsample_interpolator( area_or_point: Literal["Area", "Point"] | None = None, z_name: str = "z", ) -> tuple[Callable[[float, float], NDArrayf], None | dict[str, NDArrayf]]: - """ - Pre-process raster-raster or point-raster datasets into 1D arrays subsampled at the same points + """Pre-process raster-raster or point-raster datasets into 1D arrays subsampled at the same points (and interpolated in the case of point-raster input). Return 1D arrays of reference elevation, to-be-aligned elevation and dictionary of 1D arrays of auxiliary variables at subsampled points. """ - # Get random parameters params_random = self._meta["inputs"]["random"] @@ -877,8 +845,7 @@ def _preprocess_rst_pts_subsample_interpolator( @classmethod def from_matrix(cls, matrix: NDArrayf) -> AffineCoreg: - """ - Instantiate a generic Coreg class from a transformation matrix. + """Instantiate a generic Coreg class from a transformation matrix. :param matrix: A 4x4 transformation matrix. Shape must be (4,4). @@ -896,8 +863,7 @@ def from_matrix(cls, matrix: NDArrayf) -> AffineCoreg: @classmethod def from_translations(cls, x_off: float = 0.0, y_off: float = 0.0, z_off: float = 0.0) -> AffineCoreg: - """ - Instantiate a generic Coreg class from a X/Y/Z translation. + """Instantiate a generic Coreg class from a X/Y/Z translation. :param x_off: The offset to apply in the X (west-east) direction. :param y_off: The offset to apply in the Y (south-north) direction. @@ -918,8 +884,7 @@ def from_translations(cls, x_off: float = 0.0, y_off: float = 0.0, z_off: float @classmethod def from_rotations(cls, x_rot: float = 0.0, y_rot: float = 0.0, z_rot: float = 0.0) -> AffineCoreg: - """ - Instantiate a generic Coreg class from a X/Y/Z rotation. 
+ """Instantiate a generic Coreg class from a X/Y/Z rotation. :param x_rot: The rotation (degrees) to apply around the X (west-east) direction. :param y_rot: The rotation (degrees) to apply around the Y (south-north) direction. @@ -929,7 +894,6 @@ def from_rotations(cls, x_rot: float = 0.0, y_rot: float = 0.0, z_rot: float = 0 :returns: An instantiated generic Coreg class. """ - # Initialize a diagonal matrix matrix = np.diag(np.ones(4, dtype=float)) # Convert rotations to radians @@ -953,8 +917,7 @@ def _to_matrix_func(self) -> NDArrayf: class VerticalShift(AffineCoreg): - """ - Vertical translation alignment. + """Vertical translation alignment. Estimates the mean vertical offset between two elevation datasets based on a reductor function (median, mean, or any custom reductor function). @@ -964,11 +927,10 @@ class VerticalShift(AffineCoreg): """ def __init__( - self, vshift_reduc_func: Callable[[NDArrayf], np.floating[Any]] = np.median, subsample: float | int = 1.0 + self, vshift_reduc_func: Callable[[NDArrayf], np.floating[Any]] = np.median, subsample: float | int = 1.0, ) -> None: # pylint: # disable=super-init-not-called - """ - Instantiate a vertical shift alignment object. + """Instantiate a vertical shift alignment object. :param vshift_reduc_func: Reductor function to estimate the central tendency of the vertical shift. Defaults to the median. 
@@ -992,7 +954,6 @@ def _fit_rst_rst( **kwargs: Any, ) -> None: """Estimate the vertical shift using the vshift_func.""" - # Method is the same for 2D or 1D elevation differences, so we can simply re-direct to fit_rst_pts self._fit_rst_pts( ref_elev=ref_elev, @@ -1020,7 +981,6 @@ def _fit_rst_pts( **kwargs: Any, ) -> None: """Estimate the vertical shift using the vshift_func.""" - # Get parameters stored in class params_random = self._meta["inputs"]["random"] @@ -1051,8 +1011,7 @@ def _to_matrix_func(self) -> NDArrayf: class ICP(AffineCoreg): - """ - Iterative closest point registration, based on Besl and McKay (1992), https://doi.org/10.1117/12.57955. + """Iterative closest point registration, based on Besl and McKay (1992), https://doi.org/10.1117/12.57955. Estimates a rigid transform (rotation + translation) between two elevation datasets. @@ -1073,8 +1032,7 @@ def __init__( num_levels: int = 6, subsample: float | int = 5e5, ) -> None: - """ - Instantiate an ICP coregistration object. + """Instantiate an ICP coregistration object. :param max_iterations: The maximum allowed iterations before stopping. :param tolerance: The residual change threshold after which to stop the iterations. 
@@ -1107,7 +1065,6 @@ def _fit_rst_rst( **kwargs: Any, ) -> None: """Estimate the rigid transform from tba_dem to ref_dem.""" - if weights is not None: warnings.warn("ICP was given weights, but does not support it.") @@ -1122,7 +1079,7 @@ def _fit_rst_rst( normal_up = 1 - np.linalg.norm([normal_east, normal_north], axis=0) valid_mask = np.logical_and.reduce( - (inlier_mask, np.isfinite(ref_elev), np.isfinite(normal_east), np.isfinite(normal_north)) + (inlier_mask, np.isfinite(ref_elev), np.isfinite(normal_east), np.isfinite(normal_north)), ) subsample_mask = self._get_subsample_on_valid_mask(valid_mask=valid_mask) @@ -1201,7 +1158,7 @@ def _fit_rst_pts( normal_east[valid_mask], normal_north[valid_mask], normal_up[valid_mask], - ] + ], ).squeeze() # TODO: Should be a way to not duplicate this column and just feed it directly @@ -1222,7 +1179,7 @@ def _fit_rst_pts( points["point"] = point_elev[["E", "N", z_name, "nx", "ny", "nz"]].values - for key in points: + for key in points: # noqa: PLC0206 points[key] = points[key][~np.any(np.isnan(points[key]), axis=1)].astype("float32") points[key][:, 0] -= resolution[0] / 2 points[key][:, 1] -= resolution[1] / 2 @@ -1244,7 +1201,7 @@ def _fit_rst_pts( raise ValueError( "Not enough valid points in input data." f"'reference_dem' had {points['ref'].size} valid points." - f"'dem_to_be_aligned' had {points['tba'].size} valid points." + f"'dem_to_be_aligned' had {points['tba'].size} valid points.", ) # If raster was reference, invert the matrix @@ -1268,8 +1225,7 @@ def _fit_rst_pts( class NuthKaab(AffineCoreg): - """ - Nuth and Kääb (2011) coregistration, https://doi.org/10.5194/tc-5-271-2011. + """Nuth and Kääb (2011) coregistration, https://doi.org/10.5194/tc-5-271-2011. Estimate horizontal and vertical translations by iterative slope/aspect alignment. 
@@ -1288,8 +1244,7 @@ def __init__( bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, subsample: int | float = 5e5, ) -> None: - """ - Instantiate a new Nuth and Kääb (2011) coregistration object. + """Instantiate a new Nuth and Kääb (2011) coregistration object. :param max_iterations: The maximum allowed iterations before stopping. :param offset_threshold: The residual offset threshold after which to stop the iterations (in pixels). @@ -1300,10 +1255,12 @@ def __init__( :param bin_statistic: Statistic of central tendency (e.g., mean) to apply during the binning. :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. """ - # Input checks _check_inputs_bin_before_fit( - bin_before_fit=bin_before_fit, fit_optimizer=fit_optimizer, bin_sizes=bin_sizes, bin_statistic=bin_statistic + bin_before_fit=bin_before_fit, + fit_optimizer=fit_optimizer, + bin_sizes=bin_sizes, + bin_statistic=bin_statistic, ) # Define iterative parameters @@ -1340,7 +1297,6 @@ def _fit_rst_rst( **kwargs: Any, ) -> None: """Estimate the x/y/z offset between two DEMs.""" - # Method is the same for 2D or 1D elevation differences, so we can simply re-direct to fit_rst_pts self._fit_rst_pts( ref_elev=ref_elev, @@ -1368,10 +1324,7 @@ def _fit_rst_pts( bias_vars: dict[str, NDArrayf] | None = None, **kwargs: Any, ) -> None: - """ - Estimate the x/y/z offset between a DEM and points cloud. 
- """ - + """Estimate the x/y/z offset between a DEM and points cloud.""" # Get parameters stored in class params_random = self._meta["inputs"]["random"] params_fit_or_bin = self._meta["inputs"]["fitorbin"] @@ -1400,7 +1353,6 @@ def _fit_rst_pts( def _to_matrix_func(self) -> NDArrayf: """Return a transformation matrix from the estimated offsets.""" - # We add a translation, on the last column matrix = np.diag(np.ones(4, dtype=float)) matrix[0, 3] += self._meta["outputs"]["affine"]["shift_x"] @@ -1411,8 +1363,7 @@ def _to_matrix_func(self) -> NDArrayf: class DhMinimize(AffineCoreg): - """ - Elevation difference minimization coregistration. + """Elevation difference minimization coregistration. Estimates vertical and horizontal translations. @@ -1427,14 +1378,12 @@ def __init__( fit_loss_func: Callable[[NDArrayf], np.floating[Any]] = nmad, subsample: int | float = 5e5, ) -> None: - """ - Instantiate dh minimization object. + """Instantiate dh minimization object. :param fit_minimizer: Minimizer for the coregistration function. :param fit_loss_func: Loss function for the minimization of residuals. :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. 
""" - meta_fit = {"fit_or_bin": "fit", "fit_minimizer": fit_minimizer, "fit_loss_func": fit_loss_func} super().__init__(subsample=subsample, meta=meta_fit) # type: ignore @@ -1506,7 +1455,6 @@ def _fit_rst_pts( def _to_matrix_func(self) -> NDArrayf: """Return a transformation matrix from the estimated offsets.""" - matrix = np.diag(np.ones(4, dtype=float)) matrix[0, 3] += self._meta["outputs"]["affine"]["shift_x"] matrix[1, 3] += self._meta["outputs"]["affine"]["shift_y"] diff --git a/xdem/coreg/base.py b/xdem/coreg/base.py index 242278e34..5e534da83 100644 --- a/xdem/coreg/base.py +++ b/xdem/coreg/base.py @@ -25,13 +25,10 @@ import inspect import logging import warnings +from collections.abc import Callable, Generator, Iterable, Mapping from typing import ( Any, - Callable, - Generator, - Iterable, Literal, - Mapping, TypedDict, TypeVar, overload, @@ -136,8 +133,7 @@ def _calculate_ddem_stats( stats_list: tuple[Callable[[NDArrayf], Number], ...] | None = None, stats_labels: tuple[str, ...] | None = None, ) -> dict[str, float]: - """ - Calculate standard statistics of ddem, e.g., to be used to compare before/after coregistration. + """Calculate standard statistics of ddem, e.g., to be used to compare before/after coregistration. Default statistics are: count, mean, median, NMAD and std. :param ddem: The DEM difference to be analyzed. 
@@ -155,11 +151,11 @@ def _calculate_ddem_stats( # Check that stats_list and stats_labels are correct if len(stats_list) != len(stats_labels): raise ValueError("Number of items in `stats_list` and `stats_labels` should be identical.") - for stat, label in zip(stats_list, stats_labels): + for stat, label in zip(stats_list, stats_labels, strict=False): if not callable(stat): - raise ValueError(f"Item {stat} in `stats_list` should be a callable/function.") + raise TypeError(f"Item {stat} in `stats_list` should be a callable/function.") if not isinstance(label, str): - raise ValueError(f"Item {label} in `stats_labels` should be a string.") + raise TypeError(f"Item {label} in `stats_labels` should be a string.") # Get the mask of valid and inliers pixels nan_mask = ~np.isfinite(ddem) @@ -169,7 +165,7 @@ def _calculate_ddem_stats( # Calculate stats stats = {} - for stat, label in zip(stats_list, stats_labels): + for stat, label in zip(stats_list, stats_labels, strict=False): stats[label] = stat(valid_ddem) return stats @@ -184,12 +180,11 @@ def _preprocess_coreg_fit_raster_raster( area_or_point: Literal["Area", "Point"] | None = None, ) -> tuple[NDArrayf, NDArrayf, NDArrayb, affine.Affine, rio.crs.CRS, Literal["Area", "Point"] | None]: """Pre-processing and checks of fit() for two raster input.""" - # Validate that both inputs are valid array-like (or Raster) types. if not all(isinstance(dem, (np.ndarray, gu.Raster)) for dem in (reference_dem, dem_to_be_aligned)): raise ValueError( "Both DEMs need to be array-like (implement a numpy array interface)." - f"'reference_dem': {reference_dem}, 'dem_to_be_aligned': {dem_to_be_aligned}" + f"'reference_dem': {reference_dem}, 'dem_to_be_aligned': {dem_to_be_aligned}", ) # If both DEMs are Rasters, validate that 'dem_to_be_aligned' is in the right grid. Then extract its data. 
@@ -289,7 +284,6 @@ def _preprocess_coreg_fit_raster_point( area_or_point: Literal["Area", "Point"] | None = None, ) -> tuple[NDArrayf, gpd.GeoDataFrame, NDArrayb, affine.Affine, rio.crs.CRS, Literal["Area", "Point"] | None]: """Pre-processing and checks of fit for raster-point input.""" - # TODO: Convert to point cloud once class is done # TODO: Raise warnings consistently with raster-raster function, see Amelie's Dask PR? #525 if isinstance(raster_elev, gu.Raster): @@ -330,10 +324,9 @@ def _preprocess_coreg_fit_raster_point( def _preprocess_coreg_fit_point_point( - reference_elev: gpd.GeoDataFrame, to_be_aligned_elev: gpd.GeoDataFrame + reference_elev: gpd.GeoDataFrame, to_be_aligned_elev: gpd.GeoDataFrame, ) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: """Pre-processing and checks of fit for point-point input.""" - ref_elev = reference_elev tba_elev = to_be_aligned_elev.to_crs(crs=reference_elev.crs) @@ -356,7 +349,6 @@ def _preprocess_coreg_fit( Literal["Area", "Point"] | None, ]: """Pre-processing and checks of fit for any input.""" - if not all( isinstance(elev, (np.ndarray, gu.Raster, gpd.GeoDataFrame)) for elev in (reference_elev, to_be_aligned_elev) ): @@ -403,7 +395,7 @@ def _preprocess_coreg_fit( # If both inputs are points, simply reproject to the same CRS else: ref_elev, tba_elev = _preprocess_coreg_fit_point_point( - reference_elev=reference_elev, to_be_aligned_elev=to_be_aligned_elev + reference_elev=reference_elev, to_be_aligned_elev=to_be_aligned_elev, ) return ref_elev, tba_elev, inlier_mask, transform, crs, area_or_point @@ -415,9 +407,8 @@ def _preprocess_coreg_apply( crs: rio.crs.CRS | None = None, ) -> tuple[NDArrayf | gpd.GeoDataFrame, affine.Affine, rio.crs.CRS]: """Pre-processing and checks of apply for any input.""" - if not isinstance(elev, (np.ndarray, gu.Raster, gpd.GeoDataFrame)): - raise ValueError("Input elevation data should be a raster, an array or a geodataframe.") + raise TypeError("Input elevation data should be a raster, an 
array or a geodataframe.") # If input is geodataframe if isinstance(elev, gpd.GeoDataFrame): @@ -458,7 +449,6 @@ def _postprocess_coreg_apply_pts( applied_elev: gpd.GeoDataFrame, ) -> gpd.GeoDataFrame: """Post-processing and checks of apply for point input.""" - # TODO: Convert CRS back if the CRS did not match the one of the fit? return applied_elev @@ -472,13 +462,11 @@ def _postprocess_coreg_apply_rst( resample: bool, resampling: rio.warp.Resampling | None = None, ) -> tuple[NDArrayf | gu.Raster, affine.Affine]: - """ - Post-processing and checks of apply for raster input. + """Post-processing and checks of apply for raster input. Here, "elev" and "transform" corresponds to user input, and are required to transform back the output that is composed of "applied_elev" and "out_transform". """ - # Ensure the dtype is OK applied_elev = applied_elev.astype("float32") @@ -486,7 +474,7 @@ def _postprocess_coreg_apply_rst( if isinstance(elev, gu.Raster): nodata = elev.nodata else: - nodata = raster._default_nodata(elev.dtype) + nodata = raster._default_nodata(elev.dtype) # noqa: SLF001 # Resample the array on the original grid if resample: @@ -520,8 +508,7 @@ def _postprocess_coreg_apply_rst( if isinstance(elev, gu.Raster): out_dem = elev.from_array(applied_elev, out_transform, crs, nodata=elev.nodata) return out_dem, out_transform - else: - return applied_elev, out_transform + return applied_elev, out_transform def _postprocess_coreg_apply( @@ -533,13 +520,11 @@ def _postprocess_coreg_apply( resample: bool, resampling: rio.warp.Resampling | None = None, ) -> tuple[NDArrayf | gpd.GeoDataFrame, affine.Affine]: - """ - Post-processing and checks of apply for any input. + """Post-processing and checks of apply for any input. Here, "elev" and "transform" corresponds to user input, and are required to transform back the output that is composed of "applied_elev" and "out_transform". 
""" - # Define resampling resampling = resampling if isinstance(resampling, rio.warp.Resampling) else _resampling_method_from_str(resampling) @@ -566,12 +551,10 @@ def _postprocess_coreg_apply( def _get_subsample_on_valid_mask(params_random: InRandomDict, valid_mask: NDArrayb) -> NDArrayb: - """ - Get mask of values to subsample on valid mask (works for both 1D or 2D arrays). + """Get mask of values to subsample on valid mask (works for both 1D or 2D arrays). :param valid_mask: Mask of valid values (inlier and not nodata). """ - # This should never happen if params_random["subsample"] is None: raise ValueError("Subsample should have been defined in metadata before reaching this class method.") @@ -580,11 +563,11 @@ def _get_subsample_on_valid_mask(params_random: InRandomDict, valid_mask: NDArra if np.count_nonzero(valid_mask) == 0: raise ValueError( "There is no valid points common to the input and auxiliary data (bias variables, or " - "derivatives required for this method, for example slope, aspect, etc)." + "derivatives required for this method, for example slope, aspect, etc).", ) # If subsample is not equal to one, subsampling should be performed. 
- elif params_random["subsample"] != 1.0: + if params_random["subsample"] != 1.0: # Build a low memory masked array with invalid values masked to pass to subsampling ma_valid = np.ma.masked_array(data=np.ones(np.shape(valid_mask), dtype=bool), mask=~valid_mask) @@ -607,7 +590,9 @@ def _get_subsample_on_valid_mask(params_random: InRandomDict, valid_mask: NDArra subsample_mask = valid_mask logging.debug( - "Using a subsample of %d among %d valid values.", np.count_nonzero(subsample_mask), np.count_nonzero(valid_mask) + "Using a subsample of %d among %d valid values.", + np.count_nonzero(subsample_mask), + np.count_nonzero(valid_mask), ) return subsample_mask @@ -622,19 +607,17 @@ def _get_subsample_mask_pts_rst( area_or_point: Literal["Area", "Point"] | None, aux_vars: None | dict[str, NDArrayf] = None, ) -> NDArrayb: - """ - Get subsample mask for raster-raster or point-raster datasets on valid points of all inputs (including + """Get subsample mask for raster-raster or point-raster datasets on valid points of all inputs (including potential auxiliary variables). Returns a boolean array to use for subsampling (2D for raster-raster, 1D for point-raster to be used on point). """ - # TODO: Return more detailed error message for no valid points (which variable was full of NaNs?) if isinstance(ref_elev, gpd.GeoDataFrame) and isinstance(tba_elev, gpd.GeoDataFrame): raise TypeError( "This pre-processing function is only intended for raster-point or raster-raster methods, " - "not point-point methods." 
+ "not point-point methods.", ) # For two rasters @@ -648,7 +631,7 @@ def _get_subsample_mask_pts_rst( np.isfinite(ref_elev), np.isfinite(tba_elev), *(np.isfinite(var) for var in aux_vars.values()), - ) + ), ) else: valid_mask = np.logical_and.reduce((inlier_mask, np.isfinite(ref_elev), np.isfinite(tba_elev))) @@ -672,7 +655,7 @@ def _get_subsample_mask_pts_rst( # Get valid mask ahead of subsampling to have the exact number of requested subsamples by user if aux_vars is not None: valid_mask = np.logical_and.reduce( - (inlier_mask, np.isfinite(rst_elev), *(np.isfinite(var) for var in aux_vars.values())) + (inlier_mask, np.isfinite(rst_elev), *(np.isfinite(var) for var in aux_vars.values())), ) else: valid_mask = np.logical_and.reduce((inlier_mask, np.isfinite(rst_elev))) @@ -682,7 +665,7 @@ def _get_subsample_mask_pts_rst( # Interpolates boolean mask as integers # TODO: Pass area_or_point all the way to here valid_mask = np.floor( - _interp_points(array=valid_mask, transform=transform, points=pts, area_or_point=area_or_point) + _interp_points(array=valid_mask, transform=transform, points=pts, area_or_point=area_or_point), ).astype(bool) # If there is a subsample, it needs to be done now on the point dataset to reduce later calculations @@ -702,14 +685,12 @@ def _subsample_on_mask( area_or_point: Literal["Area", "Point"] | None, z_name: str, ) -> tuple[NDArrayf, NDArrayf, None | dict[str, NDArrayf]]: - """ - Perform subsampling on mask for raster-raster or point-raster datasets on valid points of all inputs (including + """Perform subsampling on mask for raster-raster or point-raster datasets on valid points of all inputs (including potential auxiliary variables). Returns 1D arrays of subsampled inputs: reference elevation, to-be-aligned elevation and auxiliary variables (in dictionary). 
""" - # For two rasters if isinstance(ref_elev, np.ndarray) and isinstance(tba_elev, np.ndarray): @@ -718,7 +699,7 @@ def _subsample_on_mask( sub_tba = tba_elev[sub_mask] if aux_vars is not None: sub_bias_vars = {} - for var in aux_vars.keys(): + for var in aux_vars: sub_bias_vars[var] = aux_vars[var][sub_mask] else: sub_bias_vars = None @@ -750,9 +731,9 @@ def _subsample_on_mask( # Interpolate arrays of bias variables to the subsample point coordinates if aux_vars is not None: sub_bias_vars = {} - for var in aux_vars.keys(): + for var in aux_vars: sub_bias_vars[var] = _interp_points( - array=aux_vars[var], transform=transform, points=pts, area_or_point=area_or_point + array=aux_vars[var], transform=transform, points=pts, area_or_point=area_or_point, ) else: sub_bias_vars = None @@ -766,19 +747,17 @@ def _preprocess_pts_rst_subsample( tba_elev: NDArrayf | gpd.GeoDataFrame, inlier_mask: NDArrayb, transform: rio.transform.Affine, # Never None thanks to Coreg.fit() pre-process - crs: rio.crs.CRS, # Never None thanks to Coreg.fit() pre-process + crs: rio.crs.CRS, # Never None thanks to Coreg.fit() pre-process # noqa: ARG001 area_or_point: Literal["Area", "Point"] | None, z_name: str, aux_vars: None | dict[str, NDArrayf] = None, ) -> tuple[NDArrayf, NDArrayf, None | dict[str, NDArrayf]]: - """ - Pre-process raster-raster or point-raster datasets into 1D arrays subsampled at the same points + """Pre-process raster-raster or point-raster datasets into 1D arrays subsampled at the same points (and interpolated in the case of point-raster input). Return 1D arrays of reference elevation, to-be-aligned elevation and dictionary of 1D arrays of auxiliary variables at subsampled points. 
""" - # Get subsample mask (a 2D array for raster-raster, a 1D array of length the point data for point-raster) sub_mask = _get_subsample_mask_pts_rst( params_random=params_random, @@ -846,8 +825,7 @@ def _bin_or_and_fit_nd( weights: None | NDArrayf = None, **kwargs: Any, ) -> tuple[pd.DataFrame | None, tuple[NDArrayf, Any] | None]: - """ - Generic binning and/or fitting method to model values along N variables for a coregistration/correction, + """Generic binning and/or fitting method to model values along N variables for a coregistration/correction, used for all affine and bias-correction subclasses. Expects either 2D arrays for rasters, or 1D arrays for points. @@ -857,7 +835,6 @@ def _bin_or_and_fit_nd( :param bias_vars: Auxiliary variables for certain bias correction classes, as raster or arrays. :param weights: Array of weights for the coregistration. """ - if fit_or_bin is None: raise ValueError("This function should not be called for methods not supporting fit_or_bin logic.") @@ -869,17 +846,17 @@ def _bin_or_and_fit_nd( nd = params_fit_or_bin["nd"] if nd is not None and len(bias_vars) != nd: raise ValueError( - "A number of {} variable(s) has to be provided through the argument 'bias_vars', " - "got {}.".format(nd, len(bias_vars)) + f"A number of {nd} variable(s) has to be provided through the argument 'bias_vars', " + f"got {len(bias_vars)}.", ) # If bias var names were explicitly passed at instantiation, check that they match the one from the dict if params_fit_or_bin["bias_var_names"] is not None: - if not sorted(bias_vars.keys()) == sorted(params_fit_or_bin["bias_var_names"]): - raise ValueError( - "The keys of `bias_vars` do not match the `bias_var_names` defined during " - "instantiation: {}.".format(params_fit_or_bin["bias_var_names"]) - ) + if sorted(bias_vars.keys()) != sorted(params_fit_or_bin["bias_var_names"]): + msg = ("The keys of `bias_vars` do not match the `bias_var_names` defined during " + "instantiation: 
{}.".format(params_fit_or_bin["bias_var_names"]) + ) + raise ValueError(msg) # Get number of variables nd = len(bias_vars) @@ -918,7 +895,7 @@ def _bin_or_and_fit_nd( absolute_sigma=True, **kwargs, ) - df = None + df_binning = None # Option 2: Run binning and save dataframe of result elif fit_or_bin == "bin": @@ -928,7 +905,7 @@ def _bin_or_and_fit_nd( params_fit_or_bin["bin_statistic"].__name__, ) - df = nd_binning( + df_binning = nd_binning( values=values, list_var=list(bias_vars.values()), list_var_names=list(bias_vars.keys()), @@ -946,7 +923,7 @@ def _bin_or_and_fit_nd( params_fit_or_bin["fit_func"].__name__, ) - df = nd_binning( + df_binning = nd_binning( values=values, list_var=list(bias_vars.values()), list_var_names=list(bias_vars.keys()), @@ -956,10 +933,10 @@ def _bin_or_and_fit_nd( # Now, we need to pass this new data to the fitting function and optimizer # We use only the N-D binning estimates (maximum dimension, equal to length of variable list) - df_nd = df[df.nd == len(bias_vars)] + df_nd = df_binning[df_binning.nd == len(bias_vars)] # We get the middle of bin values for variable, and statistic for the diff - new_vars = [pd.IntervalIndex(df_nd[var_name]).mid.values for var_name in bias_vars.keys()] + new_vars = [pd.IntervalIndex(df_nd[var_name]).mid.values for var_name in bias_vars] new_diff = df_nd[params_fit_or_bin["bin_statistic"].__name__].values # TODO: pass a new sigma based on "count" and original sigma (and correlation?)? 
# sigma values would have to be binned above also @@ -980,7 +957,7 @@ def _bin_or_and_fit_nd( ) logging.debug("%dD bias estimated.", nd) - return df, results + return df_binning, results ############################################### @@ -1008,7 +985,6 @@ def _apply_matrix_pts_arr( invert: bool = False, ) -> tuple[NDArrayf, NDArrayf, NDArrayf]: """Apply matrix to points as arrays with array outputs (to improve speed in some functions).""" - # Invert matrix if required if invert: matrix = invert_matrix(matrix) @@ -1037,8 +1013,7 @@ def _apply_matrix_pts( centroid: tuple[float, float, float] | None = None, z_name: str = "z", ) -> gpd.GeoDataFrame: - """ - Apply a 3D affine transformation matrix to a 3D elevation point cloud. + """Apply a 3D affine transformation matrix to a 3D elevation point cloud. :param epc: Elevation point cloud. :param matrix: Affine (4x4) transformation matrix to apply to the DEM. @@ -1049,7 +1024,6 @@ def _apply_matrix_pts( :return: Transformed elevation point cloud. """ - # Apply transformation to X/Y/Z arrays tx, ty, tz = _apply_matrix_pts_arr( x=epc.geometry.x.values, @@ -1076,19 +1050,17 @@ def _iterate_affine_regrid_small_rotations( centroid: tuple[float, float, float] | None = None, resampling: Literal["nearest", "linear", "cubic", "quintic"] = "linear", ) -> tuple[NDArrayf, rio.transform.Affine]: - """ - Iterative process to find the best reprojection of affine transformation for small rotations. + """Iterative process to find the best reprojection of affine transformation for small rotations. Faster than regridding point cloud by triangulation of points (for instance with scipy.interpolate.griddata). 
""" - # Convert DEM to elevation point cloud, keeping all exact grid coordinates X/Y even for NaNs dem_rst = gu.Raster.from_array(dem, transform=transform, crs=None, nodata=99999) epc = dem_rst.to_pointcloud(data_column_name="z", skip_nodata=False).ds # Exact affine transform of elevation point cloud (which yields irregular coordinates in 2D) tz0 = _apply_matrix_pts_arr( - x=epc.geometry.x.values, y=epc.geometry.y.values, z=epc.z.values, matrix=matrix, centroid=centroid + x=epc.geometry.x.values, y=epc.geometry.y.values, z=epc.z.values, matrix=matrix, centroid=centroid, )[2] # We need to find the elevation Z of a transformed DEM at the exact grid coordinates X,Y @@ -1099,7 +1071,7 @@ def _iterate_affine_regrid_small_rotations( # (We create the interpolator only once here for computational speed, instead of using Raster.interp_points) xycoords = dem_rst.coords(grid=False) z_interp = scipy.interpolate.RegularGridInterpolator( - points=(np.flip(xycoords[1], axis=0), xycoords[0]), values=dem, method=resampling, bounds_error=False + points=(np.flip(xycoords[1], axis=0), xycoords[0]), values=dem, method=resampling, bounds_error=False, ) # 2/ As a first guess of a transformed DEM elevation Z near the grid coordinates, we initialize with the elevations @@ -1150,14 +1122,14 @@ def _iterate_affine_regrid_small_rotations( x0, y0, z0 = _apply_matrix_pts_arr(x=tx, y=ty, z=tz, matrix=matrix, centroid=centroid) # Only check residuals after first iteration (to remove NaNs) then every 5 iterations to reduce computing time - if niter == 1 or niter == niter_check: + if niter in {1, niter_check}: # Compute difference between exact grid coordinates and current coordinates, and stop if tolerance reached diff_x = x0 - x diff_y = y0 - y logging.debug( - "Residual check at iteration number %d:" "\n Mean diff x: %f" "\n Mean diff y: %f", + "Residual check at iteration number %d:\n Mean diff x: %f\n Mean diff y: %f", niter, np.nanmean(np.abs(diff_x)), np.nanmean(np.abs(diff_y)), @@ -1180,11 
+1152,10 @@ def _iterate_affine_regrid_small_rotations( zfinal[~ind_converged] = z0 break # Otherwise, save Z for new converged points and keep only not converged in next iterations (for speed) - else: - zfinal[~ind_converged] = z0 - x = x[~subind_converged] - y = y[~subind_converged] - z0 = z0[~subind_converged] + zfinal[~ind_converged] = z0 + x = x[~subind_converged] + y = y[~subind_converged] + z0 = z0[~subind_converged] # Otherwise, for this check, update convergence status for points not having converged yet ind_converged[~ind_converged] = subind_converged @@ -1196,21 +1167,19 @@ def _iterate_affine_regrid_small_rotations( # 4/ Write the regular-grid point cloud back into a raster epc.z = zfinal # We just replace the Z of the original grid to ensure exact coordinates transformed_dem = dem_rst.from_pointcloud_regular( - epc, transform=transform, shape=dem.shape, data_column_name="z", nodata=-99999 + epc, transform=transform, shape=dem.shape, data_column_name="z", nodata=-99999, ) return transformed_dem.data.filled(np.nan), transform def _get_rotations_from_matrix(matrix: NDArrayf) -> tuple[float, float, float]: - """ - Get rotation angles along each axis from the 4x4 affine matrix, derived as Euler extrinsic angles in degrees. + """Get rotation angles along each axis from the 4x4 affine matrix, derived as Euler extrinsic angles in degrees. :param matrix: Affine matrix. :return: Euler extrinsic rotation angles along X, Y and Z (degrees). """ - # The rotation matrix is composed of the first 3 rows/columns rot_matrix = matrix[0:3, 0:3] angles = pytransform3d.rotations.euler_from_matrix(R=rot_matrix, i=0, j=1, k=2, extrinsic=True) @@ -1226,8 +1195,7 @@ def _apply_matrix_rst( resampling: Literal["nearest", "linear", "cubic", "quintic"] = "linear", force_regrid_method: Literal["iterative", "griddata"] | None = None, ) -> tuple[NDArrayf, rio.transform.Affine]: - """ - Apply a 3D affine transformation matrix to a 2.5D DEM. 
+ """Apply a 3D affine transformation matrix to a 2.5D DEM. See details in description of apply_matrix(). @@ -1243,7 +1211,6 @@ def _apply_matrix_rst( :returns: Transformed DEM, Transform. """ - # Invert matrix if required if invert: matrix = invert_matrix(matrix) @@ -1269,9 +1236,9 @@ def _apply_matrix_rst( # 3/ If matrix contains only small rotations (less than 20 degrees), use the fast iterative reprojection rotations = _get_rotations_from_matrix(matrix) - if all(np.abs(rot) < 20 for rot in rotations) and force_regrid_method is None or force_regrid_method == "iterative": + if (all(np.abs(rot) < 20 for rot in rotations) and force_regrid_method is None) or force_regrid_method == "iterative": # noqa: E501 new_dem, transform = _iterate_affine_regrid_small_rotations( - dem=dem, transform=transform, matrix=matrix, centroid=centroid, resampling=resampling + dem=dem, transform=transform, matrix=matrix, centroid=centroid, resampling=resampling, ) return new_dem, transform @@ -1282,7 +1249,7 @@ def _apply_matrix_rst( trans_epc = _apply_matrix_pts(epc, matrix=matrix, centroid=centroid) new_dem = _grid_pointcloud( - trans_epc, grid_coords=dem_rst.coords(grid=False), data_column_name="z", resampling=resampling + trans_epc, grid_coords=dem_rst.coords(grid=False), data_column_name="z", resampling=resampling, ) return new_dem, transform @@ -1317,8 +1284,7 @@ def _reproject_horizontal_shift_samecrs( return_interpolator: bool = False, resampling: Literal["nearest", "linear", "cubic", "quintic", "slinear", "pchip", "splinef2d"] = "linear", ) -> NDArrayf | Callable[[tuple[NDArrayf, NDArrayf]], NDArrayf]: - """ - Reproject a raster only for a horizontal shift (transform update) in the same CRS. + """Reproject a raster only for a horizontal shift (transform update) in the same CRS. 
This function exists independently of Raster.reproject() because Rasterio has unexplained reprojection issues that can create non-negligible sub-pixel shifts that should be crucially avoided for coregistration. @@ -1326,7 +1292,6 @@ def _reproject_horizontal_shift_samecrs( Here we use SciPy interpolation instead, modified for nodata propagation in geoutils.interp_points(). """ - # We are reprojecting the raster array relative to itself without changing its pixel interpretation, so we can # force any pixel interpretation (area_or_point) without it having any influence on the result, here "Area" if not return_interpolator: @@ -1392,8 +1357,7 @@ def apply_matrix( z_name: str = "z", **kwargs: Any, ) -> tuple[NDArrayf, affine.Affine] | gu.Raster | gpd.GeoDataFrame: - """ - Apply a 3D affine transformation matrix to a 3D elevation point cloud or 2.5D DEM. + """Apply a 3D affine transformation matrix to a 3D elevation point cloud or 2.5D DEM. For an elevation point cloud, the transformation is exact. @@ -1427,41 +1391,39 @@ def apply_matrix( :return: Affine transformed elevation point cloud or DEM. 
""" - # Apply matrix to elevation point cloud if isinstance(elev, gpd.GeoDataFrame): return _apply_matrix_pts(epc=elev, matrix=matrix, invert=invert, centroid=centroid, z_name=z_name) # Or apply matrix to raster (often requires re-gridding) + + # First, we apply the affine matrix for the array/transform + if isinstance(elev, gu.Raster): + transform = elev.transform + dem = elev.data.filled(np.nan) else: + dem = elev + applied_dem, out_transform = _apply_matrix_rst( + dem=dem, + transform=transform, + matrix=matrix, + invert=invert, + centroid=centroid, + resampling=resampling, + **kwargs, + ) - # First, we apply the affine matrix for the array/transform - if isinstance(elev, gu.Raster): - transform = elev.transform - dem = elev.data.filled(np.nan) - else: - dem = elev - applied_dem, out_transform = _apply_matrix_rst( - dem=dem, - transform=transform, - matrix=matrix, - invert=invert, - centroid=centroid, - resampling=resampling, - **kwargs, + # Then, if resample is True, we reproject the DEM from its out_transform onto the transform + if resample: + applied_dem = _reproject_horizontal_shift_samecrs( + applied_dem, src_transform=out_transform, dst_transform=transform, resampling=resampling, ) + out_transform = transform - # Then, if resample is True, we reproject the DEM from its out_transform onto the transform - if resample: - applied_dem = _reproject_horizontal_shift_samecrs( - applied_dem, src_transform=out_transform, dst_transform=transform, resampling=resampling - ) - out_transform = transform - - # We return a raster if input was a raster - if isinstance(elev, gu.Raster): - applied_dem = gu.Raster.from_array(applied_dem, out_transform, elev.crs, elev.nodata) - return applied_dem - return applied_dem, out_transform + # We return a raster if input was a raster + if isinstance(elev, gu.Raster): + applied_dem = gu.Raster.from_array(applied_dem, out_transform, elev.crs, elev.nodata) + return applied_dem + return applied_dem, out_transform 
########################################### @@ -1469,15 +1431,15 @@ def apply_matrix( ########################################### -class NotImplementedCoregFit(NotImplementedError): - """ - Error subclass for not implemented coregistration fit methods; mainly to differentiate with NotImplementedError +class NotImplementedCoregFitError(NotImplementedError): + """Error subclass for not implemented coregistration fit methods; + mainly to differentiate with NotImplementedError """ -class NotImplementedCoregApply(NotImplementedError): - """ - Error subclass for not implemented coregistration fit methods; mainly to differentiate with NotImplementedError +class NotImplementedCoregApplyError(NotImplementedError): + """Error subclass for not implemented coregistration apply methods; + mainly to differentiate with NotImplementedError """ @@ -1611,8 +1573,7 @@ class OutputCoregDict(TypedDict, total=False): class CoregDict(TypedDict, total=False): - """ - Defining the type of each possible key in the metadata dictionary of Coreg classes. + """Defining the type of each possible key in the metadata dictionary of Coreg classes. The parameter total=False means that the key are not required. In the recent PEP 655 ( https://peps.python.org/pep-0655/) there is an easy way to specific Required or NotRequired for each key, if we want to change this in the future. @@ -1632,8 +1593,7 @@ class CoregDict(TypedDict, total=False): class Coreg: - """ - Generic co-registration processing class. + """Generic co-registration processing class. Used to implement methods common to all processing steps (rigid alignment, bias corrections, filtering). Those are: instantiation, copying and addition (which casts to a Pipeline object). 
@@ -1649,7 +1609,6 @@ class Coreg: def __init__(self, meta: dict[str, Any] | None = None) -> None: """Instantiate a generic processing step method.""" - # Automatically sort input keys into their appropriate nested level using only the TypedDicts defined # above which make up the CoregDict altogether dict_meta = CoregDict(inputs={}, outputs={}) @@ -1665,10 +1624,10 @@ def __init__(self, meta: dict[str, Any] | None = None) -> None: # Join all keys for input check all_keys = [k for lv in keys_per_level for k in lv] - for k in meta.keys(): + for k in meta: if k not in all_keys: raise ValueError( - f"Coregistration metadata key {k} is not supported. " f"Should be one of {', '.join(all_keys)}" + f"Coregistration metadata key {k} is not supported. Should be one of {', '.join(all_keys)}", ) # Add keys to inputs @@ -1696,7 +1655,7 @@ def copy(self: CoregType) -> CoregType: def __add__(self, other: CoregType) -> CoregPipeline: """Return a pipeline consisting of self and the other processing function.""" if not isinstance(other, Coreg): - raise ValueError(f"Incompatible add type: {type(other)}. Expected 'Coreg' subclass") + raise TypeError(f"Incompatible add type: {type(other)}. Expected 'Coreg' subclass") return CoregPipeline([self, other]) @property @@ -1715,9 +1674,9 @@ def is_affine(self) -> bool: @property def is_translation(self) -> bool | None: - + """Test if the matrix gives a translation.""" # If matrix exists in keys, or can be derived from to_matrix(), we conclude - if "matrix" in self._meta["outputs"]["affine"].keys(): + if "matrix" in self._meta["outputs"]["affine"]: matrix = self._meta["outputs"]["affine"]["matrix"] else: try: @@ -1733,7 +1692,6 @@ def is_translation(self) -> bool | None: @property def meta(self) -> CoregDict: """Metadata dictionary of the coregistration.""" - return self._meta @overload @@ -1744,7 +1702,6 @@ def info(self, as_str: Literal[True]) -> str: ... 
def info(self, as_str: bool = False) -> None | str: """Summarize information about this coregistration.""" - # Define max tabulation: longest name + 2 spaces tab = np.max([len(v) for v in dict_key_to_str.values()]) + 2 @@ -1760,8 +1717,7 @@ def recursive_items(dictionary: Mapping[str, Any]) -> Iterable[tuple[str, Any]]: # Formatting function for key values, rounding up digits for numbers and returning function names def format_coregdict_values(val: Any, tab: int) -> str: - """ - Format coregdict values for printing. + """Format coregdict values for printing. :param val: Input value. :param tab: Tabulation (if value is printed on multiple lines). @@ -1814,7 +1770,7 @@ def dec_round_to_n(x: float | np.floating[Any], n: int) -> int: "Inputs\n", ] for lk, lv in sublevels.items(): - if lk in self._meta["inputs"].keys(): + if lk in self._meta["inputs"]: existing_level_keys = [ (k, v) for k, v in self._meta["inputs"][lk].items() if k in existing_deep_keys # type: ignore ] @@ -1830,7 +1786,7 @@ def dec_round_to_n(x: float | np.floating[Any], n: int) -> int: # If dict not empty if self._meta["outputs"]: for lk, lv in sublevels.items(): - if lk in self._meta["outputs"].keys(): + if lk in self._meta["outputs"]: existing_level_keys = [ (k, v) for k, v in self._meta["outputs"][lk].items() if k in existing_deep_keys # type: ignore ] @@ -1852,17 +1808,14 @@ def dec_round_to_n(x: float | np.floating[Any], n: int) -> int: # Return as string or print (default) if as_str: return "".join(final_str) - else: - print("".join(final_str)) - return None + print("".join(final_str)) + return None def _get_subsample_on_valid_mask(self, valid_mask: NDArrayb) -> NDArrayb: - """ - Get mask of values to subsample on valid mask. + """Get mask of values to subsample on valid mask. :param valid_mask: Mask of valid values (inlier and not nodata). 
""" - # Get random parameters params_random = self._meta["inputs"]["random"] @@ -1889,14 +1842,12 @@ def _preprocess_rst_pts_subsample( area_or_point: Literal["Area", "Point"] | None = None, z_name: str = "z", ) -> tuple[NDArrayf, NDArrayf, None | dict[str, NDArrayf]]: - """ - Pre-process raster-raster or point-raster datasets into 1D arrays subsampled at the same points + """Pre-process raster-raster or point-raster datasets into 1D arrays subsampled at the same points (and interpolated in the case of point-raster input). Return 1D arrays of reference elevation, to-be-aligned elevation and dictionary of 1D arrays of auxiliary variables at subsampled points. """ - # Get random parameters params_random: InRandomDict = self._meta["inputs"]["random"] @@ -1942,8 +1893,7 @@ def fit( random_state: int | np.random.Generator | None = None, **kwargs: Any, ) -> CoregType: - """ - Estimate the coregistration transform on the given DEMs. + """Estimate the coregistration transform on the given DEMs. :param reference_elev: Reference elevation, either a DEM or an elevation point cloud. :param to_be_aligned_elev: To-be-aligned elevation, either a DEM or an elevation point cloud. @@ -1957,7 +1907,6 @@ def fit( :param z_name: Column name to use as elevation, only for point elevation data passed as geodataframe. :param random_state: Random state or seed number to use for calculations (to fix random sampling). """ - if weights is not None: raise NotImplementedError("Weights have not yet been implemented") @@ -1974,7 +1923,7 @@ def fit( warnings.warn( "Subsample argument passed to fit() will override non-default subsample value defined at " "instantiation. To silence this warning: only define 'subsample' in either fit(subsample=...) or " - "instantiation e.g. VerticalShift(subsample=...)." + "instantiation e.g. VerticalShift(subsample=...).", ) # In any case, override! 
@@ -2011,7 +1960,7 @@ def fit( if self._is_affine: warnings.warn("This coregistration method is affine, ignoring `bias_vars` passed to fit().") - for var in bias_vars.keys(): + for var in bias_vars: bias_vars[var] = gu.raster.get_array_and_mask(bias_vars[var])[0] main_args.update({"bias_vars": bias_vars}) @@ -2078,8 +2027,7 @@ def apply( z_name: str = "z", **kwargs: Any, ) -> RasterType | gpd.GeoDataFrame | tuple[NDArrayf, rio.transform.Affine] | tuple[MArrayf, rio.transform.Affine]: - """ - Apply the estimated transform to a DEM. + """Apply the estimated transform to a DEM. :param elev: Elevation to apply the transform to, either a DEM or an elevation point cloud. :param bias_vars: Only for some bias correction classes. 2D array of bias variables used. @@ -2106,7 +2054,7 @@ def apply( if self._is_affine: warnings.warn("This coregistration method is affine, ignoring `bias_vars` passed to apply().") - for var in bias_vars.keys(): + for var in bias_vars: bias_vars[var] = gu.raster.get_array_and_mask(bias_vars[var])[0] main_args.update({"bias_vars": bias_vars}) @@ -2128,8 +2076,7 @@ def apply( # Only return object if raster or geodataframe, also return transform if object was an array if isinstance(applied_elev, (gu.Raster, gpd.GeoDataFrame)): return applied_elev - else: - return applied_elev, out_transform + return applied_elev, out_transform @overload def fit_and_apply( @@ -2228,7 +2175,6 @@ def fit_and_apply( :param fit_kwargs: Keyword arguments to be passed to fit. :param apply_kwargs: Keyword argument to be passed to apply. """ - if fit_kwargs is None: fit_kwargs = {} if apply_kwargs is None: @@ -2273,8 +2219,7 @@ def residuals( subsample: float | int = 1.0, random_state: int | np.random.Generator | None = None, ) -> NDArrayf: - """ - Calculate the residual offsets (the difference) between two DEMs after applying the transformation. + """Calculate the residual offsets (the difference) between two DEMs after applying the transformation. 
:param reference_elev: 2D array of elevation values acting reference. :param to_be_aligned_elev: 2D array of elevation values to be aligned. @@ -2287,7 +2232,6 @@ def residuals( :returns: A 1D array of finite residuals. """ - # Apply the transformation to the dem to be aligned aligned_elev = self.apply(to_be_aligned_elev, transform=transform, crs=crs)[0] @@ -2346,8 +2290,7 @@ def error( crs: rio.crs.CRS | None = None, area_or_point: Literal["Area", "Point"] | None = None, ) -> np.floating[Any] | float | np.integer[Any] | int | list[np.floating[Any] | float | np.integer[Any] | int]: - """ - Calculate the error of a coregistration approach. + """Calculate the error of a coregistration approach. Choices: - "nmad": Default. The Normalized Median Absolute Deviation of the residuals. @@ -2404,7 +2347,7 @@ def count(res: NDArrayf) -> int: except KeyError as exception: raise ValueError( f"Invalid 'error_type'{'s' if len(error_type) > 1 else ''}: " - f"'{error_type}'. Choices: {list(error_functions.keys())}" + f"'{error_type}'. Choices: {list(error_functions.keys())}", ) from exception return errors if len(errors) > 1 else errors[0] @@ -2413,11 +2356,9 @@ def _fit_func( self, **kwargs: Any, ) -> None: - """ - Distribute to _fit_rst_rst, fit_rst_pts or fit_pts_pts depending on input and method availability. + """Distribute to _fit_rst_rst, fit_rst_pts or fit_pts_pts depending on input and method availability. Needs to be _fit_func of the main class to simplify calls from CoregPipeline and BlockwiseCoreg. 
""" - # Determine if input is raster-raster, raster-point or point-point if all(isinstance(dem, np.ndarray) for dem in (kwargs["ref_elev"], kwargs["tba_elev"])): rop = "r-r" @@ -2436,7 +2377,7 @@ def _fit_func( try: self._fit_rst_rst(**kwargs) # Otherwise, convert the tba raster to points and try raster-points - except NotImplementedCoregFit: + except NotImplementedCoregFitError: warnings.warn( f"No raster-raster method found for coregistration {self.__class__.__name__}, " f"trying raster-point method by converting to-be-aligned DEM to points.", @@ -2454,7 +2395,7 @@ def _fit_func( if rop == "r-p" or try_rp: try: self._fit_rst_pts(**kwargs) - except NotImplementedCoregFit: + except NotImplementedCoregFitError: warnings.warn( f"No raster-point method found for coregistration {self.__class__.__name__}, " f"trying point-point method by converting all elevation data to points.", @@ -2472,24 +2413,22 @@ def _fit_func( if rop == "p-p" or try_pp: try: self._fit_pts_pts(**kwargs) - except NotImplementedCoregFit: + except NotImplementedCoregFitError: if try_pp and try_rp: - raise NotImplementedCoregFit( + raise NotImplementedCoregFitError( f"No raster-raster, raster-point or point-point method found for " - f"coregistration {self.__class__.__name__}." + f"coregistration {self.__class__.__name__}.", ) - elif try_pp: - raise NotImplementedCoregFit( - f"No raster-point or point-point method found for coregistration {self.__class__.__name__}." - ) - else: - raise NotImplementedCoregFit( - f"No point-point method found for coregistration {self.__class__.__name__}." 
+ if try_pp: + raise NotImplementedCoregFitError( + f"No raster-point or point-point method found for coregistration {self.__class__.__name__}.", ) + raise NotImplementedCoregFitError( + f"No point-point method found for coregistration {self.__class__.__name__}.", + ) def _apply_func(self, **kwargs: Any) -> tuple[NDArrayf | gpd.GeoDataFrame, affine.Affine]: """Distribute to _apply_rst and _apply_pts based on input and method availability.""" - # If input is a raster if isinstance(kwargs["elev"], np.ndarray): @@ -2499,7 +2438,7 @@ def _apply_func(self, **kwargs: Any) -> tuple[NDArrayf | gpd.GeoDataFrame, affin applied_elev, out_transform = self._apply_rst(**kwargs) # pylint: disable=assignment-from-no-return # If it doesn't exist, use apply_matrix() - except NotImplementedCoregApply: + except NotImplementedCoregApplyError: if self.is_affine: # This only works for affine, however. @@ -2508,7 +2447,7 @@ def _apply_func(self, **kwargs: Any) -> tuple[NDArrayf | gpd.GeoDataFrame, affin if not kwargs["resample"]: raise NotImplementedError( f"Option `resample=False` not supported by {self.__class__}," - f" only available for translation coregistrations such as NuthKaab." + f" only available for translation coregistrations such as NuthKaab.", ) # Apply the matrix around the centroid (if defined, otherwise just from the center). 
@@ -2531,7 +2470,7 @@ def _apply_func(self, **kwargs: Any) -> tuple[NDArrayf | gpd.GeoDataFrame, affin applied_elev = self._apply_pts(**kwargs) # If it doesn't exist, use apply_matrix() - except NotImplementedCoregApply: + except NotImplementedCoregApplyError: if self.is_affine: applied_elev = _apply_matrix_pts( @@ -2551,23 +2490,21 @@ def _bin_or_and_fit_nd( # type: ignore values: NDArrayf, bias_vars: None | dict[str, NDArrayf] = None, weights: None | NDArrayf = None, - **kwargs, + **kwargs, # noqa: ANN003 ) -> None: - """ - Generic binning and/or fitting method to model values along N variables for a coregistration/correction, + """Generic binning and/or fitting method to model values along N variables for a coregistration/correction, used for all affine and bias-correction subclasses. Expects either 2D arrays for rasters, or 1D arrays for points. Should only be called through subclassing. """ - # Store bias variable names from the dictionary if undefined if self._meta["inputs"]["fitorbin"]["bias_var_names"] is None: self._meta["inputs"]["fitorbin"]["bias_var_names"] = list(bias_vars.keys()) # Run the fit or bin, passing the dictionary of parameters params_fit_or_bin = self._meta["inputs"]["fitorbin"] - df, results = _bin_or_and_fit_nd( + df_binning, results = _bin_or_and_fit_nd( fit_or_bin=self._meta["inputs"]["fitorbin"]["fit_or_bin"], params_fit_or_bin=params_fit_or_bin, values=values, @@ -2603,8 +2540,8 @@ def _bin_or_and_fit_nd( # type: ignore self._meta["outputs"]["fitorbin"].update({"fit_params": params}) # Save results of binning if it was performed - elif self._meta["inputs"]["fitorbin"]["fit_or_bin"] in ["bin", "bin_and_fit"] and df is not None: - self._meta["outputs"]["fitorbin"].update({"bin_dataframe": df}) + elif self._meta["inputs"]["fitorbin"]["fit_or_bin"] in ["bin", "bin_and_fit"] and df_binning is not None: + self._meta["outputs"]["fitorbin"].update({"bin_dataframe": df_binning}) def _fit_rst_rst( self, @@ -2620,7 +2557,7 @@ def _fit_rst_rst( 
**kwargs: Any, ) -> None: # FOR DEVELOPERS: This function needs to be implemented by subclassing. - raise NotImplementedCoregFit("This step has to be implemented by subclassing.") + raise NotImplementedCoregFitError("This step has to be implemented by subclassing.") def _fit_rst_pts( self, @@ -2636,7 +2573,7 @@ def _fit_rst_pts( **kwargs: Any, ) -> None: # FOR DEVELOPERS: This function needs to be implemented by subclassing. - raise NotImplementedCoregFit("This step has to be implemented by subclassing.") + raise NotImplementedCoregFitError("This step has to be implemented by subclassing.") def _fit_pts_pts( self, @@ -2651,7 +2588,7 @@ def _fit_pts_pts( **kwargs: Any, ) -> None: # FOR DEVELOPERS: This function needs to be implemented by subclassing. - raise NotImplementedCoregFit("This step has to be implemented by subclassing.") + raise NotImplementedCoregFitError("This step has to be implemented by subclassing.") def _apply_rst( self, @@ -2663,7 +2600,7 @@ def _apply_rst( ) -> tuple[NDArrayf, rio.transform.Affine]: # FOR DEVELOPERS: This function needs to be implemented by subclassing. - raise NotImplementedCoregApply("This should have been implemented by subclassing.") + raise NotImplementedCoregApplyError("This should have been implemented by subclassing.") def _apply_pts( self, @@ -2674,17 +2611,14 @@ def _apply_pts( ) -> gpd.GeoDataFrame: # FOR DEVELOPERS: This function needs to be implemented by subclassing. - raise NotImplementedCoregApply("This should have been implemented by subclassing.") + raise NotImplementedCoregApplyError("This should have been implemented by subclassing.") class CoregPipeline(Coreg): - """ - A sequential set of co-registration processing steps. - """ + """A sequential set of co-registration processing steps.""" def __init__(self, pipeline: list[Coreg]) -> None: - """ - Instantiate a new processing pipeline. + """Instantiate a new processing pipeline. :param: Processing steps to run in the sequence they are given. 
""" @@ -2693,6 +2627,7 @@ def __init__(self, pipeline: list[Coreg]) -> None: super().__init__() def __repr__(self) -> str: + """Return a string representation of the pipeline.""" return f"Pipeline: {self.pipeline}" @overload @@ -2703,21 +2638,19 @@ def info(self, as_str: Literal[True]) -> str: ... def info(self, as_str: bool = False) -> None | str: """Summarize information about this coregistration.""" - # Get the pipeline information for each step as a string final_str = [] for i, step in enumerate(self.pipeline): - final_str.append(f"Pipeline step {i}:\n" f"################\n") + final_str.append(f"Pipeline step {i}:\n################\n") step_str = step.info(as_str=True) final_str.append(step_str) # Return as string or print (default) if as_str: return "".join(final_str) - else: - print("".join(final_str)) - return None + print("".join(final_str)) + return None def copy(self: CoregType) -> CoregType: """Return an identical copy of the class.""" @@ -2730,15 +2663,14 @@ def copy(self: CoregType) -> CoregType: def _parse_bias_vars(self, step: int, bias_vars: dict[str, NDArrayf] | None) -> dict[str, NDArrayf]: """Parse bias variables for a pipeline step requiring them.""" - # Get number of non-affine coregistration requiring bias variables to be passed - nb_needs_vars = sum(c._needs_vars for c in self.pipeline) + nb_needs_vars = sum(c._needs_vars for c in self.pipeline) # noqa: SLF001 # Get step object coreg = self.pipeline[step] # Check that all variable names of this were passed - var_names = coreg._meta["inputs"]["fitorbin"]["bias_var_names"] + var_names = coreg._meta["inputs"]["fitorbin"]["bias_var_names"] # noqa: SLF001 # Raise error if bias_vars is None if bias_vars is None: @@ -2747,7 +2679,7 @@ def _parse_bias_vars(self, step: int, bias_vars: dict[str, NDArrayf] | None) -> msg += ( " As you are using several bias correction steps requiring `bias_vars`, don't forget to " "explicitly define their `bias_var_names` during " - "instantiation, e.g. 
{}(bias_var_names=['slope']).".format(coreg.__class__.__name__) + f"instantiation, e.g. {coreg.__class__.__name__}(bias_var_names=['slope'])." ) raise ValueError(msg) @@ -2756,14 +2688,14 @@ def _parse_bias_vars(self, step: int, bias_vars: dict[str, NDArrayf] | None) -> raise ValueError( "When using several bias correction steps requiring `bias_vars` in a pipeline," "the `bias_var_names` need to be explicitly defined at each step's " - "instantiation, e.g. {}(bias_var_names=['slope']).".format(coreg.__class__.__name__) + f"instantiation, e.g. {coreg.__class__.__name__}(bias_var_names=['slope']).", ) # Raise error if the variables explicitly assigned don't match the ones passed in bias_vars - if not all(n in bias_vars.keys() for n in var_names): + if not all(n in bias_vars for n in var_names): raise ValueError( "Not all keys of `bias_vars` in .fit() match the `bias_var_names` defined during " - "instantiation of the bias correction step {}: {}.".format(coreg.__class__, var_names) + f"instantiation of the bias correction step {coreg.__class__}: {var_names}.", ) # Add subset dict for this pipeline step to args of fit and apply @@ -2785,7 +2717,7 @@ def fit( random_state: int | np.random.Generator | None = None, **kwargs: Any, ) -> CoregType: - + """Fit module.""" # Check if subsample arguments are different from their default value for any of the coreg steps: # get default value in argument spec and "subsample" stored in meta, and compare both are consistent argspec = [inspect.getfullargspec(c.__class__) for c in self.pipeline] @@ -2798,7 +2730,7 @@ def fit( warnings.warn( "Subsample argument passed to fit() will override non-default subsample values defined for" " individual steps of the pipeline. To silence this warning: only define 'subsample' in " - "either fit(subsample=...) or instantiation e.g., VerticalShift(subsample=...)." + "either fit(subsample=...) 
or instantiation e.g., VerticalShift(subsample=...).", ) # Filter warnings of individual pipelines now that the one above was raised warnings.filterwarnings("ignore", message="Subsample argument passed to*", category=UserWarning) @@ -2834,7 +2766,7 @@ def fit( main_args_apply = {"elev": tba_dem_mod, "transform": out_transform, "crs": crs, "z_name": z_name} # If non-affine method that expects a bias_vars argument - if coreg._needs_vars: + if coreg._needs_vars: # noqa: SLF001 step_bias_vars = self._parse_bias_vars(step=i, bias_vars=bias_vars) main_args_fit.update({"bias_vars": step_bias_vars}) @@ -2894,7 +2826,7 @@ def apply( z_name: str = "z", **kwargs: Any, ) -> RasterType | gpd.GeoDataFrame: ... - + """.""" # Need to override base Coreg method to work on pipeline steps def apply( self, @@ -2907,7 +2839,7 @@ def apply( z_name: str = "z", **kwargs: Any, ) -> RasterType | gpd.GeoDataFrame | tuple[NDArrayf, rio.transform.Affine] | tuple[MArrayf, rio.transform.Affine]: - + """.""" # First step and preprocessing if not self._fit_called and self._meta["outputs"]["affine"].get("matrix") is None: raise AssertionError(".fit() does not seem to have been called yet") @@ -2930,7 +2862,7 @@ def apply( } # If non-affine method that expects a bias_vars argument - if coreg._needs_vars: + if coreg._needs_vars: # noqa: SLF001 step_bias_vars = self._parse_bias_vars(step=i, bias_vars=bias_vars) main_args_apply.update({"bias_vars": step_bias_vars}) @@ -2954,8 +2886,7 @@ def apply( # Only return object if raster or geodataframe, also return transform if object was an array if isinstance(applied_elev, (gu.Raster, gpd.GeoDataFrame)): return applied_elev - else: - return applied_elev, out_transform + return applied_elev, out_transform def __iter__(self) -> Generator[Coreg]: """Iterate over the pipeline steps.""" @@ -2995,8 +2926,7 @@ def _to_matrix_func(self) -> NDArrayf: class BlockwiseCoreg(Coreg): - """ - Block-wise co-registration processing class to run a step in segmented parts of 
the grid. + """Block-wise co-registration processing class to run a step in segmented parts of the grid. A processing class of choice is run on an arbitrary subdivision of the raster. When later applying the step the optimal warping is interpolated based on X/Y/Z shifts from the coreg algorithm at the grid points. @@ -3014,8 +2944,7 @@ def __init__( n_threads: int | None = None, warn_failures: bool = False, ) -> None: - """ - Instantiate a blockwise processing object. + """Instantiate a blockwise processing object. :param step: An instantiated co-registration step object to fit in the subdivided DEMs. :param subdivision: The number of chunks to divide the DEMs in. E.g. 4 means four different transforms. @@ -3024,8 +2953,8 @@ def __init__( :param warn_failures: Trigger or ignore warnings for each exception/warning in each block. """ if isinstance(step, type): - raise ValueError( - "The 'step' argument must be an instantiated Coreg subclass. " "Hint: write e.g. ICP() instead of ICP" + raise TypeError( + "The 'step' argument must be an instantiated Coreg subclass. Hint: write e.g. 
ICP() instead of ICP", ) self.procstep = step self.subdivision = subdivision @@ -3052,7 +2981,7 @@ def fit( random_state: int | np.random.Generator | None = None, **kwargs: Any, ) -> CoregType: - + """.""" if isinstance(reference_elev, gpd.GeoDataFrame) and isinstance(to_be_aligned_elev, gpd.GeoDataFrame): raise NotImplementedError("Blockwise coregistration does not yet support two elevation point cloud inputs.") @@ -3063,7 +2992,7 @@ def fit( else: steps = list(self.procstep.pipeline) argspec = [inspect.getfullargspec(s.__class__) for s in steps] - sub_meta = [s._meta["inputs"]["random"]["subsample"] for s in steps] + sub_meta = [s._meta["inputs"]["random"]["subsample"] for s in steps] # noqa: SLF001 sub_is_default = [ argspec[i].defaults[argspec[i].args.index("subsample") - 1] == sub_meta[i] # type: ignore for i in range(len(argspec)) @@ -3072,7 +3001,7 @@ def fit( warnings.warn( "Subsample argument passed to fit() will override non-default subsample values defined in the" " step within the blockwise method. To silence this warning: only define 'subsample' in " - "either fit(subsample=...) or instantiation e.g., VerticalShift(subsample=...)." + "either fit(subsample=...) or instantiation e.g., VerticalShift(subsample=...).", ) # Pre-process the inputs, by reprojecting and subsampling, without any subsampling (done in each step) @@ -3096,12 +3025,13 @@ def fit( indices = np.unique(groups) progress_bar = tqdm( - total=indices.size, desc="Processing chunks", disable=logging.getLogger().getEffectiveLevel() > logging.INFO + total=indices.size, + desc="Processing chunks", + disable=logging.getLogger().getEffectiveLevel() > logging.INFO, ) def process(i: int) -> dict[str, Any] | BaseException | None: - """ - Process a chunk in a thread-safe way. + """Process a chunk in a thread-safe way. :returns: * If it succeeds: A dictionary of the fitting metadata. 
@@ -3173,7 +3103,7 @@ def process(i: int) -> dict[str, Any] | BaseException | None:
             # Assign the closest finite value as the representative point
             representative_row, representative_col = finites[closest][0][0]
             meta["representative_x"], meta["representative_y"] = rio.transform.xy(
-                transform_subset, representative_row, representative_col
+                transform_subset, representative_row, representative_col,
             )

             repr_val = ref_subset[representative_row, representative_col]
@@ -3189,7 +3119,7 @@ def process(i: int) -> dict[str, Any] | BaseException | None:
             # "coreg_meta" key)
             # This can then be iteratively restored when the apply function should be called.
             meta.update(
-                {key: value for key, value in procstep.meta.items() if key not in ["step_meta"] + list(meta.keys())}
+                {key: value for key, value in procstep.meta.items() if key not in ["step_meta", *meta]},
             )

             progress_bar.update()
@@ -3224,7 +3154,7 @@ def process(i: int) -> dict[str, Any] | BaseException | None:
                 + "\n".join(map(str, exceptions[:5]))
                 + f"\n... and {len(exceptions) - 5} more"
                 if len(exceptions) > 5
-                else ""
+                else "",
             )

             if self.warn_failures:
@@ -3232,10 +3162,10 @@ def process(i: int) -> dict[str, Any] | BaseException | None:
                 warnings.warn(str(exception))

         # Set the _fit_called parameters (only identical copies of self.coreg have actually been called)
-        self.procstep._fit_called = True
+        self.procstep._fit_called = True  # noqa: SLF001
         if isinstance(self.procstep, CoregPipeline):
             for step in self.procstep.pipeline:
-                step._fit_called = True
+                step._fit_called = True  # noqa: SLF001

         # Flag that the fitting function has been called.
         self._fit_called = True
@@ -3243,20 +3173,18 @@ def process(i: int) -> dict[str, Any] | BaseException | None:
         return self

     def _restore_metadata(self, meta: CoregDict) -> None:
-        """
-        Given some metadata, set it in the right place.
+        """Given some metadata, set it in the right place.
:param meta: A metadata file to update self._meta """ - self.procstep._meta.update(meta) + self.procstep._meta.update(meta) # noqa: SLF001 if isinstance(self.procstep, CoregPipeline) and "pipeline" in meta: for i, step in enumerate(self.procstep.pipeline): - step._meta.update(meta["pipeline"][i]) + step._meta.update(meta["pipeline"][i]) # noqa: SLF001 def to_points(self) -> NDArrayf: - """ - Convert the blockwise coregistration matrices to 3D (source -> destination) points. + """Convert the blockwise coregistration matrices to 3D (source -> destination) points. The returned shape is (N, 3, 2) where the dimensions represent: 0. The point index where N is equal to the amount of subdivisions. @@ -3287,7 +3215,10 @@ def to_points(self) -> NDArrayf: new_position = self.procstep.apply(old_position) new_pos_arr = np.reshape( - [new_position.geometry.x.values, new_position.geometry.y.values, new_position["z"].values], (1, 3) + [new_position.geometry.x.values, + new_position.geometry.y.values, + new_position["z"].values], + (1, 3), ) points = np.append(points, np.dstack((old_pos_arr, new_pos_arr)), axis=0) @@ -3295,8 +3226,7 @@ def to_points(self) -> NDArrayf: return points def stats(self) -> pd.DataFrame: - """ - Return statistics for each chunk in the blockwise coregistration. + """Return statistics for each chunk in the blockwise coregistration. * center_{x,y,z}: The center coordinate of the chunk in georeferenced units. * {x,y,z}_off: The calculated offset in georeferenced units. @@ -3327,7 +3257,7 @@ def stats(self) -> pd.DataFrame: "inlier_count": chunk_meta[i]["inlier_count"], "nmad": chunk_meta[i]["nmad"], "median": chunk_meta[i]["median"], - } + }, ) stats_df = pd.DataFrame(statistics) @@ -3336,8 +3266,7 @@ def stats(self) -> pd.DataFrame: return stats_df def subdivide_array(self, shape: tuple[int, ...]) -> NDArrayf: - """ - Return the grid subdivision for a given DEM shape. + """Return the grid subdivision for a given DEM shape. 
:param shape: The shape of the input DEM. @@ -3375,7 +3304,7 @@ def _apply_rst( [bounds.right - resolution[0] / 2, bounds.top - resolution[1] / 2, representative_height], [bounds.left + resolution[0] / 2, bounds.bottom + resolution[1] / 2, representative_height], [bounds.right - resolution[0] / 2, bounds.bottom + resolution[1] / 2, representative_height], - ] + ], ) edges_source = gpd.GeoDataFrame( geometry=gpd.points_from_xy(x=edges_source_arr[:, 0], y=edges_source_arr[:, 1], crs=None), @@ -3384,7 +3313,7 @@ def _apply_rst( edges_dest = self.apply(edges_source) edges_dest_arr = np.array( - [edges_dest.geometry.x.values, edges_dest.geometry.y.values, edges_dest["z"].values] + [edges_dest.geometry.x.values, edges_dest.geometry.y.values, edges_dest["z"].values], ).T edges = np.dstack((edges_source_arr, edges_dest_arr)) @@ -3401,14 +3330,14 @@ def _apply_rst( return warped_dem, transform def _apply_pts( - self, elev: gpd.GeoDataFrame, z_name: str = "z", bias_vars: dict[str, NDArrayf] | None = None, **kwargs: Any + self, elev: gpd.GeoDataFrame, z_name: str = "z", bias_vars: dict[str, NDArrayf] | None = None, **kwargs: Any, ) -> gpd.GeoDataFrame: """Apply the scaling model to a set of points.""" points = self.to_points() new_coords = np.array([elev.geometry.x.values, elev.geometry.y.values, elev["z"].values]).T - for dim in range(0, 3): + for dim in range(3): with warnings.catch_warnings(): # ZeroDivisionErrors may happen when the transformation is empty (which is fine) warnings.filterwarnings("ignore", message="ZeroDivisionError") @@ -3422,7 +3351,7 @@ def _apply_pts( new_coords[:, dim] += model(elev.geometry.x.values, elev.geometry.y.values) gdf_new_coords = gpd.GeoDataFrame( - geometry=gpd.points_from_xy(x=new_coords[:, 0], y=new_coords[:, 1], crs=None), data={"z": new_coords[:, 2]} + geometry=gpd.points_from_xy(x=new_coords[:, 0], y=new_coords[:, 1], crs=None), data={"z": new_coords[:, 2]}, ) return gdf_new_coords @@ -3437,8 +3366,7 @@ def warp_dem( trim_border: 
bool = True, dilate_mask: bool = True, ) -> NDArrayf: - """ - (22/08/24: Method currently used only for blockwise coregistration) + """(22/08/24: Method currently used only for blockwise coregistration) Warp a DEM using a set of source-destination 2D or 3D coordinates. :param dem: The DEM to warp. Allowed shapes are (1, row, col) or (row, col) @@ -3457,12 +3385,12 @@ def warp_dem( if source_coords.shape != destination_coords.shape: raise ValueError( f"Incompatible shapes: source_coords '({source_coords.shape})' and " - f"destination_coords '({destination_coords.shape})' shapes must be the same" + f"destination_coords '({destination_coords.shape})' shapes must be the same", ) if (len(source_coords.shape) > 2) or (source_coords.shape[1] < 2) or (source_coords.shape[1] > 3): raise ValueError( "Invalid coordinate shape. Expected 2D or 3D coordinates of shape (N, 2) or (N, 3). " - f"Got '{source_coords.shape}'" + f"Got '{source_coords.shape}'", ) allowed_resampling_strs = ["nearest", "linear", "cubic"] if resampling not in allowed_resampling_strs: @@ -3524,7 +3452,10 @@ def warp_dem( ) new_mask = ( skimage.transform.warp( - image=dem_mask, inverse_map=np.moveaxis(new_indices, 2, 0), output_shape=dem_arr.shape, cval=False + image=dem_mask, + inverse_map=np.moveaxis(new_indices, 2, 0), + output_shape=dem_arr.shape, + cval=False, ) > 0 ) diff --git a/xdem/coreg/biascorr.py b/xdem/coreg/biascorr.py index f16740562..553677ccc 100644 --- a/xdem/coreg/biascorr.py +++ b/xdem/coreg/biascorr.py @@ -20,7 +20,8 @@ from __future__ import annotations import logging -from typing import Any, Callable, Iterable, Literal, TypeVar +from collections.abc import Callable, Iterable +from typing import Any, Literal, TypeVar import geopandas as gpd import geoutils as gu @@ -37,8 +38,7 @@ class BiasCorr(Coreg): - """ - Bias-correction (non-rigid alignment) simultaneously with any number and type of variables. 
+ """Bias-correction (non-rigid alignment) simultaneously with any number and type of variables. Variables for bias-correction can include the elevation coordinates (deramping, directional biases), terrain attributes (terrain corrections), or any other user-input variable (quality metrics, land cover). @@ -48,19 +48,18 @@ class BiasCorr(Coreg): def __init__( self, - fit_or_bin: Literal["bin_and_fit"] | Literal["fit"] | Literal["bin"] = "fit", + fit_or_bin: Literal["bin_and_fit", "fit", "bin"] = "fit", fit_func: ( - Callable[..., NDArrayf] | Literal["norder_polynomial"] | Literal["nfreq_sumsin"] + Literal["norder_polynomial", "nfreq_sumsin"] | Callable[..., NDArrayf] ) = "norder_polynomial", fit_optimizer: Callable[..., tuple[NDArrayf, Any]] = scipy.optimize.curve_fit, bin_sizes: int | dict[str, int | Iterable[float]] = 10, bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, - bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", - bias_var_names: Iterable[str] = None, + bin_apply_method: Literal["linear", "per_bin"] = "linear", + bias_var_names: Iterable[str] | None = None, subsample: float | int = 1.0, - ): - """ - Instantiate an N-dimensional bias correction using binning, fitting or both sequentially. + ) -> None: + """Instantiate an N-dimensional bias correction using binning, fitting or both sequentially. All fit arguments apply to "fit" and "bin_and_fit", and bin arguments to "bin" and "bin_and_fit". 
@@ -82,18 +81,19 @@ def __init__( if fit_or_bin in ["fit", "bin_and_fit"]: # Check input types for "fit" to raise user-friendly errors - if not (callable(fit_func) or (isinstance(fit_func, str) and fit_func in fit_workflows.keys())): - raise TypeError( - "Argument `fit_func` must be a function (callable) " - "or the string '{}', got {}.".format("', '".join(fit_workflows.keys()), type(fit_func)) + if not (callable(fit_func) or (isinstance(fit_func, str) and fit_func in fit_workflows)): + msg = ( + "Argument `fit_func` must be a function (callable) " + "or the string '{}', got {}.".format("', '".join(fit_workflows.keys()), type(fit_func)) ) + raise TypeError(msg) if not callable(fit_optimizer): raise TypeError( - "Argument `fit_optimizer` must be a function (callable), " "got {}.".format(type(fit_optimizer)) + f"Argument `fit_optimizer` must be a function (callable), got {type(fit_optimizer)}.", ) # If a workflow was called, override optimizer and pass proper function - if isinstance(fit_func, str) and fit_func in fit_workflows.keys(): + if isinstance(fit_func, str) and fit_func in fit_workflows: # Looks like a typing bug here, see: https://github.com/python/mypy/issues/10740 fit_optimizer = fit_workflows[fit_func]["optimizer"] # type: ignore fit_func = fit_workflows[fit_func]["func"] # type: ignore @@ -107,18 +107,18 @@ def __init__( ): raise TypeError( "Argument `bin_sizes` must be an integer, or a dictionary of integers or iterables, " - "got {}.".format(type(bin_sizes)) + f"got {type(bin_sizes)}.", ) if not callable(bin_statistic): raise TypeError( - "Argument `bin_statistic` must be a function (callable), " "got {}.".format(type(bin_statistic)) + f"Argument `bin_statistic` must be a function (callable), got {type(bin_statistic)}.", ) if not isinstance(bin_apply_method, str): raise TypeError( "Argument `bin_apply_method` must be the string 'linear' or 'per_bin', " - "got {}.".format(type(bin_apply_method)) + f"got {type(bin_apply_method)}.", ) list_bias_var_names 
= list(bias_var_names) if bias_var_names is not None else None @@ -174,10 +174,9 @@ def _fit_rst_rst_and_rst_pts( # type: ignore z_name: str, bias_vars: None | dict[str, NDArrayf] = None, weights: None | NDArrayf = None, - **kwargs, + **kwargs, # noqa: ANN003 ) -> None: """Function for fitting raster-raster and raster-point for bias correction methods.""" - # Pre-process raster-point input sub_ref, sub_tba, sub_bias_vars = self._preprocess_rst_pts_subsample( ref_elev=ref_elev, @@ -215,7 +214,6 @@ def _fit_rst_rst( **kwargs: Any, ) -> None: """Called by other classes""" - self._fit_rst_rst_and_rst_pts( ref_elev=ref_elev, tba_elev=tba_elev, @@ -243,7 +241,6 @@ def _fit_rst_pts( **kwargs: Any, ) -> None: """Called by other classes""" - self._fit_rst_rst_and_rst_pts( ref_elev=ref_elev, tba_elev=tba_elev, @@ -270,39 +267,39 @@ def _apply_rst( # type: ignore raise ValueError("At least one `bias_var` should be passed to the `apply` function, got None.") # Check the bias_vars passed match the ones stored for this bias correction class - if not sorted(bias_vars.keys()) == sorted(self._meta["inputs"]["fitorbin"]["bias_var_names"]): - raise ValueError( + if sorted(bias_vars.keys()) != sorted(self._meta["inputs"]["fitorbin"]["bias_var_names"]): + msg = ( "The keys of `bias_vars` do not match the `bias_var_names` defined during " "instantiation or fitting: {}.".format(self._meta["inputs"]["fitorbin"]["bias_var_names"]) ) + raise ValueError(msg) # Apply function to get correction (including if binning was done before) if self.meta["inputs"]["fitorbin"]["fit_or_bin"] in ["fit", "bin_and_fit"]: corr = self._meta["inputs"]["fitorbin"]["fit_func"]( - tuple(bias_vars.values()), *self._meta["outputs"]["fitorbin"]["fit_params"] + tuple(bias_vars.values()), *self._meta["outputs"]["fitorbin"]["fit_params"], ) # Apply binning to get correction - else: - if self._meta["inputs"]["fitorbin"]["bin_apply_method"] == "linear": - # N-D interpolation of binning - bin_interpolator = 
xdem.spatialstats.interp_nd_binning( - df=self._meta["outputs"]["fitorbin"]["bin_dataframe"], - list_var_names=list(bias_vars.keys()), - statistic=self._meta["inputs"]["fitorbin"]["bin_statistic"], - ) - corr = bin_interpolator(tuple(var.flatten() for var in bias_vars.values())) - first_var = list(bias_vars.keys())[0] - corr = corr.reshape(np.shape(bias_vars[first_var])) + elif self._meta["inputs"]["fitorbin"]["bin_apply_method"] == "linear": + # N-D interpolation of binning + bin_interpolator = xdem.spatialstats.interp_nd_binning( + df=self._meta["outputs"]["fitorbin"]["bin_dataframe"], + list_var_names=list(bias_vars.keys()), + statistic=self._meta["inputs"]["fitorbin"]["bin_statistic"], + ) + corr = bin_interpolator(tuple(var.flatten() for var in bias_vars.values())) + first_var = next(iter(bias_vars.keys())) + corr = corr.reshape(np.shape(bias_vars[first_var])) - else: - # Get N-D binning statistic for each pixel of the new list of variables - corr = xdem.spatialstats.get_perbin_nd_binning( - df=self._meta["outputs"]["fitorbin"]["bin_dataframe"], - list_var=list(bias_vars.values()), - list_var_names=list(bias_vars.keys()), - statistic=self._meta["inputs"]["fitorbin"]["bin_statistic"], - ) + else: + # Get N-D binning statistic for each pixel of the new list of variables + corr = xdem.spatialstats.get_perbin_nd_binning( + df=self._meta["outputs"]["fitorbin"]["bin_dataframe"], + list_var=list(bias_vars.values()), + list_var_names=list(bias_vars.keys()), + statistic=self._meta["inputs"]["fitorbin"]["bin_statistic"], + ) dem_corr = elev + corr @@ -310,8 +307,7 @@ def _apply_rst( # type: ignore class DirectionalBias(BiasCorr): - """ - Bias correction for directional biases, for example along- or across-track of satellite angle. + """Bias correction for directional biases, for example along- or across-track of satellite angle. The binning and/or fitting correction parameters are stored in the `self.meta["outputs"]["fitorbin"]`. 
""" @@ -319,16 +315,15 @@ class DirectionalBias(BiasCorr): def __init__( self, angle: float = 0, - fit_or_bin: Literal["bin_and_fit"] | Literal["fit"] | Literal["bin"] = "bin_and_fit", - fit_func: Callable[..., NDArrayf] | Literal["norder_polynomial"] | Literal["nfreq_sumsin"] = "nfreq_sumsin", + fit_or_bin: Literal["bin_and_fit", "fit", "bin"] = "bin_and_fit", + fit_func: Literal["norder_polynomial", "nfreq_sumsin"] | Callable[..., NDArrayf] = "nfreq_sumsin", fit_optimizer: Callable[..., tuple[NDArrayf, Any]] = scipy.optimize.curve_fit, bin_sizes: int | dict[str, int | Iterable[float]] = 100, bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, - bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", + bin_apply_method: Literal["linear", "per_bin"] = "linear", subsample: float | int = 1.0, - ): - """ - Instantiate a directional bias correction. + ) -> None: + """Instantiate a directional bias correction. :param angle: Angle in which to perform the directional correction (degrees) with 0° corresponding to X axis direction and increasing clockwise. @@ -344,7 +339,7 @@ def __init__( :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. 
""" super().__init__( - fit_or_bin, fit_func, fit_optimizer, bin_sizes, bin_statistic, bin_apply_method, ["angle"], subsample + fit_or_bin, fit_func, fit_optimizer, bin_sizes, bin_statistic, bin_apply_method, ["angle"], subsample, ) self._meta["inputs"]["specific"]["angle"] = angle self._needs_vars = False @@ -358,9 +353,9 @@ def _fit_rst_rst( # type: ignore crs: rio.crs.CRS, area_or_point: Literal["Area", "Point"] | None, z_name: str, - bias_vars: dict[str, NDArrayf] = None, + bias_vars: dict[str, NDArrayf] | None = None, weights: None | NDArrayf = None, - **kwargs, + **kwargs, # noqa: ANN003 ) -> None: logging.info("Estimating rotated coordinates.") @@ -392,9 +387,9 @@ def _fit_rst_pts( # type: ignore crs: rio.crs.CRS, area_or_point: Literal["Area", "Point"] | None, z_name: str, - bias_vars: dict[str, NDArrayf] = None, + bias_vars: dict[str, NDArrayf] | None = None, weights: None | NDArrayf = None, - **kwargs, + **kwargs, # noqa: ANN003 ) -> None: # Figure out which data is raster format to get gridded attributes @@ -445,8 +440,7 @@ def _apply_rst( class TerrainBias(BiasCorr): - """ - Correct a bias according to terrain, such as elevation or curvature. + """Correct a bias according to terrain, such as elevation or curvature. With elevation: often useful for nadir image DEM correction, where the focal length is slightly miscalculated. With curvature: often useful for a difference of DEMs with different effective resolution. 
@@ -461,18 +455,17 @@ class TerrainBias(BiasCorr): def __init__( self, terrain_attribute: str = "maximum_curvature", - fit_or_bin: Literal["bin_and_fit"] | Literal["fit"] | Literal["bin"] = "bin", + fit_or_bin: Literal["bin_and_fit", "fit", "bin"] = "bin", fit_func: ( - Callable[..., NDArrayf] | Literal["norder_polynomial"] | Literal["nfreq_sumsin"] + Literal["norder_polynomial", "nfreq_sumsin"] | Callable[..., NDArrayf] ) = "norder_polynomial", fit_optimizer: Callable[..., tuple[NDArrayf, Any]] = scipy.optimize.curve_fit, bin_sizes: int | dict[str, int | Iterable[float]] = 100, bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, - bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", + bin_apply_method: Literal["linear", "per_bin"] = "linear", subsample: float | int = 1.0, - ): - """ - Instantiate a terrain bias correction. + ) -> None: + """Instantiate a terrain bias correction. :param terrain_attribute: Terrain attribute to use for correction. :param fit_or_bin: Whether to fit or bin, or both. Use "fit" to correct by optimizing a function or @@ -486,7 +479,6 @@ def __init__( between bins, or "per_bin" to apply the statistic for each bin. :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count. 
""" - super().__init__( fit_or_bin, fit_func, @@ -510,9 +502,9 @@ def _fit_rst_rst( # type: ignore crs: rio.crs.CRS, area_or_point: Literal["Area", "Point"] | None, z_name: str, - bias_vars: dict[str, NDArrayf] = None, + bias_vars: dict[str, NDArrayf] | None = None, weights: None | NDArrayf = None, - **kwargs, + **kwargs, # noqa: ANN003 ) -> None: # If already passed by user, pass along @@ -520,16 +512,15 @@ def _fit_rst_rst( # type: ignore attr = bias_vars[self._meta["inputs"]["specific"]["terrain_attribute"]] # If only declared during instantiation + # Derive terrain attribute + elif self._meta["inputs"]["specific"]["terrain_attribute"] == "elevation": + attr = ref_elev else: - # Derive terrain attribute - if self._meta["inputs"]["specific"]["terrain_attribute"] == "elevation": - attr = ref_elev - else: - attr = xdem.terrain.get_terrain_attribute( - dem=ref_elev, - attribute=self._meta["inputs"]["specific"]["terrain_attribute"], - resolution=(transform[0], abs(transform[4])), - ) + attr = xdem.terrain.get_terrain_attribute( + dem=ref_elev, + attribute=self._meta["inputs"]["specific"]["terrain_attribute"], + resolution=(transform[0], abs(transform[4])), + ) # Run the parent function super()._fit_rst_rst_and_rst_pts( @@ -554,9 +545,9 @@ def _fit_rst_pts( # type: ignore crs: rio.crs.CRS, area_or_point: Literal["Area", "Point"] | None, z_name: str, - bias_vars: dict[str, NDArrayf] = None, + bias_vars: dict[str, NDArrayf] | None = None, weights: None | NDArrayf = None, - **kwargs, + **kwargs, # noqa: ANN003 ) -> None: # If already passed by user, pass along @@ -617,8 +608,7 @@ def _apply_rst( class Deramp(BiasCorr): - """ - Correct for a 2D polynomial along X/Y coordinates, for example from residual camera model deformations + """Correct for a 2D polynomial along X/Y coordinates, for example from residual camera model deformations (dome-like errors) or tilts (rotational errors). 
The correction parameters are stored in the `self.meta["outputs"]["fitorbin"]` key "fit_params", that can be passed @@ -628,16 +618,15 @@ def __init__( self, poly_order: int = 2, - fit_or_bin: Literal["bin_and_fit"] | Literal["fit"] | Literal["bin"] = "fit", + fit_or_bin: Literal["bin_and_fit", "fit", "bin"] = "fit", fit_func: Callable[..., NDArrayf] = polynomial_2d, fit_optimizer: Callable[..., tuple[NDArrayf, Any]] = scipy.optimize.curve_fit, bin_sizes: int | dict[str, int | Iterable[float]] = 10, bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, - bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear", + bin_apply_method: Literal["linear", "per_bin"] = "linear", subsample: float | int = 5e5, - ): - """ - Instantiate a directional bias correction. + ) -> None: + """Instantiate a deramp (2D polynomial) bias correction. :param poly_order: Order of the 2D polynomial to fit. :param fit_or_bin: Whether to fit or bin, or both. Use "fit" to correct by optimizing a function or @@ -675,7 +664,7 @@ def _fit_rst_rst( # type: ignore z_name: str, bias_vars: dict[str, NDArrayf] | None = None, weights: None | NDArrayf = None, - **kwargs, + **kwargs, # noqa: ANN003 ) -> None: # The number of parameters in the first guess defines the polynomial order when calling np.polyval2d @@ -709,7 +698,7 @@ def _fit_rst_pts( # type: ignore z_name: str, bias_vars: dict[str, NDArrayf] | None = None, weights: None | NDArrayf = None, - **kwargs, + **kwargs, # noqa: ANN003 ) -> None: # Figure out which data is raster format to get gridded attributes diff --git a/xdem/coreg/workflows.py b/xdem/coreg/workflows.py index 4ec4b3905..282e805bf 100644 --- a/xdem/coreg/workflows.py +++ b/xdem/coreg/workflows.py @@ -49,8 +49,7 @@ def create_inlier_mask( nmad_factor: Number = 5, slope_lim: list[Number] | tuple[Number, Number] = (0.1, 40), ) -> NDArrayf: - """ - Create a mask of inliers pixels to be used for coregistration. 
The following pixels can be excluded: + """Create a mask of inliers pixels to be used for coregistration. The following pixels can be excluded: - pixels within polygons of file(s) in shp_list (with corresponding inout element set to 1) - useful for \ masking unstable terrain like glaciers. - pixels outside polygons of file(s) in shp_list (with corresponding inout element set to -1) - useful to \ @@ -76,14 +75,14 @@ def create_inlier_mask( # - Sanity check on inputs - # # Check correct input type of shp_list if not isinstance(shp_list, (list, tuple)): - raise ValueError("Argument `shp_list` must be a list/tuple.") + raise TypeError("Argument `shp_list` must be a list/tuple.") for el in shp_list: if not isinstance(el, (str, gu.Vector)): - raise ValueError("Argument `shp_list` must be a list/tuple of strings or geoutils.Vector instance.") + raise TypeError("Argument `shp_list` must be a list/tuple of strings or geoutils.Vector instance.") # Check correct input type of inout if not isinstance(inout, (list, tuple)): - raise ValueError("Argument `inout` must be a list/tuple.") + raise TypeError("Argument `inout` must be a list/tuple.") if len(shp_list) > 0: if len(inout) == 0: @@ -99,7 +98,7 @@ def create_inlier_mask( # Check slope_lim type if not isinstance(slope_lim, (list, tuple)): - raise ValueError("Argument `slope_lim` must be a list/tuple.") + raise TypeError("Argument `slope_lim` must be a list/tuple.") if len(slope_lim) != 2: raise ValueError("Argument `slope_lim` must contain 2 elements.") for el in slope_lim: @@ -161,11 +160,10 @@ def dem_coregistration( slope_lim: list[Number] | tuple[Number, Number] = (0.1, 40), random_state: int | np.random.Generator | None = None, plot: bool = False, - out_fig: str = None, + out_fig: str | None = None, estimated_initial_shift: list[Number] | tuple[Number, Number] | None = None, ) -> tuple[DEM, Coreg | CoregPipeline, pd.DataFrame, NDArrayf]: - """ - A one-line function to coregister a selected DEM to a reference DEM. 
+ """A one-line function to coregister a selected DEM to a reference DEM. Reads both DEMs, reprojects them on the same grid, mask pixels based on shapefile(s), filter steep slopes and \ outliers, run the coregistration, returns the coregistered DEM and some statistics. @@ -198,29 +196,28 @@ def dem_coregistration( 3) DataFrame of coregistration statistics (count of obs, median and NMAD over stable terrain) before and after \ coregistration and 4) the inlier_mask used. """ - # Define default Coreg if None is passed if coreg_method is None: coreg_method = NuthKaab() + VerticalShift() # Check inputs if not isinstance(coreg_method, Coreg): - raise ValueError("Argument `coreg_method` must be an xdem.coreg instance (e.g. xdem.coreg.NuthKaab()).") + raise TypeError("Argument `coreg_method` must be an xdem.coreg instance (e.g. xdem.coreg.NuthKaab()).") if isinstance(ref_dem_path, str): if not isinstance(src_dem_path, str): - raise ValueError( + raise TypeError( f"Argument `ref_dem_path` is string but `src_dem_path` has type {type(src_dem_path)}." - "Both must have same type." + "Both must have same type.", ) elif isinstance(ref_dem_path, gu.Raster): if not isinstance(src_dem_path, gu.Raster): - raise ValueError( + raise TypeError( f"Argument `ref_dem_path` is of Raster type but `src_dem_path` has type {type(src_dem_path)}." - "Both must have same type." + "Both must have same type.", ) else: - raise ValueError("Argument `ref_dem_path` must be either a string or a Raster.") + raise TypeError("Argument `ref_dem_path` must be either a string or a Raster.") if grid not in ["ref", "src"]: raise ValueError(f"Argument `grid` must be either 'ref' or 'src' - currently set to {grid}.") @@ -233,18 +230,18 @@ def dem_coregistration( and all(isinstance(val, (float, int)) for val in estimated_initial_shift) ): raise ValueError( - "Argument `estimated_initial_shift` must be a list or tuple of exactly two numerical values." 
+ "Argument `estimated_initial_shift` must be a list or tuple of exactly two numerical values.", ) if isinstance(coreg_method, CoregPipeline): if not any(isinstance(step, AffineCoreg) for step in coreg_method.pipeline): raise TypeError( "An initial shift has been provided, but none of the coregistration methods in the pipeline " - "are affine. At least one affine coregistration method (e.g., AffineCoreg) is required." + "are affine. At least one affine coregistration method (e.g., AffineCoreg) is required.", ) elif not isinstance(coreg_method, AffineCoreg): raise TypeError( "An initial shift has been provided, but the coregistration method is not affine. " - "An affine coregistration method (e.g., AffineCoreg) is required." + "An affine coregistration method (e.g., AffineCoreg) is required.", ) # Load both DEMs @@ -307,20 +304,19 @@ def dem_coregistration( if estimated_initial_shift: def update_shift( - coreg_method: Coreg | CoregPipeline, shift_x: float = shift_x, shift_y: float = shift_y + coreg_method: Coreg | CoregPipeline, shift_x: float = shift_x, shift_y: float = shift_y, ) -> None: if isinstance(coreg_method, CoregPipeline): for step in coreg_method.pipeline: update_shift(step) - else: - # check if the keys exists - if "outputs" in coreg_method.meta and "affine" in coreg_method.meta["outputs"]: - if "shift_x" in coreg_method.meta["outputs"]["affine"]: - coreg_method.meta["outputs"]["affine"]["shift_x"] += shift_x - logging.debug(f"Updated shift_x by {shift_x} in {coreg_method}") - if "shift_y" in coreg_method.meta["outputs"]["affine"]: - coreg_method.meta["outputs"]["affine"]["shift_y"] += shift_y - logging.debug(f"Updated shift_y by {shift_y} in {coreg_method}") + # check if the keys exists + elif "outputs" in coreg_method.meta and "affine" in coreg_method.meta["outputs"]: + if "shift_x" in coreg_method.meta["outputs"]["affine"]: + coreg_method.meta["outputs"]["affine"]["shift_x"] += shift_x + logging.debug("Updated shift_x by %s in %s", shift_x, 
coreg_method) + if "shift_y" in coreg_method.meta["outputs"]["affine"]: + coreg_method.meta["outputs"]["affine"]["shift_y"] += shift_y + logging.debug("Updated shift_y by %s in %s", shift_y, coreg_method) update_shift(coreg_method) @@ -350,7 +346,7 @@ def update_shift( cb = plt.colorbar() cb.set_label("Elevation change (m)") ax2.set_title( - f"After coreg\n\n\nmean = {mean_coreg:.1f} m - med = {med_coreg:.1f} m - NMAD = {nmad_coreg:.1f} m" + f"After coreg\n\n\nmean = {mean_coreg:.1f} m - med = {med_coreg:.1f} m - NMAD = {nmad_coreg:.1f} m", ) plt.tight_layout() diff --git a/xdem/ddem.py b/xdem/ddem.py index d842d57e3..501e1613c 100644 --- a/xdem/ddem.py +++ b/xdem/ddem.py @@ -37,8 +37,7 @@ def _mask_as_array(reference_raster: gu.Raster, mask: str | gu.Vector | gu.Raster) -> NDArrayf: - """ - Convert a given mask into an array. + """Convert a given mask into an array. :param reference_raster: The raster to use for rasterizing the mask if the mask is a vector. :param mask: A valid Vector, Raster or a respective filepath to a mask. @@ -67,29 +66,34 @@ def _mask_as_array(reference_raster: gu.Raster, mask: str | gu.Vector | gu.Raste mask_array = mask.create_mask(reference_raster, as_array=True) elif isinstance(mask, gu.Raster): # The true value is the maximum value in the raster, unless the maximum value is 0 or False - true_value = np.nanmax(mask.data) if not np.nanmax(mask.data) in [0, False] else True + true_value = np.nanmax(mask.data) if np.nanmax(mask.data) not in [0, False] else True mask_array = (mask.data == true_value).squeeze() else: raise TypeError( - f"Mask has invalid type: {type(mask)}. Expected one of: " f"{[gu.Raster, gu.Vector, str, type(None)]}" + f"Mask has invalid type: {type(mask)}. 
Expected one of: {[gu.Raster, gu.Vector, str, type(None)]}", ) return mask_array -class dDEM(Raster): # type: ignore +class dDem(Raster): # noqa: N801 """A difference-DEM object.""" - def __init__(self, raster: gu.Raster, start_time: np.datetime64, end_time: np.datetime64, error: Any | None = None): - """ - Create a dDEM object from a Raster. + def __init__( + self, + raster: gu.Raster, + start_time: np.datetime64, + end_time: np.datetime64, + error: Any | None = None, + ) -> None: + """Create a dDem object from a Raster. :param raster: A georeferenced Raster object. - :param start_time: The starting time of the dDEM. - :param end_time: The end time of the dDEM. - :param error: An error measure for the dDEM (UNUSED). + :param start_time: The starting time of the dDem. + :param end_time: The end time of the dDem. + :param error: An error measure for the dDem (UNUSED). - :returns: A new dDEM instance. + :returns: A new dDem instance. """ # super().__init__(raster) @@ -101,22 +105,20 @@ def __init__(self, raster: gu.Raster, start_time: np.datetime64, end_time: np.da self._fill_method = "" def __str__(self) -> str: - """Return a summary of the dDEM.""" - return f"dDEM from {self.start_time} to {self.end_time}.\n\n{super().__str__()}" + """Return a summary of the dDem.""" + return f"dDem from {self.start_time} to {self.end_time}.\n\n{super().__str__()}" - def copy(self, new_array: NDArrayf = None) -> dDEM: + def copy(self, new_array: NDArrayf = None) -> dDem: """Return a copy of the DEM.""" - if new_array is None: new_array = self.data.copy() - new_ddem = dDEM.from_array(new_array, self.transform, self.crs, self.start_time, self.end_time) + new_ddem = dDem.from_array(new_array, self.transform, self.crs, self.start_time, self.end_time) return new_ddem @property def filled_data(self) -> NDArrayf | None: - """ - Get the filled data array if it exists, or else the original data if it has no nans. 
+ """Get the filled data array if it exists, or else the original data if it has no nans. Returns None if the filled_data array does not exist, and the original data has nans. @@ -132,7 +134,6 @@ def filled_data(self) -> NDArrayf | None: @filled_data.setter def filled_data(self, array: NDArrayf) -> None: """Set the filled_data attribute and make sure that it is valid.""" - assert ( self.data.size == array.size ), f"Array shape '{array.shape}' differs from the data shape '{self.data.shape}'" @@ -158,20 +159,19 @@ def from_array( start_time: np.datetime64, end_time: np.datetime64, nodata: int | float | None = None, - error: float = None, - ) -> dDEM: # type: ignore - """ - Create a new dDEM object from an array. + error: float | None = None, + ) -> dDem: # type: ignore + """Create a new dDem object from an array. - :param data: The dDEM data array. + :param data: The dDem data array. :param transform: A geometric transform. - :param crs: The coordinate reference system of the dDEM. - :param start_time: The starting time of the dDEM. - :param end_time: The end time of the dDEM. - :param error: An error measure for the dDEM. + :param crs: The coordinate reference system of the dDem. + :param start_time: The starting time of the dDem. + :param end_time: The end time of the dDem. + :param error: An error measure for the dDem. :param nodata: The nodata value. - :returns: A new dDEM instance. + :returns: A new dDem instance. """ return cls( gu.Raster.from_array(data=data, transform=transform, crs=crs, nodata=nodata), @@ -186,8 +186,7 @@ def interpolate( reference_elevation: NDArrayf | np.ma.masked_array[Any, np.dtype[np.floating[Any]]] | xdem.DEM = None, mask: NDArrayf | xdem.DEM | gu.Vector = None, ) -> NDArrayf | None: - """ - Interpolate the dDEM using the given method. + """Interpolate the dDem using the given method. :param method: The method to use for interpolation. :param reference_elevation: Reference DEM. Only required for hypsometric approaches. 
@@ -222,7 +221,7 @@ def interpolate( ddem_mask = nans.copy().squeeze() for i in entries.index: feature_mask = (gu.Vector(entries.loc[entries.index == i]).create_mask(self, as_array=True)).reshape( - interpolated_ddem.shape + interpolated_ddem.shape, ) if np.count_nonzero(feature_mask) == 0: continue @@ -232,8 +231,8 @@ def interpolate( warnings.filterwarnings("ignore", "invalid value encountered in divide") interpolated_ddem = np.asarray( xdem.volume.hypsometric_interpolation( - interpolated_ddem, reference_elevation.data, mask=feature_mask - ) + interpolated_ddem, reference_elevation.data, mask=feature_mask, + ), ) except ValueError as exception: # Skip the feature if too few glacier values exist. @@ -244,7 +243,7 @@ def interpolate( ddem_mask[feature_mask] = False # All values that were nan in the start and are without the updated validity mask should now be nan - # The above interpolates values outside of the dDEM, so this is necessary. + # The above interpolates values outside of the dDem, so this is necessary. interpolated_ddem[ddem_mask] = np.nan diff = abs(np.nanmean(interpolated_ddem - self.data)) @@ -259,7 +258,7 @@ def interpolate( mask_array = _mask_as_array(self, mask).reshape(self.data.shape) self.filled_data = xdem.volume.hypsometric_interpolation( - self.data, reference_elevation.data, mask=mask_array + self.data, reference_elevation.data, mask=mask_array, ).data else: diff --git a/xdem/dem.py b/xdem/dem.py index 7d6e6f26c..e75c1b74e 100644 --- a/xdem/dem.py +++ b/xdem/dem.py @@ -16,12 +16,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""This module defines the DEM class.""" +"""The module defines the DEM class.""" from __future__ import annotations import pathlib import warnings -from typing import Any, Callable, Literal, overload +from collections.abc import Callable +from typing import Any, Literal, overload import geopandas as gpd import numpy as np @@ -54,8 +55,7 @@ class DEM(Raster): # type: ignore - """ - The digital elevation model. + """The digital elevation model. The DEM has a single main attribute in addition to that inherited from :class:`geoutils.Raster`: vcrs: :class:`pyproj.VerticalCRS` @@ -93,8 +93,7 @@ def __init__( downsample: int = 1, nodata: int | float | None = None, ) -> None: - """ - Instantiate a digital elevation model. + """Instantiate a digital elevation model. The vertical reference of the DEM can be defined by passing the `vcrs` argument. Otherwise, a vertical reference is tentatively parsed from the DEM product name. @@ -110,7 +109,6 @@ def __init__( :param downsample: Downsample the array once loaded by a round factor. Default is no downsampling. :param nodata: Nodata value to be used (overwrites the metadata). Default reads from metadata. 
""" - self.data: NDArrayf self._vcrs: VerticalCRS | Literal["Ellipsoid"] | None = None self._vcrs_name: str | None = None @@ -122,23 +120,22 @@ def __init__( setattr(self, key, filename_or_dataset.__dict__[key]) return # Else rely on parent Raster class options (including raised errors) - else: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="Parse metadata from file not implemented") - super().__init__( - filename_or_dataset, - load_data=load_data, - parse_sensor_metadata=parse_sensor_metadata, - silent=silent, - downsample=downsample, - nodata=nodata, - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="Parse metadata from file not implemented") + super().__init__( + filename_or_dataset, + load_data=load_data, + parse_sensor_metadata=parse_sensor_metadata, + silent=silent, + downsample=downsample, + nodata=nodata, + ) # Ensure DEM has only one band: self.bands can be None when data is not loaded through the Raster class if self.bands is not None and len(self.bands) > 1: raise ValueError( "DEM rasters should be composed of one band only. Either use argument `bands` to specify " - "a single band on opening, or use .split_bands() on an opened raster." 
+ "a single band on opening, or use .split_bands() on an opened raster.", ) # If the CRS in the raster metadata has a 3rd dimension, could set it as a vertical reference @@ -149,10 +146,10 @@ def __init__( if vcrs is not None: # Raise a warning if the two are not the same vcrs_user = _vcrs_from_user_input(vcrs) - if not vcrs_from_crs == vcrs_user: + if vcrs_from_crs != vcrs_user: warnings.warn( "The CRS in the raster metadata already has a vertical component, " - "the user-input '{}' will override it.".format(vcrs) + f"the user-input '{vcrs}' will override it.", ) # Otherwise, use the one from the raster 3D CRS else: @@ -167,14 +164,12 @@ def __init__( self.set_vcrs(vcrs) def copy(self, new_array: NDArrayf | None = None) -> DEM: - """ - Copy the DEM, possibly updating the data array. + """Copy the DEM, possibly updating the data array. :param new_array: New data array. :return: Copied DEM. """ - new_dem = super().copy(new_array=new_array) # type: ignore # The rest of attributes are immutable, including pyproj.CRS for attrs in dem_attrs: @@ -190,10 +185,10 @@ def from_array( crs: CRS | int | None, nodata: int | float | None = None, area_or_point: Literal["Area", "Point"] | None = None, - tags: dict[str, Any] = None, + tags: dict[str, Any] | None = None, cast_nodata: bool = True, vcrs: ( - Literal["Ellipsoid"] | Literal["EGM08"] | Literal["EGM96"] | str | pathlib.Path | VerticalCRS | int | None + Literal["Ellipsoid", "EGM08", "EGM96"] | str | pathlib.Path | VerticalCRS | int | None ) = None, ) -> DEM: """Create a DEM from a numpy array and the georeferencing information. 
@@ -227,19 +222,16 @@ def from_array( @property def vcrs(self) -> VerticalCRS | Literal["Ellipsoid"] | None: """Vertical coordinate reference system of the DEM.""" - return self._vcrs @property def vcrs_grid(self) -> str | None: """Grid path of vertical coordinate reference system of the DEM.""" - return self._vcrs_grid @property def vcrs_name(self) -> str | None: """Name of vertical coordinate reference system of the DEM.""" - if self.vcrs is not None: # If it is the ellipsoid if isinstance(self.vcrs, str): @@ -255,15 +247,13 @@ def vcrs_name(self) -> str | None: def set_vcrs( self, - new_vcrs: Literal["Ellipsoid"] | Literal["EGM08"] | Literal["EGM96"] | str | pathlib.Path | VerticalCRS | int, + new_vcrs: Literal["Ellipsoid", "EGM08", "EGM96"] | str | pathlib.Path | VerticalCRS | int, ) -> None: - """ - Set the vertical coordinate reference system of the DEM. + """Set the vertical coordinate reference system of the DEM. :param new_vcrs: Vertical coordinate reference system either as a name ("Ellipsoid", "EGM08", "EGM96"), an EPSG code or pyproj.crs.VerticalCRS, or a path to a PROJ grid file (https://github.com/OSGeo/PROJ-data). """ - # Get vertical CRS and set it and the grid self._vcrs = _vcrs_from_user_input(vcrs_input=new_vcrs) self._vcrs_grid = _grid_from_user_input(vcrs_input=new_vcrs) @@ -271,12 +261,10 @@ def set_vcrs( @property def ccrs(self) -> CompoundCRS | CRS | None: """Compound horizontal and vertical coordinate reference system of the DEM.""" - if self.vcrs is not None: ccrs = _build_ccrs_from_crs_and_vcrs(crs=self.crs, vcrs=self.vcrs) return ccrs - else: - return None + return None @overload def to_vcrs( @@ -319,8 +307,7 @@ def to_vcrs( ) = None, inplace: bool = False, ) -> DEM | None: - """ - Convert the DEM to another vertical coordinate reference system. + """Convert the DEM to another vertical coordinate reference system. :param vcrs: Destination vertical CRS. 
Either as a name ("WGS84", "EGM08", "EGM96"), an EPSG code or pyproj.crs.VerticalCRS, or a path to a PROJ grid file (https://github.com/OSGeo/PROJ-data) @@ -329,11 +316,10 @@ def to_vcrs( :return: DEM with vertical reference transformed, or None. """ - if self.vcrs is None and force_source_vcrs is None: raise ValueError( "The current DEM has no vertical reference, define one with .set_vref() " - "or by passing `src_vcrs` to perform a conversion." + "or by passing `src_vcrs` to perform a conversion.", ) # Initial Compound CRS (only exists if vertical CRS is not None, as checked above) @@ -371,20 +357,20 @@ def to_vcrs( self.set_vcrs(new_vcrs=vcrs) return None # Otherwise, return new DEM - else: - return DEM.from_array( - data=new_data, - transform=self.transform, - crs=self.crs, - nodata=self.nodata, - area_or_point=self.area_or_point, - tags=self.tags, - vcrs=vcrs, - cast_nodata=False, - ) + return DEM.from_array( + data=new_data, + transform=self.transform, + crs=self.crs, + nodata=self.nodata, + area_or_point=self.area_or_point, + tags=self.tags, + vcrs=vcrs, + cast_nodata=False, + ) @copy_doc(terrain, remove_dem_res_params=True) def slope(self, method: str = "Horn", degrees: bool = True) -> RasterType: + """Return slope.""" return terrain.slope(self, method=method, degrees=degrees) @copy_doc(terrain, remove_dem_res_params=True) @@ -393,63 +379,64 @@ def aspect( method: str = "Horn", degrees: bool = True, ) -> RasterType: - + """Return aspect.""" return terrain.aspect(self, method=method, degrees=degrees) @copy_doc(terrain, remove_dem_res_params=True) def hillshade( - self, method: str = "Horn", azimuth: float = 315.0, altitude: float = 45.0, z_factor: float = 1.0 + self, method: str = "Horn", azimuth: float = 315.0, altitude: float = 45.0, z_factor: float = 1.0, ) -> RasterType: - + """Return hillshade.""" return terrain.hillshade(self, method=method, azimuth=azimuth, altitude=altitude, z_factor=z_factor) @copy_doc(terrain, remove_dem_res_params=True) def 
curvature(self) -> RasterType: - + """Return terrain curvature.""" return terrain.curvature(self) @copy_doc(terrain, remove_dem_res_params=True) def planform_curvature(self) -> RasterType: - + """Return platform curvature.""" return terrain.planform_curvature(self) @copy_doc(terrain, remove_dem_res_params=True) def profile_curvature(self) -> RasterType: - + """Return profile curvature.""" return terrain.profile_curvature(self) @copy_doc(terrain, remove_dem_res_params=True) def maximum_curvature(self) -> RasterType: - + """Return maximum curvature.""" return terrain.maximum_curvature(self) @copy_doc(terrain, remove_dem_res_params=True) def topographic_position_index(self, window_size: int = 3) -> RasterType: - + """Return topographic position index.""" return terrain.topographic_position_index(self, window_size=window_size) @copy_doc(terrain, remove_dem_res_params=True) def terrain_ruggedness_index(self, method: str = "Riley", window_size: int = 3) -> RasterType: - + """Return terrain ruggedness index.""" return terrain.terrain_ruggedness_index(self, method=method, window_size=window_size) @copy_doc(terrain, remove_dem_res_params=True) def roughness(self, window_size: int = 3) -> RasterType: - + """Return roughness.""" return terrain.roughness(self, window_size=window_size) @copy_doc(terrain, remove_dem_res_params=True) def rugosity(self) -> RasterType: - + """Return rugosity.""" return terrain.rugosity(self) @copy_doc(terrain, remove_dem_res_params=True) def fractal_roughness(self, window_size: int = 13) -> RasterType: - + """Return fractal roughness.""" return terrain.fractal_roughness(self, window_size=window_size) @copy_doc(terrain, remove_dem_res_params=True) def get_terrain_attribute(self, attribute: str | list[str], **kwargs: Any) -> RasterType | list[RasterType]: + """Return terrain attribute.""" return terrain.get_terrain_attribute(self, attribute=attribute, **kwargs) def coregister_3d( @@ -457,11 +444,10 @@ def coregister_3d( reference_elev: DEM | 
gpd.GeoDataFrame, coreg_method: coreg.Coreg = None, inlier_mask: Mask | NDArrayb = None, - bias_vars: dict[str, NDArrayf | MArrayf | RasterType] = None, + bias_vars: dict[str, NDArrayf | MArrayf | RasterType] | None = None, **kwargs: Any, ) -> DEM: - """ - Coregister DEM to a reference DEM in three dimensions. + """Coregister DEM to a reference DEM in three dimensions. Any coregistration method or pipeline from xdem.Coreg can be passed. Default is only horizontal and vertical shifts of Nuth and Kääb (2011). @@ -474,7 +460,6 @@ def coregister_3d( :return: Coregistered DEM. """ - if coreg_method is None: coreg_method = coreg.NuthKaab() @@ -492,7 +477,7 @@ def estimate_uncertainty( other_elev: DEM | gpd.GeoDataFrame, stable_terrain: Mask | NDArrayb = None, approach: Literal["H2022", "R2009", "Basic"] = "H2022", - precision_of_other: Literal["finer"] | Literal["same"] = "finer", + precision_of_other: Literal["finer", "same"] = "finer", spread_estimator: Callable[[NDArrayf], np.floating[Any]] = nmad, variogram_estimator: Literal["matheron", "cressie", "genton", "dowd"] = "dowd", list_vars: tuple[RasterType | str, ...] = ("slope", "maximum_curvature"), @@ -500,8 +485,7 @@ def estimate_uncertainty( z_name: str = "z", random_state: int | np.random.Generator | None = None, ) -> tuple[RasterType, Variogram]: - """ - Estimate uncertainty of DEM. + """Estimate uncertainty of DEM. Derives either a map of variable errors (based on slope and curvature by default) and a function describing the spatial correlation of error (between 0 and 1) with spatial lag (distance between observations). @@ -532,7 +516,6 @@ def estimate_uncertainty( :return: Uncertainty raster, Variogram of uncertainty correlation. 
""" - # Summarize approach steps approach_dict = { "H2022": {"heterosc": True, "multi_range": True}, @@ -568,7 +551,7 @@ def estimate_uncertainty( # Estimate variable error from these variables sig_dh = infer_heteroscedasticity_from_stable( - dvalues=dh, list_var=list_var_rast, spread_statistic=spread_estimator, stable_mask=stable_terrain + dvalues=dh, list_var=list_var_rast, spread_statistic=spread_estimator, stable_mask=stable_terrain, )[0] # Otherwise, return a constant error raster else: diff --git a/xdem/demcollection.py b/xdem/demcollection.py index 81fff3124..2934554d2 100644 --- a/xdem/demcollection.py +++ b/xdem/demcollection.py @@ -39,9 +39,8 @@ def __init__( timestamps: list[datetime.datetime] | None = None, outlines: gu.Vector | dict[datetime.datetime, gu.Vector] | None = None, reference_dem: int | gu.Raster = 0, - ): - """ - Create a new temporal DEM collection. + ) -> None: + """Create a new temporal DEM collection. :param dems: A list of DEMs. :param timestamps: A list of DEM timestamps. 
@@ -55,7 +54,7 @@ def __init__( timestamp_attributes = [dem.datetime for dem in dems] if any(stamp is None for stamp in timestamp_attributes): raise ValueError( - "Argument `timestamps` not provided and the given DEMs do not all have datetime " "attributes" + "Argument `timestamps` not provided and the given DEMs do not all have datetime attributes", ) timestamps = timestamp_attributes @@ -76,18 +75,20 @@ def __init__( if isinstance(reference_dem, (int, np.integer)): self.reference_index = np.argwhere(indices == reference_dem)[0][0] elif isinstance(reference_dem, gu.Raster): - self.reference_index = [i for i, dem in enumerate(self.dems) if dem is reference_dem][0] + self.reference_index = next(i for i, dem in enumerate(self.dems) if dem is reference_dem) if outlines is None: self.outlines: dict[np.datetime64, gu.Vector] = {} elif isinstance(outlines, gu.Vector): self.outlines = {self.timestamps[self.reference_index]: outlines} elif all(isinstance(value, gu.Vector) for value in outlines.values()): - self.outlines = dict(zip(np.array(list(outlines.keys())).astype("datetime64[ns]"), outlines.values())) + self.outlines = dict(zip(np.array(list(outlines.keys())).astype("datetime64[ns]"), + outlines.values(), + strict=False)) else: raise ValueError( f"Invalid format on 'outlines': {type(outlines)}," - " expected one of ['gu.Vector', 'dict[datetime.datetime, gu.Vector']" + " expected one of ['gu.Vector', 'dict[datetime.datetime, gu.Vector']", ) @property @@ -101,8 +102,7 @@ def reference_timestamp(self) -> np.datetime64: return self.timestamps[self.reference_index] def subtract_dems(self, resampling_method: str = "cubic_spline") -> list[xdem.dDEM]: - """ - Generate dDEMs by subtracting all DEMs to the reference. + """Generate dDEMs by subtracting all DEMs to the reference. :param resampling_method: The resampling method to use if reprojection is needed. 
@@ -135,8 +135,7 @@ def subtract_dems(self, resampling_method: str = "cubic_spline") -> list[xdem.dD return self.ddems def interpolate_ddems(self, method: str = "linear") -> list[NDArrayf]: - """ - Interpolate all the dDEMs in the DEMCollection object using the chosen interpolation method. + """Interpolate all the dDEMs in the DEMCollection object using the chosen interpolation method. :param method: The chosen interpolation method. """ @@ -147,8 +146,7 @@ def interpolate_ddems(self, method: str = "linear") -> list[NDArrayf]: return [ddem.filled_data for ddem in self.ddems] def get_ddem_mask(self, ddem: xdem.dDEM, outlines_filter: str | None = None) -> NDArrayf: - """ - Get a fitting dDEM mask for a provided dDEM. + """Get a fitting dDEM mask for a provided dDEM. The mask is created by evaluating these factors, in order: @@ -183,17 +181,16 @@ def get_ddem_mask(self, ddem: xdem.dDEM, outlines_filter: str | None = None) -> mask = outlines[ddem.start_time].create_mask(ddem, as_array=True) # If only one outlines file exist, use that as a mask. elif len(outlines) == 1: - mask = list(outlines.values())[0].create_mask(ddem, as_array=True) + mask = next(iter(outlines.values())).create_mask(ddem, as_array=True) # If no fitting outlines were found, make a full true boolean mask in its stead. else: mask = np.ones(shape=ddem.data.shape, dtype=bool) return mask.reshape(ddem.data.shape) def get_dh_series( - self, outlines_filter: str | None = None, mask: NDArrayf | None = None, nans_ok: bool = False + self, outlines_filter: str | None = None, mask: NDArrayf | None = None, nans_ok: bool = False, ) -> pd.DataFrame: - """ - Return a dataframe of mean dDEM values and respective areas for every timestamp. + """Return a dataframe of mean dDEM values and respective areas for every timestamp. The values are always compared to the reference DEM timestamp. 
@@ -228,10 +225,9 @@ def get_dh_series( return dh_values def get_dv_series( - self, outlines_filter: str | None = None, mask: NDArrayf | None = None, nans_ok: bool = False + self, outlines_filter: str | None = None, mask: NDArrayf | None = None, nans_ok: bool = False, ) -> pd.Series: - """ - Return a series of mean volume change (dV) for every timestamp. + """Return a series of mean volume change (dV) for every timestamp. The values are always compared to the reference DEM timestamp. @@ -252,8 +248,7 @@ def get_cumulative_series( mask: NDArrayf | None = None, nans_ok: bool = False, ) -> pd.Series: - """ - Get the cumulative dH (elevation) or dV (volume) since the first timestamp. + """Get the cumulative dH (elevation) or dV (volume) since the first timestamp. :param kind: The kind of series. Can be dh or dv. :param outlines_filter: A query to filter the outline vectors. Example: "name_column == 'specific glacier'". @@ -274,12 +269,12 @@ def get_cumulative_series( # Simplify the index to just "year" (implicitly still the same as above) cumulative_dh = pd.Series(dtype=d_series.dtype) cumulative_dh[self.reference_timestamp] = 0.0 - for i, value in zip(d_series.index, d_series.values): - non_reference_year = [date for date in [i.left, i.right] if date != self.reference_timestamp][0] + for i, value in zip(d_series.index, d_series.values, strict=False): + non_reference_year = next(date for date in [i.left, i.right] if date != self.reference_timestamp) cumulative_dh.loc[non_reference_year] = -value # Sort the dates (just to be sure. 
It should already be sorted) - cumulative_dh.sort_index(inplace=True) + cumulative_dh = cumulative_dh.sort_index() # Subtract the entire series by the first value to cumulative_dh -= cumulative_dh.iloc[0] diff --git a/xdem/examples.py b/xdem/examples.py index 144180fb7..1b8dcf101 100644 --- a/xdem/examples.py +++ b/xdem/examples.py @@ -22,21 +22,22 @@ import tempfile import urllib.request from distutils.dir_util import copy_tree +from pathlib import Path import geoutils as gu import xdem -_EXAMPLES_DIRECTORY = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "examples", "data")) +_EXAMPLES_DIRECTORY = Path(os.path.join(Path(__file__).parent, "..", "examples", "data")).resolve() # Absolute filepaths to the example files. _FILEPATHS_DATA = { "longyearbyen_ref_dem": os.path.join(_EXAMPLES_DIRECTORY, "Longyearbyen", "data", "DEM_2009_ref.tif"), "longyearbyen_tba_dem": os.path.join(_EXAMPLES_DIRECTORY, "Longyearbyen", "data", "DEM_1990.tif"), "longyearbyen_glacier_outlines": os.path.join( - _EXAMPLES_DIRECTORY, "Longyearbyen", "data", "glacier_mask", "CryoClim_GAO_SJ_1990.shp" + _EXAMPLES_DIRECTORY, "Longyearbyen", "data", "glacier_mask", "CryoClim_GAO_SJ_1990.shp", ), "longyearbyen_glacier_outlines_2010": os.path.join( - _EXAMPLES_DIRECTORY, "Longyearbyen", "data", "glacier_mask", "CryoClim_GAO_SJ_2010.shp" + _EXAMPLES_DIRECTORY, "Longyearbyen", "data", "glacier_mask", "CryoClim_GAO_SJ_2010.shp", ), } @@ -49,8 +50,7 @@ def download_longyearbyen_examples(overwrite: bool = False) -> None: - """ - Fetch the Longyearbyen example files. + """Fetch the Longyearbyen example files. :param overwrite: Do not download the files again if they already exist. 
""" @@ -61,8 +61,8 @@ def download_longyearbyen_examples(overwrite: bool = False) -> None: # If we ask for overwrite, also remove the processed test data if overwrite: for fn in list(_FILEPATHS_PROCESSED.values()): - if os.path.exists(fn): - os.remove(fn) + if Path(fn).exists(): + Path(fn).unlink() # Static commit hash to be bumped every time it needs to be. commit = "ff5ede952fc422ebd2a3c6340041a118850bc905" @@ -75,22 +75,22 @@ def download_longyearbyen_examples(overwrite: bool = False) -> None: temp_dir = tempfile.TemporaryDirectory() tar_path = os.path.join(temp_dir.name, "data.tar.gz") - response = urllib.request.urlopen(url) + response = urllib.request.urlopen(url) # noqa: S310 # If the response was right, download the tarball to the temporary directory if response.getcode() == 200: - with open(tar_path, "wb") as outfile: + with Path(tar_path).open("wb") as outfile: outfile.write(response.read()) else: raise ValueError(f"Longyearbyen data fetch gave non-200 response: {response.status_code}.") # Extract the tarball with tarfile.open(tar_path) as tar: - tar.extractall(temp_dir.name) + tar.extractall(temp_dir.name) # noqa: S202 # Find the first directory in the temp_dir (should only be one) and construct the Longyearbyen data dir path. dir_name = os.path.join( temp_dir.name, - [dirname for dirname in os.listdir(temp_dir.name) if os.path.isdir(os.path.join(temp_dir.name, dirname))][0], + next(dirname for dirname in os.listdir(temp_dir.name) if Path(os.path.join(temp_dir.name, dirname)).is_dir()), "data", "Longyearbyen", ) @@ -100,22 +100,20 @@ def download_longyearbyen_examples(overwrite: bool = False) -> None: def process_coregistered_examples(name: str, overwrite: bool = False) -> None: - """ - Process the Longyearbyen example files into a dDEM (to avoid repeating this in many test/documentation steps). + """Process the Longyearbyen example files into a dDEM (to avoid repeating this in many test/documentation steps). 
:param name: Name of test data :param overwrite: Do not download the files again if they already exist. """ - # If the file called already exists and overwrite is False, do nothing - if not overwrite and os.path.isfile(_FILEPATHS_PROCESSED[name]): + if not overwrite and Path(_FILEPATHS_PROCESSED[name]).is_file(): return # Check that data is downloaded before attempting processing download_longyearbyen_examples(overwrite=False) # If the ddem file does not exist, create it - if not os.path.isfile(_FILEPATHS_PROCESSED["longyearbyen_ddem"]): + if not Path(_FILEPATHS_PROCESSED["longyearbyen_ddem"]).is_file(): reference_raster = gu.Raster(_FILEPATHS_DATA["longyearbyen_ref_dem"]) to_be_aligned_raster = gu.Raster(_FILEPATHS_DATA["longyearbyen_tba_dem"]) glacier_mask = gu.Vector(_FILEPATHS_DATA["longyearbyen_glacier_outlines"]) @@ -129,11 +127,11 @@ def process_coregistered_examples(name: str, overwrite: bool = False) -> None: diff = reference_raster - aligned_raster # Save it so that future calls won't need to recreate the file - os.makedirs(os.path.dirname(_FILEPATHS_PROCESSED["longyearbyen_ddem"]), exist_ok=True) + Path(_FILEPATHS_PROCESSED["longyearbyen_ddem"]).parent.mkdir(parents=True, exist_ok=True) diff.save(_FILEPATHS_PROCESSED["longyearbyen_ddem"]) # If the tba_dem_coreg file does not exist, create it - if not os.path.isfile(_FILEPATHS_PROCESSED["longyearbyen_tba_dem_coreg"]): + if not Path(_FILEPATHS_PROCESSED["longyearbyen_tba_dem_coreg"]).is_file(): dem_2009 = xdem.DEM(get_path("longyearbyen_ref_dem"), silent=True) ddem = xdem.DEM(get_path("longyearbyen_ddem"), silent=True) @@ -143,21 +141,18 @@ def process_coregistered_examples(name: str, overwrite: bool = False) -> None: def get_path(name: str) -> str: - """ - Get path of example data. List of available files can be found in "examples.available". + """Get path of example data. List of available files can be found in "examples.available". 
:param name: Name of test data :return: """ - if name in list(_FILEPATHS_DATA.keys()): download_longyearbyen_examples() return _FILEPATHS_DATA[name] - elif name in list(_FILEPATHS_PROCESSED.keys()): + if name in list(_FILEPATHS_PROCESSED.keys()): process_coregistered_examples(name) return _FILEPATHS_PROCESSED[name] - else: - raise ValueError( - 'Data name should be one of "' - + '" , "'.join(list(_FILEPATHS_DATA.keys()) + list(_FILEPATHS_PROCESSED.keys())) - + '".' - ) + raise ValueError( + 'Data name should be one of "' + + '" , "'.join(list(_FILEPATHS_DATA.keys()) + list(_FILEPATHS_PROCESSED.keys())) + + '".', + ) diff --git a/xdem/filters.py b/xdem/filters.py index c80b87eb5..fb39f20aa 100644 --- a/xdem/filters.py +++ b/xdem/filters.py @@ -34,8 +34,7 @@ def gaussian_filter_scipy(array: NDArrayf, sigma: float) -> NDArrayf: - """ - Apply a Gaussian filter to a raster that may contain NaNs, using scipy's implementation. + """Apply a Gaussian filter to a raster that may contain NaNs, using scipy's implementation. gaussian_filter_cv is recommended as it is usually faster, but this depends on the value of sigma. N.B: kernel_size is set automatically based on sigma. 
@@ -55,30 +54,28 @@ def gaussian_filter_scipy(array: NDArrayf, sigma: float) -> NDArrayf: # If array contain NaNs, need a more sophisticated approach # Inspired by https://stackoverflow.com/a/36307291 - else: - # Run filter on a copy with NaNs set to 0 - array_no_nan = array.copy() - array_no_nan[np.isnan(array)] = 0 - gauss_no_nan = scipy.ndimage.gaussian_filter(array_no_nan, sigma=sigma) - del array_no_nan + # Run filter on a copy with NaNs set to 0 + array_no_nan = array.copy() + array_no_nan[np.isnan(array)] = 0 + gauss_no_nan = scipy.ndimage.gaussian_filter(array_no_nan, sigma=sigma) + del array_no_nan - # Mask of NaN values - nan_mask = 0 * array.copy() + 1 - nan_mask[np.isnan(array)] = 0 - gauss_mask = scipy.ndimage.gaussian_filter(nan_mask, sigma=sigma) - del nan_mask + # Mask of NaN values + nan_mask = 0 * array.copy() + 1 + nan_mask[np.isnan(array)] = 0 + gauss_mask = scipy.ndimage.gaussian_filter(nan_mask, sigma=sigma) + del nan_mask - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="invalid value encountered") - gauss = gauss_no_nan / gauss_mask + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="invalid value encountered") + gauss = gauss_no_nan / gauss_mask - return gauss + return gauss def gaussian_filter_cv(array: NDArrayf, sigma: float) -> NDArrayf: - """ - Apply a Gaussian filter to a raster that may contain NaNs, using OpenCV's implementation. + """Apply a Gaussian filter to a raster that may contain NaNs, using OpenCV's implementation. Arguments are for now hard-coded to be identical to scipy. 
N.B: kernel_size is set automatically based on sigma @@ -141,8 +138,7 @@ def gaussian_filter_cv(array: NDArrayf, sigma: float) -> NDArrayf: def distance_filter(array: NDArrayf, radius: float, outlier_threshold: float) -> NDArrayf: - """ - Filter out pixels whose value is distant more than a set threshold from the average value of all neighbor \ + """Filter out pixels whose value is distant more than a set threshold from the average value of all neighbor \ pixels within a given radius. Filtered pixels are set to NaN. diff --git a/xdem/fit.py b/xdem/fit.py index 8ea647c56..901b8acd7 100644 --- a/xdem/fit.py +++ b/xdem/fit.py @@ -16,15 +16,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -Functions to perform normal, weighted and robust fitting. -""" +"""Functions to perform normal, weighted and robust fitting.""" from __future__ import annotations import inspect import logging import warnings -from typing import Any, Callable +from collections.abc import Callable +from typing import Any import numpy as np import scipy @@ -50,8 +49,7 @@ def rmse(z: NDArrayf) -> float: - """ - Return root mean square error + """Return root mean square error :param z: Residuals between predicted and true value :return: Root Mean Square Error """ @@ -59,8 +57,7 @@ def rmse(z: NDArrayf) -> float: def huber_loss(z: NDArrayf) -> float: - """ - Huber loss cost (reduces the weight of outliers) + """Huber loss cost (reduces the weight of outliers) :param z: Residuals between predicted and true values :return: Huber cost """ @@ -70,8 +67,7 @@ def huber_loss(z: NDArrayf) -> float: def soft_loss(z: NDArrayf, scale: float = 0.5) -> float: - """ - Soft loss cost (reduces the weight of outliers) + """Soft loss cost (reduces the weight of outliers) :param z: Residuals between predicted and true values :param scale: Scale factor :return: Soft loss cost @@ -85,13 +81,11 @@ def soft_loss(z: NDArrayf, scale: float = 0.5) -> float: def 
sumsin_1d(xx: NDArrayf, *params: NDArrayf) -> NDArrayf: - """ - Sum of N sinusoids in 1D. + """Sum of N sinusoids in 1D. :param xx: Array of coordinates. :param params: 3 x N parameters in order of amplitude (Y unit), wavelength (X unit) and phase (radians). """ - # Squeeze input in case it is a 1-D tuple or such xx = np.array(xx).squeeze() @@ -113,8 +107,7 @@ def sumsin_1d(xx: NDArrayf, *params: NDArrayf) -> NDArrayf: def polynomial_1d(xx: NDArrayf, *params: NDArrayf) -> NDArrayf: - """ - N-order 1D polynomial. + """N-order 1D polynomial. :param xx: 1D array of values. :param params: N polynomial parameters. @@ -125,22 +118,20 @@ def polynomial_1d(xx: NDArrayf, *params: NDArrayf) -> NDArrayf: def polynomial_2d(xx: tuple[NDArrayf, NDArrayf], *params: NDArrayf) -> NDArrayf: - """ - N-order 2D polynomial. + """N-order 2D polynomial. :param xx: The two 1D array of values. :param params: The N parameters (a, b, c, etc.) of the polynomial. :returns: Output value. """ - # The number of parameters of np.polyval2d is order^2, so a square array needs to be passed poly_order = np.sqrt(len(params)) if not poly_order.is_integer(): raise ValueError( "The parameters of the 2D polynomial should have a length equal to order^2, " - "see np.polyval2d for more details." + "see np.polyval2d for more details.", ) # We reshape the parameter into the N x N shape expected by NumPy @@ -155,8 +146,7 @@ def polynomial_2d(xx: tuple[NDArrayf, NDArrayf], *params: NDArrayf) -> NDArrayf: def _choice_best_order(cost: NDArrayf, margin_improvement: float = 20.0) -> int: - """ - Choice of the best order (polynomial, sum of sinusoids) with a margin of improvement. The best cost value does + """Choice of the best order (polynomial, sum of sinusoids) with a margin of improvement. The best cost value does not necessarily mean the best predictive fit because high-degree polynomials tend to overfit, and sum of sinusoids as well. 
To mitigate this issue, we should choose the lesser order from which improvement becomes negligible. @@ -165,7 +155,6 @@ def _choice_best_order(cost: NDArrayf, margin_improvement: float = 20.0) -> int: :return: degree: degree for the best-fit polynomial """ - # Get percentage of spread from the minimal cost ind_min = cost.argmin() min_cost = cost[ind_min] @@ -177,10 +166,10 @@ def _choice_best_order(cost: NDArrayf, margin_improvement: float = 20.0) -> int: # Choose the good-performance cost with lowest degree ind = next((i for i, j in enumerate(below_margin) if j)) - logging.debug("Order " + str(ind_min + 1) + " has the minimum cost value of " + str(min_cost)) + logging.debug("Order %s has the minimum cost value of %s", str(ind_min + 1), str(min_cost)) logging.debug( - "Order " + str(ind + 1) + " is selected as its cost is within a " + str(margin_improvement) + "% margin of" " the minimum cost" + "Order %s is selected as its cost is within a %s%% margin of" " the minimum cost", str(ind + 1), str(margin_improvement), ) return ind @@ -194,8 +183,7 @@ def _wrapper_scipy_leastsquares( p0: NDArrayf = None, **kwargs: Any, ) -> tuple[float, NDArrayf]: - """ - Wrapper function for scipy.optimize.least_squares: passes down keyword, extracts cost and final parameters, print + """Wrapper function for scipy.optimize.least_squares: passes down keyword, extracts cost and final parameters, print statements in the console :param f: Function to fit. @@ -204,7 +192,6 @@ def _wrapper_scipy_leastsquares( :param y: Y vector. 
:return: """ - # Get arguments of scipy.optimize.curve_fit and subfunction least_squares fun_args = scipy.optimize.curve_fit.__code__.co_varnames[: scipy.optimize.curve_fit.__code__.co_argcount] ls_args = scipy.optimize.least_squares.__code__.co_varnames[: scipy.optimize.least_squares.__code__.co_argcount] @@ -235,12 +222,9 @@ def _wrapper_scipy_leastsquares( coefs = np.array([np.round(coef, 5) for coef in coefs]) # If a specific loss function was passed, construct it to get the cost - if "loss" in kwargs.keys(): + if "loss" in kwargs: loss = kwargs["loss"] - if "f_scale" in kwargs.keys(): - f_scale = kwargs["f_scale"] - else: - f_scale = 1.0 + f_scale = kwargs.get("f_scale", 1.0) from scipy.optimize._lsq.least_squares import construct_loss_function loss_func = construct_loss_function(m=ydata.size, loss=loss, f_scale=f_scale) @@ -261,8 +245,7 @@ def _wrapper_sklearn_robustlinear( estimator_name: str = "Linear", **kwargs: Any, ) -> tuple[float, NDArrayf]: - """ - Wrapper function of sklearn.linear_models: passes down keyword, extracts cost and final parameters, sets random + """Wrapper function of sklearn.linear_models: passes down keyword, extracts cost and final parameters, sets random states, scales input and de-scales output data, prints out statements :param model: Function model to fit (e.g., Polynomial features) @@ -310,7 +293,7 @@ def _wrapper_sklearn_robustlinear( # Run with data # The sample weight can only be passed if it exists in the estimator call - if sigma is not None and "sample_weight" in inspect.signature(est.fit).parameters.keys(): + if sigma is not None and "sample_weight" in inspect.signature(est.fit).parameters: # The weight is the inverse of the squared standard error sample_weight = 1 / sigma**2 # The argument name to pass it through a pipeline is "estimatorname__sample_weight" @@ -347,8 +330,7 @@ def robust_norder_polynomial_fit( random_state: int | np.random.Generator | None = None, **kwargs: Any, ) -> tuple[NDArrayf, int]: - """ - Given 
1D vectors x and y, compute a robust polynomial fit to the data. Order is chosen automatically by comparing + """Given 1D vectors x and y, compute a robust polynomial fit to the data. Order is chosen automatically by comparing residuals for multiple fit orders of a given estimator. Any keyword argument will be passed down to scipy.optimize.least_squares and sklearn linear estimators. @@ -368,9 +350,9 @@ def robust_norder_polynomial_fit( :returns coefs, degree: Polynomial coefficients and degree for the best-fit polynomial """ # Remove "f" and "absolute sigma" arguments passed, as both are fixed here - if "f" in kwargs.keys(): + if "f" in kwargs: kwargs.pop("f") - if "absolute_sigma" in kwargs.keys(): + if "absolute_sigma" in kwargs: kwargs.pop("absolute_sigma") # Raise errors for input string parameters @@ -409,7 +391,7 @@ def robust_norder_polynomial_fit( # Run the linear method with scipy try: cost, coef = _wrapper_scipy_leastsquares( - f=polynomial_1d, xdata=x, ydata=y, p0=p0, sigma=sigma, **kwargs + f=polynomial_1d, xdata=x, ydata=y, p0=p0, sigma=sigma, **kwargs, ) except RuntimeError: cost = np.inf @@ -425,7 +407,7 @@ def robust_norder_polynomial_fit( # Run the linear method with sklearn cost, coef = _wrapper_sklearn_robustlinear( - model, estimator_name=estimator_name, cost_func=cost_func, xdata=x, ydata=y, sigma=sigma, **kwargs + model, estimator_name=estimator_name, cost_func=cost_func, xdata=x, ydata=y, sigma=sigma, **kwargs, ) list_costs[deg - 1] = cost @@ -444,9 +426,7 @@ def _cost_sumofsin( cost_func: Callable[[NDArrayf], float], *p: NDArrayf, ) -> float: - """ - Calculate robust cost function for sum of sinusoids - """ + """Calculate robust cost function for sum of sinusoids""" z = y - sumsin_1d(x, *p) return cost_func(z) @@ -454,17 +434,16 @@ def _cost_sumofsin( def robust_nfreq_sumsin_fit( xdata: NDArrayf, ydata: NDArrayf, - sigma: NDArrayf | None = None, + sigma: NDArrayf | None = None, # noqa: ARG001 max_nb_frequency: int = 3, 
bounds_amp_wave_phase: list[tuple[float, float]] | None = None, cost_func: Callable[[NDArrayf], float] = soft_loss, - subsample: float | int = 1, + subsample: float | int = 1, # noqa: ARG001 hop_length: float | None = None, random_state: int | np.random.Generator | None = None, **kwargs: Any, ) -> tuple[NDArrayf, int]: - """ - Given 1D vectors x and y, compute a robust sum of sinusoid fit to the data. The number of frequency is chosen + """Given 1D vectors x and y, compute a robust sum of sinusoid fit to the data. The number of frequency is chosen automatically by comparing residuals for multiple fit orders of a given estimator. Any keyword argument will be passed down to scipy.optimize.basinhopping. @@ -485,11 +464,10 @@ def robust_nfreq_sumsin_fit( :returns coefs, degree: sinusoid coefficients (amplitude, frequency, phase) x N, Number N of summed sinusoids """ - # Remove "f" and "absolute sigma" arguments passed, as both are fixed here - if "f" in kwargs.keys(): + if "f" in kwargs: kwargs.pop("f") - if "absolute_sigma" in kwargs.keys(): + if "absolute_sigma" in kwargs: kwargs.pop("absolute_sigma") # Extract xdata from iterable @@ -497,9 +475,9 @@ def robust_nfreq_sumsin_fit( xdata = xdata[0] # Check if there is a number of iterations to stop the run if the global minimum candidate remains the same. - if "niter_success" not in kwargs.keys(): + if "niter_success" not in kwargs: # Check if there is a number of basin-hopping iterations passed down to the function. 
- if "niter" not in kwargs.keys(): + if "niter" not in kwargs: niter_success = 40 else: niter_success = min(40, kwargs.get("niter")) # type: ignore @@ -557,7 +535,7 @@ def wrapper_cost_sumofsin(p: NDArrayf, x: NDArrayf, y: NDArrayf) -> float: logging.debug(ub) # Minimize the globalization with a larger number of points - minimizer_kwargs = dict(args=(xdata, ydata), bounds=scipy_bounds) + minimizer_kwargs = {"args":(xdata, ydata), "bounds":scipy_bounds} myresults = scipy.optimize.basinhopping( wrapper_cost_sumofsin, p0, @@ -611,7 +589,7 @@ def wrapper_cost_sumofsin(p: NDArrayf, x: NDArrayf, y: NDArrayf) -> float: new_phases = final_coefs[2::3][indices] final_coefs = np.array( - [(new_amplitudes[i], new_wavelengths[i], new_phases[i]) for i in range(final_degree)] + [(new_amplitudes[i], new_wavelengths[i], new_phases[i]) for i in range(final_degree)], ).flatten() # The number of frequencies corresponds to the final index plus one diff --git a/xdem/misc.py b/xdem/misc.py index d6ad04a32..8e468225f 100644 --- a/xdem/misc.py +++ b/xdem/misc.py @@ -22,7 +22,9 @@ import copy import functools import warnings -from typing import Any, Callable +from collections.abc import Callable +from pathlib import Path +from typing import Any from packaging.version import Version @@ -47,10 +49,9 @@ def generate_random_field( - shape: tuple[int, int], corr_size: int, random_state: int | np.random.Generator | None = None + shape: tuple[int, int], corr_size: int, random_state: int | np.random.Generator | None = None, ) -> NDArrayf: - """ - Generate a semi-random gaussian field (to simulate a DEM or DEM error) + """Generate a semi-random gaussian field (to simulate a DEM or DEM error) :param shape: The output shape of the field. :param corr_size: The correlation size of the field. 
@@ -65,7 +66,6 @@ def generate_random_field( :returns: A numpy array of semi-random values from 0 to 1 """ - rng = np.random.default_rng(random_state) if not _has_cv2: @@ -91,9 +91,8 @@ def generate_random_field( return field -def deprecate(removal_version: Version = None, details: str = None) -> Callable[[Any], Any]: - """ - Trigger a DeprecationWarning for the decorated function. +def deprecate(removal_version: Version | None = None, details: str | None = None) -> Callable[[Any], Any]: + """Trigger a DeprecationWarning for the decorated function. :param func: The function to be deprecated. :param removal_version: Optional. The version at which this will be removed. @@ -156,8 +155,7 @@ def copy_doc( module_to_copy: object, remove_dem_res_params: bool = False, ) -> Callable: # type: ignore - """ - A decorator to copy docstring from a function to another one while replacing the docstring. + """A decorator to copy docstring from a function to another one while replacing the docstring. Used for copying xdem.terrain documentation to xdem.DEM. :param module_to_copy: Name of module to copy the function from @@ -201,24 +199,22 @@ def decorator(decorated: Callable) -> Callable: # type: ignore def diff_environment_yml( - fn_env: str | dict[str, Any], fn_devenv: str | dict[str, Any], print_dep: str = "both", input_dict: bool = False + fn_env: str | dict[str, Any], fn_devenv: str | dict[str, Any], print_dep: str = "both", input_dict: bool = False, ) -> None: - """ - Compute the difference between environment.yml and dev-environment.yml for setup of continuous integration, + """Compute the difference between environment.yml and dev-environment.yml for setup of continuous integration, while checking that all the dependencies listed in environment.yml are also in dev-environment.yml :param fn_env: Filename path to environment.yml :param fn_devenv: Filename path to dev-environment.yml :param print_dep: Whether to print conda differences "conda", pip differences "pip" or both.
:param input_dict: Whether to consider the input as a dict (for testing purposes). """ - if not _has_yaml: raise ValueError("Test dependency needed. Install 'pyyaml'.") if not input_dict: # Load the yml as dictionaries - yaml_env = yaml.safe_load(open(fn_env)) # type: ignore - yaml_devenv = yaml.safe_load(open(fn_devenv)) # type: ignore + yaml_env = yaml.safe_load(Path(fn_env).read_text()) # type: ignore + yaml_devenv = yaml.safe_load(Path(fn_devenv).read_text()) # type: ignore else: # We need a copy as we'll pop things out and don't want to affect input # dict.copy() is shallow and does not work with embedded list in dicts (as is the case here) @@ -246,7 +242,7 @@ def diff_environment_yml( diff_pip_check = list(set(pip_dep_env) - set(pip_dep_devenv)) if len(diff_pip_check) != 0: raise ValueError( - "The following pip dependencies are listed in env but not dev-env: " + ",".join(diff_pip_check) + "The following pip dependencies are listed in env but not dev-env: " + ",".join(diff_pip_check), ) # The diff below computes the dependencies that are in dev-env but not in env, to add during CI diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index 552b0deed..47b76c3c9 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -25,11 +25,11 @@ import math as m import multiprocessing as mp import warnings -from typing import Any, Callable, Iterable, Literal, TypedDict, overload +from collections.abc import Callable, Iterable +from typing import Any, Literal, TypedDict, overload import geopandas as gpd -import matplotlib -import matplotlib.colors as colors +import matplotlib as mpl import matplotlib.pyplot as plt import numba import numpy as np @@ -37,6 +37,7 @@ from geoutils.raster import Mask, Raster, RasterType, subsample_array from geoutils.raster.array import get_array_and_mask from geoutils.vector.vector import Vector, VectorType +from matplotlib import colors from numpy.typing import ArrayLike from scipy import integrate from scipy.interpolate import
RegularGridInterpolator, griddata @@ -54,8 +55,7 @@ def nmad(data: NDArrayf | RasterType, nfact: float = 1.4826) -> np.floating[Any]: - """ - Calculate the normalized median absolute deviation (NMAD) of an array. + """Calculate the normalized median absolute deviation (NMAD) of an array. Default scaling factor is 1.4826 to scale the median absolute deviation (MAD) to the dispersion of a normal distribution (see https://en.wikipedia.org/wiki/Median_absolute_deviation#Relation_to_standard_deviation, and e.g. Höhle and Höhle (2009), http://dx.doi.org/10.1016/j.isprsjprs.2009.02.003) @@ -82,8 +82,7 @@ def nd_binning( statistics: Iterable[str | Callable[[NDArrayf], np.floating[Any]]] = ("count", np.nanmedian, nmad), list_ranges: list[float] | None = None, ) -> pd.DataFrame: - """ - N-dimensional binning of values according to one or several explanatory variables with computed statistics in + """N-dimensional binning of values according to one or several explanatory variables with computed statistics in each bin. By default, the sample count, the median and the normalized absolute median deviation (NMAD). The count is always computed, no matter user input. Values input is a (N,) array and variable input is a L-sized list of flattened arrays of similar dimensions (N,). 
@@ -99,7 +98,6 @@ def nd_binning( min/max of the data :return: """ - # We separate 1d, 2d and nd binning, because propagating statistics between different dimensional binning is not # always feasible using scipy because it allows for several dimensional binning, while it's not straightforward in # pandas @@ -131,7 +129,7 @@ def nd_binning( # Get statistics for j, statistic in enumerate(statistics): stats_binned_1d, bedges_1d = binned_statistic( - x=var, values=values, statistic=statistic, bins=list_var_bins[i], range=list_ranges + x=var, values=values, statistic=statistic, bins=list_var_bins[i], range=list_ranges, )[:2] # Save in a dataframe df_stats_1d[statistics_name[j]] = stats_binned_1d @@ -177,13 +175,12 @@ def nd_binning( if len(list_var) > 2: for j, statistic in enumerate(statistics): stats_binned_2d, list_bedges = binned_statistic_dd( - sample=list_var, values=values, statistic=statistic, bins=list_var_bins, range=list_ranges + sample=list_var, values=values, statistic=statistic, bins=list_var_bins, range=list_ranges, )[0:2] df_stats_nd[statistics_name[j]] = stats_binned_2d.flatten() - list_ii = [] - # Loop through the bin edges and create IntervalIndexes from them (to get both - for bedges in list_bedges: - list_ii.append(pd.IntervalIndex.from_breaks(bedges, closed="left")) + + # Loop through the bin edges and create IntervalIndexes from them (to get both) + list_ii = [pd.IntervalIndex.from_breaks(bedges, closed="left") for bedges in list_bedges] # Create nd indexes in nd-array and flatten for each variable iind = np.meshgrid(*list_ii) @@ -207,28 +204,26 @@ def nd_binning( def _pandas_str_to_interval(istr: str) -> float | pd.Interval: if isinstance(istr, float): return np.nan - else: - c_left = istr[0] == "[" - c_right = istr[-1] == "]" - closed = {(True, False): "left", (False, True): "right", (True, True): "both", (False, False): "neither"}[ - c_left, c_right - ] - left, right = map(float, istr[1:-1].split(",")) - try: - return pd.Interval(left, right, 
closed) - except Exception: - return np.nan + c_left = istr[0] == "[" + c_right = istr[-1] == "]" + closed = {(True, False): "left", (False, True): "right", (True, True): "both", (False, False): "neither"}[ + c_left, c_right, + ] + left, right = map(float, istr[1:-1].split(",")) + try: + return pd.Interval(left, right, closed) + except Exception: + return np.nan def interp_nd_binning( df: pd.DataFrame, list_var_names: str | list[str], statistic: str | Callable[[NDArrayf], np.floating[Any]] = nmad, - interpolate_method: Literal["nearest"] | Literal["linear"] = "linear", + interpolate_method: Literal["nearest", "linear"] = "linear", min_count: int | None = 100, ) -> Callable[[tuple[ArrayLike, ...]], NDArrayf]: - """ - Estimate an interpolant function for an N-dimensional binning. Preferably based on the output of nd_binning. + """Estimate an interpolant function for an N-dimensional binning. Preferably based on the output of nd_binning. For more details on the input dataframe, and associated list of variable name and statistic, see nd_binning. First, interpolates nodata values of the irregular N-D binning grid with scipy.griddata. @@ -317,7 +312,7 @@ def interp_nd_binning( df_sub = df_sub[np.logical_and.reduce([np.isfinite(df_sub[var].values) for var in list_var_names])] if df_sub.empty: raise ValueError( - "Dataframe does not contain a nd binning with the variables corresponding to the list of variables." + "Dataframe does not contain a nd binning with the variables corresponding to the list of variables.", ) # Check that the statistic data series contain valid data if all(~np.isfinite(df_sub[statistic_name].values)): @@ -335,7 +330,7 @@ def interp_nd_binning( raise ValueError( "Dataframe does not contain any valid statistic values after filtering with min_count = " + str(min_count) - + "." 
+ + ".", ) # Get a list of middle values for the binning coordinates, to define a nd grid @@ -402,7 +397,7 @@ def interp_nd_binning( # to nearest neighbour by duplicating edge points # (does not support NaNs, hence the need for 2/ above) interp_fun = RegularGridInterpolator( - tuple(list_bmid_extended), values_grid_nearest2, method="linear", bounds_error=False, fill_value=None + tuple(list_bmid_extended), values_grid_nearest2, method="linear", bounds_error=False, fill_value=None, ) return interp_fun # type: ignore @@ -415,8 +410,8 @@ def get_perbin_nd_binning( statistic: str | Callable[[NDArrayf], np.floating[Any]] = np.nanmedian, min_count: int | None = 0, ) -> NDArrayf: - """ - Get per-bin array statistic for a list of array input variables, based on the results of an independent N-D binning. + """Get per-bin array statistic for a list of array input variables, + based on the results of an independent N-D binning. For example, get a 2D array of elevation uncertainty based on 2D arrays of slope and curvature and a related binning (for uncertainty analysis) or get a 2D array of elevation bias based on 2D arrays of rotated X coordinates (for @@ -430,7 +425,6 @@ def get_perbin_nd_binning( :return: The array of statistic values corresponding to the input variables. 
""" - # Prepare output values_out = np.zeros(np.shape(list_var[0])) * np.nan @@ -464,17 +458,17 @@ def get_perbin_nd_binning( for var_name in list_var_names: if any(isinstance(x, pd.Interval) for x in df_sub[var_name].values): continue - elif any(isinstance(_pandas_str_to_interval(x), pd.Interval) for x in df_sub[var_name]): + if any(isinstance(_pandas_str_to_interval(x), pd.Interval) for x in df_sub[var_name]): df_sub[var_name] = [_pandas_str_to_interval(x) for x in df_sub[var_name]] else: - ValueError("The bin intervals of the dataframe should be pandas.Interval.") + raise ValueError("The bin intervals of the dataframe should be pandas.Interval.") # Apply operator in the nd binning # We compute the masks linked to each 1D bin in a single for loop, to optimize speed - L = len(list_var) + len_list_var = len(list_var) all_mask_vars = [] all_interval_vars = [] - for k in range(L): + for k in range(len_list_var): # Get variable name and list of intervals in the dataframe var_name = list_var_names[k] list_interval_var = np.unique(df_sub[var_name].values) @@ -490,17 +484,17 @@ def get_perbin_nd_binning( all_interval_vars.append(list_interval_var) # We perform the K-D binning by logically combining the masks - all_ranges = [range(len(all_interval_vars[k])) for k in range(L)] + all_ranges = [range(len(all_interval_vars[k])) for k in range(len_list_var)] for indices in itertools.product(*all_ranges): # Get mask of the specific bin, skip if empty - mask_bin = np.logical_and.reduce([all_mask_vars[k][indices[k]] for k in range(L)]) + mask_bin = np.logical_and.reduce([all_mask_vars[k][indices[k]] for k in range(len_list_var)]) if np.count_nonzero(mask_bin) == 0: continue # Get the statistic index_bin = np.logical_and.reduce( - [df_sub[list_var_names[k]] == all_interval_vars[k][indices[k]] for k in range(L)] + [df_sub[list_var_names[k]] == all_interval_vars[k][indices[k]] for k in range(len_list_var)], ) statistic_bin = df_sub[statistic_name][index_bin].values[0] @@ -520,9 
+514,8 @@ def two_step_standardization( spread_statistic: Callable[[NDArrayf], np.floating[Any]] = nmad, fac_spread_outliers: float | None = 7, ) -> tuple[NDArrayf, Callable[[tuple[ArrayLike, ...]], NDArrayf]]: - """ - Standardize the proxy differenced values using the modelled heteroscedasticity, re-scaled to the spread statistic, - and generate the final standardization function. + """Standardize the proxy differenced values using the modelled heteroscedasticity, + re-scaled to the spread statistic, and generate the final standardization function. :param dvalues: Proxy values as array of size (N,) (i.e., differenced values where signal should be zero such as elevation differences on stable terrain) @@ -533,7 +526,6 @@ def two_step_standardization( :return: Standardized values array of size (N,), Function to destandardize """ - # Standardize a first time with the function zscores = dvalues / unscaled_error_fun(tuple(list_var)) @@ -565,10 +557,9 @@ def _estimate_model_heteroscedasticity( min_count: int | None = 100, fac_spread_outliers: float | None = 7, ) -> tuple[pd.DataFrame, Callable[[tuple[NDArrayf, ...]], NDArrayf]]: - """ - Estimate and model the heteroscedasticity (i.e., variability in error) according to a list of explanatory variables - from a proxy of differenced values (e.g., elevation differences), if possible compared to a source of higher - precision. + """Estimate and model the heteroscedasticity (i.e., variability in error) + according to a list of explanatory variables from a proxy of differenced values (e.g., elevation differences), + if possible compared to a source of higher precision. 
This function performs N-D data binning with the list of explanatory variable for a spread statistic, then performs N-D interpolation on this statistic, scales the output with a two-step standardization to return an error @@ -588,9 +579,8 @@ def _estimate_model_heteroscedasticity( :return: Dataframe of binned spread statistic with explanatory variables, Error function with explanatory variables """ - # Perform N-D binning with the differenced values computing the spread statistic - df = nd_binning( + df_binning = nd_binning( values=dvalues, list_var=list_var, list_var_names=list_var_names, @@ -599,7 +589,10 @@ def _estimate_model_heteroscedasticity( ) # Perform N-D linear interpolation for the spread statistic - fun = interp_nd_binning(df, list_var_names=list_var_names, statistic=spread_statistic.__name__, min_count=min_count) + fun = interp_nd_binning(df_binning, + list_var_names=list_var_names, + statistic=spread_statistic.__name__, + min_count=min_count) # Get the final function based on a two-step standardization final_fun = two_step_standardization( @@ -610,7 +603,7 @@ def _estimate_model_heteroscedasticity( fac_spread_outliers=fac_spread_outliers, )[1] - return df, final_fun + return df_binning, final_fun @overload @@ -640,8 +633,7 @@ def _preprocess_values_with_mask_to_array( gsd: float | None = None, preserve_shape: bool = True, ) -> tuple[list[NDArrayf] | NDArrayf, float]: - """ - Preprocess input values provided as Raster or ndarray with a stable and/or unstable mask provided as Vector or + """Preprocess input values provided as Raster or ndarray with a stable and/or unstable mask provided as Vector or ndarray into an array of stable values. By default, the shape is preserved and the masked values converted to NaNs. 
@@ -656,7 +648,6 @@ def _preprocess_values_with_mask_to_array( :return: Array of stable terrain values, Ground sampling distance """ - # Check inputs: needs to be Raster, array or a list of those if not isinstance(values, (Raster, np.ndarray, list)) or ( isinstance(values, list) and not all(isinstance(val, (Raster, np.ndarray)) for val in values) @@ -675,7 +666,7 @@ def _preprocess_values_with_mask_to_array( any_raster = isinstance(values, Raster) if not any_raster and isinstance(include_mask, (Vector, gpd.GeoDataFrame)): raise ValueError( - "The stable mask can only passed as a Vector or GeoDataFrame if the input values contain a Raster." + "The stable mask can only passed as a Vector or GeoDataFrame if the input values contain a Raster.", ) # If there is only one array or Raster, put alone in a list @@ -766,7 +757,7 @@ def infer_heteroscedasticity_from_stable( list_var: list[NDArrayf | RasterType], stable_mask: NDArrayf | Mask | VectorType | gpd.GeoDataFrame = None, unstable_mask: NDArrayf | Mask | VectorType | gpd.GeoDataFrame = None, - list_var_names: list[str] = None, + list_var_names: list[str] | None = None, spread_statistic: Callable[[NDArrayf], np.floating[Any]] = nmad, list_var_bins: int | tuple[int, ...] | tuple[NDArrayf] | None = None, min_count: int | None = 100, @@ -780,7 +771,7 @@ def infer_heteroscedasticity_from_stable( list_var: list[NDArrayf | RasterType], stable_mask: NDArrayf | Mask | VectorType | gpd.GeoDataFrame = None, unstable_mask: NDArrayf | Mask | VectorType | gpd.GeoDataFrame = None, - list_var_names: list[str] = None, + list_var_names: list[str] | None = None, spread_statistic: Callable[[NDArrayf], np.floating[Any]] = nmad, list_var_bins: int | tuple[int, ...] 
| tuple[NDArrayf] | None = None, min_count: int | None = 100, @@ -793,14 +784,13 @@ def infer_heteroscedasticity_from_stable( list_var: list[NDArrayf | RasterType], stable_mask: NDArrayf | Mask | VectorType | gpd.GeoDataFrame = None, unstable_mask: NDArrayf | Mask | VectorType | gpd.GeoDataFrame = None, - list_var_names: list[str] = None, + list_var_names: list[str] | None = None, spread_statistic: Callable[[NDArrayf], np.floating[Any]] = nmad, list_var_bins: int | tuple[int, ...] | tuple[NDArrayf] | None = None, min_count: int | None = 100, fac_spread_outliers: float | None = 7, ) -> tuple[NDArrayf | RasterType, pd.DataFrame, Callable[[tuple[NDArrayf, ...]], NDArrayf]]: - """ - Infer heteroscedasticity from differenced values on stable terrain and a list of explanatory variables. + """Infer heteroscedasticity from differenced values on stable terrain and a list of explanatory variables. This function returns an error map, a dataframe of spread values and the error function with explanatory variables. 
It is a convenience wrapper for `estimate_model_heteroscedasticity` to work on either Raster or array, compute the @@ -826,20 +816,19 @@ def infer_heteroscedasticity_from_stable( Dataframe of binned spread statistic with explanatory variables, Error function with explanatory variables """ - # Create placeholder variables names if those don't exist if list_var_names is None: list_var_names = ["var" + str(i + 1) for i in range(len(list_var))] # Get the arrays for proxy values and explanatory variables list_all_arr, gsd = _preprocess_values_with_mask_to_array( - values=[dvalues] + list_var, include_mask=stable_mask, exclude_mask=unstable_mask, preserve_shape=False + values=[dvalues, *list_var], include_mask=stable_mask, exclude_mask=unstable_mask, preserve_shape=False, ) dvalues_stable_arr = list_all_arr[0] list_var_stable_arr = list_all_arr[1:] # Estimate and model the heteroscedasticity using only stable terrain - df, fun = _estimate_model_heteroscedasticity( + df_htct, fun = _estimate_model_heteroscedasticity( dvalues=dvalues_stable_arr, list_var=list_var_stable_arr, list_var_names=list_var_names, @@ -855,23 +844,20 @@ def infer_heteroscedasticity_from_stable( # Return the right type, depending on dvalues input if isinstance(dvalues, Raster): - return dvalues.copy(new_array=error), df, fun - else: - return error, df, fun + return dvalues.copy(new_array=error), df_htct, fun + return error, df_htct, fun def _create_circular_mask( - shape: tuple[int, int], center: tuple[int, int] | None = None, radius: float | None = None + shape: tuple[int, int], center: tuple[int, int] | None = None, radius: float | None = None, ) -> NDArrayf: - """ - Create circular mask on a raster, defaults to the center of the array and its half width + """Create circular mask on a raster, defaults to the center of the array and its half width :param shape: shape of array :param center: center :param radius: radius :return: """ - w, h = shape if center is None: # use the middle of
the image @@ -900,8 +886,7 @@ def _create_ring_mask( in_radius: float = 0, out_radius: float | None = None, ) -> NDArrayf: - """ - Create ring mask on a raster, defaults to the center of the array and a circle mask of half width of the array + """Create ring mask on a raster, defaults to the center of the array and a circle mask of half width of the array :param shape: shape of array :param center: center @@ -909,7 +894,6 @@ def _create_ring_mask( :param out_radius: outside radius :return: """ - w, h = shape if center is None: @@ -934,11 +918,10 @@ def _subsample_wrapper( subsample: int = 10000, subsample_method: str = "pdist_ring", inside_radius: float = 0, - outside_radius: float = None, + outside_radius: float | None = None, random_state: int | np.random.Generator | None = None, ) -> tuple[NDArrayf, NDArrayf]: - """ - (Not used by default) + """(Not used by default) Wrapper for subsampling pdist methods """ nx, ny = shape @@ -952,7 +935,7 @@ def _subsample_wrapper( center_y = rng.choice(ny, 1)[0] if subsample_method == "pdist_ring": subindex = _create_ring_mask( - (nx, ny), center=(center_x, center_y), in_radius=inside_radius, out_radius=outside_radius + (nx, ny), center=(center_x, center_y), in_radius=inside_radius, out_radius=outside_radius, ) else: subindex = _create_circular_mask((nx, ny), center=(center_x, center_y), radius=outside_radius) @@ -984,12 +967,10 @@ def _aggregate_pdist_empirical_variogram( # removable then in this function **kwargs: Any, ) -> pd.DataFrame: - """ - (Not used by default) + """(Not used by default) Aggregating subfunction of sample_empirical_variogram for pdist methods. The pairwise differences are calculated within each subsample. 
""" - # If no multi_ranges are provided, define a logical default behaviour with the pixel size and grid size if subsample_method in ["pdist_disk", "pdist_ring"]: @@ -1007,7 +988,7 @@ def _aggregate_pdist_empirical_variogram( # Define subsampling parameters list_inside_radius = [] list_outside_radius: list[float | None] = [] - binned_ranges = [0.0] + pdist_multi_ranges + binned_ranges = [0.0] + pdist_multi_ranges # noqa: RUF005 for i in range(len(binned_ranges) - 1): # Radiuses need to be passed as pixel sizes, dividing by ground sampling distance @@ -1046,21 +1027,19 @@ def _aggregate_pdist_empirical_variogram( # Aggregate runs list_df_range.append(df_range) - df = pd.concat(list_df_range) + df_vgm = pd.concat(list_df_range) - return df + return df_vgm def _get_pdist_empirical_variogram(values: NDArrayf, coords: NDArrayf, **kwargs: Any) -> pd.DataFrame: - """ - Get empirical variogram from skgstat.Variogram object calculating pairwise distances within the sample + """Get empirical variogram from skgstat.Variogram object calculating pairwise distances within the sample :param values: Values :param coords: Coordinates :return: Empirical variogram (variance, upper bound of lag bin, counts) """ - # Remove random_state keyword argument that is not used kwargs.pop("random_state") @@ -1076,22 +1055,21 @@ def _get_pdist_empirical_variogram(values: NDArrayf, coords: NDArrayf, **kwargs: filtered_kwargs = {k: kwargs[k] for k in variogram_args if k in kwargs} # Derive variogram with default MetricSpace (equivalent to scipy.pdist) - V = skg.Variogram(coordinates=coords, values=values, normalize=False, fit_method=None, **filtered_kwargs) + vgm = skg.Variogram(coordinates=coords, values=values, normalize=False, fit_method=None, **filtered_kwargs) # Get bins, empirical variogram values, and bin count - bins, exp = V.get_empirical() - count = V.bin_count + bins, exp = vgm.get_empirical() + count = vgm.bin_count # Write to dataframe - df = pd.DataFrame() - df = df.assign(exp=exp, 
bins=bins, count=count) + df_vgm = pd.DataFrame() + df_vgm = df_vgm.assign(exp=exp, bins=bins, count=count) - return df + return df_vgm def _choose_cdist_equidistant_sampling_parameters(**kwargs: Any) -> tuple[int, int, float]: - """ - Add a little calculation to partition the "subsample" argument automatically into the "run" and "samples" + """Add a little calculation to partition the "subsample" argument automatically into the "run" and "samples" arguments of RasterEquidistantMetricSpace, to have a similar number of points than with a classic pdist method. We compute the arguments to match a N0**2/2 number of pairwise comparison, N0 being the "subsample" input, and @@ -1110,7 +1088,6 @@ def _choose_cdist_equidistant_sampling_parameters(**kwargs: Any) -> tuple[int, i !! Different variables: !! The "samples" of RasterEquidistantMetricSpace is N, while the "subsample" passed is N0. """ - # First, we extract the extent, shape and subsample values from the keyword arguments extent = kwargs["extent"] shape = kwargs["shape"] @@ -1118,10 +1095,7 @@ def _choose_cdist_equidistant_sampling_parameters(**kwargs: Any) -> tuple[int, i # We define the number of rings to 10 in order to get a decent equidistant sampling, we'll later adjust the # ratio_sampling to force that number to 10 - if "nb_rings" in kwargs.keys(): - nb_rings = kwargs["nb_rings"] - else: - nb_rings = 10 + nb_rings = kwargs.get("nb_rings", 10) # For one run (R=1), and two samples per disk/ring (N=2), and the number of rings X=10, this requires N0 to be at # least 10: min_subsample = np.ceil(np.sqrt(2 * nb_rings * 2**2) + 1) @@ -1172,21 +1146,19 @@ def _choose_cdist_equidistant_sampling_parameters(**kwargs: Any) -> tuple[int, i def _get_cdist_empirical_variogram( - values: NDArrayf, coords: NDArrayf, subsample_method: str, **kwargs: Any + values: NDArrayf, coords: NDArrayf, subsample_method: str, **kwargs: Any, ) -> pd.DataFrame: - """ - Get empirical variogram from skgstat.Variogram object calculating pairwise 
distances between two sample collections - of a MetricSpace (see scikit-gstat documentation for more details) + """Get empirical variogram from skgstat.Variogram object calculating pairwise distances + between two sample collections of a MetricSpace (see scikit-gstat documentation for more details) :param values: Values :param coords: Coordinates :return: Empirical variogram (variance, upper bound of lag bin, counts) """ - if subsample_method == "cdist_equidistant": - if "runs" not in kwargs.keys() and "samples" not in kwargs.keys(): + if "runs" not in kwargs and "samples" not in kwargs: # We define subparameters for the equidistant technique to match the number of pairwise comparison # that would have a classic "subsample" with pdist, except if those parameters are already user-defined runs, samples, ratio_subsample = _choose_cdist_equidistant_sampling_parameters(**kwargs) @@ -1230,41 +1202,39 @@ def _get_cdist_empirical_variogram( # Filter corresponding arguments before passing to MetricSpace function filtered_ms_kwargs = {k: kwargs[k] for k in ms_args if k in kwargs} - M = ms(coords=coords, **filtered_ms_kwargs) + metric_space = ms(coords=coords, **filtered_ms_kwargs) # Filter corresponding arguments before passing to Variogram function filtered_var_kwargs = {k: kwargs[k] for k in variogram_args if k in kwargs} - V = skg.Variogram(M, values=values, normalize=False, fit_method=None, **filtered_var_kwargs) + vgm = skg.Variogram(metric_space, values=values, normalize=False, fit_method=None, **filtered_var_kwargs) # Get bins, empirical variogram values, and bin count - bins, exp = V.get_empirical(bin_center=False) - count = V.bin_count + bins, exp = vgm.get_empirical(bin_center=False) + count = vgm.bin_count # Write to dataframe - df = pd.DataFrame() - df = df.assign(exp=exp, bins=bins, count=count) + df_vgm = pd.DataFrame() + df_vgm = df_vgm.assign(exp=exp, bins=bins, count=count) - return df + return df_vgm def _wrapper_get_empirical_variogram(argdict: dict[str, 
Any]) -> pd.DataFrame: - """ - Multiprocessing wrapper for get_pdist_empirical_variogram and get_cdist_empirical variogram + """Multiprocessing wrapper for get_pdist_empirical_variogram and get_cdist_empirical variogram :param argdict: Keyword argument to pass to get_pdist/cdist_empirical_variogram :return: Empirical variogram (variance, upper bound of lag bin, counts) """ - logging.debug("Working on run " + str(argdict["i"]) + " out of " + str(argdict["imax"])) + logging.debug("Working on run %s out of %s", argdict["i"], argdict["imax"]) argdict.pop("i") argdict.pop("imax") if argdict["subsample_method"] in ["cdist_equidistant", "cdist_point"]: # Simple wrapper for the skgstat Variogram function for cdist methods return _get_cdist_empirical_variogram(**argdict) - else: - # Aggregating several skgstat Variogram after iterative subsampling of specific points in the Raster - return _aggregate_pdist_empirical_variogram(**argdict) + # Aggregating several skgstat Variogram after iterative subsampling of specific points in the Raster + return _aggregate_pdist_empirical_variogram(**argdict) class EmpiricalVariogramKArgs(TypedDict, total=False): @@ -1280,7 +1250,7 @@ class EmpiricalVariogramKArgs(TypedDict, total=False): def sample_empirical_variogram( values: NDArrayf | RasterType, - gsd: float = None, + gsd: float | None = None, coords: NDArrayf = None, subsample: int = 1000, subsample_method: str = "cdist_equidistant", @@ -1291,8 +1261,7 @@ def sample_empirical_variogram( # remove some type ignores from this function in the future **kwargs: int | list[float] | float | str | Any, ) -> pd.DataFrame: - """ - Sample empirical variograms with binning adaptable to multiple ranges and spatial subsampling adapted for raster + """Sample empirical variograms with binning adaptable to multiple ranges and spatial subsampling adapted for raster data. Returns an empirical variogram (empirical variance, upper bound of spatial lag bin, count of pairwise samples).
@@ -1352,35 +1321,35 @@ def sample_empirical_variogram( elif isinstance(values, (np.ndarray, np.ma.masked_array)): values, mask = get_array_and_mask(values) else: - raise ValueError("Values must be of type NDArrayf, np.ma.masked_array or Raster subclass.") + raise TypeError("Values must be of type NDArrayf, np.ma.masked_array or Raster subclass.") values = values.squeeze() # Then, check if the logic between values, coords and gsd is respected if (gsd is not None or subsample_method in ["cdist_equidistant", "pdist_disk", "pdist_ring"]) and values.ndim == 1: raise ValueError( 'Values array must be 2D when using any of the "cdist_equidistant", "pdist_disk" and ' - '"pdist_ring" methods, or providing a ground sampling distance instead of coordinates.' + '"pdist_ring" methods, or providing a ground sampling distance instead of coordinates.', ) - elif coords is not None and values.ndim != 1: + if coords is not None and values.ndim != 1: raise ValueError("Values array must be 1D when providing coordinates.") - elif coords is not None and (coords.shape[0] != 2 and coords.shape[1] != 2): + if coords is not None and (coords.shape[0] != 2 and coords.shape[1] != 2): raise ValueError("The coordinates array must have one dimension with length equal to 2") - elif values.ndim == 2 and gsd is None: + if values.ndim == 2 and gsd is None: raise ValueError("The ground sampling distance must be defined when passing a 2D values array.") # Check the subsample method provided exists, otherwise list options if subsample_method not in ["cdist_equidistant", "cdist_point", "pdist_point", "pdist_disk", "pdist_ring"]: raise TypeError( 'The subsampling method must be one of "cdist_equidistant, "cdist_point", "pdist_point", ' - '"pdist_disk" or "pdist_ring".' 
+ '"pdist_disk" or "pdist_ring".', ) # Check that, for several runs, the binning function is an Iterable, otherwise skgstat might provide variogram # values over slightly different binnings due to randomly changing subsample maximum lags - if n_variograms > 1 and "bin_func" in kwargs.keys() and not isinstance(kwargs.get("bin_func"), Iterable): + if n_variograms > 1 and "bin_func" in kwargs and not isinstance(kwargs.get("bin_func"), Iterable): warnings.warn( "Using a named binning function of scikit-gstat might provide different binnings for each " "independent run. To remediate that issue, pass bin_func as an Iterable of right bin edges, " - "(or use default bin_func)." + "(or use default bin_func).", ) # Defaulting to coordinates if those are provided @@ -1405,11 +1374,11 @@ def sample_empirical_variogram( extent = (np.min(coords[:, 0]), np.max(coords[:, 0]), np.min(coords[:, 1]), np.max(coords[:, 1])) # Get the maximum lag from the coordinates before keeping only valid data, if it was not provided - if "maxlag" not in kwargs.keys(): + if "maxlag" not in kwargs: # We define maximum lag as the maximum distance between coordinates (needed to provide custom bins, otherwise # skgstat rewrites the maxlag with the subsample of coordinates provided) maxlag = np.sqrt( - (np.max(coords[:, 0]) - np.min(coords[:, 0])) ** 2 + (np.max(coords[:, 1]) - np.min(coords[:, 1])) ** 2 + (np.max(coords[:, 0]) - np.min(coords[:, 0])) ** 2 + (np.max(coords[:, 1]) - np.min(coords[:, 1])) ** 2, ) kwargs.update({"maxlag": maxlag}) @@ -1419,7 +1388,7 @@ def sample_empirical_variogram( values = values[ind_valid] coords = coords[ind_valid, :] - if "bin_func" not in kwargs.keys(): + if "bin_func" not in kwargs: # If no bin_func is provided, we provide an Iterable to provide a custom binning function to skgstat, # because otherwise bins might be inconsistent across runs bin_func = [] @@ -1455,7 +1424,7 @@ def sample_empirical_variogram( # Create a list of child random states per number of 
variograms list_random_state: list[None | np.random.Generator] = list( - rng.choice(n_variograms, n_variograms, replace=False) + rng.choice(n_variograms, n_variograms, replace=False), ) else: list_random_state = [None for i in range(n_variograms)] @@ -1480,7 +1449,7 @@ def sample_empirical_variogram( list_df_run.append(df_run) else: - logging.info("Using " + str(n_jobs) + " cores...") + logging.info("Using %s cores...", str(n_jobs)) pool = mp.Pool(n_jobs, maxtasksperchild=1) list_argdict = [ @@ -1492,22 +1461,22 @@ def sample_empirical_variogram( pool.join() # Aggregate multiple ranges subsampling - df = pd.concat(list_df_run) + df_vgm = pd.concat(list_df_run) # For a single run, no multi-run sigma estimated if n_variograms == 1: - df = df.rename(columns={"bins": "lags"}) - df["err_exp"] = np.nan + df_vgm = df_vgm.rename(columns={"bins": "lags"}) + df_vgm["err_exp"] = np.nan # For several runs, group results, use mean as empirical variogram, estimate sigma, and sum the counts else: - df_grouped = df.groupby("bins", dropna=False) + df_grouped = df_vgm.groupby("bins", dropna=False) df_mean = df_grouped[["exp"]].mean() df_std = df_grouped[["exp"]].std() df_count = df_grouped[["count"]].sum() df_mean["lags"] = df_mean.index.values df_mean["err_exp"] = df_std["exp"] / np.sqrt(n_variograms) df_mean["count"] = df_count["count"] - df = df_mean + df_vgm = df_mean # Fix variance error for Dowd's variogram in SciKit-GStat @@ -1515,23 +1484,22 @@ def sample_empirical_variogram( from packaging.version import Version if Version(skg.__version__) <= Version("1.0.0"): - if "estimator" in kwargs.keys() and kwargs["estimator"] == "dowd": + if "estimator" in kwargs and kwargs["estimator"] == "dowd": # Correction: we divide all experimental variance values by 2 - df.exp.values /= 2 - df.err_exp.values /= 2 + df_vgm.exp.values /= 2 + df_vgm.err_exp.values /= 2 # Remove the last spatial lag bin which is always undersampled - df.drop(df.tail(1).index, inplace=True) + df_vgm = 
df_vgm.drop(df_vgm.tail(1).index) # Force output dtype (default differs on different OS) - df = df.astype({"exp": "float64", "err_exp": "float64", "lags": "float64", "count": "int64"}) + df_vgm = df_vgm.astype({"exp": "float64", "err_exp": "float64", "lags": "float64", "count": "int64"}) - return df + return df_vgm def _get_skgstat_variogram_model_name(model: str | Callable[[NDArrayf, float, float], NDArrayf]) -> str: """Function to identify a SciKit-GStat variogram model from a string or a function""" - list_supported_models = ["spherical", "gaussian", "exponential", "cubic", "stable", "matern"] if callable(model): @@ -1549,21 +1517,20 @@ def _get_skgstat_variogram_model_name(model: str | Callable[[NDArrayf, float, fl raise ValueError( f"Variogram model name {model} not recognized. Supported models are: " + ", ".join(list_supported_models) - + "." + + ".", ) else: - raise ValueError( + raise TypeError( "Variogram models can be passed as strings or skgstat.models function. " - "Supported models are: " + ", ".join(list_supported_models) + "." + "Supported models are: " + ", ".join(list_supported_models) + ".", ) return model_name def get_variogram_model_func(params_variogram_model: pd.DataFrame) -> Callable[[NDArrayf], NDArrayf]: - """ - Construct the sum of spatial variogram function from a dataframe of variogram parameters. + """Construct the sum of spatial variogram function from a dataframe of variogram parameters. :param params_variogram_model: Dataframe of variogram models to sum with three to four columns, "model" for the model types (e.g., ["spherical", "matern"]), "range" for the correlation ranges (e.g., [2, 100]), "psill" for @@ -1572,7 +1539,6 @@ def get_variogram_model_func(params_variogram_model: pd.DataFrame) -> Callable[[ :return: Function of sum of variogram with spatial lags. 
""" - # Check input dataframe _check_validity_params_variogram(params_variogram_model) @@ -1600,8 +1566,7 @@ def sum_model(h: NDArrayf) -> NDArrayf: def covariance_from_variogram(params_variogram_model: pd.DataFrame) -> Callable[[NDArrayf], NDArrayf]: - """ - Construct the spatial covariance function from a dataframe of variogram parameters. + """Construct the spatial covariance function from a dataframe of variogram parameters. The covariance function is the sum of partial sills "PS" minus the sum of associated variograms "gamma": C = PS - gamma @@ -1612,7 +1577,6 @@ def covariance_from_variogram(params_variogram_model: pd.DataFrame) -> Callable[ :return: Covariance function with spatial lags """ - # Check input dataframe _check_validity_params_variogram(params_variogram_model) @@ -1629,8 +1593,7 @@ def cov(h: NDArrayf) -> NDArrayf: def correlation_from_variogram(params_variogram_model: pd.DataFrame) -> Callable[[NDArrayf], NDArrayf]: - """ - Construct the spatial correlation function from a dataframe of variogram parameters. + """Construct the spatial correlation function from a dataframe of variogram parameters. 
The correlation function is the covariance function "C" divided by the sum of partial sills "PS": rho = C / PS :param params_variogram_model: Dataframe of variogram models to sum with three to four columns, "model" for the @@ -1640,7 +1603,6 @@ def correlation_from_variogram(params_variogram_model: pd.DataFrame) -> Callable :return: Correlation function with spatial lags """ - # Check input dataframe _check_validity_params_variogram(params_variogram_model) @@ -1659,12 +1621,11 @@ def rho(h: NDArrayf) -> NDArrayf: def fit_sum_model_variogram( list_models: list[str | Callable[[NDArrayf, float, float], NDArrayf]], empirical_variogram: pd.DataFrame, - bounds: list[tuple[float, float]] = None, - p0: list[float] = None, - maxfev: int = None, + bounds: list[tuple[float, float]] | None = None, + p0: list[float] | None = None, + maxfev: int | None = None, ) -> tuple[Callable[[NDArrayf], NDArrayf], pd.DataFrame]: - """ - Fit a sum of variogram models to an empirical variogram, with weighted least-squares based on sampling errors. To + """Fit a sum of variogram models to an empirical variogram, with weighted least-squares based on sampling errors. To use preferably with the empirical variogram dataframe returned by the `sample_empirical_variogram` function. :param list_models: List of K variogram models to sum for the fit in order from short to long ranges. 
Can either be @@ -1764,15 +1725,15 @@ def variogram_sum(h: float, *args: list[float]) -> float: model_name = _get_skgstat_variogram_model_name(model) # For models that expect 2 parameters if model_name in ["spherical", "gaussian", "exponential", "cubic"]: - df = pd.DataFrame() - df = df.assign(model=[model_name], range=[cof[i]], psill=[cof[i + 1]]) + df_model = pd.DataFrame() + df_model = df_model.assign(model=[model_name], range=[cof[i]], psill=[cof[i + 1]]) i += 2 # For models that expect 3 parameters elif model_name in ["stable", "matern"]: - df = pd.DataFrame() - df = df.assign(model=[model_name], range=[cof[i]], psill=[cof[i + 1]], smooth=[cof[i + 2]]) + df_model = pd.DataFrame() + df_model = df_model.assign(model=[model_name], range=[cof[i]], psill=[cof[i + 1]], smooth=[cof[i + 2]]) i += 3 - list_df.append(df) + list_df.append(df_model) df_params = pd.concat(list_df) # Also pass the function of sum of variogram @@ -1785,19 +1746,18 @@ def _estimate_model_spatial_correlation( dvalues: NDArrayf | RasterType, list_models: list[str | Callable[[NDArrayf, float, float], NDArrayf]], estimator: str = "dowd", - gsd: float = None, + gsd: float | None = None, coords: NDArrayf = None, subsample: int = 1000, subsample_method: str = "cdist_equidistant", n_variograms: int = 1, n_jobs: int = 1, random_state: int | np.random.Generator | None = None, - bounds: list[tuple[float, float]] = None, - p0: list[float] = None, + bounds: list[tuple[float, float]] | None = None, + p0: list[float] | None = None, **kwargs: Any, ) -> tuple[pd.DataFrame, pd.DataFrame, Callable[[NDArrayf], NDArrayf]]: - """ - Estimate and model the spatial correlation of the input variable by empirical variogram sampling and fitting of a + """Estimate and model the spatial correlation of the input variable by empirical variogram sampling and fitting of a sum of variogram model. 
The spatial correlation is returned as a function of spatial lags (in units of the input coordinates) which gives a @@ -1827,7 +1787,6 @@ def _estimate_model_spatial_correlation( :return: Dataframe of empirical variogram, Dataframe of optimized model parameters, Function of spatial correlation (0 to 1) with spatial lags """ - empirical_variogram = sample_empirical_variogram( values=dvalues, estimator=estimator, @@ -1842,7 +1801,7 @@ def _estimate_model_spatial_correlation( ) params_variogram_model = fit_sum_model_variogram( - list_models=list_models, empirical_variogram=empirical_variogram, bounds=bounds, p0=p0 + list_models=list_models, empirical_variogram=empirical_variogram, bounds=bounds, p0=p0, )[1] spatial_correlation_func = correlation_from_variogram(params_variogram_model=params_variogram_model) @@ -1857,20 +1816,19 @@ def infer_spatial_correlation_from_stable( unstable_mask: NDArrayf | Mask | VectorType | gpd.GeoDataFrame = None, errors: NDArrayf | RasterType = None, estimator: str = "dowd", - gsd: float = None, + gsd: float | None = None, coords: NDArrayf = None, subsample: int = 1000, subsample_method: str = "cdist_equidistant", n_variograms: int = 1, n_jobs: int = 1, - bounds: list[tuple[float, float]] = None, - p0: list[float] = None, + bounds: list[tuple[float, float]] | None = None, + p0: list[float] | None = None, random_state: int | np.random.Generator | None = None, **kwargs: Any, ) -> tuple[pd.DataFrame, pd.DataFrame, Callable[[NDArrayf], NDArrayf]]: - """ - Infer spatial correlation of errors from differenced values on stable terrain and a list of variogram model to fit - as a sum. + """Infer spatial correlation of errors from differenced values on stable terrain and + a list of variogram model to fit as a sum. This function returns a dataframe of the empirical variogram, a dataframe of optimized model parameters, and a spatial correlation function. 
The spatial correlation is returned as a function of spatial lags @@ -1906,9 +1864,8 @@ def infer_spatial_correlation_from_stable( :return: Dataframe of empirical variogram, Dataframe of optimized model parameters, Function of spatial correlation (0 to 1) with spatial lags """ - dvalues_stable_arr, gsd = _preprocess_values_with_mask_to_array( - values=dvalues, include_mask=stable_mask, exclude_mask=unstable_mask, gsd=gsd + values=dvalues, include_mask=stable_mask, exclude_mask=unstable_mask, gsd=gsd, ) # Perform standardization if error array is provided @@ -1943,12 +1900,11 @@ def infer_spatial_correlation_from_stable( def _check_validity_params_variogram(params_variogram_model: pd.DataFrame) -> None: """Check the validity of the modelled variogram parameters dataframe (mostly in the case it is passed manually).""" - # Check that expected columns exists expected_columns = ["model", "range", "psill"] if not all(c in params_variogram_model for c in expected_columns): raise ValueError( - 'The dataframe with variogram parameters must contain the columns "model", "range" and "psill".' 
+ 'The dataframe with variogram parameters must contain the columns "model", "range" and "psill".', ) # Check that the format of variogram models are correct @@ -1958,14 +1914,14 @@ def _check_validity_params_variogram(params_variogram_model: pd.DataFrame) -> No # Check that the format of ranges, sills are correct for r in params_variogram_model["range"].values: if not isinstance(r, (float, np.floating, int, np.integer)): - raise ValueError("The variogram ranges must be float or integer.") + raise TypeError("The variogram ranges must be float or integer.") if r <= 0: raise ValueError("The variogram ranges must have non-zero, positive values.") # Check that the format of ranges, sills are correct for p in params_variogram_model["psill"].values: if not isinstance(p, (float, np.floating, int, np.integer)): - raise ValueError("The variogram partial sills must be float or integer.") + raise TypeError("The variogram partial sills must be float or integer.") if p <= 0: raise ValueError("The variogram partial sills must have non-zero, positive values.") @@ -1974,7 +1930,7 @@ def _check_validity_params_variogram(params_variogram_model: pd.DataFrame) -> No if "smooth" not in params_variogram_model: raise ValueError( 'The dataframe with variogram parameters must contain the column "smooth" for ' - "the smoothness factor when using Matern or Stable models." 
+ "the smoothness factor when using Matern or Stable models.", ) for i in range(len(params_variogram_model)): if params_variogram_model["model"].values[i] in ["stable", "matern"]: @@ -1986,8 +1942,7 @@ def _check_validity_params_variogram(params_variogram_model: pd.DataFrame) -> No def neff_circular_approx_theoretical(area: float, params_variogram_model: pd.DataFrame) -> float: - """ - Number of effective samples approximated from exact disk integration of a sum of any number of variogram models + """Number of effective samples approximated from exact disk integration of a sum of any number of variogram models of spherical, gaussian, exponential or cubic form over a disk of a certain area. This approximation performs best for areas with a shape close to that of a disk. Inspired by Rolstad et al. (2009): http://dx.doi.org/10.3189/002214309789470950. @@ -2011,7 +1966,6 @@ def neff_circular_approx_theoretical(area: float, params_variogram_model: pd.Dat :return: Number of effective samples """ - # Check input dataframe _check_validity_params_variogram(params_variogram_model) @@ -2025,46 +1979,46 @@ def neff_circular_approx_theoretical(area: float, params_variogram_model: pd.Dat # Spherical: h * covariance = c1 * h * ( 1 - 3/2 * h / a1 + 1/2 * (h/a1)**3 ) # = c1 * (h - 3/2 * h**2 / a1 + 1/2 * h**4 / a1**3) - # Spherical: radial integral of above from 0 to L: - # SE**2 = 2 / (L**2) * c1 * (L**2 / 2 - 3/2 * L**3 / 3 / a1 + 1/2 * 1/5 * L**5 / a1**3) - # which leads to SE**2 = c1 * (1 - L / a1 + 1/5 * (L/a1)**3 ) - # If spherical model is above the spherical range a1: SE**2 = c1 /5 * (a1/L)**2 + # Spherical: radial integral of above from 0 to l1: + # SE**2 = 2 / (l1**2) * c1 * (l1**2 / 2 - 3/2 * l1**3 / 3 / a1 + 1/2 * 1/5 * l1**5 / a1**3) + # which leads to SE**2 = c1 * (1 - l1 / a1 + 1/5 * (l1/a1)**3 ) + # If spherical model is above the spherical range a1: SE**2 = c1 /5 * (a1/l1)**2 - def spherical_exact_integral(a1: float, c1: float, L: float) -> float: + def 
spherical_exact_integral(a1: float, c1: float, l1: float) -> float: if l_equiv <= a1: - squared_se = c1 * (1 - L / a1 + 1 / 5 * (L / a1) ** 3) + squared_se = c1 * (1 - l1 / a1 + 1 / 5 * (l1 / a1) ** 3) else: - squared_se = c1 / 5 * (a1 / L) ** 2 + squared_se = c1 / 5 * (a1 / l1) ** 2 return squared_se # Exponential: h * covariance = c1 * h * exp(-h/a); a = a1/3 - # Exponential: radial integral of above from 0 to L: SE**2 = 2 / (L**2) * c1 * a * (a - exp(-L/a) * (a + L)) + # Exponential: radial integral of above from 0 to l1: SE**2 = 2 / (l1**2) * c1 * a * (a - exp(-l1/a) * (a + l1)) - def exponential_exact_integral(a1: float, c1: float, L: float) -> float: + def exponential_exact_integral(a1: float, c1: float, l1: float) -> float: a = a1 / 3 - squared_se = 2 * c1 * (a / L) ** 2 * (1 - np.exp(-L / a) * (1 + L / a)) + squared_se = 2 * c1 * (a / l1) ** 2 * (1 - np.exp(-l1 / a) * (1 + l1 / a)) return squared_se # Gaussian: h * covariance = c1 * h * exp(-h**2/a**2) ; a = a1/2 - # Gaussian: radial integral of above from 0 to L: SE**2 = 2 / (L**2) * c1 * 1/2 * a**2 * (1 - exp(-L**2/a**2)) + # Gaussian: radial integral of above from 0 to l1: SE**2 = 2 / (l1**2) * c1 * 1/2 * a**2 * (1 - exp(-l1**2/a**2)) - def gaussian_exact_integral(a1: float, c1: float, L: float) -> float: + def gaussian_exact_integral(a1: float, c1: float, l1: float) -> float: a = a1 / 2 - squared_se = c1 * (a / L) ** 2 * (1 - np.exp(-(L**2) / a**2)) + squared_se = c1 * (a / l1) ** 2 * (1 - np.exp(-(l1**2) / a**2)) return squared_se # Cubic: h * covariance = c1 * h * (1 - (7 * (h**2 / a**2)) + ((35 / 4) * (h**3 / a**3)) - # ((7 / 2) * (h**5 / a**5)) + ((3 / 4) * (h**7 / a**7))) - # Cubic: radial integral of above from 0 to L: - # SE**2 = c1 * (6*a**7 -21*a**5*L**2 + 21*a**4*L**3 - 6*a**2*L**5 + L**7) / (6*a**7) + # Cubic: radial integral of above from 0 to l1: + # SE**2 = c1 * (6*a**7 -21*a**5*l1**2 + 21*a**4*l1**3 - 6*a**2*l1**5 + l1**7) / (6*a**7) - def cubic_exact_integral(a1: float, c1: float, L: 
float) -> float: + def cubic_exact_integral(a1: float, c1: float, l1: float) -> float: if l_equiv <= a1: squared_se = ( - c1 * (6 * a1**7 - 21 * a1**5 * L**2 + 21 * a1**4 * L**3 - 6 * a1**2 * L**5 + L**7) / (6 * a1**7) + c1 * (6 * a1**7 - 21 * a1**5 * l1**2 + 21 * a1**4 * l1**3 - 6 * a1**2 * l1**5 + l1**7) / (6 * a1**7) ) else: - squared_se = 1 / 6 * c1 * a1**2 / L**2 + squared_se = 1 / 6 * c1 * a1**2 / l1**2 return squared_se squared_se = 0.0 @@ -2092,8 +2046,7 @@ def cubic_exact_integral(a1: float, c1: float, L: float) -> float: def _integrate_fun(fun: Callable[[NDArrayf], NDArrayf], low_b: float, upp_b: float) -> float: - """ - Numerically integrate function between an upper and lower bounds + """Numerically integrate function between an upper and lower bounds :param fun: Function to integrate :param low_b: Lower bound :param upp_b: Upper bound @@ -2104,8 +2057,8 @@ def _integrate_fun(fun: Callable[[NDArrayf], NDArrayf], low_b: float, upp_b: flo def neff_circular_approx_numerical(area: float | int, params_variogram_model: pd.DataFrame) -> float: - """ - Number of effective samples derived from numerical integration for any sum of variogram models over a circular area. + """Number of effective samples derived from numerical integration for + any sum of variogram models over a circular area. This is a generalization of Rolstad et al. (2009): http://dx.doi.org/10.3189/002214309789470950, which is verified against exact integration of `neff_circular_approx_theoretical`. This approximation performs best for areas with a shape close to that of a disk. 
@@ -2123,7 +2076,6 @@ def neff_circular_approx_numerical(area: float | int, params_variogram_model: pd :returns: Number of effective samples """ - # Check input dataframe _check_validity_params_variogram(params_variogram_model) @@ -2150,10 +2102,9 @@ def hcov_sum(h: NDArrayf) -> NDArrayf: def neff_exact( - coords: NDArrayf, errors: NDArrayf, params_variogram_model: pd.DataFrame, vectorized: bool = True + coords: NDArrayf, errors: NDArrayf, params_variogram_model: pd.DataFrame, vectorized: bool = True, ) -> float: - """ - Exact number of effective samples derived from a double sum of covariance with euclidean coordinates based on + """Exact number of effective samples derived from a double sum of covariance with euclidean coordinates based on the provided variogram parameters. This method works for any shape of area. :param coords: Center coordinates with size (N,2) for each spatial support (typically, pixel) @@ -2166,7 +2117,6 @@ def neff_exact( :return: Number of effective samples """ - # Check input dataframe _check_validity_params_variogram(params_variogram_model) @@ -2203,7 +2153,7 @@ def neff_exact( pds_matrix = squareform(pds) # Vectorize calculation var = np.sum( - errors.reshape((-1, 1)) @ errors.reshape((1, -1)) * rho(pds_matrix.flatten()).reshape(pds_matrix.shape) + errors.reshape((-1, 1)) @ errors.reshape((1, -1)) * rho(pds_matrix.flatten()).reshape(pds_matrix.shape), ) # The number of effective sample is the fraction of total sill by squared standard error @@ -2221,9 +2171,9 @@ def neff_hugonnet_approx( vectorized: bool = True, random_state: int | np.random.Generator | None = None, ) -> float: - """ - Approximated number of effective samples derived from a double sum of covariance subsetted on one of the two sums, - based on euclidean coordinates with the provided variogram parameters. This method works for any shape of area. 
+ """Approximated number of effective samples derived from a double sum of covariance subsetted on one of the two + sums, based on euclidean coordinates with the provided variogram parameters. + This method works for any shape of area. See Hugonnet et al. (2022), https://doi.org/10.1109/jstars.2022.3188922, in particular Supplementary Fig. S16. :param coords: Center coordinates with size (N,2) for each spatial support (typically, pixel) @@ -2238,7 +2188,6 @@ def neff_hugonnet_approx( :return: Number of effective samples """ - # Define random state rng = np.random.default_rng(random_state) @@ -2289,7 +2238,7 @@ def neff_hugonnet_approx( var = np.sum( errors.reshape((-1, 1)) @ errors_sub.reshape((1, -1)) - * rho(pds_matrix_sub.flatten()).reshape(pds_matrix_sub.shape) + * rho(pds_matrix_sub.flatten()).reshape(pds_matrix_sub.shape), ) # The number of effective sample is the fraction of total sill by squared standard error @@ -2305,9 +2254,8 @@ def number_effective_samples( rasterize_resolution: RasterType | float = None, **kwargs: Any, ) -> float: - """ - Compute the number of effective samples, i.e. the number of uncorrelated samples, in an area accounting for spatial - correlations described by a sum of variogram models. + """Compute the number of effective samples, i.e. the number of uncorrelated samples, + in an area accounting for spatial correlations described by a sum of variogram models. 
This function wraps two methods: @@ -2336,7 +2284,6 @@ def number_effective_samples( :return: Number of effective samples """ - # Check input for variogram parameters _check_validity_params_variogram(params_variogram_model=params_variogram_model) @@ -2349,22 +2296,22 @@ def number_effective_samples( # If the input is a geopandas dataframe, put into a Vector object if isinstance(area, gpd.GeoDataFrame): - V = Vector(area) + v_area = Vector(area) else: - V = area + v_area = area if rasterize_resolution is None: rasterize_resolution = np.min(params_variogram_model["range"].values) / 5.0 warnings.warn( "Resolution for vector rasterization is not defined and thus set at 20% of the shortest " - "correlation range, which might result in large memory usage." + "correlation range, which might result in large memory usage.", ) # Rasterize with numeric resolution or Raster metadata if isinstance(rasterize_resolution, (float, int, np.floating, np.integer)): # We only need relative mask and coordinates, not absolute - mask = V.create_mask(xres=rasterize_resolution, as_array=True) + mask = v_area.create_mask(xres=rasterize_resolution, as_array=True) x = rasterize_resolution * np.arange(0, mask.shape[0]) y = rasterize_resolution * np.arange(0, mask.shape[1]) coords = np.array(np.meshgrid(y, x)) @@ -2373,22 +2320,22 @@ def number_effective_samples( elif isinstance(rasterize_resolution, Raster): # With a Raster we can get the coordinates directly - mask = V.create_mask(raster=rasterize_resolution, as_array=True).squeeze() + mask = v_area.create_mask(raster=rasterize_resolution, as_array=True).squeeze() coords = np.array(rasterize_resolution.coords()) coords_on_mask = coords[:, mask].T else: - raise ValueError("The rasterize resolution must be a float, integer or Raster subclass.") + raise TypeError("The rasterize resolution must be a float, integer or Raster subclass.") # Here we don't care about heteroscedasticity, so all errors are standardized to one errors_on_mask = 
np.ones(len(coords_on_mask)) neff = neff_hugonnet_approx( - coords=coords_on_mask, errors=errors_on_mask, params_variogram_model=params_variogram_model, **kwargs + coords=coords_on_mask, errors=errors_on_mask, params_variogram_model=params_variogram_model, **kwargs, ) else: - raise ValueError("Area must be a float, integer, Vector subclass or geopandas dataframe.") + raise TypeError("Area must be a float, integer, Vector subclass or geopandas dataframe.") return neff @@ -2399,8 +2346,8 @@ def spatial_error_propagation( params_variogram_model: pd.Dataframe, **kwargs: Any, ) -> list[float]: - """ - Spatial propagation of elevation errors to an area using the estimated heteroscedasticity and spatial correlations. + """Spatial propagation of elevation errors to an area using the estimated + heteroscedasticity and spatial correlations. This function is based on the `number_effective_samples` function to estimate uncorrelated samples. If given a vector area, it uses Equation 18 of Hugonnet et al. (2022), https://doi.org/10.1109/jstars.2022.3188922. If given @@ -2420,13 +2367,12 @@ def spatial_error_propagation( :return: List of standard errors (1-sigma) for the input areas """ - standard_errors = [] errors_arr = get_array_and_mask(errors)[0] for area in areas: # We estimate the number of effective samples in the area neff = number_effective_samples( - area=area, params_variogram_model=params_variogram_model, rasterize_resolution=errors, **kwargs + area=area, params_variogram_model=params_variogram_model, rasterize_resolution=errors, **kwargs, ) # We compute the average error in this area @@ -2450,8 +2396,7 @@ def spatial_error_propagation( def _std_err_finite(std: float, neff_tot: float, neff: float) -> float: - """ - Standard error formula for a subsample of a finite ensemble. + """Standard error formula for a subsample of a finite ensemble. 
:param std: standard deviation :param neff_tot: maximum number of effective samples @@ -2463,8 +2408,7 @@ def _std_err_finite(std: float, neff_tot: float, neff: float) -> float: def _std_err(std: float, neff: float) -> float: - """ - Standard error formula. + """Standard error formula. :param std: standard deviation :param neff: number of effective samples @@ -2475,8 +2419,7 @@ def _std_err(std: float, neff: float) -> float: def _distance_latlon(tup1: tuple[float, float], tup2: tuple[float, float], earth_rad: float = 6373000) -> float: - """ - Distance between two lat/lon coordinates projected on a spheroid + """Distance between two lat/lon coordinates projected on a spheroid ref: https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude :param tup1: lon/lat coordinates of first point :param tup2: lon/lat coordinates of second point @@ -2501,17 +2444,15 @@ def _distance_latlon(tup1: tuple[float, float], tup2: tuple[float, float], earth def _scipy_convolution(imgs: NDArrayf, filters: NDArrayf, output: NDArrayf) -> None: - """ - Scipy convolution on a number n_N of 2D images of size N1 x N2 using a number of kernels n_M of sizes M1 x M2. + """Scipy convolution on a number n_N of 2D images of size N1 x N2 using a number of kernels n_M of sizes M1 x M2. 
:param imgs: Input array of size (n_N, N1, N2) with n_N images of size N1 x N2 :param filters: Input array of filters of size (n_M, M1, M2) with n_M filters of size M1 x M2 :param output: Initialized output array of size (n_N, n_M, N1, N2) """ - - for i_N in np.arange(imgs.shape[0]): - for i_M in np.arange(filters.shape[0]): - output[i_N, i_M, :, :] = fftconvolve(imgs[i_N, :, :], filters[i_M, :, :], mode="same") + for i_n in np.arange(imgs.shape[0]): + for i_m in np.arange(filters.shape[0]): + output[i_n, i_m, :, :] = fftconvolve(imgs[i_n, :, :], filters[i_m, :, :], mode="same") nd4type = numba.double[:, :, :, :] @@ -2520,8 +2461,7 @@ def _scipy_convolution(imgs: NDArrayf, filters: NDArrayf, output: NDArrayf) -> N @numba.njit((nd3type, nd3type, nd4type)) # type: ignore def _numba_convolution(imgs: NDArrayf, filters: NDArrayf, output: NDArrayf) -> None: - """ - Numba convolution on a number n_N of 2D images of size N1 x N2 using a number of kernels n_M of sizes M1 x M2. + """Numba convolution on a number n_N of 2D images of size N1 x N2 using a number of kernels n_M of sizes M1 x M2. :param imgs: Input array of size (n_N, N1, N2) with n_N images of size N1 x N2 :param filters: Input array of filters of size (n_M, M1, M2) with n_M filters of size M1 x M2 @@ -2542,8 +2482,7 @@ def _numba_convolution(imgs: NDArrayf, filters: NDArrayf, output: NDArrayf) -> N def convolution(imgs: NDArrayf, filters: NDArrayf, method: str = "scipy") -> NDArrayf: - """ - Convolution on a number n_N of 2D images of size N1 x N2 using a number of kernels n_M of sizes M1 x M2, using + """Convolution on a number n_N of 2D images of size N1 x N2 using a number of kernels n_M of sizes M1 x M2, using either scipy.signal.fftconvolve or accelerated numba loops. Note that the indexes on n_M and n_N correspond to first axes on the array to speed up computations (prefetching). 
Inspired by: https://laurentperrinet.github.io/sciblog/posts/2017-09-20-the-fastest-2d-convolution-in-the-world.html @@ -2554,10 +2493,9 @@ def convolution(imgs: NDArrayf, filters: NDArrayf, method: str = "scipy") -> NDA :return: Filled array of outputs of size (n_N, n_M, N1, N2) """ - # Initialize output array according to input shapes - n_N, N1, N2 = imgs.shape - n_M, M1, M2 = filters.shape + n_N, N1, N2 = imgs.shape # noqa: N806 + n_M, M1, M2 = filters.shape # noqa: N806 output = np.zeros((n_N, n_M, N1, N2)) if method.lower() == "scipy": @@ -2575,10 +2513,9 @@ def convolution(imgs: NDArrayf, filters: NDArrayf, method: str = "scipy") -> NDA def mean_filter_nan( - img: NDArrayf, kernel_size: int, kernel_shape: str = "circular", method: str = "scipy" + img: NDArrayf, kernel_size: int, kernel_shape: str = "circular", method: str = "scipy", ) -> tuple[NDArrayf, NDArrayf, int]: - """ - Apply a mean filter to an image with a square or circular kernel of size p and with NaN values ignored. + """Apply a mean filter to an image with a square or circular kernel of size p and with NaN values ignored. 
:param img: Input array of size (N1, N2) :param kernel_size: Size M of kernel, which will be a symmetrical (M, M) kernel @@ -2588,7 +2525,6 @@ def mean_filter_nan( :return: Array of size (N1, N2) with mean values, Array of size (N1, N2) with number of valid pixels, Number of pixels in the kernel """ - # Simplify kernel size notation p = kernel_size @@ -2645,9 +2581,7 @@ def _patches_convolution( statistic_between_patches: Callable[[NDArrayf], np.floating[Any]] = nmad, return_in_patch_statistics: bool = False, ) -> tuple[float, float, float] | tuple[float, float, float, pd.DataFrame]: - """ - - :param values: Values as array of shape (N1, N2) with NaN for masked values + """:param values: Values as array of shape (N1, N2) with NaN for masked values :param gsd: Ground sampling distance :param area: Size of integration area (squared unit of ground sampling distance) :param perc_min_valid: Minimum valid area in the patch @@ -2661,7 +2595,6 @@ def _patches_convolution( :return: Statistic between patches, Number of patches, Exact discretized area, (Optional) Dataframe of per-patch statistics """ - # Get kernel size to match area # If circular, it corresponds to the diameter if patch_shape.lower() == "circular": @@ -2675,7 +2608,7 @@ def _patches_convolution( logging.info("Computing the convolution on the entire array...") mean_img, nb_valid_img, nb_pixel_per_kernel = mean_filter_nan( - img=values, kernel_size=kernel_size, kernel_shape=patch_shape, method=method + img=values, kernel_size=kernel_size, kernel_shape=patch_shape, method=method, ) # Exclude mean values if number of valid pixels is less than a percentage of the kernel size @@ -2698,11 +2631,11 @@ def _patches_convolution( if return_in_patch_statistics: # Create dataframe of independent patches for one independent setting - df = pd.DataFrame( + df_patches = pd.DataFrame( data={ "nanmean": mean_img[::kernel_size, ::kernel_size].ravel(), "count": nb_valid_img[::kernel_size, ::kernel_size].ravel(), - } + }, ) # We 
then use the average of the statistic computed for different sets of independent patches to get a more robust @@ -2712,9 +2645,8 @@ def _patches_convolution( exact_area = nb_pixel_per_kernel * gsd**2 if return_in_patch_statistics: - return average_statistic, nb_independent_patches, exact_area, df - else: - return average_statistic, nb_independent_patches, exact_area + return average_statistic, nb_independent_patches, exact_area, df_patches + return average_statistic, nb_independent_patches, exact_area def _patches_loop_quadrants( @@ -2729,9 +2661,7 @@ def _patches_loop_quadrants( random_state: int | np.random.Generator | None = None, return_in_patch_statistics: bool = False, ) -> tuple[float, float, float] | tuple[float, float, float, pd.DataFrame]: - """ - Patches method for empirical estimation of the standard error over an integration area - + """Patches method for empirical estimation of the standard error over an integration area :param values: Values as array of shape (N1, N2) with NaN for masked values :param gsd: Ground sampling distance @@ -2748,7 +2678,6 @@ def _patches_loop_quadrants( :return: Statistic between patches, Number of patches, Exact discretized area, Dataframe of per-patch statistics """ - list_statistics_in_patch = list(statistics_in_patch) # Add count by default list_statistics_in_patch.append("count") @@ -2801,7 +2730,7 @@ def _patches_loop_quadrants( # Get patch by masking the square or circular quadrant if patch_shape.lower() == "square": patch = values[ - kernel_size * i : kernel_size * (i + 1), kernel_size * j : kernel_size * (j + 1) + kernel_size * i : kernel_size * (i + 1), kernel_size * j : kernel_size * (j + 1), ].flatten() elif patch_shape.lower() == "circular": center_x = np.floor(kernel_size * (i + 1 / 2)) @@ -2818,20 +2747,20 @@ def _patches_loop_quadrants( u = u + 1 if u > n_patches: break - logging.info("Found valid quadrant " + str(u) + " (maximum: " + str(n_patches) + ")") + logging.info("Found valid quadrant %s (maximum: 
%s)", str(u), str(n_patches)) - df = pd.DataFrame() - df = df.assign(tile=[str(i) + "_" + str(j)]) + df_stats = pd.DataFrame() + df_stats = df_stats.assign(tile=[str(i) + "_" + str(j)]) for j, statistic in enumerate(list_statistics_in_patch): if isinstance(statistic, str): if statistic == "count": - df[statistic] = [nb_pixel_valid] + df_stats[statistic] = [nb_pixel_valid] else: raise ValueError('No other string than "count" are supported for named statistics.') else: - df[statistics_name[j]] = [statistic(patch[np.isfinite(patch)].astype("float64"))] + df_stats[statistics_name[j]] = [statistic(patch[np.isfinite(patch)].astype("float64"))] - list_df.append(df) + list_df.append(df_stats) # Get remaining samples to draw remaining_nsamp = n_patches - u @@ -2853,15 +2782,14 @@ def _patches_loop_quadrants( if return_in_patch_statistics: return average_statistic, nb_independent_patches, exact_area, df_all - else: - return average_statistic, nb_independent_patches, exact_area + return average_statistic, nb_independent_patches, exact_area @overload def patches_method( values: NDArrayf | RasterType, areas: list[float], - gsd: float = None, + gsd: float | None = None, stable_mask: NDArrayf | VectorType | gpd.GeoDataFrame = None, unstable_mask: NDArrayf | VectorType | gpd.GeoDataFrame = None, statistics_in_patch: tuple[Callable[[NDArrayf], np.floating[Any]] | str] = (np.nanmean,), @@ -2881,7 +2809,7 @@ def patches_method( def patches_method( values: NDArrayf | RasterType, areas: list[float], - gsd: float = None, + gsd: float | None = None, stable_mask: NDArrayf | VectorType | gpd.GeoDataFrame = None, unstable_mask: NDArrayf | VectorType | gpd.GeoDataFrame = None, statistics_in_patch: tuple[Callable[[NDArrayf], np.floating[Any]] | str] = (np.nanmean,), @@ -2900,7 +2828,7 @@ def patches_method( def patches_method( values: NDArrayf | RasterType, areas: list[float], - gsd: float = None, + gsd: float | None = None, stable_mask: NDArrayf | VectorType | gpd.GeoDataFrame = None, 
unstable_mask: NDArrayf | VectorType | gpd.GeoDataFrame = None, statistics_in_patch: tuple[Callable[[NDArrayf], np.floating[Any]] | str] = (np.nanmean,), @@ -2913,8 +2841,7 @@ def patches_method( return_in_patch_statistics: bool = False, random_state: int | np.random.Generator | None = None, ) -> pd.DataFrame | tuple[pd.DataFrame, pd.DataFrame]: - """ - Monte Carlo patches method that samples multiple patches of terrain, square or circular, of a certain area and + """Monte Carlo patches method that samples multiple patches of terrain, square or circular, of a certain area and computes a statistic in each patch. Then, another statistic is computed between all patches. Typically, a statistic of central tendency (e.g., the mean) is computed for each patch, then a statistic of spread (e.g., the NMAD) is computed on the central tendency of all the patches. This specific procedure gives an empirical estimate of the @@ -2953,10 +2880,9 @@ def patches_method( :return: Dataframe of statistic between patches with independent patches count and exact areas, (Optional) Dataframe of per-patch statistics """ - # Get values with NaNs on unstable terrain, preserving the shape by default values_arr, gsd = _preprocess_values_with_mask_to_array( - values=values, include_mask=stable_mask, exclude_mask=unstable_mask, gsd=gsd + values=values, include_mask=stable_mask, exclude_mask=unstable_mask, gsd=gsd, ) # Initialize list of dataframe for the statistic on all patches @@ -3004,10 +2930,10 @@ def patches_method( if return_in_patch_statistics: # Here we'd need to write overload for all the patch subfunctions... maybe we can do this more easily with # the function behaviour, ignoring for now. 
- df: pd.DataFrame = outputs[3] # type: ignore - df["areas"] = area - df["exact_areas"] = outputs[2] - list_df.append(df) + df_patches: pd.DataFrame = outputs[3] # type: ignore + df_patches["areas"] = area + df_patches["exact_areas"] = outputs[2] + list_df.append(df_patches) # Produce final dataframe of statistic between patches per area df_statistic = pd.DataFrame( @@ -3016,32 +2942,30 @@ def patches_method( "nb_indep_patches": list_nb_patches, "exact_areas": list_exact_areas, "areas": areas, - } + }, ) if return_in_patch_statistics: # Concatenate the complete dataframe df_tot = pd.concat(list_df) return df_statistic, df_tot - else: - return df_statistic + return df_statistic def plot_variogram( df: pd.DataFrame, - list_fit_fun: list[Callable[[NDArrayf], NDArrayf]] = None, - list_fit_fun_label: list[str] = None, - ax: matplotlib.axes.Axes = None, + list_fit_fun: list[Callable[[NDArrayf], NDArrayf]] | None = None, + list_fit_fun_label: list[str] | None = None, + ax: mpl.axes.Axes = None, xscale: str = "linear", - xscale_range_split: list[float] = None, - xlabel: str = None, - ylabel: str = None, - xlim: str = None, - ylim: str = None, - out_fname: str = None, + xscale_range_split: list[float] | None = None, + xlabel: str | None = None, + ylabel: str | None = None, + xlim: str | None = None, + ylim: str | None = None, + out_fname: str | None = None, ) -> None: - """ - Plot empirical variogram, and optionally also plot one or several model fits. + """Plot empirical variogram, and optionally also plot one or several model fits. Input dataframe is expected to be the output of xdem.spatialstats.sample_empirical_variogram. Input function model is expected to be the output of xdem.spatialstats.fit_sum_model_variogram. 
@@ -3059,12 +2983,11 @@ def plot_variogram( :param out_fname: File to save the variogram plot to :return: """ - # Create axes if they are not passed if ax is None: fig = plt.figure() ax = plt.subplot(111) - elif isinstance(ax, matplotlib.axes.Axes): + elif isinstance(ax, mpl.axes.Axes): fig = ax.figure else: raise ValueError("ax must be a matplotlib.axes.Axes instance or None") @@ -3104,7 +3027,7 @@ def plot_variogram( first_xmin = np.min(df.lags) / 2 else: first_xmin = 0 - xscale_range_split = [first_xmin] + xscale_range_split + xscale_range_split = [first_xmin] + xscale_range_split # noqa: RUF005 # Add maximum distance if not in input if xscale_range_split[-1] != np.max(df.lags): xscale_range_split.append(np.max(df.lags)) @@ -3135,7 +3058,7 @@ def plot_variogram( ax0.set_xticks([]) # Plot the histogram manually with fill_between - interval_var = [0] + list(df.lags) + interval_var = [0] + list(df.lags) # noqa: RUF005 for i in range(len(df)): count = df["count"].values[i] ax0.fill_between( @@ -3160,7 +3083,7 @@ def plot_variogram( ax1 = ax.inset_axes(grid[3:, xgridmin[k] : xgridmax[k]].get_position(fig).bounds) # Get the lags bin centers - bins_center = np.subtract(df.lags, np.diff([0] + df.lags.tolist()) / 2) + bins_center = np.subtract(df.lags, np.diff([0] + df.lags.tolist()) / 2) # noqa: RUF005 # If all the estimated errors are all NaN (single run), simply plot the empirical variogram if np.all(np.isnan(df.err_exp)): @@ -3196,11 +3119,10 @@ def plot_variogram( if ylim is not None: ax1.set_ylim(ylim) + elif np.all(np.isnan(df.err_exp)): + ax1.set_ylim((0, 1.05 * np.nanmax(df.exp))) else: - if np.all(np.isnan(df.err_exp)): - ax1.set_ylim((0, 1.05 * np.nanmax(df.exp))) - else: - ax1.set_ylim((0, np.nanmax(df.exp) + np.nanmean(df.err_exp))) + ax1.set_ylim((0, np.nanmax(df.exp) + np.nanmean(df.err_exp))) if k == int(nb_subpanels / 2): ax1.set_xlabel(xlabel) @@ -3222,11 +3144,10 @@ def plot_1d_binning( label_var: str | None = None, label_statistic: str | None = None, 
min_count: int = 30, - ax: matplotlib.axes.Axes | None = None, - out_fname: str = None, + ax: mpl.axes.Axes | None = None, + out_fname: str | None = None, ) -> None: - """ - Plot a statistic and its count along a single binning variable. + """Plot a statistic and its count along a single binning variable. Input is expected to be formatted as the output of the xdem.spatialstats.nd_binning function. :param df: Output dataframe of nd_binning @@ -3238,12 +3159,11 @@ def plot_1d_binning( :param ax: Plotting ax to use, creates a new one by default :param out_fname: File to save the variogram plot to """ - # Create axes if ax is None: fig = plt.figure() ax = plt.subplot(111) - elif isinstance(ax, matplotlib.axes.Axes): + elif isinstance(ax, mpl.axes.Axes): fig = ax.figure else: raise ValueError("ax must be a matplotlib.axes.Axes instance or None.") @@ -3313,7 +3233,7 @@ def plot_1d_binning( va="center", fontweight="bold", transform=ax0.transAxes, - bbox=dict(facecolor="white", alpha=0.8), + bbox={"facecolor":"white", "alpha":0.8}, ) ax0.set_ylim((0, 1.1 * np.max(df_sub["count"].values))) @@ -3338,18 +3258,17 @@ def plot_2d_binning( label_var_name_1: str | None = None, label_var_name_2: str | None = None, label_statistic: str | None = None, - cmap: matplotlib.colors.Colormap = plt.cm.Reds, + cmap: mpl.colors.Colormap = plt.cm.Reds, min_count: int = 30, scale_var_1: str = "linear", scale_var_2: str = "linear", vmin: np.floating[Any] = None, vmax: np.floating[Any] = None, nodata_color: str | tuple[float, float, float, float] = "yellow", - ax: matplotlib.axes.Axes | None = None, - out_fname: str = None, + ax: mpl.axes.Axes | None = None, + out_fname: str | None = None, ) -> None: - """ - Plot one statistic and its count along two binning variables. + """Plot one statistic and its count along two binning variables. Input is expected to be formatted as the output of the xdem.spatialstats.nd_binning function. 
:param df: Output dataframe of nd_binning @@ -3369,19 +3288,18 @@ def plot_2d_binning( :param ax: Plotting ax to use, creates a new one by default :param out_fname: File to save the variogram plot to """ - # Create axes if ax is None: fig = plt.figure(figsize=(8, 6)) ax = plt.subplot(111) - elif isinstance(ax, matplotlib.axes.Axes): + elif isinstance(ax, mpl.axes.Axes): fig = ax.figure else: raise ValueError("ax must be a matplotlib.axes.Axes instance or None.") if var_name_1 not in df.columns.values: raise ValueError(f'The variable "{var_name_1}" is not part of the provided dataframe column names.') - elif var_name_2 not in df.columns.values: + if var_name_2 not in df.columns.values: raise ValueError(f'The variable "{var_name_2}" is not part of the provided dataframe column names.') if statistic_name not in df.columns.values: @@ -3409,7 +3327,7 @@ def plot_2d_binning( df.nd == 2, np.isfinite(pd.IntervalIndex(df[var_name_1]).mid), np.isfinite(pd.IntervalIndex(df[var_name_2]).mid), - ) + ), ) ].copy() # Remove statistic calculated in bins with too low count @@ -3465,7 +3383,7 @@ def plot_2d_binning( va="center", fontweight="bold", transform=ax0.transAxes, - bbox=dict(facecolor="white", alpha=0.8), + bbox={"facecolor":"white", "alpha":0.8}, ) # Second, a vertical axis on the right to plot the sample histogram of the second variable @@ -3511,7 +3429,7 @@ def plot_2d_binning( fontweight="bold", transform=ax1.transAxes, rotation=90, - bbox=dict(facecolor="white", alpha=0.8), + bbox={"facecolor":"white", "alpha":0.8}, ) # Third, an axis to plot the data as a colored grid @@ -3524,12 +3442,12 @@ def plot_2d_binning( # Create custom colormap col_bounds = np.array([vmin, np.mean(np.asarray([vmin, vmax])), vmax]) - cb = [] cb_val = np.linspace(0, 1, len(col_bounds)) - for j in range(len(cb_val)): - cb.append(cmap(cb_val[j])) + cb = [cmap(cb_val[j]) for j in range(len(cb_val))] cmap_cus = colors.LinearSegmentedColormap.from_list( - "my_cb", list(zip((col_bounds - 
min(col_bounds)) / (max(col_bounds - min(col_bounds))), cb)), N=1000 + "my_cb", + list(zip((col_bounds - min(col_bounds)) / (max(col_bounds - min(col_bounds))), cb, strict=False)), + N=1000, ) # Plot a 2D colored grid using fill_between diff --git a/xdem/terrain.py b/xdem/terrain.py index 151002aba..56e006b1c 100644 --- a/xdem/terrain.py +++ b/xdem/terrain.py @@ -20,7 +20,8 @@ from __future__ import annotations import warnings -from typing import Sized, overload +from collections.abc import Sized +from typing import overload import geoutils as gu import numba @@ -53,16 +54,15 @@ def _get_quadric_coefficients( edge_method: str = "none", make_rugosity: bool = False, ) -> NDArrayf: - """ - Run the pixel-wise analysis in parallel for a 3x3 window using the resolution. + """Run the pixel-wise analysis in parallel for a 3x3 window using the resolution. See the xdem.terrain.get_quadric_coefficients() docstring for more info. """ # Rename the resolution - L = resolution + L = resolution # noqa: N806 # Allocate the output. - output = np.full((12,) + dem.shape, fill_value=np.nan) + output = np.full((12,) + dem.shape, fill_value=np.nan) # noqa: RUF005 # Convert the string to a number (fewer bytes to compare each iteration) if fill_method == "median": @@ -87,7 +87,7 @@ def _get_quadric_coefficients( # Extract the pixel and its 8 immediate neighbours. # If the border is reached, just duplicate the closest neighbour to obtain 9 values. - Z = np.empty((9,), dtype=dem.dtype) + Z = np.empty((9,), dtype=dem.dtype) # noqa: N806 count = 0 # If edge_method == "none", validate that it's not near an edge. If so, leave the nans without filling. 
@@ -177,28 +177,28 @@ def _get_quadric_coefficients( # pixels and 1 segment between surrounding pixels; pixel 4 is the center # above 4 the index of center-surrounding segment decrease by 1, as the center pixel was skipped # Triangle 1: pixels 3 and 0 - T1 = [hsl[3], hsl[0], hsl[12]] + T1 = [hsl[3], hsl[0], hsl[12]] # noqa: N806 # Triangle 2: pixels 0 and 1 - T2 = [hsl[0], hsl[1], hsl[8]] + T2 = [hsl[0], hsl[1], hsl[8]] # noqa: N806 # Triangle 3: pixels 1 and 2 - T3 = [hsl[1], hsl[2], hsl[9]] + T3 = [hsl[1], hsl[2], hsl[9]] # noqa: N806 # Triangle 4: pixels 2 and 5 - T4 = [hsl[2], hsl[4], hsl[14]] + T4 = [hsl[2], hsl[4], hsl[14]] # noqa: N806 # Triangle 5: pixels 5 and 8 - T5 = [hsl[4], hsl[7], hsl[15]] + T5 = [hsl[4], hsl[7], hsl[15]] # noqa: N806 # Triangle 6: pixels 8 and 7 - T6 = [hsl[7], hsl[6], hsl[11]] + T6 = [hsl[7], hsl[6], hsl[11]] # noqa: N806 # Triangle 7: pixels 7 and 6 - T7 = [hsl[6], hsl[5], hsl[10]] + T7 = [hsl[6], hsl[5], hsl[10]] # noqa: N806 # Triangle 8: pixels 6 and 3 - T8 = [hsl[5], hsl[3], hsl[13]] + T8 = [hsl[5], hsl[3], hsl[13]] # noqa: N806 - list_T = [T1, T2, T3, T4, T5, T6, T7, T8] + list_T = [T1, T2, T3, T4, T5, T6, T7, T8] # noqa: N806 # Finally, we compute the 3D surface areas of the 8 triangles - A = np.empty((8,)) + A = np.empty((8,)) # noqa: N806 count = 0 - for T in list_T: + for T in list_T: # noqa: N806 # Half sum of lengths hs = sum(T) / 2 # Surface area of triangle @@ -236,8 +236,7 @@ def get_quadric_coefficients( edge_method: str = "none", make_rugosity: bool = False, ) -> NDArrayf: - """ - Computes quadric and other coefficients on a fixed 3x3 pixel window, and that depends on the resolution. + """Computes quadric and other coefficients on a fixed 3x3 pixel window, and that depends on the resolution. Returns the 9 coefficients of a quadric surface fit to every pixel in the raster, the 2 coefficients of optimized slope gradient, and the rugosity. 
@@ -304,7 +303,7 @@ def get_quadric_coefficients( if len(dem_arr.shape) != 2: raise ValueError( f"Invalid input array shape: {dem.shape}, parsed into {dem_arr.shape}. " - "Expected 2D array or 3D array of shape (1, row, col)." + "Expected 2D array or 3D array of shape (1, row, col).", ) if any(dim < 3 for dim in dem_arr.shape): @@ -312,12 +311,12 @@ def get_quadric_coefficients( # Resolution is in other tools accepted as a tuple. Here, it must be just one number, so it's best to sanity check. if isinstance(resolution, Sized): - raise ValueError("Resolution must be the same for X and Y directions.") + raise TypeError("Resolution must be the same for X and Y directions.") allowed_fill_methods = ["median", "mean", "none"] allowed_edge_methods = ["nearest", "wrap", "none"] for value, name, allowed in zip( - [fill_method, edge_method], ["fill", "edge"], (allowed_fill_methods, allowed_edge_methods) + [fill_method, edge_method], ["fill", "edge"], (allowed_fill_methods, allowed_edge_methods), strict=False, ): if value.lower() not in allowed: raise ValueError(f"Invalid {name} method: '{value}'. Choices: {allowed}.") @@ -345,14 +344,12 @@ def _get_windowed_indexes( window_size: int = 3, make_fractal_roughness: bool = False, ) -> NDArrayf: - """ - Run the pixel-wise analysis in parallel for any window size without using the resolution. + """Run the pixel-wise analysis in parallel for any window size without using the resolution. See the xdem.terrain.get_windowed_indexes() docstring for more info. """ - # Allocate the outputs. - output = np.full((5,) + dem.shape, fill_value=np.nan) + output = np.full((5,) + dem.shape, fill_value=np.nan) # noqa: RUF005 # Half window size hw = int(np.floor(window_size / 2)) @@ -380,7 +377,7 @@ def _get_windowed_indexes( # Extract the pixel and its 8 immediate neighbours. # If the border is reached, just duplicate the closest neighbour to obtain 9 values. 
- Z = np.empty((window_size**2,), dtype=dem.dtype) + Z = np.empty((window_size**2,), dtype=dem.dtype) # noqa: N806 count = 0 # If edge_method == "none", validate that it's not near an edge. If so, leave the nans without filling. @@ -427,7 +424,7 @@ def _get_windowed_indexes( # Difference pixels between specific cells: only useful for Terrain Ruggedness Index count = 0 index_middle_pixel = int((window_size**2 - 1) / 2) - S = np.empty((window_size**2,)) + S = np.empty((window_size**2,)) # noqa: N806 for _j in range(-hw, -hw + window_size): for _k in range(-hw, -hw + window_size): S[count] = np.abs(Z[count] - Z[index_middle_pixel]) @@ -437,14 +434,14 @@ def _get_windowed_indexes( # Fractal roughness computation according to the box-counting method of Taud and Parrot (2005) # First, we compute the number of voxels for each pixel of Equation 4 count = 0 - V = np.empty((window_size, window_size)) + V = np.empty((window_size, window_size)) # noqa: N806 for j in range(-hw, -hw + window_size): for k in range(-hw, -hw + window_size): - T = Z[count] - Z[index_middle_pixel] + T = Z[count] - Z[index_middle_pixel] # noqa: N806 # The following is the equivalent of np.clip, written like this for numba if T < 0: V[hw + j, hw + k] = 0 - elif T > window_size: + elif window_size < T: V[hw + j, hw + k] = window_size else: V[hw + j, hw + k] = T @@ -454,19 +451,16 @@ def _get_windowed_indexes( # size, following Equation 5 # Get all the divisors of the half window size - list_box_sizes = [] - for j in range(1, hw + 1): - if hw % j == 0: - list_box_sizes.append(j) + list_box_sizes = [j for j in range(1, hw + 1) if hw % j == 0] - Ns = np.empty((len(list_box_sizes),)) - for l0 in range(0, len(list_box_sizes)): + Ns = np.empty((len(list_box_sizes),)) # noqa: N806 + for l0 in range(len(list_box_sizes)): # We loop over boxes of size q x q in the cube q = list_box_sizes[l0] - sumNs = 0 - for j in range(0, int((window_size - 1) / q)): - for k in range(0, int((window_size - 1) / q)): - sumNs += 
np.max(V[slice(j * q, (j + 1) * q), slice(k * q, (k + 1) * q)].flatten()) + sumNs = 0 # noqa: N806 + for j in range(int((window_size - 1) / q)): + for k in range(int((window_size - 1) / q)): + sumNs += np.max(V[slice(j * q, (j + 1) * q), slice(k * q, (k + 1) * q)].flatten()) # noqa: N806 Ns[l0] = sumNs / q # Finally, we calculate the slope of the logarithm of Ns with q @@ -479,13 +473,13 @@ def _get_windowed_indexes( m_x = np.mean(x) m_y = np.mean(y) # Cross-deviation and deviation about x - SS_xy = np.sum(y * x) - n * m_y * m_x - SS_xx = np.sum(x * x) - n * m_x * m_x + SS_xy = np.sum(y * x) - n * m_y * m_x # noqa: N806 + SS_xx = np.sum(x * x) - n * m_x * m_x # noqa: N806 # Calculating slope b_1 = SS_xy / SS_xx # The fractal dimension D is the opposite of the slope - D = -b_1 + D = -b_1 # noqa: N806 # First output is the Terrain Ruggedness Index from Riley et al. (1999): squareroot of squared sum of # differences between center and neighbouring pixels @@ -514,8 +508,7 @@ def get_windowed_indexes( window_size: int = 3, make_fractal_roughness: bool = False, ) -> NDArrayf: - """ - Return terrain indexes based on a windowed calculation of variable size, independent of the resolution. + """Return terrain indexes based on a windowed calculation of variable size, independent of the resolution. Includes: @@ -576,7 +569,7 @@ def get_windowed_indexes( if len(dem_arr.shape) != 2: raise ValueError( f"Invalid input array shape: {dem.shape}, parsed into {dem_arr.shape}. 
" - "Expected 2D array or 3D array of shape (1, row, col)" + "Expected 2D array or 3D array of shape (1, row, col)", ) if any(dim < 3 for dim in dem_arr.shape): @@ -588,7 +581,7 @@ def get_windowed_indexes( allowed_fill_methods = ["median", "mean", "none"] allowed_edge_methods = ["nearest", "wrap", "none"] for value, name, allowed in zip( - [fill_method, edge_method], ["fill", "edge"], (allowed_fill_methods, allowed_edge_methods) + [fill_method, edge_method], ["fill", "edge"], (allowed_fill_methods, allowed_edge_methods), strict=False, ): if value.lower() not in allowed: raise ValueError(f"Invalid {name} method: '{value}'. Choices: {allowed}") @@ -690,8 +683,7 @@ def get_terrain_attribute( edge_method: str = "none", window_size: int = 3, ) -> NDArrayf | list[NDArrayf] | RasterType | list[RasterType]: - """ - Derive one or multiple terrain attributes from a DEM. + """Derive one or multiple terrain attributes from a DEM. The attributes are based on: - Slope, aspect, hillshade (first method) from Horn (1981), http://dx.doi.org/10.1109/PROC.1981.11918, @@ -709,7 +701,6 @@ def get_terrain_attribute( More details on the equations in the functions get_quadric_coefficients() and get_windowed_indexes(). Attributes: - * 'slope': The slope in degrees or radians (degs: 0=flat, 90=vertical). Default method: "Horn". * 'aspect': The slope aspect in degrees or radians (degs: 0=N, 90=E, 180=S, 270=W). * 'hillshade': The shaded slope in relation to its aspect. @@ -759,6 +750,7 @@ def get_terrain_attribute( [0., 0., 0.]]) :returns: One or multiple arrays of the requested attribute(s) + """ if isinstance(dem, gu.Raster): if resolution is None: @@ -842,7 +834,7 @@ def get_terrain_attribute( if resolution[0] != resolution[1]: raise ValueError( f"Quadric surface fit requires the same X and Y resolution ({resolution} was given). 
" - f"This was required by: {attributes_requiring_surface_fit}" + f"This was required by: {attributes_requiring_surface_fit}", ) terrain_attributes["surface_fit"] = get_quadric_coefficients( dem=dem_arr, @@ -859,7 +851,7 @@ def get_terrain_attribute( # http://dx.doi.org/10.1109/PROC.1981.11918. terrain_attributes["slope"] = np.arctan( (terrain_attributes["surface_fit"][9, :, :] ** 2 + terrain_attributes["surface_fit"][10, :, :] ** 2) - ** 0.5 + ** 0.5, ) elif slope_method == "ZevenbergThorne": @@ -868,7 +860,7 @@ def get_terrain_attribute( # SLOPE = ARCTAN((G²+H²)**(1/2)) terrain_attributes["slope"] = np.arctan( (terrain_attributes["surface_fit"][6, :, :] ** 2 + terrain_attributes["surface_fit"][7, :, :] ** 2) - ** 0.5 + ** 0.5, ) if make_aspect: @@ -881,7 +873,7 @@ def get_terrain_attribute( # This uses the estimates from Horn (1981). terrain_attributes["aspect"] = ( -np.arctan2( - -terrain_attributes["surface_fit"][9, :, :], terrain_attributes["surface_fit"][10, :, :] + -terrain_attributes["surface_fit"][9, :, :], terrain_attributes["surface_fit"][10, :, :], ) - np.pi ) % (2 * np.pi) @@ -1044,8 +1036,7 @@ def slope( degrees: bool = True, resolution: float | tuple[float, float] | None = None, ) -> NDArrayf | Raster: - """ - Generate a slope map for a DEM, returned in degrees by default. + """Generate a slope map for a DEM, returned in degrees by default. Based on Horn (1981), http://dx.doi.org/10.1109/PROC.1981.11918 and on Zevenbergen and Thorne (1987), http://dx.doi.org/10.1002/esp.3290120107. @@ -1092,8 +1083,7 @@ def aspect( method: str = "Horn", degrees: bool = True, ) -> NDArrayf | Raster: - """ - Calculate the aspect of each cell in a DEM, returned in degrees by default. The aspect of flat slopes is 180° by + """Calculate the aspect of each cell in a DEM, returned in degrees by default. The aspect of flat slopes is 180° by default (as in GDAL). 
Based on Horn (1981), http://dx.doi.org/10.1109/PROC.1981.11918 and on Zevenbergen and Thorne (1987), @@ -1156,8 +1146,7 @@ def hillshade( z_factor: float = 1.0, resolution: float | tuple[float, float] | None = None, ) -> NDArrayf | RasterType: - """ - Generate a hillshade from the given DEM. The value 0 is used for nodata, and 1 to 255 for hillshading. + """Generate a hillshade from the given DEM. The value 0 is used for nodata, and 1 to 255 for hillshading. Based on Horn (1981), http://dx.doi.org/10.1109/PROC.1981.11918. @@ -1203,8 +1192,7 @@ def curvature( dem: NDArrayf | MArrayf | RasterType, resolution: float | tuple[float, float] | None = None, ) -> NDArrayf | RasterType: - """ - Calculate the terrain curvature (second derivative of elevation) in m-1 multiplied by 100. + """Calculate the terrain curvature (second derivative of elevation) in m-1 multiplied by 100. Based on Zevenbergen and Thorne (1987), http://dx.doi.org/10.1002/esp.3290120107. @@ -1248,8 +1236,7 @@ def planform_curvature( dem: NDArrayf | MArrayf | RasterType, resolution: float | tuple[float, float] | None = None, ) -> NDArrayf | RasterType: - """ - Calculate the terrain curvature perpendicular to the direction of the slope in m-1 multiplied by 100. + """Calculate the terrain curvature perpendicular to the direction of the slope in m-1 multiplied by 100. Based on Zevenbergen and Thorne (1987), http://dx.doi.org/10.1002/esp.3290120107. @@ -1284,10 +1271,9 @@ def profile_curvature(dem: RasterType, resolution: float | tuple[float, float] | def profile_curvature( - dem: NDArrayf | MArrayf | RasterType, resolution: float | tuple[float, float] | None = None + dem: NDArrayf | MArrayf | RasterType, resolution: float | tuple[float, float] | None = None, ) -> NDArrayf | RasterType: - """ - Calculate the terrain curvature parallel to the direction of the slope in m-1 multiplied by 100. + """Calculate the terrain curvature parallel to the direction of the slope in m-1 multiplied by 100. 
Based on Zevenbergen and Thorne (1987), http://dx.doi.org/10.1002/esp.3290120107. @@ -1322,10 +1308,9 @@ def maximum_curvature(dem: RasterType, resolution: float | tuple[float, float] | def maximum_curvature( - dem: NDArrayf | MArrayf | RasterType, resolution: float | tuple[float, float] | None = None + dem: NDArrayf | MArrayf | RasterType, resolution: float | tuple[float, float] | None = None, ) -> NDArrayf | RasterType: - """ - Calculate the signed maximum profile or planform curvature parallel to the direction of the slope in m-1 + """Calculate the signed maximum profile or planform curvature parallel to the direction of the slope in m-1 multiplied by 100. Based on Zevenbergen and Thorne (1987), http://dx.doi.org/10.1002/esp.3290120107. @@ -1349,8 +1334,7 @@ def topographic_position_index(dem: RasterType, window_size: int = 3) -> RasterT def topographic_position_index(dem: NDArrayf | MArrayf | RasterType, window_size: int = 3) -> NDArrayf | RasterType: - """ - Calculates the Topographic Position Index, the difference to the average of neighbouring pixels. Output is in the + """Calculates the Topographic Position Index, the difference to the average of neighbouring pixels. Output is in the unit of the DEM (typically meters). Based on: Weiss (2001), http://www.jennessent.com/downloads/TPI-poster-TNC_18x22.pdf. @@ -1386,10 +1370,9 @@ def terrain_ruggedness_index(dem: RasterType, method: str = "Riley", window_size def terrain_ruggedness_index( - dem: NDArrayf | MArrayf | RasterType, method: str = "Riley", window_size: int = 3 + dem: NDArrayf | MArrayf | RasterType, method: str = "Riley", window_size: int = 3, ) -> NDArrayf | RasterType: - """ - Calculates the Terrain Ruggedness Index, the cumulated differences to neighbouring pixels. Output is in the + """Calculates the Terrain Ruggedness Index, the cumulated differences to neighbouring pixels. Output is in the unit of the DEM (typically meters). 
Based either on: @@ -1420,7 +1403,7 @@ def terrain_ruggedness_index( :returns: The terrain ruggedness index array of the DEM (unit of the DEM). """ return get_terrain_attribute( - dem=dem, attribute="terrain_ruggedness_index", tri_method=method, window_size=window_size + dem=dem, attribute="terrain_ruggedness_index", tri_method=method, window_size=window_size, ) @@ -1433,9 +1416,8 @@ def roughness(dem: RasterType, window_size: int = 3) -> RasterType: ... def roughness(dem: NDArrayf | MArrayf | RasterType, window_size: int = 3) -> NDArrayf | RasterType: - """ - Calculates the roughness, the maximum difference between neighbouring pixels, for any window size. Output is in the - unit of the DEM (typically meters). + """Calculates the roughness, the maximum difference between neighbouring pixels, for any window size. + Output is in the unit of the DEM (typically meters). Based on: Dartnell (2000), https://environment.sfsu.edu/node/11292. @@ -1476,10 +1458,9 @@ def rugosity( def rugosity( - dem: NDArrayf | MArrayf | RasterType, resolution: float | tuple[float, float] | None = None + dem: NDArrayf | MArrayf | RasterType, resolution: float | tuple[float, float] | None = None, ) -> NDArrayf | RasterType: - """ - Calculates the rugosity, the ratio between real area and planimetric area. Only available for a 3x3 window. The + """Calculates the rugosity, the ratio between real area and planimetric area. Only available for a 3x3 window. The output is unitless. Based on: Jenness (2004), https://doi.org/10.2193/0091-7648(2004)032[0829:CLSAFD]2.0.CO;2. @@ -1515,8 +1496,7 @@ def fractal_roughness(dem: RasterType, window_size: int = 13) -> RasterType: ... def fractal_roughness(dem: NDArrayf | MArrayf | RasterType, window_size: int = 13) -> NDArrayf | RasterType: - """ - Calculates the fractal roughness, the local 3D fractal dimension. Can only be computed on window sizes larger or + """Calculates the fractal roughness, the local 3D fractal dimension. 
Can only be computed on window sizes larger or equal to 5x5, defaults to 13x13. Output unit is a fractal dimension between 1 and 3. Based on: Taud et Parrot (2005), https://doi.org/10.4000/geomorphologie.622. diff --git a/xdem/vcrs.py b/xdem/vcrs.py index 6ab534a2e..899bcf049 100644 --- a/xdem/vcrs.py +++ b/xdem/vcrs.py @@ -23,6 +23,7 @@ import os import pathlib import warnings +from pathlib import Path from typing import Literal, TypedDict import pyproj @@ -58,32 +59,25 @@ def _parse_vcrs_name_from_product(product: str) -> str | None: - """ - Parse vertical CRS name from DEM product name. + """Parse vertical CRS name from DEM product name. :param product: Product name (typically from satimg.parse_metadata_from_fn). :return: vcrs_name: Vertical CRS name. """ - - if product in vcrs_dem_products.keys(): - vcrs_name = vcrs_dem_products[product] - else: - vcrs_name = None + vcrs_name = vcrs_dem_products.get(product) return vcrs_name def _build_ccrs_from_crs_and_vcrs(crs: CRS, vcrs: CRS | Literal["Ellipsoid"]) -> CompoundCRS | CRS: - """ - Build a compound CRS from a horizontal CRS and a vertical CRS. + """Build a compound CRS from a horizontal CRS and a vertical CRS. :param crs: Horizontal CRS. :param vcrs: Vertical CRS. :return: Compound CRS (horizontal + vertical). """ - # If a vertical CRS was passed, build a compound CRS with horizontal + vertical # This requires transforming the horizontal CRS to 2D in case it was 3D # Using CRS() because rasterio.CRS does not allow to call .name otherwise... @@ -105,14 +99,13 @@ def _build_ccrs_from_crs_and_vcrs(crs: CRS, vcrs: CRS | Literal["Ellipsoid"]) -> raise NotImplementedError( "pyproj >= 3.5.1 is required to demote a 3D CRS to 2D and be able to compound " "with a new vertical CRS. Update your dependencies or pass the 2D source CRS " - "manually." 
+ "manually.", ) # If 2D - else: - ccrs = CompoundCRS( - name="Horizontal: " + CRS(crs).name + "; Vertical: " + vcrs.name, - components=[crs_from, vcrs], - ) + ccrs = CompoundCRS( + name="Horizontal: " + CRS(crs).name + "; Vertical: " + vcrs.name, + components=[crs_from, vcrs], + ) # Else if "Ellipsoid" was passed, there is no vertical reference # We still have to return the CRS in 3D @@ -125,20 +118,18 @@ def _build_ccrs_from_crs_and_vcrs(crs: CRS, vcrs: CRS | Literal["Ellipsoid"]) -> def _build_vcrs_from_grid(grid: str, old_way: bool = False) -> CompoundCRS: - """ - Build a compound CRS from a vertical CRS grid path. + """Build a compound CRS from a vertical CRS grid path. :param grid: Path to grid for vertical reference. :param old_way: Whether to use the new or old way of building the compound CRS with pyproj (for testing purposes). :return: Compound CRS (horizontal + vertical). """ - - if not os.path.exists(os.path.join(pyproj.datadir.get_data_dir(), grid)): + if not Path(os.path.join(pyproj.datadir.get_data_dir(), grid)).exists(): warnings.warn( "Grid not found in " + str(pyproj.datadir.get_data_dir()) - + ". Attempting to download from https://cdn.proj.org/..." + + ". Attempting to download from https://cdn.proj.org/...", ) from pyproj.sync import _download_resource_file @@ -151,8 +142,8 @@ def _build_vcrs_from_grid(grid: str, old_way: bool = False) -> CompoundCRS: ) except http.client.InvalidURL: raise ValueError( - "The provided grid '{}' does not exist at https://cdn.proj.org/. " - "Provide an existing grid.".format(grid) + f"The provided grid '{grid}' does not exist at https://cdn.proj.org/. " + "Provide an existing grid.", ) # The old way: see https://gis.stackexchange.com/questions/352277/. 
@@ -166,7 +157,7 @@ def _build_vcrs_from_grid(grid: str, old_way: bool = False) -> CompoundCRS: else: # First, we build a bounds CRS (the vertical CRS relative to geographic) vertical_crs = VerticalCRS( - name="unknown using geoidgrids=" + grid, datum='VDATUM["unknown using geoidgrids=' + grid + '"]' + name="unknown using geoidgrids=" + grid, datum='VDATUM["unknown using geoidgrids=' + grid + '"]', ) geographic3d_crs = GeographicCRS( name="WGS 84", @@ -187,7 +178,7 @@ def _build_vcrs_from_grid(grid: str, old_way: bool = False) -> CompoundCRS: "name": "Geoid (height correction) model file", "value": grid, "id": {"authority": "EPSG", "code": 8666}, - } + }, ], }, ) @@ -209,14 +200,13 @@ class VCRSMetaDict(TypedDict, total=False): def _vcrs_from_crs(crs: CRS) -> CRS: """Get the vertical CRS from a CRS.""" - # Check if CRS is 3D if len(crs.axis_info) > 2: # Check if CRS has a vertical compound if any(subcrs.is_vertical for subcrs in crs.sub_crs_list): # Then we get the first vertical CRS (should be only one anyway) - vcrs = [subcrs for subcrs in crs.sub_crs_list if subcrs.is_vertical][0] + vcrs = next(subcrs for subcrs in crs.sub_crs_list if subcrs.is_vertical) # Otherwise, it's a 3D CRS based on an ellipsoid else: vcrs = "Ellipsoid" @@ -228,17 +218,15 @@ def _vcrs_from_crs(crs: CRS) -> CRS: def _vcrs_from_user_input( - vcrs_input: Literal["Ellipsoid"] | Literal["EGM08"] | Literal["EGM96"] | str | pathlib.Path | CRS | int, + vcrs_input: Literal["Ellipsoid", "EGM08", "EGM96"] | str | pathlib.Path | CRS | int, ) -> VerticalCRS | BoundCRS | Literal["Ellipsoid"]: - """ - Parse vertical CRS from user input. + """Parse vertical CRS from user input. :param vcrs_input: Vertical coordinate reference system either as a name ("Ellipsoid", "EGM08", "EGM96"), an EPSG code or pyproj.crs.VerticalCRS, or a path to a PROJ grid file (https://github.com/OSGeo/PROJ-data). :return: Vertical CRS. 
""" - # Raise errors if input type is wrong (allow CRS instead of VerticalCRS for broader error messages below) if not isinstance(vcrs_input, (str, pathlib.Path, CRS, int)): raise TypeError(f"New vertical CRS must be a string, path or VerticalCRS, received {type(vcrs_input)}.") @@ -261,29 +249,28 @@ def _vcrs_from_user_input( # Raise errors if the CRS constructed is not vertical or has other components if isinstance(vcrs, CRS) and not vcrs.is_vertical: raise ValueError( - "New vertical CRS must have a vertical axis, '{}' does not " - "(check with `CRS.is_vertical`).".format(vcrs.name) + f"New vertical CRS must have a vertical axis, '{vcrs.name}' does not " + "(check with `CRS.is_vertical`).", ) - elif isinstance(vcrs, CRS) and vcrs.is_vertical and len(vcrs.axis_info) > 2: + if isinstance(vcrs, CRS) and vcrs.is_vertical and len(vcrs.axis_info) > 2: warnings.warn( "New vertical CRS has a vertical dimension but also other components, " - "extracting the vertical reference only." + "extracting the vertical reference only.", ) vcrs = _vcrs_from_crs(vcrs) # If a string was passed + # If a name is passed, define CRS based on dict + elif isinstance(vcrs_input, str) and vcrs_input.upper() in _vcrs_meta: + vcrs_meta = _vcrs_meta[vcrs_input] + vcrs = CRS.from_epsg(vcrs_meta["epsg"]) + # Otherwise, attempt to read a grid from the string else: - # If a name is passed, define CRS based on dict - if isinstance(vcrs_input, str) and vcrs_input.upper() in _vcrs_meta.keys(): - vcrs_meta = _vcrs_meta[vcrs_input] - vcrs = CRS.from_epsg(vcrs_meta["epsg"]) - # Otherwise, attempt to read a grid from the string + if isinstance(vcrs_input, pathlib.Path): + grid = vcrs_input.name else: - if isinstance(vcrs_input, pathlib.Path): - grid = vcrs_input.name - else: - grid = vcrs_input - vcrs = _build_vcrs_from_grid(grid=grid) + grid = vcrs_input + vcrs = _build_vcrs_from_grid(grid=grid) return vcrs @@ -293,7 +280,7 @@ def _grid_from_user_input(vcrs_input: str | pathlib.Path | int | CRS) -> str | 
N # If a grid or name was passed, get grid name if isinstance(vcrs_input, (str, pathlib.Path)): # If the string is within the supported names - if isinstance(vcrs_input, str) and vcrs_input in _vcrs_meta.keys(): + if isinstance(vcrs_input, str) and vcrs_input in _vcrs_meta: grid = _vcrs_meta[vcrs_input]["grid"] # If it's a pathlib path elif isinstance(vcrs_input, pathlib.Path): @@ -312,10 +299,9 @@ def _grid_from_user_input(vcrs_input: str | pathlib.Path | int | CRS) -> str | N def _transform_zz( - crs_from: CRS, crs_to: CRS, xx: NDArrayf, yy: NDArrayf, zz: MArrayf | NDArrayf | int | float + crs_from: CRS, crs_to: CRS, xx: NDArrayf, yy: NDArrayf, zz: MArrayf | NDArrayf | int | float, ) -> MArrayf | NDArrayf | int | float: - """ - Transform elevation to a new 3D CRS. + """Transform elevation to a new 3D CRS. :param crs_from: Source CRS. :param crs_to: Destination CRS. @@ -325,7 +311,6 @@ def _transform_zz( :return: Transformed Z coordinates. """ - # Find all possible transforms with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Best transformation is not available") diff --git a/xdem/volume.py b/xdem/volume.py index 1ae5aeb57..b9a0a24e9 100644 --- a/xdem/volume.py +++ b/xdem/volume.py @@ -21,7 +21,8 @@ import logging import warnings -from typing import Any, Callable +from collections.abc import Callable +from typing import Any import matplotlib.pyplot as plt import numpy as np @@ -54,8 +55,7 @@ def hypsometric_binning( kind: str = "fixed", aggregation_function: Callable[[NDArrayf], float] = np.median, ) -> pd.DataFrame: - """ - Separate the dDEM in discrete elevation bins. + """Separate the dDEM in discrete elevation bins. The elevation bins will be calculated based on all ref_dem valid pixels. ddem may contain NaN/masked values over the same area, they will be excluded before the aggregation. 
@@ -127,7 +127,7 @@ def hypsometric_binning( # Collect the results in a dataframe output = pd.DataFrame( - index=pd.IntervalIndex.from_breaks(zbins), data=np.vstack([values, counts]).T, columns=["value", "count"] + index=pd.IntervalIndex.from_breaks(zbins), data=np.vstack([values, counts]).T, columns=["value", "count"], ) return output @@ -140,8 +140,7 @@ def interpolate_hypsometric_bins( order: int = 3, count_threshold: int | None = None, ) -> pd.DataFrame: - """ - Interpolate hypsometric bins using any valid Pandas interpolation technique. + """Interpolate hypsometric bins using any valid Pandas interpolation technique. NOTE: It will not extrapolate! @@ -171,9 +170,8 @@ def interpolate_hypsometric_bins( # Cannot interpolate -> leave as it is warnings.warn("Not enough valid bins for interpolation -> returning copy", UserWarning) return hypsometric_bins.copy() - else: - # Interpolate all bins that are NaN. - bins[value_column] = bins[value_column].interpolate(method=method, order=order, limit_direction="both") + # Interpolate all bins that are NaN. + bins[value_column] = bins[value_column].interpolate(method=method, order=order, limit_direction="both") # If some points were temporarily set to NaN (to exclude from the interpolation), re-set them. if count_threshold is not None: @@ -192,8 +190,7 @@ def fit_hypsometric_bins_poly( iterations: int = 1, count_threshold: int | None = None, ) -> pd.Series: - """ - Fit a polynomial to the hypsometric bins. + """Fit a polynomial to the hypsometric bins. :param hypsometric_bins: Bins where nans will be interpolated. :param value_column: The name of the column in 'hypsometric_bins' to use as values. 
@@ -235,7 +232,9 @@ def fit_hypsometric_bins_poly( # Save as pandas' DataFrame output = pd.DataFrame( - index=hypsometric_bins.index, data=np.vstack([interpolated_values, bins["count"]]).T, columns=["value", "count"] + index=hypsometric_bins.index, + data=np.vstack([interpolated_values, bins["count"]]).T, + columns=["value", "count"], ) return output @@ -247,8 +246,7 @@ def calculate_hypsometry_area( pixel_size: float | tuple[float, float], timeframe: str = "reference", ) -> pd.Series: - """ - Calculate the associated representative area of the given dDEM bins. + """Calculate the associated representative area of the given dDEM bins. By default, the area bins will be representative of the mean timing between the reference and nonreference DEM: elevations = ref_dem - (h_vs_dh_funcion(ref_dem) / 2) @@ -268,7 +266,7 @@ def calculate_hypsometry_area( if timeframe not in ["reference", "nonreference", "mean"]: raise ValueError( - f"Argument 'timeframe={timeframe}' is invalid. Choices: ['reference', 'nonreference', 'mean']." + f"Argument 'timeframe={timeframe}' is invalid. Choices: ['reference', 'nonreference', 'mean'].", ) if isinstance(ddem_bins, pd.DataFrame): @@ -280,7 +278,7 @@ def calculate_hypsometry_area( # Generate a continuous elevation vs. dDEM function ddem_func = scipy.interpolate.interp1d( - ddem_bins.index.mid, ddem_bins.values, kind="linear", fill_value="extrapolate" + ddem_bins.index.mid, ddem_bins.values, kind="linear", fill_value="extrapolate", ) # Generate average elevations by subtracting half of the dDEM's values to the reference DEM @@ -310,8 +308,7 @@ def idw_interpolation( extrapolate: bool = False, force_fill: bool = False, ) -> NDArrayf: - """ - Interpolate a 2D array using rasterio's fillnodata. + """Interpolate a 2D array using rasterio's fillnodata. :param array: An array with NaNs or a masked array to interpolate. 
:param max_search_distance: The maximum number of pixels to search in all directions to find values \ @@ -322,20 +319,20 @@ def idw_interpolation( extrapolate: bool = False, force_fill: bool = False, ) -> NDArrayf: """ if not _has_cv2: - raise ValueError("Optional dependency needed. Install 'opencv'.") + raise ValueError("Optional dependency needed. Install 'opencv'.") # Create a mask for where nans exist nan_mask = get_mask_from_array(array) interpolated_array = rasterio.fill.fillnodata( - array.copy(), mask=(~nan_mask).astype("uint8"), max_search_distance=max_search_distance + array.copy(), mask=(~nan_mask).astype("uint8"), max_search_distance=max_search_distance, ) # Remove extrapolated values: gaps up to the size of max_search_distance are kept, # but surfaces that artificially grow on the edges are removed if not extrapolate: interp_mask = cv2.morphologyEx( - (~nan_mask).squeeze().astype("uint8"), cv2.MORPH_CLOSE, kernel=np.ones((max_search_distance - 1,) * 2) + (~nan_mask).squeeze().astype("uint8"), cv2.MORPH_CLOSE, kernel=np.ones((max_search_distance - 1,) * 2), ).astype("bool") if np.ndim(array) == 3: interpolated_array[:, ~interp_mask] = np.nan @@ -362,8 +359,7 @@ def hypsometric_interpolation( ref_dem: NDArrayf | MArrayf, mask: NDArrayf, ) -> MArrayf: - """ - Interpolate a dDEM using hypsometric interpolation within the given mask. + """Interpolate a dDEM using hypsometric interpolation within the given mask. Using `ref_dem`, elevation bins of constant height (hard-coded to 50 m for now) are created. Gaps in `voided-ddem`, within the provided `mask`, are filled with the median dDEM value within that bin. 
@@ -394,7 +390,7 @@ def hypsometric_interpolation( interpolated_gradient = xdem.volume.interpolate_hypsometric_bins(gradient) gradient_model = scipy.interpolate.interp1d( - interpolated_gradient.index.mid, interpolated_gradient["value"].values, fill_value="extrapolate" + interpolated_gradient.index.mid, interpolated_gradient["value"].values, fill_value="extrapolate", ) # Create an idealized dDEM using the relationship between elevation and dDEM @@ -417,11 +413,10 @@ def local_hypsometric_interpolation( mask: NDArrayf, min_coverage: float = 0.2, count_threshold: int | None = 1, - nodata: float | int = -9999, + nodata: int | float = -9999, plot: bool = False, ) -> MArrayf: - """ - Interpolate a dDEM using local hypsometric interpolation. + """Interpolate a dDEM using local hypsometric interpolation. The algorithm loops through each features in the vector file. The dDEM is assumed to have been created as "voided_ddem = reference_dem - other_dem". @@ -503,15 +498,16 @@ def local_hypsometric_interpolation( # At least 2 points needed for interp1d, if not skip feature nvalues = len(interpolated_gradient["value"].values) - if nvalues < 2: + nvalues_min = 2 + if nvalues < nvalues_min: warnings.warn( - f"Not enough valid bins for feature with index {index:d} -> skipping interpolation", UserWarning + f"Not enough valid bins for feature with index {index:d} -> skipping interpolation", UserWarning, ) continue # Create a model for 2D interpolation gradient_model = scipy.interpolate.interp1d( - interpolated_gradient.index.mid, interpolated_gradient["value"].values, fill_value="extrapolate" + interpolated_gradient.index.mid, interpolated_gradient["value"].values, fill_value="extrapolate", ) if plot: @@ -522,11 +518,11 @@ def local_hypsometric_interpolation( plt.figure(figsize=(12, 8)) plt.subplot(121) plt.imshow( - (mask == index)[rowmin:rowmax, colmin:colmax], cmap="Greys", vmin=0, vmax=2, interpolation="none" + (mask == index)[rowmin:rowmax, colmin:colmax], cmap="Greys", vmin=0, 
vmax=2, interpolation="none", ) plt.imshow( - local_ddem[rowmin:rowmax, colmin:colmax], cmap="RdYlBu", vmin=-vmax, vmax=vmax, interpolation="none" + local_ddem[rowmin:rowmax, colmin:colmax], cmap="RdYlBu", vmin=-vmax, vmax=vmax, interpolation="none", ) plt.colorbar() plt.title(f"ddem for geometry # {index:d}") @@ -560,7 +556,7 @@ def local_hypsometric_interpolation( corrected_ddem[~np.isfinite(corrected_ddem)] = nodata output = np.ma.masked_array( - corrected_ddem, mask=(corrected_ddem == nodata) # mask=((mask != 0) & (ddem_mask | dem_mask)) + corrected_ddem, mask=(corrected_ddem == nodata), # mask=((mask != 0) & (ddem_mask | dem_mask)) ).reshape(orig_shape) assert output is not None @@ -575,8 +571,7 @@ def get_regional_hypsometric_signal( n_bins: int = 20, min_coverage: float = 0.05, ) -> pd.DataFrame: - """ - Get the normalized regional hypsometric elevation change signal, read "the general shape of it". + """Get the normalized regional hypsometric elevation change signal, read "the general shape of it". :param ddem: The dDEM to analyse. :param ref_dem: A void-free reference DEM. @@ -675,10 +670,10 @@ def norm_regional_hypsometric_interpolation( min_coverage: float = 0.1, regional_signal: pd.DataFrame | None = None, min_elevation_range: float = 0.33, + *, idealized_ddem: bool = False, ) -> NDArrayf: - """ - Interpolate missing values by scaling the normalized regional hypsometric signal to each glacier separately. + """Interpolate missing values by scaling the normalized regional hypsometric signal to each glacier separately. Only missing values are interpolated. The rest of the glacier's values are fixed. @@ -706,7 +701,7 @@ def norm_regional_hypsometric_interpolation( # If the regional signal was not given as an argument, find it from the dDEM. 
if regional_signal is None: regional_signal = get_regional_hypsometric_signal( - ddem=ddem_arr, ref_dem=ref_arr, glacier_index_map=glacier_index_map + ddem=ddem_arr, ref_dem=ref_arr, glacier_index_map=glacier_index_map, ) # The unique indices are the unique glaciers. @@ -716,7 +711,7 @@ def norm_regional_hypsometric_interpolation( ddem_filled = ddem_arr.copy() # Loop over all glaciers and fill the dDEM accordingly. for i in tqdm( - unique_indices, desc="Interpolating dDEM", disable=logging.getLogger().getEffectiveLevel() > logging.INFO + unique_indices, desc="Interpolating dDEM", disable=logging.getLogger().getEffectiveLevel() > logging.INFO, ): if i == 0: # i==0 is assumed to mean stable ground. continue @@ -777,7 +772,7 @@ def norm_regional_hypsometric_interpolation( # The weights are the squared inverse of the standard deviation of each bin. bin_weights = bin_stds["value"].values[non_empty_bins] / np.sqrt( - hypsometric_bins["count"].values[non_empty_bins] + hypsometric_bins["count"].values[non_empty_bins], ) bin_weights[bin_weights == 0.0] = 1e-8 # Avoid divide by zero problems. @@ -796,7 +791,7 @@ def norm_regional_hypsometric_interpolation( # Create a linear model from the elevations and the scaled regional signal. model = scipy.interpolate.interp1d( - signal.index.mid, np.poly1d(coeffs)(signal.values), bounds_error=False, fill_value="extrapolate" + signal.index.mid, np.poly1d(coeffs)(signal.values), bounds_error=False, fill_value="extrapolate", ) # Find which values to fill using the model (all nans within the glacier extent)