From ec7b5c4609d4381487ec21748af05e759fed4f63 Mon Sep 17 00:00:00 2001 From: Felipe Carlos Date: Fri, 15 Nov 2024 23:40:35 -0300 Subject: [PATCH 1/3] replace probability fractions with NA in classification results --- R/api_classify.R | 7 +-- tests/testthat/test-classification.R | 74 ++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 5 deletions(-) diff --git a/R/api_classify.R b/R/api_classify.R index e9c238758..74fc88256 100755 --- a/R/api_classify.R +++ b/R/api_classify.R @@ -106,8 +106,6 @@ # Should bbox of resulting tile be updated? update_bbox <- nrow(chunks) != nchunks } - # Compute fractions probability - probs_fractions <- 1 / length(.ml_labels(ml_model)) # Process jobs in parallel block_files <- .jobs_map_parallel_chr(chunks, function(chunk) { # Job block @@ -171,10 +169,9 @@ scale <- .scale(band_conf) if (.has(scale) && scale != 1) { values <- values / scale - probs_fractions <- probs_fractions / scale } - # Mask NA pixels with same probabilities for all classes - values[na_mask, ] <- probs_fractions + # Put NA back in the result + values[na_mask, ] <- NA # Log .debug_log( event = "start_block_data_save", diff --git a/tests/testthat/test-classification.R b/tests/testthat/test-classification.R index 4256d9bbe..3657818f4 100644 --- a/tests/testthat/test-classification.R +++ b/tests/testthat/test-classification.R @@ -56,3 +56,77 @@ test_that("Classify error bands 1", { ) ) }) + +test_that("Classify with NA values", { + # load cube + data_dir <- system.file("extdata/raster/mod13q1", package = "sits") + raster_cube <- sits_cube( + source = "BDC", + collection = "MOD13Q1-6.1", + data_dir = data_dir, + tiles = "012010", + bands = "NDVI", + start_date = "2013-09-14", + end_date = "2014-08-29", + multicores = 2, + progress = FALSE + ) + # preparation - create directory to save NA + data_dir <- paste0(tempdir(), "/na-cube") + dir.create(data_dir, recursive = TRUE, showWarnings = FALSE) + # preparation - insert NA in cube + raster_cube <- sits_apply( + data = raster_cube, + NDVI_NA = ifelse(NDVI > 0.5, NA, NDVI), + output_dir = data_dir + ) + raster_cube <- sits_select(raster_cube, bands = "NDVI_NA") + .fi(raster_cube) <- .fi(raster_cube) |> + dplyr::mutate(band = "NDVI") + # preparation - create a random forest model + rfor_model <- sits_train(samples_modis_ndvi, sits_rfor(num_trees = 40)) + # test classification with NA + class_map <- sits_classify( + data = raster_cube, + ml_model = rfor_model, + output_dir = tempdir(), + progress = FALSE + ) + class_map_rst <- terra::rast(class_map[["file_info"]][[1]][["path"]]) + expect_true(anyNA(class_map_rst[])) +}) + +test_that("Classify with exclusion mask", { + # load cube + data_dir <- system.file("extdata/raster/mod13q1", package = "sits") + raster_cube <- sits_cube( + source = "BDC", + collection = "MOD13Q1-6.1", + data_dir = data_dir, + tiles = "012010", + bands = "NDVI", + start_date = "2013-09-14", + end_date = "2014-08-29", + multicores = 2, + progress = FALSE + ) + # preparation - create a random forest model + rfor_model <- sits_train(samples_modis_ndvi, sits_rfor(num_trees = 40)) + # test classification with NA + class_map <- suppressWarnings( + sits_classify( + data = raster_cube, + ml_model = rfor_model, + output_dir = tempdir(), + exclusion_mask = c( + xmin = -55.63478, + ymin = -11.63328, + xmax = -55.54080, + ymax = -11.56978 + ), + progress = FALSE + ) + ) + class_map_rst <- terra::rast(class_map[["file_info"]][[1]][["path"]]) + expect_true(anyNA(class_map_rst[])) +}) From 4f47c8e3840004ac87ed765538d19baecd85ff13 Mon Sep 17 00:00:00 2001 From: Felipe Carlos Date: Mon, 18 Nov 2024 15:48:15 -0300 Subject: [PATCH 2/3] fix already existing file detection in sits_cube_copy --- R/api_download.R | 12 ++++++++++- tests/testthat/test-cube_copy.R | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/R/api_download.R b/R/api_download.R index 051048d62..ae9c87e39 100644 --- a/R/api_download.R +++ b/R/api_download.R @@ -40,6 +40,16 @@ ) # Try to download while (n_tries > 0) { + # Check if the output file already exists + if (.raster_is_valid(output_file)) { + local_asset <- .tile_from_file( + file = output_file, base_tile = asset, + band = .tile_bands(asset), update_bbox = TRUE, + labels = .tile_labels(asset) + ) + + return(local_asset) + } # Update token (for big tiffs and slow networks) asset <- .cube_token_generator(asset) # Crop and download @@ -50,7 +60,7 @@ output_file = output_file, gdal_params = gdal_params ), - default = NULL + .default = NULL ) # Check if the downloaded file is valid if (.has(local_asset) && .raster_is_valid(output_file)) { diff --git a/tests/testthat/test-cube_copy.R b/tests/testthat/test-cube_copy.R index ca593b08f..31316a895 100644 --- a/tests/testthat/test-cube_copy.R +++ b/tests/testthat/test-cube_copy.R @@ -214,3 +214,41 @@ test_that("Copy remote cube works (specific region with resampling)", { unlink(data_dir, recursive = TRUE) }) + +test_that("Copy invalid files", { + data_dir <- system.file("extdata/raster/mod13q1", package = "sits") + + cube <- sits_cube( + source = "BDC", + collection = "MOD13Q1-6.1", + data_dir = data_dir, + multicores = 2, + progress = FALSE + ) + + # Editing cube with invalid files + # (skipping the first line to bypass the cube check and simulate a + # cube containing invalid files) + .fi(cube) <- .fi(cube) |> + dplyr::mutate( + path = ifelse( + dplyr::row_number() > 1, + paste0(path, "_invalid-file"), + path + ) + ) + + + cube_local <- sits_cube_copy( + cube = cube, + output_dir = tempdir(), + progress = FALSE + ) + + expect_equal(nrow(cube_local), 1) + expect_equal(length(sits_timeline(cube_local)), 1) + + # Clean + files <- cube_local[["file_info"]][[1]][["path"]] + unlink(files) +}) From 4956cdce1255264070e3d918d9ae902f57ea4436 Mon Sep 17 00:00:00 2001 From: Felipe Carlos Date: Mon, 18 Nov 2024 18:02:27 -0300 Subject: [PATCH 3/3] handling NA values in smooth_bayes --- src/smooth_bayes.cpp | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/smooth_bayes.cpp b/src/smooth_bayes.cpp index 8450b1083..2b36a6c6a 100644 --- a/src/smooth_bayes.cpp +++ b/src/smooth_bayes.cpp @@ -15,6 +15,7 @@ IntegerVector locus_neigh(int size, int leg) { } return res; } + // [[Rcpp::export]] NumericVector bayes_smoother_fraction(const NumericMatrix& logits, const int& nrows, @@ -31,8 +32,6 @@ NumericVector bayes_smoother_fraction(const NumericMatrix& logits, // compute locus mirror IntegerVector loci = locus_neigh(nrows, leg); IntegerVector locj = locus_neigh(ncols, leg); - // compute number of neighbors to be used - int neigh_high = std::ceil(neigh_fraction * window_size * window_size); // compute values for each pixel for (int i = 0; i < nrows; ++i) { for (int j = 0; j < ncols; ++j) { @@ -43,27 +42,34 @@ NumericVector bayes_smoother_fraction(const NumericMatrix& logits, for (int wj = 0; wj < window_size; ++wj) neigh(wi * window_size + wj) = logits(loci(wi + i) * ncols + locj(wj + j), band); + // remove NA + NumericVector neigh2 = na_omit(neigh); if (neigh_fraction < 1.0) - // Sort the neighbor logit values - neigh.sort(true); - // Create a vector to store the highest values + neigh2.sort(true); + // compute number of neighbors to be used + int neigh_high = std::ceil(neigh_fraction * neigh2.length()); + // create a vector to store the highest values NumericVector high_values(neigh_high); // copy the highest values to the new vector int nh = 0; - for(NumericVector::iterator it = neigh.begin(); - it != neigh.begin() + neigh_high; ++it) { + for(NumericVector::iterator it = neigh2.begin(); + it != neigh2.begin() + neigh_high; ++it) { high_values(nh++) = (*it); } // get the estimates for prior // normal with mean m0 and variance s0 - double s0 = var(high_values); - double m0 = mean(high_values); + double s0 = var(noNA(high_values)); + double m0 = mean(noNA(high_values)); // get the current value double x0 = logits(i * ncols + j, band); - // weight for Bayesian estimator - double w = s0/(s0 + smoothness(band)); - // apply Bayesian smoother - res(i * ncols + j, band) = w * x0 + (1 - w) * m0; + if (std::isnan(x0)) { + res(i * ncols + j, band) = m0; + } else { + // weight for Bayesian estimator + double w = s0/(s0 + smoothness(band)); + // apply Bayesian smoother + res(i * ncols + j, band) = w * x0 + (1 - w) * m0; + } } } }