Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

replace probability fractions with NA in classification results #1239

Merged
merged 4 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions R/api_classify.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,6 @@
# Should bbox of resulting tile be updated?
update_bbox <- nrow(chunks) != nchunks
}
# Compute fractions probability
probs_fractions <- 1 / length(.ml_labels(ml_model))
# Process jobs in parallel
block_files <- .jobs_map_parallel_chr(chunks, function(chunk) {
# Job block
Expand Down Expand Up @@ -171,10 +169,9 @@
scale <- .scale(band_conf)
if (.has(scale) && scale != 1) {
values <- values / scale
probs_fractions <- probs_fractions / scale
}
# Mask NA pixels with same probabilities for all classes
values[na_mask, ] <- probs_fractions
# Put NA back in the result
values[na_mask, ] <- NA
# Log
.debug_log(
event = "start_block_data_save",
Expand Down
12 changes: 11 additions & 1 deletion R/api_download.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@
)
# Try to download
while (n_tries > 0) {
# Check if the output file already exists
if (.raster_is_valid(output_file)) {
local_asset <- .tile_from_file(
file = output_file, base_tile = asset,
band = .tile_bands(asset), update_bbox = TRUE,
labels = .tile_labels(asset)
)

return(local_asset)
}
# Update token (for big tiffs and slow networks)
asset <- .cube_token_generator(asset)
# Crop and download
Expand All @@ -50,7 +60,7 @@
output_file = output_file,
gdal_params = gdal_params
),
default = NULL
.default = NULL
)
# Check if the downloaded file is valid
if (.has(local_asset) && .raster_is_valid(output_file)) {
Expand Down
32 changes: 19 additions & 13 deletions src/smooth_bayes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ IntegerVector locus_neigh(int size, int leg) {
}
return res;
}

// [[Rcpp::export]]
NumericVector bayes_smoother_fraction(const NumericMatrix& logits,
const int& nrows,
Expand All @@ -31,8 +32,6 @@ NumericVector bayes_smoother_fraction(const NumericMatrix& logits,
// compute locus mirror
IntegerVector loci = locus_neigh(nrows, leg);
IntegerVector locj = locus_neigh(ncols, leg);
// compute number of neighbors to be used
int neigh_high = std::ceil(neigh_fraction * window_size * window_size);
// compute values for each pixel
for (int i = 0; i < nrows; ++i) {
for (int j = 0; j < ncols; ++j) {
Expand All @@ -43,27 +42,34 @@ NumericVector bayes_smoother_fraction(const NumericMatrix& logits,
for (int wj = 0; wj < window_size; ++wj)
neigh(wi * window_size + wj) =
logits(loci(wi + i) * ncols + locj(wj + j), band);
// remove NA
NumericVector neigh2 = na_omit(neigh);
if (neigh_fraction < 1.0)
// Sort the neighbor logit values
neigh.sort(true);
// Create a vector to store the highest values
neigh2.sort(true);
// compute number of neighbors to be used
int neigh_high = std::ceil(neigh_fraction * neigh2.length());
// create a vector to store the highest values
NumericVector high_values(neigh_high);
// copy the highest values to the new vector
int nh = 0;
for(NumericVector::iterator it = neigh.begin();
it != neigh.begin() + neigh_high; ++it) {
for(NumericVector::iterator it = neigh2.begin();
it != neigh2.begin() + neigh_high; ++it) {
high_values(nh++) = (*it);
}
// get the estimates for prior
// normal with mean m0 and variance s0
double s0 = var(high_values);
double m0 = mean(high_values);
double s0 = var(noNA(high_values));
double m0 = mean(noNA(high_values));
// get the current value
double x0 = logits(i * ncols + j, band);
// weight for Bayesian estimator
double w = s0/(s0 + smoothness(band));
// apply Bayesian smoother
res(i * ncols + j, band) = w * x0 + (1 - w) * m0;
if (std::isnan(x0)) {
res(i * ncols + j, band) = m0;
} else {
// weight for Bayesian estimator
double w = s0/(s0 + smoothness(band));
// apply Bayesian smoother
res(i * ncols + j, band) = w * x0 + (1 - w) * m0;
}
}
}
}
Expand Down
74 changes: 74 additions & 0 deletions tests/testthat/test-classification.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,77 @@ test_that("Classify error bands 1", {
)
)
})

test_that("Classify with NA values", {
  # Load the local MODIS NDVI test cube shipped with the package
  data_dir <- system.file("extdata/raster/mod13q1", package = "sits")
  raster_cube <- sits_cube(
    source = "BDC",
    collection = "MOD13Q1-6.1",
    data_dir = data_dir,
    tiles = "012010",
    bands = "NDVI",
    start_date = "2013-09-14",
    end_date = "2014-08-29",
    multicores = 2,
    progress = FALSE
  )
  # Preparation - directory for the NA-injected cube
  # (distinct name so the source `data_dir` above is not shadowed)
  na_cube_dir <- file.path(tempdir(), "na-cube")
  dir.create(na_cube_dir, recursive = TRUE, showWarnings = FALSE)
  # Preparation - replace high NDVI values with NA to simulate missing data
  raster_cube <- sits_apply(
    data = raster_cube,
    NDVI_NA = ifelse(NDVI > 0.5, NA, NDVI),
    output_dir = na_cube_dir
  )
  raster_cube <- sits_select(raster_cube, bands = "NDVI_NA")
  # Rename the derived band back to "NDVI" so it matches the model's bands
  # (NOTE(review): relies on the internal `.fi` accessor - confirm if the
  # internal API changes)
  .fi(raster_cube) <- .fi(raster_cube) |>
    dplyr::mutate(band = "NDVI")
  # Preparation - train a small random forest model on the package samples
  rfor_model <- sits_train(samples_modis_ndvi, sits_rfor(num_trees = 40))
  # Classification must propagate NA pixels into the class map instead of
  # assigning them equal probabilities
  class_map <- sits_classify(
    data = raster_cube,
    ml_model = rfor_model,
    output_dir = tempdir(),
    progress = FALSE
  )
  class_map_rst <- terra::rast(class_map[["file_info"]][[1]][["path"]])
  expect_true(anyNA(class_map_rst[]))
  # Clean up intermediate files so later tests see a pristine tempdir
  unlink(na_cube_dir, recursive = TRUE)
})

test_that("Classify with exclusion mask", {
  # Local MODIS NDVI cube used as classification input
  cube_dir <- system.file("extdata/raster/mod13q1", package = "sits")
  cube <- sits_cube(
    source = "BDC",
    collection = "MOD13Q1-6.1",
    data_dir = cube_dir,
    tiles = "012010",
    bands = "NDVI",
    start_date = "2013-09-14",
    end_date = "2014-08-29",
    multicores = 2,
    progress = FALSE
  )
  # Random forest model trained on the package sample set
  model <- sits_train(samples_modis_ndvi, sits_rfor(num_trees = 40))
  # Region to be excluded from classification (lon/lat bounding box)
  masked_region <- c(
    xmin = -55.63478,
    ymin = -11.63328,
    xmax = -55.54080,
    ymax = -11.56978
  )
  # Pixels inside the exclusion mask are expected to come back as NA
  result <- suppressWarnings(
    sits_classify(
      data = cube,
      ml_model = model,
      output_dir = tempdir(),
      exclusion_mask = masked_region,
      progress = FALSE
    )
  )
  result_rst <- terra::rast(result[["file_info"]][[1]][["path"]])
  expect_true(anyNA(result_rst[]))
})
38 changes: 38 additions & 0 deletions tests/testthat/test-cube_copy.R
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,41 @@ test_that("Copy remote cube works (specific region with resampling)", {

unlink(data_dir, recursive = TRUE)
})

test_that("Copy invalid files", {
  # Local MODIS test cube shipped with the package
  cube_dir <- system.file("extdata/raster/mod13q1", package = "sits")
  cube <- sits_cube(
    source = "BDC",
    collection = "MOD13Q1-6.1",
    data_dir = cube_dir,
    multicores = 2,
    progress = FALSE
  )
  # Corrupt every file path except the first one, simulating a cube that
  # contains invalid files (the first row is left intact to bypass the
  # cube validity check)
  .fi(cube) <- dplyr::mutate(
    .fi(cube),
    path = ifelse(
      dplyr::row_number() == 1,
      path,
      paste0(path, "_invalid-file")
    )
  )
  copied <- sits_cube_copy(
    cube = cube,
    output_dir = tempdir(),
    progress = FALSE
  )
  # Only the single valid file should survive the copy
  expect_equal(nrow(copied), 1)
  expect_equal(length(sits_timeline(copied)), 1)
  # Clean up the copied file
  unlink(copied[["file_info"]][[1]][["path"]])
})
Loading