Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

replace probability fractions with NA in classification results #1239

Merged
merged 4 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions R/api_classify.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,6 @@
# Should bbox of resulting tile be updated?
update_bbox <- nrow(chunks) != nchunks
}
# Compute fractions probability
probs_fractions <- 1 / length(.ml_labels(ml_model))
# Process jobs in parallel
block_files <- .jobs_map_parallel_chr(chunks, function(chunk) {
# Job block
Expand Down Expand Up @@ -171,10 +169,9 @@
scale <- .scale(band_conf)
if (.has(scale) && scale != 1) {
values <- values / scale
probs_fractions <- probs_fractions / scale
}
# Mask NA pixels with same probabilities for all classes
values[na_mask, ] <- probs_fractions
# Put NA back in the result
values[na_mask, ] <- NA
# Log
.debug_log(
event = "start_block_data_save",
Expand Down
12 changes: 11 additions & 1 deletion R/api_download.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@
)
# Try to download
while (n_tries > 0) {
# Check if the output file already exists
if (.raster_is_valid(output_file)) {
local_asset <- .tile_from_file(
file = output_file, base_tile = asset,
band = .tile_bands(asset), update_bbox = TRUE,
labels = .tile_labels(asset)
)

return(local_asset)
}
# Update token (for big tiffs and slow networks)
asset <- .cube_token_generator(asset)
# Crop and download
Expand All @@ -50,7 +60,7 @@
output_file = output_file,
gdal_params = gdal_params
),
default = NULL
.default = NULL
)
# Check if the downloaded file is valid
if (.has(local_asset) && .raster_is_valid(output_file)) {
Expand Down
32 changes: 19 additions & 13 deletions src/smooth_bayes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ IntegerVector locus_neigh(int size, int leg) {
}
return res;
}

// [[Rcpp::export]]
NumericVector bayes_smoother_fraction(const NumericMatrix& logits,
const int& nrows,
Expand All @@ -31,8 +32,6 @@ NumericVector bayes_smoother_fraction(const NumericMatrix& logits,
// compute locus mirror
IntegerVector loci = locus_neigh(nrows, leg);
IntegerVector locj = locus_neigh(ncols, leg);
// compute number of neighbors to be used
int neigh_high = std::ceil(neigh_fraction * window_size * window_size);
// compute values for each pixel
for (int i = 0; i < nrows; ++i) {
for (int j = 0; j < ncols; ++j) {
Expand All @@ -43,27 +42,34 @@ NumericVector bayes_smoother_fraction(const NumericMatrix& logits,
for (int wj = 0; wj < window_size; ++wj)
neigh(wi * window_size + wj) =
logits(loci(wi + i) * ncols + locj(wj + j), band);
// remove NA
NumericVector neigh2 = na_omit(neigh);
if (neigh_fraction < 1.0)
// Sort the neighbor logit values
neigh.sort(true);
// Create a vector to store the highest values
neigh2.sort(true);
// compute number of neighbors to be used
int neigh_high = std::ceil(neigh_fraction * neigh2.length());
// create a vector to store the highest values
NumericVector high_values(neigh_high);
// copy the highest values to the new vector
int nh = 0;
for(NumericVector::iterator it = neigh.begin();
it != neigh.begin() + neigh_high; ++it) {
for(NumericVector::iterator it = neigh2.begin();
it != neigh2.begin() + neigh_high; ++it) {
high_values(nh++) = (*it);
}
// get the estimates for prior
// normal with mean m0 and variance s0
double s0 = var(high_values);
double m0 = mean(high_values);
double s0 = var(noNA(high_values));
double m0 = mean(noNA(high_values));
// get the current value
double x0 = logits(i * ncols + j, band);
// weight for Bayesian estimator
double w = s0/(s0 + smoothness(band));
// apply Bayesian smoother
res(i * ncols + j, band) = w * x0 + (1 - w) * m0;
if (std::isnan(x0)) {
res(i * ncols + j, band) = m0;
} else {
// weight for Bayesian estimator
double w = s0/(s0 + smoothness(band));
// apply Bayesian smoother
res(i * ncols + j, band) = w * x0 + (1 - w) * m0;
}
}
}
}
Expand Down
74 changes: 74 additions & 0 deletions tests/testthat/test-classification.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,77 @@ test_that("Classify error bands 1", {
)
)
})

test_that("Classify with NA values", {
  # Load the local MODIS NDVI test cube shipped with the package
  data_dir <- system.file("extdata/raster/mod13q1", package = "sits")
  raster_cube <- sits_cube(
    source = "BDC",
    collection = "MOD13Q1-6.1",
    data_dir = data_dir,
    tiles = "012010",
    bands = "NDVI",
    start_date = "2013-09-14",
    end_date = "2014-08-29",
    multicores = 2,
    progress = FALSE
  )
  # Preparation - directory for the NA-injected cube
  # (distinct name so the source `data_dir` above is not shadowed)
  na_cube_dir <- file.path(tempdir(), "na-cube")
  dir.create(na_cube_dir, recursive = TRUE, showWarnings = FALSE)
  # Preparation - replace high NDVI values with NA to simulate missing data
  raster_cube <- sits_apply(
    data = raster_cube,
    NDVI_NA = ifelse(NDVI > 0.5, NA, NDVI),
    output_dir = na_cube_dir
  )
  raster_cube <- sits_select(raster_cube, bands = "NDVI_NA")
  # Rename the derived band back to "NDVI" so it matches the model's bands
  # (NOTE(review): relies on the internal `.fi` accessor - confirm if the
  # internal API changes)
  .fi(raster_cube) <- .fi(raster_cube) |>
    dplyr::mutate(band = "NDVI")
  # Preparation - train a small random forest model on the package samples
  rfor_model <- sits_train(samples_modis_ndvi, sits_rfor(num_trees = 40))
  # Classification must propagate NA pixels into the class map instead of
  # assigning them equal probabilities
  class_map <- sits_classify(
    data = raster_cube,
    ml_model = rfor_model,
    output_dir = tempdir(),
    progress = FALSE
  )
  class_map_rst <- terra::rast(class_map[["file_info"]][[1]][["path"]])
  expect_true(anyNA(class_map_rst[]))
  # Clean up intermediate files so later tests see a pristine tempdir
  unlink(na_cube_dir, recursive = TRUE)
})

test_that("Classify with exclusion mask", {
  # Local MODIS NDVI cube used as classification input
  cube_dir <- system.file("extdata/raster/mod13q1", package = "sits")
  cube <- sits_cube(
    source = "BDC",
    collection = "MOD13Q1-6.1",
    data_dir = cube_dir,
    tiles = "012010",
    bands = "NDVI",
    start_date = "2013-09-14",
    end_date = "2014-08-29",
    multicores = 2,
    progress = FALSE
  )
  # Random forest model trained on the package sample set
  model <- sits_train(samples_modis_ndvi, sits_rfor(num_trees = 40))
  # Region to be excluded from classification (lon/lat bounding box)
  masked_region <- c(
    xmin = -55.63478,
    ymin = -11.63328,
    xmax = -55.54080,
    ymax = -11.56978
  )
  # Pixels inside the exclusion mask are expected to come back as NA
  result <- suppressWarnings(
    sits_classify(
      data = cube,
      ml_model = model,
      output_dir = tempdir(),
      exclusion_mask = masked_region,
      progress = FALSE
    )
  )
  result_rst <- terra::rast(result[["file_info"]][[1]][["path"]])
  expect_true(anyNA(result_rst[]))
})
38 changes: 38 additions & 0 deletions tests/testthat/test-cube_copy.R
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,41 @@ test_that("Copy remote cube works (specific region with resampling)", {

unlink(data_dir, recursive = TRUE)
})

test_that("Copy invalid files", {
  # Local MODIS test cube shipped with the package
  cube_dir <- system.file("extdata/raster/mod13q1", package = "sits")
  cube <- sits_cube(
    source = "BDC",
    collection = "MOD13Q1-6.1",
    data_dir = cube_dir,
    multicores = 2,
    progress = FALSE
  )
  # Corrupt every file path except the first one, simulating a cube that
  # contains invalid files (the first row is left intact to bypass the
  # cube validity check)
  .fi(cube) <- dplyr::mutate(
    .fi(cube),
    path = ifelse(
      dplyr::row_number() == 1,
      path,
      paste0(path, "_invalid-file")
    )
  )
  copied <- sits_cube_copy(
    cube = cube,
    output_dir = tempdir(),
    progress = FALSE
  )
  # Only the single valid file should survive the copy
  expect_equal(nrow(copied), 1)
  expect_equal(length(sits_timeline(copied)), 1)
  # Clean up the copied file
  unlink(copied[["file_info"]][[1]][["path"]])
})
Loading