diff --git a/post-processing/.gitignore b/post-processing/.gitignore
index 655c3ed..9a23a9b 100644
--- a/post-processing/.gitignore
+++ b/post-processing/.gitignore
@@ -4,4 +4,5 @@
 *.zip
 *.geojson
 *.csv
-aux.R
\ No newline at end of file
+aux.R
+output
diff --git a/post-processing/R/gs_create_jenks_breaks_color_palette.R b/post-processing/R/gs_create_jenks_breaks_color_palette.R
new file mode 100644
index 0000000..aa4e71f
--- /dev/null
+++ b/post-processing/R/gs_create_jenks_breaks_color_palette.R
@@ -0,0 +1,17 @@
+gs_create_jenks_breaks_color_palette <- function(src, k, option = "inferno", begin = 0.2, end = 1.0, direction = -1){
+
+  z_value <- raster::raster(src) %>%
+    raster::values()
+
+  z_value <- z_value[!is.na(z_value)]
+
+  z_class <- BAMMtools::getJenksBreaks(z_value, k = k)
+
+  m_class <- matrix(c(0, z_class[-length(z_class)], z_class), ncol = 2)
+
+  m_class <- cbind(m_class, viridis::viridis(option = option, begin = begin, end = end, direction = direction, n = k)) %>%
+    cbind(paste(paste0("(", m_class[,1]), paste0(m_class[,2], "]"), sep = ","))
+
+  return(m_class)
+
+}
diff --git a/post-processing/R/gs_create_sld_color_palette.R b/post-processing/R/gs_create_sld_color_palette.R
new file mode 100644
index 0000000..a547bc5
--- /dev/null
+++ b/post-processing/R/gs_create_sld_color_palette.R
@@ -0,0 +1,44 @@
+gs_create_sld_color_palette <- function(x){
+
+  sld <- xml2::read_xml('
+    <sld:StyledLayerDescriptor xmlns="http://www.opengis.net/sld"
+                               xmlns:sld="http://www.opengis.net/sld"
+                               xmlns:ogc="http://www.opengis.net/ogc"
+                               xmlns:gml="http://www.opengis.net/gml"
+                               version="1.0.0">
+      <sld:UserLayer>
+        <sld:UserStyle>
+          <sld:Name>grid</sld:Name>
+          <sld:FeatureTypeStyle>
+            <sld:Rule>
+              <sld:RasterSymbolizer>
+                <sld:Opacity>1</sld:Opacity>
+                <sld:ColorMap>
+                </sld:ColorMap>
+              </sld:RasterSymbolizer>
+            </sld:Rule>
+          </sld:FeatureTypeStyle>
+        </sld:UserStyle>
+      </sld:UserLayer>
+    </sld:StyledLayerDescriptor>
+        ')
+
+  x[nrow(x),2] <- as.numeric(x[nrow(x),2]) + 1
+
+  for(j in 1:nrow(x)){
+    sld %>%
+      xml2::xml_child(., "sld:UserLayer") %>%
+      xml2::xml_child(., "sld:UserStyle") %>%
+      xml2::xml_child(., "sld:FeatureTypeStyle") %>%
+      xml2::xml_child(., "sld:Rule") %>%
+      xml2::xml_child(., "sld:RasterSymbolizer") %>%
+      xml2::xml_child(., "sld:ColorMap") %>%
+      xml2::xml_add_child(., "sld:ColorMapEntry", color=stringr::str_sub(x[j,3], start = 1, end = 7), label=x[j,4], opacity="1", quantity=x[j,2], .where = "after") %>%
+      xml2::xml_root()
+  }
+
+  # message(sld)
+  return(sld)
+
+}
diff --git a/post-processing/calculate_area_weights.py b/post-processing/calculate_area_weights.py
index 1ddab7a..9ca5a2e 100755
--- a/post-processing/calculate_area_weights.py
+++ b/post-processing/calculate_area_weights.py
@@ -1,15 +1,15 @@
 #!/usr/bin/python
 import sys
 import argparse
-import multiprocessing
 import rasterio
 import fiona
 import numpy as np
 from shapely.geometry import shape, box, MultiPolygon
-from joblib import Parallel, delayed
 from rasterio import features
 from affine import Affine
-
+import multiprocessing
+from functools import partial
+
 # initiate the parser
 parser = argparse.ArgumentParser()
@@ -35,10 +35,11 @@ def _rasterize_geom(geom, dim, affine_trans, all_touched):
                                       all_touched = all_touched)
     return out_array

-def _calculate_cell_coverage(r, c, affine_trans, geom):
-
+def _calculate_cell_coverage(idx, affine_trans, geom):
+    # idx = (row, col)
+
     # Construct the geometry of grid cell from its boundaries
-    window = ((r, r+1), (c, c+1))
+    window = ((idx[0], idx[0]+1), (idx[1], idx[1]+1))
     ((row_min, row_max), (col_min, col_max)) = window
     x_min, y_min = (col_min, row_max) * affine_trans
     x_max, y_max = (col_max, row_min) * affine_trans
@@ -49,16 +50,15 @@
     cell_intersection = cell.intersection(geom)

     # calculate the percentage of cell covered by the polygon
-    cell_coverage = int(cell_intersection.area / cell.area * 100)
-
+    cell_coverage = int(round(cell_intersection.area / cell.area * 100))
+
     return cell_coverage

-#def main(argv):
 if __name__ == "__main__":
     # convert arguments to numeric data types
-    #GDAL: (c, a, b, f, d, e)
-    #Affine: (a, b, c, d, e, f)
+    # GDAL: (c, a, b, f, d, e)
+    # Affine: (a, b, c, d, e, f)
     xmin = float(args.xmin)
     xmax = float(args.xmax)
     ymin = float(args.ymin)
@@ -104,8 +104,11 @@ def _calculate_cell_coverage(r, c, affine_trans, geom):
     # calculate percentage of coverage for cells touching polygons borders
     num_cores = multiprocessing.cpu_count()
-    print "Calculate percentage of coverage for cells touching polygons borders using ", num_cores, " cores"
-    perc_raster[rows, cols] = Parallel(n_jobs=num_cores)(delayed(_calculate_cell_coverage)(i, j, affine_trans, geom) for i, j in idx)
+    print "Calculate percentage of coverage for", len(rows), "cells touching polygons borders using ", num_cores, " cores"
+    # perc_raster[rows, cols] = Parallel(n_jobs=num_cores)(delayed(_calculate_cell_coverage)(i, j, affine_trans, geom) for i, j in idx)
+    pool = multiprocessing.Pool(processes=num_cores)
+    perc_raster[rows, cols] = pool.map(partial(_calculate_cell_coverage, affine_trans=affine_trans, geom=geom), idx)
+
     print "Writing results to ", args.outputfile
     with rasterio.open(args.outputfile, 'w', **profile) as dst:
         dst.write(perc_raster, 1)
diff --git a/post-processing/create_data_release.R b/post-processing/create_data_release.R
index 4821131..54e50e7 100644
--- a/post-processing/create_data_release.R
+++ b/post-processing/create_data_release.R
@@ -11,22 +11,20 @@
 library(units)
 library(nngeo)
 library(sf)
 library(raster)
-readRenviron(".Renviron")
+library(xml2)
+library(stringr)
+library(viridis)
+source("./R/gs_create_sld_color_palette.R")
+source("./R/gs_create_jenks_breaks_color_palette.R")
+
 # define release version ---------------------------------------------------------------
 release_version <- "v1"
+dir.create("./output", showWarnings = FALSE, recursive = TRUE)
 # --------------------------------------------------------------------------------------

-# get raw data from PostGIS database ---------------------------------------------------
-conn <- DBI::dbConnect(RPostgreSQL::PostgreSQL(),
-                       host = Sys.getenv("db_host"),
-                       port = Sys.getenv("db_port"),
-                       dbname = Sys.getenv("db_name"),
-                       user = Sys.getenv("db_user"),
-                       password = Sys.getenv("db_password"))
-
-raw_mining_polygons <- sf::st_read(conn, "mine_polygon")
-DBI::dbDisconnect(conn)
+# get raw data -------------------------------------------------------------------------
+raw_mining_polygons <- sf::st_read("./input/global_mining_polygons_v1r6_raw.gpkg")
 # --------------------------------------------------------------------------------------

 # clean overlaps, invalid shapes, and holes smaller than 1ha ---------------------------
@@ -44,18 +42,18 @@ mining_polygons <- raw_mining_polygons %>%
 # --------------------------------------------------------------------------------------

 # get world map from Eurostat ---------------------------------------------------------
-if(!file.exists("./countries_polygon.gpkg")){
+if(!file.exists("./output/countries_polygon.gpkg")){
   download.file(url = "https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/countries/download/ref-countries-2016-01m.geojson.zip",
-                destfile = "./ref-countries-2016-01m.geojson.zip", mode = "w")
-  unzip(zipfile = "./ref-countries-2016-01m.geojson.zip", files = "CNTR_RG_01M_2016_4326.geojson", exdir = "./", overwrite = TRUE)
-  sf::st_read(dsn = "./CNTR_RG_01M_2016_4326.geojson") %>%
+                destfile = "./output/ref-countries-2016-01m.geojson.zip", mode = "w")
+  unzip(zipfile = "./output/ref-countries-2016-01m.geojson.zip", files = "CNTR_RG_01M_2016_4326.geojson", exdir = "./output", overwrite = TRUE)
"./output/ref-countries-2016-01m.geojson.zip", files = "CNTR_RG_01M_2016_4326.geojson", exdir = "./output", overwrite = TRUE) + sf::st_read(dsn = "./output/CNTR_RG_01M_2016_4326.geojson") %>% dplyr::select(ISO3_CODE, COUNTRY_NAME = NAME_ENGL) %>% lwgeom::st_make_valid() %>% sf::st_cast("POLYGON") %>% - sf::st_write(dsn = "./countries_polygon.gpkg", delete_dsn = TRUE) + sf::st_write(dsn = "./output/countries_polygon.gpkg", delete_dsn = TRUE) } -world_map <- sf::st_read(dsn = "./countries_polygon.gpkg") %>% +world_map <- sf::st_read(dsn = "./output/countries_polygon.gpkg") %>% sf::st_transform("+proj=laea +datum=WGS84") # -------------------------------------------------------------------------------------- @@ -107,7 +105,7 @@ mining_polygons <- mining_polygons %>% # -------------------------------------------------------------------------------------- # write release data to GeoPackage ----------------------------------------------------- -path_to_mining_polygons <- paste0("./global_mining_polygons_",release_version,".gpkg") +path_to_mining_polygons <- paste0("./output/global_mining_polygons_",release_version,".gpkg") sf::st_write(mining_polygons, layer = "mining_polygons", dsn = path_to_mining_polygons, delete_dsn = TRUE) @@ -121,26 +119,73 @@ mining_polygons %>% dplyr::summarise(AREA = sum(AREA)) %>% dplyr::ungroup() %>% dplyr::arrange(dplyr::desc(AREA)) %>% - readr::write_csv(paste0("./global_mining_area_per_country_",release_version,".csv")) + readr::write_csv(paste0("./output/global_mining_area_per_country_",release_version,".csv")) # -------------------------------------------------------------------------------------- -# create 30sec global grid with a percentage of mining coverage per cell --------------- +# create global 30arcsecond grid (approximately 1 kilometer) +# with the mining area weights per cell # For help see: system("./calculate_area_weights.py -h") -tmp_file <- tempfile(pattern = "file", tmpdir = tempdir(), fileext = ".tif") +path_to_area_weights <- "./output/tmp_global_mining_area_weights_30arcsecond.tif" +path_to_land_mask <- Sys.getenv(paste0("path_to_land_mask_30arcsecond")) system.time(system(paste0("./calculate_area_weights.py \\ -i ",path_to_mining_polygons," \\ - -o ",tmp_file," \\ + -o ",path_to_area_weights," \\ -xmin ", -180," \\ -xmax ", 180," \\ -ymin ", -90," \\ -ymax ", 90," \\ - -ncol ", 43200," \\ - -nrow ", 21600))) + -ncol ", 43200," \\ + -nrow ", 21600))) -path_to_land_mask_raster <- Sys.getenv("path_to_land_mask_raster") -path_to_mining_30sec_grid <- paste0("./global_miningcover_30sec_",release_version,".tif") +# mask land 30arcsecond grid (approximately 1 kilometer) +raster::rasterOptions(progress = "text") raster::beginCluster(n = 12) -system.time(raster::clusterR(x = raster::stack(list(tmp_file, path_to_land_mask_raster)), - fun = raster::overlay, args = list(fun = function(x, m) x * m), - filename = path_to_mining_30sec_grid, datatype = 'INT2U', options = c("compress=LZW"), overwrite = TRUE, verbose = TRUE)) + +r_prec <- raster::stack(raster::raster(path_to_land_mask), raster::raster(path_to_area_weights)) %>% + raster::clusterR(raster::overlay, args = list(fun = function(m, w) m * w), + filename = paste0("./output/global_miningarea_percentage_", release_version,"_30arcsecond.tif"), + datatype = 'INT1U', options = "compress=LZW", overwrite = TRUE, verbose = TRUE) + +r_area <- raster::stack(list(a = raster::area(r_prec), w = r_prec)) %>% + raster::clusterR(raster::overlay, args = list(fun = function(a, w) a * w / 100), + filename = 
paste0("./output/global_miningarea_", release_version,"_30arcsecond.tif"), + datatype = 'FLT4S', options = "compress=LZW", overwrite = TRUE, verbose = TRUE) + +grid_levels <- tibble::tribble( ~name, ~fact, + "5arcminute", 10, # aggregation factor from 30arcsecond to 5arcminute (approximately 10 kilometer) + "30arcminute", 60) # aggregation factor from 30arcsecond to 30arcminute (approximately 55 kilometer) + +for(g in 1:nrow(grid_levels) ){ + + fact <- grid_levels$fact[g] + grid_name <- grid_levels$name[g] + fname_perc <- paste0("./output/global_miningarea_percentage_",release_version,"_",grid_name,".tif") + fname_area <- paste0("./output/global_miningarea_",release_version,"_",grid_name,".tif") + + print(paste("Writing", grid_name, "grid to", fname_perc)) + print(paste("Aggregation factor ", fact)) + + print(paste("Aggregate grid cell area to ", grid_name)) + r_agg_area <- raster::aggregate(r_area, fact = fact, fun = sum, na.rm = TRUE, + filename = fname_area, datatype = 'FLT4S', options = "compress=LZW", overwrite = TRUE, verbose = TRUE) + + + r_agg_prec <- raster::stack(list(ta = raster::area(r_agg_area), ma = r_agg_area)) %>% + raster::clusterR(raster::overlay, args = list(fun = function(ta, ma) round(ma / ta * 100)), + filename = fname_perc, datatype = 'INT1U', options = "compress=LZW", overwrite = TRUE, verbose = TRUE) + +} + +# Create Geoserver visualization layer 5arcminute grid (approximately 1 kilometer) +gs_file <- paste0("./output/global_miningarea_percentage_",release_version,"_",grid_name,"_gs.tif") +paste0("./output/global_miningarea_percentage_",release_version,"_5arcminute.tif") %>% + raster::raster() %>% + raster::clusterR(raster::overlay, args = list(fun = function(w) w / w * w), + filename = gs_file, datatype = 'INT1U', options = "compress=LZW", overwrite = TRUE, verbose = TRUE) + +print("Calculating Jenks Natural Breaks for visualization") +gs_create_jenks_breaks_color_palette(src = gs_file, k = 10) %>% + gs_create_sld_color_palette() %>% + xml2::write_xml(stringr::str_replace_all(gs_file, ".tif", ".xml")) + raster::endCluster()