From cdfdcfdb2e60bd49e317214e9074212e814030f3 Mon Sep 17 00:00:00 2001 From: vwmaus Date: Thu, 23 Jan 2020 17:10:09 +0000 Subject: [PATCH] Update scripts for data release. Add scripts to create mining grid layer release. --- post-processing/calculate_area_weights.py | 111 ++++++++++++++++++ .../create_1arcsecond_grid_tiles.R | 53 +++++++++ post-processing/create_data_release.R | 47 ++++++-- 3 files changed, 199 insertions(+), 12 deletions(-) create mode 100755 post-processing/calculate_area_weights.py create mode 100644 post-processing/create_1arcsecond_grid_tiles.R diff --git a/post-processing/calculate_area_weights.py b/post-processing/calculate_area_weights.py new file mode 100755 index 0000000..1ddab7a --- /dev/null +++ b/post-processing/calculate_area_weights.py @@ -0,0 +1,111 @@ +#!/usr/bin/python +import sys +import argparse +import multiprocessing +import rasterio +import fiona +import numpy as np +from shapely.geometry import shape, box, MultiPolygon +from joblib import Parallel, delayed +from rasterio import features +from affine import Affine + +# initiate the parser +parser = argparse.ArgumentParser() + +# add long and short argument +parser.add_argument("--inputfile", "-i", help="set path to the input geometries. The input file must have polygons/multipolygons") +parser.add_argument("--outputfile", "-o", help="set path to output raster file") +parser.add_argument("--xmin", "-xmin", help="set the bounding coordinates of the output raster. The coordinates must have the same CRS as the input file") +parser.add_argument("--xmax", "-xmax", help="set the bounding coordinates of the output raster. The coordinates must have the same CRS as the input file") +parser.add_argument("--ymin", "-ymin", help="set the bounding coordinates of the output raster. The coordinates must have the same CRS as the input file") +parser.add_argument("--ymax", "-ymax", help="set the bounding coordinates of the output raster. The coordinates must have the same CRS as the input file") +parser.add_argument("--nrow", "-nrow", help="set number of rows of the output raster file") +parser.add_argument("--ncol", "-ncol", help="set number of columns of the output raster file") + +# read arguments from the command line +args = parser.parse_args() + +def _rasterize_geom(geom, dim, affine_trans, all_touched): + out_array = features.rasterize( + [(geom, 1)], + out_shape = dim, + transform = affine_trans, + fill = 0, + all_touched = all_touched) + return out_array + +def _calculate_cell_coverage(r, c, affine_trans, geom): + + # Construct the geometry of grid cell from its boundaries + window = ((r, r+1), (c, c+1)) + ((row_min, row_max), (col_min, col_max)) = window + x_min, y_min = (col_min, row_max) * affine_trans + x_max, y_max = (col_max, row_min) * affine_trans + bounds = (x_min, y_min, x_max, y_max) + cell = box(*bounds) + + # get the intersection between the cell and the polygons + cell_intersection = cell.intersection(geom) + + # calculate the percentage of cell covered by the polygon + cell_coverage = int(cell_intersection.area / cell.area * 100) + + return cell_coverage + +#def main(argv): +if __name__ == "__main__": + + # convert arguments to numeric data types + #GDAL: (c, a, b, f, d, e) + #Affine: (a, b, c, d, e, f) + xmin = float(args.xmin) + xmax = float(args.xmax) + ymin = float(args.ymin) + ymax = float(args.ymax) + dim = (int(args.nrow), int(args.ncol)) + dy = abs((ymax - ymin) / dim[0]) + dx = abs((xmax - xmin) / dim[1]) + affine_trans = Affine(dx, 0.0, xmin, 0.0, -dy, ymax) + + # open input geometries and transform to destination crs + feat = fiona.open(args.inputfile) + + # select featuers within bounding box + print "Selecting featuers within bounding box" + feat_intersecting = feat.filter(bbox=(xmin, ymin, xmax, ymax)) + geom = MultiPolygon([shape(pol['geometry']) for pol in feat_intersecting]) + + if geom.is_empty: + sys.exit("warning: Nothing to do. There are not geometries overlapping the bounding box. Check the bounding coordinates and the polygons CRS") + + profile = { + 'affine': affine_trans, + 'height': dim[0], + 'width': dim[1], + 'count': 1, + 'crs': {'init': feat.crs['init']}, + 'driver': 'GTiff', + 'dtype': 'uint8', + 'compress': 'lzw', + 'nodata': None, + 'tiled': False, + 'transform': affine_trans} + + # fill grid cells intersecting polygons with 100% + print "Filling grid cells 100% coverd by polygons" + perc_raster = _rasterize_geom(geom, dim, affine_trans, all_touched=True) * 100 + + # get cells touching polygons borders + print "Selecting cells touching polygons borders" + boundary = _rasterize_geom(geom.boundary, dim, affine_trans, all_touched=True) + idx = zip(*np.where(boundary == 1)) + rows, cols = zip(*idx) + + # calculate percentage of coverage for cells touching polygons borders + num_cores = multiprocessing.cpu_count() + print "Calculate percentage of coverage for cells touching polygons borders using ", num_cores, " cores" + perc_raster[rows, cols] = Parallel(n_jobs=num_cores)(delayed(_calculate_cell_coverage)(i, j, affine_trans, geom) for i, j in idx) + + with rasterio.open(args.outputfile, 'w', **profile) as dst: + dst.write(perc_raster, 1) diff --git a/post-processing/create_1arcsecond_grid_tiles.R b/post-processing/create_1arcsecond_grid_tiles.R new file mode 100644 index 0000000..57507dd --- /dev/null +++ b/post-processing/create_1arcsecond_grid_tiles.R @@ -0,0 +1,53 @@ +# -------------------------------------------------------------------------------------- +# this script creates grid talies with the percentage of mining cover per grid cell +# based ond the mining polygons +# -------------------------------------------------------------------------------------- +release_version <- "v1" + +# -------------------------------------------------------------------------------------- +# required packages -------------------------------------------------------------------- +# this script also depends on the python libraries: sys, argparse, multiprocessing, +# rasterio, fiona, numpy, shapely, joblib, delayed, affine and the python script ./calculate_area_weights.py +library(tidyverse) +library(raster) +readRenviron(".Renviron") + +# -------------------------------------------------------------------------------------- +# define input parameters -------------------------------------------------------------- +release_version <- "v1" +mining_polygons_file_path <- paste0("./global_mining_polygons_",release_version,".gpkg") +mining_gir_path <- Sys.getenv("mining_gir_path") # Output path +tile_grid_path <- Sys.getenv("tile_grid_path") # Hansen_GFC-2017-v1.5_treecover2000* files +dir.create(mining_gir_path, recursive = TRUE, showWarnings = FALSE) + +# -------------------------------------------------------------------------------------- +# create mining grid tiles ------------------------------------------------------------- +tiles <- dir(tile_grid_path, pattern = ".tif$", full.names = TRUE) +pb <- progress_estimated(length(tiles)) +for(tl in tiles){ + + # get tile + r_tl <- raster::raster(tl) + + # set output file name + out_tl_path <- basename(tl) %>% + stringr::str_replace("Hansen_GFC-2017-v1.5_treecover2000", paste0("/miningcover_1arcsecond_",release_version)) %>% + stringr::str_glue(mining_gir_path, .) + + # calculate tile area weights + # For help see: system("./calculate_area_weights.py -h") + system(paste0("./calculate_area_weights.py \\ + -i ",mining_polygons_file_path," \\ + -o ",out_tl_path," \\ + -xmin ",extent(r_tl)[1]," \\ + -xmax ",extent(r_tl)[2]," \\ + -ymin ",extent(r_tl)[3]," \\ + -ymax ",extent(r_tl)[4]," \\ + -ncol ",ncol(r_tl)," \\ + -nrow ",nrow(r_tl))) + + pb$tick()$print() + +} + + diff --git a/post-processing/create_data_release.R b/post-processing/create_data_release.R index 9a9a12c..4821131 100644 --- a/post-processing/create_data_release.R +++ b/post-processing/create_data_release.R @@ -1,7 +1,5 @@ # -------------------------------------------------------------------------------------- # this script creates a release version of the mining polygons ------------------------- -# define release version --------------------------------------------------------------- -release_version <- "v1" # -------------------------------------------------------------------------------------- # required packages -------------------------------------------------------------------- @@ -12,18 +10,22 @@ library(lwgeom) library(units) library(nngeo) library(sf) +library(raster) +readRenviron(".Renviron") + +# define release version --------------------------------------------------------------- +release_version <- "v1" # -------------------------------------------------------------------------------------- # get raw data from PostGIS database --------------------------------------------------- conn <- DBI::dbConnect(RPostgreSQL::PostgreSQL(), - host = Sys.getenv("db_host"), - port = Sys.getenv("db_port"), - dbname = Sys.getenv("db_name"), - user = Sys.getenv("db_user"), - password = Sys.getenv("db_password")) + host = Sys.getenv("db_host"), + port = Sys.getenv("db_port"), + dbname = Sys.getenv("db_name"), + user = Sys.getenv("db_user"), + password = Sys.getenv("db_password")) raw_mining_polygons <- sf::st_read(conn, "mine_polygon") -raw_mining_polygons <- sf::st_read("global_mining_polygons_v1r5_raw.gpkg") DBI::dbDisconnect(conn) # -------------------------------------------------------------------------------------- @@ -78,7 +80,7 @@ country_names <- mining_polygons %>% dplyr::ungroup() %>% dplyr::select(-area) %>% dplyr::bind_rows(country_names) - + # correct for missing intersection by selecting the closest country country_names <- world_map %>% dplyr::slice(sf::st_nearest_feature(mining_polygons[which(sapply(ids_intersects, length) < 1),], .)) %>% @@ -100,13 +102,14 @@ mining_polygons <- mining_polygons %>% sf::st_transform(gfcanalysis::utm_zone(sf::as_Spatial(.), proj4string = TRUE)) %>% sf::st_area() %>% units::set_units(km^2) - }) - ) + }) + ) # -------------------------------------------------------------------------------------- # write release data to GeoPackage ----------------------------------------------------- +path_to_mining_polygons <- paste0("./global_mining_polygons_",release_version,".gpkg") sf::st_write(mining_polygons, layer = "mining_polygons", - dsn = paste0("./global_mining_polygons_",release_version,".gpkg"), delete_dsn = TRUE) + dsn = path_to_mining_polygons, delete_dsn = TRUE) # -------------------------------------------------------------------------------------- # write summary of mining aea per country in (km2) ------------------------------------- @@ -120,4 +123,24 @@ mining_polygons %>% dplyr::arrange(dplyr::desc(AREA)) %>% readr::write_csv(paste0("./global_mining_area_per_country_",release_version,".csv")) +# -------------------------------------------------------------------------------------- +# create 30sec global grid with a percentage of mining coverage per cell --------------- +# For help see: system("./calculate_area_weights.py -h") +tmp_file <- tempfile(pattern = "file", tmpdir = tempdir(), fileext = ".tif") +system.time(system(paste0("./calculate_area_weights.py \\ + -i ",path_to_mining_polygons," \\ + -o ",tmp_file," \\ + -xmin ", -180," \\ + -xmax ", 180," \\ + -ymin ", -90," \\ + -ymax ", 90," \\ + -ncol ", 43200," \\ + -nrow ", 21600))) +path_to_land_mask_raster <- Sys.getenv("path_to_land_mask_raster") +path_to_mining_30sec_grid <- paste0("./global_miningcover_30sec_",release_version,".tif") +raster::beginCluster(n = 12) +system.time(raster::clusterR(x = raster::stack(list(tmp_file, path_to_land_mask_raster)), + fun = raster::overlay, args = list(fun = function(x, m) x * m), + filename = path_to_mining_30sec_grid, datatype = 'INT2U', options = c("compress=LZW"), overwrite = TRUE, verbose = TRUE)) +raster::endCluster()