Skip to content

Commit

Permalink
Add fixes raster processing. Add processing for Geoserver WMS
Browse files Browse the repository at this point in the history
  • Loading branch information
vwmaus committed Feb 14, 2020
1 parent 1285101 commit 3bde680
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 43 deletions.
3 changes: 2 additions & 1 deletion post-processing/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
*.zip
*.geojson
*.csv
aux.R
aux.R
output
17 changes: 17 additions & 0 deletions post-processing/R/gs_create_jenks_breaks_color_palette.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
gs_create_jenks_breaks_color_palette <- function(src, k, option = "inferno", begin = 0.2, end = 1.0, direction = -1){

z_value <- raster::raster(src) %>%
raster::values()

z_value <- z_value[!is.na(z_value)]

z_class <- BAMMtools::getJenksBreaks(z_value, k = k)

m_class <- matrix(c(0, z_class[-length(z_class)], z_class), ncol = 2)

m_class <- cbind(m_class, viridis::viridis(option = "inferno", begin = begin, end = end, direction = direction, n = k)) %>%
cbind(paste(paste0("(", m_class[,1]), paste0(m_class[,2], "]"), sep = ","))

return(m_class)

}
44 changes: 44 additions & 0 deletions post-processing/R/gs_create_sld_color_palette.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
gs_create_sld_color_palette <- function(x){

sld <- xml2::read_xml('<?xml version="1.0" encoding="UTF-8"?>
<sld:StyledLayerDescriptor xmlns="http://www.opengis.net/sld" xmlns:gml="http://www.opengis.net/gml" xmlns:ogc="http://www.opengis.net/ogc" xmlns:sld="http://www.opengis.net/sld" version="1.0.0">
<sld:UserLayer>
<sld:LayerFeatureConstraints>
<sld:FeatureTypeConstraint/>
</sld:LayerFeatureConstraints>
<sld:UserStyle>
<sld:Title/>
<sld:FeatureTypeStyle>
<sld:Rule>
<sld:RasterSymbolizer>
<sld:Geometry>
<ogc:PropertyName>grid</ogc:PropertyName>
</sld:Geometry>
<sld:Opacity>1</sld:Opacity>
<sld:ColorMap type="intervals" extended="true">
</sld:ColorMap>
</sld:RasterSymbolizer>
</sld:Rule>
</sld:FeatureTypeStyle>
</sld:UserStyle>
</sld:UserLayer>
</sld:StyledLayerDescriptor>')

x[nrow(x),2] <- as.numeric(x[nrow(x),2]) + 1

for(j in 1:nrow(x)){
sld %>%
xml2::xml_child(., "sld:UserLayer") %>%
xml2::xml_child(., "sld:UserStyle") %>%
xml2::xml_child(., "sld:FeatureTypeStyle") %>%
xml2::xml_child(., "sld:Rule") %>%
xml2::xml_child(., "sld:RasterSymbolizer") %>%
xml2::xml_child(., "sld:ColorMap") %>%
xml2::xml_add_child(., "sld:ColorMapEntry", color=stringr::str_sub(x[j,3], start = 1, end = 7), label=x[j,4], opacity="1", quantity=x[j,2], .where = "after") %>%
xml_root()
}

# message(sld)
return(sld)

}
29 changes: 16 additions & 13 deletions post-processing/calculate_area_weights.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#!/usr/bin/python
import sys
import argparse
import multiprocessing
import rasterio
import fiona
import numpy as np
from shapely.geometry import shape, box, MultiPolygon
from joblib import Parallel, delayed
from rasterio import features
from affine import Affine

import multiprocessing
from functools import partial

# initiate the parser
parser = argparse.ArgumentParser()

Expand All @@ -35,10 +35,11 @@ def _rasterize_geom(geom, dim, affine_trans, all_touched):
all_touched = all_touched)
return out_array

def _calculate_cell_coverage(r, c, affine_trans, geom):

def _calculate_cell_coverage(idx, affine_trans, geom):
# idx = (row, col)

# Construct the geometry of grid cell from its boundaries
window = ((r, r+1), (c, c+1))
window = ((idx[0], idx[0]+1), (idx[1], idx[1]+1))
((row_min, row_max), (col_min, col_max)) = window
x_min, y_min = (col_min, row_max) * affine_trans
x_max, y_max = (col_max, row_min) * affine_trans
Expand All @@ -49,16 +50,15 @@ def _calculate_cell_coverage(r, c, affine_trans, geom):
cell_intersection = cell.intersection(geom)

# calculate the percentage of cell covered by the polygon
cell_coverage = int(cell_intersection.area / cell.area * 100)
cell_coverage = int(round(cell_intersection.area / cell.area * 100))

return cell_coverage

#def main(argv):
if __name__ == "__main__":

# convert arguments to numeric data types
#GDAL: (c, a, b, f, d, e)
#Affine: (a, b, c, d, e, f)
# GDAL: (c, a, b, f, d, e)
# Affine: (a, b, c, d, e, f)
xmin = float(args.xmin)
xmax = float(args.xmax)
ymin = float(args.ymin)
Expand Down Expand Up @@ -104,8 +104,11 @@ def _calculate_cell_coverage(r, c, affine_trans, geom):

# calculate percentage of coverage for cells touching polygons borders
num_cores = multiprocessing.cpu_count()
print "Calculate percentage of coverage for cells touching polygons borders using ", num_cores, " cores"
perc_raster[rows, cols] = Parallel(n_jobs=num_cores)(delayed(_calculate_cell_coverage)(i, j, affine_trans, geom) for i, j in idx)
print "Calculate percentage of coverage for", len(rows), "cells touching polygons borders using ", num_cores, " cores"
# perc_raster[rows, cols] = Parallel(n_jobs=num_cores)(delayed(_calculate_cell_coverage)(i, j, affine_trans, geom) for i, j in idx)
pool = multiprocessing.Pool(processes=num_cores)
perc_raster[rows, cols] = pool.map(partial(_calculate_cell_coverage, affine_trans=affine_trans, geom=geom), idx)

print "Writing results to ", args.outputfile
with rasterio.open(args.outputfile, 'w', **profile) as dst:
dst.write(perc_raster, 1)
103 changes: 74 additions & 29 deletions post-processing/create_data_release.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,20 @@ library(units)
library(nngeo)
library(sf)
library(raster)
readRenviron(".Renviron")
library(xml2)
library(stringr)
library(viridis)
source("./R/gs_create_sld_color_palette.R")
source("./R/gs_create_jenks_breaks_color_palette.R")


# define release version ---------------------------------------------------------------
release_version <- "v1"
dir.create("./output", showWarnings = FALSE, recursive = TRUE)

# --------------------------------------------------------------------------------------
# get raw data from PostGIS database ---------------------------------------------------
conn <- DBI::dbConnect(RPostgreSQL::PostgreSQL(),
host = Sys.getenv("db_host"),
port = Sys.getenv("db_port"),
dbname = Sys.getenv("db_name"),
user = Sys.getenv("db_user"),
password = Sys.getenv("db_password"))

raw_mining_polygons <- sf::st_read(conn, "mine_polygon")
DBI::dbDisconnect(conn)
# get raw data -------------------------------------------------------------------------
raw_mining_polygons <- sf::st_read("./input/global_mining_polygons_v1r6_raw.gpkg")

# --------------------------------------------------------------------------------------
# clean overlaps, invalid shapes, and holes smaller than 1ha ---------------------------
Expand All @@ -44,18 +42,18 @@ mining_polygons <- raw_mining_polygons %>%

# --------------------------------------------------------------------------------------
# get world map from Eurostat ---------------------------------------------------------
if(!file.exists("./countries_polygon.gpkg")){
if(!file.exists("./output/countries_polygon.gpkg")){
download.file(url = "https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/countries/download/ref-countries-2016-01m.geojson.zip",
destfile = "./ref-countries-2016-01m.geojson.zip", mode = "w")
unzip(zipfile = "./ref-countries-2016-01m.geojson.zip", files = "CNTR_RG_01M_2016_4326.geojson", exdir = "./", overwrite = TRUE)
sf::st_read(dsn = "./CNTR_RG_01M_2016_4326.geojson") %>%
destfile = "./output/ref-countries-2016-01m.geojson.zip", mode = "w")
unzip(zipfile = "./output/ref-countries-2016-01m.geojson.zip", files = "CNTR_RG_01M_2016_4326.geojson", exdir = "./output", overwrite = TRUE)
sf::st_read(dsn = "./output/CNTR_RG_01M_2016_4326.geojson") %>%
dplyr::select(ISO3_CODE, COUNTRY_NAME = NAME_ENGL) %>%
lwgeom::st_make_valid() %>%
sf::st_cast("POLYGON") %>%
sf::st_write(dsn = "./countries_polygon.gpkg", delete_dsn = TRUE)
sf::st_write(dsn = "./output/countries_polygon.gpkg", delete_dsn = TRUE)
}

world_map <- sf::st_read(dsn = "./countries_polygon.gpkg") %>%
world_map <- sf::st_read(dsn = "./output/countries_polygon.gpkg") %>%
sf::st_transform("+proj=laea +datum=WGS84")

# --------------------------------------------------------------------------------------
Expand Down Expand Up @@ -107,7 +105,7 @@ mining_polygons <- mining_polygons %>%

# --------------------------------------------------------------------------------------
# write release data to GeoPackage -----------------------------------------------------
path_to_mining_polygons <- paste0("./global_mining_polygons_",release_version,".gpkg")
path_to_mining_polygons <- paste0("./output/global_mining_polygons_",release_version,".gpkg")
sf::st_write(mining_polygons, layer = "mining_polygons",
dsn = path_to_mining_polygons, delete_dsn = TRUE)

Expand All @@ -121,26 +119,73 @@ mining_polygons %>%
dplyr::summarise(AREA = sum(AREA)) %>%
dplyr::ungroup() %>%
dplyr::arrange(dplyr::desc(AREA)) %>%
readr::write_csv(paste0("./global_mining_area_per_country_",release_version,".csv"))
readr::write_csv(paste0("./output/global_mining_area_per_country_",release_version,".csv"))

# --------------------------------------------------------------------------------------
# create 30sec global grid with a percentage of mining coverage per cell ---------------
# create global 30arcsecond grid (approximately 1 kilometer)
# with the mining area weights per cell
# For help see: system("./calculate_area_weights.py -h")
tmp_file <- tempfile(pattern = "file", tmpdir = tempdir(), fileext = ".tif")
path_to_area_weights <- "./output/tmp_global_mining_area_weights_30arcsecond.tif"
path_to_land_mask <- Sys.getenv(paste0("path_to_land_mask_30arcsecond"))
system.time(system(paste0("./calculate_area_weights.py \\
-i ",path_to_mining_polygons," \\
-o ",tmp_file," \\
-o ",path_to_area_weights," \\
-xmin ", -180," \\
-xmax ", 180," \\
-ymin ", -90," \\
-ymax ", 90," \\
-ncol ", 43200," \\
-nrow ", 21600)))
-ncol ", 43200," \\
-nrow ", 21600)))

path_to_land_mask_raster <- Sys.getenv("path_to_land_mask_raster")
path_to_mining_30sec_grid <- paste0("./global_miningcover_30sec_",release_version,".tif")
# mask land 30arcsecond grid (approximately 1 kilometer)
raster::rasterOptions(progress = "text")
raster::beginCluster(n = 12)
system.time(raster::clusterR(x = raster::stack(list(tmp_file, path_to_land_mask_raster)),
fun = raster::overlay, args = list(fun = function(x, m) x * m),
filename = path_to_mining_30sec_grid, datatype = 'INT2U', options = c("compress=LZW"), overwrite = TRUE, verbose = TRUE))

r_prec <- raster::stack(raster::raster(path_to_land_mask), raster::raster(path_to_area_weights)) %>%
raster::clusterR(raster::overlay, args = list(fun = function(m, w) m * w),
filename = paste0("./output/global_miningarea_percentage_", release_version,"_30arcsecond.tif"),
datatype = 'INT1U', options = "compress=LZW", overwrite = TRUE, verbose = TRUE)

r_area <- raster::stack(list(a = raster::area(r_prec), w = r_prec)) %>%
raster::clusterR(raster::overlay, args = list(fun = function(a, w) a * w / 100),
filename = paste0("./output/global_miningarea_", release_version,"_30arcsecond.tif"),
datatype = 'FLT4S', options = "compress=LZW", overwrite = TRUE, verbose = TRUE)

grid_levels <- tibble::tribble( ~name, ~fact,
"5arcminute", 10, # aggregation factor from 30arcsecond to 5arcminute (approximately 10 kilometer)
"30arcminute", 60) # aggregation factor from 30arcsecond to 30arcminute (approximately 55 kilometer)

for(g in 1:nrow(grid_levels) ){

fact <- grid_levels$fact[g]
grid_name <- grid_levels$name[g]
fname_perc <- paste0("./output/global_miningarea_percentage_",release_version,"_",grid_name,".tif")
fname_area <- paste0("./output/global_miningarea_",release_version,"_",grid_name,".tif")

print(paste("Writing", grid_name, "grid to", fname_perc))
print(paste("Aggregation factor ", fact))

print(paste("Aggregate grid cell area to ", grid_name))
r_agg_area <- raster::aggregate(r_area, fact = fact, fun = sum, na.rm = TRUE,
filename = fname_area, datatype = 'FLT4S', options = "compress=LZW", overwrite = TRUE, verbose = TRUE)


r_agg_prec <- raster::stack(list(ta = raster::area(r_agg_area), ma = r_agg_area)) %>%
raster::clusterR(raster::overlay, args = list(fun = function(ta, ma) round(ma / ta * 100)),
filename = fname_perc, datatype = 'INT1U', options = "compress=LZW", overwrite = TRUE, verbose = TRUE)

}

# Create Geoserver visualization layer 5arcminute grid (approximately 1 kilometer)
gs_file <- paste0("./output/global_miningarea_percentage_",release_version,"_",grid_name,"_gs.tif")
paste0("./output/global_miningarea_percentage_",release_version,"_5arcminute.tif") %>%
raster::raster() %>%
raster::clusterR(raster::overlay, args = list(fun = function(w) w / w * w),
filename = gs_file, datatype = 'INT1U', options = "compress=LZW", overwrite = TRUE, verbose = TRUE)

print("Calculating Jenks Natural Breaks for visualization")
gs_create_jenks_breaks_color_palette(src = gs_file, k = 10) %>%
gs_create_sld_color_palette() %>%
xml2::write_xml(stringr::str_replace_all(gs_file, ".tif", ".xml"))

raster::endCluster()

0 comments on commit 3bde680

Please sign in to comment.