|
| 1 | +#!/bin/bash |
| 2 | +# GIS Data Processing Workflow |
| 3 | +# Copyright (C) 2022, University of Saskatchewan |
| 4 | +# Copyright (C) 2021, Wouter Knoben |
| 5 | +# |
| 6 | +# This file is part of GIS Data Processing Workflow |
| 7 | +# |
| 8 | +# This program is free software: you can redistribute it and/or modify |
| 9 | +# it under the terms of the GNU General Public License as published by |
| 10 | +# the Free Software Foundation, either version 3 of the License, or |
| 11 | +# (at your option) any later version. |
| 12 | +# |
| 13 | +# This program is distributed in the hope that it will be useful, |
| 14 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | +# GNU General Public License for more details. |
| 17 | +# |
| 18 | +# You should have received a copy of the GNU General Public License |
| 19 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 20 | + |
| 21 | +# ========================= |
| 22 | +# Credits and contributions |
| 23 | +# ========================= |
| 24 | +# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html |
| 25 | +# 2. General ideas of GeoTIFF subsetting are taken from https://github.com/CH-Earth/CWARHM |
| 26 | +# developed mainly by Wouter Knoben (hence the header copyright credit). See the preprint |
| 27 | +# at: https://www.essoar.org/doi/10.1002/essoar.10509195.1 |
| 28 | + |
| 29 | + |
| 30 | +# ================ |
| 31 | +# General comments |
| 32 | +# ================ |
| 33 | +# * All variables are camelCased for distinguishing from function names; |
| 34 | +# * function names are all in lower_case with words seperated by underscore for legibility; |
| 35 | +# * shell style is based on Google Open Source Projects' |
| 36 | +# Style Guide: https://google.github.io/styleguide/shellguide.html |
| 37 | + |
| 38 | + |
| 39 | +# =============== |
| 40 | +# Usage Functions |
| 41 | +# =============== |
| 42 | +short_usage() { |
| 43 | + echo "usage: $(basename $0) -cio DIR -v var1[,var2[...]] [-r INT] [-se DATE] [-ln REAL,REAL] [-f PATH] [-t BOOL] [-a stat1[,stat2,[...]] [-q q1[,q2[...]]]] [-p STR] " |
| 44 | +} |
| 45 | + |
| 46 | + |
| 47 | +# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT |
| 48 | +parsedArguments=$(getopt -a -n depth-to-bedrock -o i:o:v:r:s:e:l:n:f:t:a:q:p:c: --long dataset-dir:,output-dir:,variable:,crs:,start-date:,end-date:,lat-lims:,lon-lims:,shape-file:,print-geotiff:,stat:,quantile:,prefix:,cache: -- "$@") |
| 49 | +validArguments=$? |
| 50 | +if [ "$validArguments" != "0" ]; then |
| 51 | + short_usage; |
| 52 | + exit 1; |
| 53 | +fi |
| 54 | + |
| 55 | +# check if no options were passed |
| 56 | +if [ $# -eq 0 ]; then |
| 57 | + echo "$(basename $0): ERROR! arguments missing"; |
| 58 | + exit 1; |
| 59 | +fi |
| 60 | + |
| 61 | +# check long and short options passed |
| 62 | +eval set -- "$parsedArguments" |
| 63 | +while : |
| 64 | +do |
| 65 | + case "$1" in |
| 66 | + -i | --dataset-dir) geotiffDir="$2" ; shift 2 ;; # required |
| 67 | + -o | --output-dir) outputDir="$2" ; shift 2 ;; # required |
| 68 | + -v | --variable) variables="$2" ; shift 2 ;; # required |
| 69 | + -r | --crs) crs="$2" ; shift 2 ;; # required |
| 70 | + -s | --start-date) startDate="$2" ; shift 2 ;; # redundant - added for compatibility |
| 71 | + -e | --end-date) endDate="$2" ; shift 2 ;; # redundant - added for compatibility |
| 72 | + -l | --lat-lims) latLims="$2" ; shift 2 ;; # required - could be redundant |
| 73 | + -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required - could be redundant |
| 74 | + -f | --shape-file) shapefile="$2" ; shift 2 ;; # required - could be redundant |
| 75 | + -t | --print-geotiff) printGeotiff="$2" ; shift 2 ;; # required |
| 76 | + -a | --stat) stats="$2" ; shift 2 ;; # optional |
| 77 | + -q | --quantile) quantiles="$2" ; shift 2 ;; # optional |
| 78 | + -p | --prefix) prefix="$2" ; shift 2 ;; # optional |
| 79 | + -c | --cache) cache="$2" ; shift 2 ;; # required |
| 80 | + |
| 81 | + # -- means the end of the arguments; drop this, and break out of the while loop |
| 82 | + --) shift; break ;; |
| 83 | + |
| 84 | + # in case of invalid option |
| 85 | + *) |
| 86 | + echo "$(basename $0): ERROR! invalid option '$1'"; |
| 87 | + short_usage; exit 1 ;; |
| 88 | + esac |
| 89 | +done |
| 90 | + |
| 91 | +# check if $ensemble is provided |
| 92 | +if [[ -n "$startDate" ]] || [[ -n "$endDate" ]]; then |
| 93 | + echo "$(basename $0): ERROR! redundant argument (time extents) provided"; |
| 94 | + exit 1; |
| 95 | +fi |
| 96 | + |
| 97 | +# check the prefix if not set |
| 98 | +if [[ -z $prefix ]]; then |
| 99 | + prefix="depth_to_bedrock_" |
| 100 | +fi |
| 101 | + |
| 102 | +# parse comma-delimited variables |
| 103 | +IFS=',' read -ra variables <<< "${variables}" |
| 104 | + |
| 105 | + |
| 106 | +# ===================== |
| 107 | +# Necessary Assumptions |
| 108 | +# ===================== |
| 109 | +# TZ to be set to UTC to avoid invalid dates due to Daylight Saving |
| 110 | +alias date='TZ=UTC date' |
| 111 | +# expand aliases for the one stated above |
| 112 | +shopt -s expand_aliases |
| 113 | +# hard-coded paths |
| 114 | +renvCache="/project/rpp-kshook/Climate_Forcing_Data/assets/r-envs/" # general renv cache path |
| 115 | +exactextractrCache="${renvCache}/exact-extract-env" # exactextractr renv cache path |
| 116 | +renvPackagePath="${renvCache}/renv_0.15.5.tar.gz" # renv_0.15.5 source path |
| 117 | + |
| 118 | + |
| 119 | +# ========================== |
| 120 | +# Necessary Global Variables |
| 121 | +# ========================== |
| 122 | +# the structure of the original file names are one of the following: |
| 123 | +# * %{var}_M_sl%{soil_layer_depth_number}_250m_ll.tif |
| 124 | +# * %{var}_250m_ll.tif |
| 125 | +# but the user is expected to include complete variable (file) name in the |
| 126 | +# `--variable` input argument comma-separated list. |
| 127 | + |
| 128 | + |
| 129 | +# =================== |
| 130 | +# Necessary Functions |
| 131 | +# =================== |
| 132 | +# Modules below available on Compute Canada (CC) Graham Cluster Server |
| 133 | +load_core_modules () { |
| 134 | + module -q purge |
| 135 | + module -q load gcc/9.3.0 |
| 136 | + module -q load r/4.1.2 |
| 137 | + module -q load gdal/3.0.4 |
| 138 | + module -q load udunits/2.2.28 |
| 139 | + module -q load geos/3.10.2 |
| 140 | + module -q load proj/9.0.0 |
| 141 | +} |
| 142 | +load_core_modules |
| 143 | + |
| 144 | + |
| 145 | +# ================= |
| 146 | +# Useful One-liners |
| 147 | +# ================= |
| 148 | +# sorting a comma-delimited string of real numbers |
| 149 | +sort_comma_delimited () { IFS=',' read -ra arr <<< "$*"; echo ${arr[*]} | tr " " "\n" | sort -n | tr "\n" " "; } |
| 150 | + |
| 151 | +# log date format |
| 152 | +logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; } |
| 153 | + |
| 154 | + |
| 155 | +####################################### |
| 156 | +# subset GeoTIFFs |
| 157 | +# |
| 158 | +# Globals: |
| 159 | +# latLims: comma-delimited latitude |
| 160 | +# limits |
| 161 | +# lonLims: comma-delimited longitude |
| 162 | +# limits |
| 163 | +# |
| 164 | +# Arguments: |
| 165 | +# sourceVrt: source vrt file |
| 166 | +# destPath: destionation path (inclu- |
| 167 | +# ding file name) |
| 168 | +# |
| 169 | +# Outputs: |
| 170 | +# one mosaiced (merged) GeoTIFF under |
| 171 | +# the $destDir |
| 172 | +####################################### |
| 173 | +subset_geotiff () { |
| 174 | + # local variables |
| 175 | + local latMin |
| 176 | + local latMax |
| 177 | + local lonMin |
| 178 | + local lonMax |
| 179 | + local sortedLats |
| 180 | + local sortedLons |
| 181 | + # reading arguments |
| 182 | + local sourceVrt="$1" |
| 183 | + local destPath="$2" |
| 184 | + |
| 185 | + # extracting minimum and maximum of latitude and longitude respectively |
| 186 | + ## latitude |
| 187 | + sortedLats=($(sort_comma_delimited "$latLims")) |
| 188 | + latMin="${sortedLats[0]}" |
| 189 | + latMax="${sortedLats[1]}" |
| 190 | + ## longitude |
| 191 | + sortedLons=($(sort_comma_delimited "$lonLims")) |
| 192 | + lonMin="${sortedLons[0]}" |
| 193 | + lonMax="${sortedLons[1]}" |
| 194 | + |
| 195 | + # subset based on lat/lon - flush to disk at 500MB |
| 196 | + GDAL_CACHEMAX=500 |
| 197 | + gdal_translate --config GDAL_CACHEMAX 500 \ |
| 198 | + -co COMPRESS="DEFLATE" \ |
| 199 | + -co BIGTIFF="YES" \ |
| 200 | + -projwin $lonMin $latMax $lonMax $latMin "${sourceVrt}" "${destPath}" \ |
| 201 | + > /dev/null; |
| 202 | +} |
| 203 | + |
| 204 | + |
| 205 | +# =============== |
| 206 | +# Data Processing |
| 207 | +# =============== |
| 208 | +# display info |
| 209 | +echo "$(logDate)$(basename $0): processing Depth-to-Bedrock GeoTIFF(s)..." |
| 210 | + |
| 211 | +# make the output directory |
| 212 | +echo "$(logDate)$(basename $0): creating output directory under $outputDir" |
| 213 | +mkdir -p "$outputDir" # making the output directory |
| 214 | +mkdir -p "$cache" # making the cache directory |
| 215 | + |
| 216 | +# if shapefile is provided extract the extents from it |
| 217 | +if [[ -n $shapefile ]]; then |
| 218 | + # extract the shapefile extent |
| 219 | + IFS=' ' read -ra shapefileExtents <<< "$(ogrinfo -so -al "$shapefile" | sed 's/[),(]//g' | grep Extent)" |
| 220 | + # transform the extents in case they are not in EPSG:4326 |
| 221 | + IFS=':' read -ra sourceProj4 <<< "$(gdalsrsinfo $shapefile | grep -e "PROJ.4")" # source Proj4 value |
| 222 | + if [[ -n $sourceProj4 ]]; then |
| 223 | + : |
| 224 | + else |
| 225 | + echo "$(logDate)$(basename $0): WARNING! Assuming WSG84 CRS for the input ESRI shapefile" |
| 226 | + sourceProj4=("PROJ.4" " +proj=longlat +datum=WGS84 +no_defs") # made an array for compatibility with the following statements |
| 227 | + fi |
| 228 | + |
| 229 | + # transform limits and assing to variables |
| 230 | + IFS=' ' read -ra leftBottomLims <<< $(echo "${shapefileExtents[@]:1:2}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy) |
| 231 | + IFS=' ' read -ra rightTopLims <<< $(echo "${shapefileExtents[@]:4:5}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy) |
| 232 | + # define $latLims and $lonLims from $shapefileExtents |
| 233 | + lonLims="${leftBottomLims[0]},${rightTopLims[0]}" |
| 234 | + latLims="${leftBottomLims[1]},${rightTopLims[1]}" |
| 235 | +fi |
| 236 | + |
| 237 | +# subset and produce stats if needed |
| 238 | +if [[ "$printGeotiff" == "true" ]]; then |
| 239 | + echo "$(logDate)$(basename $0): subsetting GeoTIFFs under $outputDir" |
| 240 | + for var in "${variables[@]}"; do |
| 241 | + # subset based on lat and lon values |
| 242 | + subset_geotiff "${geotiffDir}/${var}.tif" "${outputDir}/${prefix}${var}.tif" |
| 243 | + done |
| 244 | +fi |
| 245 | + |
| 246 | +## make R renv project directory |
| 247 | +if [[ -n "$shapefile" ]] && [[ -n $stats ]]; then |
| 248 | + echo "$(logDate)$(basename $0): Extracting stats under $outputDir" |
| 249 | + mkdir -p "$cache/r-virtual-env/" |
| 250 | + ## make R renv in $cache |
| 251 | + virtualEnvPath="$cache/r-virtual-env/" |
| 252 | + cp "$(dirname $0)/../assets/renv.lock" "$virtualEnvPath" |
| 253 | + ## load necessary modules - excessive, mainly for clarification |
| 254 | + load_core_modules |
| 255 | + |
| 256 | + ## make the temporary directory for installing r packages |
| 257 | + tempInstallPath="$cache/r-packages" |
| 258 | + mkdir -p "$tempInstallPath" |
| 259 | + export R_LIBS_USER="$tempInstallPath" |
| 260 | + |
| 261 | + # extract given stats for each variable |
| 262 | + for var in "${variables[@]}"; do |
| 263 | + ## build renv and create stats |
| 264 | + Rscript "$(dirname $0)/../assets/stats.R" \ |
| 265 | + "$tempInstallPath" \ |
| 266 | + "$exactextractrCache" \ |
| 267 | + "$renvPackagePath" \ |
| 268 | + "$virtualEnvPath" \ |
| 269 | + "$virtualEnvPath" \ |
| 270 | + "${virtualEnvPath}/renv.lock" \ |
| 271 | + "${geotiffDir}/${var}.tif" \ |
| 272 | + "$shapefile" \ |
| 273 | + "$outputDir/${prefix}stats_${var}.csv" \ |
| 274 | + "$stats" \ |
| 275 | + "$quantiles" >> "${outputDir}/${prefix}stats_${var}.log" 2>&1; |
| 276 | + done |
| 277 | +fi |
| 278 | + |
| 279 | +# remove unnecessary files |
| 280 | +mkdir "$HOME/empty_dir" |
| 281 | +echo "$(logDate)$(basename $0): deleting temporary files from $cache" |
| 282 | +rsync --quiet -aP --delete "$HOME/empty_dir/" "$cache" |
| 283 | +rm -r "$cache" |
| 284 | +echo "$(logDate)$(basename $0): temporary files from $cache are removed" |
| 285 | +echo "$(logDate)$(basename $0): results are produced under $outputDir" |
| 286 | + |
0 commit comments