From 1a4f4664e2275f2d13c5dce528124b141b05759b Mon Sep 17 00:00:00 2001 From: Clay Morrow Date: Fri, 20 Jan 2023 23:00:34 -0600 Subject: [PATCH] updated build pipeline --- .Rbuildignore | 8 + .gitignore | 4 + build.R | 3 + build_pipeline.R | 3 + check_pkg.R | 3 + docs/Alaska.html | 1015 ++++++++++++++++++++++-------------------- render_vignette.R | 4 + vignettes/Alaska.Rmd | 22 +- 8 files changed, 564 insertions(+), 498 deletions(-) create mode 100644 build.R create mode 100644 build_pipeline.R create mode 100644 check_pkg.R create mode 100644 render_vignette.R diff --git a/.Rbuildignore b/.Rbuildignore index 04187a2..61f5752 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -29,3 +29,11 @@ data/ndvi_AK3000\.rda cran-comments\.md README\.html + +# build pipeline +build.R +render_vignette.R +check_pkg.R +build_pipeline.R +builds/ +figure/ diff --git a/.gitignore b/.gitignore index 9e3135d..6c89ea2 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,7 @@ data-raw /Meta/ README.html + +# built versions of the package +builds/ +figure/ diff --git a/build.R b/build.R new file mode 100644 index 0000000..7179fd7 --- /dev/null +++ b/build.R @@ -0,0 +1,3 @@ +Rcpp::compileAttributes() +devtools::document() +devtools::build(path = "builds") diff --git a/build_pipeline.R b/build_pipeline.R new file mode 100644 index 0000000..a143763 --- /dev/null +++ b/build_pipeline.R @@ -0,0 +1,3 @@ +source("check_pkg.R") +source("build.R") +source("render_vignette.R") diff --git a/check_pkg.R b/check_pkg.R new file mode 100644 index 0000000..68406bd --- /dev/null +++ b/check_pkg.R @@ -0,0 +1,3 @@ +Rcpp::compileAttributes() +devtools::document() +devtools::check() diff --git a/docs/Alaska.html b/docs/Alaska.html index 2ff5d7c..9b5e708 100644 --- a/docs/Alaska.html +++ b/docs/Alaska.html @@ -8,11 +8,10 @@ - - + Alaska @@ -29,298 +28,311 @@ } }); + + + + + + + + + + + - - + + + + + + + + @@ -329,13 +341,21 @@ +
+ + + + +
@@ -348,20 +368,19 @@

Introduction

needed:

devtools::install_github("morrowcj/remotePARTS")

Then, ensure that the package is loaded into your library:

-
library(remotePARTS)
+
library(remotePARTS)

This vignette will use dplyr and ggplot2 for visualizing the data:

-
library(dplyr)
-## 
-## Attaching package: 'dplyr'
-## The following objects are masked from 'package:stats':
-## 
-##     filter, lag
-## The following objects are masked from 'package:base':
-## 
-##     intersect, setdiff, setequal, union
-library(ggplot2)
-## Warning: package 'ggplot2' was built under R version 4.0.5
+
library(dplyr)
+## 
+## Attaching package: 'dplyr'
+## The following objects are masked from 'package:stats':
+## 
+##     filter, lag
+## The following objects are masked from 'package:base':
+## 
+##     intersect, setdiff, setequal, union
+library(ggplot2)

Alaska datasets

@@ -374,8 +393,8 @@

Alaska datasets

maps).

For this vignette, we well also create a smaller 3000 pixel subsample ndvi_AK3000 for demonstrative purposes:

-
data("ndvi_AK10000")
-ndvi_AK3000 <- ndvi_AK10000[seq_len(3000),] # first 3000 pixels from the random 10K
+
data("ndvi_AK10000")
+ndvi_AK3000 <- ndvi_AK10000[seq_len(3000),] # first 3000 pixels from the random 10K

ndvi_AK10000 is a data.frame with 37 columns. lng and lat are longitude and latitude, respectively. AR_coef and CLS_coef @@ -391,67 +410,67 @@

Alaska datasets

not handle any missing data. It is essential that all missing data are removed prior to conducting any analyses.

-
str(ndvi_AK10000)
-## 'data.frame':    10000 obs. of  37 variables:
-##  $ lng     : num  -151 -145 -144 -163 -145 ...
-##  $ lat     : num  63.3 64.7 62.7 67.7 65.8 ...
-##  $ AR_coef : num  0.01071 0.003586 0.002488 0.000373 0.002505 ...
-##  $ CLS_coef: num  0.00529 0.001537 0.002276 0.000476 0.001514 ...
-##  $ land    : Factor w/ 10 levels "Evergr needle",..: 6 7 7 6 7 7 6 6 7 7 ...
-##  $ ndvi1982: num  1.88 7.68 8.46 6.85 10.25 ...
-##  $ ndvi1983: num  1.74 8.14 7.99 6.97 10.35 ...
-##  $ ndvi1984: num  1.96 8.47 8.04 6.67 10.29 ...
-##  $ ndvi1985: num  1.74 7.72 7.96 6.9 10.02 ...
-##  $ ndvi1986: num  1.92 8.31 8.64 6.43 10.39 ...
-##  $ ndvi1987: num  2.04 8.89 8.4 7 10.47 ...
-##  $ ndvi1988: num  2.13 8.5 8.32 7.12 10.05 ...
-##  $ ndvi1989: num  1.93 8.99 8.79 6.24 10.16 ...
-##  $ ndvi1990: num  1.73 8.66 8.45 7.46 10.31 ...
-##  $ ndvi1991: num  1.99 8.74 8.34 6.86 10.28 ...
-##  $ ndvi1992: num  1.74 7.99 7.94 6.53 9.77 ...
-##  $ ndvi1993: num  1.98 8.62 7.93 6.97 10.35 ...
-##  $ ndvi1994: num  2.04 8.59 8.26 7.15 10.25 ...
-##  $ ndvi1995: num  1.9 8.1 8.55 7.08 10.18 ...
-##  $ ndvi1996: num  2.09 8.61 8.23 7.08 10.33 ...
-##  $ ndvi1997: num  1.81 8.77 8.99 7.24 10.23 ...
-##  $ ndvi1998: num  1.95 7.9 8.57 7.45 9.99 ...
-##  $ ndvi1999: num  1.75 8.14 8.61 6.75 10.04 ...
-##  $ ndvi2000: num  1.71 7.5 8.37 6.7 9.92 ...
-##  $ ndvi2001: num  1.57 7.76 8.29 6.66 9.69 ...
-##  $ ndvi2002: num  1.72 7.83 8.62 7.09 9.76 ...
-##  $ ndvi2003: num  1.95 7.94 8.35 6.87 9.76 ...
-##  $ ndvi2004: num  2.2 8.37 8.83 7.3 11.74 ...
-##  $ ndvi2005: num  2.18 8.45 8.63 7.18 11.62 ...
-##  $ ndvi2006: num  2.06 8.32 8.36 6.74 11.59 ...
-##  $ ndvi2007: num  2.39 8.53 8.63 7.47 11.91 ...
-##  $ ndvi2008: num  2.36 8.6 8.41 7 11.8 ...
-##  $ ndvi2009: num  2.35 10.02 9.27 6.59 11.62 ...
-##  $ ndvi2010: num  2.95 11.09 9.26 7.09 11.46 ...
-##  $ ndvi2011: num  2.5 8.9 8.94 7.05 12.15 ...
-##  $ ndvi2012: num  2.71 8.71 8.39 6.21 10.97 ...
-##  $ ndvi2013: num  2.54 9 8.95 6.93 10.38 ...
+
str(ndvi_AK10000)
+## 'data.frame':    10000 obs. of  37 variables:
+##  $ lng     : num  -151 -145 -144 -163 -145 ...
+##  $ lat     : num  63.3 64.7 62.7 67.7 65.8 ...
+##  $ AR_coef : num  0.01071 0.003586 0.002488 0.000373 0.002505 ...
+##  $ CLS_coef: num  0.00529 0.001537 0.002276 0.000476 0.001514 ...
+##  $ land    : Factor w/ 10 levels "Evergr needle",..: 6 7 7 6 7 7 6 6 7 7 ...
+##  $ ndvi1982: num  1.88 7.68 8.46 6.85 10.25 ...
+##  $ ndvi1983: num  1.74 8.14 7.99 6.97 10.35 ...
+##  $ ndvi1984: num  1.96 8.47 8.04 6.67 10.29 ...
+##  $ ndvi1985: num  1.74 7.72 7.96 6.9 10.02 ...
+##  $ ndvi1986: num  1.92 8.31 8.64 6.43 10.39 ...
+##  $ ndvi1987: num  2.04 8.89 8.4 7 10.47 ...
+##  $ ndvi1988: num  2.13 8.5 8.32 7.12 10.05 ...
+##  $ ndvi1989: num  1.93 8.99 8.79 6.24 10.16 ...
+##  $ ndvi1990: num  1.73 8.66 8.45 7.46 10.31 ...
+##  $ ndvi1991: num  1.99 8.74 8.34 6.86 10.28 ...
+##  $ ndvi1992: num  1.74 7.99 7.94 6.53 9.77 ...
+##  $ ndvi1993: num  1.98 8.62 7.93 6.97 10.35 ...
+##  $ ndvi1994: num  2.04 8.59 8.26 7.15 10.25 ...
+##  $ ndvi1995: num  1.9 8.1 8.55 7.08 10.18 ...
+##  $ ndvi1996: num  2.09 8.61 8.23 7.08 10.33 ...
+##  $ ndvi1997: num  1.81 8.77 8.99 7.24 10.23 ...
+##  $ ndvi1998: num  1.95 7.9 8.57 7.45 9.99 ...
+##  $ ndvi1999: num  1.75 8.14 8.61 6.75 10.04 ...
+##  $ ndvi2000: num  1.71 7.5 8.37 6.7 9.92 ...
+##  $ ndvi2001: num  1.57 7.76 8.29 6.66 9.69 ...
+##  $ ndvi2002: num  1.72 7.83 8.62 7.09 9.76 ...
+##  $ ndvi2003: num  1.95 7.94 8.35 6.87 9.76 ...
+##  $ ndvi2004: num  2.2 8.37 8.83 7.3 11.74 ...
+##  $ ndvi2005: num  2.18 8.45 8.63 7.18 11.62 ...
+##  $ ndvi2006: num  2.06 8.32 8.36 6.74 11.59 ...
+##  $ ndvi2007: num  2.39 8.53 8.63 7.47 11.91 ...
+##  $ ndvi2008: num  2.36 8.6 8.41 7 11.8 ...
+##  $ ndvi2009: num  2.35 10.02 9.27 6.59 11.62 ...
+##  $ ndvi2010: num  2.95 11.09 9.26 7.09 11.46 ...
+##  $ ndvi2011: num  2.5 8.9 8.94 7.05 12.15 ...
+##  $ ndvi2012: num  2.71 8.71 8.39 6.21 10.97 ...
+##  $ ndvi2013: num  2.54 9 8.95 6.93 10.38 ...

For this demonstration, we are interested in asking the following questions using these data: “Is NDVI in Alaska increasing over time?”; “Are Alaska’s NDVI time trends associated with land-cover classes?”; and “Do Alaska’s NDVI time trends differ with latitude?”

The figure below shows a temporal cross-section of these data for 1982, 1998, and 2013.

-
reshape2::melt(ndvi_AK10000, measure = c("ndvi1982", "ndvi1998", "ndvi2013")) %>% 
-  ggplot(aes(x = lng, y = lat, col = value )) + 
-  geom_point(size = .1) +
-  labs(col = "ndvi") +
-  facet_wrap(~ gsub("ndvi", "", variable), ncol = 3) +
-  scale_color_viridis_c(option = "magma") +
-  labs(x = "Longitude", y = "Latitude")
-

+
reshape2::melt(ndvi_AK10000, measure = c("ndvi1982", "ndvi1998", "ndvi2013")) %>% 
+  ggplot(aes(x = lng, y = lat, col = value )) + 
+  geom_point(size = .1) +
+  labs(col = "ndvi") +
+  facet_wrap(~ gsub("ndvi", "", variable), ncol = 3) +
+  scale_color_viridis_c(option = "magma") +
+  labs(x = "Longitude", y = "Latitude")
+

The following figure shows how Alaska’s three primary land-cover classes are distributed.

-
ndvi_AK10000 %>%  
-ggplot(aes(x = lng, y = lat, col = land)) + 
-  geom_point(size = .1) + 
-  scale_color_viridis_d(direction = -1, end = .9) +
-  labs(y = "Latitude", x = "Longitude", col = "Land cover", fill = "Land cover")
-

+
ndvi_AK10000 %>%  
+ggplot(aes(x = lng, y = lat, col = land)) + 
+  geom_point(size = .1) + 
+  scale_color_viridis_d(direction = -1, end = .9) +
+  labs(y = "Latitude", x = "Longitude", col = "Land cover", fill = "Land cover")
+

Use help("ndvi_AK) to see documentation for these datasets.

@@ -492,23 +511,23 @@

Time-series analysis

maximum likelihood (REML). To do so, we must extract only our NDVI columns as the matrix Y. We’ll do this by matching all column names containing “ndvi” and slicing the data.frame:

-
ndvi.cols <- grep("ndvi", names(ndvi_AK3000), value = TRUE)
-Y <- as.matrix(ndvi_AK3000[, ndvi.cols])
+
ndvi.cols <- grep("ndvi", names(ndvi_AK3000), value = TRUE)
+Y <- as.matrix(ndvi_AK3000[, ndvi.cols])

We also need a 2-column coordinate matrix coords:

-
coords <- as.matrix(ndvi_AK3000[, c("lng", "lat")])
+
coords <- as.matrix(ndvi_AK3000[, c("lng", "lat")])

Y and coords are then passed to fitAR_map() with default settings:

-
ARfit <- fitAR_map(Y = Y, coords = coords)
+
ARfit <- fitAR_map(Y = Y, coords = coords)

Coefficient estimates can be obtained from ARfit with coefficients(). The first column is the estimate of \(\beta_0\), \(\hat{\beta_0}\), and the second is \(\hat{\beta_1}\).

-
head(coefficients(ARfit))
-##      (Intercept)            t
-## [1,]    1.703446  0.021923615
-## [2,]    7.980618  0.030464243
-## [3,]    8.146784  0.021130056
-## [4,]    6.883592  0.002582853
-## [5,]   10.081968  0.026466877
-## [6,]    7.628119 -0.010395964
+
head(coefficients(ARfit))
+##      (Intercept)            t
+## [1,]    1.703446  0.021923615
+## [2,]    7.980618  0.030464243
+## [3,]    8.146784  0.021130056
+## [4,]    6.883592  0.002582853
+## [5,]   10.081968  0.026466877
+## [6,]    7.628119 -0.010395964

These time-series analyses calculate the time trend in the raw NDVI data. In most situations it makes sense to ask if there are time trends in the relative NDVI values, that is, changes in NDVI relative to the @@ -520,22 +539,22 @@

Time-series analysis

ARfit$coefficients, and since the coefficients for the trend are in the column of the coefficients matrix named t, the scaling is performed as

-
ARfit$coefficients[, "t"] <- ARfit$coefficients[,"t"]/rowMeans(ndvi_AK3000[, ndvi.cols])
-ndvi_AK3000$AR_coef <- coefficients(ARfit)[, "t"] # save time trend coefficient
+
ARfit$coefficients[, "t"] <- ARfit$coefficients[,"t"]/rowMeans(ndvi_AK3000[, ndvi.cols])
+ndvi_AK3000$AR_coef <- coefficients(ARfit)[, "t"] # save time trend coefficient

These scaled values of the time trend are then stored in the ndvi_AK3000 data frame.

Below is an image of the estimated coefficients (pre-calculated and scaled) for the full ndvi_AK10000. From this, it appears that northern latitudes may be greening faster than more southern latitudes.

-
ndvi_AK10000 %>% 
-  ggplot(aes(x = lng, y = lat, col = AR_coef)) + 
-  geom_point(size = .1) + 
-  scale_color_gradient2(high = "red", low = "blue", 
-                        mid = "grey90", midpoint = 0) + 
-  guides(fill = "none") + 
-  labs(y = "Latitude", x = "Longitude", col = expression(beta[1]))
-

+
ndvi_AK10000 %>% 
+  ggplot(aes(x = lng, y = lat, col = AR_coef)) + 
+  geom_point(size = .1) + 
+  scale_color_gradient2(high = "red", low = "blue", 
+                        mid = "grey90", midpoint = 0) + 
+  guides(fill = "none") + 
+  labs(y = "Latitude", x = "Longitude", col = expression(beta[1]))
+

fitAR_map and its conditional least-squares counterpart, fitCLS_map, are wrappers for the functions fitAR and fitCLS which conduct individual time @@ -556,7 +575,7 @@

Distance

The first step is to calculate the distances among pixels as a distance matrix D. Here, we’ll calculate relative distances with distm_scaled() from our coordinate matrix.

-
D <- distm_scaled(coords)
+
D <- distm_scaled(coords)

distm_scaled() scales distances across the spatial domain so that the greatest distance between two pixels is 1. Note that because distm_scaled() uses @@ -580,13 +599,13 @@

Covariance

that the covariance structure among pixels is given by \(\Sigma = \eta I + (1-\eta)V\) where \(I\) is the identity matrix.

If we know the range parameter \(r\), we can calculate V from D with covar_exp():

-
r <- 0.1
-V <- covar_exp(D, r)
+
r <- 0.1
+V <- covar_exp(D, r)

And we could add a known nugget to V to obtain Sigma:

-
nugget <- 0.2 
-I <- diag(nrow(V)) # identity matrix
-Sigma <- nugget*I + (1-nugget)*V
+
nugget <- 0.2 
+I <- diag(nrow(V)) # identity matrix
+Sigma <- nugget*I + (1-nugget)*V

See ?covar_exp() for a description of the covariance functions provided by remotePARTS and for more information regarding their use.

@@ -623,35 +642,35 @@

Known parameters

V. This means that even with only the 3000-pixel subset, it will take a few minutes to finish the computations on most computers.

-
GLS.0 <- fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = V, nugget = nugget)
+
GLS.0 <- fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = V, nugget = nugget)

Note also that fitGLS adds the nugget to V internally. If we wanted to do this ourselves, we could pass the covariance matrix Sigma which already contains a nugget component and then set the nugget argument of fitGLS to 0:

-
fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = Sigma, nugget = 0) # equivalent
+
fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = Sigma, nugget = 0) # equivalent

The estimates for our land class effects can be extracted with coefficients().

-
coefficients(GLS.0)
-## landShrubland   landSavanna landGrassland 
-##  0.0008646565  0.0002554862  0.0011294576
+
coefficients(GLS.0)
+## landShrubland   landSavanna landGrassland 
+##  0.0008646565  0.0002554862  0.0011294576

The full model fit is given by

-
GLS.0
-## 
-## Call:
-## fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = V, 
-##     nugget = nugget)
-## 
-## t-tests:
-##                     Est t.stat pval.t
-## landShrubland 0.0008647 0.5065 0.6126
-## landSavanna   0.0002555 0.1493 0.8813
-## landGrassland 0.0011295 0.6648 0.5062
-## 
-## F-test:
-##                model df_F    SSE       MSE logLik Fstat  pval_F
-## 1 AR_coef ~ 0 + land    2 0.1128 3.764e-05  12808 4.461 0.01163
-## 2        AR_coef ~ 1 2997 0.1131 3.775e-05  12802    NA      NA
+
GLS.0
+## 
+## Call:
+## fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = V, 
+##     nugget = nugget)
+## 
+## t-tests:
+##                     Est t.stat pval.t
+## landShrubland 0.0008647 0.5065 0.6126
+## landSavanna   0.0002555 0.1493 0.8813
+## landGrassland 0.0011295 0.6648 0.5062
+## 
+## F-test:
+##                model df_F    SSE       MSE logLik Fstat  pval_F
+## 1 AR_coef ~ 0 + land    2 0.1128 3.764e-05  12808 4.461 0.01163
+## 2        AR_coef ~ 1 2997 0.1131 3.775e-05  12802    NA      NA

Note that, although none of the three land-cover classes shows a statistically significant time trend in (proportional) NDVI, the F-test shows that there is a statistically significant difference among @@ -687,12 +706,11 @@

Spatial parameters

start the search for the optimal range parameter at 0.1. For this example, we will also specify fit.n = 3000, which ensures that all pixels are used to estimate spatial parameters.

-
corfit <- fitCor(resids = residuals(ARfit), coords = coords, 
-                covar_FUN = "covar_exp", start = list(range = 0.1), 
-                fit.n = 3000)
-(range.opt = corfit$spcor)
-##     range 
-## 0.1083729
+
corfit <- fitCor(resids = residuals(ARfit), coords = coords, covar_FUN = "covar_exp", 
+                 start = list(range = 0.1), fit.n = 3000)
+(range.opt = corfit$spcor)
+##     range 
+## 0.1083729

By default, fitCor() uses distm_scaled() to calculate distances from the coordinate matrix, but any function that returns a distance matrix can be specified with the @@ -701,10 +719,9 @@

Spatial parameters

instead use distm_km() to calculate distance in km instead of relative distances, we would need to scale our starting range parameter by the maximum distance in km of our map:

-
max.dist <- max(distm_km(coords))
-corfit.km <- fitCor(resids = residuals(ARfit), coords = coords, 
-                   covar_FUN = "covar_exp", start = list(range = max.dist*0.1),
-                   distm_FUN = "distm_km", fit.n = 3000)
+
max.dist <- max(distm_km(coords))
+corfit.km <- fitCor(resids = residuals(ARfit), coords = coords, covar_FUN = "covar_exp", 
+                    start = list(range = max.dist*0.1), distm_FUN = "distm_km", fit.n = 3000)

Note that, depending on the covariance function used, not all parameters will need scaling. For example, covar_exppow() is an exponential-power covariance function and takes a range and shape @@ -712,7 +729,7 @@

Spatial parameters

?fitCor() for more details.

After we’ve obtained our range parameter estimate, we can use it to re-calculate the V matrix:

-
V.opt <- covar_exp(D, range.opt)
+
V.opt <- covar_exp(D, range.opt)

Nugget

@@ -728,20 +745,19 @@

Nugget

F-tests, the default reduced model is the intercept-only model, although it is also possible to specify alternative reduced models as a formula in the formula0 option.

-
GLS.opt <- fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = V.opt, nugget = NA, 
-                 no.F = FALSE)
-(nug.opt = GLS.opt$nugget)
-## [1] 0.1342314
-coefficients(GLS.opt)
-## landShrubland   landSavanna landGrassland 
-##  0.0009201230  0.0003899361  0.0011467324
+
GLS.opt <- fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = V.opt, nugget = NA, no.F = FALSE)
+(nug.opt = GLS.opt$nugget)
+## [1] 0.1342314
+coefficients(GLS.opt)
+## landShrubland   landSavanna landGrassland 
+##  0.0009201230  0.0003899361  0.0011467324

Let’s compare our GLS from earlier with this one with optimized parameters:

-
rbind(GLS.0 = c(range = r, nugget = GLS.0$nugget, logLik = GLS.0$logLik, MSE = GLS.0$MSE),
-      GLS.opt = c(range = range.opt, nugget = GLS.opt$nugget, logLik = GLS.opt$logLik, MSE = GLS.opt$MSE))
-##             range    nugget   logLik          MSE
-## GLS.0   0.1000000 0.2000000 12807.91 3.763878e-05
-## GLS.opt 0.1083729 0.1342314 12809.46 5.003165e-05
+
rbind(GLS.0 = c(range = r, nugget = GLS.0$nugget, logLik = GLS.0$logLik, MSE = GLS.0$MSE),
+      GLS.opt = c(range = range.opt, nugget = GLS.opt$nugget, logLik = GLS.opt$logLik, MSE = GLS.opt$MSE))
+##             range    nugget   logLik          MSE
+## GLS.0   0.1000000 0.2000000 12807.91 3.763878e-05
+## GLS.opt 0.1083729 0.1342314 12809.46 5.003165e-05

Note that in this example, logLik for GLS.opt is not functionally different than logLik for GLS.0. This indicates that using @@ -759,17 +775,20 @@

Simultaneous parameter estimation

likelihood of a GLS given the data. This task is computationally slower than optimizing nugget alone with fitGLS() and therefore will take some time to run.

-
fitopt <- fitGLS_opt(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, 
-                       coords = ndvi_AK3000[, c("lng", "lat")], 
-                       covar_FUN = "covar_exp", 
-                       start = c(range = .1, nugget = .2))
-fitopt$opt$par
-##      range     nugget 
-## 0.03660171 0.26859993 
-fitopt$GLS$logLik
-## [1] 13276.15
-fitopt$GLS$MSE
-## [1] 1.720775e-05
+
fitopt <- fitGLS_opt(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, 
+                     coords = ndvi_AK3000[, c("lng", "lat")], 
+                     covar_FUN = "covar_exp", 
+                     start = c(range = .1, nugget = .2),
+                     method = "BFGS", # use BFGS algorightm (see ?stats::optim())
+                     control = list(reltol = 1e-5)  # lower the convergence tolerance (see ?stats::optim()) 
+                     )
+fitopt$opt$par
+#      range     nugget 
+# 0.02497874 0.17914929 
+fitopt$GLS$logLik
+# [1] 12824.77
+fitopt$GLS$MSE
+# [1] 2.475972e-05

Note that, because fitGLS_opt() does not require time series residuals, it is possible to use fitGLS_opt() for statistical problems involving only spatial variables. In other words, @@ -798,19 +817,19 @@

Intercept-only model

If we want to test the hypothesis that “there was a trend in Alaska NDVI Alaska from 1982-2013”, we can regress the AR coefficient on an intercept-only GLS model:

-
(GLS.int <- fitGLS(AR_coef ~ 1, data = ndvi_AK3000, V = V.opt, nugget = nug.opt, no.F = TRUE))
-## 
-## Call:
-## fitGLS(formula = AR_coef ~ 1, data = ndvi_AK3000, V = V.opt, 
-##     nugget = nug.opt, no.F = TRUE)
-## 
-## t-tests:
-##                  Est t.stat pval.t
-## (Intercept) 0.000972 0.4564 0.6481
-## 
-## Model statistics:
-##      SSE       MSE logLik
-## 1 0.1503 5.011e-05  12806
+
(GLS.int <- fitGLS(AR_coef ~ 1, data = ndvi_AK3000, V = V.opt, nugget = nug.opt, no.F = TRUE))
+## 
+## Call:
+## fitGLS(formula = AR_coef ~ 1, data = ndvi_AK3000, V = V.opt, 
+##     nugget = nug.opt, no.F = TRUE)
+## 
+## t-tests:
+##                  Est t.stat pval.t
+## (Intercept) 0.000972 0.4564 0.6481
+## 
+## Model statistics:
+##      SSE       MSE logLik
+## 1 0.1503 5.011e-05  12806

We can see from the t-test that the intercept is not statistically different from zero. In other words, there is no map-level temporal trend in NDVI across the entire data set. We have not performed an @@ -822,22 +841,22 @@

Land-cover effects

If we want to test the hypothesis that “trends in Alaskan NDVI differ by land- cover class”, we can use GLS.opt() from earlier:

-
GLS.opt
-## 
-## Call:
-## fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = V.opt, 
-##     nugget = NA, no.F = FALSE)
-## 
-## t-tests:
-##                     Est t.stat pval.t
-## landShrubland 0.0009201 0.4306 0.6668
-## landSavanna   0.0003899 0.1822 0.8554
-## landGrassland 0.0011467 0.5385 0.5903
-## 
-## F-test:
-##                model df_F    SSE       MSE logLik Fstat  pval_F
-## 1 AR_coef ~ 0 + land    2 0.1499 5.003e-05  12809 3.263 0.03841
-## 2        AR_coef ~ 1 2997 0.1503 5.014e-05  12805    NA      NA
+
GLS.opt
+## 
+## Call:
+## fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = V.opt, 
+##     nugget = NA, no.F = FALSE)
+## 
+## t-tests:
+##                     Est t.stat pval.t
+## landShrubland 0.0009201 0.4306 0.6668
+## landSavanna   0.0003899 0.1822 0.8554
+## landGrassland 0.0011467 0.5385 0.5903
+## 
+## F-test:
+##                model df_F    SSE       MSE logLik Fstat  pval_F
+## 1 AR_coef ~ 0 + land    2 0.1499 5.003e-05  12809 3.263 0.03841
+## 2        AR_coef ~ 1 2997 0.1503 5.014e-05  12805    NA      NA

The t-tests show that the trend in NDVI, for all land-cover classes, was not statistically different from zero, meaning that NDVI did not show a statistically significant trend in any land-cover class. The @@ -851,22 +870,21 @@

Latitude effects

Finally, to test the hypothesis that “temporal trends in NDVI differ with latitude”, we can regress the AR coefficient on latitude in our GLS model:

-
(GLS.lat <- fitGLS(AR_coef ~ 1 + lat, data = ndvi_AK3000, V = V.opt, nugget = nug.opt,
-                   no.F = FALSE))
-## 
-## Call:
-## fitGLS(formula = AR_coef ~ 1 + lat, data = ndvi_AK3000, V = V.opt, 
-##     nugget = nug.opt, no.F = FALSE)
-## 
-## t-tests:
-##                    Est   t.stat pval.t
-## (Intercept) -8.617e-04 -0.04362 0.9652
-## lat          2.986e-05  0.09337 0.9256
-## 
-## F-test:
-##               model df_F    SSE       MSE logLik    Fstat pval_F
-## 1 AR_coef ~ 1 + lat    1 0.1503 5.012e-05  12806 0.008719 0.9256
-## 2       AR_coef ~ 1 2998 0.1503 5.012e-05  12806       NA     NA
+
(GLS.lat <- fitGLS(AR_coef ~ 1 + lat, data = ndvi_AK3000, V = V.opt, nugget = nug.opt, no.F = FALSE))
+## 
+## Call:
+## fitGLS(formula = AR_coef ~ 1 + lat, data = ndvi_AK3000, V = V.opt, 
+##     nugget = nug.opt, no.F = FALSE)
+## 
+## t-tests:
+##                    Est   t.stat pval.t
+## (Intercept) -8.617e-04 -0.04362 0.9652
+## lat          2.986e-05  0.09337 0.9256
+## 
+## F-test:
+##               model df_F    SSE       MSE logLik    Fstat pval_F
+## 1 AR_coef ~ 1 + lat    1 0.1503 5.012e-05  12806 0.008719 0.9256
+## 2       AR_coef ~ 1 2998 0.1503 5.012e-05  12806       NA     NA

The t-tests show that temporal trends in NDVI did not differ with latitude. Note that the p-value from the F-test is equivalent to that of the t-test p-value for effect of latitude.

@@ -916,7 +934,7 @@

Partitioned GLS

set so you don’t have to. These are in the AR_coef column of ndvi_AK10000. However, we used a complete data set, so you will need to remove rare land-cover classes.

-
df = ndvi_AK10000
+
df = ndvi_AK10000

Step (1) is to divide pixels up into partitions, which is done with the function sample_parition(). Passing sample_partitions, the number of pixels in our map, and the @@ -926,10 +944,9 @@

Partitioned GLS

Each of these 20 samples (partitions) are non-overlapping, containing no repeats. Setting npart = NA will automatically give the maximum number of partitions possible (i.e., 20).

-
pm <- sample_partitions(npix = nrow(df), partsize = 1500, npart = NA)
-## [1] "calculating npart"
-dim(pm)
-## [1] 1500    6
+
pm <- sample_partitions(npix = nrow(df), partsize = 1500, npart = NA)
+dim(pm)
+## [1] 1500    6

Once we have our partition matrix, fitGLS_parititon() performs steps (2)-(4) of the analyses. The input is similar to fitGLS. For this example, we specify (i) a formula, (ii) @@ -947,37 +964,37 @@

Partitioned GLS

code as an R object partGLS.ndviAK so that you can look at its output without having to execute the function.

The model was fit with this code:

-
partGLS_ndviAK <- fitGLS_partition(formula = AR_coef ~ 0 + land, data = df, 
-                                   partmat = pm, covar_FUN = "covar_exp", 
-                                   covar.pars = list(range = range.opt),
-                                   nugget = nug.opt, ncores = 4)
+
partGLS_ndviAK <- fitGLS_partition(formula = AR_coef ~ 0 + land, data = df, 
+                                   partmat = pm, covar_FUN = "covar_exp", 
+                                   covar.pars = list(range = range.opt),
+                                   nugget = nug.opt, ncores = 4)

and can be loaded with

-
data(partGLS_ndviAK)
+
data(partGLS_ndviAK)

Here are the t-tests, that show that land cover class does not significantly affect NDVI trend:

-
partGLS_ndviAK$overall$t.test
-## $p.t
-##                        Est          SE    t.stat    pval.t
-## landShrubland 0.0013094359 0.001899589 0.6893260 0.4906359
-## landSavanna   0.0004379666 0.001902064 0.2302586 0.8178960
-## landGrassland 0.0016210956 0.001894683 0.8556027 0.3922404
-## 
-## $covar_coef
-##              [,1]         [,2]         [,3]
-## [1,] 3.623978e-06 3.569499e-06 3.561642e-06
-## [2,] 3.566697e-06 3.633439e-06 3.537596e-06
-## [3,] 3.562415e-06 3.540957e-06 3.605232e-06
+
partGLS_ndviAK$overall$t.test
+## $p.t
+##                        Est          SE    t.stat    pval.t
+## landShrubland 0.0013094359 0.001899589 0.6893260 0.4906359
+## landSavanna   0.0004379666 0.001902064 0.2302586 0.8178960
+## landGrassland 0.0016210956 0.001894683 0.8556027 0.3922404
+## 
+## $covar_coef
+##              [,1]         [,2]         [,3]
+## [1,] 3.623978e-06 3.569499e-06 3.561642e-06
+## [2,] 3.566697e-06 3.633439e-06 3.537596e-06
+## [3,] 3.562415e-06 3.540957e-06 3.605232e-06

It is highly recommended that users read the full documentation (?fitGLS_parition()) before using fitGLS_partition to analyze any data.

Here is the p-value for the chisqr test of the partitioned GLS

-
partGLS_ndviAK$overall$pval.chisqr
-## pval.chisqr 
-##       1e-06 
-## attr(,"rankdef.MSR")
-## [1] 0
+
partGLS_ndviAK$overall$pval.chisqr
+## pval.chisqr 
+##       1e-06 
+## attr(,"rankdef.MSR")
+## [1] 0

This again, indicates that the model which includes land cover classes better explains variation in NDVI trends than the intercept-only model. Note that the p-value is much lower than the p-value from the @@ -1014,6 +1031,36 @@

Conclusions

+ +
+ + + + + + + diff --git a/render_vignette.R b/render_vignette.R new file mode 100644 index 0000000..3accd7d --- /dev/null +++ b/render_vignette.R @@ -0,0 +1,4 @@ +rmarkdown::render(input = "vignettes/Alaska.Rmd", + output_format = "html_document", + output_file = "Alaska.html", + output_dir = "docs/") diff --git a/vignettes/Alaska.Rmd b/vignettes/Alaska.Rmd index 562df10..c737a4c 100644 --- a/vignettes/Alaska.Rmd +++ b/vignettes/Alaska.Rmd @@ -253,8 +253,7 @@ this covariance function. ```{r visualize_range, fig.asp = 1, fig.width = 4.5, include = FALSE} curve(covar_exp(x, r = .1), xlab = "distance", ylab = "covar_exp(d, r)") curve(covar_exp(x, r = .2), add = TRUE, col = "red") -legend("topright", legend = c("0.1", "0.2"), title = "r", - col = c("black", "red"), lty = 1) +legend("topright", legend = c("0.1", "0.2"), title = "r", col = c("black", "red"), lty = 1) ``` `V` represents the correlation among points if all variation is accounted for. @@ -267,8 +266,7 @@ $I$ is the identity matrix. nugget <- .3 curve(covar_exp(x, r = .1), xlab = "distance", ylab = "covar_exp(d, r)") curve((1-nugget)*covar_exp(x, r = .1), add = TRUE, col = "red") -legend("topright", legend = c(0, .2), title = expression(eta), - col = c("black", "red"), lty = 1) +legend("topright", legend = c(0, .2), title = expression(eta), col = c("black", "red"), lty = 1) ``` If we know the range parameter $r$, we can calculate `V` from `D` with `covar_exp()`: @@ -381,9 +379,8 @@ at 0.1. For this example, we will also specify `fit.n = 3000`, which ensures that all pixels are used to estimate spatial parameters. ```{r fit_range_parameter} -corfit <- fitCor(resids = residuals(ARfit), coords = coords, - covar_FUN = "covar_exp", start = list(range = 0.1), - fit.n = 3000) +corfit <- fitCor(resids = residuals(ARfit), coords = coords, covar_FUN = "covar_exp", + start = list(range = 0.1), fit.n = 3000) (range.opt = corfit$spcor) ``` @@ -397,9 +394,8 @@ map: ```{r fit_range_km, eval = FALSE} max.dist <- max(distm_km(coords)) -corfit.km <- fitCor(resids = residuals(ARfit), coords = coords, - covar_FUN = "covar_exp", start = list(range = max.dist*0.1), - distm_FUN = "distm_km", fit.n = 3000) +corfit.km <- fitCor(resids = residuals(ARfit), coords = coords, covar_FUN = "covar_exp", + start = list(range = max.dist*0.1), distm_FUN = "distm_km", fit.n = 3000) ``` Note that, depending on the covariance function used, not all parameters will @@ -428,8 +424,7 @@ so that F-tests are calculated. For the F-tests, the default reduced model is th it is also possible to specify alternative reduced models as a formula in the `formula0` option. ```{r optimized_nugget} -GLS.opt <- fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = V.opt, nugget = NA, - no.F = FALSE) +GLS.opt <- fitGLS(formula = AR_coef ~ 0 + land, data = ndvi_AK3000, V = V.opt, nugget = NA, no.F = FALSE) (nug.opt = GLS.opt$nugget) coefficients(GLS.opt) ``` @@ -528,8 +523,7 @@ Finally, to test the hypothesis that "temporal trends in NDVI differ with latitude", we can regress the AR coefficient on latitude in our GLS model: ```{r fit_GLS_latitude} -(GLS.lat <- fitGLS(AR_coef ~ 1 + lat, data = ndvi_AK3000, V = V.opt, nugget = nug.opt, - no.F = FALSE)) +(GLS.lat <- fitGLS(AR_coef ~ 1 + lat, data = ndvi_AK3000, V = V.opt, nugget = nug.opt, no.F = FALSE)) ``` The t-tests show that temporal trends in NDVI did not differ with latitude.