From 1cf5aecfc9a1f3482ee25bf92654bc63d0d527c4 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 13 Jul 2022 18:09:04 -0700 Subject: [PATCH 01/60] Updated code for separate PR to deal with coupling. --- tests/testthat/test-epix_slide.R | 24 ++++++++++++++++++++++ tests/testthat/test-methods-epi_archive.R | 25 ----------------------- 2 files changed, 24 insertions(+), 25 deletions(-) create mode 100644 tests/testthat/test-epix_slide.R diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R new file mode 100644 index 00000000..84057586 --- /dev/null +++ b/tests/testthat/test-epix_slide.R @@ -0,0 +1,24 @@ +# (epi_archive) slide tests +test_that("epix_slide only works on an epi_archive",{ + expect_error(epix_slide(data.frame(x=1))) +}) + +test_that("epix_slide works as intended",{ + x2 <- ea$clone()$DT %>% + filter(geo_value == "ca", version <= as.Date("2020-06-09")) %>% + select(-percent_cli,-case_rate_7d_av) %>% + mutate(binary = 2^(row_number()-1)) %>% + as_epi_archive() + + time_values <- seq(as.Date("2020-06-01"), + as.Date("2020-06-09"), + by = "1 day") + xx <- epix_slide(x = x2, + f = ~ sum(.x$binary), + n = 3, + group_by = geo_value, + ref_time_values = time_values, + new_col_name = '3d_sum_binary') + + # No test here as this is broken +}) \ No newline at end of file diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R index 452b77e6..91367271 100644 --- a/tests/testthat/test-methods-epi_archive.R +++ b/tests/testthat/test-methods-epi_archive.R @@ -65,28 +65,3 @@ test_that("data.table merging works as intended",{ as_epi_archive(merge(dt1,dt2)) ) }) - -# (epi_archive) slide tests -test_that("epix_slide only works on an epi_archive",{ - expect_error(epix_slide(data.frame(x=1))) -}) - -test_that("epix_slide works as intended",{ - x2 <- ea$clone()$DT %>% - filter(geo_value == "ca", version <= as.Date("2020-06-09")) %>% - select(-percent_cli,-case_rate_7d_av) %>% - mutate(binary = 2^(row_number()-1)) %>% - as_epi_archive() - - time_values <- seq(as.Date("2020-06-01"), - as.Date("2020-06-09"), - by = "1 day") - xx <- epix_slide(x = x2, - f = ~ sum(.x$binary), - n = 3, - group_by = geo_value, - ref_time_values = time_values, - new_col_name = '3d_sum_binary') - - # No test here as this is broken -}) \ No newline at end of file From d730876feaa41f179b73f07c3eb4cd73424dcad6 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 13 Jul 2022 18:12:02 -0700 Subject: [PATCH 02/60] Removed file that can cause conflicts due to differing branches. --- tests/testthat/test-epix_slide.R | 5 +- tests/testthat/test-methods-epi_archive.R | 67 ----------------------- 2 files changed, 4 insertions(+), 68 deletions(-) delete mode 100644 tests/testthat/test-methods-epi_archive.R diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 84057586..49de23ec 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -1,4 +1,7 @@ -# (epi_archive) slide tests +library(dplyr) + +ea <- archive_cases_dv_subset$clone() + test_that("epix_slide only works on an epi_archive",{ expect_error(epix_slide(data.frame(x=1))) }) diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R deleted file mode 100644 index 91367271..00000000 --- a/tests/testthat/test-methods-epi_archive.R +++ /dev/null @@ -1,67 +0,0 @@ -library(dplyr) - -ea <- archive_cases_dv_subset$clone() - -# epix_as_of tests -test_that("epix_as_of behaves identically to as_of method",{ - expect_identical(epix_as_of(ea,max_version = min(ea$DT$version)), - ea$as_of(max_version = min(ea$DT$version))) -}) - -test_that("Errors are thrown due to bad as_of inputs",{ - # max_version cannot be of string class rather than date class - expect_error(ea$as_of("2020-01-01")) - # max_version cannot be later than latest version - expect_error(ea$as_of(as.Date("2025-01-01"))) - # max_version cannot be a vector - expect_error(ea$as_of(c(as.Date("2020-01-01"), as.Date("2020-01-02")))) -}) - -test_that("Warning against max_version being same as edf's max version",{ - expect_warning(ea$as_of(max_version = max(ea$DT$version))) - expect_warning(ea$as_of(max_version = min(ea$DT$version)),NA) -}) - -test_that("as_of properly grabs the data",{ - df_as_of <- ea %>% - epix_as_of(max_version = as.Date("2020-07-01")) %>% - na.omit() %>% - as.data.frame() - - df_filter <- ea$DT %>% - filter(version == as.Date("2020-07-01")) %>% - na.omit() %>% - select(-version) %>% - as.data.frame() - - expect_equal(df_as_of[1:4],df_filter) -}) - -# epix_merge tests -test_that("epix_merge requires second argument to be a data.table or - epi_archive",{ - expect_error(epix_merge(ea,data.frame(x=1))) -}) - -test_that("data.table merging is utilized if second argument is a data.table",{ - dt1 <- select(ea$DT , -case_rate_7d_av) - ea1 <- as_epi_archive(dt1) - dt2 <- select(ea$DT , -percent_cli) - - expect_identical( - epix_merge(ea1,dt2), - merge(dt1,dt2) - ) -}) - -test_that("data.table merging works as intended",{ - ea <- archive_cases_dv_subset$clone() - dt1 <- select(ea$DT , -case_rate_7d_av) - ea1 <- as_epi_archive(dt1) - dt2 <- select(ea$DT , -percent_cli) - - expect_identical( - as_epi_archive(ea$DT), - as_epi_archive(merge(dt1,dt2)) - ) -}) From 2d96c5a26aec23308d2bd1196adfeab822459d7f Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 13 Jul 2022 19:37:38 -0700 Subject: [PATCH 03/60] Updated tests. --- R/archive.R | 4 ++-- tests/testthat/test-epix_slide.R | 13 ++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/R/archive.R b/R/archive.R index e81315a7..67d202fc 100644 --- a/R/archive.R +++ b/R/archive.R @@ -301,8 +301,8 @@ epi_archive = } # If a custom time step is specified, then redefine units - before_num = n-1 - if (!missing(time_step)) before_num = time_step(n-1) + before_num = n + if (!missing(time_step)) before_num = time_step(n) # What to group by? If missing, set according to internal keys if (missing(group_by)) { diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 49de23ec..2e611db6 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -21,7 +21,18 @@ test_that("epix_slide works as intended",{ n = 3, group_by = geo_value, ref_time_values = time_values, - new_col_name = '3d_sum_binary') + new_col_name = 'sum_binary') # No test here as this is broken + + xx2 <- x2$DT %>% + filter(time_value + 1 == version) %>% + mutate(sum_binary = lag(binary) + lag(binary,2) + lag(binary,3)) %>% + select(-version,-binary) %>% + as_tibble() + + xx2 <- tail(xx2,-2) + xx2[1,3] <- 1 + + expect_identical(xx,xx2) }) \ No newline at end of file From 4c45aa5f736c50c16c01168b4a38e4ee7df94a39 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 13 Jul 2022 19:37:46 -0700 Subject: [PATCH 04/60] Fixed a typo. --- R/methods-epi_archive.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index eb1544f1..bb60f18e 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -191,7 +191,7 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' # 0 day which has no results, for 2020-06-01 #' # 1 day, for 2020-06-02 #' # 2 days, for the rest of the results -#' # never 3 days dur to data latency +#' # never 3 days due to data latency #' #' time_values <- seq(as.Date("2020-06-01"), #' as.Date("2020-06-15"), From f06b7feee67d484b531fc32e5888f446fffda7a1 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 13 Jul 2022 19:48:58 -0700 Subject: [PATCH 05/60] Still needs work. --- tests/testthat/test-epix_slide.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 2e611db6..dfb862e2 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -31,8 +31,9 @@ test_that("epix_slide works as intended",{ select(-version,-binary) %>% as_tibble() - xx2 <- tail(xx2,-2) + xx2 <- tail(xx2,-1) xx2[1,3] <- 1 + xx2[2,3] <- 33 expect_identical(xx,xx2) }) \ No newline at end of file From 86832491207e012b07f75c7bf5e7a9f1ffc5f1ef Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 15 Jul 2022 11:16:17 -0700 Subject: [PATCH 06/60] Made updates as to be able to check after misunderstandings. --- R/archive.R | 4 ++-- tests/testthat/test-epix_slide.R | 16 +--------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/R/archive.R b/R/archive.R index 67d202fc..e81315a7 100644 --- a/R/archive.R +++ b/R/archive.R @@ -301,8 +301,8 @@ epi_archive = } # If a custom time step is specified, then redefine units - before_num = n - if (!missing(time_step)) before_num = time_step(n) + before_num = n-1 + if (!missing(time_step)) before_num = time_step(n-1) # What to group by? If missing, set according to internal keys if (missing(group_by)) { diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index dfb862e2..fb659b18 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -18,22 +18,8 @@ test_that("epix_slide works as intended",{ by = "1 day") xx <- epix_slide(x = x2, f = ~ sum(.x$binary), - n = 3, + n = 5, group_by = geo_value, ref_time_values = time_values, new_col_name = 'sum_binary') - - # No test here as this is broken - - xx2 <- x2$DT %>% - filter(time_value + 1 == version) %>% - mutate(sum_binary = lag(binary) + lag(binary,2) + lag(binary,3)) %>% - select(-version,-binary) %>% - as_tibble() - - xx2 <- tail(xx2,-1) - xx2[1,3] <- 1 - xx2[2,3] <- 33 - - expect_identical(xx,xx2) }) \ No newline at end of file From 3f42e60ca760cc4764c6ff598e73e34e5f198952 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 15 Jul 2022 12:28:15 -0700 Subject: [PATCH 07/60] Added spacing. --- tests/testthat/test-epix_slide.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index fb659b18..5d65bfc8 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -16,6 +16,7 @@ test_that("epix_slide works as intended",{ time_values <- seq(as.Date("2020-06-01"), as.Date("2020-06-09"), by = "1 day") + xx <- epix_slide(x = x2, f = ~ sum(.x$binary), n = 5, From 08b22e5d11c10d89e9a9832833a3d200b253d67b Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 15 Jul 2022 13:53:03 -0700 Subject: [PATCH 08/60] Fixed a typo. --- man/epix_slide.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index b6f7a323..24f2114f 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -131,7 +131,7 @@ is equivalent to: # 0 day which has no results, for 2020-06-01 # 1 day, for 2020-06-02 # 2 days, for the rest of the results -# never 3 days dur to data latency +# never 3 days due to data latency time_values <- seq(as.Date("2020-06-01"), as.Date("2020-06-15"), From d5a402c793bc05af21c676b9a94f47a7826a93a0 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 15 Jul 2022 13:53:11 -0700 Subject: [PATCH 09/60] Added a test that should be granted. --- tests/testthat/test-epix_slide.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 5d65bfc8..e848a0ed 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -23,4 +23,6 @@ test_that("epix_slide works as intended",{ group_by = geo_value, ref_time_values = time_values, new_col_name = 'sum_binary') + + expect_equal(colnames(xx)[2], "version") }) \ No newline at end of file From dfc3566fa151733c619000cf7f56c82eefff3488 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 15 Jul 2022 14:16:11 -0700 Subject: [PATCH 10/60] Finally fixed epix_slide to work with the right value. --- R/archive.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/archive.R b/R/archive.R index e81315a7..78f8fe7c 100644 --- a/R/archive.R +++ b/R/archive.R @@ -324,7 +324,7 @@ epi_archive = # Computation for one group, one time value comp_one_grp = function(.data_group, f, ..., - time_value, + version, key_vars, new_col) { # Carry out the specified computation @@ -370,7 +370,7 @@ epi_archive = # Note that we've already recycled comp value to make size stable, # so tibble() will just recycle time value appropriately - return(tibble::tibble(time_value = time_value, + return(tibble::tibble(version = version, !!new_col := comp_value)) } @@ -384,7 +384,7 @@ epi_archive = dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, ..., - time_value = t, + version = t, key_vars = key_vars, new_col = new_col, .keep = TRUE) %>% @@ -412,7 +412,7 @@ epi_archive = dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, quo = quo, - time_value = t, + version = t, key_vars = key_vars, new_col = new_col, .keep = TRUE) %>% From 62ce720a57bdeef65b42b8cff59116445c1cbc8c Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 15 Jul 2022 15:03:22 -0700 Subject: [PATCH 11/60] Updated data for testing --- tests/testthat/test-epix_slide.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index e848a0ed..230bf999 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -10,19 +10,29 @@ test_that("epix_slide works as intended",{ x2 <- ea$clone()$DT %>% filter(geo_value == "ca", version <= as.Date("2020-06-09")) %>% select(-percent_cli,-case_rate_7d_av) %>% - mutate(binary = 2^(row_number()-1)) %>% + mutate(binary = 2^(row_number())) %>% as_epi_archive() time_values <- seq(as.Date("2020-06-01"), as.Date("2020-06-09"), by = "1 day") - xx <- epix_slide(x = x2, + xx1 <- epix_slide(x = x2, f = ~ sum(.x$binary), n = 5, group_by = geo_value, ref_time_values = time_values, new_col_name = 'sum_binary') - expect_equal(colnames(xx)[2], "version") + xx2 <- tibble(geo_value = rep("ca",7), + version = as.Date("2020-06-01") + 1:7, + sum_binary = c(2^1, + 2^6+2^1, + 2^11+2^6+2^1, + 2^16+2^11+2^6+2^1, + 2^19+2^16+2^12+2^7, + 2^21+2^19+2^16+2^13, + 2^22+2^21+2^19+2^17)) + + expect_identical(xx1,xx2) }) \ No newline at end of file From 5ee25e6d5f7feb0d42fb27333098de549c3e6aac Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 15 Jul 2022 15:13:39 -0700 Subject: [PATCH 12/60] Still needs ref_time_values refactoring. --- tests/testthat/test-epix_slide.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 230bf999..baf1d858 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -35,4 +35,4 @@ test_that("epix_slide works as intended",{ 2^22+2^21+2^19+2^17)) expect_identical(xx1,xx2) -}) \ No newline at end of file +}) From ee9e9f66878ec65f9aee39cc0a93809c693469ea Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 18 Jul 2022 11:43:28 -0700 Subject: [PATCH 13/60] Refactored `n` to `max_version_gap` and addressed a typo. --- R/archive.R | 6 +++--- R/methods-epi_archive.R | 13 +++++++------ man/epi_archive.Rd | 2 +- man/epix_slide.Rd | 15 ++++++++------- tests/testthat/test-epix_slide.R | 2 +- 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/R/archive.R b/R/archive.R index 78f8fe7c..d0c59be7 100644 --- a/R/archive.R +++ b/R/archive.R @@ -286,7 +286,7 @@ epi_archive = #' details. #' @importFrom data.table key #' @importFrom rlang !! !!! enquo enquos is_quosure sym syms - slide = function(f, ..., n = 7, group_by, ref_time_values, + slide = function(f, ..., max_version_gap = 7, group_by, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { @@ -301,8 +301,8 @@ epi_archive = } # If a custom time step is specified, then redefine units - before_num = n-1 - if (!missing(time_step)) before_num = time_step(n-1) + before_num = max_version_gap-1 + if (!missing(time_step)) before_num = time_step(max_version_gap-1) # What to group by? If missing, set according to internal keys if (missing(group_by)) { diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index bb60f18e..7203e197 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -113,7 +113,8 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' via `f`. Alternatively, if `f` is missing, then the current argument is #' interpreted as an expression for tidy evaluation. #' @param n Number of time steps to use in the running window. For example, if -#' `n = 7`, and one time step is one day, then to produce a value on January 7 +#' `max_version_gap = 7`, and one time step is one day, then to produce a +#' value on January 7 #' we apply the given function or formula to data in between January 1 and #' 7. Default is 7. #' @param group_by The variable(s) to group by before slide computation. If @@ -176,11 +177,11 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' Finally, this is simply a wrapper around the `slide()` method of the #' `epi_archive` class, so if `x` is an `epi_archive` object, then: #' ``` -#' epix_slide(x, new_var = comp(old_var), n = 120) +#' epix_slide(x, new_var = comp(old_var), max_version_gap = 120) #' ``` #' is equivalent to: #' ``` -#' x$slide(x, new_var = comp(old_var), n = 120) +#' x$slide(new_var = comp(old_var), max_version_gap = 120) #' ``` #' #' @importFrom rlang enquo @@ -198,15 +199,15 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' by = "1 day") #' epix_slide(x = archive_cases_dv_subset, #' f = ~ mean(.x$case_rate), -#' n = 3, +#' max_version_gap = 3, #' group_by = geo_value, #' ref_time_values = time_values, #' new_col_name = 'case_rate_3d_av') -epix_slide = function(x, f, ..., n = 7, group_by, ref_time_values, +epix_slide = function(x, f, ..., max_version_gap = 7, group_by, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") - return(x$slide(f, ..., n = n, + return(x$slide(f, ..., max_version_gap = max_version_gap, group_by = enquo(group_by), ref_time_values = ref_time_values, time_step = time_step, diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd index 84ac9406..bb3eaaeb 100644 --- a/man/epi_archive.Rd +++ b/man/epi_archive.Rd @@ -193,7 +193,7 @@ details. \if{html}{\out{
}}\preformatted{epi_archive$slide( f, ..., - n = 7, + max_version_gap = 7, group_by, ref_time_values, time_step, diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index 24f2114f..bc67fdea 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -8,7 +8,7 @@ epix_slide( x, f, ..., - n = 7, + max_version_gap = 7, group_by, ref_time_values, time_step, @@ -34,11 +34,6 @@ sliding window of \code{n} time steps.} via \code{f}. Alternatively, if \code{f} is missing, then the current argument is interpreted as an expression for tidy evaluation.} -\item{n}{Number of time steps to use in the running window. For example, if -\code{n = 7}, and one time step is one day, then to produce a value on January 7 -we apply the given function or formula to data in between January 1 and -7. Default is 7.} - \item{group_by}{The variable(s) to group by before slide computation. If missing, then the keys in the underlying data table, excluding \code{time_value} and \code{version}, will be used for grouping. To omit a grouping entirely, use @@ -72,6 +67,12 @@ from \code{new_col_name} entirely.} combination of grouping variables and unique time values in the underlying data table. Otherwise, there will be one row in the output for each time value in \code{x} that acts as a reference time value. Default is \code{FALSE}.} + +\item{n}{Number of time steps to use in the running window. For example, if +\code{max_version_gap = 7}, and one time step is one day, then to produce a +value on January 7 +we apply the given function or formula to data in between January 1 and +7. Default is 7.} } \value{ A tibble whose columns are: the grouping variables, \code{time_value}, @@ -138,7 +139,7 @@ time_values <- seq(as.Date("2020-06-01"), by = "1 day") epix_slide(x = archive_cases_dv_subset, f = ~ mean(.x$case_rate), - n = 3, + max_version_gap = 3, group_by = geo_value, ref_time_values = time_values, new_col_name = 'case_rate_3d_av') diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index baf1d858..95e6522d 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -19,7 +19,7 @@ test_that("epix_slide works as intended",{ xx1 <- epix_slide(x = x2, f = ~ sum(.x$binary), - n = 5, + max_version_gap = 5, group_by = geo_value, ref_time_values = time_values, new_col_name = 'sum_binary') From ce44a28c7f1a3dd7f182cbd91efa6beab0471ca7 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 18 Jul 2022 11:54:32 -0700 Subject: [PATCH 14/60] Refactored code. --- man/epix_slide.Rd | 4 ++-- vignettes/archive.Rmd | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index bc67fdea..dbe4d808 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -118,12 +118,12 @@ version-aware sliding is necessary (as it its purpose). Finally, this is simply a wrapper around the \code{slide()} method of the \code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then: -\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), n = 120) +\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), max_version_gap = 120) }\if{html}{\out{
}} is equivalent to: -\if{html}{\out{
}}\preformatted{x$slide(x, new_var = comp(old_var), n = 120) +\if{html}{\out{
}}\preformatted{x$slide(new_var = comp(old_var), max_version_gap = 120) }\if{html}{\out{
}} } \examples{ diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index e729b32d..cecee288 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -342,7 +342,8 @@ fc_time_values <- seq(as.Date("2020-08-01"), as.Date("2021-12-01"), by = "1 month") -z <- epix_slide(x, fc = prob_arx(x = percent_cli, y = case_rate_7d_av), n = 120, +z <- epix_slide(x, fc = prob_arx(x = percent_cli, y = case_rate_7d_av), + max_version_gap = 120, ref_time_values = fc_time_values, group_by = geo_value) head(z, 10) @@ -372,14 +373,16 @@ x_latest <- epix_as_of(x, max_version = max(x$DT$version)) k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { if (as_of) { x %>% - epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), n = 120, + epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), + max_version_gap = 120, ref_time_values = fc_time_values, group_by = geo_value) %>% mutate(target_date = time_value + ahead, as_of = TRUE) } else { x_latest %>% group_by(geo_value) %>% - epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), n = 120, + epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), + max_version_gap = 120, ref_time_values = fc_time_values) %>% mutate(target_date = time_value + ahead, as_of = FALSE) } From 2a0c97e99e3e8521b11d5d89d19f0fed61934dbb Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 18 Jul 2022 11:56:19 -0700 Subject: [PATCH 15/60] Changed outdated name. --- R/methods-epi_archive.R | 3 ++- man/epix_slide.Rd | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 7203e197..a60a0465 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -112,7 +112,8 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' @param ... Additional arguments to pass to the function or formula specified #' via `f`. Alternatively, if `f` is missing, then the current argument is #' interpreted as an expression for tidy evaluation. -#' @param n Number of time steps to use in the running window. For example, if +#' @param max_version_gap Number of time steps to use in the running window. +#' For example, if #' `max_version_gap = 7`, and one time step is one day, then to produce a #' value on January 7 #' we apply the given function or formula to data in between January 1 and diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index dbe4d808..8c968264 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -34,6 +34,13 @@ sliding window of \code{n} time steps.} via \code{f}. Alternatively, if \code{f} is missing, then the current argument is interpreted as an expression for tidy evaluation.} +\item{max_version_gap}{Number of time steps to use in the running window. +For example, if +\code{max_version_gap = 7}, and one time step is one day, then to produce a +value on January 7 +we apply the given function or formula to data in between January 1 and +7. Default is 7.} + \item{group_by}{The variable(s) to group by before slide computation. If missing, then the keys in the underlying data table, excluding \code{time_value} and \code{version}, will be used for grouping. To omit a grouping entirely, use @@ -67,12 +74,6 @@ from \code{new_col_name} entirely.} combination of grouping variables and unique time values in the underlying data table. Otherwise, there will be one row in the output for each time value in \code{x} that acts as a reference time value. Default is \code{FALSE}.} - -\item{n}{Number of time steps to use in the running window. For example, if -\code{max_version_gap = 7}, and one time step is one day, then to produce a -value on January 7 -we apply the given function or formula to data in between January 1 and -7. Default is 7.} } \value{ A tibble whose columns are: the grouping variables, \code{time_value}, From 35f53d4a9f530eaa22d8bcc5c88881611a2a8511 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 18 Jul 2022 14:11:37 -0700 Subject: [PATCH 16/60] Removed defaults for `slide`. --- R/archive.R | 2 +- R/methods-epi_archive.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/archive.R b/R/archive.R index d0c59be7..096dfc8d 100644 --- a/R/archive.R +++ b/R/archive.R @@ -286,7 +286,7 @@ epi_archive = #' details. #' @importFrom data.table key #' @importFrom rlang !! !!! enquo enquos is_quosure sym syms - slide = function(f, ..., max_version_gap = 7, group_by, ref_time_values, + slide = function(f, ..., max_version_gap, group_by, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index a60a0465..868ac014 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -204,7 +204,7 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' group_by = geo_value, #' ref_time_values = time_values, #' new_col_name = 'case_rate_3d_av') -epix_slide = function(x, f, ..., max_version_gap = 7, group_by, ref_time_values, +epix_slide = function(x, f, ..., max_version_gap, group_by, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") From 0258a0339e41f17ebb8478e59f91c6fa64d24473 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 18 Jul 2022 16:12:42 -0700 Subject: [PATCH 17/60] Finally fixed an annoying bug on `advanced.Rmd`. --- R/methods-epi_archive.R | 3 ++- man/epi_archive.Rd | 2 +- man/epix_slide.Rd | 2 +- vignettes/advanced.Rmd | 12 ++++++------ 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 868ac014..1079a598 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -208,7 +208,8 @@ epix_slide = function(x, f, ..., max_version_gap, group_by, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") - return(x$slide(f, ..., max_version_gap = max_version_gap, + return(x$slide(f, ..., + max_version_gap = max_version_gap, group_by = enquo(group_by), ref_time_values = ref_time_values, time_step = time_step, diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd index bb3eaaeb..9e245709 100644 --- a/man/epi_archive.Rd +++ b/man/epi_archive.Rd @@ -193,7 +193,7 @@ details. \if{html}{\out{
}}\preformatted{epi_archive$slide( f, ..., - max_version_gap = 7, + max_version_gap, group_by, ref_time_values, time_step, diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index 8c968264..5e37b056 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -8,7 +8,7 @@ epix_slide( x, f, ..., - max_version_gap = 7, + max_version_gap, group_by, ref_time_values, time_step, diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index a76c5225..2061f86a 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -61,12 +61,12 @@ df %>% df %>% mutate(version = time_value) %>% as_epi_archive() %>% - epix_slide(x_2dav = mean(x), n = 2, ref_time_values = as.Date("2020-06-02")) + epix_slide(x_2dav = mean(x), max_version_gap = 2, ref_time_values = as.Date("2020-06-02")) df %>% mutate(version = time_value) %>% as_epi_archive() %>% - epix_slide(~ mean(.x$x), n = 2, ref_time_values = as.Date("2020-06-02")) + epix_slide(~ mean(.x$x), max_version_gap = 2, ref_time_values = as.Date("2020-06-02")) ``` When the slide computation returns an atomic vector (rather than a single value) @@ -153,7 +153,7 @@ df %>% as_epi_archive() %>% epix_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)), ref_time_values = as.Date("2020-06-02"), - n = 2, as_list_col = FALSE, names_sep = NULL) + max_version_gap = 2, as_list_col = FALSE, names_sep = NULL) ``` ## Multi-row outputs @@ -352,10 +352,10 @@ fc_time_values <- seq(as.Date("2020-08-01"), k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { if (as_of) { x %>% - epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, + epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, version, args = prob_arx_args(ahead = ahead)), - n = 120, ref_time_values = fc_time_values) %>% - mutate(target_date = time_value + ahead, as_of = TRUE, + max_version_gap = 120, ref_time_values = fc_time_values) %>% + mutate(target_date = version + ahead, as_of = TRUE, geo_value = fc_geo_value) } else { From c1369c27f70e71b5d16248032f31ea11fd818ba1 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 19 Jul 2022 09:29:39 -0700 Subject: [PATCH 18/60] Fixed errors; archive vignette still has errors. --- vignettes/advanced.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index 2061f86a..1a3b9385 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -352,7 +352,7 @@ fc_time_values <- seq(as.Date("2020-08-01"), k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { if (as_of) { x %>% - epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, version, + epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), max_version_gap = 120, ref_time_values = fc_time_values) %>% mutate(target_date = version + ahead, as_of = TRUE, From 04302b636cfe4835b9f925b34d5c997697fac4dc Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 19 Jul 2022 16:17:17 -0700 Subject: [PATCH 19/60] Updated datasets as to run properly. --- R/archive.R | 1 + vignettes/archive.Rmd | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/archive.R b/R/archive.R index 096dfc8d..13543241 100644 --- a/R/archive.R +++ b/R/archive.R @@ -376,6 +376,7 @@ epi_archive = # If f is not missing, then just go ahead, slide by group if (!missing(f)) { + if (rlang::is_formula(f)) f = rlang::as_function(f) x = purrr::map_dfr(ref_time_values, function(t) { diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index cecee288..98edf498 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -255,7 +255,7 @@ head(x$DT) ``` ```{r, echo=FALSE, message=FALSE, warning=FALSE} -x <- archive_cases_dv_subset +x <- as_epi_archive(archive_cases_dv_subset$DT) print(x) head(x$DT) ``` @@ -376,7 +376,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), max_version_gap = 120, ref_time_values = fc_time_values, group_by = geo_value) %>% - mutate(target_date = time_value + ahead, as_of = TRUE) + mutate(target_date = version + ahead, as_of = TRUE) } else { x_latest %>% @@ -384,7 +384,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), max_version_gap = 120, ref_time_values = fc_time_values) %>% - mutate(target_date = time_value + ahead, as_of = FALSE) + mutate(target_date = version + ahead, as_of = FALSE) } } From feb0f47b3265767f256dbc518305631781a00256 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 19 Jul 2022 17:30:29 -0700 Subject: [PATCH 20/60] 0 errors or warnings! --- vignettes/archive.Rmd | 120 +++++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 41 deletions(-) diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index 98edf498..af5fee8b 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -285,52 +285,88 @@ slide vignette to accomodate exogenous variables in the autoregressive model, which is often referred to as an ARX model. ```{r} -prob_arx <- function(x, y, lags = c(0, 7, 14), ahead = 7, min_train_window = 20, - lower_level = 0.05, upper_level = 0.95, symmetrize = TRUE, - intercept = FALSE, nonneg = TRUE) { +library(tidyr) +library(purrr) + +prob_arx_args <- function(lags = c(0, 7, 14), + ahead = 7, + min_train_window = 20, + lower_level = 0.05, + upper_level = 0.95, + symmetrize = TRUE, + intercept = FALSE, + nonneg = TRUE) { + return(list(lags = lags, + ahead = ahead, + min_train_window = min_train_window, + lower_level = lower_level, + upper_level = upper_level, + symmetrize = symmetrize, + intercept = intercept, + nonneg = nonneg)) +} + +prob_arx <- function(x, y, geo_value, time_value, args = prob_arx_args()) { # Return NA if insufficient training data - if (length(y) < min_train_window + max(lags) + ahead) { - return(data.frame(point = NA, lower = NA, upper = NA)) + if (length(y) < args$min_train_window + max(args$lags) + args$ahead) { + return(data.frame(geo_value = unique(geo_value), # Return geo value! + point = NA, lower = NA, upper = NA)) } - # Useful transformations + # Set up x, y, lags list if (!missing(x)) x <- data.frame(x, y) else x <- data.frame(y) - if (!is.list(lags)) lags <- list(lags) - lags = rep(lags, length.out = ncol(x)) + if (!is.list(args$lags)) args$lags <- list(args$lags) + args$lags = rep(args$lags, length.out = ncol(x)) # Build features and response for the AR model, and then fit it - dat <- do.call( - data.frame, - unlist( # Below we loop through and build the lagged features - purrr::map(1:ncol(x), function(i) { - purrr::map(lags[[i]], function(j) lag(x[,i], n = j)) - }), - recursive = FALSE)) - names(dat) = paste0("x", 1:ncol(dat)) - if (intercept) dat$x0 = rep(1, nrow(dat)) - dat$y <- lead(y, n = ahead) - obj <- lm(y ~ . + 0, data = dat) + dat <- + tibble(i = 1:ncol(x), lag = args$lags) %>% + unnest(lag) %>% + mutate(name = paste0("x", 1:nrow(.))) %>% + # One list element for each lagged feature + pmap(function(i, lag, name) { + tibble(geo_value = geo_value, + time_value = time_value + lag, # Shift back + !!name := x[,i]) + }) %>% + # One list element for the response vector + c(list( + tibble(geo_value = geo_value, + time_value = time_value - args$ahead, # Shift forward + y = y))) %>% + # Combine them together into one data frame + reduce(full_join, by = c("geo_value", "time_value")) %>% + arrange(time_value) + if (args$intercept) dat$x0 = rep(1, nrow(dat)) + obj <- lm(y ~ . + 0, data = select(dat, -geo_value, -time_value)) + + # Use LOCF to fill NAs in the latest feature values (do this by geo value) + setDT(dat) # Convert to a data.table object by reference + cols <- setdiff(names(dat), c("geo_value", "time_value")) + dat[, (cols) := nafill(.SD, type = "locf"), .SDcols = cols, by = "geo_value"] - # Use LOCF to fill NAs in the latest feature values, make a prediction - setDT(dat) - setnafill(dat, type = "locf") - point <- predict(obj, newdata = tail(dat, 1)) + # Make predictions + test_time_value = max(time_value) + point <- predict(obj, newdata = dat %>% + dplyr::group_by(geo_value) %>% + dplyr::filter(time_value == test_time_value)) - # Compute a band + # Compute bands r <- residuals(obj) - s <- ifelse(symmetrize, -1, NA) # Should the residuals be symmetrized? - q <- quantile(c(r, s * r), probs = c(lower_level, upper_level), na.rm = TRUE) + s <- ifelse(args$symmetrize, -1, NA) # Should the residuals be symmetrized? + q <- quantile(c(r, s * r), probs = c(args$lower, args$upper), na.rm = TRUE) lower <- point + q[1] upper <- point + q[2] # Clip at zero if we need to, then return - if (nonneg) { - point = max(point, 0) - lower = max(lower, 0) - upper = max(upper, 0) + if (args$nonneg) { + point = pmax(point, 0) + lower = pmax(lower, 0) + upper = pmax(upper, 0) } - return(data.frame(point = point, lower = lower, upper = upper)) + return(data.frame(geo_value = unique(geo_value), # Return geo value! + point = point, lower = lower, upper = upper)) } ``` @@ -342,7 +378,8 @@ fc_time_values <- seq(as.Date("2020-08-01"), as.Date("2021-12-01"), by = "1 month") -z <- epix_slide(x, fc = prob_arx(x = percent_cli, y = case_rate_7d_av), +z <- epix_slide(x, fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, + args = prob_arx_args(ahead = 7)), max_version_gap = 120, ref_time_values = fc_time_values, group_by = geo_value) @@ -373,21 +410,22 @@ x_latest <- epix_as_of(x, max_version = max(x$DT$version)) k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { if (as_of) { x %>% - epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), - max_version_gap = 120, - ref_time_values = fc_time_values, group_by = geo_value) %>% - mutate(target_date = version + ahead, as_of = TRUE) + epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, + args = prob_arx_args(ahead = ahead)), + max_version_gap = 120, ref_time_values = fc_time_values) %>% + mutate(target_date = version + ahead, as_of = TRUE, + geo_value = fc_geo_value) } else { x_latest %>% - group_by(geo_value) %>% - epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), - max_version_gap = 120, - ref_time_values = fc_time_values) %>% - mutate(target_date = version + ahead, as_of = FALSE) + epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, + args = prob_arx_args(ahead = ahead)), + n = 120, ref_time_values = fc_time_values) %>% + mutate(target_date = time_value + ahead, as_of = FALSE) } } + # Generate the forecasts, and bind them together fc <- bind_rows(k_week_ahead(x, ahead = 7, as_of = TRUE), k_week_ahead(x, ahead = 14, as_of = TRUE), From 08a580d6526d86cb57e40526e463e0adea7d4dd5 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 19 Jul 2022 19:09:21 -0700 Subject: [PATCH 21/60] Refactor of `ref_time_values` still needs fixing! --- R/archive.R | 12 ++++++------ R/methods-epi_archive.R | 8 ++++---- man/epi_archive.Rd | 2 +- man/epix_slide.Rd | 6 +++--- tests/testthat/test-epix_slide.R | 4 ++-- vignettes/advanced.Rmd | 14 +++++++------- vignettes/archive.Rmd | 8 ++++---- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/R/archive.R b/R/archive.R index 13543241..d6d49523 100644 --- a/R/archive.R +++ b/R/archive.R @@ -286,17 +286,17 @@ epi_archive = #' details. #' @importFrom data.table key #' @importFrom rlang !! !!! enquo enquos is_quosure sym syms - slide = function(f, ..., max_version_gap, group_by, ref_time_values, + slide = function(f, ..., max_version_gap, group_by, ref_versions, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { # If missing, then set ref time values to be everything; else make # sure we intersect with observed time values - if (missing(ref_time_values)) { - ref_time_values = unique(self$DT$time_value) + if (missing(ref_versions)) { + ref_versions = unique(self$DT$time_value) } else { - ref_time_values = ref_time_values[ref_time_values %in% + ref_versions = ref_versions[ref_versions %in% unique(self$DT$time_value)] } @@ -379,7 +379,7 @@ epi_archive = if (rlang::is_formula(f)) f = rlang::as_function(f) - x = purrr::map_dfr(ref_time_values, function(t) { + x = purrr::map_dfr(ref_versions, function(t) { self$as_of(t, min_time_value = t - before_num) %>% tibble::as_tibble() %>% dplyr::group_by(!!!group_by) %>% @@ -407,7 +407,7 @@ epi_archive = f = function(x, quo, ...) rlang::eval_tidy(quo, x) new_col = sym(names(rlang::quos_auto_name(quos))) - x = purrr::map_dfr(ref_time_values, function(t) { + x = purrr::map_dfr(ref_versions, function(t) { self$as_of(t, min_time_value = t - before_num) %>% tibble::as_tibble() %>% dplyr::group_by(!!!group_by) %>% diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 63d10ef8..87d9c5b1 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -122,7 +122,7 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' missing, then the keys in the underlying data table, excluding `time_value` #' and `version`, will be used for grouping. To omit a grouping entirely, use #' `group_by = NULL`. -#' @param ref_time_values Time values for sliding computations, meaning, each +#' @param ref_versions Time values for sliding computations, meaning, each #' element of this vector serves as the reference time point for one sliding #' window. If missing, then this will be set to all unique time values in the #' underlying data table, by default. @@ -202,16 +202,16 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' f = ~ mean(.x$case_rate_7d_av), #' n = 3, #' group_by = geo_value, -#' ref_time_values = time_values, +#' ref_versions = time_values, #' new_col_name = 'case_rate_3d_av') -epix_slide = function(x, f, ..., max_version_gap, group_by, ref_time_values, +epix_slide = function(x, f, ..., max_version_gap, group_by, ref_versions, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") return(x$slide(f, ..., max_version_gap = max_version_gap, group_by = enquo(group_by), - ref_time_values = ref_time_values, + ref_versions = ref_versions, time_step = time_step, new_col_name = new_col_name, as_list_col = as_list_col, diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd index 9e245709..8b0cfb2a 100644 --- a/man/epi_archive.Rd +++ b/man/epi_archive.Rd @@ -195,7 +195,7 @@ details. ..., max_version_gap, group_by, - ref_time_values, + ref_versions, time_step, new_col_name = "slide_value", as_list_col = FALSE, diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index e791ab59..78932025 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -10,7 +10,7 @@ epix_slide( ..., max_version_gap, group_by, - ref_time_values, + ref_versions, time_step, new_col_name = "slide_value", as_list_col = FALSE, @@ -46,7 +46,7 @@ missing, then the keys in the underlying data table, excluding \code{time_value} and \code{version}, will be used for grouping. To omit a grouping entirely, use \code{group_by = NULL}.} -\item{ref_time_values}{Time values for sliding computations, meaning, each +\item{ref_versions}{Time values for sliding computations, meaning, each element of this vector serves as the reference time point for one sliding window. If missing, then this will be set to all unique time values in the underlying data table, by default.} @@ -142,6 +142,6 @@ epix_slide(x = archive_cases_dv_subset, f = ~ mean(.x$case_rate_7d_av), n = 3, group_by = geo_value, - ref_time_values = time_values, + ref_versions = time_values, new_col_name = 'case_rate_3d_av') } diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 95e6522d..bef6c32d 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -13,7 +13,7 @@ test_that("epix_slide works as intended",{ mutate(binary = 2^(row_number())) %>% as_epi_archive() - time_values <- seq(as.Date("2020-06-01"), + versions <- seq(as.Date("2020-06-01"), as.Date("2020-06-09"), by = "1 day") @@ -21,7 +21,7 @@ test_that("epix_slide works as intended",{ f = ~ sum(.x$binary), max_version_gap = 5, group_by = geo_value, - ref_time_values = time_values, + ref_versions = versions, new_col_name = 'sum_binary') xx2 <- tibble(geo_value = rep("ca",7), diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index 1a3b9385..f84e194e 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -15,7 +15,7 @@ ensure the result of a slide operation is *size stable*, meaning, it will return something whose length is the same as the number of appearances of reference time values for the slide computation in the given data frame/table (this defaults to all time values, but can be some given subset when `ref_time_values` -is specified). +or `ref_versions` is specified, respectively). The output of a slide computation should either be an atomic value/vector, or a data frame. This data frame can have multiple columns, multiple rows, or both. @@ -61,12 +61,12 @@ df %>% df %>% mutate(version = time_value) %>% as_epi_archive() %>% - epix_slide(x_2dav = mean(x), max_version_gap = 2, ref_time_values = as.Date("2020-06-02")) + epix_slide(x_2dav = mean(x), max_version_gap = 2, ref_versions = as.Date("2020-06-02")) df %>% mutate(version = time_value) %>% as_epi_archive() %>% - epix_slide(~ mean(.x$x), max_version_gap = 2, ref_time_values = as.Date("2020-06-02")) + epix_slide(~ mean(.x$x), max_version_gap = 2, ref_versions = as.Date("2020-06-02")) ``` When the slide computation returns an atomic vector (rather than a single value) @@ -152,7 +152,7 @@ df %>% mutate(version = time_value) %>% as_epi_archive() %>% epix_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)), - ref_time_values = as.Date("2020-06-02"), + ref_versions = as.Date("2020-06-02"), max_version_gap = 2, as_list_col = FALSE, names_sep = NULL) ``` @@ -344,7 +344,7 @@ data. ```{r, message = FALSE, warning = FALSE, fig.width = 9, fig.height = 6} # Latest snapshot of data, and forecast dates x_latest <- epix_as_of(x, max_version = max(x$DT$version)) -fc_time_values <- seq(as.Date("2020-08-01"), +fc_verions <- seq(as.Date("2020-08-01"), as.Date("2021-12-01"), by = "1 month") @@ -354,7 +354,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x %>% epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - max_version_gap = 120, ref_time_values = fc_time_values) %>% + max_version_gap = 120, ref_versions = fc_versions) %>% mutate(target_date = version + ahead, as_of = TRUE, geo_value = fc_geo_value) } @@ -362,7 +362,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x_latest %>% epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - n = 120, ref_time_values = fc_time_values) %>% + n = 120, ref_versions = fc_versions) %>% mutate(target_date = time_value + ahead, as_of = FALSE) } } diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index af5fee8b..aa726ae1 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -374,14 +374,14 @@ Next we slide this forecaster over the working `epi_archive` object, in order to forecast COVID-19 case rates 7 days into the future. ```{r} -fc_time_values <- seq(as.Date("2020-08-01"), +fc_versions <- seq(as.Date("2020-08-01"), as.Date("2021-12-01"), by = "1 month") z <- epix_slide(x, fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = 7)), max_version_gap = 120, - ref_time_values = fc_time_values, group_by = geo_value) + ref_versions = fc_versions, group_by = geo_value) head(z, 10) ``` @@ -412,7 +412,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x %>% epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - max_version_gap = 120, ref_time_values = fc_time_values) %>% + max_version_gap = 120, ref_versions = fc_versions) %>% mutate(target_date = version + ahead, as_of = TRUE, geo_value = fc_geo_value) } @@ -420,7 +420,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x_latest %>% epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - n = 120, ref_time_values = fc_time_values) %>% + n = 120, ref_versions = fc_versions) %>% mutate(target_date = time_value + ahead, as_of = FALSE) } } From 666c4f7b73fda0a1c730b528cb39587c3f6f1733 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 20 Jul 2022 10:29:17 -0700 Subject: [PATCH 22/60] Finally refactored incorrect time_values on epix_slide. --- R/methods-epi_archive.R | 4 ++-- man/epix_slide.Rd | 4 ++-- vignettes/advanced.Rmd | 4 ++-- vignettes/archive.Rmd | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 87d9c5b1..3a5af3e0 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -195,14 +195,14 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' # 2 days, for the rest of the results #' # never 3 days due to data latency #' -#' time_values <- seq(as.Date("2020-06-01"), +#' versions <- seq(as.Date("2020-06-01"), #' as.Date("2020-06-15"), #' by = "1 day") #' epix_slide(x = archive_cases_dv_subset, #' f = ~ mean(.x$case_rate_7d_av), #' n = 3, #' group_by = geo_value, -#' ref_versions = time_values, +#' ref_versions = versions, #' new_col_name = 'case_rate_3d_av') epix_slide = function(x, f, ..., max_version_gap, group_by, ref_versions, time_step, new_col_name = "slide_value", diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index 78932025..348c31a8 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -135,13 +135,13 @@ is equivalent to: # 2 days, for the rest of the results # never 3 days due to data latency -time_values <- seq(as.Date("2020-06-01"), +versions <- seq(as.Date("2020-06-01"), as.Date("2020-06-15"), by = "1 day") epix_slide(x = archive_cases_dv_subset, f = ~ mean(.x$case_rate_7d_av), n = 3, group_by = geo_value, - ref_versions = time_values, + ref_versions = versions, new_col_name = 'case_rate_3d_av') } diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index f84e194e..9074413f 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -344,7 +344,7 @@ data. ```{r, message = FALSE, warning = FALSE, fig.width = 9, fig.height = 6} # Latest snapshot of data, and forecast dates x_latest <- epix_as_of(x, max_version = max(x$DT$version)) -fc_verions <- seq(as.Date("2020-08-01"), +fc_versions <- seq(as.Date("2020-08-01"), as.Date("2021-12-01"), by = "1 month") @@ -362,7 +362,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x_latest %>% epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - n = 120, ref_versions = fc_versions) %>% + n = 120, ref_time_values = fc_versions) %>% mutate(target_date = time_value + ahead, as_of = FALSE) } } diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index aa726ae1..e82ef267 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -420,7 +420,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x_latest %>% epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - n = 120, ref_versions = fc_versions) %>% + n = 120, ref_time_values = fc_versions) %>% mutate(target_date = time_value + ahead, as_of = FALSE) } } From 3134a55b4d4b511bd230bb0dbc7b67645d47a94e Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 21 Jul 2022 09:36:39 -0700 Subject: [PATCH 23/60] Changed ref_versions to refer to versions by default. --- R/archive.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/archive.R b/R/archive.R index d6d49523..79d08ddf 100644 --- a/R/archive.R +++ b/R/archive.R @@ -286,14 +286,15 @@ epi_archive = #' details. #' @importFrom data.table key #' @importFrom rlang !! !!! enquo enquos is_quosure sym syms - slide = function(f, ..., max_version_gap, group_by, ref_versions, + slide = function(f, ..., max_version_gap, group_by, + ref_versions, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { # If missing, then set ref time values to be everything; else make # sure we intersect with observed time values if (missing(ref_versions)) { - ref_versions = unique(self$DT$time_value) + ref_versions = unique(self$DT$version) } else { ref_versions = ref_versions[ref_versions %in% From bb39209145b7d88fa56f7e46c8b895630aac314e Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 26 Jul 2022 15:07:50 -0700 Subject: [PATCH 24/60] Some cleanup of slide; still incomplete. --- DESCRIPTION | 2 +- R/slide.R | 36 ++++++++++-------------------------- man/as_epi_df.Rd | 10 +++++----- man/epi_slide.Rd | 2 +- 4 files changed, 17 insertions(+), 33 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 00b78130..18a3bdfb 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,7 +56,7 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.1 Depends: R (>= 2.10) URL: https://cmu-delphi.github.io/epiprocess/ diff --git a/R/slide.R b/R/slide.R index 5861353b..9e1b9752 100644 --- a/R/slide.R +++ b/R/slide.R @@ -114,8 +114,7 @@ #' epi_slide(a = data.frame(cases_2dav = mean(cases), #' cases_2dma = mad(cases)), #' n = 2, as_list_col = TRUE) -epi_slide = function(x, f, ..., n = 7, ref_time_values, - align = c("right", "center", "left"), before, time_step, +epi_slide = function(x, f, ..., before, after, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { # Check we have an `epi_df` object @@ -133,33 +132,18 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, ref_time_values = ref_time_values[ref_time_values %in% unique(x$time_value)] } - - # If before is missing, then use align to set up alignment - if (missing(before)) { - align = match.arg(align) - if (align == "right") { - before_num = n-1 - after_num = 0 - } - else if (align == "center") { - before_num = floor((n-1)/2) - after_num = ceiling((n-1)/2) - } - else { - before_num = 0 - after_num = n-1 - } - } # Otherwise set up alignment based on passed before value - else { - if (before < 0 || before > n-1) { - Abort("`before` must be in between 0 and n-1`.") - } - - before_num = before - after_num = n-1-before + if (before < 0 ||after < 0) { + Abort("`before` and `after` must be at least 0.") } + + if (floor(before) < ceiling(before) || floor(after) < ceiling(after)) { + Abort("`before` and `after` must be integers.") + } + + before_num = before + after_num = after # If a custom time step is specified, then redefine units if (!missing(time_step)) { diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index 5d1b1335..b5df1302 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -51,9 +51,9 @@ examples. } \section{Methods (by class)}{ \itemize{ -\item \code{epi_df}: Simply returns the \code{epi_df} object unchanged. +\item \code{as_epi_df(epi_df)}: Simply returns the \code{epi_df} object unchanged. -\item \code{tbl_df}: The input tibble \code{x} must contain the columns +\item \code{as_epi_df(tbl_df)}: The input tibble \code{x} must contain the columns \code{geo_value} and \code{time_value}. All other columns will be preserved as is, and treated as measured variables. If \code{as_of} is missing, then the function will try to guess it from an \code{as_of}, \code{issue}, or \code{version} column of \code{x} @@ -61,14 +61,14 @@ will try to guess it from an \code{as_of}, \code{issue}, or \code{version} colum (stored in its attributes); if this fails, then the current day-time will be used. -\item \code{data.frame}: Works analogously to \code{as_epi_df.tbl_df()}. +\item \code{as_epi_df(data.frame)}: Works analogously to \code{as_epi_df.tbl_df()}. -\item \code{tbl_ts}: Works analogously to \code{as_epi_df.tbl_df()}, except that +\item \code{as_epi_df(tbl_ts)}: Works analogously to \code{as_epi_df.tbl_df()}, except that the \code{tbl_ts} class is dropped, and any key variables (other than "geo_value") are added to the metadata of the returned object, under the \code{other_keys} field. -}} +}} \examples{ # Convert a `tsibble` that has county code as an extra key # Notice that county code should be a character string to preserve any leading zeroes diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 9c8c2102..d63e3563 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -8,7 +8,7 @@ epi_slide( x, f, ..., - n = 7, + n, ref_time_values, align = c("right", "center", "left"), before, From 5984d8d7b03de1ed9b70ae40efbcff88d88452ab Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 26 Jul 2022 16:25:08 -0700 Subject: [PATCH 25/60] Still needs changes as before and after numbers are wrong. --- R/slide.R | 33 ++++++++++------------------ man/epi_slide.Rd | 38 +++++++++++---------------------- tests/testthat/test-epi_slide.R | 12 +++++------ vignettes/slide.Rmd | 12 +++++------ 4 files changed, 35 insertions(+), 60 deletions(-) diff --git a/R/slide.R b/R/slide.R index 9e1b9752..7344ab32 100644 --- a/R/slide.R +++ b/R/slide.R @@ -19,24 +19,14 @@ #' @param ... Additional arguments to pass to the function or formula specified #' via `f`. Alternatively, if `f` is missing, then the current argument is #' interpreted as an expression for tidy evaluation. See details. -#' @param n Number of time steps to use in the running window. For example, if -#' `n = 7`, one time step is one day, and the alignment is "right", then to -#' produce a value on January 7 we apply the given function or formula to data -#' in between January 1 and 7. Default is 7. +#' @param before A nonnegative integer specifying the number of days before to +#' extract data from. Set to 0 for a "left" alignment in slide. +#' @param after A nonnegative integer specifying the number of days after to +#' extract data from. Set to 0 for a "right" alignment in slide. #' @param ref_time_values Time values for sliding computations, meaning, each #' element of this vector serves as the reference time point for one sliding #' window. If missing, then this will be set to all unique time values in the #' underlying data table, by default. -#' @param align One of "right", "center", or "left", indicating the alignment of -#' the sliding window relative to the reference time point. If the alignment -#' is "center" and `n` is even, then one more time point will be used after -#' the reference time point than before. Default is "right". -#' @param before Positive integer less than `n`, specifying the number of time -#' points to use in the sliding window strictly before the reference time -#' point. For example, setting `before = n-1` would be the same as setting -#' `align = "right"`. The `before` argument allows for more flexible -#' specification of alignment than the `align` parameter, and if specified, -#' overrides `align`. #' @param time_step Optional function used to define the meaning of one time #' step, which if specified, overrides the default choice based on the #' `time_value` column. This function must take a positive integer and return @@ -76,11 +66,11 @@ #' If `f` is missing, then an expression for tidy evaluation can be specified, #' for example, as in: #' ``` -#' epi_slide(x, cases_7dav = mean(cases), n = 7) +#' epi_slide(x, cases_7dav = mean(cases), before = 7) #' ``` #' which would be equivalent to: #' ``` -#' epi_slide(x, function(x, ...) mean(x$cases), n = 7, +#' epi_slide(x, function(x, ...) mean(x$cases), before = 7, #' new_col_name = "cases_7dav") #' ``` #' Thus, to be clear, when the computation is specified via an expression for @@ -95,16 +85,14 @@ #' # slide a 7-day trailing average formula on cases #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide(cases_7dav = mean(cases), n = 7, -#' align = "right") %>% +#' epi_slide(cases_7dav = mean(cases), before = 7) %>% #' # rmv a nonessential var. to ensure new col is printed #' dplyr::select(-death_rate_7d_av) #' #' # slide a left-aligned 7-day average #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide(cases_7dav = mean(cases), n = 7, -#' align = "left") %>% +#' epi_slide(cases_7dav = mean(cases), before = 7) %>% #' # rmv a nonessential var. to ensure new col is printed #' dplyr::select(-death_rate_7d_av) #' @@ -113,8 +101,9 @@ #' group_by(geo_value) %>% #' epi_slide(a = data.frame(cases_2dav = mean(cases), #' cases_2dma = mad(cases)), -#' n = 2, as_list_col = TRUE) -epi_slide = function(x, f, ..., before, after, ref_time_values, time_step, +#' before = 2, as_list_col = TRUE) +epi_slide = function(x, f, ..., before = 0, after = 0, ref_time_values, + time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { # Check we have an `epi_df` object diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index d63e3563..01f42f01 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -8,10 +8,9 @@ epi_slide( x, f, ..., - n, + before = 0, + after = 0, ref_time_values, - align = c("right", "center", "left"), - before, time_step, new_col_name = "slide_value", as_list_col = FALSE, @@ -38,28 +37,17 @@ to the groupings that would be described by \code{g} if \code{f} was a function. via \code{f}. Alternatively, if \code{f} is missing, then the current argument is interpreted as an expression for tidy evaluation. See details.} -\item{n}{Number of time steps to use in the running window. For example, if -\code{n = 7}, one time step is one day, and the alignment is "right", then to -produce a value on January 7 we apply the given function or formula to data -in between January 1 and 7. Default is 7.} +\item{before}{A nonnegative integer specifying the number of days before to +extract data from. Set to 0 for a "left" alignment in slide.} + +\item{after}{A nonnegative integer specifying the number of days after to +extract data from. Set to 0 for a "right" alignment in slide.} \item{ref_time_values}{Time values for sliding computations, meaning, each element of this vector serves as the reference time point for one sliding window. If missing, then this will be set to all unique time values in the underlying data table, by default.} -\item{align}{One of "right", "center", or "left", indicating the alignment of -the sliding window relative to the reference time point. If the alignment -is "center" and \code{n} is even, then one more time point will be used after -the reference time point than before. Default is "right".} - -\item{before}{Positive integer less than \code{n}, specifying the number of time -points to use in the sliding window strictly before the reference time -point. For example, setting \code{before = n-1} would be the same as setting -\code{align = "right"}. The \code{before} argument allows for more flexible -specification of alignment than the \code{align} parameter, and if specified, -overrides \code{align}.} - \item{time_step}{Optional function used to define the meaning of one time step, which if specified, overrides the default choice based on the \code{time_value} column. This function must take a positive integer and return @@ -109,12 +97,12 @@ specified function or formula \code{f}, or through post-processing. If \code{f} is missing, then an expression for tidy evaluation can be specified, for example, as in: -\if{html}{\out{
}}\preformatted{epi_slide(x, cases_7dav = mean(cases), n = 7) +\if{html}{\out{
}}\preformatted{epi_slide(x, cases_7dav = mean(cases), before = 7) }\if{html}{\out{
}} which would be equivalent to: -\if{html}{\out{
}}\preformatted{epi_slide(x, function(x, ...) mean(x$cases), n = 7, +\if{html}{\out{
}}\preformatted{epi_slide(x, function(x, ...) mean(x$cases), before = 7, new_col_name = "cases_7dav") }\if{html}{\out{
}} @@ -127,16 +115,14 @@ through the \code{new_col_name} argument. # slide a 7-day trailing average formula on cases jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide(cases_7dav = mean(cases), n = 7, - align = "right") \%>\% + epi_slide(cases_7dav = mean(cases), before = 7) \%>\% # rmv a nonessential var. to ensure new col is printed dplyr::select(-death_rate_7d_av) # slide a left-aligned 7-day average jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide(cases_7dav = mean(cases), n = 7, - align = "left") \%>\% + epi_slide(cases_7dav = mean(cases), before = 7) \%>\% # rmv a nonessential var. to ensure new col is printed dplyr::select(-death_rate_7d_av) @@ -145,5 +131,5 @@ through the \code{new_col_name} argument. group_by(geo_value) \%>\% epi_slide(a = data.frame(cases_2dav = mean(cases), cases_2dma = mad(cases)), - n = 2, as_list_col = TRUE) + before = 2, as_list_col = TRUE) } diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index f363ae34..da521881 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -10,25 +10,25 @@ f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) ## --- These cases generate the error: --- test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")), + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, no data in the slide windows - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+207L), + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-01")+207L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, no data in window }) test_that("`ref_time_values` + `align` that have some slide data, but generate the error due to ref. time being out of time range", { - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01"), align="left"), + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, after=3L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, but we'd expect there to be data in the window - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+201L), + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-01")+201L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, but still with data in window }) ## --- These cases doesn't generate the error: --- test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { - expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+200L) %>% + expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-01")+200L) %>% dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = "ak", slide_value_value = 199)) # out of range for one group - expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-04")) %>% + expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-04")) %>% dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2))) # not out of range for either group }) \ No newline at end of file diff --git a/vignettes/slide.Rmd b/vignettes/slide.Rmd index 8bb11eab..d126ed93 100644 --- a/vignettes/slide.Rmd +++ b/vignettes/slide.Rmd @@ -71,7 +71,7 @@ order to smooth the signal, by passing in a formula for the first argument of ```{r} x %>% group_by(geo_value) %>% - epi_slide(~ mean(.x$cases), n = 7) %>% + epi_slide(~ mean(.x$cases), before = 7) %>% head(10) ``` @@ -85,7 +85,7 @@ front using the `new_col_name` argument: ```{r} x <- x %>% group_by(geo_value) %>% - epi_slide(~ mean(.x$cases), n = 7, new_col_name = "cases_7dav") + epi_slide(~ mean(.x$cases), before = 7, new_col_name = "cases_7dav") head(x, 10) ``` @@ -101,7 +101,7 @@ arguments. Recreating the last example of a 7-day trailing average: ```{r} x <- x %>% group_by(geo_value) %>% - epi_slide(function(x, ...) mean(x$cases), n = 7, new_col_name = "cases_7dav") + epi_slide(function(x, ...) mean(x$cases), before = 7, new_col_name = "cases_7dav") head(x, 10) ``` @@ -117,7 +117,7 @@ would in a call to `dplyr::mutate()`, or any of the `dplyr` verbs. For example: ```{r} x <- x %>% group_by(geo_value) %>% - epi_slide(cases_7dav = mean(cases), n = 7) + epi_slide(cases_7dav = mean(cases), before = 7) head(x, 10) ``` @@ -209,7 +209,7 @@ fc_time_values <- seq(as.Date("2020-06-01"), by = "1 months") x %>% group_by(geo_value) %>% - epi_slide(fc = prob_ar(cases_7dav), n = 120, + epi_slide(fc = prob_ar(cases_7dav), before = 120, ref_time_values = fc_time_values) %>% head(10) ``` @@ -233,7 +233,7 @@ so that we can call it a few times. k_week_ahead <- function(x, ahead = 7) { x %>% group_by(geo_value) %>% - epi_slide(fc = prob_ar(cases_7dav, ahead = ahead), n = 120, + epi_slide(fc = prob_ar(cases_7dav, ahead = ahead), before = 120, ref_time_values = fc_time_values, all_rows = TRUE) %>% mutate(target_date = time_value + ahead) } From b68af0bd094661dd9b69187cd68e09fca26a749c Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 26 Jul 2022 16:33:34 -0700 Subject: [PATCH 26/60] Changed bad formatting. --- R/slide.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/slide.R b/R/slide.R index 7344ab32..a36fcace 100644 --- a/R/slide.R +++ b/R/slide.R @@ -123,7 +123,7 @@ epi_slide = function(x, f, ..., before = 0, after = 0, ref_time_values, } # Otherwise set up alignment based on passed before value - if (before < 0 ||after < 0) { + if (before < 0 || after < 0) { Abort("`before` and `after` must be at least 0.") } From b3229f23962e725664e4a0bbe468d0a8fa5c8a64 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 26 Jul 2022 17:07:17 -0700 Subject: [PATCH 27/60] Still needs refactoring. --- tests/testthat/test-epi_slide.R | 2 +- vignettes/slide.Rmd | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index da521881..82104fe7 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -25,7 +25,7 @@ test_that("`ref_time_values` + `align` that have some slide data, but generate t ## --- These cases doesn't generate the error: --- test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { - expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-01")+200L) %>% + expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-01")+200L) %>% dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = "ak", slide_value_value = 199)) # out of range for one group expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-04")) %>% diff --git a/vignettes/slide.Rmd b/vignettes/slide.Rmd index d126ed93..50ff99b1 100644 --- a/vignettes/slide.Rmd +++ b/vignettes/slide.Rmd @@ -71,7 +71,7 @@ order to smooth the signal, by passing in a formula for the first argument of ```{r} x %>% group_by(geo_value) %>% - epi_slide(~ mean(.x$cases), before = 7) %>% + epi_slide(~ mean(.x$cases), before = 6) %>% head(10) ``` @@ -85,7 +85,7 @@ front using the `new_col_name` argument: ```{r} x <- x %>% group_by(geo_value) %>% - epi_slide(~ mean(.x$cases), before = 7, new_col_name = "cases_7dav") + epi_slide(~ mean(.x$cases), before = 6, new_col_name = "cases_7dav") head(x, 10) ``` @@ -101,7 +101,7 @@ arguments. Recreating the last example of a 7-day trailing average: ```{r} x <- x %>% group_by(geo_value) %>% - epi_slide(function(x, ...) mean(x$cases), before = 7, new_col_name = "cases_7dav") + epi_slide(function(x, ...) mean(x$cases), before = 6, new_col_name = "cases_7dav") head(x, 10) ``` @@ -117,7 +117,7 @@ would in a call to `dplyr::mutate()`, or any of the `dplyr` verbs. For example: ```{r} x <- x %>% group_by(geo_value) %>% - epi_slide(cases_7dav = mean(cases), before = 7) + epi_slide(cases_7dav = mean(cases), before = 6) head(x, 10) ``` @@ -160,7 +160,7 @@ units of the `time_value` column; so, days, in the working `epi_df` being considered in this vignette). ```{r} -prob_ar <- function(y, lags = c(0, 7, 14), ahead = 7, min_train_window = 20, +prob_ar <- function(y, lags = c(0, 7, 14), ahead = 6, min_train_window = 20, lower_level = 0.05, upper_level = 0.95, symmetrize = TRUE, intercept = FALSE, nonneg = TRUE) { # Return NA if insufficient training data @@ -209,7 +209,7 @@ fc_time_values <- seq(as.Date("2020-06-01"), by = "1 months") x %>% group_by(geo_value) %>% - epi_slide(fc = prob_ar(cases_7dav), before = 120, + epi_slide(fc = prob_ar(cases_7dav), before = 119, ref_time_values = fc_time_values) %>% head(10) ``` @@ -233,7 +233,7 @@ so that we can call it a few times. k_week_ahead <- function(x, ahead = 7) { x %>% group_by(geo_value) %>% - epi_slide(fc = prob_ar(cases_7dav, ahead = ahead), before = 120, + epi_slide(fc = prob_ar(cases_7dav, ahead = ahead), before = 119, ref_time_values = fc_time_values, all_rows = TRUE) %>% mutate(target_date = time_value + ahead) } From d18d98c5fb3ddbcf1ca71c415bad5d0c2a50539e Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 26 Jul 2022 17:08:35 -0700 Subject: [PATCH 28/60] Redocumented with changes; still needs changes. --- R/slide.R | 6 +++--- man/epi_slide.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/slide.R b/R/slide.R index a36fcace..146e934d 100644 --- a/R/slide.R +++ b/R/slide.R @@ -85,14 +85,14 @@ #' # slide a 7-day trailing average formula on cases #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide(cases_7dav = mean(cases), before = 7) %>% +#' epi_slide(cases_7dav = mean(cases), before = 6) %>% #' # rmv a nonessential var. to ensure new col is printed #' dplyr::select(-death_rate_7d_av) #' #' # slide a left-aligned 7-day average #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide(cases_7dav = mean(cases), before = 7) %>% +#' epi_slide(cases_7dav = mean(cases), before = 6) %>% #' # rmv a nonessential var. to ensure new col is printed #' dplyr::select(-death_rate_7d_av) #' @@ -101,7 +101,7 @@ #' group_by(geo_value) %>% #' epi_slide(a = data.frame(cases_2dav = mean(cases), #' cases_2dma = mad(cases)), -#' before = 2, as_list_col = TRUE) +#' before = 1, as_list_col = TRUE) epi_slide = function(x, f, ..., before = 0, after = 0, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 01f42f01..6631b374 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -115,14 +115,14 @@ through the \code{new_col_name} argument. # slide a 7-day trailing average formula on cases jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide(cases_7dav = mean(cases), before = 7) \%>\% + epi_slide(cases_7dav = mean(cases), before = 6) \%>\% # rmv a nonessential var. to ensure new col is printed dplyr::select(-death_rate_7d_av) # slide a left-aligned 7-day average jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide(cases_7dav = mean(cases), before = 7) \%>\% + epi_slide(cases_7dav = mean(cases), before = 6) \%>\% # rmv a nonessential var. to ensure new col is printed dplyr::select(-death_rate_7d_av) @@ -131,5 +131,5 @@ through the \code{new_col_name} argument. group_by(geo_value) \%>\% epi_slide(a = data.frame(cases_2dav = mean(cases), cases_2dma = mad(cases)), - before = 2, as_list_col = TRUE) + before = 1, as_list_col = TRUE) } From 35811f15c09434bada36ec26e9e0561dfab73151 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 29 Jul 2022 11:28:03 -0700 Subject: [PATCH 29/60] Seems like merge is broken. --- vignettes/advanced.Rmd | 26 +++++++++++++------------- vignettes/aggregation.Rmd | 4 ++-- vignettes/archive.Rmd | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index 04c73af3..237dd3ae 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -46,17 +46,17 @@ df <- tibble( # 2-day trailing average, per geo value df %>% group_by(geo_value) %>% - epi_slide(x_2dav = mean(x), n = 2) + epi_slide(x_2dav = mean(x), before = 1) # 2-day trailing average, marginally df %>% - epi_slide(x_2dav = mean(x), n = 2) + epi_slide(x_2dav = mean(x), before = 1) ``` ```{r, include = FALSE} # More checks (not included) df %>% - epi_slide(x_2dav = mean(x), n = 2, ref_time_values = as.Date("2020-06-02")) + epi_slide(x_2dav = mean(x), before = 1, ref_time_values = as.Date("2020-06-02")) df %>% mutate(version = time_value) %>% @@ -76,7 +76,7 @@ same result as the last one. ```{r} df %>% - epi_slide(y_2dav = rep(mean(x), 3), n = 2) + epi_slide(y_2dav = rep(mean(x), 3), before = 1) ``` However, if the output is an atomic vector (rather than a single value) and it @@ -85,7 +85,7 @@ are trying to return 2 things for 3 states. ```{r, error = TRUE} df %>% - epi_slide(x_2dav = rep(mean(x), 2), n = 2) + epi_slide(x_2dav = rep(mean(x), 2), before = 1) ``` ## Multi-column outputs @@ -101,7 +101,7 @@ object returned by `epi_slide()` has a list column containing the slide values. df2 <- df %>% group_by(geo_value) %>% epi_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)), - n = 2, as_list_col = TRUE) + before = 1, as_list_col = TRUE) class(df2$a) length(df2$a) @@ -119,7 +119,7 @@ slide computation (here `x_2dav` and `x_2dma`) separated by "_". df %>% group_by(geo_value) %>% epi_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)), - n = 2, as_list_col = FALSE) + before = 1, as_list_col = FALSE) ``` We can use `names_sep = NULL` (which gets passed to `tidyr::unnest()`) to drop @@ -129,7 +129,7 @@ the prefix associated with list column name, in naming the unnested columns. df %>% group_by(geo_value) %>% epi_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)), - n = 2, as_list_col = FALSE, names_sep = NULL) + before = 1, as_list_col = FALSE, names_sep = NULL) ``` Furthermore, `epi_slide()` will recycle the single row data frame as needed in @@ -138,7 +138,7 @@ order to make the result size stable, just like the case for atomic values. ```{r} df %>% epi_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)), - n = 2, as_list_col = FALSE, names_sep = NULL) + before = 1, as_list_col = FALSE, names_sep = NULL) ``` ```{r, include = FALSE} @@ -146,7 +146,7 @@ df %>% df %>% epi_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)), ref_time_values = as.Date("2020-06-02"), - n = 2, as_list_col = FALSE, names_sep = NULL) + before = 1, as_list_col = FALSE, names_sep = NULL) df %>% mutate(version = time_value) %>% @@ -181,7 +181,7 @@ df %>% filter(time_value == max(time_value)), interval = "prediction", level = 0.9) )) - }, n = 2, new_col_name = "fc", names_sep = NULL) + }, before = 1, new_col_name = "fc", names_sep = NULL) ``` ## Version-aware forecasting, revisited @@ -222,7 +222,7 @@ x <- y1 %>% version = issue, percent_cli = value ) %>% - as_epi_archive() + as_epi_archive(compactify=FALSE) # mutating merge operation: x$merge(y2 %>% @@ -230,7 +230,7 @@ x$merge(y2 %>% version = issue, case_rate_7d_av = value ) %>% - as_epi_archive() + as_epi_archive(compactify=FALSE) ) ``` diff --git a/vignettes/aggregation.Rmd b/vignettes/aggregation.Rmd index 617f0983..7abdb23b 100644 --- a/vignettes/aggregation.Rmd +++ b/vignettes/aggregation.Rmd @@ -189,7 +189,7 @@ running `epi_slide()` on the zero-filled data brings these trailing averages xt %>% as_epi_df() %>% group_by(geo_value) %>% - epi_slide(cases_7dav = mean(cases), n = 7) %>% + epi_slide(cases_7dav = mean(cases), before = 6) %>% filter(geo_value == "Plymouth, MA", abs(time_value - as.Date("2021-07-01")) <= 3) %>% print(n = 7) @@ -197,7 +197,7 @@ xt %>% xt_filled %>% as_epi_df() %>% group_by(geo_value) %>% - epi_slide(cases_7dav = mean(cases), n = 7) %>% + epi_slide(cases_7dav = mean(cases), before = 6) %>% filter(geo_value == "Plymouth, MA", abs(time_value - as.Date("2021-07-01")) <= 3) %>% print(n = 7) diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index 1b8d0622..1ca545ca 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -394,7 +394,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { else { x_latest %>% group_by(geo_value) %>% - epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), n = 120, + epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), before = 119, ref_time_values = fc_time_values) %>% mutate(target_date = time_value + ahead, as_of = FALSE) } From 06a61904dd7ba86de004158d037dd9557417c67c Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 29 Jul 2022 11:35:57 -0700 Subject: [PATCH 30/60] Updates relating to checking on main. --- DESCRIPTION | 2 +- man/as_epi_df.Rd | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 00b78130..18a3bdfb 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,7 +56,7 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.1 Depends: R (>= 2.10) URL: https://cmu-delphi.github.io/epiprocess/ diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index 5d1b1335..b5df1302 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -51,9 +51,9 @@ examples. } \section{Methods (by class)}{ \itemize{ -\item \code{epi_df}: Simply returns the \code{epi_df} object unchanged. +\item \code{as_epi_df(epi_df)}: Simply returns the \code{epi_df} object unchanged. -\item \code{tbl_df}: The input tibble \code{x} must contain the columns +\item \code{as_epi_df(tbl_df)}: The input tibble \code{x} must contain the columns \code{geo_value} and \code{time_value}. All other columns will be preserved as is, and treated as measured variables. If \code{as_of} is missing, then the function will try to guess it from an \code{as_of}, \code{issue}, or \code{version} column of \code{x} @@ -61,14 +61,14 @@ will try to guess it from an \code{as_of}, \code{issue}, or \code{version} colum (stored in its attributes); if this fails, then the current day-time will be used. -\item \code{data.frame}: Works analogously to \code{as_epi_df.tbl_df()}. +\item \code{as_epi_df(data.frame)}: Works analogously to \code{as_epi_df.tbl_df()}. -\item \code{tbl_ts}: Works analogously to \code{as_epi_df.tbl_df()}, except that +\item \code{as_epi_df(tbl_ts)}: Works analogously to \code{as_epi_df.tbl_df()}, except that the \code{tbl_ts} class is dropped, and any key variables (other than "geo_value") are added to the metadata of the returned object, under the \code{other_keys} field. -}} +}} \examples{ # Convert a `tsibble` that has county code as an extra key # Notice that county code should be a character string to preserve any leading zeroes From ee109639a3c5d3098a17c6a8036c5afe9f14d5ff Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 29 Jul 2022 13:08:42 -0700 Subject: [PATCH 31/60] Seems broken beyond repair. --- R/slide.R | 4 ++-- man/epi_slide.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/slide.R b/R/slide.R index 146e934d..b49dd878 100644 --- a/R/slide.R +++ b/R/slide.R @@ -66,11 +66,11 @@ #' If `f` is missing, then an expression for tidy evaluation can be specified, #' for example, as in: #' ``` -#' epi_slide(x, cases_7dav = mean(cases), before = 7) +#' epi_slide(x, cases_7dav = mean(cases), before = 6) #' ``` #' which would be equivalent to: #' ``` -#' epi_slide(x, function(x, ...) mean(x$cases), before = 7, +#' epi_slide(x, function(x, ...) mean(x$cases), before = 6, #' new_col_name = "cases_7dav") #' ``` #' Thus, to be clear, when the computation is specified via an expression for diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 6631b374..236fa30f 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -97,12 +97,12 @@ specified function or formula \code{f}, or through post-processing. If \code{f} is missing, then an expression for tidy evaluation can be specified, for example, as in: -\if{html}{\out{
}}\preformatted{epi_slide(x, cases_7dav = mean(cases), before = 7) +\if{html}{\out{
}}\preformatted{epi_slide(x, cases_7dav = mean(cases), before = 6) }\if{html}{\out{
}} which would be equivalent to: -\if{html}{\out{
}}\preformatted{epi_slide(x, function(x, ...) mean(x$cases), before = 7, +\if{html}{\out{
}}\preformatted{epi_slide(x, function(x, ...) mean(x$cases), before = 6, new_col_name = "cases_7dav") }\if{html}{\out{
}} From 05d84ca8d73e41fe661aefa9a196ecfaaf87ef93 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 29 Jul 2022 13:22:35 -0700 Subject: [PATCH 32/60] Fixed tests. --- tests/testthat/test-epi_slide.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 82104fe7..3589012d 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -10,16 +10,16 @@ f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) ## --- These cases generate the error: --- test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-01")), + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, no data in the slide windows - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-01")+207L), + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-01")+207L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, no data in window }) test_that("`ref_time_values` + `align` that have some slide data, but generate the error due to ref. time being out of time range", { - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, after=3L, ref_time_values=as.Date("2020-01-01")), + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, after=2L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, but we'd expect there to be data in the window - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-01")+201L), + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-01")+201L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, but still with data in window }) @@ -28,7 +28,7 @@ test_that("these doesn't produce an error; the error appears only if the ref tim expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-01")+200L) %>% dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = "ak", slide_value_value = 199)) # out of range for one group - expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, before=3L, ref_time_values=as.Date("2020-01-04")) %>% + expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-04")) %>% dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2))) # not out of range for either group }) \ No newline at end of file From 846b6cac6d248f5400a69a0d31ee2507994aa4e1 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 29 Jul 2022 13:45:08 -0700 Subject: [PATCH 33/60] Fixed improper use of n. --- R/outliers.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/outliers.R b/R/outliers.R index 6cc2ffb1..9a759532 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -179,7 +179,7 @@ detect_outlr_rm = function(x = seq_along(y), y, n = 21, # Calculate lower and upper thresholds and replacement value z = z %>% - epi_slide(fitted = median(y), n = n, align = "center") %>% + epi_slide(fitted = median(y), before = floor((n-1)/2), after = ceiling((n-1)/2)) %>% dplyr::mutate(resid = y - fitted) %>% roll_iqr(n = n, detection_multiplier = detection_multiplier, @@ -332,7 +332,7 @@ roll_iqr = function(z, n, detection_multiplier, min_radius, if (typeof(z$y) == "integer") as_type = as.integer else as_type = as.numeric - epi_slide(z, roll_iqr = stats::IQR(resid), n = n, align = "center") %>% + epi_slide(z, roll_iqr = stats::IQR(resid), before = floor((n-1)/2), after = ceiling((n-1)/2), align = "center") %>% dplyr::mutate( lower = pmax(min_lower, fitted - pmax(min_radius, detection_multiplier * roll_iqr)), From d55e6b854c8a923e92aed6a04b7c671d25d9f01d Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 29 Jul 2022 14:48:34 -0700 Subject: [PATCH 34/60] This finally runs without errors. --- R/outliers.R | 2 +- vignettes/advanced.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/outliers.R b/R/outliers.R index 9a759532..d4eb40ee 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -332,7 +332,7 @@ roll_iqr = function(z, n, detection_multiplier, min_radius, if (typeof(z$y) == "integer") as_type = as.integer else as_type = as.numeric - epi_slide(z, roll_iqr = stats::IQR(resid), before = floor((n-1)/2), after = ceiling((n-1)/2), align = "center") %>% + epi_slide(z, roll_iqr = stats::IQR(resid), before = floor((n-1)/2), after = ceiling((n-1)/2)) %>% dplyr::mutate( lower = pmax(min_lower, fitted - pmax(min_radius, detection_multiplier * roll_iqr)), diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index 237dd3ae..b9373048 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -362,7 +362,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x_latest %>% epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - n = 120, ref_time_values = fc_time_values) %>% + before = 119, ref_time_values = fc_time_values) %>% mutate(target_date = time_value + ahead, as_of = FALSE) } } From 1158c8a791bff406247207f960131f049f5726d0 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 29 Jul 2022 14:53:25 -0700 Subject: [PATCH 35/60] Note that epix_slide still hasn't been updated, and some epi_slide documentation still neneds to be modified. --- vignettes/aggregation.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/aggregation.Rmd b/vignettes/aggregation.Rmd index 7abdb23b..b572b51c 100644 --- a/vignettes/aggregation.Rmd +++ b/vignettes/aggregation.Rmd @@ -177,7 +177,7 @@ Explicit imputation for missingness (zero-filling in our case) can be important for protecting against bugs in all sorts of downstream tasks. For example, even something as simple as a 7-day trailing average is complicated by missingness. The function `epi_slide()` looks for all rows within a window of 7 days anchored -on the right at the reference time point (when `n = 7` and `align = "right"`). +on the right at the reference time point (when `after = 6`). But when some days in a given week are missing because they were censored because they had small case counts, taking an average of the observed case counts can be misleading and is unintentionally biased upwards. Meanwhile, From bbf5d6b8315a9a35ce1be5c312ea910e75b59ad3 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 5 Aug 2022 15:34:17 -0700 Subject: [PATCH 36/60] Need to ensure tests pass. --- R/slide.R | 15 ++++++++++++++- man/epi_slide.Rd | 2 +- tests/testthat/test-epi_slide.R | 10 ++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/R/slide.R b/R/slide.R index b49dd878..b1724785 100644 --- a/R/slide.R +++ b/R/slide.R @@ -102,10 +102,11 @@ #' epi_slide(a = data.frame(cases_2dav = mean(cases), #' cases_2dma = mad(cases)), #' before = 1, as_list_col = TRUE) -epi_slide = function(x, f, ..., before = 0, after = 0, ref_time_values, +epi_slide = function(x, f, ..., before, after = 0, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { + # Check we have an `epi_df` object if (!inherits(x, "epi_df")) Abort("`x` must be of class `epi_df`.") @@ -122,6 +123,18 @@ epi_slide = function(x, f, ..., before = 0, after = 0, ref_time_values, unique(x$time_value)] } + # Before cannot be missing if after is set to 0. If after is set to a nonzero + # number, then before must be set to 0 + if (missing(before)) { + if (after == 0) { + Abort("`before` cannot be missing when `after` is set to 0.") + } else { + Warn("`before` missing but `after` nonzero; + `before` has been set to 0.") + before = 0 + } + } + # Otherwise set up alignment based on passed before value if (before < 0 || after < 0) { Abort("`before` and `after` must be at least 0.") diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 236fa30f..e913b53b 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -8,7 +8,7 @@ epi_slide( x, f, ..., - before = 0, + before, after = 0, ref_time_values, time_step, diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 3589012d..4c6d5d23 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -9,6 +9,16 @@ edf = dplyr::bind_rows( f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) ## --- These cases generate the error: --- +test_that("`after` must be defined as a non-zero integer if `before` is missing", { + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, after = 0L, ref_time_values=as.Date("2020-01-01")), + "`before` cannot be missing when `after` is set to 0.") +}) + +test_that({ + expect_warning(edf %>% group_by(geo_value) %>% epi_slide(f, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), + "`before` missing but `after` nonzero; `before` has been set to 0.") +}) + test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, no data in the slide windows From b55d411d510dea72d6085397dbc4ec6982330fd9 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 5 Aug 2022 15:43:42 -0700 Subject: [PATCH 37/60] This shouldn't be here. --- test-correlation.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test-correlation.R diff --git a/test-correlation.R b/test-correlation.R new file mode 100644 index 00000000..e69de29b From b22ace377c8f311f060a8da36515239c4f2478ed Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 5 Aug 2022 17:45:55 -0700 Subject: [PATCH 38/60] Removed repetitive code and added more tests. --- tests/testthat/test-epi_slide.R | 36 +++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 4c6d5d23..47375379 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -4,41 +4,55 @@ edf = dplyr::bind_rows( dplyr::tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:200, value=1:200), dplyr::tibble(geo_value = "al", time_value=as.Date("2020-01-01") + 1:5, value=-(1:5)) ) %>% - as_epi_df() + as_epi_df() %>% + group_by(geo_value) f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) ## --- These cases generate the error: --- test_that("`after` must be defined as a non-zero integer if `before` is missing", { - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, after = 0L, ref_time_values=as.Date("2020-01-01")), + expect_error(epi_slide(edf, f, after = 0L, ref_time_values=as.Date("2020-01-01")), "`before` cannot be missing when `after` is set to 0.") }) -test_that({ - expect_warning(edf %>% group_by(geo_value) %>% epi_slide(f, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), - "`before` missing but `after` nonzero; `before` has been set to 0.") +test_that("Warn user against having a blank `before`",{ + expect_warning(epi_slide(edf, f, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), + regexp="`before` missing but `after` nonzero; `before` has been set to 0.") +}) + +test_that("Both `before` and `after` must be nonnegative integers",{ + expect_error(epi_slide(edf, f, before = -1L, ref_time_values=as.Date("2020-01-01")+2L), + "`before` and `after` must be at least 0.") + expect_error(epi_slide(edf, f, before = 2L, after = -1L, ref_time_values=as.Date("2020-01-01")+2L), + "`before` and `after` must be at least 0.") + expect_error(epi_slide(edf, f, before = 0.5, ref_time_values=as.Date("2020-01-01")+2L), + "`before` and `after` must be integers.") + expect_error(epi_slide(edf, f, before = 1, after = 0.5, ref_time_values=as.Date("2020-01-01")+2L), + "`before` and `after` must be integers.") + # The before and after values can be numerics that are integerish + expect_error(epi_slide(edf, f, before = 1, after = 1, ref_time_values=as.Date("2020-01-01")+2L),NA) }) test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-01")), + expect_error(epi_slide(edf, f, before=2L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, no data in the slide windows - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-01")+207L), + expect_error(epi_slide(edf, f, before=2L, ref_time_values=as.Date("2020-01-01")+207L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, no data in window }) test_that("`ref_time_values` + `align` that have some slide data, but generate the error due to ref. time being out of time range", { - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, after=2L, ref_time_values=as.Date("2020-01-01")), + expect_error(epi_slide(edf, f, before=0L, after=2L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, but we'd expect there to be data in the window - expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-01")+201L), + expect_error(epi_slide(edf, f, before=2L, ref_time_values=as.Date("2020-01-01")+201L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, but still with data in window }) ## --- These cases doesn't generate the error: --- test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { - expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-01")+200L) %>% + expect_identical(epi_slide(edf, f, before=2L, ref_time_values=as.Date("2020-01-01")+200L) %>% dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = "ak", slide_value_value = 199)) # out of range for one group - expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, before=2L, ref_time_values=as.Date("2020-01-04")) %>% + expect_identical(epi_slide(edf, f, before=2L, ref_time_values=as.Date("2020-01-04")) %>% dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2))) # not out of range for either group }) \ No newline at end of file From 1038e1565a31ad86b798034c86b9cf06a03a0c4f Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 9 Aug 2022 09:51:08 -0700 Subject: [PATCH 39/60] Ran document after updating to epidatr. --- man/as_epi_archive.Rd | 12 ++++++++---- man/as_epi_df.Rd | 10 +++++----- man/epi_archive.Rd | 42 +++++++++++++++++++++--------------------- man/epi_slide.Rd | 2 +- man/epix_as_of.Rd | 12 ++++++++---- man/epix_slide.Rd | 12 ++++++++---- 6 files changed, 51 insertions(+), 39 deletions(-) diff --git a/man/as_epi_archive.Rd b/man/as_epi_archive.Rd index d13ba4d6..a98798cc 100644 --- a/man/as_epi_archive.Rd +++ b/man/as_epi_archive.Rd @@ -91,11 +91,15 @@ examples. } \details{ This simply a wrapper around the \code{new()} method of the \code{epi_archive} -class, so for example:\preformatted{x <- as_epi_archive(df, geo_type = "state", time_type = "day") -} +class, so for example: -would be equivalent to:\preformatted{x <- epi_archive$new(df, geo_type = "state", time_type = "day") -} +\if{html}{\out{
}}\preformatted{x <- as_epi_archive(df, geo_type = "state", time_type = "day") +}\if{html}{\out{
}} + +would be equivalent to: + +\if{html}{\out{
}}\preformatted{x <- epi_archive$new(df, geo_type = "state", time_type = "day") +}\if{html}{\out{
}} } \examples{ # Simple ex. with necessary keys diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index 6d7592e4..851aed7e 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -52,9 +52,9 @@ examples. } \section{Methods (by class)}{ \itemize{ -\item \code{epi_df}: Simply returns the \code{epi_df} object unchanged. +\item \code{as_epi_df(epi_df)}: Simply returns the \code{epi_df} object unchanged. -\item \code{tbl_df}: The input tibble \code{x} must contain the columns +\item \code{as_epi_df(tbl_df)}: The input tibble \code{x} must contain the columns \code{geo_value} and \code{time_value}. All other columns will be preserved as is, and treated as measured variables. If \code{as_of} is missing, then the function will try to guess it from an \code{as_of}, \code{issue}, or \code{version} column of \code{x} @@ -62,14 +62,14 @@ will try to guess it from an \code{as_of}, \code{issue}, or \code{version} colum (stored in its attributes); if this fails, then the current day-time will be used. -\item \code{data.frame}: Works analogously to \code{as_epi_df.tbl_df()}. +\item \code{as_epi_df(data.frame)}: Works analogously to \code{as_epi_df.tbl_df()}. -\item \code{tbl_ts}: Works analogously to \code{as_epi_df.tbl_df()}, except that +\item \code{as_epi_df(tbl_ts)}: Works analogously to \code{as_epi_df.tbl_df()}, except that the \code{tbl_ts} class is dropped, and any key variables (other than "geo_value") are added to the metadata of the returned object, under the \code{other_keys} field. -}} +}} \examples{ # Convert a `tsibble` that has county code as an extra key # Notice that county code should be a character string to preserve any leading zeroes diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd index 0b198eab..026f27e1 100644 --- a/man/epi_archive.Rd +++ b/man/epi_archive.Rd @@ -114,18 +114,18 @@ toy_epi_archive \section{Methods}{ \subsection{Public methods}{ \itemize{ -\item \href{#method-new}{\code{epi_archive$new()}} -\item \href{#method-print}{\code{epi_archive$print()}} -\item \href{#method-as_of}{\code{epi_archive$as_of()}} -\item \href{#method-fill_through_version}{\code{epi_archive$fill_through_version()}} -\item \href{#method-merge}{\code{epi_archive$merge()}} -\item \href{#method-slide}{\code{epi_archive$slide()}} -\item \href{#method-clone}{\code{epi_archive$clone()}} +\item \href{#method-epi_archive-new}{\code{epi_archive$new()}} +\item \href{#method-epi_archive-print}{\code{epi_archive$print()}} +\item \href{#method-epi_archive-as_of}{\code{epi_archive$as_of()}} +\item \href{#method-epi_archive-fill_through_version}{\code{epi_archive$fill_through_version()}} +\item \href{#method-epi_archive-merge}{\code{epi_archive$merge()}} +\item \href{#method-epi_archive-slide}{\code{epi_archive$slide()}} +\item \href{#method-epi_archive-clone}{\code{epi_archive$clone()}} } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-new}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-new}{}}} \subsection{Method \code{new()}}{ Creates a new \code{epi_archive} object. \subsection{Usage}{ @@ -195,8 +195,8 @@ An \code{epi_archive} object. } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-print}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-print}{}}} \subsection{Method \code{print()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{epi_archive$print()}\if{html}{\out{
}} @@ -204,8 +204,8 @@ An \code{epi_archive} object. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-as_of}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-as_of}{}}} \subsection{Method \code{as_of()}}{ Generates a snapshot in \code{epi_df} format as of a given version. See the documentation for the wrapper function \code{\link[=epix_as_of]{epix_as_of()}} for details. @@ -215,8 +215,8 @@ See the documentation for the wrapper function \code{\link[=epix_as_of]{epix_as_ } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-fill_through_version}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-fill_through_version}{}}} \subsection{Method \code{fill_through_version()}}{ Fill in unobserved history using requested scheme by mutating \code{self} and potentially reseating its fields. See @@ -237,8 +237,8 @@ version, which doesn't mutate the input archive but might alias its fields. } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-merge}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-merge}{}}} \subsection{Method \code{merge()}}{ Merges another \code{epi_archive} with the current one, mutating the current one by reseating its \code{DT} and several other fields, but avoiding @@ -267,8 +267,8 @@ does not alias either archive's \code{DT}. } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-slide}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-slide}{}}} \subsection{Method \code{slide()}}{ Slides a given function over variables in an \code{epi_archive} object. See the documentation for the wrapper function \code{\link[=epix_slide]{epix_slide()}} for @@ -290,8 +290,8 @@ details. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-clone}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index dc35b005..e913b53b 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -104,7 +104,7 @@ which would be equivalent to: \if{html}{\out{
}}\preformatted{epi_slide(x, function(x, ...) mean(x$cases), before = 6, new_col_name = "cases_7dav") -} +}\if{html}{\out{
}} Thus, to be clear, when the computation is specified via an expression for tidy evaluation (first example, above), then the name for the new column is diff --git a/man/epix_as_of.Rd b/man/epix_as_of.Rd index 6dc72a44..4053cd28 100644 --- a/man/epix_as_of.Rd +++ b/man/epix_as_of.Rd @@ -29,11 +29,15 @@ examples. } \details{ This is simply a wrapper around the \code{as_of()} method of the -\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then:\preformatted{epix_as_of(x, max_version = v) -} +\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then: -is equivalent to:\preformatted{x$as_of(max_version = v) -} +\if{html}{\out{
}}\preformatted{epix_as_of(x, max_version = v) +}\if{html}{\out{
}} + +is equivalent to: + +\if{html}{\out{
}}\preformatted{x$as_of(max_version = v) +}\if{html}{\out{
}} } \examples{ # warning message of data latency shown diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index 2acae1a1..79e9c1c3 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -115,11 +115,15 @@ should never be used in place of \code{epi_slide()}, and only used when version-aware sliding is necessary (as it its purpose). Finally, this is simply a wrapper around the \code{slide()} method of the -\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then:\preformatted{epix_slide(x, new_var = comp(old_var), n = 120) -} +\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then: -is equivalent to:\preformatted{x$slide(x, new_var = comp(old_var), n = 120) -} +\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), n = 120) +}\if{html}{\out{
}} + +is equivalent to: + +\if{html}{\out{
}}\preformatted{x$slide(x, new_var = comp(old_var), n = 120) +}\if{html}{\out{
}} } \examples{ # these dates are reference time points for the 3 day average sliding window From 6e2b207455c8eb4f76f9bc9b777b3662f03094e4 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 9 Aug 2022 10:19:31 -0700 Subject: [PATCH 40/60] Addressed first two comments. --- R/slide.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/slide.R b/R/slide.R index b1724785..91edd0e7 100644 --- a/R/slide.R +++ b/R/slide.R @@ -6,7 +6,8 @@ #' #' @param x The `epi_df` object under consideration. #' @param f Function or formula to slide over variables in `x`. To "slide" means -#' to apply a function or formula over a running window of `n` time steps +#' to apply a function or formula over a running window of `before` +#' and `after` time steps #' (where one time step is typically one day or one week; see details for more #' explanation). If a function, `f` should take `x`, an `epi_df` with the same #' names as the non-grouping columns, followed by `g` to refer to the one row @@ -19,10 +20,12 @@ #' @param ... Additional arguments to pass to the function or formula specified #' via `f`. Alternatively, if `f` is missing, then the current argument is #' interpreted as an expression for tidy evaluation. See details. -#' @param before A nonnegative integer specifying the number of days before to -#' extract data from. Set to 0 for a "left" alignment in slide. -#' @param after A nonnegative integer specifying the number of days after to -#' extract data from. Set to 0 for a "right" alignment in slide. +#' @param before A nonnegative integer specifying the number of time steps +#' before each of the `ref_time_values` to extract data from. +#' Set to 0 for a "left" alignment in slide. +#' @param after A nonnegative integer specifying the number of time steps after +#' each of the `ref_time_values` to extract data from. Set to 0 for a "right" +#' alignment in slide. #' @param ref_time_values Time values for sliding computations, meaning, each #' element of this vector serves as the reference time point for one sliding #' window. If missing, then this will be set to all unique time values in the From 77b5bb92b03c47ecf9928eda7df309b30fb07407 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 9 Aug 2022 10:31:31 -0700 Subject: [PATCH 41/60] Replaced `n` in details. --- R/slide.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/slide.R b/R/slide.R index 91edd0e7..ce1463c3 100644 --- a/R/slide.R +++ b/R/slide.R @@ -53,14 +53,15 @@ #' according to the `new_col_name` argument. #' #' @details To "slide" means to apply a function or formula over a running -#' window of `n` time steps, where the unit (the meaning of one time step) is +#' window of `before` time steps before and `after` time steps after, +#' where the unit (the meaning of one time step) is #' implicitly defined by the way the `time_value` column treats addition and #' subtraction; for example, if the time values are coded as `Date` objects, #' then one time step is one day, since `as.Date("2022-01-01") + 1` equals #' `as.Date("2022-01-02")`. Alternatively, the time step can be set explicitly #' using the `time_step` argument (which if specified would override the -#' default choice based on `time_value` column). If less than `n` time steps -#' are available at any given reference time value, then `epi_slide()` still +#' default choice based on `time_value` column). If certain time steps +#' are unavailable at any given reference time value, then `epi_slide()` still #' attempts to perform the computation anyway (it does not require a complete #' window). The issue of what to do with partial computations (those run on #' incomplete windows) is therefore left up to the user, either through the From db99a67002b098046f72a790f120724de2b5a6b7 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 9 Aug 2022 15:27:39 -0700 Subject: [PATCH 42/60] Updated some poorly typed documentation and an imporperly refactored slide. --- R/slide.R | 16 ++++++++++------ man/epi_slide.Rd | 27 ++++++++++++++++++--------- tests/testthat/test-epi_slide.R | 2 +- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/R/slide.R b/R/slide.R index ce1463c3..49a0dcf6 100644 --- a/R/slide.R +++ b/R/slide.R @@ -22,10 +22,15 @@ #' interpreted as an expression for tidy evaluation. See details. #' @param before A nonnegative integer specifying the number of time steps #' before each of the `ref_time_values` to extract data from. -#' Set to 0 for a "left" alignment in slide. +#' Set to 0 for a "left" alignment for the sliding window, meaning that no +#' `time_value` after the slide will be used for the sliding calculation. +#' It is mandatory to specify a `before` value, unless `after` is specified +#' as a non-zero value. In this case, `before` will be assumed to be 0. +#' However, this usage is discouraged and will thus produce a warning. #' @param after A nonnegative integer specifying the number of time steps after -#' each of the `ref_time_values` to extract data from. Set to 0 for a "right" -#' alignment in slide. +#' each of the `ref_time_values` to extract data from. +#' Set to 0 for a "right" alignment for the sliding window, meaning that no +#' `time_value` before the slide will be used for the sliding calculation. #' @param ref_time_values Time values for sliding computations, meaning, each #' element of this vector serves as the reference time point for one sliding #' window. If missing, then this will be set to all unique time values in the @@ -96,7 +101,7 @@ #' # slide a left-aligned 7-day average #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' epi_slide(cases_7dav = mean(cases), before = 6) %>% +#' epi_slide(cases_7dav = mean(cases), before = 0, after = 6) %>% #' # rmv a nonessential var. to ensure new col is printed #' dplyr::select(-death_rate_7d_av) #' @@ -133,8 +138,7 @@ epi_slide = function(x, f, ..., before, after = 0, ref_time_values, if (after == 0) { Abort("`before` cannot be missing when `after` is set to 0.") } else { - Warn("`before` missing but `after` nonzero; - `before` has been set to 0.") + Warn("`before` is missing, but `after` is nonzero. `before` has been set to 0.") before = 0 } } diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index e913b53b..98c4aec4 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -22,7 +22,8 @@ epi_slide( \item{x}{The \code{epi_df} object under consideration.} \item{f}{Function or formula to slide over variables in \code{x}. To "slide" means -to apply a function or formula over a running window of \code{n} time steps +to apply a function or formula over a running window of \code{before} +and \code{after} time steps (where one time step is typically one day or one week; see details for more explanation). If a function, \code{f} should take \code{x}, an \code{epi_df} with the same names as the non-grouping columns, followed by \code{g} to refer to the one row @@ -37,11 +38,18 @@ to the groupings that would be described by \code{g} if \code{f} was a function. via \code{f}. Alternatively, if \code{f} is missing, then the current argument is interpreted as an expression for tidy evaluation. See details.} -\item{before}{A nonnegative integer specifying the number of days before to -extract data from. Set to 0 for a "left" alignment in slide.} +\item{before}{A nonnegative integer specifying the number of time steps +before each of the \code{ref_time_values} to extract data from. +Set to 0 for a "left" alignment for the sliding window, meaning that no +\code{time_value} after the slide will be used for the sliding calculation. +It is mandatory to specify a \code{before} value, unless \code{after} is specified +as a non-zero value. In this case, \code{before} will be assumed to be 0. +However, this usage is discouraged and will thus produce a warning.} -\item{after}{A nonnegative integer specifying the number of days after to -extract data from. Set to 0 for a "right" alignment in slide.} +\item{after}{A nonnegative integer specifying the number of time steps after +each of the \code{ref_time_values} to extract data from. +Set to 0 for a "right" alignment for the sliding window, meaning that no +\code{time_value} before the slide will be used for the sliding calculation.} \item{ref_time_values}{Time values for sliding computations, meaning, each element of this vector serves as the reference time point for one sliding @@ -81,14 +89,15 @@ examples. } \details{ To "slide" means to apply a function or formula over a running -window of \code{n} time steps, where the unit (the meaning of one time step) is +window of \code{before} time steps before and \code{after} time steps after, +where the unit (the meaning of one time step) is implicitly defined by the way the \code{time_value} column treats addition and subtraction; for example, if the time values are coded as \code{Date} objects, then one time step is one day, since \code{as.Date("2022-01-01") + 1} equals \code{as.Date("2022-01-02")}. Alternatively, the time step can be set explicitly using the \code{time_step} argument (which if specified would override the -default choice based on \code{time_value} column). If less than \code{n} time steps -are available at any given reference time value, then \code{epi_slide()} still +default choice based on \code{time_value} column). If certain time steps +are unavailable at any given reference time value, then \code{epi_slide()} still attempts to perform the computation anyway (it does not require a complete window). The issue of what to do with partial computations (those run on incomplete windows) is therefore left up to the user, either through the @@ -122,7 +131,7 @@ through the \code{new_col_name} argument. # slide a left-aligned 7-day average jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - epi_slide(cases_7dav = mean(cases), before = 6) \%>\% + epi_slide(cases_7dav = mean(cases), before = 0, after = 6) \%>\% # rmv a nonessential var. to ensure new col is printed dplyr::select(-death_rate_7d_av) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 47375379..6f550d32 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -17,7 +17,7 @@ test_that("`after` must be defined as a non-zero integer if `before` is missing" test_that("Warn user against having a blank `before`",{ expect_warning(epi_slide(edf, f, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), - regexp="`before` missing but `after` nonzero; `before` has been set to 0.") + regexp="`before` is missing, but `after` is nonzero. `before` has been set to\n0.") }) test_that("Both `before` and `after` must be nonnegative integers",{ From 0456aff8098ae9d8023b470e6d1538fc3cc43b82 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 9 Aug 2022 18:03:31 -0700 Subject: [PATCH 43/60] Cleared unclear documentation and removed redundancy with slide's code. --- R/slide.R | 30 +++++++++++++++++++----------- man/epi_slide.Rd | 19 +++++++++++++++---- tests/testthat/test-epi_slide.R | 2 +- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/R/slide.R b/R/slide.R index 49a0dcf6..d6f4acca 100644 --- a/R/slide.R +++ b/R/slide.R @@ -22,15 +22,19 @@ #' interpreted as an expression for tidy evaluation. See details. #' @param before A nonnegative integer specifying the number of time steps #' before each of the `ref_time_values` to extract data from. -#' Set to 0 for a "left" alignment for the sliding window, meaning that no +#' Set to 0 for a "left" (trailing) alignment for the sliding window, meaning +#' that no #' `time_value` after the slide will be used for the sliding calculation. #' It is mandatory to specify a `before` value, unless `after` is specified #' as a non-zero value. In this case, `before` will be assumed to be 0. #' However, this usage is discouraged and will thus produce a warning. #' @param after A nonnegative integer specifying the number of time steps after #' each of the `ref_time_values` to extract data from. -#' Set to 0 for a "right" alignment for the sliding window, meaning that no +#' Set to 0 for a "right" (leading) alignment for the sliding window, meaning +#' that no #' `time_value` before the slide will be used for the sliding calculation. +#' To specify this to be centrally aligned, set `before` and `after` to be +#' the same. #' @param ref_time_values Time values for sliding computations, meaning, each #' element of this vector serves as the reference time point for one sliding #' window. If missing, then this will be set to all unique time values in the @@ -98,11 +102,18 @@ #' # rmv a nonessential var. to ensure new col is printed #' dplyr::select(-death_rate_7d_av) #' -#' # slide a left-aligned 7-day average +#' # slide a 7-day leading average #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% #' epi_slide(cases_7dav = mean(cases), before = 0, after = 6) %>% #' # rmv a nonessential var. to ensure new col is printed +#' dplyr::select(-death_rate_7d_av) +#' +#' # slide a 7-day centre-aligned average +#' jhu_csse_daily_subset %>% +#' group_by(geo_value) %>% +#' epi_slide(cases_7dav = mean(cases), before = 3, after = 3) %>% +#' # rmv a nonessential var. to ensure new col is printed #' dplyr::select(-death_rate_7d_av) #' #' # nested new columns @@ -138,7 +149,7 @@ epi_slide = function(x, f, ..., before, after = 0, ref_time_values, if (after == 0) { Abort("`before` cannot be missing when `after` is set to 0.") } else { - Warn("`before` is missing, but `after` is nonzero. `before` has been set to 0.") + Warn("`before` missing, `after` nonzero; assuming that left-aligned/leading window is desired and setting `before` = 0.") before = 0 } } @@ -151,20 +162,17 @@ epi_slide = function(x, f, ..., before, after = 0, ref_time_values, if (floor(before) < ceiling(before) || floor(after) < ceiling(after)) { Abort("`before` and `after` must be integers.") } - - before_num = before - after_num = after # If a custom time step is specified, then redefine units if (!missing(time_step)) { - before_num = time_step(before_num) - after_num = time_step(after_num) + before = time_step(before) + after = time_step(after) } # Now set up starts and stops for sliding/hopping time_range = range(unique(x$time_value)) - starts = in_range(ref_time_values - before_num, time_range) - stops = in_range(ref_time_values + after_num, time_range) + starts = in_range(ref_time_values - before, time_range) + stops = in_range(ref_time_values + after, time_range) if( length(starts) == 0 || length(stops) == 0 ) { Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check your settings for ref_time_values and align (and before, if specified).") diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 98c4aec4..a40c8f10 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -40,7 +40,8 @@ interpreted as an expression for tidy evaluation. See details.} \item{before}{A nonnegative integer specifying the number of time steps before each of the \code{ref_time_values} to extract data from. -Set to 0 for a "left" alignment for the sliding window, meaning that no +Set to 0 for a "left" (trailing) alignment for the sliding window, meaning +that no \code{time_value} after the slide will be used for the sliding calculation. It is mandatory to specify a \code{before} value, unless \code{after} is specified as a non-zero value. In this case, \code{before} will be assumed to be 0. @@ -48,8 +49,11 @@ However, this usage is discouraged and will thus produce a warning.} \item{after}{A nonnegative integer specifying the number of time steps after each of the \code{ref_time_values} to extract data from. -Set to 0 for a "right" alignment for the sliding window, meaning that no -\code{time_value} before the slide will be used for the sliding calculation.} +Set to 0 for a "right" (leading) alignment for the sliding window, meaning +that no +\code{time_value} before the slide will be used for the sliding calculation. +To specify this to be centrally aligned, set \code{before} and \code{after} to be +the same.} \item{ref_time_values}{Time values for sliding computations, meaning, each element of this vector serves as the reference time point for one sliding @@ -128,11 +132,18 @@ through the \code{new_col_name} argument. # rmv a nonessential var. to ensure new col is printed dplyr::select(-death_rate_7d_av) - # slide a left-aligned 7-day average + # slide a 7-day leading average jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% epi_slide(cases_7dav = mean(cases), before = 0, after = 6) \%>\% # rmv a nonessential var. to ensure new col is printed + dplyr::select(-death_rate_7d_av) + + # slide a 7-day centre-aligned average + jhu_csse_daily_subset \%>\% + group_by(geo_value) \%>\% + epi_slide(cases_7dav = mean(cases), before = 3, after = 3) \%>\% + # rmv a nonessential var. to ensure new col is printed dplyr::select(-death_rate_7d_av) # nested new columns diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 6f550d32..3ff19edf 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -17,7 +17,7 @@ test_that("`after` must be defined as a non-zero integer if `before` is missing" test_that("Warn user against having a blank `before`",{ expect_warning(epi_slide(edf, f, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), - regexp="`before` is missing, but `after` is nonzero. `before` has been set to\n0.") + regexp="`before` missing, `after` nonzero; assuming that left-aligned/leading\nwindow is desired and setting `before` = 0.") }) test_that("Both `before` and `after` must be nonnegative integers",{ From 950ee8ce2610dde667ba3d8d82ede21c12527e3b Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 9 Aug 2022 18:20:07 -0700 Subject: [PATCH 44/60] Added a test for blank `after`. --- tests/testthat/test-epi_slide.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 3ff19edf..fed4e513 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -11,6 +11,8 @@ f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) ## --- These cases generate the error: --- test_that("`after` must be defined as a non-zero integer if `before` is missing", { + expect_error(epi_slide(edf, f, ref_time_values=as.Date("2020-01-01")), + "`before` cannot be missing when `after` is set to 0.") expect_error(epi_slide(edf, f, after = 0L, ref_time_values=as.Date("2020-01-01")), "`before` cannot be missing when `after` is set to 0.") }) From d43cede74ea3b9bc76595228c9049779f09909a4 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 9 Aug 2022 18:29:35 -0700 Subject: [PATCH 45/60] Refactored edf with grouped. --- tests/testthat/test-epi_slide.R | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index fed4e513..8b77eb27 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -1,6 +1,6 @@ ## Create an epi. df and a function to test epi_slide with -edf = dplyr::bind_rows( +grouped = dplyr::bind_rows( dplyr::tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:200, value=1:200), dplyr::tibble(geo_value = "al", time_value=as.Date("2020-01-01") + 1:5, value=-(1:5)) ) %>% @@ -11,50 +11,50 @@ f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) ## --- These cases generate the error: --- test_that("`after` must be defined as a non-zero integer if `before` is missing", { - expect_error(epi_slide(edf, f, ref_time_values=as.Date("2020-01-01")), + expect_error(epi_slide(grouped, f, ref_time_values=as.Date("2020-01-01")), "`before` cannot be missing when `after` is set to 0.") - expect_error(epi_slide(edf, f, after = 0L, ref_time_values=as.Date("2020-01-01")), + expect_error(epi_slide(grouped, f, after = 0L, ref_time_values=as.Date("2020-01-01")), "`before` cannot be missing when `after` is set to 0.") }) test_that("Warn user against having a blank `before`",{ - expect_warning(epi_slide(edf, f, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), + expect_warning(epi_slide(grouped, f, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), regexp="`before` missing, `after` nonzero; assuming that left-aligned/leading\nwindow is desired and setting `before` = 0.") }) test_that("Both `before` and `after` must be nonnegative integers",{ - expect_error(epi_slide(edf, f, before = -1L, ref_time_values=as.Date("2020-01-01")+2L), + expect_error(epi_slide(grouped, f, before = -1L, ref_time_values=as.Date("2020-01-01")+2L), "`before` and `after` must be at least 0.") - expect_error(epi_slide(edf, f, before = 2L, after = -1L, ref_time_values=as.Date("2020-01-01")+2L), + expect_error(epi_slide(grouped, f, before = 2L, after = -1L, ref_time_values=as.Date("2020-01-01")+2L), "`before` and `after` must be at least 0.") - expect_error(epi_slide(edf, f, before = 0.5, ref_time_values=as.Date("2020-01-01")+2L), + expect_error(epi_slide(grouped, f, before = 0.5, ref_time_values=as.Date("2020-01-01")+2L), "`before` and `after` must be integers.") - expect_error(epi_slide(edf, f, before = 1, after = 0.5, ref_time_values=as.Date("2020-01-01")+2L), + expect_error(epi_slide(grouped, f, before = 1, after = 0.5, ref_time_values=as.Date("2020-01-01")+2L), "`before` and `after` must be integers.") # The before and after values can be numerics that are integerish - expect_error(epi_slide(edf, f, before = 1, after = 1, ref_time_values=as.Date("2020-01-01")+2L),NA) + expect_error(epi_slide(grouped, f, before = 1, after = 1, ref_time_values=as.Date("2020-01-01")+2L),NA) }) test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { - expect_error(epi_slide(edf, f, before=2L, ref_time_values=as.Date("2020-01-01")), + expect_error(epi_slide(grouped, f, before=2L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, no data in the slide windows - expect_error(epi_slide(edf, f, before=2L, ref_time_values=as.Date("2020-01-01")+207L), + expect_error(epi_slide(grouped, f, before=2L, ref_time_values=as.Date("2020-01-01")+207L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, no data in window }) test_that("`ref_time_values` + `align` that have some slide data, but generate the error due to ref. time being out of time range", { - expect_error(epi_slide(edf, f, before=0L, after=2L, ref_time_values=as.Date("2020-01-01")), + expect_error(epi_slide(grouped, f, before=0L, after=2L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, but we'd expect there to be data in the window - expect_error(epi_slide(edf, f, before=2L, ref_time_values=as.Date("2020-01-01")+201L), + expect_error(epi_slide(grouped, f, before=2L, ref_time_values=as.Date("2020-01-01")+201L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, but still with data in window }) ## --- These cases doesn't generate the error: --- test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { - expect_identical(epi_slide(edf, f, before=2L, ref_time_values=as.Date("2020-01-01")+200L) %>% + expect_identical(epi_slide(grouped, f, before=2L, ref_time_values=as.Date("2020-01-01")+200L) %>% dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = "ak", slide_value_value = 199)) # out of range for one group - expect_identical(epi_slide(edf, f, before=2L, ref_time_values=as.Date("2020-01-04")) %>% + expect_identical(epi_slide(grouped, f, before=2L, ref_time_values=as.Date("2020-01-04")) %>% dplyr::select("geo_value","slide_value_value"), dplyr::tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2))) # not out of range for either group }) \ No newline at end of file From 039f33ff60cac0a5fa9124db425f94c895d933e0 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 10 Aug 2022 11:14:23 -0700 Subject: [PATCH 46/60] More fixes. --- R/slide.R | 20 +++++++++++++++----- man/epi_slide.Rd | 11 +++++++---- tests/testthat/test-epi_slide.R | 30 +++++++++++++++++++++++------- vignettes/aggregation.Rmd | 2 +- 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/R/slide.R b/R/slide.R index d6f4acca..140226fa 100644 --- a/R/slide.R +++ b/R/slide.R @@ -22,16 +22,19 @@ #' interpreted as an expression for tidy evaluation. See details. #' @param before A nonnegative integer specifying the number of time steps #' before each of the `ref_time_values` to extract data from. -#' Set to 0 for a "left" (trailing) alignment for the sliding window, meaning +#' This must be a vector of length 1. +#' Set to 0 for a right-aligned/trailing sliding window, meaning #' that no #' `time_value` after the slide will be used for the sliding calculation. #' It is mandatory to specify a `before` value, unless `after` is specified -#' as a non-zero value. In this case, `before` will be assumed to be 0. +#' as a non-zero value. In this case, `before` will be assumed to be 0, as it +#' assumes the user wants to do a left-aligned/leading sliding window. #' However, this usage is discouraged and will thus produce a warning. #' @param after A nonnegative integer specifying the number of time steps after #' each of the `ref_time_values` to extract data from. -#' Set to 0 for a "right" (leading) alignment for the sliding window, meaning -#' that no +#' This must be a vector of length 1. The default value for +#' this is 0. Set to 0 for a left-aligned/leading sliding +#' window, meaning that no #' `time_value` before the slide will be used for the sliding calculation. #' To specify this to be centrally aligned, set `before` and `after` to be #' the same. @@ -143,6 +146,11 @@ epi_slide = function(x, f, ..., before, after = 0, ref_time_values, unique(x$time_value)] } + # We must ensure that both before and after are of length 1 + if (length(after) != 1L || (!missing(before) && length(before) != 1L)) { + Abort("`before` and `after` must be vectors of length 1.") + } + # Before cannot be missing if after is set to 0. If after is set to a nonzero # number, then before must be set to 0 if (missing(before)) { @@ -159,7 +167,9 @@ epi_slide = function(x, f, ..., before, after = 0, ref_time_values, Abort("`before` and `after` must be at least 0.") } - if (floor(before) < ceiling(before) || floor(after) < ceiling(after)) { + if (!(is.numeric(before) && is.numeric(after))|| + floor(before) < ceiling(before) || + floor(after) < ceiling(after)) { Abort("`before` and `after` must be integers.") } diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index a40c8f10..1e4c77af 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -40,17 +40,20 @@ interpreted as an expression for tidy evaluation. See details.} \item{before}{A nonnegative integer specifying the number of time steps before each of the \code{ref_time_values} to extract data from. -Set to 0 for a "left" (trailing) alignment for the sliding window, meaning +This must be a vector of length 1. +Set to 0 for a right-aligned/trailing sliding window, meaning that no \code{time_value} after the slide will be used for the sliding calculation. It is mandatory to specify a \code{before} value, unless \code{after} is specified -as a non-zero value. In this case, \code{before} will be assumed to be 0. +as a non-zero value. In this case, \code{before} will be assumed to be 0, as it +assumes the user wants to do a left-aligned/leading sliding window. However, this usage is discouraged and will thus produce a warning.} \item{after}{A nonnegative integer specifying the number of time steps after each of the \code{ref_time_values} to extract data from. -Set to 0 for a "right" (leading) alignment for the sliding window, meaning -that no +This must be a vector of length 1. The default value for +this is 0. Set to 0 for a left-aligned/leading sliding +window, meaning that no \code{time_value} before the slide will be used for the sliding calculation. To specify this to be centrally aligned, set \code{before} and \code{after} to be the same.} diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 8b77eb27..9da9ce4f 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -9,17 +9,21 @@ grouped = dplyr::bind_rows( f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) -## --- These cases generate the error: --- +## --- These cases generate errors (or not): --- +test_that("`before` and `after` are both vectors of length 1", { + expect_error(epi_slide(grouped, f, before = c(0,1), after = 0, ref_time_values=as.Date("2020-01-01") + 3), + "`before` and `after` must be vectors of length 1.") + expect_error(epi_slide(grouped, f, before = 1, after = c(0,1), ref_time_values=as.Date("2020-01-01") + 3), + "`before` and `after` must be vectors of length 1.") +}) + test_that("`after` must be defined as a non-zero integer if `before` is missing", { expect_error(epi_slide(grouped, f, ref_time_values=as.Date("2020-01-01")), "`before` cannot be missing when `after` is set to 0.") expect_error(epi_slide(grouped, f, after = 0L, ref_time_values=as.Date("2020-01-01")), "`before` cannot be missing when `after` is set to 0.") -}) - -test_that("Warn user against having a blank `before`",{ - expect_warning(epi_slide(grouped, f, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), - regexp="`before` missing, `after` nonzero; assuming that left-aligned/leading\nwindow is desired and setting `before` = 0.") + expect_error(epi_slide(grouped, f, before = 0L, ref_time_values=as.Date("2020-01-01")+1L), + NA) }) test_that("Both `before` and `after` must be nonnegative integers",{ @@ -27,9 +31,13 @@ test_that("Both `before` and `after` must be nonnegative integers",{ "`before` and `after` must be at least 0.") expect_error(epi_slide(grouped, f, before = 2L, after = -1L, ref_time_values=as.Date("2020-01-01")+2L), "`before` and `after` must be at least 0.") + expect_error(epi_slide(grouped, f, before = "a", ref_time_values=as.Date("2020-01-01")+2L), + "`before` and `after` must be integers.") + expect_error(epi_slide(grouped, f, before = 1L, after = "a", ref_time_values=as.Date("2020-01-01")+2L), + "`before` and `after` must be integers.") expect_error(epi_slide(grouped, f, before = 0.5, ref_time_values=as.Date("2020-01-01")+2L), "`before` and `after` must be integers.") - expect_error(epi_slide(grouped, f, before = 1, after = 0.5, ref_time_values=as.Date("2020-01-01")+2L), + expect_error(epi_slide(grouped, f, before = 1L, after = 0.5, ref_time_values=as.Date("2020-01-01")+2L), "`before` and `after` must be integers.") # The before and after values can be numerics that are integerish expect_error(epi_slide(grouped, f, before = 1, after = 1, ref_time_values=as.Date("2020-01-01")+2L),NA) @@ -49,6 +57,14 @@ test_that("`ref_time_values` + `align` that have some slide data, but generate t "starting and/or stopping times for sliding are out of bounds") # beyond the last, but still with data in window }) +## --- These cases generate warnings (or not): --- +test_that("Warn user against having a blank `before`",{ + expect_warning(epi_slide(grouped, f, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), + regexp="`before` missing, `after` nonzero; assuming that left-aligned/leading\nwindow is desired and setting `before` = 0.") + expect_warning(epi_slide(grouped, f, before = 0L, after = 1L, + ref_time_values=as.Date("2020-01-01")+1L), NA) +}) + ## --- These cases doesn't generate the error: --- test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { expect_identical(epi_slide(grouped, f, before=2L, ref_time_values=as.Date("2020-01-01")+200L) %>% diff --git a/vignettes/aggregation.Rmd b/vignettes/aggregation.Rmd index a3a57bb7..47097c9c 100644 --- a/vignettes/aggregation.Rmd +++ b/vignettes/aggregation.Rmd @@ -177,7 +177,7 @@ Explicit imputation for missingness (zero-filling in our case) can be important for protecting against bugs in all sorts of downstream tasks. For example, even something as simple as a 7-day trailing average is complicated by missingness. The function `epi_slide()` looks for all rows within a window of 7 days anchored -on the right at the reference time point (when `after = 6`). +on the right at the reference time point (when `before = 6`). But when some days in a given week are missing because they were censored because they had small case counts, taking an average of the observed case counts can be misleading and is unintentionally biased upwards. Meanwhile, From 93738aa4f9731708928c233567e0e3053b11370c Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 10 Aug 2022 11:20:04 -0700 Subject: [PATCH 47/60] Updated `align`. --- tests/testthat/test-epi_slide.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 9da9ce4f..0d9f9038 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -43,14 +43,14 @@ test_that("Both `before` and `after` must be nonnegative integers",{ expect_error(epi_slide(grouped, f, before = 1, after = 1, ref_time_values=as.Date("2020-01-01")+2L),NA) }) -test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { +test_that("`ref_time_values` + `before` + `after` that result in no slide data, generate the error", { expect_error(epi_slide(grouped, f, before=2L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, no data in the slide windows expect_error(epi_slide(grouped, f, before=2L, ref_time_values=as.Date("2020-01-01")+207L), "starting and/or stopping times for sliding are out of bounds") # beyond the last, no data in window }) -test_that("`ref_time_values` + `align` that have some slide data, but generate the error due to ref. time being out of time range", { +test_that("`ref_time_values` + `before` + `after` that have some slide data, but generate the error due to ref. time being out of time range", { expect_error(epi_slide(grouped, f, before=0L, after=2L, ref_time_values=as.Date("2020-01-01")), "starting and/or stopping times for sliding are out of bounds") # before the first, but we'd expect there to be data in the window expect_error(epi_slide(grouped, f, before=2L, ref_time_values=as.Date("2020-01-01")+201L), From 8c601f8a16b2e84eff6bfcd36b554e1cec041166 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 10 Aug 2022 11:20:45 -0700 Subject: [PATCH 48/60] Fixed inconsistency with test formatting. --- tests/testthat/test-epi_slide.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 0d9f9038..5af3e006 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -60,7 +60,7 @@ test_that("`ref_time_values` + `before` + `after` that have some slide data, but ## --- These cases generate warnings (or not): --- test_that("Warn user against having a blank `before`",{ expect_warning(epi_slide(grouped, f, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), - regexp="`before` missing, `after` nonzero; assuming that left-aligned/leading\nwindow is desired and setting `before` = 0.") + "`before` missing, `after` nonzero; assuming that left-aligned/leading\nwindow is desired and setting `before` = 0.") expect_warning(epi_slide(grouped, f, before = 0L, after = 1L, ref_time_values=as.Date("2020-01-01")+1L), NA) }) From cfe2b5526457fbbf580824948917f575a892fa0f Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 10 Aug 2022 11:58:51 -0700 Subject: [PATCH 49/60] Updated compactify on a vignette, added two tests for NA and put a test for testing negative integers later. --- R/slide.R | 11 ++++++----- tests/testthat/test-epi_slide.R | 4 ++++ vignettes/advanced.Rmd | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/R/slide.R b/R/slide.R index 140226fa..0e3edf78 100644 --- a/R/slide.R +++ b/R/slide.R @@ -162,16 +162,17 @@ epi_slide = function(x, f, ..., before, after = 0, ref_time_values, } } - # Otherwise set up alignment based on passed before value - if (before < 0 || after < 0) { - Abort("`before` and `after` must be at least 0.") - } - if (!(is.numeric(before) && is.numeric(after))|| floor(before) < ceiling(before) || floor(after) < ceiling(after)) { Abort("`before` and `after` must be integers.") } + + + # Otherwise set up alignment based on passed before value + if (before < 0 || after < 0) { + Abort("`before` and `after` must be at least 0.") + } # If a custom time step is specified, then redefine units if (!missing(time_step)) { diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 5af3e006..c9e04fc3 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -39,6 +39,10 @@ test_that("Both `before` and `after` must be nonnegative integers",{ "`before` and `after` must be integers.") expect_error(epi_slide(grouped, f, before = 1L, after = 0.5, ref_time_values=as.Date("2020-01-01")+2L), "`before` and `after` must be integers.") + expect_error(epi_slide(grouped, f, before = NA, ref_time_values=as.Date("2020-01-01")+2L), + "`before` and `after` must be integers.") + expect_error(epi_slide(grouped, f, before = 1L, after = NA, ref_time_values=as.Date("2020-01-01")+2L), + "`before` and `after` must be integers.") # The before and after values can be numerics that are integerish expect_error(epi_slide(grouped, f, before = 1, after = 1, ref_time_values=as.Date("2020-01-01")+2L),NA) }) diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index f8590491..87496a33 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -222,7 +222,7 @@ x <- y1 %>% version = issue, percent_cli = value ) %>% - as_epi_archive(compactify=FALSE) + as_epi_archive(compactify=TRUE) # mutating merge operation: x$merge(y2 %>% @@ -230,7 +230,7 @@ x$merge(y2 %>% version = issue, case_rate_7d_av = value ) %>% - as_epi_archive(compactify=FALSE) + as_epi_archive(compactify=TRUE) ) ``` From 271ffe6ccd2f3ea048ce06b9ddf044d01330ab55 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 12 Aug 2022 17:19:29 -0700 Subject: [PATCH 50/60] Still needs work. --- R/methods-epi_archive.R | 2 +- tests/testthat/test-methods-epi_archive.R | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 2977c544..3044cf36 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -446,7 +446,7 @@ epix_merge = function(x, y, #' by = "1 day") #' epix_slide(x = archive_cases_dv_subset, #' f = ~ mean(.x$case_rate_7d_av), -#' n = 3, +#' max_version_gap = 3, #' group_by = geo_value, #' ref_versions = versions, #' new_col_name = 'case_rate_3d_av') diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R index d0434f59..23f9f167 100644 --- a/tests/testthat/test-methods-epi_archive.R +++ b/tests/testthat/test-methods-epi_archive.R @@ -71,13 +71,13 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss compactify = TRUE) reference_by_modulus = epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - n = 3, + max_version_gap = 3, group_by = modulus, ref_time_values = time_values, new_col_name = 'case_rate_3d_av') reference_by_both = epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - n = 3, + max_version_gap = 3, group_by = c(geo_value, modulus), ref_time_values = time_values, new_col_name = 'case_rate_3d_av') @@ -85,7 +85,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - n = 3, + max_version_gap = 3, group_by = modulus, ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -96,7 +96,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - n = 3, + max_version_gap = 3, group_by = "modulus", ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), @@ -105,7 +105,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - n = 3, + max_version_gap = 3, group_by = "modulus", ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -121,7 +121,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - n = 3, + max_version_gap = 3, group_by = tidyselect::all_of(my_group_by), ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), @@ -130,7 +130,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - n = 3, + max_version_gap = 3, group_by = tidyselect::all_of(my_group_by), ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -141,7 +141,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - n = 3, + max_version_gap = 3, ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), reference_by_both @@ -149,7 +149,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - n = 3, + max_version_gap = 3, ref_time_values = time_values, new_col_name = 'case_rate_3d_av' ), From b0d751bf7989d497b55a561909238822a7496ed7 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 15 Aug 2022 09:29:54 -0700 Subject: [PATCH 51/60] Still needs work, as slide necessitates that group_by be a string. --- R/methods-epi_archive.R | 2 +- man/epix_slide.Rd | 2 +- tests/testthat/test-epix_slide.R | 11 ++++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 3044cf36..02b0e379 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -456,7 +456,7 @@ epix_slide = function(x, f, ..., max_version_gap, group_by, ref_versions, if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") return(x$slide(f, ..., max_version_gap = max_version_gap, - group_by = enquo(group_by), + group_by = group_by, ref_versions = ref_versions, time_step = time_step, new_col_name = new_col_name, diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index 82d1ac83..a6902b5d 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -140,7 +140,7 @@ versions <- seq(as.Date("2020-06-01"), by = "1 day") epix_slide(x = archive_cases_dv_subset, f = ~ mean(.x$case_rate_7d_av), - n = 3, + max_version_gap = 3, group_by = geo_value, ref_versions = versions, new_col_name = 'case_rate_3d_av') diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index bef6c32d..c88ff261 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -1,4 +1,5 @@ library(dplyr) +library(rlang) ea <- archive_cases_dv_subset$clone() @@ -34,5 +35,13 @@ test_that("epix_slide works as intended",{ 2^21+2^19+2^16+2^13, 2^22+2^21+2^19+2^17)) - expect_identical(xx1,xx2) + expect_identical(xx1,xx2) # * + + xx3 <- x2$slide(f = ~ sum(.x$binary), + max_version_gap = 5, + group_by = "geo_value", + ref_versions = versions, + new_col_name = 'sum_binary') + + expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical }) From 0498777929ca4d30537454b58418da27e936bed1 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 15 Aug 2022 10:24:14 -0700 Subject: [PATCH 52/60] Still needs fixing. --- NAMESPACE | 1 + R/archive.R | 2 +- test-correlation.R | 0 3 files changed, 2 insertions(+), 1 deletion(-) delete mode 100644 test-correlation.R diff --git a/NAMESPACE b/NAMESPACE index eeffd7b3..a290ab27 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -80,6 +80,7 @@ importFrom(rlang,arg_match) importFrom(rlang,enquo) importFrom(rlang,enquos) importFrom(rlang,is_quosure) +importFrom(rlang,quo_is_missing) importFrom(rlang,sym) importFrom(rlang,syms) importFrom(stats,cor) diff --git a/R/archive.R b/R/archive.R index 85247c53..8707bec1 100644 --- a/R/archive.R +++ b/R/archive.R @@ -583,7 +583,7 @@ epi_archive = #' object. See the documentation for the wrapper function [`epix_slide()`] for #' details. #' @importFrom data.table key -#' @importFrom rlang !! !!! enquo enquos is_quosure sym syms +#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms slide = function(f, ..., max_version_gap, group_by, ref_versions, time_step, new_col_name = "slide_value", diff --git a/test-correlation.R b/test-correlation.R deleted file mode 100644 index e69de29b..00000000 From b9b9e6a16f06a8340b15a9eaf6cd748e595c66a0 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 15 Aug 2022 10:26:42 -0700 Subject: [PATCH 53/60] I somehow broke this. --- R/methods-epi_archive.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index b8b892fb..8bb17825 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -456,7 +456,7 @@ epix_slide = function(x, f, ..., max_version_gap, group_by, ref_versions, if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") return(x$slide(f, ..., max_version_gap = max_version_gap, - group_by = group_by, + group_by = enquo(group_by), ref_versions = ref_versions, time_step = time_step, new_col_name = new_col_name, From 28a9c12abf73cc1e3379dabd1d57ffa3bf752ca6 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 15 Aug 2022 11:09:28 -0700 Subject: [PATCH 54/60] Vignettes are still failing. --- R/methods-epi_archive.R | 2 +- vignettes/advanced.Rmd | 2 +- vignettes/archive.Rmd | 4 ++-- vignettes/compactify.Rmd | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 8bb17825..cbd567e2 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -456,7 +456,7 @@ epix_slide = function(x, f, ..., max_version_gap, group_by, ref_versions, if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") return(x$slide(f, ..., max_version_gap = max_version_gap, - group_by = enquo(group_by), + group_by = {{group_by}}, ref_versions = ref_versions, time_step = time_step, new_col_name = new_col_name, diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index 4e3c6b13..692df858 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -362,7 +362,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x_latest %>% epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - n = 120, ref_time_values = fc_versions) %>% + before = 119, ref_time_values = fc_versions) %>% mutate(target_date = time_value + ahead, as_of = FALSE) } } diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index ea70dacb..5a13fc4e 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -434,8 +434,8 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { else { x_latest %>% group_by(geo_value) %>% - epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), before = 119, - ref_time_values = fc_time_values) %>% + epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, ahead = ahead), before = 119, + ref_time_values = fc_versions) %>% mutate(target_date = time_value + ahead, as_of = FALSE) } } diff --git a/vignettes/compactify.Rmd b/vignettes/compactify.Rmd index 034235b3..a011ed43 100644 --- a/vignettes/compactify.Rmd +++ b/vignettes/compactify.Rmd @@ -102,7 +102,7 @@ speeds <- rbind(speeds, speed_test(iterate_as_of,"as_of_1000x")) # Performance of slide slide_median <- function(my_ea) { - my_ea$slide(median = median(case_rate_7d_av), n = 7) + my_ea$slide(median = median(case_rate_7d_av), before = 6) } speeds <- rbind(speeds, speed_test(slide_median,"slide_median")) From b407fdd8205525ded291632c364871377b2813e1 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 15 Aug 2022 11:55:03 -0700 Subject: [PATCH 55/60] Check fails, but code chunk has `error = TRUE`. --- vignettes/archive.Rmd | 4 ++-- vignettes/compactify.Rmd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index 5a13fc4e..728fb2a0 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -433,8 +433,8 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { } else { x_latest %>% - group_by(geo_value) %>% - epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, ahead = ahead), before = 119, + epi_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, + args = prob_arx_args(ahead = ahead)), before = 119, ref_time_values = fc_versions) %>% mutate(target_date = time_value + ahead, as_of = FALSE) } diff --git a/vignettes/compactify.Rmd b/vignettes/compactify.Rmd index a011ed43..7addd474 100644 --- a/vignettes/compactify.Rmd +++ b/vignettes/compactify.Rmd @@ -102,7 +102,7 @@ speeds <- rbind(speeds, speed_test(iterate_as_of,"as_of_1000x")) # Performance of slide slide_median <- function(my_ea) { - my_ea$slide(median = median(case_rate_7d_av), before = 6) + my_ea$slide(median = median(case_rate_7d_av), max_version_gap = 6) } speeds <- rbind(speeds, speed_test(slide_median,"slide_median")) From d14c0e82e31eb01da49800b36efd0e124e42cf22 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 15 Aug 2022 12:55:07 -0700 Subject: [PATCH 56/60] Updated incorrect info. --- R/growth_rate.R | 2 +- man/growth_rate.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/growth_rate.R b/R/growth_rate.R index d3ca9e31..c22f7aab 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -73,7 +73,7 @@ #' implicitly defined by the `x` variable; for example, if `x` is a vector of #' `Date` objects, `h = 7`, and the reference point is January 7, then the #' sliding window contains all data in between January 1 and 14 (matching the -#' behavior of `epi_slide()` with `n = 2 * h` and `align = "center"`). +#' behavior of `epi_slide()` with `before = 6` and `after = 7`). #' #' @section Additional Arguments: #' For the global methods, "smooth_spline" and "trend_filter", additional diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd index 173eff43..b339b2fc 100644 --- a/man/growth_rate.Rd +++ b/man/growth_rate.Rd @@ -105,7 +105,7 @@ reference point is at most \code{h}. Note that the unit for this distance is implicitly defined by the \code{x} variable; for example, if \code{x} is a vector of \code{Date} objects, \code{h = 7}, and the reference point is January 7, then the sliding window contains all data in between January 1 and 14 (matching the -behavior of \code{epi_slide()} with \code{n = 2 * h} and \code{align = "center"}). +behavior of \code{epi_slide()} with \code{before = 6} and \code{after = 7}). } \section{Additional Arguments}{ From 2b8690f3f6a42a8b39dd4a45f668a06abe4436ff Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 17 Aug 2022 09:46:08 -0700 Subject: [PATCH 57/60] Added changes to awkward spacing and for the argument of before and after. --- R/growth_rate.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/growth_rate.R b/R/growth_rate.R index c22f7aab..9778a920 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -73,7 +73,7 @@ #' implicitly defined by the `x` variable; for example, if `x` is a vector of #' `Date` objects, `h = 7`, and the reference point is January 7, then the #' sliding window contains all data in between January 1 and 14 (matching the -#' behavior of `epi_slide()` with `before = 6` and `after = 7`). +#' behavior of `epi_slide()` with `before = h-1` and `after = h`). #' #' @section Additional Arguments: #' For the global methods, "smooth_spline" and "trend_filter", additional @@ -104,12 +104,12 @@ #' # COVID cases growth rate by state using default method relative change #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' mutate(cases_gr = growth_rate(x = time_value, y = cases)) +#' mutate(cases_gr = growth_rate(x = time_value, y = cases)) #' #' # Log scale, degree 4 polynomial and 6-fold cross validation #' jhu_csse_daily_subset %>% #' group_by(geo_value) %>% -#' mutate(gr_poly = growth_rate( x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6)) +#' mutate(gr_poly = growth_rate(x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6)) growth_rate = function(x = seq_along(y), y, x0 = x, method = c("rel_change", "linear_reg", From 3bd1368d78d654a9ae615c7dae38d44f51a132d7 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 17 Aug 2022 09:47:01 -0700 Subject: [PATCH 58/60] I documented it. --- man/growth_rate.Rd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd index b339b2fc..890d1968 100644 --- a/man/growth_rate.Rd +++ b/man/growth_rate.Rd @@ -105,7 +105,7 @@ reference point is at most \code{h}. Note that the unit for this distance is implicitly defined by the \code{x} variable; for example, if \code{x} is a vector of \code{Date} objects, \code{h = 7}, and the reference point is January 7, then the sliding window contains all data in between January 1 and 14 (matching the -behavior of \code{epi_slide()} with \code{before = 6} and \code{after = 7}). +behavior of \code{epi_slide()} with \code{before = h-1} and \code{after = h}). } \section{Additional Arguments}{ @@ -138,10 +138,10 @@ user. # COVID cases growth rate by state using default method relative change jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - mutate(cases_gr = growth_rate(x = time_value, y = cases)) + mutate(cases_gr = growth_rate(x = time_value, y = cases)) # Log scale, degree 4 polynomial and 6-fold cross validation jhu_csse_daily_subset \%>\% group_by(geo_value) \%>\% - mutate(gr_poly = growth_rate( x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6)) + mutate(gr_poly = growth_rate(x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6)) } From 581648d51b583373ccaf8674d47a6a855fb09a3a Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 17 Aug 2022 19:57:42 -0700 Subject: [PATCH 59/60] Refactored many things, but this still needs work. --- R/archive.R | 31 ++++++++++++----------- R/methods-epi_archive.R | 20 +++++++-------- man/epi_archive.Rd | 4 +-- man/epix_slide.Rd | 18 ++++++------- tests/testthat/test-epix_slide.R | 8 +++--- tests/testthat/test-methods-epi_archive.R | 18 ++++++------- vignettes/advanced.Rmd | 12 ++++----- vignettes/archive.Rmd | 6 ++--- vignettes/compactify.Rmd | 2 +- 9 files changed, 60 insertions(+), 59 deletions(-) diff --git a/R/archive.R b/R/archive.R index 8707bec1..58136ed5 100644 --- a/R/archive.R +++ b/R/archive.R @@ -584,24 +584,24 @@ epi_archive = #' details. #' @importFrom data.table key #' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms - slide = function(f, ..., max_version_gap, group_by, - ref_versions, + slide = function(f, ..., before, group_by, + ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { # If missing, then set ref time values to be everything; else make # sure we intersect with observed time values - if (missing(ref_versions)) { - ref_versions = unique(self$DT$version) + if (missing(ref_time_values)) { + ref_time_values = unique(self$DT$time_values) } else { - ref_versions = ref_versions[ref_versions %in% + ref_time_values = ref_time_values[ref_time_values %in% unique(self$DT$time_value)] } # If a custom time step is specified, then redefine units - before_num = max_version_gap-1 - if (!missing(time_step)) before_num = time_step(max_version_gap-1) + before_num = before-1 + if (!missing(time_step)) before_num = time_step(before-1) # What to group by? If missing, set according to internal keys; # otherwise, tidyselect. @@ -620,7 +620,7 @@ epi_archive = # Computation for one group, one time value comp_one_grp = function(.data_group, f, ..., - version, + ref_time_value, key_vars, new_col) { # Carry out the specified computation @@ -666,7 +666,7 @@ epi_archive = # Note that we've already recycled comp value to make size stable, # so tibble() will just recycle time value appropriately - return(tibble::tibble(version = version, + return(tibble::tibble(time_value = ref_time_value, !!new_col := comp_value)) } @@ -675,13 +675,14 @@ epi_archive = if (rlang::is_formula(f)) f = rlang::as_function(f) - x = purrr::map_dfr(ref_versions, function(t) { - self$as_of(t, min_time_value = t - before_num) %>% + x = purrr::map_dfr(ref_time_values, function(ref_time_value) { + self$as_of(ref_time_value, + min_time_value = ref_time_value - before_num) %>% tibble::as_tibble() %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, ..., - version = t, + ref_time_value = ref_time_value, key_vars = key_vars, new_col = new_col, .keep = TRUE) %>% @@ -703,13 +704,13 @@ epi_archive = f = function(x, quo, ...) rlang::eval_tidy(quo, x) new_col = sym(names(rlang::quos_auto_name(quos))) - x = purrr::map_dfr(ref_versions, function(t) { - self$as_of(t, min_time_value = t - before_num) %>% + x = purrr::map_dfr(ref_time_values, function(ref_time_value) { + self$as_of(t, min_time_value = ref_time_value - before_num) %>% tibble::as_tibble() %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, quo = quo, - version = t, + ref_time_value = ref_time_value, key_vars = key_vars, new_col = new_col, .keep = TRUE) %>% diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index cbd567e2..a7874e2b 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -358,9 +358,9 @@ epix_merge = function(x, y, #' @param ... Additional arguments to pass to the function or formula specified #' via `f`. Alternatively, if `f` is missing, then the current argument is #' interpreted as an expression for tidy evaluation. -#' @param max_version_gap Number of time steps to use in the running window. +#' @param before Number of time steps to use in the running window. #' For example, if -#' `max_version_gap = 7`, and one time step is one day, then to produce a +#' `before = 7`, and one time step is one day, then to produce a #' value on January 7 #' we apply the given function or formula to data in between January 1 and #' 7. @@ -368,7 +368,7 @@ epix_merge = function(x, y, #' missing, then the keys in the underlying data table, excluding `time_value` #' and `version`, will be used for grouping. To omit a grouping entirely, use #' `group_by = NULL`. -#' @param ref_versions Time values for sliding computations, meaning, each +#' @param ref_time_values Time values for sliding computations, meaning, each #' element of this vector serves as the reference time point for one sliding #' window. If missing, then this will be set to all unique time values in the #' underlying data table, by default. @@ -424,11 +424,11 @@ epix_merge = function(x, y, #' Finally, this is simply a wrapper around the `slide()` method of the #' `epi_archive` class, so if `x` is an `epi_archive` object, then: #' ``` -#' epix_slide(x, new_var = comp(old_var), max_version_gap = 120) +#' epix_slide(x, new_var = comp(old_var), before = 120) #' ``` #' is equivalent to: #' ``` -#' x$slide(new_var = comp(old_var), max_version_gap = 120) +#' x$slide(new_var = comp(old_var), before = 120) #' ``` #' #' @importFrom rlang enquo @@ -446,18 +446,18 @@ epix_merge = function(x, y, #' by = "1 day") #' epix_slide(x = archive_cases_dv_subset, #' f = ~ mean(.x$case_rate_7d_av), -#' max_version_gap = 3, +#' before = 3, #' group_by = geo_value, -#' ref_versions = versions, +#' ref_time_values = versions, #' new_col_name = 'case_rate_3d_av') -epix_slide = function(x, f, ..., max_version_gap, group_by, ref_versions, +epix_slide = function(x, f, ..., before, group_by, ref_time_values, time_step, new_col_name = "devslide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") return(x$slide(f, ..., - max_version_gap = max_version_gap, + before = before, group_by = {{group_by}}, - ref_versions = ref_versions, + ref_time_values = ref_time_values, time_step = time_step, new_col_name = new_col_name, as_list_col = as_list_col, diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd index b670e195..998ade9e 100644 --- a/man/epi_archive.Rd +++ b/man/epi_archive.Rd @@ -277,9 +277,9 @@ details. \if{html}{\out{
}}\preformatted{epi_archive$slide( f, ..., - max_version_gap, + before, group_by, - ref_versions, + ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index a2fab224..cd937fcf 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -8,9 +8,9 @@ epix_slide( x, f, ..., - max_version_gap, + before, group_by, - ref_versions, + ref_time_values, time_step, new_col_name = "devslide_value", as_list_col = FALSE, @@ -34,9 +34,9 @@ sliding window of \code{n} time steps.} via \code{f}. Alternatively, if \code{f} is missing, then the current argument is interpreted as an expression for tidy evaluation.} -\item{max_version_gap}{Number of time steps to use in the running window. +\item{before}{Number of time steps to use in the running window. For example, if -\code{max_version_gap = 7}, and one time step is one day, then to produce a +\code{before = 7}, and one time step is one day, then to produce a value on January 7 we apply the given function or formula to data in between January 1 and 7.} @@ -46,7 +46,7 @@ missing, then the keys in the underlying data table, excluding \code{time_value} and \code{version}, will be used for grouping. To omit a grouping entirely, use \code{group_by = NULL}.} -\item{ref_versions}{Time values for sliding computations, meaning, each +\item{ref_time_values}{Time values for sliding computations, meaning, each element of this vector serves as the reference time point for one sliding window. If missing, then this will be set to all unique time values in the underlying data table, by default.} @@ -119,12 +119,12 @@ version-aware sliding is necessary (as it its purpose). Finally, this is simply a wrapper around the \code{slide()} method of the \code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then: -\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), max_version_gap = 120) +\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), before = 120) }\if{html}{\out{
}} is equivalent to: -\if{html}{\out{
}}\preformatted{x$slide(new_var = comp(old_var), max_version_gap = 120) +\if{html}{\out{
}}\preformatted{x$slide(new_var = comp(old_var), before = 120) }\if{html}{\out{
}} } \examples{ @@ -140,8 +140,8 @@ versions <- seq(as.Date("2020-06-01"), by = "1 day") epix_slide(x = archive_cases_dv_subset, f = ~ mean(.x$case_rate_7d_av), - max_version_gap = 3, + before = 3, group_by = geo_value, - ref_versions = versions, + ref_time_values = versions, new_col_name = 'case_rate_3d_av') } diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index c88ff261..91f0a965 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -20,9 +20,9 @@ test_that("epix_slide works as intended",{ xx1 <- epix_slide(x = x2, f = ~ sum(.x$binary), - max_version_gap = 5, + before = 5, group_by = geo_value, - ref_versions = versions, + ref_time_values = versions, new_col_name = 'sum_binary') xx2 <- tibble(geo_value = rep("ca",7), @@ -38,9 +38,9 @@ test_that("epix_slide works as intended",{ expect_identical(xx1,xx2) # * xx3 <- x2$slide(f = ~ sum(.x$binary), - max_version_gap = 5, + before = 5, group_by = "geo_value", - ref_versions = versions, + ref_time_values = versions, new_col_name = 'sum_binary') expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R index 23f9f167..306dd0c3 100644 --- a/tests/testthat/test-methods-epi_archive.R +++ b/tests/testthat/test-methods-epi_archive.R @@ -71,13 +71,13 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss compactify = TRUE) reference_by_modulus = epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - max_version_gap = 3, + before = 3, group_by = modulus, ref_time_values = time_values, new_col_name = 'case_rate_3d_av') reference_by_both = epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - max_version_gap = 3, + before = 3, group_by = c(geo_value, modulus), ref_time_values = time_values, new_col_name = 'case_rate_3d_av') @@ -85,7 +85,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - max_version_gap = 3, + before = 3, group_by = modulus, ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -96,7 +96,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - max_version_gap = 3, + before = 3, group_by = "modulus", ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), @@ -105,7 +105,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - max_version_gap = 3, + before = 3, group_by = "modulus", ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -121,7 +121,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - max_version_gap = 3, + before = 3, group_by = tidyselect::all_of(my_group_by), ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), @@ -130,7 +130,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - max_version_gap = 3, + before = 3, group_by = tidyselect::all_of(my_group_by), ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -141,7 +141,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - max_version_gap = 3, + before = 3, ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), reference_by_both @@ -149,7 +149,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - max_version_gap = 3, + before = 3, ref_time_values = time_values, new_col_name = 'case_rate_3d_av' ), diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index 692df858..285a8e07 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -15,7 +15,7 @@ ensure the result of a slide operation is *size stable*, meaning, it will return something whose length is the same as the number of appearances of reference time values for the slide computation in the given data frame/table (this defaults to all time values, but can be some given subset when `ref_time_values` -or `ref_versions` is specified, respectively). +or `ref_time_values` is specified, respectively). The output of a slide computation should either be an atomic value/vector, or a data frame. This data frame can have multiple columns, multiple rows, or both. @@ -61,12 +61,12 @@ df %>% df %>% mutate(version = time_value) %>% as_epi_archive() %>% - epix_slide(x_2dav = mean(x), max_version_gap = 2, ref_versions = as.Date("2020-06-02")) + epix_slide(x_2dav = mean(x), before = 2, ref_time_values = as.Date("2020-06-02")) df %>% mutate(version = time_value) %>% as_epi_archive() %>% - epix_slide(~ mean(.x$x), max_version_gap = 2, ref_versions = as.Date("2020-06-02")) + epix_slide(~ mean(.x$x), before = 2, ref_time_values = as.Date("2020-06-02")) ``` When the slide computation returns an atomic vector (rather than a single value) @@ -152,8 +152,8 @@ df %>% mutate(version = time_value) %>% as_epi_archive() %>% epix_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)), - ref_versions = as.Date("2020-06-02"), - max_version_gap = 2, as_list_col = FALSE, names_sep = NULL) + ref_time_values = as.Date("2020-06-02"), + before = 2, as_list_col = FALSE, names_sep = NULL) ``` ## Multi-row outputs @@ -354,7 +354,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x %>% epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - max_version_gap = 120, ref_versions = fc_versions) %>% + before = 120, ref_time_values = fc_versions) %>% mutate(target_date = version + ahead, as_of = TRUE, geo_value = fc_geo_value) } diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index 728fb2a0..85151788 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -395,8 +395,8 @@ fc_versions <- seq(as.Date("2020-08-01"), z <- epix_slide(x, fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = 7)), - max_version_gap = 120, - ref_versions = fc_versions, group_by = geo_value) + before = 120, + ref_time_values = fc_versions, group_by = geo_value) head(z, 10) ``` @@ -427,7 +427,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x %>% epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - max_version_gap = 120, ref_versions = fc_versions) %>% + before = 120, ref_time_values = fc_versions) %>% mutate(target_date = version + ahead, as_of = TRUE, geo_value = fc_geo_value) } diff --git a/vignettes/compactify.Rmd b/vignettes/compactify.Rmd index 7addd474..a011ed43 100644 --- a/vignettes/compactify.Rmd +++ b/vignettes/compactify.Rmd @@ -102,7 +102,7 @@ speeds <- rbind(speeds, speed_test(iterate_as_of,"as_of_1000x")) # Performance of slide slide_median <- function(my_ea) { - my_ea$slide(median = median(case_rate_7d_av), max_version_gap = 6) + my_ea$slide(median = median(case_rate_7d_av), before = 6) } speeds <- rbind(speeds, speed_test(slide_median,"slide_median")) From 9980ec2b2e62279c0e8c7d95f8bbc3b64fcb91a3 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 18 Aug 2022 11:17:44 -0700 Subject: [PATCH 60/60] IDK what happened, but the refactoring went wrong. --- tests/testthat/test-epix_slide.R | 49 ++++++++++++++------------------ 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 91f0a965..c54ad73c 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -1,46 +1,41 @@ library(dplyr) library(rlang) -ea <- archive_cases_dv_subset$clone() - test_that("epix_slide only works on an epi_archive",{ expect_error(epix_slide(data.frame(x=1))) }) test_that("epix_slide works as intended",{ - x2 <- ea$clone()$DT %>% - filter(geo_value == "ca", version <= as.Date("2020-06-09")) %>% - select(-percent_cli,-case_rate_7d_av) %>% - mutate(binary = 2^(row_number())) %>% + x <- tibble::tribble(~version, ~time_value, + 5, c(1:2,4), + 6, c(1:2,4:5), + 7, 2:6) %>% + tidyr::unnest(time_value) + + xx <- bind_cols(geo_value = rep("x",12), + arrange(x,time_value,version), + binary = 2^(1:12)) %>% as_epi_archive() - versions <- seq(as.Date("2020-06-01"), - as.Date("2020-06-09"), - by = "1 day") + time_values <- 2:5 - xx1 <- epix_slide(x = x2, - f = ~ sum(.x$binary), - before = 5, - group_by = geo_value, - ref_time_values = versions, - new_col_name = 'sum_binary') + xx1 <- epix_slide(x = xx, + f = ~ sum(.xx$binary), + before = 3, + group_by = geo_value, + ref_time_values = versions, + new_col_name = "sum_binary") - xx2 <- tibble(geo_value = rep("ca",7), - version = as.Date("2020-06-01") + 1:7, - sum_binary = c(2^1, - 2^6+2^1, - 2^11+2^6+2^1, - 2^16+2^11+2^6+2^1, - 2^19+2^16+2^12+2^7, - 2^21+2^19+2^16+2^13, - 2^22+2^21+2^19+2^17)) + xx2 <- tibble(geo_value = rep("x",5), + time_value = as.Date("2020-06-01") + 1:5, + sum_binary = c(3)) expect_identical(xx1,xx2) # * - xx3 <- x2$slide(f = ~ sum(.x$binary), - before = 5, + xx3 <- xx$slide(f = ~ sum(.xx$binary), + before = 3, group_by = "geo_value", - ref_time_values = versions, + ref_time_values = time_values, new_col_name = 'sum_binary') expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical