From 39225f09f5e738fd2eb5c4e719a1665a8220ab8b Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Mon, 8 Jan 2024 16:36:50 -0800 Subject: [PATCH 01/10] patch: get select working with grouping --- DESCRIPTION | 1 + NAMESPACE | 1 + tests/testthat/test-epi_df.R | 22 +++++++++++++++++++--- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dc48eb86..339d5681 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -76,6 +76,7 @@ Collate: 'data.R' 'epi_df.R' 'epiprocess.R' + 'group_by_epi_df_methods.R' 'methods-epi_archive.R' 'grouped_epi_archive.R' 'growth_rate.R' diff --git a/NAMESPACE b/NAMESPACE index 73db3483..c59004c8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ S3method(groups,grouped_epi_archive) S3method(next_after,Date) S3method(next_after,integer) S3method(print,epi_df) +S3method(select,epi_df) S3method(summary,epi_df) S3method(ungroup,epi_df) S3method(ungroup,grouped_epi_archive) diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R index 10b0015e..047438bb 100644 --- a/tests/testthat/test-epi_df.R +++ b/tests/testthat/test-epi_df.R @@ -36,6 +36,22 @@ test_that("as_epi_df errors when additional_metadata is not a list", { pol = rep(c("blue", "swing", "swing"), each = 2)) expect_error( - as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")), - "`additional_metadata` must be a list type.") -}) \ No newline at end of file + as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")), + "`additional_metadata` must be a list type." + ) +}) + + +test_that("grouped epi_df maintains type for select", { + tib <- tibble::tibble( + x = 1:10, y = 1:10, + time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2), + geo_value = rep(c("ca", "hi"), each = 5) + ) + + epi_tib <- epiprocess::new_epi_df(tib) + epi_tib + grouped_epi <- epi_tib %>% group_by(geo_value) + selected_df <- grouped_epi %>% select(-y) + expect_true("epi_df" %in% class(selected_df)) +}) From 05aa645469588be96b5c6706722f6e5ab6f2a6f2 Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Tue, 9 Jan 2024 10:34:29 -0800 Subject: [PATCH 02/10] need the actual method --- R/group_by_epi_df_methods.R | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 R/group_by_epi_df_methods.R diff --git a/R/group_by_epi_df_methods.R b/R/group_by_epi_df_methods.R new file mode 100644 index 00000000..29e41762 --- /dev/null +++ b/R/group_by_epi_df_methods.R @@ -0,0 +1,10 @@ +#' @importFrom dplyr select +#' @export +select.epi_df <- function(.data, ...) { + selected <- NextMethod(.data) + return(dplyr_reconstruct(selected, .data)) +} + +# others to consider: +# - arrange +# - From ac5f369839190ccdb604a0413a098651bf2352fa Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Tue, 9 Jan 2024 19:04:37 -0800 Subject: [PATCH 03/10] more thorough testing --- tests/testthat/test-epi_df.R | 85 ++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 17 deletions(-) diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R index 047438bb..fbd31f7b 100644 --- a/tests/testthat/test-epi_df.R +++ b/tests/testthat/test-epi_df.R @@ -1,23 +1,27 @@ test_that("new_epi_df works as intended", { # Empty tibble - wmsg = capture_warnings(a <- new_epi_df()) - expect_match(wmsg[1], - "Unknown or uninitialised column: `geo_value`.") - expect_match(wmsg[2], - "Unknown or uninitialised column: `time_value`.") + wmsg <- capture_warnings(a <- new_epi_df()) + expect_match( + wmsg[1], + "Unknown or uninitialised column: `geo_value`." + ) + expect_match( + wmsg[2], + "Unknown or uninitialised column: `time_value`." + ) expect_true(is_epi_df(a)) expect_identical(attributes(a)$metadata$geo_type, "custom") expect_identical(attributes(a)$metadata$time_type, "custom") expect_true(lubridate::is.POSIXt(attributes(a)$metadata$as_of)) - + # Simple non-empty tibble with geo_value and time_value cols tib <- tibble::tibble( x = 1:10, y = 1:10, time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2), geo_value = rep(c("ca", "hi"), each = 5) ) - - epi_tib = new_epi_df(tib) + + epi_tib <- new_epi_df(tib) expect_true(is_epi_df(epi_tib)) expect_length(epi_tib, 4L) expect_identical(attributes(epi_tib)$metadata$geo_type, "state") @@ -32,26 +36,73 @@ test_that("as_epi_df errors when additional_metadata is not a list", { dplyr::slice_tail(n = 6) %>% tsibble::as_tsibble() %>% dplyr::mutate( - state = rep("MA",6), - pol = rep(c("blue", "swing", "swing"), each = 2)) - + state = rep("MA", 6), + pol = rep(c("blue", "swing", "swing"), each = 2) + ) + expect_error( as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")), "`additional_metadata` must be a list type." ) }) +# select fixes +tib <- tibble::tibble( + x = 1:10, y = 1:10, + time_value = rep(seq(as.Date("2020-01-01"), + by = 1, length.out = 5 + ), times = 2), + geo_value = rep(c("ca", "hi"), each = 5) +) +epi_tib <- epiprocess::new_epi_df(tib) test_that("grouped epi_df maintains type for select", { + grouped_epi <- epi_tib %>% group_by(geo_value) + selected_df <- grouped_epi %>% select(-y) + expect_true("epi_df" %in% class(selected_df)) + # make sure that the attributes are right + epi_attr <- attributes(selected_df) + expect_identical(epi_attr$names, c("geo_value", "time_value", "x")) + expect_identical(epi_attr$row.names, seq(1, 10)) + expect_identical(epi_attr$groups, attributes(grouped_epi)$groups) + expect_identical(epi_attr$metadata, attributes(epi_tib)$metadata) +}) + +test_that("grouped epi_df drops type when dropping keys", { + grouped_epi <- epi_tib %>% group_by(geo_value) + selected_df <- grouped_epi %>% select(geo_value) + expect_true(!("epi_df" %in% class(selected_df))) +}) + +test_that("grouped epi_df handles extra keys correctly", { tib <- tibble::tibble( x = 1:10, y = 1:10, - time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2), - geo_value = rep(c("ca", "hi"), each = 5) + time_value = rep(seq(as.Date("2020-01-01"), + by = 1, length.out = 5 + ), times = 2), + geo_value = rep(c("ca", "hi"), each = 5), + extra_key = rep(seq(as.Date("2020-01-01"), + by = 1, length.out = 5 + ), times = 2) ) - - epi_tib <- epiprocess::new_epi_df(tib) - epi_tib + epi_tib <- epiprocess::new_epi_df(tib, + additional_metadata = list(other_keys = "extra_key") + ) + attributes(epi_tib) grouped_epi <- epi_tib %>% group_by(geo_value) - selected_df <- grouped_epi %>% select(-y) + selected_df <- grouped_epi %>% select(-extra_key) + selected_df expect_true("epi_df" %in% class(selected_df)) + # make sure that the attributes are right + old_attr <- attributes(epi_tib) + epi_attr <- attributes(selected_df) + expect_identical(epi_attr$names, c("geo_value", "time_value", "x", "y")) + expect_identical(epi_attr$row.names, seq(1, 10)) + expect_identical(epi_attr$groups, attributes(grouped_epi)$groups) + expect_identical(epi_attr$metadata, list( + geo_type = "state", time_type = + "day", + as_of = old_attr$metadata$as_of, + other_keys = character(0) + )) }) From b9bac788b3c50555b885cc0d4c3b7fefe62b68e1 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Fri, 12 Jan 2024 18:37:16 -0800 Subject: [PATCH 04/10] Fix `names<-.epi_df`, introduce failing test for renaming in `select` --- R/epi_df.R | 5 ++++- R/group_by_epi_df_methods.R | 2 +- R/methods-epi_df.R | 11 +++++++++-- tests/testthat/test-methods-epi_df.R | 14 ++++++++++++++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/R/epi_df.R b/R/epi_df.R index 045c4aaf..53dca62b 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -122,6 +122,9 @@ new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of, if (!is.list(additional_metadata)) { Abort("`additional_metadata` must be a list type.") } + if (is.null(additional_metadata[["other_keys"]])) { + additional_metadata[["other_keys"]] <- character(0L) + } # If geo type is missing, then try to guess it if (missing(geo_type)) { @@ -334,4 +337,4 @@ as_epi_df.tbl_ts = function(x, geo_type, time_type, as_of, #' @export is_epi_df = function(x) { inherits(x, "epi_df") -} \ No newline at end of file +} diff --git a/R/group_by_epi_df_methods.R b/R/group_by_epi_df_methods.R index 29e41762..8d02a887 100644 --- a/R/group_by_epi_df_methods.R +++ b/R/group_by_epi_df_methods.R @@ -2,7 +2,7 @@ #' @export select.epi_df <- function(.data, ...) { selected <- NextMethod(.data) - return(dplyr_reconstruct(selected, .data)) + return (dplyr_reconstruct(selected, .data)) } # others to consider: diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index 6429b867..82acc107 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -180,9 +180,16 @@ dplyr_row_slice.epi_df = function(data, i, ...) { #' @export `names<-.epi_df` = function(x, value) { old_names = names(x) - old_other_keys = attributes(x)$metadata$other_keys + old_other_keys = attr(x, "metadata")[["other_keys"]] result = NextMethod() - attributes(x)$metadata$other_keys <- value[match(old_other_keys, old_names)] + new_other_keys_almost <- value[match(old_other_keys, old_names)] + attr(result, "metadata")[["other_keys"]] <- + # patch until we require `other_keys` to be `chr`; match NULL-ness of input `other_keys`: + # if (length(new_other_keys_almost) == 0L) NULL + # if (is.null(old_other_keys)) NULL + # else new_other_keys_almost + new_other_keys_almost + # decay to non-`epi_df` if needed: dplyr::dplyr_reconstruct(result, result) } diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R index 9d03cf93..c9e38fff 100644 --- a/tests/testthat/test-methods-epi_df.R +++ b/tests/testthat/test-methods-epi_df.R @@ -124,3 +124,17 @@ test_that("Metadata and grouping are dropped by `as_tibble`", { !any(c("metadata", "groups") %in% names(attributes(grouped_converted))) ) }) + +test_that("Renaming columns gives appropriate colnames and metadata", { + edf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>% + as_epi_df(additional_metadata = list(other_keys = "age")) + renamed_edf1 <- edf %>% + `[`(c("geo_value", "time_value", "age", "value")) %>% + `names<-`(c("geo_value", "time_value", "age_group", "value")) + expect_identical(names(renamed_edf1), c("geo_value", "time_value", "age_group", "value")) + expect_identical(attr(renamed_edf1, "metadata")$other_keys, c("age_group")) + renamed_edf2 <- edf %>% + as_epi_df(additional_metadata = list(other_keys = "age")) %>% + select(geo_value, time_value, age_group = age, value) + expect_identical(renamed_edf1, renamed_edf2) +}) From 42eb793d02332bcf6773860869e3cc91b16dff6f Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Fri, 12 Jan 2024 18:43:40 -0800 Subject: [PATCH 05/10] Clean up some commented-out code --- R/methods-epi_df.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index 82acc107..5dea964c 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -182,13 +182,8 @@ dplyr_row_slice.epi_df = function(data, i, ...) { old_names = names(x) old_other_keys = attr(x, "metadata")[["other_keys"]] result = NextMethod() - new_other_keys_almost <- value[match(old_other_keys, old_names)] - attr(result, "metadata")[["other_keys"]] <- - # patch until we require `other_keys` to be `chr`; match NULL-ness of input `other_keys`: - # if (length(new_other_keys_almost) == 0L) NULL - # if (is.null(old_other_keys)) NULL - # else new_other_keys_almost - new_other_keys_almost + new_other_keys <- value[match(old_other_keys, old_names)] + attr(result, "metadata")[["other_keys"]] <- new_other_keys # decay to non-`epi_df` if needed: dplyr::dplyr_reconstruct(result, result) } From 510e6d45b626b684265f13fa337402f284d31343 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Fri, 12 Jan 2024 19:14:29 -0800 Subject: [PATCH 06/10] Fix `names<-` metadata for grouped `epi_df`s --- R/methods-epi_df.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index 5dea964c..6e4666e7 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -179,11 +179,13 @@ dplyr_row_slice.epi_df = function(data, i, ...) { #' @export `names<-.epi_df` = function(x, value) { - old_names = names(x) - old_other_keys = attr(x, "metadata")[["other_keys"]] - result = NextMethod() + old_names <- names(x) + old_metadata <- attr(x, "metadata") + old_other_keys <- old_metadata[["other_keys"]] new_other_keys <- value[match(old_other_keys, old_names)] - attr(result, "metadata")[["other_keys"]] <- new_other_keys + new_metadata <- old_metadata + new_metadata[["other_keys"]] <- new_other_keys + result <- reclass(NextMethod(), new_metadata) # decay to non-`epi_df` if needed: dplyr::dplyr_reconstruct(result, result) } From 9cc34370156f5f4d7862c25ecaef5bdf816740cd Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Fri, 12 Jan 2024 19:16:09 -0800 Subject: [PATCH 07/10] Fix grouped `epi_df` `select` when renaming --- R/group_by_epi_df_methods.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/group_by_epi_df_methods.R b/R/group_by_epi_df_methods.R index 8d02a887..acdab378 100644 --- a/R/group_by_epi_df_methods.R +++ b/R/group_by_epi_df_methods.R @@ -2,7 +2,8 @@ #' @export select.epi_df <- function(.data, ...) { selected <- NextMethod(.data) - return (dplyr_reconstruct(selected, .data)) + might_decay <- reclass(selected, attr(selected, "metadata")) + return(dplyr_reconstruct(might_decay, might_decay)) } # others to consider: From 113121fdf5ac4119bf387db63948496bbb2378c7 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Fri, 12 Jan 2024 19:32:15 -0800 Subject: [PATCH 08/10] Add grouped epi_df names<- test, spruce up related tests --- tests/testthat/test-epi_df.R | 11 +++++------ tests/testthat/test-methods-epi_df.R | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R index fbd31f7b..decd6fd7 100644 --- a/tests/testthat/test-epi_df.R +++ b/tests/testthat/test-epi_df.R @@ -59,19 +59,20 @@ epi_tib <- epiprocess::new_epi_df(tib) test_that("grouped epi_df maintains type for select", { grouped_epi <- epi_tib %>% group_by(geo_value) selected_df <- grouped_epi %>% select(-y) - expect_true("epi_df" %in% class(selected_df)) + expect_true(inherits(selected_df, "epi_df")) # make sure that the attributes are right epi_attr <- attributes(selected_df) expect_identical(epi_attr$names, c("geo_value", "time_value", "x")) expect_identical(epi_attr$row.names, seq(1, 10)) expect_identical(epi_attr$groups, attributes(grouped_epi)$groups) expect_identical(epi_attr$metadata, attributes(epi_tib)$metadata) + expect_identical(selected_df, epi_tib %>% select(-y) %>% group_by(geo_value)) }) test_that("grouped epi_df drops type when dropping keys", { grouped_epi <- epi_tib %>% group_by(geo_value) selected_df <- grouped_epi %>% select(geo_value) - expect_true(!("epi_df" %in% class(selected_df))) + expect_true(!inherits(selected_df, "epi_df")) }) test_that("grouped epi_df handles extra keys correctly", { @@ -91,8 +92,7 @@ test_that("grouped epi_df handles extra keys correctly", { attributes(epi_tib) grouped_epi <- epi_tib %>% group_by(geo_value) selected_df <- grouped_epi %>% select(-extra_key) - selected_df - expect_true("epi_df" %in% class(selected_df)) + expect_true(inherits(selected_df, "epi_df")) # make sure that the attributes are right old_attr <- attributes(epi_tib) epi_attr <- attributes(selected_df) @@ -100,8 +100,7 @@ test_that("grouped epi_df handles extra keys correctly", { expect_identical(epi_attr$row.names, seq(1, 10)) expect_identical(epi_attr$groups, attributes(grouped_epi)$groups) expect_identical(epi_attr$metadata, list( - geo_type = "state", time_type = - "day", + geo_type = "state", time_type = "day", as_of = old_attr$metadata$as_of, other_keys = character(0) )) diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R index c9e38fff..6be7e89b 100644 --- a/tests/testthat/test-methods-epi_df.R +++ b/tests/testthat/test-methods-epi_df.R @@ -138,3 +138,20 @@ test_that("Renaming columns gives appropriate colnames and metadata", { select(geo_value, time_value, age_group = age, value) expect_identical(renamed_edf1, renamed_edf2) }) + +test_that("Renaming columns while grouped gives appropriate colnames and metadata", { + gedf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>% + as_epi_df(additional_metadata = list(other_keys = "age")) %>% + group_by(geo_value) + renamed_gedf1 <- gedf %>% + `[`(c("geo_value", "time_value", "age", "value")) %>% + `names<-`(c("geo_value", "time_value", "age_group", "value")) + expect_true(inherits(renamed_gedf1, "epi_df")) + expect_true(inherits(renamed_gedf1, "grouped_df")) + expect_identical(names(renamed_gedf1), c("geo_value", "time_value", "age_group", "value")) + expect_identical(attr(renamed_gedf1, "metadata")$other_keys, c("age_group")) + renamed_gedf2 <- gedf %>% + as_epi_df(additional_metadata = list(other_keys = "age")) %>% + select(geo_value, time_value, age_group = age, value) + expect_identical(renamed_gedf1, renamed_gedf2) +}) From 0bf702f11cfd9ac13a65b7a1b62384937d38e33e Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Fri, 12 Jan 2024 19:41:32 -0800 Subject: [PATCH 09/10] Comment adding context for `group_by_epi_df_methods.R` --- R/group_by_epi_df_methods.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/group_by_epi_df_methods.R b/R/group_by_epi_df_methods.R index acdab378..b531178f 100644 --- a/R/group_by_epi_df_methods.R +++ b/R/group_by_epi_df_methods.R @@ -1,3 +1,9 @@ +# These methods (and maybe some others in methods-epi_df.R) are here to augment +# `?dplyr_extending` implementations to get the correct behavior on grouped +# `epi_df`s. It would be nice if there were a way to replace these with a +# generic core that automatically fixed all misbehaving methods; see +# brainstorming within Issue #223. + #' @importFrom dplyr select #' @export select.epi_df <- function(.data, ...) { From ff3dfae096a10b26f494a2d6427cafdafe83b14f Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Tue, 16 Jan 2024 12:48:46 -0800 Subject: [PATCH 10/10] doc: minor annotations --- tests/testthat/test-methods-epi_df.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R index 6be7e89b..aeb08ced 100644 --- a/tests/testthat/test-methods-epi_df.R +++ b/tests/testthat/test-methods-epi_df.R @@ -128,11 +128,13 @@ test_that("Metadata and grouping are dropped by `as_tibble`", { test_that("Renaming columns gives appropriate colnames and metadata", { edf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>% as_epi_df(additional_metadata = list(other_keys = "age")) + # renaming using base R renamed_edf1 <- edf %>% `[`(c("geo_value", "time_value", "age", "value")) %>% `names<-`(c("geo_value", "time_value", "age_group", "value")) expect_identical(names(renamed_edf1), c("geo_value", "time_value", "age_group", "value")) expect_identical(attr(renamed_edf1, "metadata")$other_keys, c("age_group")) + # renaming using select renamed_edf2 <- edf %>% as_epi_df(additional_metadata = list(other_keys = "age")) %>% select(geo_value, time_value, age_group = age, value) @@ -143,13 +145,17 @@ test_that("Renaming columns while grouped gives appropriate colnames and metadat gedf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>% as_epi_df(additional_metadata = list(other_keys = "age")) %>% group_by(geo_value) + # renaming using base R renamed_gedf1 <- gedf %>% `[`(c("geo_value", "time_value", "age", "value")) %>% `names<-`(c("geo_value", "time_value", "age_group", "value")) + # tets type preservation expect_true(inherits(renamed_gedf1, "epi_df")) expect_true(inherits(renamed_gedf1, "grouped_df")) + # the names are right expect_identical(names(renamed_gedf1), c("geo_value", "time_value", "age_group", "value")) expect_identical(attr(renamed_gedf1, "metadata")$other_keys, c("age_group")) + # renaming using select renamed_gedf2 <- gedf %>% as_epi_df(additional_metadata = list(other_keys = "age")) %>% select(geo_value, time_value, age_group = age, value)