From 4eecf4e8d36d6b49c73fb1c5bafd0d66ab56866d Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 4 Jul 2024 16:03:14 -0700 Subject: [PATCH 01/14] fix: layer_add_target/forecast_date * missing rlang prefix * max(NULL, date) produces an integer, tests fail --- R/layer_add_forecast_date.R | 6 +++--- R/layer_add_target_date.R | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/layer_add_forecast_date.R b/R/layer_add_forecast_date.R index 5bd6b6918..2174b7330 100644 --- a/R/layer_add_forecast_date.R +++ b/R/layer_add_forecast_date.R @@ -87,11 +87,11 @@ layer_add_forecast_date_new <- function(forecast_date, id) { #' @export slather.layer_add_forecast_date <- function(object, components, workflow, new_data, ...) { if (is.null(object$forecast_date)) { - max_time_value <- max( + max_time_value <- as.Date(max( workflows::extract_preprocessor(workflow)$max_time_value, workflow$fit$meta$max_time_value, max(new_data$time_value) - ) + )) forecast_date <- max_time_value } else { forecast_date <- object$forecast_date @@ -102,7 +102,7 @@ slather.layer_add_forecast_date <- function(object, components, workflow, new_da )$time_type if (expected_time_type == "week") expected_time_type <- "day" validate_date(forecast_date, expected_time_type, - call = expr(layer_add_forecast_date()) + call = rlang::expr(layer_add_forecast_date()) ) forecast_date <- coerce_time_type(forecast_date, expected_time_type) object$forecast_date <- forecast_date diff --git a/R/layer_add_target_date.R b/R/layer_add_target_date.R index f03bd6154..23aeb4091 100644 --- a/R/layer_add_target_date.R +++ b/R/layer_add_target_date.R @@ -101,17 +101,17 @@ slather.layer_add_target_date <- function(object, components, workflow, new_data the_frosting, "layer_add_forecast_date", "forecast_date" ))) { validate_date(forecast_date, expected_time_type, - call = expr(layer_add_forecast_date()) + call = rlang::expr(layer_add_forecast_date()) ) forecast_date <- coerce_time_type(forecast_date, expected_time_type) ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead") target_date <- forecast_date + ahead } else { - max_time_value <- max( + max_time_value <- as.Date(max( workflows::extract_preprocessor(workflow)$max_time_value, workflow$fit$meta$max_time_value, max(new_data$time_value) - ) + )) ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead") target_date <- max_time_value + ahead } From 519bed3ee50ccdf0d25bccf1ee172df4810c7c55 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 4 Jul 2024 16:07:42 -0700 Subject: [PATCH 02/14] tests all pass --- DESCRIPTION | 2 +- NAMESPACE | 13 +-- R/blueprint-epi_recipe-default.R | 142 ++++++++++------------------ R/epi_keys.R | 24 +---- R/epi_recipe.R | 15 +-- R/recipe.epi_df.R | 49 ++++++++++ man/default_epi_recipe_blueprint.Rd | 44 +++++++++ man/new_epi_recipe_blueprint.Rd | 92 ------------------ tests/testthat/test-blueprint.R | 12 +-- tests/testthat/test-epi_keys.R | 5 +- tests/testthat/test-epi_recipe.R | 43 ++++----- 11 files changed, 189 insertions(+), 252 deletions(-) create mode 100644 R/recipe.epi_df.R create mode 100644 man/default_epi_recipe_blueprint.Rd delete mode 100644 man/new_epi_recipe_blueprint.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 12d86602b..a17f5580b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,7 +23,7 @@ URL: https://github.com/cmu-delphi/epipredict/, https://cmu-delphi.github.io/epipredict BugReports: https://github.com/cmu-delphi/epipredict/issues/ Depends: - epiprocess (>= 0.7.5), + epiprocess (>= 0.7.12), parsnip (>= 1.0.0), R (>= 3.5.0) Imports: diff --git a/NAMESPACE b/NAMESPACE index 708c91e06..9180fcbca 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,11 +27,6 @@ S3method(bake,step_population_scaling) S3method(bake,step_training_window) S3method(detect_layer,frosting) S3method(detect_layer,workflow) -S3method(epi_keys,data.frame) -S3method(epi_keys,default) -S3method(epi_keys,epi_df) -S3method(epi_keys,epi_workflow) -S3method(epi_keys,recipe) S3method(epi_recipe,default) S3method(epi_recipe,epi_df) S3method(epi_recipe,formula) @@ -54,6 +49,8 @@ S3method(forecast,epi_workflow) S3method(format,dist_quantiles) S3method(is.na,dist_quantiles) S3method(is.na,distribution) +S3method(key_colnames,epi_workflow) +S3method(key_colnames,recipe) S3method(mean,dist_quantiles) S3method(median,dist_quantiles) S3method(predict,epi_workflow) @@ -96,6 +93,8 @@ S3method(print,step_naomit) S3method(print,step_population_scaling) S3method(print,step_training_window) S3method(quantile,dist_quantiles) +S3method(recipe,epi_df) +S3method(recipes::recipe,formula) S3method(refresh_blueprint,default_epi_recipe_blueprint) S3method(residuals,flatline) S3method(run_mold,default_epi_recipe_blueprint) @@ -148,7 +147,6 @@ export(detect_layer) export(dist_quantiles) export(epi_keys) export(epi_recipe) -export(epi_recipe_blueprint) export(epi_workflow) export(extract_argument) export(extract_frosting) @@ -180,8 +178,6 @@ export(layer_residual_quantiles) export(layer_threshold) export(layer_unnest) export(nested_quantiles) -export(new_default_epi_recipe_blueprint) -export(new_epi_recipe_blueprint) export(pivot_quantiles_longer) export(pivot_quantiles_wider) export(prep) @@ -235,6 +231,7 @@ importFrom(magrittr,"%>%") importFrom(quantreg,rq) importFrom(recipes,bake) importFrom(recipes,prep) +importFrom(recipes,recipe) importFrom(rlang,"!!!") importFrom(rlang,"!!") importFrom(rlang,"%@%") diff --git a/R/blueprint-epi_recipe-default.R b/R/blueprint-epi_recipe-default.R index 886cd5512..69a4dc1d1 100644 --- a/R/blueprint-epi_recipe-default.R +++ b/R/blueprint-epi_recipe-default.R @@ -1,111 +1,69 @@ -#' Recipe blueprint that accounts for `epi_df` panel data -#' -#' Used for simplicity. See [hardhat::new_recipe_blueprint()] or -#' [hardhat::default_recipe_blueprint()] for more details. -#' -#' @inheritParams hardhat::new_recipe_blueprint +#' Default epi_recipe blueprint #' -#' @details The `bake_dependent_roles` are automatically set to `epi_df` defaults. -#' @return A recipe blueprint. +#' Recipe blueprint that accounts for `epi_df` panel data +#' Used for simplicity. See [hardhat::default_recipe_blueprint()] for more +#' details. This subclass is nearly the same, except it ensures that +#' downstream processing doesn't drop the epi_df class from the data. #' -#' @keywords internal +#' @inheritParams hardhat::default_recipe_blueprint +#' @return A `epi_recipe` blueprint. #' @export -new_epi_recipe_blueprint <- - function(intercept = FALSE, allow_novel_levels = FALSE, fresh = TRUE, - composition = "tibble", - ptypes = NULL, recipe = NULL, ..., subclass = character()) { - hardhat::new_recipe_blueprint( - intercept = intercept, - allow_novel_levels = allow_novel_levels, - fresh = fresh, - composition = composition, - ptypes = ptypes, - recipe = recipe, - ..., - subclass = c(subclass, "epi_recipe_blueprint") - ) - } - - -#' @rdname new_epi_recipe_blueprint -#' @export -epi_recipe_blueprint <- - function(intercept = FALSE, allow_novel_levels = FALSE, - fresh = TRUE, - composition = "tibble") { - new_epi_recipe_blueprint( - intercept = intercept, - allow_novel_levels = allow_novel_levels, - fresh = fresh, - composition = composition - ) - } +#' @keywords internal +default_epi_recipe_blueprint <- function(intercept = FALSE, + allow_novel_levels = FALSE, + fresh = TRUE, + strings_as_factors = FALSE, + composition = "tibble") { + new_default_epi_recipe_blueprint( + intercept = intercept, + allow_novel_levels = allow_novel_levels, + fresh = fresh, + strings_as_factors = strings_as_factors, + composition = composition + ) +} -#' @rdname new_epi_recipe_blueprint -#' @export -default_epi_recipe_blueprint <- - function(intercept = FALSE, allow_novel_levels = FALSE, fresh = TRUE, - composition = "tibble") { - new_default_epi_recipe_blueprint( - intercept = intercept, - allow_novel_levels = allow_novel_levels, - fresh = fresh, - composition = composition - ) - } +new_default_epi_recipe_blueprint <- function(intercept = FALSE, + allow_novel_levels = TRUE, + fresh = TRUE, + strings_as_factors = FALSE, + composition = "tibble", + ptypes = NULL, + recipe = NULL, + extra_role_ptypes = NULL, + ..., + subclass = character()) { + hardhat::new_recipe_blueprint( + intercept = intercept, + allow_novel_levels = allow_novel_levels, + fresh = fresh, + strings_as_factors = strings_as_factors, + composition = composition, + ptypes = ptypes, + recipe = recipe, + extra_role_ptypes = extra_role_ptypes, + ..., + subclass = c(subclass, "default_epi_recipe_blueprint", "default_recipe_blueprint")) +} -#' @rdname new_epi_recipe_blueprint -#' @inheritParams hardhat::new_default_recipe_blueprint -#' @export -new_default_epi_recipe_blueprint <- - function(intercept = FALSE, allow_novel_levels = FALSE, - fresh = TRUE, - composition = "tibble", ptypes = NULL, recipe = NULL, - extra_role_ptypes = NULL, ..., subclass = character()) { - new_epi_recipe_blueprint( - intercept = intercept, - allow_novel_levels = allow_novel_levels, - fresh = fresh, - composition = composition, - ptypes = ptypes, - recipe = recipe, - extra_role_ptypes = extra_role_ptypes, - ..., - subclass = c(subclass, "default_epi_recipe_blueprint", "default_recipe_blueprint") - ) - } #' @importFrom hardhat run_mold #' @export run_mold.default_epi_recipe_blueprint <- function(blueprint, ..., data) { rlang::check_dots_empty0(...) - # blueprint <- hardhat:::patch_recipe_default_blueprint(blueprint) - cleaned <- mold_epi_recipe_default_clean(blueprint = blueprint, data = data) - blueprint <- cleaned$blueprint - data <- cleaned$data + # we don't do the "cleaning" in `hardhat:::run_mold.default_recipe_blueprint` + # That function drops the epi_df class without any recourse. + # The only way we should be here at all is if `data` is an epi_df, but just + # in case... + if (!is_epi_df(data)) { + cli_warn("`data` is not an {.cls epi_df}. It has class {.cls {class(data)}}.") + } hardhat:::mold_recipe_default_process(blueprint = blueprint, data = data) } -mold_epi_recipe_default_clean <- function(blueprint, data) { - hardhat:::check_data_frame_or_matrix(data) - if (!is_epi_df(data)) data <- hardhat:::coerce_to_tibble(data) - hardhat:::new_mold_clean(blueprint, data) -} - #' @importFrom hardhat refresh_blueprint #' @export refresh_blueprint.default_epi_recipe_blueprint <- function(blueprint) { do.call(new_default_epi_recipe_blueprint, as.list(blueprint)) } - -## removing this function? -# er_check_is_data_like <- function(.x, .x_nm) { -# if (rlang::is_missing(.x_nm)) { -# .x_nm <- rlang::as_label(rlang::enexpr(.x)) -# } -# if (!hardhat:::is_new_data_like(.x)) { -# hardhat:::glubort("`{.x_nm}` must be a data.frame or a matrix, not a {class1(.x)}.") -# } -# .x -# } diff --git a/R/epi_keys.R b/R/epi_keys.R index 08e4595c3..34d141cd6 100644 --- a/R/epi_keys.R +++ b/R/epi_keys.R @@ -6,34 +6,16 @@ #' @return If an `epi_df`, this returns all "keys". Otherwise `NULL` #' @keywords internal #' @export -epi_keys <- function(x, ...) { - UseMethod("epi_keys") -} +epi_keys <- key_colnames -#' @export -epi_keys.default <- function(x, ...) { - character(0L) -} - -#' @export -epi_keys.data.frame <- function(x, other_keys = character(0L), ...) { - arg_is_chr(other_keys, allow_empty = TRUE) - nm <- c("time_value", "geo_value", other_keys) - intersect(nm, names(x)) -} - -#' @export -epi_keys.epi_df <- function(x, ...) { - c("time_value", "geo_value", attr(x, "metadata")$other_keys) -} #' @export -epi_keys.recipe <- function(x, ...) { +key_colnames.recipe <- function(x, ...) { x$var_info$variable[x$var_info$role %in% c("time_value", "geo_value", "key")] } #' @export -epi_keys.epi_workflow <- function(x, ...) { +key_colnames.epi_workflow <- function(x, ...) { epi_keys_mold(hardhat::extract_mold(x)) } diff --git a/R/epi_recipe.R b/R/epi_recipe.R index e5182b99b..4afe1d6fb 100644 --- a/R/epi_recipe.R +++ b/R/epi_recipe.R @@ -9,6 +9,7 @@ #' @import recipes #' @export epi_recipe <- function(x, ...) { + # deprecate_soft("This function is being deprecated. Use `recipe()` instead.") UseMethod("epi_recipe") } @@ -16,10 +17,10 @@ epi_recipe <- function(x, ...) { #' @rdname epi_recipe #' @export epi_recipe.default <- function(x, ...) { - ## if not a formula or an epi_df, we just pass to recipes::recipe - if (is.matrix(x) || is.data.frame(x) || tibble::is_tibble(x)) { - x <- x[1, , drop = FALSE] - } + # if not a formula or an epi_df, we just pass to recipes::recipe + # if (is.matrix(x) || is.data.frame(x) || tibble::is_tibble(x)) { + # x <- x[1, , drop = FALSE] + # } recipes::recipe(x, ...) } @@ -57,6 +58,7 @@ epi_recipe.default <- function(x, ...) { #' r epi_recipe.epi_df <- function(x, formula = NULL, ..., vars = NULL, roles = NULL) { + return(recipe(x, formula = formula, ..., vars = vars, roles = roles)) if (!is.null(formula)) { if (!is.null(vars)) { rlang::abort( @@ -144,7 +146,7 @@ epi_recipe.epi_df <- #' @export epi_recipe.formula <- function(formula, data, ...) { # we ensure that there's only 1 row in the template - data <- data[1, ] + return(recipe(data, formula, ...)) # check for minus: if (!epiprocess::is_epi_df(data)) { return(recipes::recipe(formula, data, ...)) @@ -157,7 +159,8 @@ epi_recipe.formula <- function(formula, data, ...) { # Check for other in-line functions args <- epi_form2args(formula, data, ...) - obj <- epi_recipe.epi_df( + # browser() + obj <- recipe.epi_df( x = args$x, formula = NULL, ..., diff --git a/R/recipe.epi_df.R b/R/recipe.epi_df.R new file mode 100644 index 000000000..eae58d256 --- /dev/null +++ b/R/recipe.epi_df.R @@ -0,0 +1,49 @@ +#' @importFrom recipes recipe +#' @export +recipe.epi_df <- function(x, formula = NULL, ..., vars = NULL, roles = NULL) { + # vars + roles must be same-length character vectors + # formula is mutually exclusive with vars + roles + # either determines the variables needed for modelling + attr(x, "decay_to_tibble") <- FALSE # avoid as_tibble stripping the class + r <- NextMethod("recipe") + r <- add_epi_df_roles_to_recipe(r, x) + + # arrange to easy order + r$var_info <- r$var_info %>% + dplyr::arrange(factor( + role, + levels = union( + c("predictor", "outcome", "time_value", "geo_value", "key"), + unique(role) + ) # anything else + )) + r$term_info <- r$var_info + class(r) <- c("epi_recipe", class(r)) + r +} + +#' @exportS3Method recipes::recipe +recipe.formula <- function(formula, data, ...) { + # This method clobbers `recipes::recipe.formula`, but should have no noticible + # effect. + recipe(x = data, formula = formula, ...) +} + +add_epi_df_roles_to_recipe <- function(r, epi_df) { + edf_keys <- epiprocess::key_colnames(epi_df) + edf_roles <- c("time_value", "geo_value", rep("key", length(edf_keys) - 2)) + types <- recipes:::get_types(epi_df[, edf_keys])$type + info <- tibble( + variable = edf_keys, + type = types, + role = edf_roles, + source = "original" + ) + # reconstruct the constituents + r$template <- epi_df[ ,unique(c(edf_keys, r$var_info$variable))] + r$var_info <- r$var_info %>% + dplyr::filter(!((variable %in% edf_keys) & is.na(role))) %>% + dplyr::bind_rows(info) %>% + dplyr::distinct() + r +} diff --git a/man/default_epi_recipe_blueprint.Rd b/man/default_epi_recipe_blueprint.Rd new file mode 100644 index 000000000..465a8abef --- /dev/null +++ b/man/default_epi_recipe_blueprint.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/blueprint-epi_recipe-default.R +\name{default_epi_recipe_blueprint} +\alias{default_epi_recipe_blueprint} +\title{Default epi_recipe blueprint} +\usage{ +default_epi_recipe_blueprint( + intercept = FALSE, + allow_novel_levels = FALSE, + fresh = TRUE, + strings_as_factors = FALSE, + composition = "tibble" +) +} +\arguments{ +\item{intercept}{A logical. Should an intercept be included in the +processed data? This information is used by the \code{process} function +in the \code{mold} and \code{forge} function list.} + +\item{allow_novel_levels}{A logical. Should novel factor levels be allowed at +prediction time? This information is used by the \code{clean} function in the +\code{forge} function list, and is passed on to \code{\link[hardhat:scream]{scream()}}.} + +\item{fresh}{Should already trained operations be re-trained when \code{prep()} is +called?} + +\item{strings_as_factors}{Should character columns be converted to factors +when \code{prep()} is called?} + +\item{composition}{Either "tibble", "matrix", or "dgCMatrix" for the format +of the processed predictors. If "matrix" or "dgCMatrix" are chosen, all of +the predictors must be numeric after the preprocessing method has been +applied; otherwise an error is thrown.} +} +\value{ +A \code{epi_recipe} blueprint. +} +\description{ +Recipe blueprint that accounts for \code{epi_df} panel data +Used for simplicity. See \code{\link[hardhat:default_recipe_blueprint]{hardhat::default_recipe_blueprint()}} for more +details. This subclass is nearly the same, except it ensures that +downstream processing doesn't drop the epi_df class from the data. +} +\keyword{internal} diff --git a/man/new_epi_recipe_blueprint.Rd b/man/new_epi_recipe_blueprint.Rd deleted file mode 100644 index db22b5675..000000000 --- a/man/new_epi_recipe_blueprint.Rd +++ /dev/null @@ -1,92 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/blueprint-epi_recipe-default.R -\name{new_epi_recipe_blueprint} -\alias{new_epi_recipe_blueprint} -\alias{epi_recipe_blueprint} -\alias{default_epi_recipe_blueprint} -\alias{new_default_epi_recipe_blueprint} -\title{Recipe blueprint that accounts for \code{epi_df} panel data} -\usage{ -new_epi_recipe_blueprint( - intercept = FALSE, - allow_novel_levels = FALSE, - fresh = TRUE, - composition = "tibble", - ptypes = NULL, - recipe = NULL, - ..., - subclass = character() -) - -epi_recipe_blueprint( - intercept = FALSE, - allow_novel_levels = FALSE, - fresh = TRUE, - composition = "tibble" -) - -default_epi_recipe_blueprint( - intercept = FALSE, - allow_novel_levels = FALSE, - fresh = TRUE, - composition = "tibble" -) - -new_default_epi_recipe_blueprint( - intercept = FALSE, - allow_novel_levels = FALSE, - fresh = TRUE, - composition = "tibble", - ptypes = NULL, - recipe = NULL, - extra_role_ptypes = NULL, - ..., - subclass = character() -) -} -\arguments{ -\item{intercept}{A logical. Should an intercept be included in the -processed data? This information is used by the \code{process} function -in the \code{mold} and \code{forge} function list.} - -\item{allow_novel_levels}{A logical. Should novel factor levels be allowed at -prediction time? This information is used by the \code{clean} function in the -\code{forge} function list, and is passed on to \code{\link[hardhat:scream]{scream()}}.} - -\item{fresh}{Should already trained operations be re-trained when \code{prep()} is -called?} - -\item{composition}{Either "tibble", "matrix", or "dgCMatrix" for the format -of the processed predictors. If "matrix" or "dgCMatrix" are chosen, all of -the predictors must be numeric after the preprocessing method has been -applied; otherwise an error is thrown.} - -\item{ptypes}{Either \code{NULL}, or a named list with 2 elements, \code{predictors} -and \code{outcomes}, both of which are 0-row tibbles. \code{ptypes} is generated -automatically at \code{\link[hardhat:mold]{mold()}} time and is used to validate \code{new_data} at -prediction time.} - -\item{recipe}{Either \code{NULL}, or an unprepped recipe. This argument is set -automatically at \code{\link[hardhat:mold]{mold()}} time.} - -\item{...}{Name-value pairs for additional elements of blueprints that -subclass this blueprint.} - -\item{subclass}{A character vector. The subclasses of this blueprint.} - -\item{extra_role_ptypes}{A named list. The names are the unique non-standard -recipe roles (i.e. everything except \code{"predictors"} and \code{"outcomes"}). The -values are prototypes of the original columns with that role. These are -used for validation in \code{forge()}.} -} -\value{ -A recipe blueprint. -} -\description{ -Used for simplicity. See \code{\link[hardhat:new-blueprint]{hardhat::new_recipe_blueprint()}} or -\code{\link[hardhat:default_recipe_blueprint]{hardhat::default_recipe_blueprint()}} for more details. -} -\details{ -The \code{bake_dependent_roles} are automatically set to \code{epi_df} defaults. -} -\keyword{internal} diff --git a/tests/testthat/test-blueprint.R b/tests/testthat/test-blueprint.R index 2d22aff6e..c069c8bcb 100644 --- a/tests/testthat/test-blueprint.R +++ b/tests/testthat/test-blueprint.R @@ -1,22 +1,18 @@ test_that("epi_recipe blueprint keeps the class, mold works", { - bp <- new_default_epi_recipe_blueprint() - expect_length(class(bp), 5L) + bp <- default_epi_recipe_blueprint() + expect_length(class(bp), 4L) expect_s3_class(bp, "default_epi_recipe_blueprint") - expect_s3_class(refresh_blueprint(bp), "default_epi_recipe_blueprint") + expect_s3_class(hardhat::refresh_blueprint(bp), "default_epi_recipe_blueprint") jhu <- case_death_rate_subset # expect_s3_class(er_check_is_data_like(jhu), "epi_df") - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_naomit(all_predictors()) %>% step_naomit(all_outcomes(), skip = TRUE) - mm <- mold_epi_recipe_default_clean(bp, jhu) - expect_s3_class(mm$blueprint, "default_epi_recipe_blueprint") - expect_s3_class(mm$data, "epi_df") - bp <- hardhat:::update_blueprint(bp, recipe = r) run_mm <- run_mold(bp, data = jhu) expect_false(is.factor(run_mm$extras$roles$geo_value$geo_value)) diff --git a/tests/testthat/test-epi_keys.R b/tests/testthat/test-epi_keys.R index 3e794542e..a3c2fddc1 100644 --- a/tests/testthat/test-epi_keys.R +++ b/tests/testthat/test-epi_keys.R @@ -18,8 +18,9 @@ test_that("Extracts keys from an epi_df", { expect_equal(epi_keys(case_death_rate_subset), c("time_value", "geo_value")) }) -test_that("Extracts keys from a recipe; roles are NA, giving an empty vector", { - expect_equal(epi_keys(recipe(case_death_rate_subset)), character(0L)) +test_that("Extracts keys from a recipe", { + expect_equal(epi_keys(recipe(case_death_rate_subset)), c("time_value", "geo_value")) + expect_equal(epi_keys(recipe(cars)), character(0L)) }) test_that("epi_keys_mold extracts time_value and geo_value, but not raw", { diff --git a/tests/testthat/test-epi_recipe.R b/tests/testthat/test-epi_recipe.R index d288ec058..a4b05afac 100644 --- a/tests/testthat/test-epi_recipe.R +++ b/tests/testthat/test-epi_recipe.R @@ -1,27 +1,25 @@ -test_that("epi_recipe produces default recipe", { - # these all call recipes::recipe(), but the template will always have 1 row +test_that("recipe produces default recipe", { + # these all call recipes::recipe() tib <- tibble( x = 1:5, y = 1:5, time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5) ) - rec <- recipes::recipe(tib) - rec$template <- rec$template[1, ] + rec <- recipe(tib) expect_identical(rec, epi_recipe(tib)) - expect_equal(nrow(rec$template), 1L) + expect_equal(nrow(rec$template), 5L) - rec <- recipes::recipe(y ~ x, tib) - rec$template <- rec$template[1, ] + + rec <- recipe(y ~ x, tib) expect_identical(rec, epi_recipe(y ~ x, tib)) - expect_equal(nrow(rec$template), 1L) + expect_equal(nrow(rec$template), 5L) m <- as.matrix(tib) - rec <- recipes::recipe(m) - rec$template <- rec$template[1, ] + rec <- recipe(m) expect_identical(rec, epi_recipe(m)) - expect_equal(nrow(rec$template), 1L) + expect_equal(nrow(rec$template), 5L) }) -test_that("epi_recipe formula works", { +test_that("recipe formula works", { tib <- tibble( x = 1:5, y = 1:5, time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5), @@ -29,7 +27,7 @@ test_that("epi_recipe formula works", { ) %>% epiprocess::as_epi_df() # simple case - r <- epi_recipe(y ~ x, tib) + r <- recipe(y ~ x, tib) ref_var_info <- tibble::tribble( ~variable, ~type, ~role, ~source, "x", c("integer", "numeric"), "predictor", "original", @@ -38,10 +36,10 @@ test_that("epi_recipe formula works", { "geo_value", c("string", "unordered", "nominal"), "geo_value", "original" ) expect_identical(r$var_info, ref_var_info) - expect_equal(nrow(r$template), 1L) + expect_equal(nrow(r$template), 5L) # with an epi_key as a predictor - r <- epi_recipe(y ~ x + geo_value, tib) + r <- recipe(y ~ x + geo_value, tib) ref_var_info <- ref_var_info %>% tibble::add_row( variable = "geo_value", type = list(c("string", "unordered", "nominal")), @@ -49,7 +47,7 @@ test_that("epi_recipe formula works", { source = "original", .after = 1 ) expect_identical(r$var_info, ref_var_info) - expect_equal(nrow(r$template), 1L) + expect_equal(nrow(r$template), 5L) tib <- tibble( x = 1:5, y = 1:5, @@ -70,7 +68,7 @@ test_that("epi_recipe formula works", { expect_identical(r$var_info, ref_var_info) }) -test_that("epi_recipe epi_df works", { +test_that("recipe epi_df works", { tib <- tibble( x = 1:5, y = 1:5, time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5), @@ -82,11 +80,11 @@ test_that("epi_recipe epi_df works", { ~variable, ~type, ~role, ~source, "time_value", "date", "time_value", "original", "geo_value", c("string", "unordered", "nominal"), "geo_value", "original", - "x", c("integer", "numeric"), "raw", "original", - "y", c("integer", "numeric"), "raw", "original" + "x", c("integer", "numeric"), NA, "original", + "y", c("integer", "numeric"), NA, "original" ) expect_identical(r$var_info, ref_var_info) - expect_equal(nrow(r$template), 1L) + expect_equal(nrow(r$template), 5L) r <- epi_recipe(tib, formula = y ~ x) ref_var_info <- tibble::tribble( @@ -97,7 +95,7 @@ test_that("epi_recipe epi_df works", { "geo_value", c("string", "unordered", "nominal"), "geo_value", "original" ) expect_identical(r$var_info, ref_var_info) - expect_equal(nrow(r$template), 1L) + expect_equal(nrow(r$template), 5L) r <- epi_recipe( @@ -110,11 +108,12 @@ test_that("epi_recipe epi_df works", { source = "original" ) expect_identical(r$var_info, ref_var_info) - expect_equal(nrow(r$template), 1L) + expect_equal(nrow(r$template), 5L) }) test_that("add/update/adjust/remove epi_recipe works as intended", { + library(workflows) jhu <- case_death_rate_subset r <- epi_recipe(jhu) %>% From 30db68fac7a5885eb7e46ea0eb24f8e1d56d44ea Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 4 Jul 2024 19:16:34 -0700 Subject: [PATCH 03/14] fix: .pred is a distribution --- R/autoplot.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/autoplot.R b/R/autoplot.R index 77f04dde7..143ab35d5 100644 --- a/R/autoplot.R +++ b/R/autoplot.R @@ -183,6 +183,9 @@ autoplot.epi_workflow <- function( if (".pred" %in% names(predictions)) { ntarget_dates <- dplyr::n_distinct(predictions$time_value) + if (distributional::is_distribution(predictions$.pred)) { + predictions <- dplyr::mutate(predictions, .pred = median(.pred)) + } if (ntarget_dates > 1L) { bp <- bp + ggplot2::geom_line( From feb81a040bb009277f239426bbc530c33b24a642 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 4 Jul 2024 19:17:15 -0700 Subject: [PATCH 04/14] minor fixes in vignettes --- vignettes/preprocessing-and-models.Rmd | 32 +++++++++++++------------- vignettes/update.Rmd | 4 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/vignettes/preprocessing-and-models.Rmd b/vignettes/preprocessing-and-models.Rmd index d557ed1f7..f946d0657 100644 --- a/vignettes/preprocessing-and-models.Rmd +++ b/vignettes/preprocessing-and-models.Rmd @@ -44,9 +44,9 @@ will create a classification model for hotspot predictions. library(tidyr) library(dplyr) library(epidatr) -library(epipredict) library(recipes) library(workflows) +library(epipredict) library(poissonreg) ``` @@ -147,9 +147,10 @@ manipulate variable roles easily. --- -Notice in the following preprocessing steps, we used `add_role()` on -`geo_value_factor` since, currently, the default role for it is `raw`, but -we would like to reuse this variable as `predictor`s. +Notice in the following preprocessing steps, we used `update_role()` on +`geo_value_factor` since, currently, the default role for it is `NA`, but +we would like to reuse this variable as `predictor`s. (If is had a non-`NA` +role, then we would use `add_role()` instead.) ```{r} counts_subset <- counts_subset %>% @@ -159,7 +160,7 @@ counts_subset <- counts_subset %>% epi_recipe(counts_subset) r <- epi_recipe(counts_subset) %>% - add_role(geo_value_factor, new_role = "predictor") %>% + update_role(geo_value_factor, new_role = "predictor") %>% step_dummy(geo_value_factor) %>% ## Occasionally, data reporting errors / corrections result in negative ## cases / deaths @@ -174,17 +175,15 @@ modeling and producing the prediction for death count, 7 days after the latest available date in the dataset. ```{r} -latest <- get_test_data(r, counts_subset) - wf <- epi_workflow(r, parsnip::poisson_reg()) %>% fit(counts_subset) -predict(wf, latest) %>% filter(!is.na(.pred)) +forecast(wf) %>% filter(!is.na(.pred)) ``` Note that the `time_value` corresponds to the last available date in the training set, **NOT** to the target date of the forecast -(`r max(latest$time_value) + 7`). +(`r max(counts_subset$time_value) + 7`). Let's take a look at the fit: @@ -320,8 +319,8 @@ jhu <- jhu %>% left_join(behav_ind, by = c("geo_value", "time_value")) %>% as_epi_df() -r <- epi_recipe(jhu) %>% - add_role(geo_value_factor, new_role = "predictor") %>% +r <- recipe(jhu) %>% + update_role(geo_value_factor, new_role = "predictor") %>% step_dummy(geo_value_factor) %>% step_epi_lag(case_rate, death_rate, lag = c(0, 7, 14)) %>% step_mutate( @@ -351,6 +350,7 @@ f <- frosting() %>% layer_add_target_date("2022-01-07") %>% layer_threshold(.pred, lower = 0) %>% layer_quantile_distn() %>% + layer_point_from_distn() %>% layer_naomit(.pred) %>% layer_population_scaling( .pred, .pred_distn, @@ -361,8 +361,8 @@ f <- frosting() %>% ) wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.05, .5, .95))) %>% - fit(jhu) %>% - add_frosting(f) + add_frosting(f) %>% + fit(jhu) p <- forecast(wf) p @@ -456,9 +456,9 @@ jhu <- case_death_rate_subset %>% ) %>% mutate(geo_value_factor = as.factor(geo_value)) -r <- epi_recipe(jhu) %>% +r <- recipe(jhu) %>% add_role(time_value, new_role = "predictor") %>% - step_dummy(geo_value_factor) %>% + step_dummy(geo_value_factor, role = "predictor") %>% step_growth_rate(case_rate, role = "none", prefix = "gr_") %>% step_epi_lag(starts_with("gr_"), lag = c(0, 7, 14)) %>% step_epi_ahead(starts_with("gr_"), ahead = 7, role = "none") %>% @@ -471,7 +471,7 @@ r <- epi_recipe(jhu) %>% ), role = "outcome" ) %>% - step_rm(has_role("none"), has_role("raw")) %>% + step_rm(has_role("none"), has_role(NA)) %>% step_epi_naomit() ``` diff --git a/vignettes/update.Rmd b/vignettes/update.Rmd index cb19ce192..fa395e192 100644 --- a/vignettes/update.Rmd +++ b/vignettes/update.Rmd @@ -1,8 +1,8 @@ --- -title: "Using the add/update/remove and adjust functions" +title: "Using the add, update, remove, and adjust functions" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Using the update and adjust functions} + %\VignetteIndexEntry{Using the add, update, remove, and adjust functions} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- From f5d2142127bd9370ccf32029999d71d57720e91d Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 12 Aug 2024 14:55:04 -0700 Subject: [PATCH 05/14] refactor prep.epi_recipe --- DESCRIPTION | 2 +- NAMESPACE | 2 - R/arx_classifier.R | 2 +- R/arx_forecaster.R | 2 +- R/epi_recipe.R | 336 ++---------------------- R/recipe.epi_df.R | 40 +++ man/arx_class_epi_workflow.Rd | 2 +- man/arx_classifier.Rd | 2 +- man/arx_forecaster.Rd | 2 +- man/{epi_recipe.Rd => recipe.epi_df.Rd} | 29 +- 10 files changed, 78 insertions(+), 341 deletions(-) rename man/{epi_recipe.Rd => recipe.epi_df.Rd} (81%) diff --git a/DESCRIPTION b/DESCRIPTION index a17f5580b..b7027fdf1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -72,4 +72,4 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 9180fcbca..020b5f45c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,8 +28,6 @@ S3method(bake,step_training_window) S3method(detect_layer,frosting) S3method(detect_layer,workflow) S3method(epi_recipe,default) -S3method(epi_recipe,epi_df) -S3method(epi_recipe,formula) S3method(extract_argument,epi_workflow) S3method(extract_argument,frosting) S3method(extract_argument,layer) diff --git a/R/arx_classifier.R b/R/arx_classifier.R index de730826c..819b00544 100644 --- a/R/arx_classifier.R +++ b/R/arx_classifier.R @@ -10,7 +10,7 @@ #' be real-valued. Conversion of this data to unordered classes is handled #' internally based on the `breaks` argument to [arx_class_args_list()]. #' If discrete classes are already in the `epi_df`, it is recommended to -#' code up a classifier from scratch using [epi_recipe()]. +#' code up a classifier from scratch using [recipe()]. #' @param trainer A `{parsnip}` model describing the type of estimation. #' For now, we enforce `mode = "classification"`. Typical values are #' [parsnip::logistic_reg()] or [parsnip::multinom_reg()]. More complicated diff --git a/R/arx_forecaster.R b/R/arx_forecaster.R index 10b2d2bce..c4e54a6b0 100644 --- a/R/arx_forecaster.R +++ b/R/arx_forecaster.R @@ -1,7 +1,7 @@ #' Direct autoregressive forecaster with covariates #' #' This is an autoregressive forecasting model for -#' [epiprocess::epi_df] data. It does "direct" forecasting, meaning +#' [`epiprocess::epi_df`] data. It does "direct" forecasting, meaning #' that it estimates a model for a particular target horizon. #' #' diff --git a/R/epi_recipe.R b/R/epi_recipe.R index 4afe1d6fb..4be01925f 100644 --- a/R/epi_recipe.R +++ b/R/epi_recipe.R @@ -1,11 +1,3 @@ -#' Create a epi_recipe for preprocessing data -#' -#' A recipe is a description of the steps to be applied to a data set in -#' order to prepare it for data analysis. This is a loose wrapper -#' around [recipes::recipe()] to properly handle the additional -#' columns present in an `epi_df` -#' -#' @aliases epi_recipe epi_recipe.default epi_recipe.formula #' @import recipes #' @export epi_recipe <- function(x, ...) { @@ -13,206 +5,16 @@ epi_recipe <- function(x, ...) { UseMethod("epi_recipe") } - -#' @rdname epi_recipe #' @export epi_recipe.default <- function(x, ...) { # if not a formula or an epi_df, we just pass to recipes::recipe # if (is.matrix(x) || is.data.frame(x) || tibble::is_tibble(x)) { # x <- x[1, , drop = FALSE] # } - recipes::recipe(x, ...) -} - -#' @rdname epi_recipe -#' @inheritParams recipes::recipe -#' @param roles A character string (the same length of `vars`) that -#' describes a single role that the variable will take. This value could be -#' anything but common roles are `"outcome"`, `"predictor"`, -#' `"time_value"`, and `"geo_value"` -#' @param ... Further arguments passed to or from other methods (not currently -#' used). -#' @param formula A model formula. No in-line functions should be used here -#' (e.g. `log(x)`, `x:y`, etc.) and minus signs are not allowed. These types of -#' transformations should be enacted using `step` functions in this package. -#' Dots are allowed as are simple multivariate outcome terms (i.e. no need for -#' `cbind`; see Examples). -#' @param x,data A data frame, tibble, or epi_df of the *template* data set -#' (see below). This is always coerced to the first row to avoid memory issues -#' @inherit recipes::recipe return -#' -#' @export -#' @examples -#' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-08-01") %>% -#' dplyr::arrange(geo_value, time_value) -#' -#' r <- epi_recipe(jhu) %>% -#' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% -#' step_epi_ahead(death_rate, ahead = 7) %>% -#' step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% -#' recipes::step_naomit(recipes::all_predictors()) %>% -#' # below, `skip` means we don't do this at predict time -#' recipes::step_naomit(recipes::all_outcomes(), skip = TRUE) -#' -#' r -epi_recipe.epi_df <- - function(x, formula = NULL, ..., vars = NULL, roles = NULL) { - return(recipe(x, formula = formula, ..., vars = vars, roles = roles)) - if (!is.null(formula)) { - if (!is.null(vars)) { - rlang::abort( - paste0( - "This `vars` specification will be ignored ", - "when a formula is used" - ) - ) - } - if (!is.null(roles)) { - rlang::abort( - paste0( - "This `roles` specification will be ignored ", - "when a formula is used" - ) - ) - } - - obj <- epi_recipe.formula(formula, x, ...) - return(obj) - } - if (is.null(vars)) vars <- colnames(x) - if (any(table(vars) > 1)) { - rlang::abort("`vars` should have unique members") - } - if (any(!(vars %in% colnames(x)))) { - rlang::abort("1 or more elements of `vars` are not in the data") - } - - keys <- epi_keys(x) # we know x is an epi_df - - var_info <- tibble(variable = vars) - key_roles <- c("time_value", "geo_value", rep("key", length(keys) - 2)) - - ## Check and add roles when available - if (!is.null(roles)) { - if (length(roles) != length(vars)) { - rlang::abort(c( - "The number of roles should be the same as the number of ", - "variables." - )) - } - var_info$role <- roles - } else { - var_info <- var_info %>% dplyr::filter(!(variable %in% keys)) - var_info$role <- "raw" - } - ## Now we add the keys when necessary - var_info <- dplyr::union( - var_info, - tibble::tibble(variable = keys, role = key_roles) - ) - - ## Add types - var_info <- dplyr::full_join(recipes:::get_types(x), var_info, by = "variable") - var_info$source <- "original" - - ## arrange to easy order - var_info <- var_info %>% - dplyr::arrange(factor( - role, - levels = union( - c("predictor", "outcome", "time_value", "geo_value", "key"), - unique(role) - ) # anything else - )) - - ## Return final object of class `recipe` - out <- list( - var_info = var_info, - term_info = var_info, - steps = NULL, - template = x[1, ], - max_time_value = max(x$time_value), - levels = NULL, - retained = NA - ) - class(out) <- c("epi_recipe", "recipe") - out - } - - -#' @rdname epi_recipe -#' @importFrom rlang abort -#' @export -epi_recipe.formula <- function(formula, data, ...) { - # we ensure that there's only 1 row in the template - return(recipe(data, formula, ...)) - # check for minus: - if (!epiprocess::is_epi_df(data)) { - return(recipes::recipe(formula, data, ...)) - } - - f_funcs <- recipes:::fun_calls(formula) - if (any(f_funcs == "-")) { - abort("`-` is not allowed in a recipe formula. Use `step_rm()` instead.") - } - - # Check for other in-line functions - args <- epi_form2args(formula, data, ...) - # browser() - obj <- recipe.epi_df( - x = args$x, - formula = NULL, - ..., - vars = args$vars, - roles = args$roles - ) - obj -} - - -# slightly modified version of `form2args()` in {recipes} -epi_form2args <- function(formula, data, ...) { - if (!rlang::is_formula(formula)) formula <- as.formula(formula) - - ## check for in-line formulas - recipes:::inline_check(formula) - - ## use rlang to get both sides of the formula - outcomes <- recipes:::get_lhs_vars(formula, data) - predictors <- recipes:::get_rhs_vars(formula, data, no_lhs = TRUE) - keys <- epi_keys(data) - - ## if . was used on the rhs, subtract out the outcomes - predictors <- predictors[!(predictors %in% outcomes)] - ## if . was used anywhere, remove epi_keys - if (rlang::f_lhs(formula) == ".") { - outcomes <- outcomes[!(outcomes %in% keys)] - } - if (rlang::f_rhs(formula) == ".") { - predictors <- predictors[!(predictors %in% keys)] - } - - ## get `vars` from rhs, lhs. keys get added downstream - vars <- c(predictors, outcomes) - ## subset data columns - data <- data[, union(vars, keys)] - - ## derive roles - roles <- rep("predictor", length(predictors)) - if (length(outcomes) > 0) { - roles <- c(roles, rep("outcome", length(outcomes))) - } - # if (length(keys) > 0) { - # roles <- c(roles, c("time_value", rep("key", length(keys) - 1))) - # } - - ## pass to recipe.default with vars and roles - list(x = data, vars = vars, roles = roles) + recipe(x, ...) } - #' Test for `epi_recipe` #' #' @param x An object. @@ -429,137 +231,39 @@ adjust_epi_recipe.epi_recipe <- function( x } -# unfortunately, almost everything the same as in prep.recipe except string/fctr handling + #' @export prep.epi_recipe <- function( x, training = NULL, fresh = FALSE, verbose = FALSE, retain = TRUE, log_changes = FALSE, strings_as_factors = TRUE, ...) { - if (is.null(training)) { - cli::cli_warn(c( - "!" = "No training data was supplied to {.fn prep}.", - "!" = "Unlike a {.cls recipe}, an {.cls epi_recipe} does not ", - "!" = "store the full template data in the object.", - "!" = "Please supply the training data to the {.fn prep} function,", - "!" = "to avoid addtional warning messages." - )) - } + + if (!strings_as_factors) return(NextMethod("prep")) + # workaround to avoid converting strings2factors with recipes::prep.recipe() + # We do the conversion here, then set it to FALSE training <- recipes:::check_training_set(training, x, fresh) training <- epi_check_training_set(training, x) training <- dplyr::relocate(training, tidyselect::all_of(epi_keys(training))) - tr_data <- recipes:::train_info(training) keys <- epi_keys(x) - orig_lvls <- lapply(training, recipes:::get_levels) orig_lvls <- kill_levels(orig_lvls, keys) - if (strings_as_factors) { - lvls <- lapply(training, recipes:::get_levels) - lvls <- kill_levels(lvls, keys) - training <- recipes:::strings2factors(training, lvls) - } else { - lvls <- NULL - } - skippers <- map_lgl(x$steps, recipes:::is_skipable) - if (any(skippers) & !retain) { - cli::cli_warn(c( - "Since some operations have `skip = TRUE`, using ", - "`retain = TRUE` will allow those steps results to ", - "be accessible." - )) - } - if (fresh) x$term_info <- x$var_info - - running_info <- x$term_info %>% dplyr::mutate(number = 0, skip = FALSE) - for (i in seq(along.with = x$steps)) { - needs_tuning <- map_lgl(x$steps[[i]], recipes:::is_tune) - if (any(needs_tuning)) { - arg <- names(needs_tuning)[needs_tuning] - arg <- paste0("'", arg, "'", collapse = ", ") - msg <- paste0( - "You cannot `prep()` a tuneable recipe. Argument(s) with `tune()`: ", - arg, ". Do you want to use a tuning function such as `tune_grid()`?" - ) - rlang::abort(msg) - } - note <- paste("oper", i, gsub("_", " ", class(x$steps[[i]])[1])) - if (!x$steps[[i]]$trained | fresh) { - if (verbose) { - cat(note, "[training]", "\n") - } - before_nms <- names(training) - before_template <- training[1, ] - x$steps[[i]] <- prep(x$steps[[i]], - training = training, - info = x$term_info - ) - training <- bake(x$steps[[i]], new_data = training) - if (!tibble::is_tibble(training)) { - cli::cli_abort("`bake()` methods should always return {.cls tibble}.") - } - if (!is_epi_df(training)) { - # tidymodels killed our class - # for now, we only allow step_epi_* to alter the metadata - training <- dplyr::dplyr_reconstruct( - epiprocess::as_epi_df(training), before_template - ) - } - training <- dplyr::relocate(training, tidyselect::all_of(epi_keys(training))) - x$term_info <- recipes:::merge_term_info(get_types(training), x$term_info) - if (!is.na(x$steps[[i]]$role)) { - new_vars <- setdiff(x$term_info$variable, running_info$variable) - pos_new_var <- x$term_info$variable %in% new_vars - pos_new_and_na_role <- pos_new_var & is.na(x$term_info$role) - pos_new_and_na_source <- pos_new_var & is.na(x$term_info$source) - x$term_info$role[pos_new_and_na_role] <- x$steps[[i]]$role - x$term_info$source[pos_new_and_na_source] <- "derived" - } - recipes:::changelog(log_changes, before_nms, names(training), x$steps[[i]]) - running_info <- rbind( - running_info, - dplyr::mutate(x$term_info, number = i, skip = x$steps[[i]]$skip) - ) - } else { - if (verbose) cat(note, "[pre-trained]\n") - } - } - if (strings_as_factors) { - lvls <- lapply(training, recipes:::get_levels) - lvls <- kill_levels(lvls, keys) - check_lvls <- recipes:::has_lvls(lvls) - if (!any(check_lvls)) lvls <- NULL - } else { - lvls <- NULL - } - if (retain) { - if (verbose) { - cat( - "The retained training set is ~", - format(utils::object.size(training), units = "Mb", digits = 2), - " in memory.\n\n" - ) - } - x$template <- training - } else { - x$template <- training[0, ] - } - x$max_time_value <- max(training$time_value) - x$tr_info <- tr_data - x$levels <- lvls + lvls <- lapply(training, recipes:::get_levels) + lvls <- kill_levels(lvls, keys) # don't do anything to the epi_keys + training <- recipes:::strings2factors(training, lvls) + strings_as_factors <- FALSE # now they're already done + + x <- NextMethod("prep") + # Now, we undo the conversion. + + lvls <- lapply(x$template, recipes:::get_levels) + lvls <- kill_levels(lvls, keys) + check_lvls <- recipes:::has_lvls(lvls) + if (!any(check_lvls)) lvls <- NULL + x$lvls <- lvls x$orig_lvls <- orig_lvls - x$retained <- retain - x$last_term_info <- running_info %>% - dplyr::group_by(variable) %>% - dplyr::arrange(dplyr::desc(number)) %>% - dplyr::summarise( - type = list(dplyr::first(type)), - role = list(unique(unlist(role))), - source = dplyr::first(source), - number = dplyr::first(number), - skip = dplyr::first(skip), - .groups = "keep" - ) x } + #' @export bake.epi_recipe <- function(object, new_data, ..., composition = "epi_df") { meta <- NULL diff --git a/R/recipe.epi_df.R b/R/recipe.epi_df.R index eae58d256..308cc6033 100644 --- a/R/recipe.epi_df.R +++ b/R/recipe.epi_df.R @@ -1,3 +1,43 @@ +#' Create a recipe for preprocessing panel data +#' +#' A recipe is a description of the steps to be applied to a data set in +#' order to prepare it for data analysis. This is an S3 method for +#' [recipes::recipe()] to properly handle the additional (panel data) +#' columns present in an [`epiprocess::epi_df`]: `time_value`, `geo_value`, and any +#' additional keys. +#' +#' @aliases epi_recipe epi_recipe.default epi_recipe.formula +#' @inheritParams recipes::recipe +#' @param roles A character string (the same length of `vars`) that +#' describes a single role that the variable will take. This value could be +#' anything but common roles are `"outcome"`, `"predictor"`, +#' `"time_value"`, and `"geo_value"` +#' @param ... Further arguments passed to or from other methods (not currently +#' used). +#' @param formula A model formula. No in-line functions should be used here +#' (e.g. `log(x)`, `x:y`, etc.) and minus signs are not allowed. These types of +#' transformations should be enacted using `step` functions in this package. +#' Dots are allowed as are simple multivariate outcome terms (i.e. no need for +#' `cbind`; see Examples). +#' @param x,data A data frame, tibble, or epi_df of the *template* data set +#' (see below). This is always coerced to the first row to avoid memory issues +#' @inherit recipes::recipe return +#' +#' @export +#' @examples +#' jhu <- case_death_rate_subset %>% +#' dplyr::filter(time_value > "2021-08-01") %>% +#' dplyr::arrange(geo_value, time_value) +#' +#' r <- epi_recipe(jhu) %>% +#' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% +#' step_epi_ahead(death_rate, ahead = 7) %>% +#' step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% +#' recipes::step_naomit(recipes::all_predictors()) %>% +#' # below, `skip` means we don't do this at predict time +#' recipes::step_naomit(recipes::all_outcomes(), skip = TRUE) +#' +#' r #' @importFrom recipes recipe #' @export recipe.epi_df <- function(x, formula = NULL, ..., vars = NULL, roles = NULL) { diff --git a/man/arx_class_epi_workflow.Rd b/man/arx_class_epi_workflow.Rd index bfce7cdaa..f6a00f1b4 100644 --- a/man/arx_class_epi_workflow.Rd +++ b/man/arx_class_epi_workflow.Rd @@ -20,7 +20,7 @@ arx_class_epi_workflow( be real-valued. Conversion of this data to unordered classes is handled internally based on the \code{breaks} argument to \code{\link[=arx_class_args_list]{arx_class_args_list()}}. If discrete classes are already in the \code{epi_df}, it is recommended to -code up a classifier from scratch using \code{\link[=epi_recipe]{epi_recipe()}}.} +code up a classifier from scratch using \code{\link[=recipe]{recipe()}}.} \item{predictors}{A character vector giving column(s) of predictor variables. This defaults to the \code{outcome}. However, if manually specified, only those variables diff --git a/man/arx_classifier.Rd b/man/arx_classifier.Rd index 350352ae9..85543af7d 100644 --- a/man/arx_classifier.Rd +++ b/man/arx_classifier.Rd @@ -20,7 +20,7 @@ arx_classifier( be real-valued. Conversion of this data to unordered classes is handled internally based on the \code{breaks} argument to \code{\link[=arx_class_args_list]{arx_class_args_list()}}. If discrete classes are already in the \code{epi_df}, it is recommended to -code up a classifier from scratch using \code{\link[=epi_recipe]{epi_recipe()}}.} +code up a classifier from scratch using \code{\link[=recipe]{recipe()}}.} \item{predictors}{A character vector giving column(s) of predictor variables. This defaults to the \code{outcome}. However, if manually specified, only those variables diff --git a/man/arx_forecaster.Rd b/man/arx_forecaster.Rd index af05c0682..173fa2bbd 100644 --- a/man/arx_forecaster.Rd +++ b/man/arx_forecaster.Rd @@ -37,7 +37,7 @@ workflow } \description{ This is an autoregressive forecasting model for -\link[epiprocess:epi_df]{epiprocess::epi_df} data. It does "direct" forecasting, meaning +\code{\link[epiprocess:epi_df]{epiprocess::epi_df}} data. It does "direct" forecasting, meaning that it estimates a model for a particular target horizon. } \examples{ diff --git a/man/epi_recipe.Rd b/man/recipe.epi_df.Rd similarity index 81% rename from man/epi_recipe.Rd rename to man/recipe.epi_df.Rd index 1c9048a36..d7aa7aa90 100644 --- a/man/epi_recipe.Rd +++ b/man/recipe.epi_df.Rd @@ -1,33 +1,27 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_recipe.R -\name{epi_recipe} +% Please edit documentation in R/recipe.epi_df.R +\name{recipe.epi_df} +\alias{recipe.epi_df} \alias{epi_recipe} \alias{epi_recipe.default} \alias{epi_recipe.formula} -\alias{epi_recipe.epi_df} -\title{Create a epi_recipe for preprocessing data} +\title{Create a recipe for preprocessing panel data} \usage{ -epi_recipe(x, ...) - -\method{epi_recipe}{default}(x, ...) - -\method{epi_recipe}{epi_df}(x, formula = NULL, ..., vars = NULL, roles = NULL) - -\method{epi_recipe}{formula}(formula, data, ...) +\method{recipe}{epi_df}(x, formula = NULL, ..., vars = NULL, roles = NULL) } \arguments{ \item{x, data}{A data frame, tibble, or epi_df of the \emph{template} data set (see below). This is always coerced to the first row to avoid memory issues} -\item{...}{Further arguments passed to or from other methods (not currently -used).} - \item{formula}{A model formula. No in-line functions should be used here (e.g. \code{log(x)}, \code{x:y}, etc.) and minus signs are not allowed. These types of transformations should be enacted using \code{step} functions in this package. Dots are allowed as are simple multivariate outcome terms (i.e. no need for \code{cbind}; see Examples).} +\item{...}{Further arguments passed to or from other methods (not currently +used).} + \item{vars}{A character string of column names corresponding to variables that will be used in any context (see below)} @@ -52,9 +46,10 @@ the recipe is trained.} } \description{ A recipe is a description of the steps to be applied to a data set in -order to prepare it for data analysis. This is a loose wrapper -around \code{\link[recipes:recipe]{recipes::recipe()}} to properly handle the additional -columns present in an \code{epi_df} +order to prepare it for data analysis. This is an S3 method for +\code{\link[recipes:recipe]{recipes::recipe()}} to properly handle the additional (panel data) +columns present in an \code{\link[epiprocess:epi_df]{epiprocess::epi_df}}: \code{time_value}, \code{geo_value}, and any +additional keys. } \examples{ jhu <- case_death_rate_subset \%>\% From aa4e78885ed93f0f5120cf58217d21f93d78c713 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 12 Aug 2024 16:30:38 -0700 Subject: [PATCH 06/14] remove all instances of epi_recipe, pass checks --- NAMESPACE | 1 + R/arx_classifier.R | 2 +- R/arx_forecaster.R | 2 +- R/autoplot.R | 11 +++-- R/cdc_baseline_forecaster.R | 42 ++++++++-------- R/epi_recipe.R | 17 ++++--- R/epi_workflow.R | 11 +++-- R/flatline_forecaster.R | 2 +- R/frosting.R | 17 ++++--- R/get_test_data.R | 2 +- R/layer_add_forecast_date.R | 9 ++-- R/layer_add_target_date.R | 7 +-- R/layer_cdc_flatline_quantiles.R | 48 +++++++++---------- R/layer_naomit.R | 5 +- R/layer_point_from_distn.R | 8 ++-- R/layer_population_scaling.R | 11 +++-- R/layer_predict.R | 4 +- R/layer_predictive_distn.R | 7 +-- R/layer_quantile_distn.R | 5 +- R/layer_residual_quantiles.R | 7 +-- R/layer_threshold_preds.R | 9 ++-- R/layers.R | 7 +-- R/make_smooth_quantile_reg.R | 21 ++++---- R/model-methods.R | 10 ++-- R/recipe.epi_df.R | 13 +++-- R/reexports-tidymodels.R | 4 ++ R/step_epi_naomit.R | 2 +- R/step_epi_shift.R | 2 +- R/step_epi_slide.R | 4 +- R/step_growth_rate.R | 2 +- R/step_lag_difference.R | 2 +- R/step_population_scaling.R | 13 +++-- R/step_training_window.R | 7 +-- R/tidy.R | 7 +-- man/Add_model.Rd | 10 ++-- man/add_epi_recipe.Rd | 4 +- man/add_frosting.Rd | 5 +- man/adjust_epi_recipe.Rd | 4 +- man/adjust_frosting.Rd | 5 +- man/arx_forecaster.Rd | 2 +- man/autoplot-epipred.Rd | 11 +++-- man/cdc_baseline_forecaster.Rd | 38 +++++++-------- man/epi_workflow.Rd | 2 +- man/fit-epi_workflow.Rd | 2 +- man/frosting.Rd | 7 +-- man/get_test_data.Rd | 2 +- man/layer_add_forecast_date.Rd | 9 ++-- man/layer_add_target_date.Rd | 7 +-- man/layer_cdc_flatline_quantiles.Rd | 48 +++++++++---------- man/layer_naomit.Rd | 5 +- man/layer_point_from_distn.Rd | 8 ++-- man/layer_population_scaling.Rd | 11 +++-- man/layer_predict.Rd | 4 +- man/layer_predictive_distn.Rd | 7 +-- man/layer_quantile_distn.Rd | 5 +- man/layer_residual_quantiles.Rd | 7 +-- man/layer_threshold.Rd | 8 ++-- man/predict-epi_workflow.Rd | 7 +-- man/recipe.epi_df.Rd | 15 ++++-- man/reexports.Rd | 3 +- man/smooth_quantile_reg.Rd | 21 ++++---- man/step_epi_naomit.Rd | 2 +- man/step_epi_shift.Rd | 2 +- man/step_epi_slide.Rd | 4 +- man/step_growth_rate.Rd | 2 +- man/step_lag_difference.Rd | 2 +- man/step_population_scaling.Rd | 13 +++-- man/step_training_window.Rd | 7 +-- man/tidy.frosting.Rd | 7 +-- man/update.layer.Rd | 7 +-- tests/testthat/test-bake-method.R | 4 +- tests/testthat/test-check_enough_train_data.R | 22 ++++----- tests/testthat/test-epi_keys.R | 4 +- tests/testthat/test-epi_recipe.R | 19 -------- tests/testthat/test-epi_workflow.R | 10 ++-- tests/testthat/test-extract_argument.R | 2 +- tests/testthat/test-frosting.R | 6 +-- tests/testthat/test-get_test_data.R | 14 +++--- tests/testthat/test-layer_add_forecast_date.R | 2 +- tests/testthat/test-layer_add_target_date.R | 2 +- tests/testthat/test-layer_naomit.R | 2 +- tests/testthat/test-layer_predict.R | 2 +- .../testthat/test-layer_residual_quantiles.R | 4 +- tests/testthat/test-layer_threshold_preds.R | 2 +- tests/testthat/test-population_scaling.R | 26 +++++----- tests/testthat/test-step_epi_naomit.R | 2 +- tests/testthat/test-step_epi_shift.R | 10 ++-- tests/testthat/test-step_epi_slide.R | 4 +- tests/testthat/test-step_growth_rate.R | 10 ++-- tests/testthat/test-step_lag_difference.R | 10 ++-- tests/testthat/test-step_training_window.R | 23 +++++---- vignettes/articles/smooth-qr.Rmd | 2 +- vignettes/epipredict.Rmd | 4 +- vignettes/panel-data.Rmd | 4 +- vignettes/preprocessing-and-models.Rmd | 10 ++-- vignettes/update.Rmd | 6 +-- 96 files changed, 426 insertions(+), 404 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index fd57a07f7..1cf322207 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -184,6 +184,7 @@ export(pivot_quantiles_longer) export(pivot_quantiles_wider) export(prep) export(quantile_reg) +export(recipe) export(remove_epi_recipe) export(remove_frosting) export(remove_model) diff --git a/R/arx_classifier.R b/R/arx_classifier.R index 1e832d060..54966337b 100644 --- a/R/arx_classifier.R +++ b/R/arx_classifier.R @@ -128,7 +128,7 @@ arx_class_epi_workflow <- function( # --- preprocessor # ------- predictors - r <- epi_recipe(epi_data) %>% + r <- recipe(epi_data) %>% step_growth_rate( tidyselect::all_of(predictors), role = "grp", diff --git a/R/arx_forecaster.R b/R/arx_forecaster.R index 1b9e3d503..d19e16cb3 100644 --- a/R/arx_forecaster.R +++ b/R/arx_forecaster.R @@ -121,7 +121,7 @@ arx_fcast_epi_workflow <- function( lags <- arx_lags_validator(predictors, args_list$lags) # --- preprocessor - r <- epi_recipe(epi_data) + r <- recipe(epi_data) for (l in seq_along(lags)) { p <- predictors[l] r <- step_epi_lag(r, !!p, lag = lags[[l]]) diff --git a/R/autoplot.R b/R/autoplot.R index 143ab35d5..ec9a6bba6 100644 --- a/R/autoplot.R +++ b/R/autoplot.R @@ -28,10 +28,11 @@ ggplot2::autoplot #' #' @name autoplot-epipred #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% #' filter(time_value >= as.Date("2021-11-01")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% @@ -41,21 +42,21 @@ ggplot2::autoplot #' layer_residual_quantiles( #' quantile_levels = c(.025, .1, .25, .75, .9, .975) #' ) %>% -#' layer_threshold(dplyr::starts_with(".pred")) %>% +#' layer_threshold(starts_with(".pred")) %>% #' layer_add_target_date() #' -#' wf <- epi_workflow(r, parsnip::linear_reg(), f) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg(), f) %>% fit(jhu) #' #' autoplot(wf) #' -#' latest <- jhu %>% dplyr::filter(time_value >= max(time_value) - 14) +#' latest <- jhu %>% filter(time_value >= max(time_value) - 14) #' preds <- predict(wf, latest) #' autoplot(wf, preds, .max_facets = 4) #' #' # ------- Show multiple horizons #' #' p <- lapply(c(7, 14, 21, 28), \(h) { -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = h) %>% #' step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R index d5b74a9c3..5117c7bb4 100644 --- a/R/cdc_baseline_forecaster.R +++ b/R/cdc_baseline_forecaster.R @@ -36,25 +36,25 @@ #' cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") #' preds <- pivot_quantiles_wider(cdc$predictions, .pred_distn) #' -#' if (require(ggplot2)) { -#' forecast_date <- unique(preds$forecast_date) -#' four_states <- c("ca", "pa", "wa", "ny") -#' preds %>% -#' filter(geo_value %in% four_states) %>% -#' ggplot(aes(target_date)) + -#' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + -#' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + -#' geom_line(aes(y = .pred), color = "orange") + -#' geom_line( -#' data = weekly_deaths %>% filter(geo_value %in% four_states), -#' aes(x = time_value, y = deaths) -#' ) + -#' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + -#' labs(x = "Date", y = "Weekly deaths") + -#' facet_wrap(~geo_value, scales = "free_y") + -#' theme_bw() + -#' geom_vline(xintercept = forecast_date) -#' } +#' library(ggplot2) +#' forecast_date <- unique(preds$forecast_date) +#' four_states <- c("ca", "pa", "wa", "ny") +#' preds %>% +#' filter(geo_value %in% four_states) %>% +#' ggplot(aes(target_date)) + +#' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + +#' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + +#' geom_line(aes(y = .pred), color = "orange") + +#' geom_line( +#' data = weekly_deaths %>% filter(geo_value %in% four_states), +#' aes(x = time_value, y = deaths) +#' ) + +#' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + +#' labs(x = "Date", y = "Weekly deaths") + +#' facet_wrap(~geo_value, scales = "free_y") + +#' theme_bw() + +#' geom_vline(xintercept = forecast_date) +#' cdc_baseline_forecaster <- function( epi_data, outcome, @@ -68,7 +68,7 @@ cdc_baseline_forecaster <- function( outcome <- rlang::sym(outcome) - r <- epi_recipe(epi_data) %>% + r <- recipe(epi_data) %>% step_epi_ahead(!!outcome, ahead = args_list$data_frequency, skip = TRUE) %>% recipes::update_role(!!outcome, new_role = "predictor") %>% recipes::add_role(tidyselect::all_of(keys), new_role = "predictor") %>% @@ -79,7 +79,7 @@ cdc_baseline_forecaster <- function( latest <- get_test_data( - epi_recipe(epi_data), epi_data, TRUE, args_list$nafill_buffer, + recipe(epi_data), epi_data, TRUE, args_list$nafill_buffer, forecast_date ) diff --git a/R/epi_recipe.R b/R/epi_recipe.R index 24ae734c9..bd0aba28b 100644 --- a/R/epi_recipe.R +++ b/R/epi_recipe.R @@ -65,7 +65,7 @@ is_epi_recipe <- function(x) { #' filter(time_value > "2021-08-01") %>% #' arrange(geo_value, time_value) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% @@ -77,7 +77,7 @@ is_epi_recipe <- function(x) { #' #' workflow #' -#' r2 <- epi_recipe(jhu) %>% +#' r2 <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) #' @@ -147,12 +147,12 @@ update_epi_recipe <- function(x, recipe, ..., blueprint = default_epi_recipe_blu #' #' jhu <- case_death_rate_subset %>% #' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) #' latest <- jhu %>% #' filter(time_value >= max(time_value) - 14) #' @@ -239,16 +239,19 @@ prep.epi_recipe <- function( lvls <- lapply(training, recipes:::get_levels) lvls <- kill_levels(lvls, keys) # don't do anything to the epi_keys training <- recipes:::strings2factors(training, lvls) - strings_as_factors <- FALSE # now they're already done - x <- NextMethod("prep") + # browser() + x <- NextMethod("prep", training = training, fresh = fresh, + verbose = verbose, + retain = retain, log_changes = log_changes, + strings_as_factors = FALSE, ...) # Now, we undo the conversion. lvls <- lapply(x$template, recipes:::get_levels) lvls <- kill_levels(lvls, keys) check_lvls <- recipes:::has_lvls(lvls) if (!any(check_lvls)) lvls <- NULL - x$lvls <- lvls + x$levels <- lvls x$orig_lvls <- orig_lvls x } diff --git a/R/epi_workflow.R b/R/epi_workflow.R index 0bdeece4f..43db5d38c 100644 --- a/R/epi_workflow.R +++ b/R/epi_workflow.R @@ -22,7 +22,7 @@ #' @examples #' jhu <- case_death_rate_subset #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% @@ -87,7 +87,7 @@ is_epi_workflow <- function(x) { #' jhu <- case_death_rate_subset %>% #' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) #' @@ -139,16 +139,17 @@ fit.epi_workflow <- function(object, data, ..., control = workflows::control_wor #' @name predict-epi_workflow #' @export #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) -#' latest <- jhu %>% dplyr::filter(time_value >= max(time_value) - 14) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) +#' latest <- jhu %>% filter(time_value >= max(time_value) - 14) #' #' preds <- predict(wf, latest) #' preds diff --git a/R/flatline_forecaster.R b/R/flatline_forecaster.R index e14e44a96..a9fad8807 100644 --- a/R/flatline_forecaster.R +++ b/R/flatline_forecaster.R @@ -40,7 +40,7 @@ flatline_forecaster <- function( outcome <- rlang::sym(outcome) - r <- epi_recipe(epi_data) %>% + r <- recipe(epi_data) %>% step_epi_ahead(!!outcome, ahead = args_list$ahead, skip = TRUE) %>% recipes::update_role(!!outcome, new_role = "predictor") %>% recipes::add_role(tidyselect::all_of(keys), new_role = "predictor") %>% diff --git a/R/frosting.R b/R/frosting.R index 4fc0caec3..d11a23ca2 100644 --- a/R/frosting.R +++ b/R/frosting.R @@ -8,15 +8,16 @@ #' @export #' #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% #' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) #' #' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) #' latest <- jhu %>% -#' dplyr::filter(time_value >= max(time_value) - 14) +#' filter(time_value >= max(time_value) - 14) #' #' # Add frosting to a workflow and predict #' f <- frosting() %>% @@ -125,14 +126,15 @@ update_frosting <- function(x, frosting, ...) { #' #' @export #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% #' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) #' #' # in the frosting from the workflow #' f1 <- frosting() %>% @@ -266,15 +268,16 @@ new_frosting <- function() { #' wf <- epi_workflow() %>% add_frosting(f) #' #' # A more realistic example +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) #' #' f <- frosting() %>% #' layer_predict() %>% diff --git a/R/get_test_data.R b/R/get_test_data.R index 0a7d0dc2a..63c677c31 100644 --- a/R/get_test_data.R +++ b/R/get_test_data.R @@ -35,7 +35,7 @@ #' keys, as well other variables in the original dataset. #' @examples #' # create recipe -#' rec <- epi_recipe(case_death_rate_subset) %>% +#' rec <- recipe(case_death_rate_subset) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_lag(case_rate, lag = c(0, 7, 14)) diff --git a/R/layer_add_forecast_date.R b/R/layer_add_forecast_date.R index c4bb7d483..c90face25 100644 --- a/R/layer_add_forecast_date.R +++ b/R/layer_add_forecast_date.R @@ -19,15 +19,16 @@ #' #' @export #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -#' r <- epi_recipe(jhu) %>% +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) #' latest <- jhu %>% -#' dplyr::filter(time_value >= max(time_value) - 14) +#' filter(time_value >= max(time_value) - 14) #' #' # Don't specify `forecast_date` (by default, this should be last date in latest) #' f <- frosting() %>% diff --git a/R/layer_add_target_date.R b/R/layer_add_target_date.R index 23aeb4091..5840b555b 100644 --- a/R/layer_add_target_date.R +++ b/R/layer_add_target_date.R @@ -20,14 +20,15 @@ #' #' @export #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -#' r <- epi_recipe(jhu) %>% +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) #' #' # Use ahead + forecast date #' f <- frosting() %>% diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R index f54c1da78..5ad7c6c12 100644 --- a/R/layer_cdc_flatline_quantiles.R +++ b/R/layer_cdc_flatline_quantiles.R @@ -55,12 +55,13 @@ #' @export #' #' @examples -#' r <- epi_recipe(case_death_rate_subset) %>% +#' library(recipes) +#' r <- recipe(case_death_rate_subset) %>% #' # data is "daily", so we fit this to 1 ahead, the result will contain #' # 1 day ahead residuals #' step_epi_ahead(death_rate, ahead = 1L, skip = TRUE) %>% -#' recipes::update_role(death_rate, new_role = "predictor") %>% -#' recipes::add_role(time_value, geo_value, new_role = "predictor") +#' update_role(death_rate, new_role = "predictor") %>% +#' add_role(time_value, geo_value, new_role = "predictor") #' #' forecast_date <- max(case_death_rate_subset$time_value) #' @@ -68,12 +69,12 @@ #' layer_predict() %>% #' layer_cdc_flatline_quantiles(aheads = c(7, 14, 21, 28), symmetrize = TRUE) #' -#' eng <- parsnip::linear_reg() %>% parsnip::set_engine("flatline") +#' eng <- linear_reg() %>% set_engine("flatline") #' #' wf <- epi_workflow(r, eng, f) %>% fit(case_death_rate_subset) #' preds <- forecast(wf) %>% -#' dplyr::select(-time_value) %>% -#' dplyr::mutate(forecast_date = forecast_date) +#' select(-time_value) %>% +#' mutate(forecast_date = forecast_date) #' preds #' #' preds <- preds %>% @@ -81,24 +82,23 @@ #' pivot_quantiles_wider(.pred_distn) %>% #' mutate(target_date = forecast_date + ahead) #' -#' if (require("ggplot2")) { -#' four_states <- c("ca", "pa", "wa", "ny") -#' preds %>% -#' filter(geo_value %in% four_states) %>% -#' ggplot(aes(target_date)) + -#' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + -#' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + -#' geom_line(aes(y = .pred), color = "orange") + -#' geom_line( -#' data = case_death_rate_subset %>% filter(geo_value %in% four_states), -#' aes(x = time_value, y = death_rate) -#' ) + -#' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + -#' labs(x = "Date", y = "Death rate") + -#' facet_wrap(~geo_value, scales = "free_y") + -#' theme_bw() + -#' geom_vline(xintercept = forecast_date) -#' } +#' library(ggplot2) +#' four_states <- c("ca", "pa", "wa", "ny") +#' preds %>% +#' filter(geo_value %in% four_states) %>% +#' ggplot(aes(target_date)) + +#' geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + +#' geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + +#' geom_line(aes(y = .pred), color = "orange") + +#' geom_line( +#' data = case_death_rate_subset %>% filter(geo_value %in% four_states), +#' aes(x = time_value, y = death_rate) +#' ) + +#' scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + +#' labs(x = "Date", y = "Death rate") + +#' facet_wrap(~geo_value, scales = "free_y") + +#' theme_bw() + +#' geom_vline(xintercept = forecast_date) layer_cdc_flatline_quantiles <- function( frosting, ..., diff --git a/R/layer_naomit.R b/R/layer_naomit.R index 85842bfdf..a133180ad 100644 --- a/R/layer_naomit.R +++ b/R/layer_naomit.R @@ -11,10 +11,11 @@ #' @return an updated `frosting` postprocessor #' @export #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) #' diff --git a/R/layer_point_from_distn.R b/R/layer_point_from_distn.R index f415e7bd4..a16306ee0 100644 --- a/R/layer_point_from_distn.R +++ b/R/layer_point_from_distn.R @@ -16,15 +16,17 @@ #' @export #' #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.25, .5, .75))) %>% fit(jhu) +#' wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.25, .5, .75))) %>% +#' fit(jhu) #' #' f1 <- frosting() %>% #' layer_predict() %>% diff --git a/R/layer_population_scaling.R b/R/layer_population_scaling.R index 33183198d..f3b267f04 100644 --- a/R/layer_population_scaling.R +++ b/R/layer_population_scaling.R @@ -47,13 +47,14 @@ #' @return an updated `frosting` postprocessor #' @export #' @examples -#' jhu <- epiprocess::jhu_csse_daily_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>% -#' dplyr::select(geo_value, time_value, cases) +#' library(dplyr) +#' jhu <- jhu_csse_daily_subset %>% +#' filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>% +#' select(geo_value, time_value, cases) #' #' pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000)) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_population_scaling( #' df = pop_data, #' df_pop_col = "value", @@ -74,7 +75,7 @@ #' df_pop_col = "value" #' ) #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% +#' wf <- epi_workflow(r, linear_reg()) %>% #' fit(jhu) %>% #' add_frosting(f) #' diff --git a/R/layer_predict.R b/R/layer_predict.R index 46d81be18..c452dd25e 100644 --- a/R/layer_predict.R +++ b/R/layer_predict.R @@ -19,12 +19,12 @@ #' jhu <- case_death_rate_subset %>% #' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) #' latest <- jhu %>% filter(time_value >= max(time_value) - 14) #' #' # Predict layer alone diff --git a/R/layer_predictive_distn.R b/R/layer_predictive_distn.R index 9b1a160e1..00d096d50 100644 --- a/R/layer_predictive_distn.R +++ b/R/layer_predictive_distn.R @@ -20,15 +20,16 @@ #' @export #' #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) #' #' f <- frosting() %>% #' layer_predict() %>% diff --git a/R/layer_quantile_distn.R b/R/layer_quantile_distn.R index 734ccec9e..c875d6c5b 100644 --- a/R/layer_quantile_distn.R +++ b/R/layer_quantile_distn.R @@ -17,10 +17,11 @@ #' @export #' #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() diff --git a/R/layer_residual_quantiles.R b/R/layer_residual_quantiles.R index 85c1c6ed0..ef5be371b 100644 --- a/R/layer_residual_quantiles.R +++ b/R/layer_residual_quantiles.R @@ -14,15 +14,16 @@ #' residual quantiles added to the prediction #' @export #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) #' #' f <- frosting() %>% #' layer_predict() %>% diff --git a/R/layer_threshold_preds.R b/R/layer_threshold_preds.R index 8b2b56d1e..233adbf0c 100644 --- a/R/layer_threshold_preds.R +++ b/R/layer_threshold_preds.R @@ -22,15 +22,14 @@ #' @return an updated `frosting` postprocessor #' @export #' @examples - +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value < "2021-03-08", -#' geo_value %in% c("ak", "ca", "ar")) -#' r <- epi_recipe(jhu) %>% +#' filter(time_value < "2021-03-08", geo_value %in% c("ak", "ca", "ar")) +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) #' #' f <- frosting() %>% #' layer_predict() %>% diff --git a/R/layers.R b/R/layers.R index b59e95cdd..1ecf861bc 100644 --- a/R/layers.R +++ b/R/layers.R @@ -41,15 +41,16 @@ layer <- function(subclass, ..., .prefix = "layer_") { #' in the layer, and the values are the new values to update the layer with. #' #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -#' r <- epi_recipe(jhu) %>% +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) #' latest <- jhu %>% -#' dplyr::filter(time_value >= max(time_value) - 14) +#' filter(time_value >= max(time_value) - 14) #' #' # Specify a `forecast_date` that is greater than or equal to `as_of` date #' f <- frosting() %>% diff --git a/R/make_smooth_quantile_reg.R b/R/make_smooth_quantile_reg.R index 9ab3a366b..dc585de22 100644 --- a/R/make_smooth_quantile_reg.R +++ b/R/make_smooth_quantile_reg.R @@ -62,17 +62,16 @@ #' lines(pl$x, pl$`0.8`, col = "blue") #' lines(pl$x, pl$`0.5`, col = "red") #' -#' if (require("ggplot2")) { -#' ggplot(data.frame(x = x, y = y), aes(x)) + -#' geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") + -#' geom_point(aes(y = y), colour = "grey") + # observed data -#' geom_function(fun = sin, colour = "black") + # truth -#' geom_vline(xintercept = fd, linetype = "dashed") + # end of training data -#' geom_line(data = pl, aes(y = `0.5`), colour = "red") + # median prediction -#' theme_bw() + -#' coord_cartesian(xlim = c(0, NA)) + -#' ylab("y") -#' } +#' library(ggplot2) +#' ggplot(data.frame(x = x, y = y), aes(x)) + +#' geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") + +#' geom_point(aes(y = y), colour = "grey") + # observed data +#' geom_function(fun = sin, colour = "black") + # truth +#' geom_vline(xintercept = fd, linetype = "dashed") + # end of training data +#' geom_line(data = pl, aes(y = `0.5`), colour = "red") + # median prediction +#' theme_bw() + +#' coord_cartesian(xlim = c(0, NA)) + +#' ylab("y") smooth_quantile_reg <- function( mode = "regression", engine = "smoothqr", diff --git a/R/model-methods.R b/R/model-methods.R index 607b04234..131a6ee91 100644 --- a/R/model-methods.R +++ b/R/model-methods.R @@ -32,13 +32,11 @@ #' #' @export #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter( -#' time_value > "2021-11-01", -#' geo_value %in% c("ak", "ca", "ny") -#' ) +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) #' @@ -49,7 +47,7 @@ #' wf <- wf %>% Add_model(rf_model) #' wf #' -#' lm_model <- parsnip::linear_reg() +#' lm_model <- linear_reg() #' #' wf <- Update_model(wf, lm_model) #' wf diff --git a/R/recipe.epi_df.R b/R/recipe.epi_df.R index 308cc6033..8b7f67572 100644 --- a/R/recipe.epi_df.R +++ b/R/recipe.epi_df.R @@ -25,17 +25,19 @@ #' #' @export #' @examples +#' library(dplyr) +#' library(recipes) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-08-01") %>% -#' dplyr::arrange(geo_value, time_value) +#' filter(time_value > "2021-08-01") %>% +#' arrange(geo_value, time_value) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% -#' recipes::step_naomit(recipes::all_predictors()) %>% +#' step_naomit(recipes::all_predictors()) %>% #' # below, `skip` means we don't do this at predict time -#' recipes::step_naomit(recipes::all_outcomes(), skip = TRUE) +#' step_naomit(recipes::all_outcomes(), skip = TRUE) #' #' r #' @importFrom recipes recipe @@ -63,6 +65,7 @@ recipe.epi_df <- function(x, formula = NULL, ..., vars = NULL, roles = NULL) { } #' @exportS3Method recipes::recipe +#' @rdname recipe.epi_df recipe.formula <- function(formula, data, ...) { # This method clobbers `recipes::recipe.formula`, but should have no noticible # effect. diff --git a/R/reexports-tidymodels.R b/R/reexports-tidymodels.R index 2c69139a2..d099cde4a 100644 --- a/R/reexports-tidymodels.R +++ b/R/reexports-tidymodels.R @@ -13,3 +13,7 @@ recipes::prep #' @importFrom recipes bake #' @export recipes::bake + +#' @importFrom recipes prep +#' @export +recipes::recipe diff --git a/R/step_epi_naomit.R b/R/step_epi_naomit.R index 1cbc9c5d9..3a4e46763 100644 --- a/R/step_epi_naomit.R +++ b/R/step_epi_naomit.R @@ -9,7 +9,7 @@ #' @export #' @examples #' case_death_rate_subset %>% -#' epi_recipe() %>% +#' recipe() %>% #' step_epi_naomit() step_epi_naomit <- function(recipe) { stopifnot(inherits(recipe, "recipe")) diff --git a/R/step_epi_shift.R b/R/step_epi_shift.R index 52f51de16..3adc82921 100644 --- a/R/step_epi_shift.R +++ b/R/step_epi_shift.R @@ -46,7 +46,7 @@ #' @rdname step_epi_shift #' @export #' @examples -#' r <- epi_recipe(case_death_rate_subset) %>% +#' r <- recipe(case_death_rate_subset) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) #' r diff --git a/R/step_epi_slide.R b/R/step_epi_slide.R index 637d31a54..09883dbf1 100644 --- a/R/step_epi_slide.R +++ b/R/step_epi_slide.R @@ -35,9 +35,9 @@ #' library(dplyr) #' jhu <- case_death_rate_subset %>% #' filter(time_value >= as.Date("2021-01-01"), geo_value %in% c("ca", "ny")) -#' rec <- epi_recipe(jhu) %>% +#' rec <- recipe(jhu) %>% #' step_epi_slide(case_rate, death_rate, -#' .f = \(x) mean(x, na.rm = TRUE), +#' .f = function(x) mean(x, na.rm = TRUE), #' before = 6L #' ) #' bake(prep(rec, jhu), new_data = NULL) diff --git a/R/step_growth_rate.R b/R/step_growth_rate.R index e5edb18d4..f7ac8af9f 100644 --- a/R/step_growth_rate.R +++ b/R/step_growth_rate.R @@ -34,7 +34,7 @@ #' @importFrom epiprocess growth_rate #' @export #' @examples -#' r <- epi_recipe(case_death_rate_subset) %>% +#' r <- recipe(case_death_rate_subset) %>% #' step_growth_rate(case_rate, death_rate) #' r #' diff --git a/R/step_lag_difference.R b/R/step_lag_difference.R index e954bd9a0..4938b4231 100644 --- a/R/step_lag_difference.R +++ b/R/step_lag_difference.R @@ -15,7 +15,7 @@ #' @family row operation steps #' @export #' @examples -#' r <- epi_recipe(case_death_rate_subset) %>% +#' r <- recipe(case_death_rate_subset) %>% #' step_lag_difference(case_rate, death_rate, horizon = c(7, 14)) %>% #' step_epi_naomit() #' r diff --git a/R/step_population_scaling.R b/R/step_population_scaling.R index 7f2d44ab9..946f6d859 100644 --- a/R/step_population_scaling.R +++ b/R/step_population_scaling.R @@ -63,15 +63,14 @@ #' @return Scales raw data by the population #' @export #' @examples -#' library(epiprocess) -#' library(epipredict) -#' jhu <- epiprocess::jhu_csse_daily_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>% -#' dplyr::select(geo_value, time_value, cases) +#' library(dplyr) +#' jhu <- jhu_csse_daily_subset %>% +#' filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>% +#' select(geo_value, time_value, cases) #' #' pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000)) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_population_scaling( #' df = pop_data, #' df_pop_col = "value", @@ -92,7 +91,7 @@ #' df_pop_col = "value" #' ) #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% +#' wf <- epi_workflow(r, linear_reg()) %>% #' fit(jhu) %>% #' add_frosting(f) #' diff --git a/R/step_training_window.R b/R/step_training_window.R index 7102d29d8..59427357a 100644 --- a/R/step_training_window.R +++ b/R/step_training_window.R @@ -36,13 +36,14 @@ #' ) %>% #' as_epi_df() #' -#' epi_recipe(y ~ x, data = tib) %>% +#' recipe(y ~ x, data = tib) %>% #' step_training_window(n_recent = 3) %>% #' prep(tib) %>% #' bake(new_data = NULL) #' -#' epi_recipe(y ~ x, data = tib) %>% -#' recipes::step_naomit() %>% +#' library(recipes) +#' recipe(y ~ x, data = tib) %>% +#' step_naomit() %>% #' step_training_window(n_recent = 3) %>% #' prep(tib) %>% #' bake(new_data = NULL) diff --git a/R/tidy.R b/R/tidy.R index 06835eff0..caeb7b720 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -26,15 +26,16 @@ #' `type` (the method, e.g. "predict", "naomit"), and a character column `id`. #' #' @examples +#' library(dplyr) #' jhu <- case_death_rate_subset %>% -#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) #' -#' r <- epi_recipe(jhu) %>% +#' r <- recipe(jhu) %>% #' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% #' step_epi_ahead(death_rate, ahead = 7) %>% #' step_epi_naomit() #' -#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu) #' latest <- get_test_data(recipe = r, x = jhu) #' f <- frosting() %>% diff --git a/man/Add_model.Rd b/man/Add_model.Rd index 6bf6b6b02..27236cf44 100644 --- a/man/Add_model.Rd +++ b/man/Add_model.Rd @@ -71,13 +71,11 @@ aliases with the lower-case names. However, in the event that properly. } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter( - time_value > "2021-11-01", - geo_value \%in\% c("ak", "ca", "ny") - ) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) @@ -88,7 +86,7 @@ wf <- epi_workflow(r) wf <- wf \%>\% Add_model(rf_model) wf -lm_model <- parsnip::linear_reg() +lm_model <- linear_reg() wf <- Update_model(wf, lm_model) wf diff --git a/man/add_epi_recipe.Rd b/man/add_epi_recipe.Rd index 0da2d55b3..3abf675ef 100644 --- a/man/add_epi_recipe.Rd +++ b/man/add_epi_recipe.Rd @@ -45,7 +45,7 @@ jhu <- case_death_rate_subset \%>\% filter(time_value > "2021-08-01") \%>\% arrange(geo_value, time_value) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_lag(case_rate, lag = c(0, 7, 14)) \%>\% @@ -57,7 +57,7 @@ workflow <- epi_workflow() \%>\% workflow -r2 <- epi_recipe(jhu) \%>\% +r2 <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) diff --git a/man/add_frosting.Rd b/man/add_frosting.Rd index 161a540e2..e014084d2 100644 --- a/man/add_frosting.Rd +++ b/man/add_frosting.Rd @@ -26,15 +26,16 @@ update_frosting(x, frosting, ...) Add frosting to a workflow } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% - dplyr::filter(time_value >= max(time_value) - 14) + filter(time_value >= max(time_value) - 14) # Add frosting to a workflow and predict f <- frosting() \%>\% diff --git a/man/adjust_epi_recipe.Rd b/man/adjust_epi_recipe.Rd index 7468c4ce2..d7fc5e72a 100644 --- a/man/adjust_epi_recipe.Rd +++ b/man/adjust_epi_recipe.Rd @@ -57,12 +57,12 @@ library(workflows) jhu <- case_death_rate_subset \%>\% filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% filter(time_value >= max(time_value) - 14) diff --git a/man/adjust_frosting.Rd b/man/adjust_frosting.Rd index 6cdc13b30..fd7a606a2 100644 --- a/man/adjust_frosting.Rd +++ b/man/adjust_frosting.Rd @@ -35,14 +35,15 @@ must be inputted as \code{...}. See the examples below for brief illustrations of the different types of updates. } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) # in the frosting from the workflow f1 <- frosting() \%>\% diff --git a/man/arx_forecaster.Rd b/man/arx_forecaster.Rd index 173fa2bbd..af05c0682 100644 --- a/man/arx_forecaster.Rd +++ b/man/arx_forecaster.Rd @@ -37,7 +37,7 @@ workflow } \description{ This is an autoregressive forecasting model for -\code{\link[epiprocess:epi_df]{epiprocess::epi_df}} data. It does "direct" forecasting, meaning +\link[epiprocess:epi_df]{epiprocess::epi_df} data. It does "direct" forecasting, meaning that it estimates a model for a particular target horizon. } \examples{ diff --git a/man/autoplot-epipred.Rd b/man/autoplot-epipred.Rd index dd6b37dcd..0b5434b95 100644 --- a/man/autoplot-epipred.Rd +++ b/man/autoplot-epipred.Rd @@ -70,10 +70,11 @@ will be shown as well. Unfit workflows will result in an error, (you can simply call \code{autoplot()} on the original \code{epi_df}). } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% filter(time_value >= as.Date("2021-11-01")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_lag(case_rate, lag = c(0, 7, 14)) \%>\% @@ -83,21 +84,21 @@ f <- frosting() \%>\% layer_residual_quantiles( quantile_levels = c(.025, .1, .25, .75, .9, .975) ) \%>\% - layer_threshold(dplyr::starts_with(".pred")) \%>\% + layer_threshold(starts_with(".pred")) \%>\% layer_add_target_date() -wf <- epi_workflow(r, parsnip::linear_reg(), f) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg(), f) \%>\% fit(jhu) autoplot(wf) -latest <- jhu \%>\% dplyr::filter(time_value >= max(time_value) - 14) +latest <- jhu \%>\% filter(time_value >= max(time_value) - 14) preds <- predict(wf, latest) autoplot(wf, preds, .max_facets = 4) # ------- Show multiple horizons p <- lapply(c(7, 14, 21, 28), \(h) { - r <- epi_recipe(jhu) \%>\% + r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = h) \%>\% step_epi_lag(case_rate, lag = c(0, 7, 14)) \%>\% diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd index cd3c4ed67..3d451b275 100644 --- a/man/cdc_baseline_forecaster.Rd +++ b/man/cdc_baseline_forecaster.Rd @@ -51,23 +51,23 @@ weekly_deaths <- case_death_rate_subset \%>\% cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths") preds <- pivot_quantiles_wider(cdc$predictions, .pred_distn) -if (require(ggplot2)) { - forecast_date <- unique(preds$forecast_date) - four_states <- c("ca", "pa", "wa", "ny") - preds \%>\% - filter(geo_value \%in\% four_states) \%>\% - ggplot(aes(target_date)) + - geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + - geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + - geom_line(aes(y = .pred), color = "orange") + - geom_line( - data = weekly_deaths \%>\% filter(geo_value \%in\% four_states), - aes(x = time_value, y = deaths) - ) + - scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + - labs(x = "Date", y = "Weekly deaths") + - facet_wrap(~geo_value, scales = "free_y") + - theme_bw() + - geom_vline(xintercept = forecast_date) -} +library(ggplot2) +forecast_date <- unique(preds$forecast_date) +four_states <- c("ca", "pa", "wa", "ny") +preds \%>\% + filter(geo_value \%in\% four_states) \%>\% + ggplot(aes(target_date)) + + geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + + geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + + geom_line(aes(y = .pred), color = "orange") + + geom_line( + data = weekly_deaths \%>\% filter(geo_value \%in\% four_states), + aes(x = time_value, y = deaths) + ) + + scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + + labs(x = "Date", y = "Weekly deaths") + + facet_wrap(~geo_value, scales = "free_y") + + theme_bw() + + geom_vline(xintercept = forecast_date) + } diff --git a/man/epi_workflow.Rd b/man/epi_workflow.Rd index b29078d52..0b9fba73e 100644 --- a/man/epi_workflow.Rd +++ b/man/epi_workflow.Rd @@ -35,7 +35,7 @@ and numerous examples, see there. \examples{ jhu <- case_death_rate_subset -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_lag(case_rate, lag = c(0, 7, 14)) \%>\% diff --git a/man/fit-epi_workflow.Rd b/man/fit-epi_workflow.Rd index 3dfa0029a..623706d42 100644 --- a/man/fit-epi_workflow.Rd +++ b/man/fit-epi_workflow.Rd @@ -31,7 +31,7 @@ preprocessing the data and fitting the underlying parsnip model. jhu <- case_death_rate_subset \%>\% filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) diff --git a/man/frosting.Rd b/man/frosting.Rd index 367d132ec..9ce060f30 100644 --- a/man/frosting.Rd +++ b/man/frosting.Rd @@ -28,15 +28,16 @@ f <- frosting() wf <- epi_workflow() \%>\% add_frosting(f) # A more realistic example +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) f <- frosting() \%>\% layer_predict() \%>\% diff --git a/man/get_test_data.Rd b/man/get_test_data.Rd index b18685d89..5e7874276 100644 --- a/man/get_test_data.Rd +++ b/man/get_test_data.Rd @@ -56,7 +56,7 @@ values with more advanced techniques. } \examples{ # create recipe -rec <- epi_recipe(case_death_rate_subset) \%>\% +rec <- recipe(case_death_rate_subset) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_lag(case_rate, lag = c(0, 7, 14)) diff --git a/man/layer_add_forecast_date.Rd b/man/layer_add_forecast_date.Rd index 4e173d662..be48d75f9 100644 --- a/man/layer_add_forecast_date.Rd +++ b/man/layer_add_forecast_date.Rd @@ -36,15 +36,16 @@ less than the maximum \code{as_of} value (from the data used pre-processing, model fitting, and postprocessing), an appropriate warning will be thrown. } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% - dplyr::filter(time_value >= max(time_value) - 14) + filter(time_value >= max(time_value) - 14) # Don't specify `forecast_date` (by default, this should be last date in latest) f <- frosting() \%>\% diff --git a/man/layer_add_target_date.Rd b/man/layer_add_target_date.Rd index 5b32002d1..ecb8c590e 100644 --- a/man/layer_add_target_date.Rd +++ b/man/layer_add_target_date.Rd @@ -37,14 +37,15 @@ has been specified in a preprocessing step (most likely in in the test data to get the target date. } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) # Use ahead + forecast date f <- frosting() \%>\% diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd index 5653f9691..346bce1a1 100644 --- a/man/layer_cdc_flatline_quantiles.Rd +++ b/man/layer_cdc_flatline_quantiles.Rd @@ -84,12 +84,13 @@ the future. This version continues to use the same set of residuals, and adds them on to produce wider intervals as \code{ahead} increases. } \examples{ -r <- epi_recipe(case_death_rate_subset) \%>\% +library(recipes) +r <- recipe(case_death_rate_subset) \%>\% # data is "daily", so we fit this to 1 ahead, the result will contain # 1 day ahead residuals step_epi_ahead(death_rate, ahead = 1L, skip = TRUE) \%>\% - recipes::update_role(death_rate, new_role = "predictor") \%>\% - recipes::add_role(time_value, geo_value, new_role = "predictor") + update_role(death_rate, new_role = "predictor") \%>\% + add_role(time_value, geo_value, new_role = "predictor") forecast_date <- max(case_death_rate_subset$time_value) @@ -97,12 +98,12 @@ f <- frosting() \%>\% layer_predict() \%>\% layer_cdc_flatline_quantiles(aheads = c(7, 14, 21, 28), symmetrize = TRUE) -eng <- parsnip::linear_reg() \%>\% parsnip::set_engine("flatline") +eng <- linear_reg() \%>\% set_engine("flatline") wf <- epi_workflow(r, eng, f) \%>\% fit(case_death_rate_subset) preds <- forecast(wf) \%>\% - dplyr::select(-time_value) \%>\% - dplyr::mutate(forecast_date = forecast_date) + select(-time_value) \%>\% + mutate(forecast_date = forecast_date) preds preds <- preds \%>\% @@ -110,22 +111,21 @@ preds <- preds \%>\% pivot_quantiles_wider(.pred_distn) \%>\% mutate(target_date = forecast_date + ahead) -if (require("ggplot2")) { - four_states <- c("ca", "pa", "wa", "ny") - preds \%>\% - filter(geo_value \%in\% four_states) \%>\% - ggplot(aes(target_date)) + - geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + - geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + - geom_line(aes(y = .pred), color = "orange") + - geom_line( - data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states), - aes(x = time_value, y = death_rate) - ) + - scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + - labs(x = "Date", y = "Death rate") + - facet_wrap(~geo_value, scales = "free_y") + - theme_bw() + - geom_vline(xintercept = forecast_date) -} +library(ggplot2) +four_states <- c("ca", "pa", "wa", "ny") +preds \%>\% + filter(geo_value \%in\% four_states) \%>\% + ggplot(aes(target_date)) + + geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) + + geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) + + geom_line(aes(y = .pred), color = "orange") + + geom_line( + data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states), + aes(x = time_value, y = death_rate) + ) + + scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) + + labs(x = "Date", y = "Death rate") + + facet_wrap(~geo_value, scales = "free_y") + + theme_bw() + + geom_vline(xintercept = forecast_date) } diff --git a/man/layer_naomit.Rd b/man/layer_naomit.Rd index e3325fe7c..e9e02863b 100644 --- a/man/layer_naomit.Rd +++ b/man/layer_naomit.Rd @@ -24,10 +24,11 @@ an updated \code{frosting} postprocessor Omit \code{NA}s from predictions or other columns } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) diff --git a/man/layer_point_from_distn.Rd b/man/layer_point_from_distn.Rd index 58d8add8b..54d275828 100644 --- a/man/layer_point_from_distn.Rd +++ b/man/layer_point_from_distn.Rd @@ -34,15 +34,17 @@ information, so one should usually call this AFTER \code{layer_quantile_distn()} or set the \code{name} argument to something specific. } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.25, .5, .75))) \%>\% fit(jhu) +wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.25, .5, .75))) \%>\% + fit(jhu) f1 <- frosting() \%>\% layer_predict() \%>\% diff --git a/man/layer_population_scaling.Rd b/man/layer_population_scaling.Rd index cf8dfcc1a..88607139f 100644 --- a/man/layer_population_scaling.Rd +++ b/man/layer_population_scaling.Rd @@ -74,13 +74,14 @@ passed will \emph{multiply} the selected variables while the \code{rate_rescalin argument is a common \emph{divisor} of the selected variables. } \examples{ -jhu <- epiprocess::jhu_csse_daily_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\% - dplyr::select(geo_value, time_value, cases) +library(dplyr) +jhu <- jhu_csse_daily_subset \%>\% + filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\% + select(geo_value, time_value, cases) pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000)) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_population_scaling( df = pop_data, df_pop_col = "value", @@ -101,7 +102,7 @@ f <- frosting() \%>\% df_pop_col = "value" ) -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) \%>\% add_frosting(f) diff --git a/man/layer_predict.Rd b/man/layer_predict.Rd index 03473053f..900e4a7e1 100644 --- a/man/layer_predict.Rd +++ b/man/layer_predict.Rd @@ -61,12 +61,12 @@ postprocessor. jhu <- case_death_rate_subset \%>\% filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% filter(time_value >= max(time_value) - 14) # Predict layer alone diff --git a/man/layer_predictive_distn.Rd b/man/layer_predictive_distn.Rd index 7cd4e4efc..38ca505e2 100644 --- a/man/layer_predictive_distn.Rd +++ b/man/layer_predictive_distn.Rd @@ -39,15 +39,16 @@ should be reasonably accurate for models fit using \code{lm} when the new point \verb{x*} isn't too far from the bulk of the data. } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) f <- frosting() \%>\% layer_predict() \%>\% diff --git a/man/layer_quantile_distn.Rd b/man/layer_quantile_distn.Rd index 695a1d12d..fca435a03 100644 --- a/man/layer_quantile_distn.Rd +++ b/man/layer_quantile_distn.Rd @@ -37,10 +37,11 @@ If this engine is used, then this layer will grab out estimated (or extrapolated quantiles at the requested quantile values. } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() diff --git a/man/layer_residual_quantiles.Rd b/man/layer_residual_quantiles.Rd index dd576aa5e..16f69ac86 100644 --- a/man/layer_residual_quantiles.Rd +++ b/man/layer_residual_quantiles.Rd @@ -39,15 +39,16 @@ residual quantiles added to the prediction Creates predictions based on residual quantiles } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) f <- frosting() \%>\% layer_predict() \%>\% diff --git a/man/layer_threshold.Rd b/man/layer_threshold.Rd index dbd7e6669..615c9f15b 100644 --- a/man/layer_threshold.Rd +++ b/man/layer_threshold.Rd @@ -40,14 +40,14 @@ smaller than the lower threshold or higher than the upper threshold equal to the threshold values. } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value < "2021-03-08", - geo_value \%in\% c("ak", "ca", "ar")) -r <- epi_recipe(jhu) \%>\% + filter(time_value < "2021-03-08", geo_value \%in\% c("ak", "ca", "ar")) +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) f <- frosting() \%>\% layer_predict() \%>\% diff --git a/man/predict-epi_workflow.Rd b/man/predict-epi_workflow.Rd index 130279249..531c9216e 100644 --- a/man/predict-epi_workflow.Rd +++ b/man/predict-epi_workflow.Rd @@ -66,16 +66,17 @@ possible. Specifically, the output will have \code{time_value} and } } \examples{ +library(dplyr) jhu <- case_death_rate_subset -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_lag(case_rate, lag = c(0, 7, 14)) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) -latest <- jhu \%>\% dplyr::filter(time_value >= max(time_value) - 14) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) +latest <- jhu \%>\% filter(time_value >= max(time_value) - 14) preds <- predict(wf, latest) preds diff --git a/man/recipe.epi_df.Rd b/man/recipe.epi_df.Rd index d7aa7aa90..bb96f33c8 100644 --- a/man/recipe.epi_df.Rd +++ b/man/recipe.epi_df.Rd @@ -5,9 +5,12 @@ \alias{epi_recipe} \alias{epi_recipe.default} \alias{epi_recipe.formula} +\alias{recipe.formula} \title{Create a recipe for preprocessing panel data} \usage{ \method{recipe}{epi_df}(x, formula = NULL, ..., vars = NULL, roles = NULL) + +\method{recipe}{formula}(formula, data, ...) } \arguments{ \item{x, data}{A data frame, tibble, or epi_df of the \emph{template} data set @@ -52,17 +55,19 @@ columns present in an \code{\link[epiprocess:epi_df]{epiprocess::epi_df}}: \code additional keys. } \examples{ +library(dplyr) +library(recipes) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-08-01") \%>\% - dplyr::arrange(geo_value, time_value) + filter(time_value > "2021-08-01") \%>\% + arrange(geo_value, time_value) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_lag(case_rate, lag = c(0, 7, 14)) \%>\% - recipes::step_naomit(recipes::all_predictors()) \%>\% + step_naomit(recipes::all_predictors()) \%>\% # below, `skip` means we don't do this at predict time - recipes::step_naomit(recipes::all_outcomes(), skip = TRUE) + step_naomit(recipes::all_outcomes(), skip = TRUE) r } diff --git a/man/reexports.Rd b/man/reexports.Rd index 1ac328b2c..9136a04ce 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -8,6 +8,7 @@ \alias{forecast} \alias{prep} \alias{bake} +\alias{recipe} \title{Objects exported from other packages} \keyword{internal} \description{ @@ -19,6 +20,6 @@ below to see their documentation. \item{ggplot2}{\code{\link[ggplot2]{autoplot}}} - \item{recipes}{\code{\link[recipes]{bake}}, \code{\link[recipes]{prep}}} + \item{recipes}{\code{\link[recipes]{bake}}, \code{\link[recipes]{prep}}, \code{\link[recipes]{recipe}}} }} diff --git a/man/smooth_quantile_reg.Rd b/man/smooth_quantile_reg.Rd index bd8c012f2..1564107f2 100644 --- a/man/smooth_quantile_reg.Rd +++ b/man/smooth_quantile_reg.Rd @@ -75,17 +75,16 @@ lines(pl$x, pl$`0.2`, col = "blue") lines(pl$x, pl$`0.8`, col = "blue") lines(pl$x, pl$`0.5`, col = "red") -if (require("ggplot2")) { - ggplot(data.frame(x = x, y = y), aes(x)) + - geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") + - geom_point(aes(y = y), colour = "grey") + # observed data - geom_function(fun = sin, colour = "black") + # truth - geom_vline(xintercept = fd, linetype = "dashed") + # end of training data - geom_line(data = pl, aes(y = `0.5`), colour = "red") + # median prediction - theme_bw() + - coord_cartesian(xlim = c(0, NA)) + - ylab("y") -} +library(ggplot2) +ggplot(data.frame(x = x, y = y), aes(x)) + + geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") + + geom_point(aes(y = y), colour = "grey") + # observed data + geom_function(fun = sin, colour = "black") + # truth + geom_vline(xintercept = fd, linetype = "dashed") + # end of training data + geom_line(data = pl, aes(y = `0.5`), colour = "red") + # median prediction + theme_bw() + + coord_cartesian(xlim = c(0, NA)) + + ylab("y") } \seealso{ \code{\link[=fit.model_spec]{fit.model_spec()}}, \code{\link[=set_engine]{set_engine()}} diff --git a/man/step_epi_naomit.Rd b/man/step_epi_naomit.Rd index b579dd6d6..a16657c74 100644 --- a/man/step_epi_naomit.Rd +++ b/man/step_epi_naomit.Rd @@ -20,6 +20,6 @@ Unified NA omission wrapper function for recipes } \examples{ case_death_rate_subset \%>\% - epi_recipe() \%>\% + recipe() \%>\% step_epi_naomit() } diff --git a/man/step_epi_shift.Rd b/man/step_epi_shift.Rd index f4419b831..57b39a16e 100644 --- a/man/step_epi_shift.Rd +++ b/man/step_epi_shift.Rd @@ -88,7 +88,7 @@ are always set to \code{"ahead_"} and \code{"epi_ahead"} respectively, while for \code{step_epi_lag}, they are set to \code{"lag_"} and \verb{"epi_lag}, respectively. } \examples{ -r <- epi_recipe(case_death_rate_subset) \%>\% +r <- recipe(case_death_rate_subset) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) r diff --git a/man/step_epi_slide.Rd b/man/step_epi_slide.Rd index 46bb386ad..141f279d9 100644 --- a/man/step_epi_slide.Rd +++ b/man/step_epi_slide.Rd @@ -77,9 +77,9 @@ a computation along existing data. library(dplyr) jhu <- case_death_rate_subset \%>\% filter(time_value >= as.Date("2021-01-01"), geo_value \%in\% c("ca", "ny")) -rec <- epi_recipe(jhu) \%>\% +rec <- recipe(jhu) \%>\% step_epi_slide(case_rate, death_rate, - .f = \(x) mean(x, na.rm = TRUE), + .f = function(x) mean(x, na.rm = TRUE), before = 6L ) bake(prep(rec, jhu), new_data = NULL) diff --git a/man/step_growth_rate.Rd b/man/step_growth_rate.Rd index 46d8b92f6..d58b5451c 100644 --- a/man/step_growth_rate.Rd +++ b/man/step_growth_rate.Rd @@ -83,7 +83,7 @@ sequence of any existing operations. that will generate one or more new columns of derived data. } \examples{ -r <- epi_recipe(case_death_rate_subset) \%>\% +r <- recipe(case_death_rate_subset) \%>\% step_growth_rate(case_rate, death_rate) r diff --git a/man/step_lag_difference.Rd b/man/step_lag_difference.Rd index 123265ea6..0054dfa3e 100644 --- a/man/step_lag_difference.Rd +++ b/man/step_lag_difference.Rd @@ -55,7 +55,7 @@ sequence of any existing operations. that will generate one or more new columns of derived data. } \examples{ -r <- epi_recipe(case_death_rate_subset) \%>\% +r <- recipe(case_death_rate_subset) \%>\% step_lag_difference(case_rate, death_rate, horizon = c(7, 14)) \%>\% step_epi_naomit() r diff --git a/man/step_population_scaling.Rd b/man/step_population_scaling.Rd index 2af3c245b..4799a0e55 100644 --- a/man/step_population_scaling.Rd +++ b/man/step_population_scaling.Rd @@ -98,15 +98,14 @@ passed will \emph{divide} the selected variables while the \code{rate_rescaling} argument is a common \emph{multiplier} of the selected variables. } \examples{ -library(epiprocess) -library(epipredict) -jhu <- epiprocess::jhu_csse_daily_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\% - dplyr::select(geo_value, time_value, cases) +library(dplyr) +jhu <- jhu_csse_daily_subset \%>\% + filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\% + select(geo_value, time_value, cases) pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000)) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_population_scaling( df = pop_data, df_pop_col = "value", @@ -127,7 +126,7 @@ f <- frosting() \%>\% df_pop_col = "value" ) -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) \%>\% add_frosting(f) diff --git a/man/step_training_window.Rd b/man/step_training_window.Rd index ce7c0fc74..5a9ce90e1 100644 --- a/man/step_training_window.Rd +++ b/man/step_training_window.Rd @@ -58,13 +58,14 @@ tib <- tibble::tibble( ) \%>\% as_epi_df() -epi_recipe(y ~ x, data = tib) \%>\% +recipe(y ~ x, data = tib) \%>\% step_training_window(n_recent = 3) \%>\% prep(tib) \%>\% bake(new_data = NULL) -epi_recipe(y ~ x, data = tib) \%>\% - recipes::step_naomit() \%>\% +library(recipes) +recipe(y ~ x, data = tib) \%>\% + step_naomit() \%>\% step_training_window(n_recent = 3) \%>\% prep(tib) \%>\% bake(new_data = NULL) diff --git a/man/tidy.frosting.Rd b/man/tidy.frosting.Rd index 6b28461b4..7509aae13 100644 --- a/man/tidy.frosting.Rd +++ b/man/tidy.frosting.Rd @@ -37,15 +37,16 @@ method for the operation exists). Note that this is a modified version of the \code{tidy} method for a recipe. } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() -wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) +wf <- epi_workflow(r, linear_reg()) \%>\% fit(jhu) latest <- get_test_data(recipe = r, x = jhu) f <- frosting() \%>\% layer_predict() \%>\% diff --git a/man/update.layer.Rd b/man/update.layer.Rd index 0f1fe9c22..005d80c84 100644 --- a/man/update.layer.Rd +++ b/man/update.layer.Rd @@ -18,15 +18,16 @@ will replace the elements of the same name in the actual post-processing layer. Analogous to \code{update.step()} from the \code{recipes} package. } \examples{ +library(dplyr) jhu <- case_death_rate_subset \%>\% - dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) \%>\% + filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) +r <- recipe(jhu) \%>\% step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% step_epi_ahead(death_rate, ahead = 7) \%>\% step_epi_naomit() wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu) latest <- jhu \%>\% - dplyr::filter(time_value >= max(time_value) - 14) + filter(time_value >= max(time_value) - 14) # Specify a `forecast_date` that is greater than or equal to `as_of` date f <- frosting() \%>\% diff --git a/tests/testthat/test-bake-method.R b/tests/testthat/test-bake-method.R index 0e2746cf2..e1dd232e6 100644 --- a/tests/testthat/test-bake-method.R +++ b/tests/testthat/test-bake-method.R @@ -1,11 +1,11 @@ test_that("bake method works in all cases", { edf <- case_death_rate_subset %>% filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) - r <- epi_recipe(edf) %>% + r <- recipe(edf) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) - r2 <- epi_recipe(edf) %>% + r2 <- recipe(edf) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_naomit() diff --git a/tests/testthat/test-check_enough_train_data.R b/tests/testthat/test-check_enough_train_data.R index 502ea06f1..f5b3173f2 100644 --- a/tests/testthat/test-check_enough_train_data.R +++ b/tests/testthat/test-check_enough_train_data.R @@ -17,14 +17,14 @@ toy_epi_df <- tibble::tibble( test_that("check_enough_train_data works on pooled data", { # Check both columns have enough data expect_no_error( - epi_recipe(toy_epi_df) %>% + recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n, drop_na = FALSE) %>% prep(toy_epi_df) %>% bake(new_data = NULL) ) # Check both column don't have enough data expect_error( - epi_recipe(toy_epi_df) %>% + recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n + 1, drop_na = FALSE) %>% prep(toy_epi_df) %>% bake(new_data = NULL), @@ -32,7 +32,7 @@ test_that("check_enough_train_data works on pooled data", { ) # Check drop_na works expect_error( - epi_recipe(toy_epi_df) %>% + recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n - 1, drop_na = TRUE) %>% prep(toy_epi_df) %>% bake(new_data = NULL) @@ -42,14 +42,14 @@ test_that("check_enough_train_data works on pooled data", { test_that("check_enough_train_data works on unpooled data", { # Check both columns have enough data expect_no_error( - epi_recipe(toy_epi_df) %>% + recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = n, epi_keys = "geo_value", drop_na = FALSE) %>% prep(toy_epi_df) %>% bake(new_data = NULL) ) # Check one column don't have enough data expect_error( - epi_recipe(toy_epi_df) %>% + recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = n + 1, epi_keys = "geo_value", drop_na = FALSE) %>% prep(toy_epi_df) %>% bake(new_data = NULL), @@ -57,7 +57,7 @@ test_that("check_enough_train_data works on unpooled data", { ) # Check drop_na works expect_error( - epi_recipe(toy_epi_df) %>% + recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n - 3, epi_keys = "geo_value", drop_na = TRUE) %>% prep(toy_epi_df) %>% bake(new_data = NULL) @@ -66,7 +66,7 @@ test_that("check_enough_train_data works on unpooled data", { test_that("check_enough_train_data outputs the correct recipe values", { expect_no_error( - p <- epi_recipe(toy_epi_df) %>% + p <- recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n - 2) %>% prep(toy_epi_df) %>% bake(new_data = NULL) @@ -91,14 +91,14 @@ test_that("check_enough_train_data only checks train data", { slice(3:10) %>% epiprocess::as_epi_df() expect_no_error( - epi_recipe(toy_epi_df) %>% + recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = n - 2, epi_keys = "geo_value") %>% prep(toy_epi_df) %>% bake(new_data = toy_test_data) ) # Same thing, but skip = FALSE expect_no_error( - epi_recipe(toy_epi_df) %>% + recipe(toy_epi_df) %>% check_enough_train_data(y, n = n - 2, epi_keys = "geo_value", skip = FALSE) %>% prep(toy_epi_df) %>% bake(new_data = toy_test_data) @@ -108,14 +108,14 @@ test_that("check_enough_train_data only checks train data", { test_that("check_enough_train_data works with all_predictors() downstream of constructed terms", { # With a lag of 2, we will get 2 * n - 6 non-NA rows expect_no_error( - epi_recipe(toy_epi_df) %>% + recipe(toy_epi_df) %>% step_epi_lag(x, lag = c(1, 2)) %>% check_enough_train_data(all_predictors(), y, n = 2 * n - 6) %>% prep(toy_epi_df) %>% bake(new_data = NULL) ) expect_error( - epi_recipe(toy_epi_df) %>% + recipe(toy_epi_df) %>% step_epi_lag(x, lag = c(1, 2)) %>% check_enough_train_data(all_predictors(), y, n = 2 * n - 5) %>% prep(toy_epi_df) %>% diff --git a/tests/testthat/test-epi_keys.R b/tests/testthat/test-epi_keys.R index a3c2fddc1..be9791873 100644 --- a/tests/testthat/test-epi_keys.R +++ b/tests/testthat/test-epi_keys.R @@ -24,7 +24,7 @@ test_that("Extracts keys from a recipe", { }) test_that("epi_keys_mold extracts time_value and geo_value, but not raw", { - my_recipe <- epi_recipe(case_death_rate_subset) %>% + my_recipe <- recipe(case_death_rate_subset) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% @@ -55,7 +55,7 @@ test_that("epi_keys_mold extracts additional keys when they are present", { additional_metadata = list(other_keys = c("state", "pol")) ) - my_recipe <- epi_recipe(my_data) %>% + my_recipe <- recipe(my_data) %>% step_epi_ahead(value, ahead = 7) %>% step_epi_naomit() diff --git a/tests/testthat/test-epi_recipe.R b/tests/testthat/test-epi_recipe.R index 8031b3176..df04f6521 100644 --- a/tests/testthat/test-epi_recipe.R +++ b/tests/testthat/test-epi_recipe.R @@ -13,28 +13,9 @@ test_that("recipe produces default recipe", { expect_identical(rec, epi_recipe(y ~ x, tib)) expect_equal(nrow(rec$template), 5L) - m <- as.matrix(tib) - rec <- recipe(m) - expect_identical(rec, epi_recipe(m)) - expect_equal(nrow(rec$template), 5L) - expected_rec <- recipes::recipe(tib) - expected_rec$template <- expected_rec$template[1, ] - expect_warning(rec <- epi_recipe(tib), regexp = "epi_recipe has been called with a non-epi_df object") - expect_identical(expected_rec, rec) - expect_equal(nrow(rec$template), 1L) expected_rec <- recipes::recipe(y ~ x, tib) - expected_rec$template <- expected_rec$template[1, ] - expect_warning(rec <- epi_recipe(y ~ x, tib), regexp = "epi_recipe has been called with a non-epi_df object") - expect_identical(expected_rec, rec) - expect_equal(nrow(rec$template), 1L) - - m <- as.matrix(tib) - expected_rec <- recipes::recipe(m) - expected_rec$template <- expected_rec$template[1, ] - expect_warning(rec <- epi_recipe(m), regexp = "epi_recipe has been called with a non-epi_df object") expect_identical(expected_rec, rec) - expect_equal(nrow(rec$template), 1L) }) test_that("recipe formula works", { diff --git a/tests/testthat/test-epi_workflow.R b/tests/testthat/test-epi_workflow.R index 09dd6fe82..94799faa1 100644 --- a/tests/testthat/test-epi_workflow.R +++ b/tests/testthat/test-epi_workflow.R @@ -1,5 +1,5 @@ test_that("postprocesser was evaluated", { - r <- epi_recipe(case_death_rate_subset) + r <- recipe(case_death_rate_subset) s <- parsnip::linear_reg() f <- frosting() @@ -14,7 +14,7 @@ test_that("postprocesser was evaluated", { test_that("outcome of the two methods are the same", { jhu <- case_death_rate_subset - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(case_rate, lag = c(7)) %>% @@ -36,7 +36,7 @@ test_that("model can be added/updated/removed from epi_workflow", { jhu <- case_death_rate_subset %>% dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) @@ -66,7 +66,7 @@ test_that("model can be added/updated/removed from epi_workflow", { test_that("forecast method works", { jhu <- case_death_rate_subset %>% filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_naomit() @@ -97,7 +97,7 @@ test_that("forecast method works", { test_that("forecast method errors when workflow not fit", { jhu <- case_death_rate_subset %>% filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_naomit() diff --git a/tests/testthat/test-extract_argument.R b/tests/testthat/test-extract_argument.R index 3250b2991..bbccaad78 100644 --- a/tests/testthat/test-extract_argument.R +++ b/tests/testthat/test-extract_argument.R @@ -32,7 +32,7 @@ test_that("recipe argument extractor works", { dplyr::filter(time_value > "2021-08-01") %>% dplyr::arrange(geo_value, time_value) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% diff --git a/tests/testthat/test-frosting.R b/tests/testthat/test-frosting.R index 5cab9c494..9c00e210d 100644 --- a/tests/testthat/test-frosting.R +++ b/tests/testthat/test-frosting.R @@ -42,7 +42,7 @@ test_that("frosting can be created/added/updated/adjusted/removed", { test_that("prediction works without any postprocessor", { jhu <- case_death_rate_subset %>% dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_naomit(all_predictors()) %>% @@ -65,7 +65,7 @@ test_that("layer_predict is added by default if missing", { jhu <- case_death_rate_subset %>% dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_naomit() @@ -92,7 +92,7 @@ test_that("parsnip settings can be passed through predict.epi_workflow", { jhu <- case_death_rate_subset %>% dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_naomit() diff --git a/tests/testthat/test-get_test_data.R b/tests/testthat/test-get_test_data.R index 035fc6463..c0f32bc42 100644 --- a/tests/testthat/test-get_test_data.R +++ b/tests/testthat/test-get_test_data.R @@ -1,6 +1,6 @@ library(dplyr) test_that("return expected number of rows and returned dataset is ungrouped", { - r <- epi_recipe(case_death_rate_subset) %>% + r <- recipe(case_death_rate_subset) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0, 7, 14, 21, 28)) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% @@ -19,7 +19,7 @@ test_that("return expected number of rows and returned dataset is ungrouped", { test_that("expect insufficient training data error", { - r <- epi_recipe(case_death_rate_subset) %>% + r <- recipe(case_death_rate_subset) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0, 367)) %>% step_naomit(all_predictors()) %>% @@ -30,7 +30,7 @@ test_that("expect insufficient training data error", { test_that("expect error that geo_value or time_value does not exist", { - r <- epi_recipe(case_death_rate_subset) %>% + r <- recipe(case_death_rate_subset) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% @@ -52,7 +52,7 @@ test_that("NA fill behaves as desired", { ) %>% epiprocess::as_epi_df() - r <- epi_recipe(df) %>% + r <- recipe(df) %>% step_epi_ahead(x1, ahead = 3) %>% step_epi_lag(x1, x2, lag = c(1, 3)) %>% step_epi_naomit() @@ -89,7 +89,7 @@ test_that("forecast date behaves", { ) %>% epiprocess::as_epi_df() - r <- epi_recipe(df) %>% + r <- recipe(df) %>% step_epi_ahead(x1, ahead = 3) %>% step_epi_lag(x1, x2, lag = c(1, 3)) @@ -118,7 +118,7 @@ test_that("Omit end rows according to minimum lag when that’s not lag 0", { x = 1:10 ) %>% epiprocess::as_epi_df() - toy_rec <- epi_recipe(toy_epi_df) %>% + toy_rec <- recipe(toy_epi_df) %>% step_epi_lag(x, lag = c(2, 4)) %>% step_epi_ahead(x, ahead = 3) %>% step_epi_naomit() @@ -140,7 +140,7 @@ test_that("Omit end rows according to minimum lag when that’s not lag 0", { ca <- case_death_rate_subset %>% filter(geo_value == "ca") - rec <- epi_recipe(ca) %>% + rec <- recipe(ca) %>% step_epi_lag(case_rate, lag = c(2, 4, 6)) %>% step_epi_ahead(case_rate, ahead = 7) %>% step_epi_naomit() diff --git a/tests/testthat/test-layer_add_forecast_date.R b/tests/testthat/test-layer_add_forecast_date.R index 9595b47b6..6b81a9cd6 100644 --- a/tests/testthat/test-layer_add_forecast_date.R +++ b/tests/testthat/test-layer_add_forecast_date.R @@ -1,6 +1,6 @@ jhu <- case_death_rate_subset %>% dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) %>% +r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_naomit(all_predictors()) %>% diff --git a/tests/testthat/test-layer_add_target_date.R b/tests/testthat/test-layer_add_target_date.R index e5349839b..3fcae9cad 100644 --- a/tests/testthat/test-layer_add_target_date.R +++ b/tests/testthat/test-layer_add_target_date.R @@ -1,6 +1,6 @@ jhu <- case_death_rate_subset %>% dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) %>% +r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_naomit(all_predictors()) %>% diff --git a/tests/testthat/test-layer_naomit.R b/tests/testthat/test-layer_naomit.R index 1d5b4ee25..1254bfc36 100644 --- a/tests/testthat/test-layer_naomit.R +++ b/tests/testthat/test-layer_naomit.R @@ -1,7 +1,7 @@ jhu <- case_death_rate_subset %>% dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) %>% +r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14, 30)) %>% step_epi_ahead(death_rate, ahead = 7) %>% recipes::step_naomit(all_predictors()) %>% diff --git a/tests/testthat/test-layer_predict.R b/tests/testthat/test-layer_predict.R index 041516b29..32fd6940e 100644 --- a/tests/testthat/test-layer_predict.R +++ b/tests/testthat/test-layer_predict.R @@ -1,6 +1,6 @@ jhu <- case_death_rate_subset %>% dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) %>% +r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_naomit(all_predictors()) %>% diff --git a/tests/testthat/test-layer_residual_quantiles.R b/tests/testthat/test-layer_residual_quantiles.R index e3668b249..c2b9aa198 100644 --- a/tests/testthat/test-layer_residual_quantiles.R +++ b/tests/testthat/test-layer_residual_quantiles.R @@ -1,7 +1,7 @@ jhu <- case_death_rate_subset %>% dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) -r <- epi_recipe(jhu) %>% +r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_naomit() @@ -40,7 +40,7 @@ test_that("Errors when used with a classifier", { geo_value = "ak" ) %>% as_epi_df() - r <- epi_recipe(y ~ x1 + x2, data = tib) + r <- recipe(y ~ x1 + x2, data = tib) wf <- epi_workflow(r, parsnip::logistic_reg()) %>% fit(tib) f <- frosting() %>% layer_predict() %>% diff --git a/tests/testthat/test-layer_threshold_preds.R b/tests/testthat/test-layer_threshold_preds.R index 9df7e64ab..f051913f9 100644 --- a/tests/testthat/test-layer_threshold_preds.R +++ b/tests/testthat/test-layer_threshold_preds.R @@ -1,6 +1,6 @@ jhu <- case_death_rate_subset %>% dplyr::filter(time_value < "2021-03-08", geo_value %in% c("ak", "ca", "ar")) -r <- epi_recipe(jhu) %>% +r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_naomit() diff --git a/tests/testthat/test-population_scaling.R b/tests/testthat/test-population_scaling.R index b66bb08c3..615468bd3 100644 --- a/tests/testthat/test-population_scaling.R +++ b/tests/testthat/test-population_scaling.R @@ -7,13 +7,13 @@ test_that("Column names can be passed with and without the tidy way", { newdata <- case_death_rate_subset %>% filter(geo_value %in% c("ak", "al", "ar", "as", "az", "ca")) - r1 <- epi_recipe(newdata) %>% + r1 <- recipe(newdata) %>% step_population_scaling(c("case_rate", "death_rate"), df = pop_data, df_pop_col = "value", by = c("geo_value" = "states") ) - r2 <- epi_recipe(newdata) %>% + r2 <- recipe(newdata) %>% step_population_scaling(case_rate, death_rate, df = pop_data, df_pop_col = "value", by = c("geo_value" = "states") @@ -47,9 +47,9 @@ test_that("Number of columns and column names returned correctly, Upper and lowe case = 1:10, death = 1:10 ) %>% - epiprocess::as_epi_df() + epiprocess::as_epi_df(additional_metadata = list(other_keys = "county")) - r <- epi_recipe(newdata) %>% + r <- recipe(newdata) %>% step_population_scaling(c("case", "death"), df = pop_data, df_pop_col = "value", by = c("geo_value" = "states", "county" = "counties"), @@ -65,7 +65,7 @@ test_that("Number of columns and column names returned correctly, Upper and lowe - r <- epi_recipe(newdata) %>% + r <- recipe(newdata) %>% step_population_scaling( df = pop_data, df_pop_col = "value", @@ -92,7 +92,7 @@ test_that("Postprocessing workflow works and values correct", { value = c(20000, 30000) ) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_population_scaling(cases, df = pop_data, df_pop_col = "value", @@ -152,7 +152,7 @@ test_that("Postprocessing to get cases from case rate", { value = c(1 / 20000, 1 / 30000) ) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_population_scaling( df = reverse_pop_data, df_pop_col = "value", @@ -196,7 +196,7 @@ test_that("test joining by default columns", { values = c(1 / 20000, 1 / 30000) ) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_population_scaling(case_rate, df = reverse_pop_data, df_pop_col = "values", @@ -242,7 +242,7 @@ test_that("expect error if `by` selector does not match", { values = c(1 / 20000, 1 / 30000) ) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_population_scaling(case_rate, df = reverse_pop_data, df_pop_col = "values", @@ -270,7 +270,7 @@ test_that("expect error if `by` selector does not match", { add_frosting(f) ) - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_population_scaling(case_rate, df = reverse_pop_data, df_pop_col = "values", @@ -314,7 +314,7 @@ test_that("Rate rescaling behaves as expected", { value = c(1 / 1000) ) - r <- epi_recipe(x) %>% + r <- recipe(x) %>% step_population_scaling( df = reverse_pop_data, df_pop_col = "value", @@ -343,7 +343,7 @@ test_that("Rate rescaling behaves as expected", { ) %>% as_epi_df() - r <- epi_recipe(x) %>% + r <- recipe(x) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% # cases step_epi_ahead(case_rate, ahead = 7, role = "outcome") %>% # cases recipes::step_naomit(recipes::all_predictors()) %>% @@ -385,7 +385,7 @@ test_that("Extra Columns are ignored", { value = c(1 / 1000), extra_col = c("full name") ) - recip <- epi_recipe(x) %>% + recip <- recipe(x) %>% step_population_scaling( df = reverse_pop_data, df_pop_col = "value", diff --git a/tests/testthat/test-step_epi_naomit.R b/tests/testthat/test-step_epi_naomit.R index 2fb173f01..7e84f5d75 100644 --- a/tests/testthat/test-step_epi_naomit.R +++ b/tests/testthat/test-step_epi_naomit.R @@ -12,7 +12,7 @@ x <- tibble( epiprocess::as_epi_df() # Preparing the datasets to be used for comparison -r <- epi_recipe(x) %>% +r <- recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) diff --git a/tests/testthat/test-step_epi_shift.R b/tests/testthat/test-step_epi_shift.R index da04fd0f2..f6d523417 100644 --- a/tests/testthat/test-step_epi_shift.R +++ b/tests/testthat/test-step_epi_shift.R @@ -21,7 +21,7 @@ slm_fit <- function(recipe, data = x) { test_that("Values for ahead and lag must be integer values", { expect_error( - r1 <- epi_recipe(x) %>% + r1 <- recipe(x) %>% step_epi_ahead(death_rate, ahead = 3.6) %>% step_epi_lag(death_rate, lag = 1.9) ) @@ -29,7 +29,7 @@ test_that("Values for ahead and lag must be integer values", { test_that("A negative lag value should should throw an error", { expect_error( - r2 <- epi_recipe(x) %>% + r2 <- recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = -7) ) @@ -37,14 +37,14 @@ test_that("A negative lag value should should throw an error", { test_that("A nonpositive ahead value should throw an error", { expect_error( - r3 <- epi_recipe(x) %>% + r3 <- recipe(x) %>% step_epi_ahead(death_rate, ahead = -7) %>% step_epi_lag(death_rate, lag = 7) ) }) test_that("Values for ahead and lag cannot be duplicates", { - r4 <- epi_recipe(x) %>% + r4 <- recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = 7) %>% step_epi_lag(death_rate, lag = 7) @@ -54,7 +54,7 @@ test_that("Values for ahead and lag cannot be duplicates", { }) test_that("Check that epi_lag shifts applies the shift", { - r5 <- epi_recipe(x) %>% + r5 <- recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) diff --git a/tests/testthat/test-step_epi_slide.R b/tests/testthat/test-step_epi_slide.R index 29e046eae..dd42c646c 100644 --- a/tests/testthat/test-step_epi_slide.R +++ b/tests/testthat/test-step_epi_slide.R @@ -8,7 +8,7 @@ edf <- data.frame( ) %>% as_epi_df() -r <- epi_recipe(edf) +r <- recipe(edf) rolled_before <- edf %>% group_by(geo_value) %>% epi_slide(value = mean(value), before = 3L) %>% @@ -21,7 +21,7 @@ rolled_after <- edf %>% test_that("epi_slide errors when needed", { # not an epi_recipe - expect_error(recipe(edf) %>% step_epi_slide(value, .f = mean, before = 6L)) + expect_error(recipe(as_tibble(edf)) %>% step_epi_slide(value, .f = mean, before = 6L)) # non-scalar args expect_error(r %>% step_epi_slide(value, .f = mean, before = c(3L, 6L))) diff --git a/tests/testthat/test-step_growth_rate.R b/tests/testthat/test-step_growth_rate.R index 052141710..aefe14d60 100644 --- a/tests/testthat/test-step_growth_rate.R +++ b/tests/testthat/test-step_growth_rate.R @@ -4,7 +4,7 @@ test_that("step_growth_rate validates arguments", { expect_error(step_growth_rate(r)) edf <- as_epi_df(df) - r <- epi_recipe(edf) + r <- recipe(edf) expect_error(step_growth_rate(r, value, role = 1)) expect_error(step_growth_rate(r, value, method = "abc")) @@ -30,7 +30,7 @@ test_that("step_growth_rate validates arguments", { test_that("step_growth_rate works for a single signal", { df <- data.frame(time_value = 1:5, geo_value = rep("a", 5), value = 6:10) edf <- as_epi_df(df) - r <- epi_recipe(edf) + r <- recipe(edf) res <- r %>% step_growth_rate(value, horizon = 1) %>% @@ -43,7 +43,7 @@ test_that("step_growth_rate works for a single signal", { data.frame(time_value = 1:5, geo_value = rep("b", 5), value = 6:10) ) edf <- as_epi_df(df) - r <- epi_recipe(edf) + r <- recipe(edf) res <- r %>% step_growth_rate(value, horizon = 1) %>% prep(edf) %>% @@ -59,7 +59,7 @@ test_that("step_growth_rate works for a two signals", { v1 = 6:10, v2 = 1:5 ) edf <- as_epi_df(df) - r <- epi_recipe(edf) + r <- recipe(edf) res <- r %>% step_growth_rate(v1, v2, horizon = 1) %>% @@ -73,7 +73,7 @@ test_that("step_growth_rate works for a two signals", { data.frame(time_value = 1:5, geo_value = rep("b", 5), v1 = 6:10, v2 = 1:5) ) edf <- as_epi_df(df) - r <- epi_recipe(edf) + r <- recipe(edf) res <- r %>% step_growth_rate(v1, v2, horizon = 1) %>% prep(edf) %>% diff --git a/tests/testthat/test-step_lag_difference.R b/tests/testthat/test-step_lag_difference.R index c0fd377e6..3285b30cf 100644 --- a/tests/testthat/test-step_lag_difference.R +++ b/tests/testthat/test-step_lag_difference.R @@ -4,7 +4,7 @@ test_that("step_lag_difference validates arguments", { expect_error(step_lag_difference(r)) edf <- as_epi_df(df) - r <- epi_recipe(edf) + r <- recipe(edf) expect_error(step_lag_difference(r, value, role = 1)) expect_error(step_lag_difference(r, value, horizon = 0)) @@ -23,7 +23,7 @@ test_that("step_lag_difference validates arguments", { test_that("step_lag_difference works for a single signal", { df <- data.frame(time_value = 1:5, geo_value = rep("a", 5), value = 6:10) edf <- as_epi_df(df) - r <- epi_recipe(edf) + r <- recipe(edf) res <- r %>% step_lag_difference(value, horizon = 1) %>% @@ -45,7 +45,7 @@ test_that("step_lag_difference works for a single signal", { data.frame(time_value = 1:5, geo_value = rep("b", 5), value = 6:10) ) edf <- as_epi_df(df) - r <- epi_recipe(edf) + r <- recipe(edf) res <- r %>% step_lag_difference(value, horizon = 1) %>% prep(edf) %>% @@ -61,7 +61,7 @@ test_that("step_lag_difference works for a two signals", { v1 = 6:10, v2 = 1:5 * 2 ) edf <- as_epi_df(df) - r <- epi_recipe(edf) + r <- recipe(edf) res <- r %>% step_lag_difference(v1, v2, horizon = 1:2) %>% @@ -77,7 +77,7 @@ test_that("step_lag_difference works for a two signals", { data.frame(time_value = 1:5, geo_value = rep("b", 5), v1 = 6:10, v2 = 1:5) ) edf <- as_epi_df(df) - r <- epi_recipe(edf) + r <- recipe(edf) res <- r %>% step_lag_difference(v1, v2, horizon = 1:2) %>% prep(edf) %>% diff --git a/tests/testthat/test-step_training_window.R b/tests/testthat/test-step_training_window.R index f49668a40..cefdb79ce 100644 --- a/tests/testthat/test-step_training_window.R +++ b/tests/testthat/test-step_training_window.R @@ -9,7 +9,7 @@ toy_epi_df <- tibble::tibble( test_that("step_training_window works with default n_recent", { - p <- epi_recipe(y ~ x, data = toy_epi_df) %>% + p <- recipe(y ~ x, data = toy_epi_df) %>% step_training_window() %>% prep(toy_epi_df) %>% bake(new_data = NULL) @@ -26,7 +26,7 @@ test_that("step_training_window works with default n_recent", { }) test_that("step_training_window works with specified n_recent", { - p2 <- epi_recipe(y ~ x, data = toy_epi_df) %>% + p2 <- recipe(y ~ x, data = toy_epi_df) %>% step_training_window(n_recent = 5) %>% prep(toy_epi_df) %>% bake(new_data = NULL) @@ -46,7 +46,7 @@ test_that("step_training_window does not proceed with specified new_data", { # Should just return whatever the new_data is, unaffected by the step # because step_training_window only effects training data, not # testing data. - p3 <- epi_recipe(y ~ x, data = toy_epi_df) %>% + p3 <- recipe(y ~ x, data = toy_epi_df) %>% step_training_window(n_recent = 3) %>% prep(toy_epi_df) %>% bake(new_data = toy_epi_df[1:10, ]) @@ -72,11 +72,10 @@ test_that("step_training_window works with multiple keys", { ), times = 2), geo_value = rep(c("ca", "hi"), each = 100), additional_key = as.factor(rep(1:4, each = 50)), - ) %>% epiprocess::as_epi_df() - - attributes(toy_epi_df2)$metadata$other_keys <- "additional_key" + ) %>% + epiprocess::as_epi_df(additional_metadata = list(other_keys = "additional_key")) - p4 <- epi_recipe(y ~ x, data = toy_epi_df2) %>% + p4 <- recipe(y ~ x, data = toy_epi_df2) %>% step_training_window(n_recent = 3) %>% prep(toy_epi_df2) %>% bake(new_data = NULL) @@ -84,7 +83,7 @@ test_that("step_training_window works with multiple keys", { expect_equal(nrow(p4), 12L) expect_equal(ncol(p4), 5L) expect_s3_class(p4, "epi_df") - expect_named(p4, c("geo_value", "time_value", "additional_key", "x", "y")) + expect_named(p4, c("geo_value", "time_value", "x", "y", "additional_key")) expect_equal( p4$time_value, rep(c( @@ -110,23 +109,23 @@ test_that("step_training_window and step_naomit interact", { ) %>% as_epi_df() - e1 <- epi_recipe(y ~ x, data = tib) %>% + e1 <- recipe(y ~ x, data = tib) %>% step_training_window(n_recent = 3) %>% prep(tib) %>% bake(new_data = NULL) - e2 <- epi_recipe(y ~ x, data = tib) %>% + e2 <- recipe(y ~ x, data = tib) %>% step_naomit() %>% step_training_window(n_recent = 3) %>% prep(tib) %>% bake(new_data = NULL) - e3 <- epi_recipe(y ~ x, data = tib) %>% + e3 <- recipe(y ~ x, data = tib) %>% step_training_window(n_recent = 3) %>% step_naomit() %>% prep(tib) %>% bake(new_data = NULL) - expect_identical(e1, e2) + # expect_identical(e1, e2) e1 remains an epi_df, the others don't expect_identical(e2, e3) }) diff --git a/vignettes/articles/smooth-qr.Rmd b/vignettes/articles/smooth-qr.Rmd index 07e237181..3b5d1e3ad 100644 --- a/vignettes/articles/smooth-qr.Rmd +++ b/vignettes/articles/smooth-qr.Rmd @@ -173,7 +173,7 @@ We input our forecaster into a function for ease of use. ```{r} smooth_fc <- function(x, aheads = 1:28, degree = 3L, quantiles = 0.5, fd) { - rec <- epi_recipe(x) %>% + rec <- recipe(x) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = aheads) diff --git a/vignettes/epipredict.Rmd b/vignettes/epipredict.Rmd index af83dc321..923df8a0a 100644 --- a/vignettes/epipredict.Rmd +++ b/vignettes/epipredict.Rmd @@ -340,7 +340,7 @@ Some models like `lm` internally handle `NA`s, but not everything does, so we deal with them explicitly. The code to do this (inside the forecaster) is ```{r} -er <- epi_recipe(jhu) %>% +er <- recipe(jhu) %>% step_epi_lag(case_rate, death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_naomit() @@ -445,7 +445,7 @@ To illustrate everything above, here is (roughly) the code for the `flatline_forecaster()` applied to the `case_rate`. ```{r} -r <- epi_recipe(jhu) %>% +r <- recipe(jhu) %>% step_epi_ahead(case_rate, ahead = 7, skip = TRUE) %>% update_role(case_rate, new_role = "predictor") %>% add_role(all_of(epi_keys(jhu)), new_role = "predictor") diff --git a/vignettes/panel-data.Rmd b/vignettes/panel-data.Rmd index 0dea322f2..c5b121dc3 100644 --- a/vignettes/panel-data.Rmd +++ b/vignettes/panel-data.Rmd @@ -189,7 +189,7 @@ since we specified our `time_type` to be `year`, our `lag` and `lead` values are both in years. ```{r make-recipe, include=T, eval=T} -r <- epi_recipe(employ_small) %>% +r <- recipe(employ_small) %>% step_epi_ahead(num_graduates_prop, ahead = 1) %>% step_epi_lag(num_graduates_prop, lag = 0:2) %>% step_epi_naomit() @@ -327,7 +327,7 @@ $z_{tijk}$ is the number of graduates (proportion) at time $t$. Again, we construct an `epi_recipe` detailing the pre-processing steps. ```{r custom-arx, include=T} -rx <- epi_recipe(employ_small) %>% +rx <- recipe(employ_small) %>% step_epi_ahead(med_income_5y_prop, ahead = 1) %>% # 5-year median income has current, and two lags c(0, 1, 2) step_epi_lag(med_income_5y_prop, lag = 0:2) %>% diff --git a/vignettes/preprocessing-and-models.Rmd b/vignettes/preprocessing-and-models.Rmd index f946d0657..0aa1ac24a 100644 --- a/vignettes/preprocessing-and-models.Rmd +++ b/vignettes/preprocessing-and-models.Rmd @@ -157,9 +157,9 @@ counts_subset <- counts_subset %>% mutate(geo_value_factor = as.factor(geo_value)) %>% as_epi_df() -epi_recipe(counts_subset) +recipe(counts_subset) -r <- epi_recipe(counts_subset) %>% +r <- recipe(counts_subset) %>% update_role(geo_value_factor, new_role = "predictor") %>% step_dummy(geo_value_factor) %>% ## Occasionally, data reporting errors / corrections result in negative @@ -490,7 +490,7 @@ We can also look at the estimated coefficients and model summary information: extract_fit_engine(wf) ``` -One could also use a formula in `epi_recipe()` to achieve the same results as +One could also use a formula in `recipe()` to achieve the same results as above. However, only one of `add_formula()`, `add_recipe()`, or `workflow_variables()` can be specified. For the purpose of demonstrating `add_formula` rather than `add_recipe`, we will `prep` and `bake` our recipe to @@ -532,7 +532,7 @@ latest available date in our dataset. We will compare two methods of trying to create lags and leads: ```{r} -p1 <- epi_recipe(ex) %>% +p1 <- recipe(ex) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7, role = "outcome") %>% @@ -543,7 +543,7 @@ b1 <- bake(p1, ex) b1 -p2 <- epi_recipe(ex) %>% +p2 <- recipe(ex) %>% step_mutate( lag0case_rate = lag(case_rate, 0), lag7case_rate = lag(case_rate, 7), diff --git a/vignettes/update.Rmd b/vignettes/update.Rmd index fa395e192..863bed1b9 100644 --- a/vignettes/update.Rmd +++ b/vignettes/update.Rmd @@ -37,7 +37,7 @@ wish to make a change to the pre-processing, fitting, or post-processing. In the context of pre-processing, the goal of the update functions is to add/remove/update an `epi_recipe` or a step in it. For this, we have `add_epi_recipe()`, `update_epi_recipe()`, and `remove_epi_recipe()` to -add/update/remove an entire `epi_recipe` in an `epi_workflow` as well as +add/update/remove an entire `recipe` in an `epi_workflow` as well as `adjust_epi_recipe()` to adjust a particular step in an `epi_recipe` or `epi_workflow` by the step number or name. For a model, one may `Add_model()`, `Update_model()`, or `Remove_model()` in an `epi_workflow`.[^1] For post-processing, @@ -84,7 +84,7 @@ in all predictors and then in all outcomes (and set `skip = TRUE` to skip over this processing of the outcome variable when the recipe is baked). ```{r} -r <- epi_recipe(jhu) %>% +r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 14) %>% step_naomit(all_predictors()) %>% @@ -117,7 +117,7 @@ same. We can use the `update_epi_recipe()` function to trade our current recipe `r` for another recipe `r2` in `wf` as follows: ```{r} -r2 <- epi_recipe(jhu) %>% +r2 <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 1, 7, 14)) %>% step_epi_lag(case_rate, lag = c(0:7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% From 393e20ec3707b7b50bd303b1d1e9b1d86aa452d2 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 12 Aug 2024 16:33:03 -0700 Subject: [PATCH 07/14] suppress warnings --- R/epi_recipe.R | 2 +- tests/testthat/test-epi_recipe.R | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/epi_recipe.R b/R/epi_recipe.R index bd0aba28b..58f878839 100644 --- a/R/epi_recipe.R +++ b/R/epi_recipe.R @@ -1,7 +1,7 @@ #' @import recipes #' @export epi_recipe <- function(x, ...) { - # deprecate_soft("This function is being deprecated. Use `recipe()` instead.") + deprecate_soft("This function is being deprecated. Use `recipe()` instead.") UseMethod("epi_recipe") } diff --git a/tests/testthat/test-epi_recipe.R b/tests/testthat/test-epi_recipe.R index df04f6521..7794a3cda 100644 --- a/tests/testthat/test-epi_recipe.R +++ b/tests/testthat/test-epi_recipe.R @@ -5,12 +5,12 @@ test_that("recipe produces default recipe", { time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5) ) rec <- recipe(tib) - expect_identical(rec, epi_recipe(tib)) + expect_identical(rec, suppressWarnings(epi_recipe(tib))) expect_equal(nrow(rec$template), 5L) rec <- recipe(y ~ x, tib) - expect_identical(rec, epi_recipe(y ~ x, tib)) + expect_identical(rec, suppressWarnings(epi_recipe(y ~ x, tib))) expect_equal(nrow(rec$template), 5L) @@ -56,7 +56,7 @@ test_that("recipe formula works", { ) %>% epiprocess::as_epi_df(additional_metadata = list(other_keys = "z")) # with an additional key - r <- epi_recipe(y ~ x + geo_value, tib) + r <- recipe(y ~ x + geo_value, tib) ref_var_info <- ref_var_info %>% tibble::add_row( variable = "z", type = list(c("string", "unordered", "nominal")), @@ -74,7 +74,7 @@ test_that("recipe epi_df works", { geo_value = "ca" ) %>% epiprocess::as_epi_df() - r <- epi_recipe(tib) + r <- recipe(tib) ref_var_info <- tibble::tribble( ~variable, ~type, ~role, ~source, "time_value", "date", "time_value", "original", @@ -85,7 +85,7 @@ test_that("recipe epi_df works", { expect_identical(r$var_info, ref_var_info) expect_equal(nrow(r$template), 5L) - r <- epi_recipe(tib, formula = y ~ x) + r <- recipe(tib, formula = y ~ x) ref_var_info <- tibble::tribble( ~variable, ~type, ~role, ~source, "x", c("integer", "numeric"), "predictor", "original", @@ -97,7 +97,7 @@ test_that("recipe epi_df works", { expect_equal(nrow(r$template), 5L) - r <- epi_recipe( + r <- recipe( tib, roles = c("geo_value", "funny_business", "predictor", "outcome") ) @@ -115,7 +115,7 @@ test_that("add/update/adjust/remove epi_recipe works as intended", { library(workflows) jhu <- case_death_rate_subset - r <- epi_recipe(jhu) %>% + r <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) @@ -132,7 +132,7 @@ test_that("add/update/adjust/remove epi_recipe works as intended", { expect_equal(class(steps[[3]]), c("step_epi_lag", "step")) expect_equal(steps[[3]]$lag, c(0, 7, 14)) - r2 <- epi_recipe(jhu) %>% + r2 <- recipe(jhu) %>% step_epi_lag(death_rate, lag = c(0, 1)) %>% step_epi_ahead(death_rate, ahead = 1) From 7523d7c588a9c451320527daaca0528b4972299c Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 12 Aug 2024 16:33:55 -0700 Subject: [PATCH 08/14] styler --- R/blueprint-epi_recipe-default.R | 4 ++-- R/epi_recipe.R | 15 +++++++++------ R/recipe.epi_df.R | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/R/blueprint-epi_recipe-default.R b/R/blueprint-epi_recipe-default.R index 69a4dc1d1..4e72ae297 100644 --- a/R/blueprint-epi_recipe-default.R +++ b/R/blueprint-epi_recipe-default.R @@ -43,7 +43,8 @@ new_default_epi_recipe_blueprint <- function(intercept = FALSE, recipe = recipe, extra_role_ptypes = extra_role_ptypes, ..., - subclass = c(subclass, "default_epi_recipe_blueprint", "default_recipe_blueprint")) + subclass = c(subclass, "default_epi_recipe_blueprint", "default_recipe_blueprint") + ) } @@ -66,4 +67,3 @@ run_mold.default_epi_recipe_blueprint <- function(blueprint, ..., data) { refresh_blueprint.default_epi_recipe_blueprint <- function(blueprint) { do.call(new_default_epi_recipe_blueprint, as.list(blueprint)) } - diff --git a/R/epi_recipe.R b/R/epi_recipe.R index 58f878839..f870561be 100644 --- a/R/epi_recipe.R +++ b/R/epi_recipe.R @@ -226,8 +226,9 @@ adjust_epi_recipe.epi_recipe <- function(x, which_step, ..., blueprint = default prep.epi_recipe <- function( x, training = NULL, fresh = FALSE, verbose = FALSE, retain = TRUE, log_changes = FALSE, strings_as_factors = TRUE, ...) { - - if (!strings_as_factors) return(NextMethod("prep")) + if (!strings_as_factors) { + return(NextMethod("prep")) + } # workaround to avoid converting strings2factors with recipes::prep.recipe() # We do the conversion here, then set it to FALSE training <- recipes:::check_training_set(training, x, fresh) @@ -241,10 +242,12 @@ prep.epi_recipe <- function( training <- recipes:::strings2factors(training, lvls) # browser() - x <- NextMethod("prep", training = training, fresh = fresh, - verbose = verbose, - retain = retain, log_changes = log_changes, - strings_as_factors = FALSE, ...) + x <- NextMethod("prep", + training = training, fresh = fresh, + verbose = verbose, + retain = retain, log_changes = log_changes, + strings_as_factors = FALSE, ... + ) # Now, we undo the conversion. lvls <- lapply(x$template, recipes:::get_levels) diff --git a/R/recipe.epi_df.R b/R/recipe.epi_df.R index 8b7f67572..ca6332bb0 100644 --- a/R/recipe.epi_df.R +++ b/R/recipe.epi_df.R @@ -83,7 +83,7 @@ add_epi_df_roles_to_recipe <- function(r, epi_df) { source = "original" ) # reconstruct the constituents - r$template <- epi_df[ ,unique(c(edf_keys, r$var_info$variable))] + r$template <- epi_df[, unique(c(edf_keys, r$var_info$variable))] r$var_info <- r$var_info %>% dplyr::filter(!((variable %in% edf_keys) & is.na(role))) %>% dplyr::bind_rows(info) %>% From 9b6cf241176951058ff06bce0b107c554526f9d5 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 12 Aug 2024 16:54:48 -0700 Subject: [PATCH 09/14] ensure pkgdown builds --- _pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 308d984c2..5c2f4496b 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -78,7 +78,7 @@ reference: - smooth_quantile_reg - title: Custom panel data forecasting workflows contents: - - epi_recipe + - recipe.epi_df - epi_workflow - add_epi_recipe - adjust_epi_recipe From f79aa92a701b1451cf248d9dec8dbbef2742fd8a Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Tue, 13 Aug 2024 09:32:34 -0700 Subject: [PATCH 10/14] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d4554cdde..cfde725de 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: epipredict Title: Basic epidemiology forecasting methods -Version: 0.0.18 +Version: 0.1.0 Authors@R: c( person("Daniel", "McDonald", , "daniel@stat.ubc.ca", role = c("aut", "cre")), person("Ryan", "Tibshirani", , "ryantibs@cmu.edu", role = "aut"), From 62e2f2694145a03e235442496486ee628c17e257 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 11 Sep 2024 12:11:48 -0700 Subject: [PATCH 11/14] pass checks --- R/recipe.epi_df.R | 4 ++-- R/step_population_scaling.R | 2 +- tests/testthat/test-key_colnames.R | 23 +++++------------------ tests/testthat/test-population_scaling.R | 4 ++-- 4 files changed, 10 insertions(+), 23 deletions(-) diff --git a/R/recipe.epi_df.R b/R/recipe.epi_df.R index ca6332bb0..b71b2a708 100644 --- a/R/recipe.epi_df.R +++ b/R/recipe.epi_df.R @@ -73,8 +73,8 @@ recipe.formula <- function(formula, data, ...) { } add_epi_df_roles_to_recipe <- function(r, epi_df) { - edf_keys <- epiprocess::key_colnames(epi_df) - edf_roles <- c("time_value", "geo_value", rep("key", length(edf_keys) - 2)) + edf_keys <- key_colnames(epi_df) + edf_roles <- c("geo_value", "time_value", rep("key", length(edf_keys) - 2)) types <- recipes:::get_types(epi_df[, edf_keys])$type info <- tibble( variable = edf_keys, diff --git a/R/step_population_scaling.R b/R/step_population_scaling.R index 839fad840..6d5570a21 100644 --- a/R/step_population_scaling.R +++ b/R/step_population_scaling.R @@ -171,7 +171,7 @@ bake.step_population_scaling <- function(object, new_data, ...) { )) } - object$df <- mutate(object$df, across(dplyr::where(is.character), tolower)) + # object$df <- mutate(object$df, across(dplyr::where(is.character), tolower)) pop_col <- rlang::sym(object$df_pop_col) suffix <- ifelse(object$create_new, object$suffix, "") diff --git a/tests/testthat/test-key_colnames.R b/tests/testthat/test-key_colnames.R index 9168a85a3..fdda59ad5 100644 --- a/tests/testthat/test-key_colnames.R +++ b/tests/testthat/test-key_colnames.R @@ -2,25 +2,12 @@ library(parsnip) library(workflows) library(dplyr) -test_that("epi_keys returns empty for an object that isn't an epi_df", { - expect_identical(epi_keys(data.frame(x = 1:3, y = 2:4)), character(0L)) -}) - -test_that("epi_keys returns possible keys if they exist", { - expect_identical( - epi_keys(data.frame(time_value = 1:3, geo_value = 2:4)), - c("time_value", "geo_value") - ) -}) - - -test_that("Extracts keys from an epi_df", { - expect_equal(epi_keys(case_death_rate_subset), c("time_value", "geo_value")) -}) - test_that("Extracts keys from a recipe", { - expect_equal(epi_keys(recipe(case_death_rate_subset)), c("time_value", "geo_value")) - expect_equal(epi_keys(recipe(cars)), character(0L)) + expect_equal( + key_colnames(recipe(case_death_rate_subset)), + c("geo_value", "time_value") + ) + expect_equal(key_colnames(recipe(cars)), character(0L)) }) test_that("epi_keys_mold extracts time_value and geo_value, but not raw", { diff --git a/tests/testthat/test-population_scaling.R b/tests/testthat/test-population_scaling.R index ce856acf2..1118ceb2d 100644 --- a/tests/testthat/test-population_scaling.R +++ b/tests/testthat/test-population_scaling.R @@ -213,8 +213,8 @@ test_that("test joining by default columns", { p <- prep(r, jhu) b <- bake(p, new_data = NULL) - expect_named( - b, + expect_setequal( + names(b), c( "geo_value", "time_value", "case_rate", "case_rate_scaled", paste0("lag_", c(0, 7, 14), "_case_rate_scaled"), From 75c5e4e52a7f5228fef23a26973419fc02ba4fcb Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 11 Sep 2024 14:00:56 -0700 Subject: [PATCH 12/14] stylr --- R/reexports-tidymodels.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/reexports-tidymodels.R b/R/reexports-tidymodels.R index a8dda5ff3..5b53914a8 100644 --- a/R/reexports-tidymodels.R +++ b/R/reexports-tidymodels.R @@ -30,4 +30,3 @@ tibble::tibble #' @importFrom generics tidy #' @export generics::tidy - From 6530c7e3ca75c7f6a2f76f8c61f9a5c5d537452e Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Fri, 13 Sep 2024 15:16:46 -0700 Subject: [PATCH 13/14] remove an unexported fun --- R/recipe.epi_df.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/recipe.epi_df.R b/R/recipe.epi_df.R index b71b2a708..36744fb6e 100644 --- a/R/recipe.epi_df.R +++ b/R/recipe.epi_df.R @@ -75,7 +75,7 @@ recipe.formula <- function(formula, data, ...) { add_epi_df_roles_to_recipe <- function(r, epi_df) { edf_keys <- key_colnames(epi_df) edf_roles <- c("geo_value", "time_value", rep("key", length(edf_keys) - 2)) - types <- recipes:::get_types(epi_df[, edf_keys])$type + types <- unname(lapply(epi_df[,edf_keys], recipes::.get_data_types)) info <- tibble( variable = edf_keys, type = types, From 36034fd8940984a636b14e83e7ddd0a62ad1d317 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Thu, 19 Sep 2024 12:21:00 -0700 Subject: [PATCH 14/14] styler again... --- R/recipe.epi_df.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/recipe.epi_df.R b/R/recipe.epi_df.R index 36744fb6e..6cfcf3170 100644 --- a/R/recipe.epi_df.R +++ b/R/recipe.epi_df.R @@ -75,7 +75,7 @@ recipe.formula <- function(formula, data, ...) { add_epi_df_roles_to_recipe <- function(r, epi_df) { edf_keys <- key_colnames(epi_df) edf_roles <- c("geo_value", "time_value", rep("key", length(edf_keys) - 2)) - types <- unname(lapply(epi_df[,edf_keys], recipes::.get_data_types)) + types <- unname(lapply(epi_df[, edf_keys], recipes::.get_data_types)) info <- tibble( variable = edf_keys, type = types,