cmu-delphi
diff --git a/‎.Rbuildignore
+2-1 b/‎.Rbuildignore
+2-1
diff --git a/‎DESCRIPTION
+2-1 b/‎DESCRIPTION
+2-1
diff --git a/‎DEVELOPMENT.md
+2 b/‎DEVELOPMENT.md
+2
diff --git a/‎NAMESPACE
+25-1 b/‎NAMESPACE
+25-1
diff --git a/‎NEWS.md
+7 b/‎NEWS.md
+7
diff --git a/‎R/arx_classifier.R
+63-27 b/‎R/arx_classifier.R
+63-27
@@ -20,4 +20,5 @@
 ^doc$
 ^Meta$
 ^.lintr$
-^.venv$
+^.venv$
+^inst/templates$
@@ -1,6 +1,6 @@
 Package: epipredict
 Title: Basic epidemiology forecasting methods
-Version: 0.1.0
+Version: 0.1.1
 Authors@R: c(
     person("Daniel J.", "McDonald", , "[email protected]", role = c("aut", "cre")),
     person("Ryan", "Tibshirani", , "[email protected]", role = "aut"),
@@ -40,6 +40,7 @@ Imports:
     magrittr,
     recipes (>= 1.0.4),
     rlang (>= 1.1.0),
+    purrr,
     stats,
     tibble,
     tidyr,
 
@@ -32,7 +32,9 @@ The `main` version is available at `file:///<local path>/epidatr/epipredict/inde
 You can also build the docs manually and launch the site with python. From the terminal, this looks like
 
 ```bash
+R -e 'pkgdown::clean_site()'
 R -e 'devtools::document()'
+R -e 'pkgdown::build_site()'
 python -m http.server -d docs
 ```
 
 
@@ -19,6 +19,7 @@ S3method(autoplot,canned_epipred)
 S3method(autoplot,epi_workflow)
 S3method(bake,check_enough_train_data)
 S3method(bake,epi_recipe)
+S3method(bake,step_adjust_latency)
 S3method(bake,step_epi_ahead)
 S3method(bake,step_epi_lag)
 S3method(bake,step_epi_slide)
@@ -58,6 +59,7 @@ S3method(predict,epi_workflow)
 S3method(predict,flatline)
 S3method(prep,check_enough_train_data)
 S3method(prep,epi_recipe)
+S3method(prep,step_adjust_latency)
 S3method(prep,step_epi_ahead)
 S3method(prep,step_epi_lag)
 S3method(prep,step_epi_slide)
@@ -87,6 +89,7 @@ S3method(print,layer_quantile_distn)
 S3method(print,layer_residual_quantiles)
 S3method(print,layer_threshold)
 S3method(print,layer_unnest)
+S3method(print,step_adjust_latency)
 S3method(print,step_epi_ahead)
 S3method(print,step_epi_lag)
 S3method(print,step_epi_slide)
@@ -195,6 +198,7 @@ export(remove_frosting)
 export(remove_model)
 export(slather)
 export(smooth_quantile_reg)
+export(step_adjust_latency)
 export(step_epi_ahead)
 export(step_epi_lag)
 export(step_epi_naomit)
@@ -225,6 +229,7 @@ importFrom(checkmate,test_numeric)
 importFrom(checkmate,test_scalar)
 importFrom(cli,cli_abort)
 importFrom(cli,cli_warn)
+importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,all_of)
 importFrom(dplyr,any_of)
@@ -235,13 +240,20 @@ importFrom(dplyr,everything)
 importFrom(dplyr,filter)
 importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
+importFrom(dplyr,group_by_at)
+importFrom(dplyr,join_by)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
+importFrom(dplyr,n)
+importFrom(dplyr,pull)
 importFrom(dplyr,relocate)
 importFrom(dplyr,rename)
+importFrom(dplyr,rowwise)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
 importFrom(dplyr,summarize)
+importFrom(dplyr,tibble)
+importFrom(dplyr,tribble)
 importFrom(dplyr,ungroup)
 importFrom(epiprocess,epi_slide)
 importFrom(epiprocess,growth_rate)
@@ -255,18 +267,20 @@ importFrom(ggplot2,geom_line)
 importFrom(ggplot2,geom_linerange)
 importFrom(ggplot2,geom_point)
 importFrom(ggplot2,geom_ribbon)
+importFrom(glue,glue)
+importFrom(hardhat,extract_recipe)
 importFrom(hardhat,refresh_blueprint)
 importFrom(hardhat,run_mold)
 importFrom(magrittr,"%>%")
 importFrom(recipes,bake)
+importFrom(recipes,detect_step)
 importFrom(recipes,prep)
 importFrom(recipes,rand_id)
 importFrom(rlang,"!!!")
 importFrom(rlang,"!!")
 importFrom(rlang,"%@%")
 importFrom(rlang,"%||%")
 importFrom(rlang,":=")
-importFrom(rlang,abort)
 importFrom(rlang,arg_match)
 importFrom(rlang,as_function)
 importFrom(rlang,caller_arg)
@@ -276,16 +290,19 @@ importFrom(rlang,enquos)
 importFrom(rlang,expr)
 importFrom(rlang,global_env)
 importFrom(rlang,inject)
+importFrom(rlang,is_empty)
 importFrom(rlang,is_logical)
 importFrom(rlang,is_null)
 importFrom(rlang,is_true)
+importFrom(rlang,list2)
 importFrom(rlang,set_names)
 importFrom(rlang,sym)
 importFrom(stats,as.formula)
 importFrom(stats,family)
 importFrom(stats,lm)
 importFrom(stats,median)
 importFrom(stats,model.frame)
+importFrom(stats,na.omit)
 importFrom(stats,poly)
 importFrom(stats,predict)
 importFrom(stats,qnorm)
@@ -294,6 +311,12 @@ importFrom(stats,residuals)
 importFrom(tibble,as_tibble)
 importFrom(tibble,tibble)
 importFrom(tidyr,crossing)
+importFrom(tidyr,drop_na)
+importFrom(tidyr,expand_grid)
+importFrom(tidyr,fill)
+importFrom(tidyr,unnest)
+importFrom(tidyselect,all_of)
+importFrom(utils,capture.output)
 importFrom(vctrs,as_list_of)
 importFrom(vctrs,field)
 importFrom(vctrs,new_rcrd)
@@ -303,3 +326,4 @@ importFrom(vctrs,vec_data)
 importFrom(vctrs,vec_ptype_abbr)
 importFrom(vctrs,vec_ptype_full)
 importFrom(vctrs,vec_recycle_common)
+importFrom(workflows,extract_preprocessor)
@@ -2,6 +2,13 @@
 
 Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicate PR's.
 
+# epipredict 0.2
+
+## features
+- Add `step_adjust_latency`, which gives several methods to adjust the forecast if the `forecast_date` is after the last day of data.
+
+## bugfixes
+
 # epipredict 0.1
 
 - simplify `layer_residual_quantiles()` to avoid timesuck in `utils::methods()`
 
@@ -55,11 +55,18 @@ arx_classifier <- function(
   wf <- arx_class_epi_workflow(epi_data, outcome, predictors, trainer, args_list)
   wf <- fit(wf, epi_data)
 
+  if (args_list$adjust_latency == "none") {
+    forecast_date_default <- max(epi_data$time_value) # no latency adjustment: default to the latest time_value
+    if (!is.null(args_list$forecast_date) && args_list$forecast_date != forecast_date_default) {
+      cli_warn("The specified forecast date {args_list$forecast_date} doesn't match the date from which the forecast is occurring {forecast_date_default}.")
+    }
+  } else {
+    forecast_date_default <- attributes(epi_data)$metadata$as_of # latency adjustment requested: default to the data's as_of
+  }
+  forecast_date <- args_list$forecast_date %||% forecast_date_default # a user-supplied date wins over the default
+  target_date <- args_list$target_date %||% (forecast_date + args_list$ahead)
   preds <- forecast(
     wf,
-    fill_locf = TRUE,
-    n_recent = args_list$nafill_buffer,
-    forecast_date = args_list$forecast_date %||% max(epi_data$time_value)
   ) %>%
     as_tibble() %>%
     select(-time_value)
@@ -125,27 +132,39 @@ arx_class_epi_workflow <- function(
   if (!(is.null(trainer) || is_classification(trainer))) {
     cli_abort("`trainer` must be a {.pkg parsnip} model of mode 'classification'.")
   }
+
+  if (args_list$adjust_latency == "none") {
+    forecast_date_default <- max(epi_data$time_value) # no latency adjustment: default to the latest time_value
+    if (!is.null(args_list$forecast_date) && args_list$forecast_date != forecast_date_default) {
+      cli_warn("The specified forecast date {args_list$forecast_date} doesn't match the date from which the forecast is occurring {forecast_date_default}.")
+    }
+  } else {
+    forecast_date_default <- attributes(epi_data)$metadata$as_of # latency adjustment requested: default to the data's as_of
+  }
+  forecast_date <- args_list$forecast_date %||% forecast_date_default # a user-supplied date wins over the default
+  target_date <- args_list$target_date %||% (forecast_date + args_list$ahead)
+
   lags <- arx_lags_validator(predictors, args_list$lags)
 
   # --- preprocessor
   # ------- predictors
   r <- epi_recipe(epi_data) %>%
     step_growth_rate(
-      dplyr::all_of(predictors),
+      all_of(predictors),
       role = "grp",
       horizon = args_list$horizon,
       method = args_list$method,
       log_scale = args_list$log_scale,
       additional_gr_args_list = args_list$additional_gr_args
     )
   for (l in seq_along(lags)) {
-    p <- predictors[l]
-    p <- as.character(glue::glue_data(args_list, "gr_{horizon}_{method}_{p}"))
-    r <- step_epi_lag(r, !!p, lag = lags[[l]])
+    pred_names <- predictors[l]
+    pred_names <- as.character(glue::glue_data(args_list, "gr_{horizon}_{method}_{pred_names}"))
+    r <- step_epi_lag(r, !!pred_names, lag = lags[[l]])
   }
   # ------- outcome
   if (args_list$outcome_transform == "lag_difference") {
-    o <- as.character(
+    pre_out_name <- as.character(
       glue::glue_data(args_list, "lag_diff_{horizon}_{outcome}")
     )
     r <- r %>%
@@ -156,7 +175,7 @@ arx_class_epi_workflow <- function(
       )
   }
   if (args_list$outcome_transform == "growth_rate") {
-    o <- as.character(
+    pre_out_name <- as.character(
       glue::glue_data(args_list, "gr_{horizon}_{method}_{outcome}")
     )
     if (!(outcome %in% predictors)) {
@@ -171,11 +190,30 @@ arx_class_epi_workflow <- function(
         )
     }
   }
-  o2 <- rlang::sym(paste0("ahead_", args_list$ahead, "_", o))
+  # regex that will match any amount of adjustment for the ahead
+  ahead_out_name_regex <- glue::glue("ahead_[0-9]*_{pre_out_name}")
+  method_adjust_latency <- args_list$adjust_latency
+  if (method_adjust_latency != "none") {
+    if (method_adjust_latency != "extend_ahead") {
+      cli_abort("only extend_ahead is currently supported",
+        class = "epipredict__arx_classifier__adjust_latency_unsupported_method"
+      )
+    }
+    r <- r %>% step_adjust_latency(!!pre_out_name,
+      fixed_forecast_date = forecast_date,
+      method = method_adjust_latency
+    )
+  }
   r <- r %>%
-    step_epi_ahead(!!o, ahead = args_list$ahead, role = "pre-outcome") %>%
-    recipes::step_mutate(
-      outcome_class = cut(!!o2, breaks = args_list$breaks),
+    step_epi_ahead(!!pre_out_name, ahead = args_list$ahead, role = "pre-outcome")
+  r <- r %>%
+    step_mutate(
+      across(
+        matches(ahead_out_name_regex),
+        ~ cut(.x, breaks = args_list$breaks),
+        .names = "outcome_class",
+        .unpack = TRUE
+      ),
       role = "outcome"
     ) %>%
     step_epi_naomit() %>%
@@ -192,10 +230,6 @@ arx_class_epi_workflow <- function(
     )
   }
 
-
-  forecast_date <- args_list$forecast_date %||% max(epi_data$time_value)
-  target_date <- args_list$target_date %||% (forecast_date + args_list$ahead)
-
   # --- postprocessor
   f <- frosting() %>% layer_predict() # %>% layer_naomit()
   f <- layer_add_forecast_date(f, forecast_date = forecast_date) %>%
@@ -260,13 +294,14 @@ arx_class_args_list <- function(
     n_training = Inf,
     forecast_date = NULL,
     target_date = NULL,
+    adjust_latency = c("none", "extend_ahead", "extend_lags", "locf"),
+    warn_latency = TRUE,
     outcome_transform = c("growth_rate", "lag_difference"),
     breaks = 0.25,
     horizon = 7L,
     method = c("rel_change", "linear_reg"),
     log_scale = FALSE,
     additional_gr_args = list(),
-    nafill_buffer = Inf,
     check_enough_data_n = NULL,
     check_enough_data_epi_keys = NULL,
     ...) {
@@ -276,15 +311,15 @@ arx_class_args_list <- function(
   method <- rlang::arg_match(method)
   outcome_transform <- rlang::arg_match(outcome_transform)
 
-  arg_is_scalar(ahead, n_training, horizon, log_scale)
+  adjust_latency <- rlang::arg_match(adjust_latency)
+  arg_is_scalar(ahead, n_training, horizon, log_scale, adjust_latency, warn_latency)
   arg_is_scalar(forecast_date, target_date, allow_null = TRUE)
   arg_is_date(forecast_date, target_date, allow_null = TRUE)
   arg_is_nonneg_int(ahead, lags, horizon)
   arg_is_numeric(breaks)
   arg_is_lgl(log_scale)
   arg_is_pos(n_training)
   if (is.finite(n_training)) arg_is_pos_int(n_training)
-  if (is.finite(nafill_buffer)) arg_is_pos_int(nafill_buffer, allow_null = TRUE)
   if (!is.list(additional_gr_args)) {
     cli_abort(c(
       "`additional_gr_args` must be a {.cls list}.",
@@ -297,10 +332,13 @@ arx_class_args_list <- function(
 
   if (!is.null(forecast_date) && !is.null(target_date)) {
     if (forecast_date + ahead != target_date) {
-      cli::cli_warn(c(
-        "`forecast_date` + `ahead` must equal `target_date`.",
-        i = "{.val {forecast_date}} + {.val {ahead}} != {.val {target_date}}."
-      ))
+      cli_warn(
+        paste0(
+          "`forecast_date`  {.val {forecast_date}} +",
+          " `ahead` {.val {ahead}} must equal `target_date` {.val {target_date}}."
+        ),
+        class = "epipredict__arx_args__inconsistent_target_ahead_forecaste_date"
+      )
     }
   }
 
@@ -318,13 +356,13 @@ arx_class_args_list <- function(
       breaks,
       forecast_date,
       target_date,
+      adjust_latency,
       outcome_transform,
       max_lags,
       horizon,
       method,
       log_scale,
       additional_gr_args,
-      nafill_buffer,
       check_enough_data_n,
       check_enough_data_epi_keys
     ),
@@ -337,5 +375,3 @@ print.arx_class <- function(x, ...) {
   name <- "ARX Classifier"
   NextMethod(name = name, ...)
 }
-
-# this is a trivial change to induce a check