diff --git a/DESCRIPTION b/DESCRIPTION
index ad32b71e..01a09444 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -27,9 +27,11 @@ Imports:
     purrr,
     recipes (>= 1.0.4),
     rlang,
+    slider,
    targets,
     tibble,
-    tidyr
+    tidyr,
+    zeallot
 Suggests:
     ggplot2,
     knitr,
diff --git a/NAMESPACE b/NAMESPACE
index 72811f7d..3298ff06 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -28,15 +28,19 @@ export(make_shared_grids)
 export(make_target_ensemble_grid)
 export(make_target_param_grid)
 export(overprediction)
-export(perform_sanity_checks)
 export(read_external_predictions_data)
+export(rolling_mean)
+export(rolling_sd)
 export(run_evaluation_measure)
 export(run_workflow_and_format)
+export(sanitize_args_predictors_trainer)
 export(scaled_pop)
 export(sharpness)
 export(single_id)
 export(slide_forecaster)
+export(smoothed_scaled)
 export(underprediction)
+export(update_predictors)
 export(weighted_interval_score)
 importFrom(assertthat,assert_that)
 importFrom(cli,cli_abort)
@@ -81,13 +85,16 @@ importFrom(epipredict,step_epi_naomit)
 importFrom(epipredict,step_population_scaling)
 importFrom(epipredict,step_training_window)
 importFrom(epiprocess,as_epi_df)
+importFrom(epiprocess,epi_slide)
 importFrom(epiprocess,epix_slide)
 importFrom(magrittr,"%<>%")
 importFrom(magrittr,"%>%")
 importFrom(purrr,imap)
 importFrom(purrr,map)
 importFrom(purrr,map2_vec)
+importFrom(purrr,map_chr)
 importFrom(purrr,map_vec)
+importFrom(purrr,reduce)
 importFrom(purrr,transpose)
 importFrom(recipes,all_numeric)
 importFrom(rlang,"!!")
@@ -96,6 +103,7 @@ importFrom(rlang,.data)
 importFrom(rlang,quo)
 importFrom(rlang,sym)
 importFrom(rlang,syms)
+importFrom(slider,slide_dbl)
 importFrom(targets,tar_config_get)
 importFrom(targets,tar_group)
 importFrom(targets,tar_read)
@@ -105,3 +113,4 @@ importFrom(tidyr,drop_na)
 importFrom(tidyr,expand_grid)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,unnest)
+importFrom(zeallot,"%<-%")
diff --git a/R/data_transforms.R b/R/data_transforms.R
new file mode 100644
index 00000000..7329823d
--- /dev/null
+++ b/R/data_transforms.R
@@ -0,0 +1,114 @@
+# various reusable transforms to apply before handing to epipredict
+
+#' extract the non-key, non-smoothed columns from epi_data
+#' @keywords internal
+#' @param epi_data the `epi_df`
+#' @param cols vector of column names to use. If `NULL`, fill with all non-key columns
+get_trainable_names <- function(epi_data, cols) {
+  if (is.null(cols)) {
+    cols <- get_nonkey_names(epi_data)
+    # exclude anything with the same naming schema as the rolling average/sd created below
+    cols <- cols[!grepl("_\\w{1,2}\\d+", cols)]
+  }
+  return(cols)
+}
+
+#' just the names which aren't keys for an epi_df
+#' @description
+#' the column names, excluding the keys
+#' @param epi_data the epi_df
get_nonkey_names <- function(epi_data) {
+  cols <- names(epi_data)
+  cols <- cols[!(cols %in% c("geo_value", "time_value", attr(epi_data, "metadata")$other_keys))]
+  return(cols)
+}
+
+
+#' update the predictors to only contain the smoothed/sd versions of cols
+#' @description
+#' modifies the list of predictors so that any which have been modified have the
+#' modified versions included, and not the original. Should only be applied
+#' after both rolling_mean and rolling_sd.
+#' @param epi_data the epi_df, only included to get the non-key column names
+#' @param cols_modified the list of columns which have been modified. If this is `NULL`, that means we were modifying every column.
+#' @param predictors the initial set of predictors; any unmodified are kept, any
"a" becoming "a_m17"). +#' @importFrom purrr map map_chr reduce +#' @return returns an updated list of predictors, with modified columns replaced and non-modified columns left intact. +#' @export +update_predictors <- function(epi_data, cols_modified, predictors) { + if (!is.null(cols_modified)) { + # if cols_modified isn't null, make sure we include predictors that weren't modified + unchanged_predictors <- map(cols_modified, ~ !grepl(.x, predictors, fixed = TRUE)) %>% reduce(`&`) + unchanged_predictors <- predictors[unchanged_predictors] + } else { + # if it's null, we've modified every predictor + unchanged_predictors <- character(0L) + } + # all the non-key names + col_names <- get_nonkey_names(epi_data) + is_present <- function(original_predictor) { + grepl(original_predictor, col_names) & !(col_names %in% predictors) + } + is_modified <- map(predictors, is_present) %>% reduce(`|`) + new_predictors <- col_names[is_modified] + return(c(unchanged_predictors, new_predictors)) +} + +#' get a rolling average for the named columns +#' @description +#' add column(s) that are the rolling means of the specified columns, as +#' implemented by slider. Defaults to the previous 7 days. +#' Currently only group_by's on the geo_value. Should probably extend to more +#' keys if you have them +#' @param epi_data the dataset +#' @param width the number of days (or examples, the sliding isn't time-aware) to use +#' @param cols_to_mean the non-key columns to take the mean over. `NULL` means all +#' @importFrom slider slide_dbl +#' @importFrom epiprocess epi_slide +#' @export +rolling_mean <- function(epi_data, width = 7L, cols_to_mean = NULL) { + cols_to_mean <- get_trainable_names(epi_data, cols_to_mean) + epi_data %<>% group_by(geo_value) + for (col in cols_to_mean) { + mean_name <- paste0(col, "_m", width) + epi_data %<>% epi_slide(~ mean(.x[[col]], rm.na = TRUE), before = width-1L, new_col_name = mean_name) + } + epi_data %<>% ungroup() + return(epi_data) +} + +#' get a rolling standard deviation for the named columns +#' @description +#' A rolling standard deviation, based off of a rolling mean. First it +#' calculates a rolling mean with width `mean_width`, and then squares the +#' difference between that and the actual value, averaged over `sd_width`. +#' @param epi_data the dataset +#' @param sd_width the number of days (or examples, the sliding isn't +#' time-aware) to use for the standard deviation calculation +#' @param mean_width like `sd_width`, but it governs the mean. Should be less +#' than the `sd_width`, and if `NULL` (the default) it is half of `sd_width` +#' (so 14 in the complete default case) +#' @param cols_to_sd the non-key columns to take the sd over. 
+
+#' get a rolling standard deviation for the named columns
+#' @description
+#' A rolling standard deviation, based off of a rolling mean. First it
+#' calculates a rolling mean with width `mean_width`, and then squares the
+#' difference between that and the actual value, averaged over `sd_width`.
+#' @param epi_data the dataset
+#' @param sd_width the number of days (or examples, the sliding isn't
+#' time-aware) to use for the standard deviation calculation
+#' @param mean_width like `sd_width`, but it governs the mean. Should be less
+#' than the `sd_width`, and if `NULL` (the default) it is half of `sd_width`
+#' (so 14 in the complete default case)
+#' @param cols_to_sd the non-key columns to take the sd over. `NULL` means all
+#' @param keep_mean bool, if `TRUE`, it retains the mean column
+#' @importFrom epiprocess epi_slide
+#' @export
+rolling_sd <- function(epi_data, sd_width = 28L, mean_width = NULL, cols_to_sd = NULL, keep_mean = FALSE) {
+  if (is.null(mean_width)) {
+    mean_width <- as.integer(ceiling(sd_width / 2))
+  }
+  cols_to_sd <- get_trainable_names(epi_data, cols_to_sd)
+  result <- epi_data
+  for (col in cols_to_sd) {
+    result %<>% group_by(geo_value)
+    mean_name <- paste0(col, "_m", mean_width)
+    sd_name <- paste0(col, "_sd", sd_width)
+    result %<>% epi_slide(~ mean(.x[[col]], na.rm = TRUE), before = mean_width - 1L, new_col_name = mean_name)
+    result %<>% epi_slide(~ sqrt(mean((.x[[mean_name]] - .x[[col]])^2, na.rm = TRUE)), before = sd_width - 1L, new_col_name = sd_name)
+    if (!keep_mean) {
+      # TODO make sure the extra info sticks around
+      result %<>% select(-{{ mean_name }})
+    }
+    result %<>% dplyr_reconstruct(epi_data)
+  }
+  result %<>% ungroup()
+  return(result)
+}
diff --git a/R/data_validation.R b/R/data_validation.R
index da4c134e..e9319334 100644
--- a/R/data_validation.R
+++ b/R/data_validation.R
@@ -13,7 +13,7 @@
 #' include empty strings
 #' @param args_list the args list created by [`epipredict::arx_args_list`]
 #' @export
-perform_sanity_checks <- function(epi_data,
+sanitize_args_predictors_trainer <- function(epi_data,
                                   outcome,
                                   predictors,
                                   trainer,
@@ -56,7 +56,7 @@
 #' @export
 confirm_sufficient_data <- function(epi_data, ahead, args_input, buffer = 9) {
   if (!is.null(args_input$lags)) {
-    lag_max <- max(args_input$lags)
+    lag_max <- max(unlist(args_input$lags))
   } else {
     lag_max <- 14 # default value of 2 weeks
   }
diff --git a/R/epipredict_utilities.R b/R/epipredict_utilities.R
index fc56925f..083ce35f 100644
--- a/R/epipredict_utilities.R
+++ b/R/epipredict_utilities.R
@@ -2,25 +2,25 @@
 #' add the default steps for arx_forecaster
 #' @description
 #' add the default steps for arx_forecaster
-#' @param rec an [`epipredict::epi_recipe`]
+#' @param preproc an [`epipredict::epi_recipe`]
 #' @param outcome a character of the column to be predicted
 #' @param predictors a character vector of the columns used as predictors
 #' @param args_list an [`epipredict::arx_args_list`]
 #' @seealso [arx_postprocess] for the layer equivalent
 #' @importFrom epipredict step_epi_lag step_epi_ahead step_epi_naomit step_training_window
 #' @export
-arx_preprocess <- function(rec, outcome, predictors, args_list) {
+arx_preprocess <- function(preproc, outcome, predictors, args_list) {
   # input already validated
   lags <- args_list$lags
   for (l in seq_along(lags)) {
     p <- predictors[l]
-    rec %<>% step_epi_lag(!!p, lag = lags[[l]])
+    preproc %<>% step_epi_lag(!!p, lag = lags[[l]])
   }
-  rec %<>%
+  preproc %<>%
     step_epi_ahead(!!outcome, ahead = args_list$ahead) %>%
     step_epi_naomit() %>%
     step_training_window(n_recent = args_list$n_training)
-  return(rec)
+  return(preproc)
 }
 
 # TODO replace with `layer_arx_forecaster`
diff --git a/R/forecaster_flatline.R b/R/forecaster_flatline.R
index ab0c5075..33687240 100644
--- a/R/forecaster_flatline.R
+++ b/R/forecaster_flatline.R
@@ -40,9 +40,7 @@ flatline_fc <- function(epi_data,
   args_list <- do.call(flatline_args_list, args_input)
   # if you want to ignore extra_sources, setting predictors is the way to do it
   predictors <- c(outcome, extra_sources)
-  argsPredictorsTrainer <- perform_sanity_checks(epi_data, outcome, predictors, NULL, args_list)
-  args_list <- argsPredictorsTrainer[[1]]
-  predictors <- argsPredictorsTrainer[[2]]
+  c(args_list, predictors, trainer) %<-% sanitize_args_predictors_trainer(epi_data, outcome, predictors, NULL, args_list)
   # end of the copypasta
   # finally, any other pre-processing (e.g. smoothing) that isn't performed by
   # epipredict
diff --git a/R/forecaster_scaled_pop.R b/R/forecaster_scaled_pop.R
index 6db0b8b0..6c138caa 100644
--- a/R/forecaster_scaled_pop.R
+++ b/R/forecaster_scaled_pop.R
@@ -35,9 +35,10 @@
 #' @param quantile_levels The quantile levels to predict. Defaults to those required by
 #' covidhub.
 #' @seealso some utilities for making forecasters: [format_storage],
-#' [perform_sanity_checks]
+#' [sanitize_args_predictors_trainer]
 #' @importFrom epipredict epi_recipe step_population_scaling frosting arx_args_list layer_population_scaling
 #' @importFrom tibble tibble
+#' @importFrom zeallot %<-%
 #' @importFrom recipes all_numeric
 #' @export
 scaled_pop <- function(epi_data,
@@ -73,13 +74,10 @@ scaled_pop <- function(epi_data,
   args_input[["ahead"]] <- effective_ahead
   args_input[["quantile_levels"]] <- quantile_levels
   args_list <- do.call(arx_args_list, args_input)
-  # if you want to ignore extra_sources, setting predictors is the way to do it
+  # if you want to hardcode particular predictors in a particular forecaster
   predictors <- c(outcome, extra_sources)
   # TODO: Partial match quantile_level coming from here (on Dmitry's machine)
-  argsPredictorsTrainer <- perform_sanity_checks(epi_data, outcome, predictors, trainer, args_list)
-  args_list <- argsPredictorsTrainer[[1]]
-  predictors <- argsPredictorsTrainer[[2]]
-  trainer <- argsPredictorsTrainer[[3]]
+  c(args_list, predictors, trainer) %<-% sanitize_args_predictors_trainer(epi_data, outcome, predictors, trainer, args_list)
   # end of the copypasta
   # finally, any other pre-processing (e.g. smoothing) that isn't performed by
   # epipredict
diff --git a/R/forecaster_smoothed_scaled.R b/R/forecaster_smoothed_scaled.R
new file mode 100644
index 00000000..8428cc08
--- /dev/null
+++ b/R/forecaster_smoothed_scaled.R
@@ -0,0 +1,147 @@
+#' predict on smoothed data and the standard deviation
+#' @description
+#' This is a variant of `scaled_pop`, which predicts on a smoothed version of
+#' the data. Even if the target is smoothed when used as a /predictor/, as a
+#' /target/ it still uses the raw value (this captures some of the noise). It
+#' also uses a rolling standard deviation as an auxiliary signal, with a
+#' window of width `sd_width`, which by default is 28 days.
+#' If you are using `sd_width`, you should restrict the lags on the `sd` to only
+#' include `0`, so set your lags to be e.g. `list(c(0,7,14), c(0))`.
+#' @param epi_data the actual data used
+#' @param outcome the name of the target variable
+#' @param extra_sources the name of any extra columns to use. This list could be
+#' empty
+#' @param ahead (this is relative to the `as_of` field of the `epi_df`, which is
+#' likely *not* the same as the `ahead` used by epipredict, which is relative
+#' to the max time value of the `epi_df`. how to handle this is a modelling
+#' question left up to each forecaster; see latency_adjusting.R for the
+#' existing examples)
+#' @param pop_scaling bool; if `TRUE`, assume all numeric columns are on the
+#' count scale and translate them to a rate scale for model fitting.
+#' Predictions will be translated back to count scale. Any
+#' `layer_residual_quantiles` (for non-`"quantile_reg"` `trainer`s) will be
+#' done on the rate scale. When specifying predictor lags, note that rate
+#' variables will use the same names as and overwrite the count variables.
+#' Rates here will be counts per 100k population, based on
+#' `epipredict::state_census`.
+#' @param trainer optional; parsnip model specification to use for the core
+#' fitting & prediction (the `spec` of the internal
+#' [`epipredict::epi_workflow`]). Default is `parsnip::linear_reg()`.
+#' @param smooth_width the number of days over which to do smoothing. If `NULL`,
+#' then no smoothing is applied.
+#' @param smooth_cols the names of the columns to smooth. If `NULL` it smooths
+#' everything
+#' @param sd_width the number of days over which to take a moving average of the
+#' standard deviation. If `NULL`, the sd isn't included.
+#' @param sd_mean_width the window size of the mean used internally for the sd
+#' calculation.
+#' @param sd_cols the names of the columns to take the sd of. If `NULL` it
+#' includes the sd of everything
+#' @param quantile_levels The quantile levels to predict. Defaults to those
+#' required by covidhub.
+#' @param ... any additional arguments as used by [arx_args_list]
+#' @seealso some utilities for making forecasters: [format_storage],
+#' [sanitize_args_predictors_trainer]
+#' @importFrom epipredict epi_recipe step_population_scaling frosting arx_args_list layer_population_scaling
+#' @importFrom tibble tibble
+#' @importFrom recipes all_numeric
+#' @importFrom zeallot %<-%
+#' @export
+smoothed_scaled <- function(epi_data,
+                            outcome,
+                            extra_sources = "",
+                            ahead = 1,
+                            pop_scaling = TRUE,
+                            trainer = parsnip::linear_reg(),
+                            quantile_levels = covidhub_probs(),
+                            smooth_width = 7,
+                            smooth_cols = NULL,
+                            sd_width = 28,
+                            sd_mean_width = 14,
+                            sd_cols = NULL,
+                            ...) {
+  # perform any preprocessing not supported by epipredict
+  # this is a temp fix until a real fix gets put into epipredict
+  epi_data <- clear_lastminute_nas(epi_data)
+  # one that every forecaster will need to handle: how to manage max(time_value)
+  # that's older than the `as_of` date
+  c(epi_data, effective_ahead) %<-% extend_ahead(epi_data, ahead)
+  # see latency_adjusting for other examples
+  args_input <- list(...)
+  # edge case where there is no data or less data than the lags; eventually epipredict will handle this
+  if (!confirm_sufficient_data(epi_data, effective_ahead, args_input)) {
+    null_result <- epi_data[0L, c("geo_value", attr(epi_data, "metadata", exact = TRUE)[["other_keys"]])] %>%
+      mutate(
+        forecast_date = epi_data$time_value[0],
+        target_end_date = epi_data$time_value[0],
+        quantile = numeric(),
+        value = numeric()
+      )
+    return(null_result)
+  }
+  args_input[["ahead"]] <- effective_ahead
+  args_input[["quantile_levels"]] <- quantile_levels
+  args_list <- do.call(arx_args_list, args_input)
+  # `extra_sources` sets which variables beyond the outcome are lagged and used as predictors
+  # any which are modified by `rolling_mean` or `rolling_sd` have their original values dropped later
+  predictors <- c(outcome, extra_sources)
+  # end of the copypasta
+  # finally, any other pre-processing (e.g. smoothing) that isn't performed by
+  # epipredict
+  # smoothing
+  keep_mean <- !is.null(smooth_width) && !is.null(sd_mean_width) &&
+    smooth_width == sd_mean_width # do we (not) need to do the mean separately?
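+  # when the smoothing window matches the sd's internal mean window, the mean
+  # is computed once inside `rolling_sd` (keep_mean = TRUE) instead of a
+  # second time via `rolling_mean` below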
+  if (!is.null(smooth_width) && !keep_mean) {
+    epi_data %<>% rolling_mean(
+      width = smooth_width,
+      cols_to_mean = smooth_cols
+    )
+  }
+
+  # measuring standard deviation
+  if (!is.null(sd_width)) {
+    epi_data %<>% rolling_sd(
+      sd_width = sd_width,
+      mean_width = sd_mean_width,
+      cols_to_sd = sd_cols,
+      keep_mean = keep_mean
+    )
+  }
+  # and need to make sure we exclude the original variables as predictors
+  predictors <- update_predictors(epi_data, c(smooth_cols, sd_cols), predictors)
+  # TODO: Partial match quantile_level coming from here (on Dmitry's machine)
+  c(args_list, predictors, trainer) %<-% sanitize_args_predictors_trainer(epi_data, outcome, predictors, trainer, args_list)
+  # preprocessing supported by epipredict
+  preproc <- epi_recipe(epi_data)
+  if (pop_scaling) {
+    preproc %<>% step_population_scaling(
+      all_numeric(),
+      df = epipredict::state_census,
+      df_pop_col = "pop",
+      create_new = FALSE,
+      rate_rescaling = 1e5,
+      by = c("geo_value" = "abbr")
+    )
+  }
+  preproc %<>% arx_preprocess(outcome, predictors, args_list)
+
+  # postprocessing supported by epipredict
+  postproc <- frosting()
+  postproc %<>% arx_postprocess(trainer, args_list)
+  if (pop_scaling) {
+    postproc %<>% layer_population_scaling(
+      .pred, .pred_distn,
+      df = epipredict::state_census,
+      df_pop_col = "pop",
+      create_new = FALSE,
+      rate_rescaling = 1e5,
+      by = c("geo_value" = "abbr")
+    )
+  }
+  # with all the setup done, we execute and format
+  pred <- run_workflow_and_format(preproc, postproc, trainer, epi_data)
+  # now pred has the columns
+  # (geo_value, forecast_date, target_end_date, quantile, value)
+  # finally, any postprocessing not supported by epipredict e.g. calibration
+  return(pred)
+}
diff --git a/R/targets_utils.R b/R/targets_utils.R
index feac27c2..d3fd435e 100644
--- a/R/targets_utils.R
+++ b/R/targets_utils.R
@@ -127,19 +127,19 @@ make_shared_grids <- function() {
       forecaster = "scaled_pop",
       trainer = c("linreg", "quantreg"),
       ahead = c(1:7, 14, 21, 28),
-      pop_scaling = c(FALSE)
+      pop_scaling = FALSE
     ),
     tidyr::expand_grid(
       forecaster = "scaled_pop",
       trainer = c("linreg", "quantreg"),
       ahead = c(1:7, 14, 21, 28),
-      lags = list(c(0, 3, 5, 7, 14), c(0, 7, 14)),
-      pop_scaling = c(FALSE)
+      lags = list(c(0, 3, 5, 7, 14), c(0, 7, 14), c(0, 7, 14, 24)),
+      pop_scaling = FALSE
     ),
     tidyr::expand_grid(
       forecaster = "flatline_fc",
-      ahead = 1:7
-    )
+      ahead = c(1:7, 14, 21, 28)
+    )
   )
 }
 #' Make list of common ensembles for forecasting experiments across projects
diff --git a/app.R b/app.R
index c5ecf1a2..064c4ffe 100644
--- a/app.R
+++ b/app.R
@@ -170,7 +170,7 @@ shinyApp(
       verticalLayout(
         plotlyOutput("main_plot", height = "90em"),
         h2("forecaster name -> parameters"),
-        #textOutput("forecaster_param_title"),
+        # textOutput("forecaster_param_title"),
         dataTableOutput("forecaster_table"),
         h2("ensemble name -> parameters"),
         dataTableOutput("ensemble_table")
@@ -261,7 +261,7 @@ shinyApp(
         # Use scatterplot or lines depending on the x var.
         {
           if (input$x_var %in% c(input$facet_vars, "geo_value", "forecaster", "ahead")) {
-            . + geom_point(aes(size = n)) + expand_limits(size = 0)
+            . + geom_point(aes(size = n / 100)) + expand_limits(size = 0)
           } else {
             . + geom_line()
           }
diff --git a/covid_hosp_explore.R b/covid_hosp_explore.R
index f9aa0d8e..456b4b3e 100644
--- a/covid_hosp_explore.R
+++ b/covid_hosp_explore.R
@@ -18,6 +18,13 @@ make_unique_grids <- function() {
       ahead = c(1:7, 14, 21, 28),
       lags = list(c(0, 3, 5, 7, 14), c(0, 7, 14)),
       pop_scaling = TRUE
+    ),
+    tidyr::expand_grid(
+      forecaster = "smoothed_scaled",
+      trainer = c("quantreg"),
+      ahead = c(1:7, 14, 21, 28),
+      lags = list(
+        list(c(0, 3, 5, 7, 14), c(0), c(0, 3, 5, 7, 14), c(0)),
+        list(c(0, 7, 14), c(0)),
+        list(c(0, 2, 4, 7, 14, 21, 28), c(0))
+      ),
+      pop_scaling = TRUE
     )
   )
 }
diff --git a/flu_hosp_explore.R b/flu_hosp_explore.R
index a3486080..31d0ffc1 100644
--- a/flu_hosp_explore.R
+++ b/flu_hosp_explore.R
@@ -5,7 +5,15 @@ source("extras/targets-common.R")
 
 # Add custom parameter combinations in the list below.
 make_unique_grids <- function() {
-  list()
+  list(
+    tidyr::expand_grid(
+      forecaster = "smoothed_scaled",
+      trainer = c("quantreg"),
+      ahead = c(1:7, 14, 21, 28),
+      lags = list(
+        list(c(0, 3, 5, 7, 14), c(0), c(0, 3, 5, 7, 14), c(0)),
+        list(c(0, 7, 14), c(0)),
+        list(c(0, 2, 4, 7, 14, 21, 28), c(0))
+      ),
+      pop_scaling = FALSE
+    )
+  )
 }
 make_unique_ensemble_grid <- function() {
   tribble(
diff --git a/man/arx_preprocess.Rd b/man/arx_preprocess.Rd
index 35e2e0b0..44ea895a 100644
--- a/man/arx_preprocess.Rd
+++ b/man/arx_preprocess.Rd
@@ -4,10 +4,10 @@
 \alias{arx_preprocess}
 \title{add the default steps for arx_forecaster}
 \usage{
-arx_preprocess(rec, outcome, predictors, args_list)
+arx_preprocess(preproc, outcome, predictors, args_list)
 }
 \arguments{
-\item{rec}{an \code{\link[epipredict:epi_recipe]{epipredict::epi_recipe}}}
+\item{preproc}{an \code{\link[epipredict:epi_recipe]{epipredict::epi_recipe}}}
 
 \item{outcome}{a character of the column to be predicted}
 
diff --git a/man/get_nonkey_names.Rd b/man/get_nonkey_names.Rd
new file mode 100644
index 00000000..242ebabd
--- /dev/null
+++ b/man/get_nonkey_names.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data_transforms.R
+\name{get_nonkey_names}
+\alias{get_nonkey_names}
+\title{just the names which aren't keys for an epi_df}
+\usage{
+get_nonkey_names(epi_data)
+}
+\arguments{
+\item{epi_data}{the epi_df}
+}
+\description{
+the column names, excluding the keys
+}
diff --git a/man/get_trainable_names.Rd b/man/get_trainable_names.Rd
new file mode 100644
index 00000000..8f5faf10
--- /dev/null
+++ b/man/get_trainable_names.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data_transforms.R
+\name{get_trainable_names}
+\alias{get_trainable_names}
+\title{extract the non-key, non-smoothed columns from epi_data}
+\usage{
+get_trainable_names(epi_data, cols)
+}
+\arguments{
+\item{epi_data}{the \code{epi_df}}
+
+\item{cols}{vector of column names to use. If \code{NULL}, fill with all non-key columns}
+}
+\description{
+extract the non-key, non-smoothed columns from epi_data
+}
+\keyword{internal}
diff --git a/man/rolling_mean.Rd b/man/rolling_mean.Rd
new file mode 100644
index 00000000..f8cf5000
--- /dev/null
+++ b/man/rolling_mean.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data_transforms.R
+\name{rolling_mean}
+\alias{rolling_mean}
+\title{get a rolling average for the named columns}
+\usage{
+rolling_mean(epi_data, width = 7L, cols_to_mean = NULL)
+}
+\arguments{
+\item{epi_data}{the dataset}
+
+\item{width}{the number of days (or examples, the sliding isn't time-aware) to use}
+
+\item{cols_to_mean}{the non-key columns to take the mean over. \code{NULL} means all}
+}
+\description{
+add column(s) that are the rolling means of the specified columns, as
+implemented by slider. Defaults to the previous 7 days.
+Currently only group_by's on the geo_value. Should probably extend to more
+keys if you have them
+}
diff --git a/man/rolling_sd.Rd b/man/rolling_sd.Rd
new file mode 100644
index 00000000..22f3ec0c
--- /dev/null
+++ b/man/rolling_sd.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data_transforms.R
+\name{rolling_sd}
+\alias{rolling_sd}
+\title{get a rolling standard deviation for the named columns}
+\usage{
+rolling_sd(
+  epi_data,
+  sd_width = 28L,
+  mean_width = NULL,
+  cols_to_sd = NULL,
+  keep_mean = FALSE
+)
+}
+\arguments{
+\item{epi_data}{the dataset}
+
+\item{sd_width}{the number of days (or examples, the sliding isn't
+time-aware) to use for the standard deviation calculation}
+
+\item{mean_width}{like \code{sd_width}, but it governs the mean. Should be less
+than the \code{sd_width}, and if \code{NULL} (the default) it is half of \code{sd_width}
+(so 14 in the complete default case)}
+
+\item{cols_to_sd}{the non-key columns to take the sd over. \code{NULL} means all}
+
+\item{keep_mean}{bool, if \code{TRUE}, it retains the mean column}
+}
+\description{
+A rolling standard deviation, based off of a rolling mean. First it
+calculates a rolling mean with width \code{mean_width}, and then squares the
+difference between that and the actual value, averaged over \code{sd_width}.
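+In symbols, with \eqn{m_i} the trailing \code{mean_width}-day mean of a column
+\eqn{x} and \eqn{w} the \code{sd_width} (a restatement of the computation above):
+\deqn{sd_t = \sqrt{\frac{1}{w} \sum_{i = t - w + 1}^{t} (x_i - m_i)^2}}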
+}
diff --git a/man/perform_sanity_checks.Rd b/man/sanitize_args_predictors_trainer.Rd
similarity index 85%
rename from man/perform_sanity_checks.Rd
rename to man/sanitize_args_predictors_trainer.Rd
index 152b3bf1..b56706ee 100644
--- a/man/perform_sanity_checks.Rd
+++ b/man/sanitize_args_predictors_trainer.Rd
@@ -1,10 +1,16 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/data_validation.R
-\name{perform_sanity_checks}
-\alias{perform_sanity_checks}
+\name{sanitize_args_predictors_trainer}
+\alias{sanitize_args_predictors_trainer}
 \title{helper function for those writing forecasters}
 \usage{
-perform_sanity_checks(epi_data, outcome, predictors, trainer, args_list)
+sanitize_args_predictors_trainer(
+  epi_data,
+  outcome,
+  predictors,
+  trainer,
+  args_list
+)
 }
 \arguments{
 \item{epi_data}{the actual data used}
diff --git a/man/scaled_pop.Rd b/man/scaled_pop.Rd
index 880819eb..5522c85c 100644
--- a/man/scaled_pop.Rd
+++ b/man/scaled_pop.Rd
@@ -63,5 +63,5 @@ arguments:
 }
 \seealso{
 some utilities for making forecasters: \link{format_storage},
-\link{perform_sanity_checks}
+\link{sanitize_args_predictors_trainer}
 }
diff --git a/man/smoothed_scaled.Rd b/man/smoothed_scaled.Rd
new file mode 100644
index 00000000..16a3d2c0
--- /dev/null
+++ b/man/smoothed_scaled.Rd
@@ -0,0 +1,82 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/forecaster_smoothed_scaled.R
+\name{smoothed_scaled}
+\alias{smoothed_scaled}
+\title{predict on smoothed data and the standard deviation}
+\usage{
+smoothed_scaled(
+  epi_data,
+  outcome,
+  extra_sources = "",
+  ahead = 1,
+  pop_scaling = TRUE,
+  trainer = parsnip::linear_reg(),
+  quantile_levels = covidhub_probs(),
+  smooth_width = 7,
+  smooth_cols = NULL,
+  sd_width = 28,
+  sd_mean_width = 14,
+  sd_cols = NULL,
+  ...
+)
+}
+\arguments{
+\item{epi_data}{the actual data used}
+
+\item{outcome}{the name of the target variable}
+
+\item{extra_sources}{the name of any extra columns to use. This list could be
+empty}
+
+\item{ahead}{(this is relative to the \code{as_of} field of the \code{epi_df}, which is
+likely \emph{not} the same as the \code{ahead} used by epipredict, which is relative
+to the max time value of the \code{epi_df}. how to handle this is a modelling
+question left up to each forecaster; see latency_adjusting.R for the
+existing examples)}
+
+\item{pop_scaling}{bool; if \code{TRUE}, assume all numeric columns are on the
+count scale and translate them to a rate scale for model fitting.
+Predictions will be translated back to count scale. Any
+\code{layer_residual_quantiles} (for non-\code{"quantile_reg"} \code{trainer}s) will be
+done on the rate scale. When specifying predictor lags, note that rate
+variables will use the same names as and overwrite the count variables.
+Rates here will be counts per 100k population, based on
+\code{epipredict::state_census}.}
+
+\item{trainer}{optional; parsnip model specification to use for the core
+fitting & prediction (the \code{spec} of the internal
+\code{\link[epipredict:epi_workflow]{epipredict::epi_workflow}}). Default is \code{parsnip::linear_reg()}.}
+
+\item{quantile_levels}{The quantile levels to predict. Defaults to those
+required by covidhub.}
+
+\item{smooth_width}{the number of days over which to do smoothing. If \code{NULL},
+then no smoothing is applied.}
+
+\item{smooth_cols}{the names of the columns to smooth. If \code{NULL} it smooths
+everything}
+
+\item{sd_width}{the number of days over which to take a moving average of the
+standard deviation. If \code{NULL}, the sd isn't included.}
+
+\item{sd_mean_width}{the window size of the mean used internally for the sd
+calculation.}
+
+\item{sd_cols}{the names of the columns to take the sd of. If \code{NULL} it
+includes the sd of everything}
+
+\item{...}{any additional arguments as used by \link{arx_args_list}}
+}
+\description{
+This is a variant of \code{scaled_pop}, which predicts on a smoothed version of
+the data. Even if the target is smoothed when used as a /predictor/, as a
+/target/ it still uses the raw value (this captures some of the noise). It
+also uses a rolling standard deviation as an auxiliary signal, with a window
+of width \code{sd_width}, which by default is 28 days.
+If you are using \code{sd_width}, you should restrict the lags on the \code{sd} to only
+include \code{0}, so set your lags to be e.g. \code{list(c(0,7,14), c(0))}.
+}
+\seealso{
+some utilities for making forecasters: \link{format_storage},
+\link{sanitize_args_predictors_trainer}
+}
diff --git a/man/update_predictors.Rd b/man/update_predictors.Rd
new file mode 100644
index 00000000..435e5aba
--- /dev/null
+++ b/man/update_predictors.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data_transforms.R
+\name{update_predictors}
+\alias{update_predictors}
+\title{update the predictors to only contain the smoothed/sd versions of cols}
+\usage{
+update_predictors(epi_data, cols_modified, predictors)
+}
+\arguments{
+\item{epi_data}{the epi_df, only included to get the non-key column names}
+
+\item{cols_modified}{the list of columns which have been modified. If this is \code{NULL}, that means we were modifying every column.}
+
+\item{predictors}{the initial set of predictors; any unmodified are kept, any modified are replaced with the modified versions (e.g. "a" becoming "a_m17").}
+}
+\value{
+returns an updated list of predictors, with modified columns replaced and non-modified columns left intact.
+}
+\description{
+modifies the list of predictors so that any which have been modified have the
+modified versions included, and not the original. Should only be applied
+after both rolling_mean and rolling_sd.
+}
diff --git a/tests/testthat/test-forecaster-utils.R b/tests/testthat/test-forecaster-utils.R
index 863acc80..dfe5f4ed 100644
--- a/tests/testthat/test-forecaster-utils.R
+++ b/tests/testthat/test-forecaster-utils.R
@@ -1,10 +1,10 @@
-test_that("perform_sanity_checks", {
+test_that("sanitize_args_predictors_trainer", {
   epi_data <- epipredict::case_death_rate_subset
   # don't need to test validate_forecaster_inputs as that's inherited
   # testing args_list inheritance
   ex_args <- arx_args_list()
-  expect_error(perform_sanity_checks(epi_data, "case_rate", c("case_rate"), 5, ex_args))
-  argsPredictors <- perform_sanity_checks(
+  expect_error(sanitize_args_predictors_trainer(epi_data, "case_rate", c("case_rate"), 5, ex_args))
+  argsPredictors <- sanitize_args_predictors_trainer(
     epi_data, "case_rate", c("case_rate", ""), parsnip::linear_reg(), ex_args
   )
   args_list <- argsPredictors[[1]]
diff --git a/tests/testthat/test-forecasters-basics.R b/tests/testthat/test-forecasters-basics.R
index c9d7a929..b0cc1da4 100644
--- a/tests/testthat/test-forecasters-basics.R
+++ b/tests/testthat/test-forecasters-basics.R
@@ -2,8 +2,10 @@ library(dplyr)
 # TODO better way to do this than copypasta
 forecasters <- list(
   c("scaled_pop", scaled_pop),
-  c("flatline_fc", flatline_fc)
+  c("flatline_fc", flatline_fc),
+  c("smoothed_scaled", smoothed_scaled)
 )
 for (forecaster in forecasters) {
   test_that(paste(forecaster[[1]], "gets the date and columns right"), {
     jhu <- epipredict::case_death_rate_subset %>%
diff --git a/tests/testthat/test-forecasters-data.R b/tests/testthat/test-forecasters-data.R
index 365811dd..4fc65f5d 100644
--- a/tests/testthat/test-forecasters-data.R
+++ b/tests/testthat/test-forecasters-data.R
@@ -6,8 +6,10 @@ forecasters <- tibble::tribble(
   ~forecaster, ~extra_params, ~extra_params_names, ~fc_name,
   scaled_pop, list(1, TRUE), list("ahead", "pop_scaling"), "scaled_pop",
   scaled_pop, list(1, FALSE), list("ahead", "pop_scaling"), "scaled_pop",
-  flatline_fc, list(1), list("ahead"), "flatline_fc"
+  flatline_fc, list(1), list("ahead"), "flatline_fc",
+  smoothed_scaled, list(1, list(c(0, 7, 14), c(0))), list("ahead", "lags"), "smoothed_scaled"
 )
+expects_nonequal <- c("scaled_pop", "smoothed_scaled")
 synth_mean <- 25
 synth_sd <- 2
 tiny_sd <- 1.0e-5
@@ -49,7 +51,7 @@ different_constants <- epiprocess::as_epi_archive(rbind(
 ))
 for (ii in 1:nrow(forecasters)) {
   test_that(paste(forecasters$fc_name[[ii]], " predicts a constant median for constant data"), {
-    if (forecasters$fc_name[[ii]] == "scaled_pop") {
+    if (forecasters$fc_name[[ii]] %in% expects_nonequal) {
       suppressWarnings(expect_warning(res <- get_pred(different_constants, ii), regexp = "prediction from rank-deficient fit"))
     } else {
       res <- get_pred(different_constants, ii)
@@ -126,7 +128,7 @@ missing_state <- epiprocess::as_epi_archive(rbind(
 ))
 for (ii in seq_len(nrow(forecasters))) {
   test_that(paste(forecasters$fc_name[[ii]], "predicts well in the presence of only one state with variably delayed data"), {
-    if (forecasters$fc_name[[ii]] == "scaled_pop") {
+    if (forecasters$fc_name[[ii]] %in% expects_nonequal) {
       suppressWarnings(expect_warning(res <- get_pred(missing_state, ii), regexp = "prediction from rank-deficient fit"))
     } else {
       res <- get_pred(missing_state, ii)
@@ -171,7 +173,7 @@ for (ii in seq_len(nrow(forecasters))) {
   test_that(paste(forecasters$fc_name[[ii]], "predicts a linear increasing slope correctly"), {
     # flatline will definitely fail this, so it's exempt
     if (!identical(forecasters$forecaster[[ii]], flatline_fc)) {
-      if (forecasters$fc_name[[ii]] == "scaled_pop") {
+      if (forecasters$fc_name[[ii]] %in% expects_nonequal) {
         suppressWarnings(expect_warning(res <- get_pred(linear, ii), regexp = "prediction from rank-deficient fit"))
       } else {
         res <- get_pred(linear, ii)
diff --git a/tests/testthat/test-transforms.R b/tests/testthat/test-transforms.R
new file mode 100644
index 00000000..fdea75b2
--- /dev/null
+++ b/tests/testthat/test-transforms.R
@@ -0,0 +1,100 @@
+library(dplyr)
+n_days <- 40
+removed_date <- 10
+simple_dates <- seq(as.Date("2012-01-01"), by = "day", length.out = n_days)
+simple_dates <- simple_dates[-removed_date]
+rand_vals <- rnorm(n_days - 1)
+
+# Two states, with 2 variables. a is linear, going up in one state and down in the other
+# b is just random
+# note that day 10 is missing
+epi_data <- epiprocess::as_epi_df(rbind(tibble(
+  geo_value = "al",
+  time_value = simple_dates,
+  a = 1:(n_days - 1),
+  b = rand_vals
+), tibble(
+  geo_value = "ca",
+  time_value = simple_dates,
+  a = (n_days - 1):1,
+  b = rand_vals + 10
+)))
+
+test_that("rolling_mean generates correct mean", {
+  rolled <- rolling_mean(epi_data)
+  expect_equal(names(rolled), c("geo_value", "time_value", "a", "b", "a_m7", "b_m7"))
+  # hand specified rolling mean with a rear window of 7, noting that mean(1:7) = 4
+  linear_roll_mean <- c(seq(from = 1, by = .5, length.out = 7), seq(from = 5, to = 36, by = 1))
+  # day 10 is missing, so the averages for days 11-16 are thrown off, using only 6 values instead of 7
+  gap_starts <- epi_data %>% filter(geo_value == "al" & time_value == as.Date("2012-01-11")) %>% pull(a)
+  unusual_days <- map_vec(seq(from = 0, to = 5), \(d) mean(((gap_starts + d) - 0):((gap_starts + d) - 5)))
+  # stitching the lag induced hiccup into the "normal" mean values
+  expected_mean <- c(linear_roll_mean[1:9], unusual_days, linear_roll_mean[16:(n_days - 1)])
+  expect_equal(rolled %>% filter(geo_value == "al") %>% pull("a_m7"), expected_mean)
+  # Doing the same for California
+  # same, but "ca" is reversed, noting mean(40:(40 - 7)) = 36.5
+  linear_reverse_roll_mean <- c(seq(from = 39, by = -0.5, length.out = 7), seq(from = 35, to = 4, by = -1))
+  # day 10 is missing, so days 11-16 are thrown off
+  gap_starts <- epi_data %>% filter(geo_value == "ca" & time_value == as.Date("2012-01-11")) %>% pull(a)
+  unusual_days <- map_vec(seq(from = 0, to = 5), \(d) mean(((gap_starts - d) + 0):((gap_starts - d) + 5)))
+  # stitching the lag induced hiccup into the "normal" mean values
+  expected_mean <- c(linear_reverse_roll_mean[1:9], unusual_days, linear_reverse_roll_mean[16:(n_days - 1)])
+  # actually testing
+  expect_equal(rolled %>% filter(geo_value == "ca") %>% pull("a_m7"), expected_mean)
+
+  # making sure the type is maintained
+  expect_true("epi_df" %in% class(rolled))
+})
+
+test_that("rolling_sd generates correct standard deviation", {
+  rolled <- rolling_sd(epi_data, keep_mean = TRUE)
+  expect_equal(names(rolled), c("geo_value", "time_value", "a", "b", "a_m14", "a_sd28", "b_m14", "b_sd28"))
+  # hand specified rolling mean with a rear window of 14, noting that mean(1:14) = 7.5
+  linear_roll_mean <- c(seq(from = 1, to = 7, by = .5), seq(from = 8, to = 16, by = 1), seq(from = 16.5, to = 32.5, by = 1))
+  expect_equal(rolled %>% filter(geo_value == "al") %>% pull("a_m14"), linear_roll_mean)
+  # and the standard deviation is
+  linear_roll_mean <- append(linear_roll_mean, NA, after = removed_date - 1)
+  linear_values <- 1:39
+  linear_values <- append(linear_values, NA, after = removed_date - 1)
+  linear_roll_sd <- sqrt(slider::slide_dbl((linear_values - linear_roll_mean)^2, \(x) mean(x, na.rm = TRUE), .before = 28 - 1))
+  # drop the extra date caused by the inclusion of the NAs
+  linear_roll_sd <- linear_roll_sd[-(removed_date)]
+  expect_equal(rolled %>% filter(geo_value == "al") %>% pull("a_sd28"), linear_roll_sd)
+  # even though ca is reversed, the changes are all the same, so the standard deviation is *exactly* the same values
+  expect_equal(rolled %>% filter(geo_value == "ca") %>% pull("a_sd28"), linear_roll_sd)
+  # doesn't break types
+  expect_true("epi_df" %in% class(rolled))
+})
+
+test_that("get_trainable_names pulls out mean and sd columns", {
+  rolled <- rolling_sd(epi_data, keep_mean = TRUE)
+  expect_equal(names(rolled), c("geo_value", "time_value", "a", "b", "a_m14", "a_sd28", "b_m14", "b_sd28"))
+  expect_equal(get_trainable_names(rolled, NULL), c("a", "b"))
+})
+# TODO example with NA's, example with missing days, only one column, keep_mean
+
+test_that("update_predictors keeps unmodified predictors", {
+  epi_data["c"] <- NaN
+  epi_data["d"] <- NaN
+  epi_data["b_m14"] <- NaN
+  epi_data["b_sd28"] <- NaN
+  predictors <- c("a", "b", "c") # everything but d
+  modified <- c("b", "c") # we want to exclude b but not its modified versions
+  expected_predictors <- c("a", "b_m14", "b_sd28")
+  expect_equal(update_predictors(epi_data, modified, predictors), expected_predictors)
+  expected_if_all_modified <- c("b_m14", "b_sd28")
+  expect_equal(update_predictors(epi_data, NULL, predictors), expected_if_all_modified)
+})
+
+test_that("rolling_sd doesn't keep the mean columns by default", {
+  rolled <- rolling_sd(epi_data)
+  expect_equal(names(rolled), c("geo_value", "time_value", "a", "b", "a_sd28", "b_sd28"))
+})
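+
+# a minimal end-to-end sketch of how these transforms compose inside
+# `smoothed_scaled`: smooth, then swap the predictor list over to the derived
+# columns (the expected names follow the `_m{width}` schema used above)
+test_that("rolling_mean and update_predictors compose", {
+  smoothed <- rolling_mean(epi_data, width = 7L)
+  new_predictors <- update_predictors(smoothed, NULL, c("a", "b"))
+  expect_equal(new_predictors, c("a_m7", "b_m7"))
+})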