Skip to content

Commit 601abb3

Browse files
committed
first draft of extend_ahead
1 parent 6b19bf0 commit 601abb3

File tree

6 files changed

+165
-5
lines changed

6 files changed

+165
-5
lines changed

R/step_epi_shift.R

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020
#' @param lag,ahead A vector of integers. Each specified column will
2121
#' be the lag or lead for each value in the vector. Lag integers must be
2222
#' nonnegative, while ahead integers must be positive.
23+
#' @param latency_adjustment a character. Determines the method by which the forecast handles data that doesn't extend to the day the forecast is made. The options are:
24+
#' - `"extend_ahead"`: actually forecasts from the last date. E.g. if there are 3 days of latency for a 4 day ahead forecast, the ahead used in practice is actually 7.
25+
#' - `"locf"`: carries forward the last observed value up to the forecast date.
26+
#' - `"extend_lags"`: per `epi_key` and `predictor`, adjusts the lag so that the shortest lag at predict time is
2327
#' @param prefix A prefix to indicate what type of variable this is
2428
#' @param default Determines what fills empty rows
2529
#' left by leading/lagging (defaults to NA).
@@ -60,6 +64,12 @@ step_epi_lag <-
6064
default = NA,
6165
columns = NULL,
6266
skip = FALSE,
67+
latency_adjustment = c(
68+
"None",
69+
"extend_ahead",
70+
"locf",
71+
"extend_lags"
72+
),
6373
id = rand_id("epi_lag")) {
6474
if (!is_epi_recipe(recipe)) {
6575
rlang::abort("This recipe step can only operate on an `epi_recipe`.")
@@ -72,8 +82,9 @@ step_epi_lag <-
7282
)
7383
)
7484
}
85+
latency_adjustment <- rlang::arg_match(latency_adjustment)
7586
arg_is_nonneg_int(lag)
76-
arg_is_chr_scalar(prefix, id)
87+
arg_is_chr_scalar(prefix, id, latency_adjustment)
7788
if (!is.null(columns)) {
7889
rlang::abort(c("The `columns` argument must be `NULL.",
7990
i = "Use `tidyselect` methods to choose columns to lag."
@@ -89,6 +100,7 @@ step_epi_lag <-
89100
prefix = prefix,
90101
default = default,
91102
keys = epi_keys(recipe),
103+
latency_adjustment = latency_adjustment,
92104
columns = columns,
93105
skip = skip,
94106
id = id
@@ -109,6 +121,12 @@ step_epi_ahead <-
109121
ahead,
110122
prefix = "ahead_",
111123
default = NA,
124+
latency_adjustment = c(
125+
"None",
126+
"extend_ahead",
127+
"locf",
128+
"extend_lags"
129+
),
112130
columns = NULL,
113131
skip = FALSE,
114132
id = rand_id("epi_ahead")) {
@@ -123,8 +141,9 @@ step_epi_ahead <-
123141
)
124142
)
125143
}
144+
latency_adjustment <- rlang::arg_match(latency_adjustment)
126145
arg_is_nonneg_int(ahead)
127-
arg_is_chr_scalar(prefix, id)
146+
arg_is_chr_scalar(prefix, id, latency_adjustment)
128147
if (!is.null(columns)) {
129148
rlang::abort(c("The `columns` argument must be `NULL.",
130149
i = "Use `tidyselect` methods to choose columns to lead."
@@ -140,6 +159,7 @@ step_epi_ahead <-
140159
prefix = prefix,
141160
default = default,
142161
keys = epi_keys(recipe),
162+
latency_adjustment = latency_adjustment,
143163
columns = columns,
144164
skip = skip,
145165
id = id
@@ -150,7 +170,7 @@ step_epi_ahead <-
150170

151171
step_epi_lag_new <-
152172
function(terms, role, trained, lag, prefix, default, keys,
153-
columns, skip, id) {
173+
latency_adjustment, columns, skip, id) {
154174
step(
155175
subclass = "epi_lag",
156176
terms = terms,
@@ -160,14 +180,15 @@ step_epi_lag_new <-
160180
prefix = prefix,
161181
default = default,
162182
keys = keys,
183+
latency_adjustment = latency_adjustment,
163184
columns = columns,
164185
skip = skip,
165186
id = id
166187
)
167188
}
168189

169190
step_epi_ahead_new <-
170-
function(terms, role, trained, ahead, prefix, default, keys,
191+
function(terms, role, trained, ahead, prefix, default, keys, latency_adjustment,
171192
columns, skip, id) {
172193
step(
173194
subclass = "epi_ahead",
@@ -177,6 +198,7 @@ step_epi_ahead_new <-
177198
ahead = ahead,
178199
prefix = prefix,
179200
default = default,
201+
latency_adjustment = latency_adjustment,
180202
keys = keys,
181203
columns = columns,
182204
skip = skip,
@@ -196,6 +218,7 @@ prep.step_epi_lag <- function(x, training, info = NULL, ...) {
196218
prefix = x$prefix,
197219
default = x$default,
198220
keys = x$keys,
221+
latency_adjustment = x$latency_adjustment,
199222
columns = recipes_eval_select(x$terms, training, info),
200223
skip = x$skip,
201224
id = x$id
@@ -212,6 +235,7 @@ prep.step_epi_ahead <- function(x, training, info = NULL, ...) {
212235
prefix = x$prefix,
213236
default = x$default,
214237
keys = x$keys,
238+
latency_adjustment = x$latency_adjustment,
215239
columns = recipes_eval_select(x$terms, training, info),
216240
skip = x$skip,
217241
id = x$id
@@ -257,7 +281,8 @@ bake.step_epi_lag <- function(object, new_data, ...) {
257281

258282
#' @export
259283
bake.step_epi_ahead <- function(object, new_data, ...) {
260-
grid <- tidyr::expand_grid(col = object$columns, ahead = object$ahead) %>%
284+
ahead <- adjust_latency(object, new_data)
285+
grid <- tidyr::expand_grid(col = object$columns, ahead = ahead) %>%
261286
dplyr::mutate(
262287
newname = glue::glue("{object$prefix}{ahead}_{col}"),
263288
shift_val = -ahead,

R/utils-shift.R

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#' various ways of handling differences between the `as_of` date and the maximum
2+
#' time value
3+
#' @description
4+
#' adjust the ahead so that we will be predicting `ahead` days after the `as_of`
5+
#' date, rather than relative to the last day of data
6+
#' @keywords internal
7+
adjust_latency <- function(object, new_data) {
8+
method <- object$latency_adjustment
9+
ahead <- object$ahead
10+
if (is.na(method) || is.null(method) || method == "None") {
11+
return(object$ahead)
12+
} else if (method == "extend_ahead") {
13+
as_of <- attributes(new_data)$metadata$as_of
14+
if (FALSE && (typeof(as_of) != typeof(new_data$time_value))) {
15+
rlang::abort(glue::glue(
16+
"the data matrix `as_of` value is {as_of}, ",
17+
"and not a valid `time_type` with type ",
18+
"matching `time_value`'s type of ",
19+
"{typeof(new_data$time_value)}."
20+
))
21+
}
22+
# adjust the ahead so that we're predicting relative to the as_of date,
23+
# rather
24+
# than the last day of data
25+
time_values <- new_data$time_value
26+
if (length(time_values) > 0) {
27+
max_time <- max(time_values)
28+
shift_amount <- as.Date(as_of) - max_time
29+
if (is.null(as_of) || is.na(as_of)) {
30+
cli::cli_warn(glue::glue(
31+
"epi_data's `as_of` was {as_of}, setting to ",
32+
"the latest time value, {max_time}."
33+
))
34+
as_of <- max_time
35+
} else if (as_of < max_time) {
36+
cli::cli_abort(glue::glue(
37+
"`as_of` ({(as_of)}) is before the most ",
38+
"recent data ({max_time}). Remove before ",
39+
"predicting."
40+
))
41+
}
42+
effective_ahead <- as.integer(shift_amount + ahead)
43+
time_type <- attributes(new_data)$metadata$time_type
44+
45+
if ((grepl("day", time_type) && (shift_amount >= 10)) ||
46+
(grepl("week", time_type) && (shift_amount >= 4))||
47+
((time_type == "yearmonth") && (shift_amount >=2)) ||
48+
((time_type == "yearquarter") && (shift_amount >= 1)) ||
49+
((time_type == "year") && (shift_amount >= 1))) {
50+
cli::cli_warn(c(
51+
"!" = glue::glue("The ahead has been adjusted by {shift_amount}, ",
52+
"which is questionable for it's `time_type` of ",
53+
"{time_type}"),
54+
"i" = "input ahead: {ahead}",
55+
"i" = "shifted ahead: {effective_ahead}",
56+
"i" = "max_time = {max_time} -> as_of = {as_of}"
57+
))
58+
}
59+
return(effective_ahead)
60+
} else {
61+
rlang::abort("the `time_value` column of `new_data` is empty")
62+
}
63+
} else {
64+
rlang::abort(glue::glue(
65+
"Latency adjustment method {method} has not yet ",
66+
"been implemented for `step_epi_ahead`."
67+
))
68+
}
69+
}

man/adjust_latency.Rd

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/step_epi_shift.Rd

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/_snaps/utils-shift.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# extend_ahead warns in case of extreme adjustment
2+
3+
Code
4+
adjust_latency(object, x_adjust_ahead)
5+
Condition
6+
Warning:
7+
! The ahead has been adjusted by 100, which is questionable for it's `time_type` of day
8+
i input ahead: 7
9+
i shifted ahead: 107
10+
i max_time = 2021-07-19 -> as_of = 2021-10-27
11+
Output
12+
[1] 107
13+

tests/testthat/test-utils-shift.R

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
time_range <- as.Date("2021-01-01") + 0:199
2+
x_adjust_ahead <- tibble(
3+
geo_value = rep("place", 200),
4+
time_value = time_range,
5+
case_rate = sqrt(1:200) + atan(0.1 * 1:200) + sin(5 * 1:200) + 1,
6+
death_rate = atan(0.1 * 1:200) + cos(5 * 1:200) + 1
7+
) %>%
8+
as_epi_df(as_of = max(time_range) + 3)
9+
# confirm the delay is right
10+
11+
test_that("adjust_latency extend_ahead works", {
12+
# testing that POSIXct converts correctly (as well as basic types)
13+
expect_equal(
14+
attributes(x_adjust_ahead)$metadata$as_of - max(x_adjust_ahead$time_value),
15+
as.difftime(3, units = "days")
16+
)
17+
object <- list(latency_adjustment = "extend_ahead", ahead = 7)
18+
expect_no_error(adjusted_ahead <- adjust_latency(object, x_adjust_ahead))
19+
expect_type(adjusted_ahead, "integer")
20+
expect_equal(adjusted_ahead, 3 + 7)
21+
})
22+
23+
test_that("extend_ahead warns in case of extreme adjustment", {
24+
# warns if the ahead is relatively small
25+
attributes(x_adjust_ahead)$metadata$as_of <-
26+
max(x_adjust_ahead$time_value) + 100
27+
object <- list(latency_adjustment = "extend_ahead", ahead = 7)
28+
attributes(x_adjust_ahead)$metadata$time_type
29+
expect_snapshot(adjust_latency(object, x_adjust_ahead))
30+
})

0 commit comments

Comments
 (0)