@@ -65,12 +65,14 @@ ca <- ca %>%
65
65
select(-pop)
66
66
```
67
67
68
- Now, use ` epi_slide ()` , to calculate trailing 7 day averages of cases and deaths.
68
+ Now, use ` epi_slide_mean ()` , to calculate trailing 7 day averages of cases and deaths.
69
69
70
70
``` {r trailing-averages}
71
71
ca <- ca %>%
72
- epi_slide(cases = mean(cases), before = 6) %>%
73
- epi_slide(deaths = mean(deaths), before = 6)
72
+ epi_slide_mean(c(cases, deaths), .window_size = 7) %>%
73
+ epi_slide_mean(deaths, .window_size = 7) %>%
74
+ select(-cases, -deaths) %>%
75
+ rename(cases = cases_7dav, deaths = deaths_7dav)
74
76
```
75
77
76
78
Visualize the data.
@@ -222,13 +224,13 @@ MASE <- function(truth, prediction) {
222
224
```
223
225
224
226
``` {r training-error}
225
- pred_train <- predict(reg_lagged)
226
- train$pred_train <- c(rep(NA, k), pred_train)
227
+ pred_train <- predict(reg_lagged, train )
228
+ train$pred_train <- pred_train
227
229
228
- errors <- data.frame("MSE" = MSE(train$deaths[-(1:k )], pred_train),
229
- "MAE"= MAE(train$deaths[-(1:k )], pred_train),
230
- "MAPE" = MAPE(train$deaths[-(1:k )], pred_train),
231
- "MASE" = MASE(train$deaths[-(1:k )], pred_train),
230
+ errors <- data.frame("MSE" = MSE(train$deaths[-seq_len(7+k-1 )], pred_train[-seq_len(7+k-1)] ),
231
+ "MAE"= MAE(train$deaths[-seq_len(7+k-1 )], pred_train[-seq_len(7+k-1)] ),
232
+ "MAPE" = MAPE(train$deaths[-seq_len(7+k-1 )], pred_train[-seq_len(7+k-1)] ),
233
+ "MASE" = MASE(train$deaths[-seq_len(7+k-1 )], pred_train[-seq_len(7+k-1)] ),
232
234
row.names = "training")
233
235
errors
234
236
```
@@ -411,15 +413,17 @@ epi_pred_cv_trailing <- epi_slide(
411
413
trainer = linear_reg() %>% set_engine("lm"),
412
414
args_list = arx_args_list(lags = k-1, ahead = 1L)
413
415
)$predictions,
414
- # notice that `before ` is not simply equal to w-1 . That's because previously,
415
- # when considering a window from t to t+w, we had access to y_t, ..., y_{t+w}
416
- # and also to x_{t-k}, ..., x_{t+w-k}. (That's because of how we structured
416
+ # notice that `.window_size ` is not simply equal to w. That's because previously,
417
+ # when considering a window from t to t+w-1 , we had access to y_t, ..., y_{t+w-1 }
418
+ # and also to x_{t-k}, ..., x_{t+w-1- k}. (That's because of how we structured
417
419
# the dataframe after manually lagging x.) So we were cheating by saying that
418
- # the trailing window had length w, as its actual size was w+k!
419
- before = (w+k-1),
420
- ref_time_values = fc_time_values,
421
- new_col_name = "fc"
422
- )
420
+ # the trailing window had length w, as its actual size was (t+w-1)-(t-k)+1 = w+k!
421
+ .window_size = w + k,
422
+ .ref_time_values = fc_time_values,
423
+ .new_col_name = "fc"
424
+ ) |>
425
+ # split tibble-type column `fc` into multiple columns with names prefixed by `fc_`:
426
+ unpack(fc, names_sep = "_")
423
427
424
428
# they match exactly
425
429
head(epi_pred_cv_trailing %>% select(fc_.pred, fc_target_date))
@@ -441,8 +445,9 @@ epi_pred_cv <- epi_slide(
441
445
)$predictions,
442
446
before = Inf,
443
447
ref_time_values = fc_time_values,
444
- new_col_name = "fc"
445
- )
448
+ .new_col_name = "fc"
449
+ ) |>
450
+ unpack(fc, names_sep = "_")
446
451
447
452
# they match exactly
448
453
head(epi_pred_cv %>% select(fc_.pred, fc_target_date))
@@ -457,13 +462,13 @@ pred_all_past <- rep(NA, length = n - t0)
457
462
n_ahead <- 7
458
463
459
464
for (t in (t0+1):n) {
460
- reg_all_past = lm(deaths ~ lagged_cases, data = ca,
461
- subset = (1:n) <= (t-n_ahead))
462
- pred_all_past[t-t0] = predict(reg_all_past, newdata = data.frame(ca[t, ]))
465
+ reg_all_past = lm(deaths ~ lagged_cases, data = ca,
466
+ subset = (1:n) <= (t-n_ahead))
467
+ pred_all_past[t-t0] = predict(reg_all_past, newdata = data.frame(ca[t, ]))
463
468
}
464
469
465
470
test$pred_cv_7 <- pred_all_past
466
-
471
+
467
472
468
473
fc_time_values_7 <- seq(
469
474
from = as.Date("2021-02-23"),
@@ -481,8 +486,9 @@ epi_pred_cv_7 <- epi_slide(
481
486
)$predictions,
482
487
before = Inf,
483
488
ref_time_values = fc_time_values_7,
484
- new_col_name = "fc"
485
- )
489
+ .new_col_name = "fc"
490
+ ) |>
491
+ unpack(fc, names_sep = "_")
486
492
487
493
# they match
488
494
head(epi_pred_cv_7 %>% select(fc_.pred, fc_target_date))
@@ -721,8 +727,9 @@ ar_all_past <- epi_slide(
721
727
)$predictions,
722
728
before = Inf,
723
729
ref_time_values = fc_time_values,
724
- new_col_name = "all_past"
725
- )
730
+ .new_col_name = "all_past"
731
+ ) |>
732
+ unpack(all_past, names_sep = "_")
726
733
727
734
ar_trailing <- epi_slide(
728
735
ca,
@@ -734,8 +741,9 @@ ar_trailing <- epi_slide(
734
741
)$predictions,
735
742
before = w,
736
743
ref_time_values = fc_time_values,
737
- new_col_name = "trailing"
738
- )
744
+ .new_col_name = "trailing"
745
+ ) |>
746
+ unpack(trailing, names_sep = "_")
739
747
```
740
748
741
749
``` {r plot-ar-predictions}
@@ -946,8 +954,9 @@ arx_all_past <- epi_slide(
946
954
)$predictions,
947
955
before = Inf,
948
956
ref_time_values = fc_time_values,
949
- new_col_name = "all_past"
950
- )
957
+ .new_col_name = "all_past"
958
+ ) |>
959
+ unpack(all_past, names_sep = "_")
951
960
952
961
arx_trailing <- epi_slide(
953
962
ca,
@@ -960,8 +969,9 @@ arx_trailing <- epi_slide(
960
969
)$predictions,
961
970
before = (w+k-1),
962
971
ref_time_values = fc_time_values,
963
- new_col_name = "trailing"
964
- )
972
+ .new_col_name = "trailing"
973
+ ) |>
974
+ unpack(trailing, names_sep = "_")
965
975
966
976
967
977
```
@@ -1546,12 +1556,8 @@ data_archive <- data_archive %>%
1546
1556
ref_time_values = fc_time_values,
1547
1557
function(x, gk, rtv) {
1548
1558
x %>%
1549
- #group_by(geo_value) %>%
1550
- epi_slide_mean(case_rate, before = 6L) %>%
1551
- epi_slide_mean(death_rate, before = 6L) %>%
1552
- #ungroup() %>%
1553
- rename(case_rate_7d_av = slide_value_case_rate,
1554
- death_rate_7d_av = slide_value_death_rate)
1559
+ epi_slide_mean(case_rate, .window_size = 7L, .suffix = "_7d_av") %>%
1560
+ epi_slide_mean(death_rate, .window_size = 7L, .suffix = "_7d_av")
1555
1561
}
1556
1562
) %>%
1557
1563
rename(version = time_value) %>%
0 commit comments