Skip to content

Commit ac83e78

Browse files
authored
Merge pull request #51 from cmu-delphi/lcb/epiprocess-0.10.0
Update code for breaking changes in epiprocess through v0.10
2 parents 55d7c50 + a3e7ae0 commit ac83e78

File tree

5 files changed

+60
-63
lines changed

5 files changed

+60
-63
lines changed

_casestudies/forecast-covid/forecast-covid.Rmd

Lines changed: 45 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,14 @@ ca <- ca %>%
6565
select(-pop)
6666
```
6767

68-
Now, use `epi_slide()`, to calculate trailing 7 day averages of cases and deaths.
68+
Now, use `epi_slide_mean()`, to calculate trailing 7 day averages of cases and deaths.
6969

7070
```{r trailing-averages}
7171
ca <- ca %>%
72-
epi_slide(cases = mean(cases), before = 6) %>%
73-
epi_slide(deaths = mean(deaths), before = 6)
72+
epi_slide_mean(c(cases, deaths), .window_size = 7) %>%
73+
epi_slide_mean(deaths, .window_size = 7) %>%
74+
select(-cases, -deaths) %>%
75+
rename(cases = cases_7dav, deaths = deaths_7dav)
7476
```
7577

7678
Visualize the data.
@@ -222,13 +224,13 @@ MASE <- function(truth, prediction) {
222224
```
223225

224226
```{r training-error}
225-
pred_train <- predict(reg_lagged)
226-
train$pred_train <- c(rep(NA, k), pred_train)
227+
pred_train <- predict(reg_lagged, train)
228+
train$pred_train <- pred_train
227229
228-
errors <- data.frame("MSE" = MSE(train$deaths[-(1:k)], pred_train),
229-
"MAE"= MAE(train$deaths[-(1:k)], pred_train),
230-
"MAPE" = MAPE(train$deaths[-(1:k)], pred_train),
231-
"MASE" = MASE(train$deaths[-(1:k)], pred_train),
230+
errors <- data.frame("MSE" = MSE(train$deaths[-seq_len(7+k-1)], pred_train[-seq_len(7+k-1)]),
231+
"MAE"= MAE(train$deaths[-seq_len(7+k-1)], pred_train[-seq_len(7+k-1)]),
232+
"MAPE" = MAPE(train$deaths[-seq_len(7+k-1)], pred_train[-seq_len(7+k-1)]),
233+
"MASE" = MASE(train$deaths[-seq_len(7+k-1)], pred_train[-seq_len(7+k-1)]),
232234
row.names = "training")
233235
errors
234236
```
@@ -411,15 +413,17 @@ epi_pred_cv_trailing <- epi_slide(
411413
trainer = linear_reg() %>% set_engine("lm"),
412414
args_list = arx_args_list(lags = k-1, ahead = 1L)
413415
)$predictions,
414-
# notice that `before` is not simply equal to w-1. That's because previously,
415-
# when considering a window from t to t+w, we had access to y_t, ..., y_{t+w}
416-
# and also to x_{t-k}, ..., x_{t+w-k}. (That's because of how we structured
416+
# notice that `.window_size` is not simply equal to w. That's because previously,
417+
# when considering a window from t to t+w-1, we had access to y_t, ..., y_{t+w-1}
418+
# and also to x_{t-k}, ..., x_{t+w-1-k}. (That's because of how we structured
417419
# the dataframe after manually lagging x.) So we were cheating by saying that
418-
# the trailing window had length w, as its actual size was w+k!
419-
before = (w+k-1),
420-
ref_time_values = fc_time_values,
421-
new_col_name = "fc"
422-
)
420+
# the trailing window had length w, as its actual size was (t+w-1)-(t-k)+1 = w+k!
421+
.window_size = w + k,
422+
.ref_time_values = fc_time_values,
423+
.new_col_name = "fc"
424+
) |>
425+
# split tibble-type column `fc` into multiple columns with names prefixed by `fc_`:
426+
unpack(fc, names_sep = "_")
423427
424428
# they match exactly
425429
head(epi_pred_cv_trailing %>% select(fc_.pred, fc_target_date))
@@ -441,8 +445,9 @@ epi_pred_cv <- epi_slide(
441445
)$predictions,
442446
before = Inf,
443447
ref_time_values = fc_time_values,
444-
new_col_name = "fc"
445-
)
448+
.new_col_name = "fc"
449+
) |>
450+
unpack(fc, names_sep = "_")
446451
447452
# they match exactly
448453
head(epi_pred_cv %>% select(fc_.pred, fc_target_date))
@@ -457,13 +462,13 @@ pred_all_past <- rep(NA, length = n - t0)
457462
n_ahead <- 7
458463
459464
for (t in (t0+1):n) {
460-
reg_all_past = lm(deaths ~ lagged_cases, data = ca,
461-
subset = (1:n) <= (t-n_ahead))
462-
pred_all_past[t-t0] = predict(reg_all_past, newdata = data.frame(ca[t, ]))
465+
reg_all_past = lm(deaths ~ lagged_cases, data = ca,
466+
subset = (1:n) <= (t-n_ahead))
467+
pred_all_past[t-t0] = predict(reg_all_past, newdata = data.frame(ca[t, ]))
463468
}
464469
465470
test$pred_cv_7 <- pred_all_past
466-
471+
467472
468473
fc_time_values_7 <- seq(
469474
from = as.Date("2021-02-23"),
@@ -481,8 +486,9 @@ epi_pred_cv_7 <- epi_slide(
481486
)$predictions,
482487
before = Inf,
483488
ref_time_values = fc_time_values_7,
484-
new_col_name = "fc"
485-
)
489+
.new_col_name = "fc"
490+
) |>
491+
unpack(fc, names_sep = "_")
486492
487493
# they match
488494
head(epi_pred_cv_7 %>% select(fc_.pred, fc_target_date))
@@ -721,8 +727,9 @@ ar_all_past <- epi_slide(
721727
)$predictions,
722728
before = Inf,
723729
ref_time_values = fc_time_values,
724-
new_col_name = "all_past"
725-
)
730+
.new_col_name = "all_past"
731+
) |>
732+
unpack(all_past, names_sep = "_")
726733
727734
ar_trailing <- epi_slide(
728735
ca,
@@ -734,8 +741,9 @@ ar_trailing <- epi_slide(
734741
)$predictions,
735742
before = w,
736743
ref_time_values = fc_time_values,
737-
new_col_name = "trailing"
738-
)
744+
.new_col_name = "trailing"
745+
) |>
746+
unpack(trailing, names_sep = "_")
739747
```
740748

741749
```{r plot-ar-predictions}
@@ -946,8 +954,9 @@ arx_all_past <- epi_slide(
946954
)$predictions,
947955
before = Inf,
948956
ref_time_values = fc_time_values,
949-
new_col_name = "all_past"
950-
)
957+
.new_col_name = "all_past"
958+
) |>
959+
unpack(all_past, names_sep = "_")
951960
952961
arx_trailing <- epi_slide(
953962
ca,
@@ -960,8 +969,9 @@ arx_trailing <- epi_slide(
960969
)$predictions,
961970
before = (w+k-1),
962971
ref_time_values = fc_time_values,
963-
new_col_name = "trailing"
964-
)
972+
.new_col_name = "trailing"
973+
) |>
974+
unpack(trailing, names_sep = "_")
965975
966976
967977
```
@@ -1546,12 +1556,8 @@ data_archive <- data_archive %>%
15461556
ref_time_values = fc_time_values,
15471557
function(x, gk, rtv) {
15481558
x %>%
1549-
#group_by(geo_value) %>%
1550-
epi_slide_mean(case_rate, before = 6L) %>%
1551-
epi_slide_mean(death_rate, before = 6L) %>%
1552-
#ungroup() %>%
1553-
rename(case_rate_7d_av = slide_value_case_rate,
1554-
death_rate_7d_av = slide_value_death_rate)
1559+
epi_slide_mean(case_rate, .window_size = 7L, .suffix = "_7d_av") %>%
1560+
epi_slide_mean(death_rate, .window_size = 7L, .suffix = "_7d_av")
15551561
}
15561562
) %>%
15571563
rename(version = time_value) %>%

_code/versioned_data.R

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,9 @@ if (!file.exists(file_path_1) | !file.exists(file_path_2)) {
5050
function(x, gk, rtv) {
5151
x |>
5252
group_by(geo_value) |>
53-
epi_slide_mean(case_rate, .window_size = 7L) |>
54-
epi_slide_mean(death_rate, .window_size = 7L) |>
55-
ungroup() |>
56-
rename(case_rate_7d_av = slide_value_case_rate,
57-
death_rate_7d_av = slide_value_death_rate)
53+
epi_slide_mean(case_rate, .window_size = 7L, .suffix = "_7d_av") |>
54+
epi_slide_mean(death_rate, .window_size = 7L, .suffix = "_7d_av") |>
55+
ungroup()
5856
}
5957
) |>
6058
rename(
@@ -74,11 +72,9 @@ if (!file.exists(file_path_1) | !file.exists(file_path_2)) {
7472
function(x, gk, rtv) {
7573
x |>
7674
group_by(geo_value) |>
77-
epi_slide_mean(death_rate, .window_size = 7L) |>
78-
epi_slide_mean(dv, .window_size = 7L) |>
79-
ungroup() |>
80-
rename(death_rate_7d_av = slide_value_death_rate,
81-
dv_7d_av = slide_value_dv
75+
epi_slide_mean(death_rate, .window_size = 7L, .suffix = "_7d_av") |>
76+
epi_slide_mean(dv, .window_size = 7L, .suffix = "_7d_av") |>
77+
ungroup()
8278
)
8379
}
8480
) |>

_code/weekly_hhs.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ if (!file.exists(file_path)) {
110110
agg_columns,
111111
.window_size = 7L,
112112
na.rm = TRUE,
113-
.ref_time_values = valid_slide_days
113+
.ref_time_values = valid_slide_days,
114+
.prefix = "slide_value_"
114115
) %>%
115116
select(-all_of(agg_columns)) %>%
116117
rename_with(~ gsub("slide_value_", "", .x)) %>%

slides/day1-afternoon.qmd

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ case_rates_df <- case_rates_df |>
580580
as_epi_df(as_of = as.Date("2024-01-01")) |>
581581
group_by(geo_value) |>
582582
epi_slide_mean(scaled_cases, .window_size = 14, na.rm = TRUE) |>
583-
rename(smoothed_scaled_cases = slide_value_scaled_cases)
583+
rename(smoothed_scaled_cases = scaled_cases_14dav)
584584
head(case_rates_df)
585585
```
586586

@@ -2072,10 +2072,8 @@ nowcast_res <- archive |>
20722072
epix_slide(
20732073
.f = lm_mod_pred,
20742074
.before = 14, # 14-day training period
2075-
.versions = ref_time_values,
2076-
.new_col_name = "res"
2075+
.versions = ref_time_values
20772076
) |>
2078-
unnest() |> # Nesting creates a list-column of data frames; unnesting flattens it back out into regular columns.
20792077
mutate(targeted_nowcast_date = targeted_nowcast_dates, time_value = actual_nowcast_date) |>
20802078
ungroup()
20812079
@@ -2462,10 +2460,8 @@ nowcast_res <- archive |>
24622460
epix_slide(
24632461
.f = lm_mod_pred, # Pass the function defined above
24642462
.before = 30, # Training period of 30 days
2465-
.versions = ref_time_values, # Determines the day where training data goes up to (not inclusive)
2466-
.new_col_name = "res"
2463+
.versions = ref_time_values # Determines the day where training data goes up to (not inclusive)
24672464
) |>
2468-
unnest() |>
24692465
mutate(targeted_nowcast_date = targeted_nowcast_dates,
24702466
time_value = actual_nowcast_date)
24712467
@@ -2620,4 +2616,4 @@ cowplot::plot_grid(p1, p2)
26202616

26212617
* Group built [`{rtestim}`](https://dajmcdon.github.io/rtestim) doing for this nonparametrically.
26222618

2623-
* We may come back to this later...
2619+
* We may come back to this later...

slides/day2-morning.qmd

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1802,11 +1802,9 @@ ca_archive <- ca_archive |>
18021802
function(x, gk, rtv) {
18031803
x |>
18041804
group_by(geo_value) |>
1805-
epi_slide_mean(case_rate, .window_size = 7L) |>
1806-
epi_slide_mean(death_rate, .window_size = 7L) |>
1807-
ungroup() |>
1808-
rename(case_rate_7d_av = slide_value_case_rate,
1809-
death_rate_7d_av = slide_value_death_rate)
1805+
epi_slide_mean(case_rate, .window_size = 7L, .suffix = "_7d_av") |>
1806+
epi_slide_mean(death_rate, .window_size = 7L, .suffix = "_7d_av") |>
1807+
ungroup()
18101808
}
18111809
) |>
18121810
rename(

0 commit comments

Comments
 (0)