Skip to content

Commit f2ce813

Browse files
committed
simplify, adjust some text
1 parent 0d59728 commit f2ce813

File tree

1 file changed

+41
-22
lines changed

1 file changed

+41
-22
lines changed

vignettes/articles/sliding.Rmd

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ claims and the number of new confirmed COVID-19 cases per 100,000 population
5858

5959
<details>
6060

61+
<summary>Load a data archive</summary>
62+
6163
We process as before, with the
6264
modification that we use `sync = locf` in `epix_merge()` so that the last
6365
version of each observation can be carried forward to extrapolate unavailable
@@ -89,7 +91,7 @@ Note that all of the warnings about the forecast date being less than the most
8991
recent update date of the data have been suppressed to avoid cluttering the
9092
output.
9193

92-
```{r make-arx-kweek, warning = FALSE}
94+
```{r arx-kweek-preliminaries, warning = FALSE}
9395
# Latest snapshot of data, and forecast dates
9496
x_latest <- epix_as_of(x, max_version = max(x$versions_end))
9597
fc_time_values <- seq(
@@ -105,8 +107,7 @@ k_week_ahead <- function(epi_df, outcome, predictors, ahead = 7, engine) {
105107
~ arx_forecaster(
106108
.x, outcome, predictors, engine,
107109
args_list = arx_args_list(ahead = ahead)
108-
) %>%
109-
extract2("predictions") %>%
110+
)$predictions %>%
110111
select(-geo_value),
111112
before = 120 - 1,
112113
ref_time_values = fc_time_values,
@@ -115,7 +116,9 @@ k_week_ahead <- function(epi_df, outcome, predictors, ahead = 7, engine) {
115116
select(geo_value, time_value, starts_with("fc")) %>%
116117
mutate(engine_type = engine$engine)
117118
}
119+
```
118120

121+
```{r make-arx-kweek}
119122
# Generate the forecasts and bind them together
120123
fc <- bind_rows(
121124
map(
@@ -146,11 +149,14 @@ rates. Note that even though we've fitted the model on all states, we'll just
146149
display the results for two states, California (CA) and Florida (FL), to get a
147150
sense of the model performance while keeping the graphic simple.
148151

149-
```{r plot-arx, message = FALSE, warning = FALSE, fig.width = 9, fig.height = 6}
152+
<details>
153+
154+
<summary>Code for plotting</summary>
155+
```{r plot-arx, message = FALSE, warning = FALSE}
150156
fc_cafl <- fc %>% filter(geo_value %in% c("ca", "fl"))
151157
x_latest_cafl <- x_latest %>% filter(geo_value %in% c("ca", "fl"))
152158
153-
ggplot(fc_cafl, aes(fc_target_date, group = time_value, fill = engine_type)) +
159+
p1 <- ggplot(fc_cafl, aes(fc_target_date, group = time_value, fill = engine_type)) +
154160
geom_line(
155161
data = x_latest_cafl, aes(x = time_value, y = case_rate),
156162
inherit.aes = FALSE, color = "gray50"
@@ -165,6 +171,11 @@ ggplot(fc_cafl, aes(fc_target_date, group = time_value, fill = engine_type)) +
165171
labs(x = "Date", y = "Reported COVID-19 case rates") +
166172
theme(legend.position = "none")
167173
```
174+
</details>
175+
176+
```{r show-plot1, fig.width = 9, fig.height = 6, echo=FALSE}
177+
p1
178+
```
168179

169180
For the two states of interest, simple linear regression clearly performs better
170181
than random forest in terms of accuracy of the predictions and does not result
@@ -185,6 +196,8 @@ to those models high variance predictions.
185196

186197
<details>
187198

199+
<summary>Data and forecasts. Similar to the above.</summary>
200+
188201
By leveraging the flexibility of `epiprocess`, we can apply the same techniques
189202
to data from other sources. Since some collaborators are in British Columbia,
190203
Canada, we'll do essentially the same thing for Canada as we did above.
@@ -312,6 +325,7 @@ combined data from all US states and territories) to train our model.
312325

313326
<details>
314327

328+
<summary>Download data using `{epidatr}`</summary>
315329
```{r load-data, eval=FALSE}
316330
# loading in the data
317331
states <- "*"
@@ -343,12 +357,6 @@ deaths_incidence_prop <- pub_covidcast(
343357
as_epi_archive(compactify = FALSE)
344358
345359
346-
fc_time_values <- seq(
347-
from = as.Date("2020-09-01"),
348-
to = as.Date("2021-12-31"),
349-
by = "1 month"
350-
)
351-
352360
x <- epix_merge(confirmed_incidence_prop, deaths_incidence_prop,
353361
sync = "locf"
354362
)
@@ -380,18 +388,21 @@ x <- x %>%
380388
saveRDS(x$DT, file = "case_death_rate_archive.rds")
381389
```
382390

383-
```{r load-stored-data, eval=FALSE}
391+
```{r load-stored-data}
384392
x <- readRDS("case_death_rate_archive.rds")
385393
x <- as_epi_archive(x)
386394
```
387-
388-
389395
</details>
390396

391397
Here we specify the ARX model.
392398

393399
```{r make-arx-model}
394400
aheads <- c(7, 14, 21)
401+
fc_time_values <- seq(
402+
from = as.Date("2020-09-01"),
403+
to = as.Date("2021-12-31"),
404+
by = "1 month"
405+
)
395406
forecaster <- function(x) {
396407
map(aheads, function(ahead) {
397408
arx_forecaster(
@@ -408,34 +419,37 @@ forecaster <- function(x) {
408419

409420
We can now use our forecaster function that we've created and use it in the
410421
pipeline for forecasting the predictions. We store the predictions into the
411-
`x_result` variable and calculate the most up to date version of the data in the
412-
epiarchive and store it as `x_latest`.
422+
`arx_preds` variable and calculate the most up to date version of the data in the
423+
epi archive and store it as `x_latest`.
413424

414425
```{r running-arx-forecaster}
415-
x_result <- x %>%
426+
arx_preds <- x %>%
416427
epix_slide(~ forecaster(.x),
417428
before = 120, ref_time_values = fc_time_values,
418429
names_sep = NULL
419430
) %>%
420431
mutate(engine_type = quantile_reg()$engine) %>%
421-
as_epi_df()
422-
x_result$ahead_val <- x_result$target_date - x_result$forecast_date
432+
as_epi_df() %>%
433+
mutate(ahead_val = target_date - forecast_date)
423434
424435
x_latest <- epix_as_of(x, max_version = max(x$versions_end))
425436
```
426437

427438
Now we plot both the actual and predicted 7 day average of the death rate for
428439
the chosen states
429440

430-
```{r plot-arx-asof, message = FALSE, warning = FALSE, fig.width = 9, fig.height = 6}
441+
<details>
442+
443+
<summary>Code for the plot</summary>
444+
```{r plot-arx-asof, message = FALSE, warning = FALSE}
431445
states_to_show <- c("ca", "ny", "mi", "az")
432-
fc_states <- x_result %>%
446+
fc_states <- arx_preds %>%
433447
filter(geo_value %in% states_to_show) %>%
434448
pivot_quantiles_wider(.pred_distn)
435449
436450
x_latest_states <- x_latest %>% filter(geo_value %in% states_to_show)
437451
438-
ggplot(fc_states, aes(target_date, group = time_value)) +
452+
p2 <- ggplot(fc_states, aes(target_date, group = time_value)) +
439453
geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`, fill = geo_value), alpha = 0.4) +
440454
geom_line(
441455
data = x_latest_states, aes(x = time_value, y = death_rate_7d_av),
@@ -451,3 +465,8 @@ ggplot(fc_states, aes(target_date, group = time_value)) +
451465
labs(x = "Date", y = "7 day average COVID-19 death rates") +
452466
theme(legend.position = "none")
453467
```
468+
</details>
469+
470+
```{r show-plot2, fig.width = 9, fig.height = 6, echo = FALSE}
471+
p2
472+
```

0 commit comments

Comments
 (0)