Skip to content

Commit eb5d2a8

Browse files
committed
clean up d1 gfx
1 parent 4a21123 commit eb5d2a8

22 files changed

+7488
-7720
lines changed

_freeze/slides/day1-afternoon/execute-results/html.json

+2-2
Large diffs are not rendered by default.

_freeze/slides/day1-afternoon/figure-revealjs/final-vs-revisions-plot-1.svg

+474-474
Loading

_freeze/slides/day1-afternoon/figure-revealjs/nchs-plot-val-different-ver-1.svg

+893-893
Loading

_freeze/slides/day1-afternoon/figure-revealjs/nowcast-fun-plot-results-1.svg

+181-177
Loading

_freeze/slides/day1-afternoon/figure-revealjs/nowcast-smoothed-vis-1.svg

+193-189
Loading

_freeze/slides/day1-afternoon/figure-revealjs/plot-corr-lags-ex-1.svg

+34-24
Loading

_freeze/slides/day1-afternoon/figure-revealjs/plot-growth-rates-ex-1.svg

+1,787-1,963
Loading

_freeze/slides/day1-afternoon/figure-revealjs/plot-outlier-ex-1.svg

+58-31
Loading

_freeze/slides/day1-afternoon/figure-revealjs/regression-nowcast-plot-linreg-1.svg

+44-23
Loading

_freeze/slides/day1-afternoon/figure-revealjs/regression-nowcast-plot-quantreg-1.svg

+23-25
Loading

_freeze/slides/day1-afternoon/figure-revealjs/smoothed-original-plot-1.svg

+31-25
Loading

slides/day1-afternoon.qmd

+50-70
Original file line numberDiff line numberDiff line change
@@ -552,24 +552,32 @@ head(case_rates_df)
552552

553553
Congratulations for making it through this crash course! That's all for this `glimpse()` into the tidyverse.
554554

555+
# Epiverse Software Ecosystem
556+
557+
555558
## Epi. data processing with `epiprocess`
556559

557-
* `epiprocess` is a package that offers additional functionality to pre-process such epidemiological data.
560+
* `epiprocess` is a package that offers additional functionality to pre-process epidemiological data.
558561
* You can work with an `epi_df` like you can with a tibble by using dplyr verbs.
559562
* For example, on `cases_df`, we can easily use `epi_slide_mean()` to calculate trailing 14 day averages of cases:
560563

561564
```{r trailing-average-ex}
562565
#| echo: true
566+
#| output-location: column
563567
case_rates_df <- case_rates_df |>
564568
as_epi_df(as_of = as.Date("2024-01-01")) |>
565569
group_by(geo_value) |>
566-
epi_slide_mean(scaled_cases, .window_size = 14, na.rm = TRUE) |>
570+
epi_slide_mean(
571+
scaled_cases,
572+
.window_size = 14,
573+
na.rm = TRUE
574+
) |>
567575
rename(smoothed_scaled_cases = scaled_cases_14dav)
568-
head(case_rates_df)
576+
case_rates_df
569577
```
570578

571579
## Epi. data processing with `epiprocess`
572-
It is easy to produce an autoplot the smoothed confirmed daily cases for each `geo_value`:
580+
It is easy to produce an autoplot of the smoothed confirmed daily cases for each `geo_value`:
573581
```{r autoplot-ex}
574582
#| echo: true
575583
case_rates_df |>
@@ -586,14 +594,14 @@ ggplot(case_rates_df) +
586594
geom_line(aes(x = time_value, y = scaled_cases, color = geo_value), size = 0.25) +
587595
geom_line(aes(x = time_value, y = smoothed_scaled_cases, color = geo_value), size = 1) +
588596
facet_wrap(vars(geo_value), nrow = 1, scales = "free") +
589-
ylab("Cases per 100k") +
590-
theme_bw() +
597+
ylab("Cases per 100k") + xlab("Reference date") +
598+
scale_color_delphi() +
591599
theme(legend.position = "none") +
592600
guides(x = guide_axis(angle = 25))
593601
```
594602
Now, before exploring some more features of `epiprocess`, let's have a look at the epiverse software ecosystem it's part of...
595603

596-
# Epiverse Software Ecosystem
604+
597605

598606
## The epiverse ecosystem
599607
Interworking, community-driven, packages for epi tracking & forecasting.
@@ -712,6 +720,9 @@ rbind(
712720
mutate(lag = as.factor(lag)) |>
713721
ggplot(aes(cor)) +
714722
geom_density(aes(fill = lag, col = lag), alpha = 0.5) +
723+
scale_fill_delphi() +
724+
scale_color_delphi() +
725+
scale_y_continuous(expand = expansion(c(0, .05))) +
715726
labs(x = "Correlation", y = "Density", fill = "Lag", col = "Lag")
716727
```
717728

@@ -766,9 +777,7 @@ edfg <- filter(edf, geo_value %in% c("ut", "ca")) |>
766777
```
767778

768779
```{r plot-growth-rates-ex}
769-
#| fig-align: center
770-
#| fig-width: 12
771-
#| fig-height: 5
780+
#| fig-width: 10
772781
edfg |>
773782
select(-death_rate) |>
774783
mutate(`Growth Rate` = gr_cases) |>
@@ -783,8 +792,7 @@ edfg |>
783792
geom_hline(aes(yintercept = 0),
784793
data = tibble(name = "Growth Rate"),
785794
linetype = "dashed") +
786-
theme_bw() +
787-
scale_x_date(name = "Date") +
795+
scale_x_date(name = "Reference date", date_breaks = "6 months", date_labels = "%b %Y") +
788796
scale_y_continuous(name = NULL)
789797
```
790798

@@ -839,12 +847,12 @@ edfo |>
839847
) |>
840848
ggplot(aes(x = time_value)) +
841849
geom_line(aes(y = value, color = name)) +
842-
scale_color_brewer(palette = "Set1", name = "") +
850+
scale_color_manual(name = "", values = c(primary, tertiary)) +
843851
geom_hline(yintercept = 0) +
844852
facet_wrap(vars(geo_value), scales = "free_y", nrow = 1) +
845853
scale_x_date(minor_breaks = "month", date_labels = "%b %Y") +
846-
labs(x = "Date", y = "COVID-19 case rates") +
847-
theme(legend.position = c(.075, .8),
854+
labs(x = "Reference date", y = "COVID-19 case rates") +
855+
theme(legend.position = c(.15, .8),
848856
legend.background = element_rect(fill = NA),
849857
legend.key = element_rect(fill = NA))
850858
```
@@ -1308,9 +1316,7 @@ values_final <- epix_as_of(nchs_archive, max(nchs_archive$versions_end))
13081316

13091317
```{r final-vs-revisions-plot}
13101318
#| echo: false
1311-
#| fig-width: 9
1312-
#| fig-height: 4
1313-
#| out-height: "500px"
1319+
#| fig-width: 7
13141320
ggplot(value_at_lags, aes(x = time_value, y = mortality)) +
13151321
geom_line(aes(color = factor(lag))) +
13161322
facet_wrap(~ geo_value, scales = "free_y", ncol = 1) +
@@ -1340,9 +1346,7 @@ nchs_snapshots = map(versions, function(v) {
13401346

13411347
```{r nchs-plot-val-different-ver}
13421348
#| echo: false
1343-
#| fig-width: 9
1344-
#| fig-height: 4
1345-
#| out-height: "500px"
1349+
#| fig-width: 7
13461350
13471351
ggplot(nchs_snapshots |> filter(!latest),
13481352
aes(x = time_value, y = mortality)) +
@@ -1524,41 +1528,29 @@ We begin by templatizing our previous operations.
15241528

15251529
```{r nowcaster-to-slide}
15261530
#| echo: true
1527-
1528-
nowcaster = function(x, g, t, wl=180, appx=approx_final_lag) {
1529-
1530-
1531-
initial_data = x$DT |>
1531+
nowcaster <- function(x, g, t, wl=180, appx=approx_final_lag) {
1532+
initial_data <- x$DT |>
15321533
group_by(geo_value, time_value) |>
15331534
filter(version == min(version)) |>
15341535
filter(time_value >= t - wl - appx & time_value <= t - appx) |>
15351536
rename(initial_val = mortality) |>
15361537
select(geo_value, time_value, initial_val)
1537-
1538-
finalized_data = x$DT |>
1538+
finalized_data <- x$DT |>
15391539
group_by(geo_value, time_value) |>
15401540
filter(version == max(version)) |>
15411541
filter(time_value >= t - wl - appx & time_value <= t - appx) |>
15421542
rename(finalized_val = mortality) |>
15431543
select(geo_value, time_value, finalized_val)
1544-
1545-
ratio = finalized_data |>
1544+
ratio <- finalized_data |>
15461545
inner_join(initial_data, by = c("geo_value", "time_value")) |>
15471546
mutate(ratio = finalized_val / initial_val) |>
15481547
pull(ratio) |>
1549-
median(na.rm=TRUE)
1550-
1551-
last_avail = epix_as_of(x, t) |>
1548+
median(na.rm = TRUE)
1549+
last_avail <- epix_as_of(x, t) |>
15521550
slice_max(time_value) |>
15531551
pull(mortality)
1554-
1555-
1556-
res = tibble(geo_value = x$geo_value, target_date = t, nowcast = last_avail * ratio)
1557-
1558-
return(res)
1559-
1552+
tibble(geo_value = x$geo_value, target_date = t, nowcast = last_avail * ratio)
15601553
}
1561-
15621554
```
15631555

15641556
## Sanity check of `epix_slide()`
@@ -1727,40 +1719,30 @@ nowcasts <- nchs_archive |>
17271719

17281720
```{r nowcaster-to-slide-again}
17291721
#| echo: true
1730-
#| code-line-numbers: "|4,11"
1731-
1732-
nowcaster = function(x, g, t, wl=180, appx=approx_final_lag) {
1733-
1734-
initial_data = x$DT |>
1722+
#| code-line-numbers: "|3,9"
1723+
nowcaster <- function(x, g, t, wl=180, appx=approx_final_lag) {
1724+
initial_data <- x$DT |>
17351725
group_by(geo_value, time_value) |>
17361726
filter(version == min(version)) |>
17371727
filter(time_value >= t - wl - appx & time_value <= t - appx) |>
17381728
rename(initial_val = mortality) |>
17391729
select(geo_value, time_value, initial_val)
1740-
1741-
finalized_data = x$DT |>
1730+
finalized_data <- x$DT |>
17421731
group_by(geo_value, time_value) |>
17431732
filter(version == max(version)) |>
17441733
filter(time_value >= t - wl - appx & time_value <= t - appx) |>
17451734
rename(finalized_val = mortality) |>
17461735
select(geo_value, time_value, finalized_val)
1747-
1748-
ratio = finalized_data |>
1736+
ratio <- finalized_data |>
17491737
inner_join(initial_data, by = c("geo_value", "time_value")) |>
17501738
mutate(ratio = finalized_val / initial_val) |>
17511739
pull(ratio) |>
17521740
median(na.rm=TRUE)
1753-
1754-
last_avail = epix_as_of(x, t) |>
1741+
last_avail <- epix_as_of(x, t) |>
17551742
slice_max(time_value) |>
17561743
pull(mortality)
1757-
1758-
res = tibble(geo_value = x$geo_value, target_date = t, nowcast = last_avail * ratio)
1759-
1760-
return(res)
1761-
1744+
tibble(geo_value = x$geo_value, target_date = t, nowcast = last_avail * ratio)
17621745
}
1763-
17641746
```
17651747

17661748

@@ -1798,16 +1780,14 @@ finalized_val = nchs_archive$DT |>
17981780

17991781
```{r nowcast-fun-plot-results}
18001782
#| echo: false
1801-
1783+
#| fig-width: 7
18021784
ggplot() +
18031785
geom_line(data = finalized_val, aes(x = time_value, y = mortality, color = "Finalized")) +
18041786
geom_line(data = provisional_val, aes(x = target_date, y = value, color = "Provisional")) +
18051787
geom_line(data = nowcasts, aes(x = target_date, y = nowcast, color = "Nowcast")) +
1806-
scale_color_delphi() +
18071788
ylab("Mortality") +
1808-
xlab("Date") +
1809-
scale_color_delphi() +
1810-
theme(legend.position = "bottom")
1789+
xlab("Reference date") +
1790+
scale_color_delphi(name = "")
18111791
```
18121792

18131793
* The real-time counts tend to be biased below the finalized counts. Nowcasted values tend to provide a much better approximation of the truth (at least for these dates).
@@ -1831,7 +1811,7 @@ smoothed_nowcasts <- epi_slide(
18311811

18321812
```{r nowcast-smoothed-vis}
18331813
#| echo: false
1834-
1814+
#| fig-width: 7
18351815
cbPalette = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442",
18361816
"#0072B2", "#D55E00", "#CC79A7")
18371817
@@ -1840,11 +1820,9 @@ ggplot() +
18401820
geom_line(data = provisional_val, aes(x = target_date, y = value, color = "Provisional")) +
18411821
geom_line(data = nowcasts, aes(x = target_date, y = nowcast, color = "Nowcast")) +
18421822
geom_line(data = smoothed_nowcasts, aes(x = time_value, y = smoothed_nowcasts, color = "Smoothed")) +
1843-
scale_color_delphi() +
1823+
scale_color_delphi(name = "") +
18441824
ylab("Mortality") +
1845-
xlab("Date") +
1846-
scale_color_delphi() +
1847-
theme(legend.position = "bottom")
1825+
xlab("Reference date")
18481826
```
18491827

18501828

@@ -2248,6 +2226,8 @@ compare two different configurations:
22482226
* one that also uses hospitalizations as a predictor
22492227
* and two that use quantile reg instead of linear reg
22502228

2229+
## Model settings
2230+
22512231
```{r regression-model-settings}
22522232
#| echo: true
22532233
@@ -2372,7 +2352,7 @@ nowcast_comparison |>
23722352
geom_line(aes(target_date, mortality)) +
23732353
geom_line(aes(target_date, prediction, color = Nowcaster)) +
23742354
scale_color_delphi() +
2375-
xlab("Date") +
2355+
xlab("Reference date") +
23762356
ylab("Mortality")
23772357
```
23782358

@@ -2387,8 +2367,8 @@ nowcast_comparison |>
23872367
ggplot() +
23882368
geom_line(aes(target_date, mortality)) +
23892369
geom_line(aes(target_date, prediction, color = Nowcaster)) +
2390-
scale_color_delphi() +
2391-
xlab("Date") +
2370+
scale_color_delphi(name = "") +
2371+
xlab("Reference date") +
23922372
ylab("Mortality")
23932373
```
23942374

0 commit comments

Comments
 (0)