@@ -552,24 +552,32 @@ head(case_rates_df)
552
552
553
553
Congratulations for making it through this crash course! That's all for this ` glimpse() ` into the tidyverse.
554
554
555
+ # Epiverse Software Ecosystem
556
+
557
+
555
558
## Epi. data processing with ` epiprocess `
556
559
557
- * ` epiprocess ` is a package that offers additional functionality to pre-process such epidemiological data.
560
+ * ` epiprocess ` is a package that offers additional functionality to pre-process epidemiological data.
558
561
* You can work with an ` epi_df ` like you can with a tibble by using dplyr verbs.
559
562
* For example, on ` cases_df ` , we can easily use ` epi_slide_mean() ` to calculate trailing 14 day averages of cases:
560
563
561
564
``` {r trailing-average-ex}
562
565
#| echo: true
566
+ #| output-location: column
563
567
case_rates_df <- case_rates_df |>
564
568
as_epi_df(as_of = as.Date("2024-01-01")) |>
565
569
group_by(geo_value) |>
566
- epi_slide_mean(scaled_cases, .window_size = 14, na.rm = TRUE) |>
570
+ epi_slide_mean(
571
+ scaled_cases,
572
+ .window_size = 14,
573
+ na.rm = TRUE
574
+ ) |>
567
575
rename(smoothed_scaled_cases = scaled_cases_14dav)
568
- head( case_rates_df)
576
+ case_rates_df
569
577
```
570
578
571
579
## Epi. data processing with ` epiprocess `
572
- It is easy to produce an autoplot the smoothed confirmed daily cases for each ` geo_value ` :
580
+ It is easy to produce an autoplot of the smoothed confirmed daily cases for each ` geo_value ` :
573
581
``` {r autoplot-ex}
574
582
#| echo: true
575
583
case_rates_df |>
@@ -586,14 +594,14 @@ ggplot(case_rates_df) +
586
594
geom_line(aes(x = time_value, y = scaled_cases, color = geo_value), size = 0.25) +
587
595
geom_line(aes(x = time_value, y = smoothed_scaled_cases, color = geo_value), size = 1) +
588
596
facet_wrap(vars(geo_value), nrow = 1, scales = "free") +
589
- ylab("Cases per 100k") +
590
- theme_bw () +
597
+ ylab("Cases per 100k") + xlab("Reference date") +
598
+ scale_color_delphi () +
591
599
theme(legend.position = "none") +
592
600
guides(x = guide_axis(angle = 25))
593
601
```
594
602
Now, before exploring some more features of ` epiprocess ` , let's have a look at the epiverse software ecosystem it's part of...
595
603
596
- # Epiverse Software Ecosystem
604
+
597
605
598
606
## The epiverse ecosystem
599
607
Interworking, community-driven, packages for epi tracking & forecasting.
@@ -712,6 +720,9 @@ rbind(
712
720
mutate(lag = as.factor(lag)) |>
713
721
ggplot(aes(cor)) +
714
722
geom_density(aes(fill = lag, col = lag), alpha = 0.5) +
723
+ scale_fill_delphi() +
724
+ scale_color_delphi() +
725
+ scale_y_continuous(expand = expansion(c(0, .05))) +
715
726
labs(x = "Correlation", y = "Density", fill = "Lag", col = "Lag")
716
727
```
717
728
@@ -766,9 +777,7 @@ edfg <- filter(edf, geo_value %in% c("ut", "ca")) |>
766
777
```
767
778
768
779
``` {r plot-growth-rates-ex}
769
- #| fig-align: center
770
- #| fig-width: 12
771
- #| fig-height: 5
780
+ #| fig-width: 10
772
781
edfg |>
773
782
select(-death_rate) |>
774
783
mutate(`Growth Rate` = gr_cases) |>
@@ -783,8 +792,7 @@ edfg |>
783
792
geom_hline(aes(yintercept = 0),
784
793
data = tibble(name = "Growth Rate"),
785
794
linetype = "dashed") +
786
- theme_bw() +
787
- scale_x_date(name = "Date") +
795
+ scale_x_date(name = "Reference date", date_breaks = "6 months", date_labels = "%b %Y") +
788
796
scale_y_continuous(name = NULL)
789
797
```
790
798
@@ -839,12 +847,12 @@ edfo |>
839
847
) |>
840
848
ggplot(aes(x = time_value)) +
841
849
geom_line(aes(y = value, color = name)) +
842
- scale_color_brewer(palette = "Set1 ", name = "" ) +
850
+ scale_color_manual(name = "", values = c(primary, tertiary) ) +
843
851
geom_hline(yintercept = 0) +
844
852
facet_wrap(vars(geo_value), scales = "free_y", nrow = 1) +
845
853
scale_x_date(minor_breaks = "month", date_labels = "%b %Y") +
846
- labs(x = "Date ", y = "COVID-19 case rates") +
847
- theme(legend.position = c(.075 , .8),
854
+ labs(x = "Reference date ", y = "COVID-19 case rates") +
855
+ theme(legend.position = c(.15 , .8),
848
856
legend.background = element_rect(fill = NA),
849
857
legend.key = element_rect(fill = NA))
850
858
```
@@ -1308,9 +1316,7 @@ values_final <- epix_as_of(nchs_archive, max(nchs_archive$versions_end))
1308
1316
1309
1317
``` {r final-vs-revisions-plot}
1310
1318
#| echo: false
1311
- #| fig-width: 9
1312
- #| fig-height: 4
1313
- #| out-height: "500px"
1319
+ #| fig-width: 7
1314
1320
ggplot(value_at_lags, aes(x = time_value, y = mortality)) +
1315
1321
geom_line(aes(color = factor(lag))) +
1316
1322
facet_wrap(~ geo_value, scales = "free_y", ncol = 1) +
@@ -1340,9 +1346,7 @@ nchs_snapshots = map(versions, function(v) {
1340
1346
1341
1347
``` {r nchs-plot-val-different-ver}
1342
1348
#| echo: false
1343
- #| fig-width: 9
1344
- #| fig-height: 4
1345
- #| out-height: "500px"
1349
+ #| fig-width: 7
1346
1350
1347
1351
ggplot(nchs_snapshots |> filter(!latest),
1348
1352
aes(x = time_value, y = mortality)) +
@@ -1524,41 +1528,29 @@ We begin by templatizing our previous operations.
1524
1528
1525
1529
``` {r nowcaster-to-slide}
1526
1530
#| echo: true
1527
-
1528
- nowcaster = function(x, g, t, wl=180, appx=approx_final_lag) {
1529
-
1530
-
1531
- initial_data = x$DT |>
1531
+ nowcaster <- function(x, g, t, wl=180, appx=approx_final_lag) {
1532
+ initial_data <- x$DT |>
1532
1533
group_by(geo_value, time_value) |>
1533
1534
filter(version == min(version)) |>
1534
1535
filter(time_value >= t - wl - appx & time_value <= t - appx) |>
1535
1536
rename(initial_val = mortality) |>
1536
1537
select(geo_value, time_value, initial_val)
1537
-
1538
- finalized_data = x$DT |>
1538
+ finalized_data <- x$DT |>
1539
1539
group_by(geo_value, time_value) |>
1540
1540
filter(version == max(version)) |>
1541
1541
filter(time_value >= t - wl - appx & time_value <= t - appx) |>
1542
1542
rename(finalized_val = mortality) |>
1543
1543
select(geo_value, time_value, finalized_val)
1544
-
1545
- ratio = finalized_data |>
1544
+ ratio <- finalized_data |>
1546
1545
inner_join(initial_data, by = c("geo_value", "time_value")) |>
1547
1546
mutate(ratio = finalized_val / initial_val) |>
1548
1547
pull(ratio) |>
1549
- median(na.rm=TRUE)
1550
-
1551
- last_avail = epix_as_of(x, t) |>
1548
+ median(na.rm = TRUE)
1549
+ last_avail <- epix_as_of(x, t) |>
1552
1550
slice_max(time_value) |>
1553
1551
pull(mortality)
1554
-
1555
-
1556
- res = tibble(geo_value = x$geo_value, target_date = t, nowcast = last_avail * ratio)
1557
-
1558
- return(res)
1559
-
1552
+ tibble(geo_value = x$geo_value, target_date = t, nowcast = last_avail * ratio)
1560
1553
}
1561
-
1562
1554
```
1563
1555
1564
1556
## Sanity check of ` epix_slide() `
@@ -1727,40 +1719,30 @@ nowcasts <- nchs_archive |>
1727
1719
1728
1720
``` {r nowcaster-to-slide-again}
1729
1721
#| echo: true
1730
- #| code-line-numbers: "|4,11"
1731
-
1732
- nowcaster = function(x, g, t, wl=180, appx=approx_final_lag) {
1733
-
1734
- initial_data = x$DT |>
1722
+ #| code-line-numbers: "|3,9"
1723
+ nowcaster <- function(x, g, t, wl=180, appx=approx_final_lag) {
1724
+ initial_data <- x$DT |>
1735
1725
group_by(geo_value, time_value) |>
1736
1726
filter(version == min(version)) |>
1737
1727
filter(time_value >= t - wl - appx & time_value <= t - appx) |>
1738
1728
rename(initial_val = mortality) |>
1739
1729
select(geo_value, time_value, initial_val)
1740
-
1741
- finalized_data = x$DT |>
1730
+ finalized_data <- x$DT |>
1742
1731
group_by(geo_value, time_value) |>
1743
1732
filter(version == max(version)) |>
1744
1733
filter(time_value >= t - wl - appx & time_value <= t - appx) |>
1745
1734
rename(finalized_val = mortality) |>
1746
1735
select(geo_value, time_value, finalized_val)
1747
-
1748
- ratio = finalized_data |>
1736
+ ratio <- finalized_data |>
1749
1737
inner_join(initial_data, by = c("geo_value", "time_value")) |>
1750
1738
mutate(ratio = finalized_val / initial_val) |>
1751
1739
pull(ratio) |>
1752
1740
median(na.rm=TRUE)
1753
-
1754
- last_avail = epix_as_of(x, t) |>
1741
+ last_avail <- epix_as_of(x, t) |>
1755
1742
slice_max(time_value) |>
1756
1743
pull(mortality)
1757
-
1758
- res = tibble(geo_value = x$geo_value, target_date = t, nowcast = last_avail * ratio)
1759
-
1760
- return(res)
1761
-
1744
+ tibble(geo_value = x$geo_value, target_date = t, nowcast = last_avail * ratio)
1762
1745
}
1763
-
1764
1746
```
1765
1747
1766
1748
@@ -1798,16 +1780,14 @@ finalized_val = nchs_archive$DT |>
1798
1780
1799
1781
``` {r nowcast-fun-plot-results}
1800
1782
#| echo: false
1801
-
1783
+ #| fig-width: 7
1802
1784
ggplot() +
1803
1785
geom_line(data = finalized_val, aes(x = time_value, y = mortality, color = "Finalized")) +
1804
1786
geom_line(data = provisional_val, aes(x = target_date, y = value, color = "Provisional")) +
1805
1787
geom_line(data = nowcasts, aes(x = target_date, y = nowcast, color = "Nowcast")) +
1806
- scale_color_delphi() +
1807
1788
ylab("Mortality") +
1808
- xlab("Date") +
1809
- scale_color_delphi() +
1810
- theme(legend.position = "bottom")
1789
+ xlab("Reference date") +
1790
+ scale_color_delphi(name = "")
1811
1791
```
1812
1792
1813
1793
* The real-time counts tend to be biased below the finalized counts. Nowcasted values tend to provide a much better approximation of the truth (at least for these dates).
@@ -1831,7 +1811,7 @@ smoothed_nowcasts <- epi_slide(
1831
1811
1832
1812
``` {r nowcast-smoothed-vis}
1833
1813
#| echo: false
1834
-
1814
+ #| fig-width: 7
1835
1815
cbPalette = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442",
1836
1816
"#0072B2", "#D55E00", "#CC79A7")
1837
1817
@@ -1840,11 +1820,9 @@ ggplot() +
1840
1820
geom_line(data = provisional_val, aes(x = target_date, y = value, color = "Provisional")) +
1841
1821
geom_line(data = nowcasts, aes(x = target_date, y = nowcast, color = "Nowcast")) +
1842
1822
geom_line(data = smoothed_nowcasts, aes(x = time_value, y = smoothed_nowcasts, color = "Smoothed")) +
1843
- scale_color_delphi() +
1823
+ scale_color_delphi(name = "" ) +
1844
1824
ylab("Mortality") +
1845
- xlab("Date") +
1846
- scale_color_delphi() +
1847
- theme(legend.position = "bottom")
1825
+ xlab("Reference date")
1848
1826
```
1849
1827
1850
1828
@@ -2248,6 +2226,8 @@ compare two different configurations:
2248
2226
* one that also uses hospitalizations as a predictor
2249
2227
* and two that use quantile reg instead of linear reg
2250
2228
2229
+ ## Model settings
2230
+
2251
2231
``` {r regression-model-settings}
2252
2232
#| echo: true
2253
2233
@@ -2372,7 +2352,7 @@ nowcast_comparison |>
2372
2352
geom_line(aes(target_date, mortality)) +
2373
2353
geom_line(aes(target_date, prediction, color = Nowcaster)) +
2374
2354
scale_color_delphi() +
2375
- xlab("Date ") +
2355
+ xlab("Reference date ") +
2376
2356
ylab("Mortality")
2377
2357
```
2378
2358
@@ -2387,8 +2367,8 @@ nowcast_comparison |>
2387
2367
ggplot() +
2388
2368
geom_line(aes(target_date, mortality)) +
2389
2369
geom_line(aes(target_date, prediction, color = Nowcaster)) +
2390
- scale_color_delphi() +
2391
- xlab("Date ") +
2370
+ scale_color_delphi(name = "" ) +
2371
+ xlab("Reference date ") +
2392
2372
ylab("Mortality")
2393
2373
```
2394
2374
0 commit comments