Skip to content

Commit ac80ff8

Browse files
committed
add nearly prod forecaster!
1 parent b16d266 commit ac80ff8

17 files changed

+1647
-3842
lines changed

_code/forecaster_from_scratch.R

+72-1
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,75 @@ nssp <- pub_covidcast(
3535
signal = "pct_ed_visits_influenza",
3636
time_type = "week",
3737
geo_type = "state",
38-
geo_values = "*")
38+
geo_values = "*"
39+
) |>
40+
select(geo_value, time_value, nssp = value)
41+
42+
43+
empty_data <- tibble(time_value = seq(max()))
44+
45+
flu_data <- hhs_v_nhsn |>
46+
select(time_value, geo_value, hhs = old_source) |>
47+
left_join(nssp |> mutate(time_value = time_value + 6),
48+
by = join_by(geo_value, time_value)
49+
)
50+
51+
n_geos <- n_distinct(flu_data$geo_value)
52+
max_time_value <- max(flu_data$time_value)
53+
empty_data <- tibble(
54+
time_value = rep(max_time_value + days(1:3 * 7), each = n_geos),
55+
geo_value = rep(unique(flu_data$geo_value), times = 3),
56+
nssp = NA,
57+
hhs = NA
58+
)
59+
60+
flu_data <- flu_data |>
61+
add_row(empty_data) |>
62+
mutate(epiweek = epiweek(time_value)) |>
63+
left_join(climate, by = join_by(geo_value, epiweek)) |>
64+
select(!epiweek) |>
65+
filter(geo_value %nin% c("as", "vi", "gu", "mp", "usa")) |>
66+
arrange(geo_value, time_value) |>
67+
as_epi_df()
68+
69+
r <- epi_recipe(flu_data) |>
70+
step_population_scaling(
71+
hhs, nssp,
72+
df = epidatasets::state_census,
73+
df_pop_col = "pop",
74+
create_new = FALSE,
75+
rate_rescaling = 1e5,
76+
by = c("geo_value" = "abbr")) |>
77+
step_mutate(hhs = hhs^(1/4), nssp = nssp^(1/4), climate_pred = climate_pred^(1/4)) |>
78+
step_epi_lag(hhs, lag = c(0, 7, 14)) |>
79+
step_epi_lag(nssp, lag = c(0, 7, 14)) |>
80+
step_epi_ahead(hhs, ahead = 14) |>
81+
step_epi_ahead(climate_pred, ahead = 14, role = "predictor") |>
82+
step_epi_naomit()
83+
84+
# Training engine
85+
e <- quantile_reg(quantile_levels = c(0.01, 0.025, 1:19 / 20, 0.975, 0.99)) # 23 ForecastHub quantiles
86+
87+
# A post-processing routine describing what to do to the predictions
88+
f <- frosting() |>
89+
layer_predict() |>
90+
layer_threshold(.pred, lower = 0)
91+
92+
93+
# Bundle up the preprocessor, training engine, and postprocessor
94+
# We use quantile regression
95+
ewf <- epi_workflow(r, e, f)
96+
97+
# Fit it to data (we could fit this to ANY data that has the same format)
98+
trained_ewf <- ewf |> fit(flu_data)
99+
100+
# we could make predictions using the same model on ANY test data
101+
preds <- forecast(trained_ewf) |>
102+
left_join(epidatasets::state_census |> select(pop, abbr), join_by(geo_value == abbr)) |>
103+
mutate(
104+
.pred = .pred^4 * pop / 1e5,
105+
forecast_date = time_value,
106+
target_date = forecast_date + days(14),
107+
time_value = NULL,
108+
pop = NULL
109+
)

_code/nhsn_v_hhs.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ to_compare <- df %>%
7070
mutate(time_value = as.Date(weekendingdate), geo_value = tolower(jurisdiction), nhsn = totalconfflunewadm) %>%
7171
select(-weekendingdate, -jurisdiction, -totalconfflunewadm) %>%
7272
full_join(hhs_arch, by = join_by(geo_value, time_value)) %>%
73-
select(time_value, geo_value, old_source = nhsn, new_source = hhs)
73+
select(time_value, geo_value, old_source = hhs, new_source = nhsn)
7474

7575
saveRDS(
7676
df |>

_data/hhs_v_nhsn.rds

10 Bytes
Binary file not shown.

_freeze/slides/day2-afternoon/execute-results/html.json

+2-2
Large diffs are not rendered by default.

_freeze/slides/day2-afternoon/figure-revealjs/arx-with-grf-1.svg

+2-2
Loading

_freeze/slides/day2-afternoon/figure-revealjs/arx-with-random-forests-plot-1.svg

+2-2
Loading

_freeze/slides/day2-afternoon/figure-revealjs/plot-flu-predictions-1.svg

-761
This file was deleted.

_freeze/slides/day2-afternoon/figure-revealjs/plot-prod-1.svg

+726
Loading

_freeze/slides/day2-afternoon/figure-revealjs/unnamed-chunk-2-1.svg

+8-8
Loading

0 commit comments

Comments
 (0)