@@ -35,4 +35,75 @@ nssp <- pub_covidcast(
35
35
signal = " pct_ed_visits_influenza" ,
36
36
time_type = " week" ,
37
37
geo_type = " state" ,
38
- geo_values = " *" )
38
+ geo_values = " *"
39
+ ) | >
40
+ select(geo_value , time_value , nssp = value )
41
+
42
+
43
+ empty_data <- tibble(time_value = seq(max()))
44
+
45
+ flu_data <- hhs_v_nhsn | >
46
+ select(time_value , geo_value , hhs = old_source ) | >
47
+ left_join(nssp | > mutate(time_value = time_value + 6 ),
48
+ by = join_by(geo_value , time_value )
49
+ )
50
+
51
+ n_geos <- n_distinct(flu_data $ geo_value )
52
+ max_time_value <- max(flu_data $ time_value )
53
+ empty_data <- tibble(
54
+ time_value = rep(max_time_value + days(1 : 3 * 7 ), each = n_geos ),
55
+ geo_value = rep(unique(flu_data $ geo_value ), times = 3 ),
56
+ nssp = NA ,
57
+ hhs = NA
58
+ )
59
+
60
+ flu_data <- flu_data | >
61
+ add_row(empty_data ) | >
62
+ mutate(epiweek = epiweek(time_value )) | >
63
+ left_join(climate , by = join_by(geo_value , epiweek )) | >
64
+ select(! epiweek ) | >
65
+ filter(geo_value %nin % c(" as" , " vi" , " gu" , " mp" , " usa" )) | >
66
+ arrange(geo_value , time_value ) | >
67
+ as_epi_df()
68
+
69
+ r <- epi_recipe(flu_data ) | >
70
+ step_population_scaling(
71
+ hhs , nssp ,
72
+ df = epidatasets :: state_census ,
73
+ df_pop_col = " pop" ,
74
+ create_new = FALSE ,
75
+ rate_rescaling = 1e5 ,
76
+ by = c(" geo_value" = " abbr" )) | >
77
+ step_mutate(hhs = hhs ^ (1 / 4 ), nssp = nssp ^ (1 / 4 ), climate_pred = climate_pred ^ (1 / 4 )) | >
78
+ step_epi_lag(hhs , lag = c(0 , 7 , 14 )) | >
79
+ step_epi_lag(nssp , lag = c(0 , 7 , 14 )) | >
80
+ step_epi_ahead(hhs , ahead = 14 ) | >
81
+ step_epi_ahead(climate_pred , ahead = 14 , role = " predictor" ) | >
82
+ step_epi_naomit()
83
+
84
+ # Training engine
85
+ e <- quantile_reg(quantile_levels = c(0.01 , 0.025 , 1 : 19 / 20 , 0.975 , 0.99 )) # 23 ForecastHub quantiles
86
+
87
+ # A post-processing routine describing what to do to the predictions
88
+ f <- frosting() | >
89
+ layer_predict() | >
90
+ layer_threshold(.pred , lower = 0 )
91
+
92
+
93
+ # Bundle up the preprocessor, training engine, and postprocessor
94
+ # We use quantile regression
95
+ ewf <- epi_workflow(r , e , f )
96
+
97
+ # Fit it to data (we could fit this to ANY data that has the same format)
98
+ trained_ewf <- ewf | > fit(flu_data )
99
+
100
+ # we could make predictions using the same model on ANY test data
101
+ preds <- forecast(trained_ewf ) | >
102
+ left_join(epidatasets :: state_census | > select(pop , abbr ), join_by(geo_value == abbr )) | >
103
+ mutate(
104
+ .pred = .pred ^ 4 * pop / 1e5 ,
105
+ forecast_date = time_value ,
106
+ target_date = forecast_date + days(14 ),
107
+ time_value = NULL ,
108
+ pop = NULL
109
+ )
0 commit comments