@@ -2075,8 +2075,7 @@ test <- training_test |> filter(time_value == trial_nowcast_date)
2075
2075
2076
2076
fit <- training |>
2077
2077
select(all_of(predictor_descriptions$predictor_name), mortality_semistable) |>
2078
- # Fit a linear model by trying to minimize MAE (mean absolute error):
2079
- quantreg::rq(formula = mortality_semistable ~ ., tau = 0.5)
2078
+ lm(formula = mortality_semistable ~ .)
2080
2079
2081
2080
pred <- tibble(
2082
2081
nowcast_date = trial_nowcast_date,
@@ -2116,6 +2115,7 @@ We'll wrap our nowcasting code in a function and `epix_slide()` again.
2116
2115
it's possible but a bit tricky to combine with our weekly-resolution
2117
2116
weekly-cadence archive.
2118
2117
* Exclude a potential predictor if it doesn't have much training data available.
2118
+ * Allow for linear regression or quantile regression at the median level (tau = 0.5)
2119
2119
2120
2120
``` {r regression-nowcaster-function}
2121
2121
#| echo: true
@@ -2179,9 +2179,15 @@ regression_nowcaster <- function(archive, settings, return_info = FALSE) {
2179
2179
test <- training_test |>
2180
2180
filter(time_value == nowcast_date)
2181
2181
2182
- fit <- training |>
2183
- select(any_of(predictor_descriptions$predictor_name), mortality_semistable) |>
2184
- quantreg::rq(formula = mortality_semistable ~ ., tau = 0.5)
2182
+ if (isTRUE(settings$median)) {
2183
+ fit <- training |>
2184
+ select(any_of(predictor_descriptions$predictor_name), mortality_semistable) |>
2185
+ quantreg::rq(formula = mortality_semistable ~ ., tau = 0.5)
2186
+ } else {
2187
+ fit <- training |>
2188
+ select(any_of(predictor_descriptions$predictor_name), mortality_semistable) |>
2189
+ lm(formula = mortality_semistable ~ .)
2190
+ }
2185
2191
2186
2192
pred <- tibble(
2187
2193
geo_value = "ca",
@@ -2255,6 +2261,7 @@ compare two different configurations:
2255
2261
2256
2262
* one with just mortality-based predictions
2257
2263
* one that also uses hospitalizations as a predictor
2264
+ * and two that use quantile reg instead of linear reg
2258
2265
2259
2266
``` {r regression-model-settings}
2260
2267
#| echo: true
@@ -2281,6 +2288,9 @@ reg2_settings <- list(
2281
2288
min_n_training_intersection = 20, # or else raise error
2282
2289
max_n_training_intersection = Inf # or else filter down rows
2283
2290
)
2291
+
2292
+ reg3_settings <- c(reg1_settings, median = TRUE)
2293
+ reg4_settings <- c(reg2_settings, median = TRUE)
2284
2294
```
2285
2295
2286
2296
``` {r regression-run-nowcasts-backtesting}
@@ -2321,12 +2331,18 @@ reg2_nowcasts <- hosp_mort_archive |>
2321
2331
.versions = all_nowcast_dates + 4, # assume we nowcast on Thursday, same day as assumed NCHS release
2322
2332
.all_versions = TRUE)
2323
2333
2334
+ reg3_nowcasts <- nchs_ca_archive |>
2335
+ epix_slide(~ regression_nowcaster(.x, reg3_settings), .versions = all_nowcast_dates, .all_versions = TRUE)
2336
+
2337
+ reg4_nowcasts <- hosp_mort_archive |>
2338
+ epix_slide(~ regression_nowcaster(.x, reg4_settings),
2339
+ .versions = all_nowcast_dates + 4, # assume we nowcast on Thursday, same day as assumed NCHS release
2340
+ .all_versions = TRUE)
2324
2341
```
2325
2342
2326
- ## Comparison
2343
+ ## Data wrangling
2327
2344
2328
- ``` {r regression-nowcast-plot-comparison}
2329
- #| fig-width: 9
2345
+ ``` {r regression-nowcast-wrangling}
2330
2346
2331
2347
ratio_nowcasts_archive <- nowcasts |>
2332
2348
filter(geo_value == "ca") |>
@@ -2339,7 +2355,9 @@ nowcast_comparison <-
2339
2355
locf_nowcasts |> rename(prediction_locf = prediction),
2340
2356
ratio_nowcasts_archive$DT |> as_tibble() |> rename(nowcast_date = version, target_date = time_value),
2341
2357
reg1_nowcasts |> rename(prediction_reg1 = prediction),
2342
- reg2_nowcasts |> rename(prediction_reg2 = prediction)#,
2358
+ reg2_nowcasts |> rename(prediction_reg2 = prediction),
2359
+ reg3_nowcasts |> rename(prediction_reg3 = prediction),
2360
+ reg4_nowcasts |> rename(prediction_reg4 = prediction)#,
2343
2361
# get_predictor_training_data(nchs_ca_archive, "mortality", 14L, "mortality_lag14_realtime") |>
2344
2362
# transmute(geo_value, nowcast_date = time_value, target_date = time_value, mortality_lag14_realtime)
2345
2363
) |>
@@ -2351,12 +2369,37 @@ nowcast_comparison <-
2351
2369
mutate(Nowcaster = recode(Nowcaster,
2352
2370
prediction_locf = "LOCF",
2353
2371
prediction_ratio = "LOCF ratio model",
2354
- prediction_reg1 = "Regression 1",
2355
- prediction_reg2 = "Regression 2",
2372
+ prediction_reg1 = "LinReg model",
2373
+ prediction_reg2 = "LinReg + hosp",
2374
+ prediction_reg3 = "QuantReg model",
2375
+ prediction_reg4 = "QuantReg + hosp",
2356
2376
.default = Nowcaster))
2377
+ ```
2378
+
2379
+ ## Comparison: linear regression
2380
+
2381
+ ``` {r regression-nowcast-plot-linreg}
2382
+ #| fig-width: 9
2383
+
2384
+ nowcast_comparison |>
2385
+ filter(target_date >= min(all_nowcast_dates) - 35,
2386
+ !(Nowcaster %in% c("QuantReg model", "QuantReg + hosp"))) |>
2387
+ ggplot() +
2388
+ geom_line(aes(target_date, mortality)) +
2389
+ geom_line(aes(target_date, prediction, color = Nowcaster)) +
2390
+ scale_color_delphi() +
2391
+ xlab("Date") +
2392
+ ylab("Mortality")
2393
+ ```
2394
+
2395
+ ## Comparison: quantile regression
2396
+
2397
+ ``` {r regression-nowcast-plot-quantreg}
2398
+ #| fig-width: 9
2357
2399
2358
2400
nowcast_comparison |>
2359
- filter(target_date >= min(all_nowcast_dates) - 35) |>
2401
+ filter(target_date >= min(all_nowcast_dates) - 35,
2402
+ !(Nowcaster %in% c("LinReg model", "LinReg + hosp"))) |>
2360
2403
ggplot() +
2361
2404
geom_line(aes(target_date, mortality)) +
2362
2405
geom_line(aes(target_date, prediction, color = Nowcaster)) +
@@ -2385,7 +2428,7 @@ nowcast_comparison |>
2385
2428
2386
2429
## Mea culpa
2387
2430
2388
- This quickly became very complicated and we've glossed over some core concepts.
2431
+ This quickly became complicated and we've glossed over some core concepts.
2389
2432
We'll explain concepts of regression, lagged features, and evaluation more
2390
2433
carefully tomorrow.
2391
2434
0 commit comments