Skip to content

Commit 39a9271

Browse files
authored
Merge pull request #56 from cmu-delphi/lcb-day1-regression
Separate linear and quantile regressions
2 parents fada02c + 61975ee commit 39a9271

File tree

1 file changed

+56
-13
lines changed

1 file changed

+56
-13
lines changed

slides/day1-afternoon.qmd

+56-13
Original file line number | Diff line number | Diff line change
@@ -2075,8 +2075,7 @@ test <- training_test |> filter(time_value == trial_nowcast_date)
20752075
20762076
fit <- training |>
20772077
select(all_of(predictor_descriptions$predictor_name), mortality_semistable) |>
2078-
# Fit a linear model by trying to minimize MAE (mean absolute error):
2079-
quantreg::rq(formula = mortality_semistable ~ ., tau = 0.5)
2078+
lm(formula = mortality_semistable ~ .)
20802079
20812080
pred <- tibble(
20822081
nowcast_date = trial_nowcast_date,
@@ -2116,6 +2115,7 @@ We'll wrap our nowcasting code in a function and `epix_slide()` again.
21162115
it's possible but a bit tricky to combine with our weekly-resolution
21172116
weekly-cadence archive.
21182117
* Exclude a potential predictor if it doesn't have much training data available.
2118+
* Allow for either linear regression or quantile regression at the median level (`tau = 0.5`).
21192119

21202120
```{r regression-nowcaster-function}
21212121
#| echo: true
@@ -2179,9 +2179,15 @@ regression_nowcaster <- function(archive, settings, return_info = FALSE) {
21792179
test <- training_test |>
21802180
filter(time_value == nowcast_date)
21812181
2182-
fit <- training |>
2183-
select(any_of(predictor_descriptions$predictor_name), mortality_semistable) |>
2184-
quantreg::rq(formula = mortality_semistable ~ ., tau = 0.5)
2182+
if (isTRUE(settings$median)) {
2183+
fit <- training |>
2184+
select(any_of(predictor_descriptions$predictor_name), mortality_semistable) |>
2185+
quantreg::rq(formula = mortality_semistable ~ ., tau = 0.5)
2186+
} else {
2187+
fit <- training |>
2188+
select(any_of(predictor_descriptions$predictor_name), mortality_semistable) |>
2189+
lm(formula = mortality_semistable ~ .)
2190+
}
21852191
21862192
pred <- tibble(
21872193
geo_value = "ca",
@@ -2255,6 +2261,7 @@ compare two different configurations:
22552261

22562262
* one with just mortality-based predictions
22572263
* one that also uses hospitalizations as a predictor
2264+
* and two more that repeat the configurations above using quantile regression instead of linear regression
22582265

22592266
```{r regression-model-settings}
22602267
#| echo: true
@@ -2281,6 +2288,9 @@ reg2_settings <- list(
22812288
min_n_training_intersection = 20, # or else raise error
22822289
max_n_training_intersection = Inf # or else filter down rows
22832290
)
2291+
2292+
reg3_settings <- c(reg1_settings, median = TRUE)
2293+
reg4_settings <- c(reg2_settings, median = TRUE)
22842294
```
22852295

22862296
```{r regression-run-nowcasts-backtesting}
@@ -2321,12 +2331,18 @@ reg2_nowcasts <- hosp_mort_archive |>
23212331
.versions = all_nowcast_dates + 4, # assume we nowcast on Thursday, same day as assumed NCHS release
23222332
.all_versions = TRUE)
23232333
2334+
reg3_nowcasts <- nchs_ca_archive |>
2335+
epix_slide(~ regression_nowcaster(.x, reg3_settings), .versions = all_nowcast_dates, .all_versions = TRUE)
2336+
2337+
reg4_nowcasts <- hosp_mort_archive |>
2338+
epix_slide(~ regression_nowcaster(.x, reg4_settings),
2339+
.versions = all_nowcast_dates + 4, # assume we nowcast on Thursday, same day as assumed NCHS release
2340+
.all_versions = TRUE)
23242341
```
23252342

2326-
## Comparison
2343+
## Data wrangling
23272344

2328-
```{r regression-nowcast-plot-comparison}
2329-
#| fig-width: 9
2345+
```{r regression-nowcast-wrangling}
23302346
23312347
ratio_nowcasts_archive <- nowcasts |>
23322348
filter(geo_value == "ca") |>
@@ -2339,7 +2355,9 @@ nowcast_comparison <-
23392355
locf_nowcasts |> rename(prediction_locf = prediction),
23402356
ratio_nowcasts_archive$DT |> as_tibble() |> rename(nowcast_date = version, target_date = time_value),
23412357
reg1_nowcasts |> rename(prediction_reg1 = prediction),
2342-
reg2_nowcasts |> rename(prediction_reg2 = prediction)#,
2358+
reg2_nowcasts |> rename(prediction_reg2 = prediction),
2359+
reg3_nowcasts |> rename(prediction_reg3 = prediction),
2360+
reg4_nowcasts |> rename(prediction_reg4 = prediction)#,
23432361
# get_predictor_training_data(nchs_ca_archive, "mortality", 14L, "mortality_lag14_realtime") |>
23442362
# transmute(geo_value, nowcast_date = time_value, target_date = time_value, mortality_lag14_realtime)
23452363
) |>
@@ -2351,12 +2369,37 @@ nowcast_comparison <-
23512369
mutate(Nowcaster = recode(Nowcaster,
23522370
prediction_locf = "LOCF",
23532371
prediction_ratio = "LOCF ratio model",
2354-
prediction_reg1 = "Regression 1",
2355-
prediction_reg2 = "Regression 2",
2372+
prediction_reg1 = "LinReg model",
2373+
prediction_reg2 = "LinReg + hosp",
2374+
prediction_reg3 = "QuantReg model",
2375+
prediction_reg4 = "QuantReg + hosp",
23562376
.default = Nowcaster))
2377+
```
2378+
2379+
## Comparison: linear regression
2380+
2381+
```{r regression-nowcast-plot-linreg}
2382+
#| fig-width: 9
2383+
2384+
nowcast_comparison |>
2385+
filter(target_date >= min(all_nowcast_dates) - 35,
2386+
!(Nowcaster %in% c("QuantReg model", "QuantReg + hosp"))) |>
2387+
ggplot() +
2388+
geom_line(aes(target_date, mortality)) +
2389+
geom_line(aes(target_date, prediction, color = Nowcaster)) +
2390+
scale_color_delphi() +
2391+
xlab("Date") +
2392+
ylab("Mortality")
2393+
```
2394+
2395+
## Comparison: quantile regression
2396+
2397+
```{r regression-nowcast-plot-quantreg}
2398+
#| fig-width: 9
23572399
23582400
nowcast_comparison |>
2359-
filter(target_date >= min(all_nowcast_dates) - 35) |>
2401+
filter(target_date >= min(all_nowcast_dates) - 35,
2402+
!(Nowcaster %in% c("LinReg model", "LinReg + hosp"))) |>
23602403
ggplot() +
23612404
geom_line(aes(target_date, mortality)) +
23622405
geom_line(aes(target_date, prediction, color = Nowcaster)) +
@@ -2385,7 +2428,7 @@ nowcast_comparison |>
23852428

23862429
## Mea culpa
23872430

2388-
This quickly became very complicated and we've glossed over some core concepts.
2431+
This quickly became complicated and we've glossed over some core concepts.
23892432
We'll explain concepts of regression, lagged features, and evaluation more
23902433
carefully tomorrow.
23912434

0 commit comments

Comments
 (0)