cmu-delphi · nmdefries · Dec 6, 2022 · Dec 14, 2022 · Jan 17, 2023
diff --git a/Report/create_reports.R b/Report/create_reports.R
@@ -3,6 +3,8 @@ library("optparse")
 library("dplyr")
 library("evalcast")
 library("lubridate")
+library("bettermc")
+library("parallel")
 
 # TODO: Contains fixed versions of WIS component metrics, to be ported over to evalcast
 # Redefines overprediction, underprediction and sharpness
@@ -30,25 +32,39 @@ prediction_cards_filepath <- case_when(
 
 options(warn = 1)
 
-# Requested forecasters that do not get included in final scores:
-#    Auquan-SEIR: Only predicts cumulative deaths
-#    CDDEP-ABM: No longer on Forecast Hub. Causes some warnings when trying to download.
+# Ignore requested forecasters that do not get included in final scores:
+#    Auquan-SEIR: Cumulative deaths predictions only
+#    CDDEP-ABM: No longer on Forecast Hub. Trying to download causes errors and warnings.
 #    CDDEP-SEIR_MCMC: County-level predictions only
 #    CUBoulder-COVIDLSTM: County-level predictions only
 #    FAIR-NRAR: County-level predictions only
-#    HKUST-DNN: Only predicts cumulative deaths
+#    HKUST-DNN: Cumulative deaths predictions only
 #    ISUandPKU-vSEIdR: Folder but no forecasts on Forecast Hub
 #    PandemicCentral-COVIDForest: County-level predictions only
 #    UT_GISAG-SPDM: County-level predictions only
-#    WalmartLabsML-LogForecasting: Only predicts cumulative deaths
+#    WalmartLabsML-LogForecasting: Cumulative deaths predictions only
 #    Yu_Group-CLEP: County-level predictions only
+drop_forecasters <- c(  # one entry per forecaster in the comment list above; keep both in sync
+  "Auquan-SEIR",
+  "CDDEP-ABM",
+  "CDDEP-SEIR_MCMC",
+  "CUBoulder-COVIDLSTM",
+  "FAIR-NRAR",
+  "HKUST-DNN",
+  "ISUandPKU-vSEIdR",
+  "PandemicCentral-COVIDForest",
+  "UT_GISAG-SPDM",
+  "WalmartLabsML-LogForecasting",
+  "Yu_Group-CLEP"
+)
 forecasters <- unique(c(
   get_covidhub_forecaster_names(designations = c("primary", "secondary")),
   "COVIDhub-baseline", "COVIDhub-trained_ensemble", "COVIDhub-4_week_ensemble"
 ))
-locations <- covidHubUtils::hub_locations
+forecasters <- setdiff(forecasters, drop_forecasters)  # remove the excluded forecasters before any download
 
 # also includes "us", which is national level data
+locations <- covidHubUtils::hub_locations  # filtered just below to the 2-character geo_values (state_geos)
 state_geos <- locations %>%
   filter(nchar(.data$geo_value) == 2) %>%
   pull(.data$geo_value)
@@ -59,14 +75,26 @@ signals <- c(
 )
 
 data_pull_timestamp <- now(tzone = "UTC")
-predictions_cards <- get_covidhub_predictions(forecasters,
-  signal = signals,
-  ahead = 1:28,
-  geo_values = state_geos,
-  verbose = TRUE,
-  use_disk = TRUE
-) %>%
-  filter(!(incidence_period == "epiweek" & ahead > 4))
+
+cores <- detectCores()
+if (is.na(cores)) {  # core count unknown: warn and fall back to a single core
+  warning("Could not detect the number of CPU cores; parallel mode disabled")
+  cores <- 1
+}
+options(mc.cores = max(floor(cores / 2), 1L))  # use half of the detected cores, never fewer than one
+
+print(paste("Getting forecasts for", length(forecasters), "forecasters."))
+predictions_cards <- bettermc::mclapply(forecasters, function(forecaster) {
+  get_covidhub_predictions(forecaster,  # called once per forecaster instead of once for the whole vector
+    signal = signals,
+    ahead = 1:28,
+    geo_values = state_geos,
+    verbose = TRUE,
+    use_disk = TRUE
+  ) %>%
+    filter(!(incidence_period == "epiweek" & ahead > 4))  # drop epiweek rows with ahead > 4
+}) %>%
+  bind_rows()  # stack the per-forecaster results back into a single data frame
 
 options(warn = 0)