From d9395ae5d301c8a86d7b1a9350063e23fbc59a06 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 6 Dec 2022 10:48:55 -0500 Subject: [PATCH 1/2] parallelize forecast downloads --- Report/create_reports.R | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index ebd3ad4..c977004 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -3,6 +3,8 @@ library("optparse") library("dplyr") library("evalcast") library("lubridate") +library("bettermc") +library("parallel") # TODO: Contains fixed versions of WIS component metrics, to be ported over to evalcast # Redefines overprediction, underprediction and sharpness @@ -59,14 +61,26 @@ signals <- c( ) data_pull_timestamp <- now(tzone = "UTC") -predictions_cards <- get_covidhub_predictions(forecasters, - signal = signals, - ahead = 1:28, - geo_values = state_geos, - verbose = TRUE, - use_disk = TRUE -) %>% - filter(!(incidence_period == "epiweek" & ahead > 4)) + +cores <- detectCores() +if (is.na(cores)) { + warning("Could not detect the number of CPU cores; parallel mode disabled") + cores <- 1 +} +options(mc.cores = max(floor(cores / 2), 1L)) + +print(paste("Getting forecasts for", length(forecasters), "forecasters.")) +predictions_cards <- bettermc::mclapply(forecasters, function(forecaster) { + get_covidhub_predictions(forecaster, + signal = signals, + ahead = 1:28, + geo_values = state_geos, + verbose = TRUE, + use_disk = TRUE + ) %>% + filter(!(incidence_period == "epiweek" & ahead > 4)) +}) %>% + bind_rows() options(warn = 0) From 4cd12b360508d409be214d77bafd7cf612fc55a6 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 14 Dec 2022 12:22:01 -0500 Subject: [PATCH 2/2] don't fetch forecasters that we know are invalid --- Report/create_reports.R | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index c977004..94dc4c0 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -32,25 +32,39 @@ prediction_cards_filepath <- case_when( options(warn = 1) -# Requested forecasters that do not get included in final scores: -# Auquan-SEIR: Only predicts cumulative deaths -# CDDEP-ABM: No longer on Forecast Hub. Causes some warnings when trying to download. +# Ignore requested forecasters that do not get included in final scores: +# Auquan-SEIR: Cumulative deaths predictions only +# CDDEP-ABM: No longer on Forecast Hub. Trying to download causes errors and warnings. # CDDEP-SEIR_MCMC: County-level predictions only # CUBoulder-COVIDLSTM: County-level predictions only # FAIR-NRAR: County-level predictions only -# HKUST-DNN: Only predicts cumulative deaths +# HKUST-DNN: Cumulative deaths predictions only # ISUandPKU-vSEIdR: Folder but no forecasts on Forecast Hub # PandemicCentral-COVIDForest: County-level predictions only # UT_GISAG-SPDM: County-level predictions only -# WalmartLabsML-LogForecasting: Only predicts cumulative deaths +# WalmartLabsML-LogForecasting: Cumulative deaths predictions only # Yu_Group-CLEP: County-level predictions only +drop_forecasters <- c( + "Auquan-SEIR", + "CDDEP-ABM", + "CDDEP-SEIR_MCMC", + "CUBoulder-COVIDLSTM", + "FAIR-NRAR", + "HKUST-DNN", + "ISUandPKU-vSEIdR", + "PandemicCentral-COVIDForest", + "UT_GISAG-SPDM", + "WalmartLabsML-LogForecasting", + "Yu_Group-CLEP" +) forecasters <- unique(c( get_covidhub_forecaster_names(designations = c("primary", "secondary")), "COVIDhub-baseline", "COVIDhub-trained_ensemble", "COVIDhub-4_week_ensemble" )) -locations <- covidHubUtils::hub_locations +forecasters <- setdiff(forecasters, drop_forecasters) # also includes "us", which is national level data +locations <- covidHubUtils::hub_locations state_geos <- locations %>% filter(nchar(.data$geo_value) == 2) %>% pull(.data$geo_value)