Skip to content

Commit a4f5fd2

Browse files
authored
Merge pull request #83 from cmu-delphi/aws_fix
Aws fix
2 parents 77d7e5e + 26885f0 commit a4f5fd2

12 files changed

+353
-249
lines changed

Makefile

+6
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,9 @@ run:
77

88
run-nohup:
99
nohup Rscript run.R &
10+
11+
sync:
12+
Rscript sync.R
13+
14+
dashboard:
15+
Rscript dashboard.R

NAMESPACE

+4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ export(make_shared_ensembles)
2727
export(make_shared_grids)
2828
export(make_target_ensemble_grid)
2929
export(make_target_param_grid)
30+
export(manage_S3_forecast_cache)
3031
export(overprediction)
3132
export(perform_sanity_checks)
3233
export(read_external_predictions_data)
@@ -39,6 +40,8 @@ export(slide_forecaster)
3940
export(underprediction)
4041
export(weighted_interval_score)
4142
importFrom(assertthat,assert_that)
43+
importFrom(aws.s3,get_bucket)
44+
importFrom(aws.s3,s3sync)
4245
importFrom(cli,cli_abort)
4346
importFrom(cli,hash_animal)
4447
importFrom(dplyr,across)
@@ -82,6 +85,7 @@ importFrom(epipredict,step_population_scaling)
8285
importFrom(epipredict,step_training_window)
8386
importFrom(epiprocess,as_epi_df)
8487
importFrom(epiprocess,epix_slide)
88+
importFrom(here,here)
8589
importFrom(magrittr,"%<>%")
8690
importFrom(magrittr,"%>%")
8791
importFrom(purrr,imap)

R/manage_S3.R

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#' Manage S3 cache
2+
#' @param rel_cache_dir The relative path to the cache directory, e.g.
3+
#' "data-processed/2021-09-01". Default is `"{tar_project}/objects"`
4+
#' @param bucket_name The name of the S3 bucket to sync.
5+
#' @param direction Set 'download' to download files, 'upload' to upload files,
6+
#' and `sync` to do both
7+
#' @param verbose Set to TRUE to print the files being synced.
8+
#' @param prefix specify the prefix for `s3sync`, which filters down which files
9+
#' to sync to those starting with `prefix`.
10+
#' @param tar_project which targets project we're working on
11+
#' @importFrom aws.s3 s3sync get_bucket
12+
#' @importFrom here here
13+
#' @export
14+
manage_S3_forecast_cache <- function(rel_cache_dir = NULL,
15+
bucket_name = "forecasting-team-data",
16+
direction = "sync",
17+
verbose = FALSE,
18+
prefix = Sys.getenv("AWS_S3_PREFIX", "exploration"),
19+
tar_project = Sys.getenv("TAR_PROJECT", "exploration")) {
20+
if (is.null(rel_cache_dir)) {
21+
cache_path <- tar_project
22+
} else {
23+
cache_path <- here(rel_cache_dir)
24+
}
25+
if (!dir.exists(cache_path)) dir.create(cache_path)
26+
27+
prefix <- paste0(prefix, "/", tar_project, "/")
28+
s3b <- get_bucket(bucket_name, prefix = prefix)
29+
if (direction == "sync") {
30+
if (verbose) {
31+
s3sync(cache_path, s3b, prefix = prefix)
32+
} else {
33+
sink("/dev/null")
34+
s3sync(cache_path, s3b, prefix = prefix, verbose = FALSE)
35+
sink()
36+
}
37+
} else {
38+
if (verbose) {
39+
s3sync(cache_path, s3b, prefix = prefix, direction = direction)
40+
} else {
41+
sink("/dev/null")
42+
s3sync(cache_path, s3b, prefix = prefix, direction = direction, verbose = FALSE)
43+
sink()
44+
}
45+
}
46+
return(TRUE)
47+
}

R/targets_utils.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ make_external_names_and_scores <- function() {
273273
command = {
274274
readRDS(external_scores_path) %>%
275275
group_by(forecaster) %>%
276-
tar_group()
276+
targets::tar_group()
277277
},
278278
iteration = "group",
279279
garbage_collection = TRUE

README.md

+7-2
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,24 @@ DEBUG_MODE=true
1616
USE_SHINY=false
1717
TAR_PROJECT=covid_hosp_explore
1818
EXTERNAL_SCORES_PATH=
19-
USE_AWS_S3=false
2019
AWS_S3_PREFIX=2023/exploration
2120

21+
# Pull from the bucket
22+
make sync
23+
24+
# Run only the dashboard, to display results run on other machines
25+
make dashboard
26+
2227
# Run the pipeline wrapper run.R.
2328
make run
29+
2430
```
2531

2632
- `EPIDATR_USE_CACHE` controls whether `epidatr` functions use the cache.
2733
- `DEBUG_MODE` controls whether `targets::tar_make` is run with the `callr_function=NULL`, which allows for debugging.
2834
- `USE_SHINY` controls whether we start a Shiny server after producing the targets.
2935
- `TAR_PROJECT` controls which `targets` project is run by `run.R`.
3036
- `EXTERNAL_SCORES_PATH` controls where external scores are loaded from. If not set, external scores are not used.
31-
- `USE_AWS_S3` controls whether we use AWS S3 to store the cache.
3237
- `AWS_S3_PREFIX` controls the prefix to use in the AWS S3 bucket (a prefix is a pseudo-directory in a bucket).
3338

3439
## Development

_targets.yaml

+13-12
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
covid_hosp_explore:
2-
script: covid_hosp_explore.R
3-
store: covid_hosp_explore
4-
use_crew: yes
2+
script: covid_hosp_explore.R
3+
store: covid_hosp_explore
4+
use_crew: yes
5+
seconds_timeout: 86400
56
flu_hosp_explore:
6-
script: flu_hosp_explore.R
7-
store: flu_hosp_explore
8-
use_crew: yes
7+
script: flu_hosp_explore.R
8+
store: flu_hosp_explore
9+
use_crew: yes
910
flu_hosp_prod:
10-
script: flu_hosp_prod.R
11-
store: flu_hosp_prod
12-
use_crew: yes
11+
script: flu_hosp_prod.R
12+
store: flu_hosp_prod
13+
use_crew: yes
1314
covid_hosp_prod:
14-
script: covid_hosp_prod.R
15-
store: covid_hosp_prod
16-
use_crew: yes
15+
script: covid_hosp_prod.R
16+
store: covid_hosp_prod
17+
use_crew: yes

dashboard.R

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
tar_project <- Sys.getenv("TAR_PROJECT", "covid_hosp_explore")
2+
external_scores_path <- Sys.getenv("EXTERNAL_SCORES_PATH", "")
3+
debug_mode <- as.logical(Sys.getenv("DEBUG_MODE", TRUE))
4+
use_shiny <- as.logical(Sys.getenv("USE_SHINY", FALSE))
5+
use_aws_s3_only <- as.logical(Sys.getenv("USE_AWS_S3_ONLY", FALSE))
6+
aws_s3_prefix <- Sys.getenv("AWS_S3_PREFIX", "exploration")
7+
aws_s3_prefix <- paste0(aws_s3_prefix, "/", tar_project)
8+
suppressPackageStartupMessages({
9+
library(targets)
10+
library(shiny)
11+
})
12+
# Prevent functions defined in /R dir from being loaded unnecessarily
13+
options(shiny.autoload.r = FALSE)
14+
15+
forecaster_options <- unique(tar_read(forecaster_params_grid)[["parent_id"]])
16+
# Map forecaster names to score files
17+
forecaster_options <- setNames(
18+
# File names
19+
paste0("score_", gsub(" ", ".", forecaster_options)),
20+
# Display names
21+
forecaster_options
22+
)
23+
24+
# Add ensembles
25+
ensemble_options <- unique(tar_read(ensemble_forecasters)[["parent_id"]])
26+
ensemble_options <- setNames(
27+
# File names
28+
paste0("ensemble_score_", ensemble_options),
29+
# Display names
30+
paste0("ensemble.", ensemble_options)
31+
)
32+
33+
external_options <- unique(tar_read(external_names))
34+
EXTERNAL_PREFIX <- "[external] "
35+
if (!is.null(external_options) && length(external_options) > 0) {
36+
external_options <- setNames(
37+
# File names
38+
# Get names of all branches of `external_scores` target by index. The way these
39+
# were specified, `external_names` provides the order of the branches.
40+
tar_branch_names(external_scores, seq_along(external_options)),
41+
# Display names
42+
paste0(
43+
EXTERNAL_PREFIX,
44+
gsub(" forecaster", "", gsub("_", " ", external_options, fixed = TRUE), fixed = TRUE)
45+
)
46+
)
47+
} else {
48+
external_options <- character(0)
49+
}
50+
51+
forecaster_options <- c(ensemble_options, forecaster_options, external_options)
52+
53+
runApp(here::here("app.R"), port = 3838)

extras/targets-common.R

+1-18
Original file line numberDiff line numberDiff line change
@@ -46,27 +46,10 @@ tar_option_set(
4646
# Set default crew controller.
4747
# https://books.ropensci.org/targets/crew.html#heterogeneous-workers
4848
resources = tar_resources(
49-
crew = tar_resources_crew(controller = "main_controller")
49+
crew = tar_resources_crew(controller = "main_controller", seconds_timeout = 24 * 60 * 60)
5050
)
5151
)
5252

53-
use_aws_s3 <- as.logical(Sys.getenv("USE_AWS_S3", FALSE))
54-
tar_project <- Sys.getenv("TAR_PROJECT", "covid_hosp_explore")
55-
aws_s3_prefix <- Sys.getenv("AWS_S3_PREFIX", "exploration")
56-
aws_s3_prefix <- paste0(aws_s3_prefix, "/", tar_project)
57-
if (use_aws_s3) {
58-
tar_option_set(
59-
repository = "aws",
60-
resources = tar_resources(
61-
aws = tar_resources_aws(
62-
bucket = "forecasting-team-data",
63-
prefix = aws_s3_prefix,
64-
region = "us-east-1"
65-
)
66-
)
67-
)
68-
}
69-
7053
linreg <- parsnip::linear_reg()
7154
quantreg <- epipredict::quantile_reg()
7255
ONE_AHEAD_FORECAST_NAME <- "forecast_by_ahead"

man/manage_S3_forecast_cache.Rd

+34
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)