Skip to content

Commit a7bcb03

Browse files
authored
Merge pull request #290 from dsweber2/newDataFormat
docs: add links to forecast-actual format repos
2 parents 711bba7 + 5c6a9d3 commit a7bcb03

File tree

2 files changed

+113
-0
lines changed

2 files changed

+113
-0
lines changed

app/assets/about.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,18 @@ stateCases = tryCatch(
127127
```
128128

129129

130+
131+
##### Forecasts with actuals
132+
133+
If you are interested in the forecasts paired with the corresponding actual values (e.g. to test different evaluation methods), they can be found in [the Amazon S3 bucket](https://forecast-eval.s3.us-east-2.amazonaws.com/) in 3 zip files.
134+
These files are static: they were generated with [the aggregation script](https://raw.githubusercontent.com/cmu-delphi/forecast-eval/main/app/assets/forecastsWithActuals.R) from the forecast and actual data available on June 12, 2023. The latest forecast date available for each target signal is:
135+
136+
* [cases](https://forecast-eval.s3.us-east-2.amazonaws.com/cases.zip): 2023-02-13
137+
* [hospitalizations](https://forecast-eval.s3.us-east-2.amazonaws.com/hospitalizations.zip):
138+
* 1 week: 2023-06-05
139+
* 2 week: 2023-06-05
140+
* 3 week: 2023-06-05
141+
* 4 week: 2023-06-05
142+
* [deaths](https://forecast-eval.s3.us-east-2.amazonaws.com/deaths.zip): 2023-03-06
143+
144+
If the S3 bucket is down, these files are also available on [Delphi's file-hosting site](https://www.cmu.edu/delphi-web/forecast-eval-scores).

app/assets/forecastsWithActuals.R

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
library(dplyr)
2+
library(tidyr)
3+
library(aws.s3)
4+
5+
# Point the AWS client at the region used for the "forecast-eval" bucket and
# fetch a handle to that bucket.
Sys.setenv("AWS_DEFAULT_REGION" = "us-east-2")
s3bucket <- tryCatch(
  get_bucket(bucket = "forecast-eval"),
  error = function(e) {
    # Surface the failure instead of swallowing it silently. The condition
    # object is still returned, so `s3bucket` holds the same value as before
    # on failure.
    warning(
      "Could not access S3 bucket 'forecast-eval': ", conditionMessage(e),
      call. = FALSE
    )
    e
  }
)
14+
15+
# Read one RDS object from the S3 bucket held in `s3bucket`.
#
# name: key of the object to read, passed as `object` to `s3readRDS()`.
#
# Returns the deserialized object on success. On failure a warning naming the
# object is raised and the error condition is returned (not re-thrown), so
# callers that inspect the result for an error keep working.
readbucket <- function(name) {
  tryCatch(
    s3readRDS(object = name, bucket = s3bucket),
    error = function(e) {
      # Previously the error was returned without any signal, so the failure
      # only showed up later as an unrelated error in downstream code.
      warning(
        "Failed to read '", name, "' from S3: ", conditionMessage(e),
        call. = FALSE
      )
      e
    }
  )
}
25+
26+
# Score cards for cases, deaths and hospitalizations. For each target the
# national and state files are stacked into one table; these tables are the
# source of the `actual` column used further down.
cases <- readbucket("score_cards_nation_cases.rds") %>%
  bind_rows(readbucket("score_cards_state_cases.rds"))

deaths <- readbucket("score_cards_nation_deaths.rds") %>%
  bind_rows(readbucket("score_cards_state_deaths.rds"))

hosp <- readbucket("score_cards_nation_hospitalizations.rds") %>%
  bind_rows(readbucket("score_cards_state_hospitalizations.rds"))

# Predictions from every forecaster (the largest object read here).
pred <- readbucket("predictions_cards.rds")
42+
43+
# Cases ----
# Keep only the case-incidence predictions, drop the columns that are constant
# after filtering, and spread the quantiles into `forecast_<quantile>` columns.
pred_cases <- filter(pred, signal == "confirmed_incidence_num")
pred_cases <- mutate(
  pred_cases,
  signal = NULL,
  data_source = NULL,
  incidence_period = NULL
)
pred_cases <- pivot_wider(
  pred_cases,
  names_from = quantile,
  values_from = value,
  names_prefix = "forecast_"
)

# Actual values from the score cards, with the columns shared with the
# predictions.
actual_cases <- select(
  cases,
  ahead, geo_value, forecaster, forecast_date, target_end_date, actual
)

# Attach the actuals to the wide forecasts (joined on their shared columns).
joined_cases <- left_join(pred_cases, actual_cases)
# Sanity check: the count of missing actuals should be the same before and
# after the join.
sum(is.na(actual_cases$actual)) == sum(is.na(joined_cases$actual))
write.csv(joined_cases, "cases.csv")
59+
60+
# Deaths ----
# Keep only the death-incidence predictions, drop the columns that are
# constant after filtering, and spread the quantiles into
# `forecast_<quantile>` columns.
pred_deaths <- filter(pred, signal == "deaths_incidence_num")
pred_deaths <- mutate(
  pred_deaths,
  signal = NULL,
  data_source = NULL,
  incidence_period = NULL
)
pred_deaths <- pivot_wider(
  pred_deaths,
  names_from = quantile,
  values_from = value,
  names_prefix = "forecast_"
)

# Actual values from the score cards, with the columns shared with the
# predictions.
actual_deaths <- select(
  deaths,
  ahead, geo_value, forecaster, forecast_date, target_end_date, actual
)

# Attach the actuals to the wide forecasts (joined on their shared columns).
joined_deaths <- left_join(pred_deaths, actual_deaths)
# Sanity check: the count of missing actuals should be the same before and
# after the join.
sum(is.na(actual_deaths$actual)) == sum(is.na(joined_deaths$actual))
write.csv(joined_deaths, "deaths.csv")
76+
77+
# Hospitalizations: processed one week of `ahead` values at a time, since
# handling everything at once runs into memory errors.
# Week k covers ahead = (k - 1) * 7 + 1, ..., k * 7 and is written to
# "hospitalizations_<k>wk.csv".
pred_hosp <- actual_hosp <- joined_hosp <- vector(mode = "list", length = 4)
for (k in seq_len(4)) {
  cat(k, "... ")
  days <- (k - 1) * 7 + 1:7

  # Wide forecasts for this week: one `forecast_<quantile>` column per
  # quantile.
  pred_hosp[[k]] <- pred %>%
    filter(signal == "confirmed_admissions_covid_1d", ahead %in% days) %>%
    mutate(signal = NULL, data_source = NULL, incidence_period = NULL) %>%
    pivot_wider(
      names_from = quantile,
      values_from = value,
      names_prefix = "forecast_"
    )

  # Matching actual values from the score cards.
  actual_hosp[[k]] <- hosp %>%
    filter(ahead %in% days) %>%
    select(ahead, geo_value, forecaster, forecast_date, target_end_date, actual)

  joined_hosp[[k]] <- left_join(pred_hosp[[k]], actual_hosp[[k]])

  # Sanity check: the count of missing actuals should be the same before and
  # after the join. The column is spelled out in full: `$act` depends on
  # partial matching, which tibbles do not do (they return NULL), and that
  # would make this check compare 0 with 0.
  cat(
    sum(is.na(actual_hosp[[k]]$actual)) == sum(is.na(joined_hosp[[k]]$actual)),
    "\n"
  )
  write.csv(joined_hosp[[k]], sprintf("hospitalizations_%iwk.csv", k))
}

0 commit comments

Comments
 (0)