Merge pull request #266 from cmu-delphi/ndefries/cleanup-dplyr-in-usage

nmdefries · web-flow · commit 49896afaece6 · 2023-04-19T16:56:04.000-04:00
Simplify `dplyr` use in dashboard `filter` logic
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -25,7 +25,8 @@ Imports:
  stringr,
  markdown,
  memoise,
- purrr
+ purrr,
+ data.table
 Suggests:
  styler,
  lintr,
diff --git a/app/R/data.R b/app/R/data.R
@@ -128,7 +128,7 @@ createS3DataLoader <- function() {
       s3Contents <<- newS3Contents
     }
     if (s3BucketHasChanged ||
-      !(targetVariable %in% names(df_list)) ||
+      !(targetVariable %chin% names(df_list)) ||
       nrow(df_list[[targetVariable]]) == 0) {
       df_list[[targetVariable]] <<- getAllData(s3DataFetcher, targetVariable)
       dataCreationDate <<- getCreationDate(s3DataFetcher)
diff --git a/app/R/data_manipulation.R b/app/R/data_manipulation.R
@@ -14,18 +14,18 @@ renameScoreCol <- function(filteredScoreDf, scoreType, coverageInterval) {
 
 filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FALSE, filterDate) {
   locationsIntersect <- list()
-  filteredScoreDf <- filteredScoreDf %>% filter(!is.na(Score) | target_end_date >= filterDate)
+  filteredScoreDf <- filter(filteredScoreDf, !is.na(Score) | target_end_date >= filterDate)
   # Create df with col for all locations across each unique date, ahead and forecaster combo
   locationDf <- filteredScoreDf %>%
     group_by(forecaster, target_end_date, ahead) %>%
     summarize(location_list = paste(sort(unique(geo_value)), collapse = ","))
-  locationDf <- locationDf %>% filter(location_list != c("us"))
+  locationDf <- filter(locationDf, location_list != c("us"))
   # Create a list containing each row's location list
   locationList <- sapply(locationDf$location_list, function(x) strsplit(x, ","))
   locationList <- lapply(locationList, function(x) x[x != "us"])
   # Get the intersection of all the locations in these lists
   locationsIntersect <- unique(Reduce(intersect, locationList))
-  filteredScoreDf <- filteredScoreDf %>% filter(geo_value %in% locationsIntersect)
+  filteredScoreDf <- filter(filteredScoreDf, geo_value %chin% locationsIntersect)
   if (scoreType == "coverage") {
     if (hasAsOfData) {
       filteredScoreDf <- filteredScoreDf %>%
@@ -56,40 +56,23 @@ filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FAL
 # Only use weekly aheads for hospitalizations
 # May change in the future
 filterHospitalizationsAheads <- function(scoreDf) {
-  scoreDf["weekday"] <- weekdays(as.Date(scoreDf$target_end_date))
-  scoreDf <- scoreDf %>% filter(weekday == HOSPITALIZATIONS_TARGET_DAY)
+  days_list <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
+  # Make sure to use `data.table`'s `wday`; `lubridate` has a function of the same name.
+  scoreDf["weekday"] <- days_list[data.table::wday(as.Date(scoreDf$target_end_date, "%Y-%m-%d"))]
+  scoreDf <- filter(scoreDf, weekday == HOSPITALIZATIONS_TARGET_DAY)
+  scoreDf$ahead_group <- case_when(
+    scoreDf$ahead >= HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 7 + HOSPITALIZATIONS_OFFSET ~ 1L,
+    scoreDf$ahead >= 7 + HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 14 + HOSPITALIZATIONS_OFFSET ~ 2L,
+    scoreDf$ahead >= 14 + HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 21 + HOSPITALIZATIONS_OFFSET ~ 3L,
+    scoreDf$ahead >= 21 + HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 28 + HOSPITALIZATIONS_OFFSET ~ 4L,
+    TRUE ~ NA_integer_
+  )
 
-  oneAheadDf <- scoreDf %>%
-    filter(ahead >= HOSPITALIZATIONS_OFFSET) %>%
-    filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>%
-    group_by(target_end_date, forecaster) %>%
-    filter(ahead == min(ahead)) %>%
-    mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1])
-
-  return(bind_rows(
-    scoreDf %>%
-      filter(ahead >= HOSPITALIZATIONS_OFFSET) %>%
-      filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>%
-      group_by(target_end_date, forecaster) %>%
-      filter(ahead == min(ahead)) %>%
-      mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1]),
-    scoreDf %>%
-      filter(ahead >= 7 + HOSPITALIZATIONS_OFFSET) %>%
-      filter(ahead < 14 + HOSPITALIZATIONS_OFFSET) %>%
-      group_by(target_end_date, forecaster) %>%
-      filter(ahead == min(ahead)) %>%
-      mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[2]),
-    scoreDf %>%
-      filter(ahead >= 14 + HOSPITALIZATIONS_OFFSET) %>%
-      filter(ahead < 21 + HOSPITALIZATIONS_OFFSET) %>%
-      group_by(target_end_date, forecaster) %>%
-      filter(ahead == min(ahead)) %>%
-      mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[3]),
+  return(
     scoreDf %>%
-      filter(ahead >= 21 + HOSPITALIZATIONS_OFFSET) %>%
-      filter(ahead < 28 + HOSPITALIZATIONS_OFFSET) %>%
-      group_by(target_end_date, forecaster) %>%
+      filter(!is.na(ahead_group)) %>%
+      group_by(target_end_date, forecaster, ahead_group) %>%
       filter(ahead == min(ahead)) %>%
-      mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[4])
-  ))
+      mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[ahead_group])
+  )
 }
diff --git a/app/R/exportScores.R b/app/R/exportScores.R
@@ -8,7 +8,7 @@ exportScoresUI <- function(id = "exportScores") {
 createExportScoresDataFrame <- function(scoreDf, targetVariable, scoreType, forecasters, loc, coverageInterval) {
   scoreDf <- filter(
     scoreDf[[targetVariable]],
-    forecaster %in% forecasters
+    forecaster %chin% forecasters
   )
   scoreDf <- renameScoreCol(scoreDf, scoreType, coverageInterval)
 
@@ -19,7 +19,7 @@ createExportScoresDataFrame <- function(scoreDf, targetVariable, scoreType, fore
     scoreDf <- filterOverAllLocations(scoreDf, scoreType)
     return(scoreDf[[1]])
   } else {
-    scoreDf <- scoreDf %>% filter(geo_value == tolower(loc))
+    scoreDf <- filter(scoreDf, geo_value == tolower(loc))
     scoreDf <- scoreDf[c(
       "ahead", "geo_value", "forecaster", "forecast_date",
       "data_source", "target_end_date", "Score", "actual"
diff --git a/app/global.R b/app/global.R
@@ -8,6 +8,7 @@ library(lubridate)
 library(viridis)
 library(tsibble)
 library(covidcast)
+library(data.table)
 
 appVersion <- "6.1.0"
 
@@ -35,10 +36,10 @@ ARCHIVE_TAB_SUFFIX <- "_archive"
 
 
 TARGET_VARS_BY_TAB <- list()
-TARGET_VARS_BY_TAB[[paste0("evaluations", CURRENT_TAB_SUFFIX)]] <- list(
+TARGET_VARS_BY_TAB[[paste0("evaluations", CURRENT_TAB_SUFFIX)]] <- c(
   "Hospital Admissions" = "Hospitalizations"
 )
-TARGET_VARS_BY_TAB[[paste0("evaluations", ARCHIVE_TAB_SUFFIX)]] <- list(
+TARGET_VARS_BY_TAB[[paste0("evaluations", ARCHIVE_TAB_SUFFIX)]] <- c(
   "Incident Deaths" = "Deaths",
   "Incident Cases" = "Cases"
 )
diff --git a/app/server.R b/app/server.R

Original file line number	Diff line number	Diff line change
`@@ -128,7 +128,7 @@ createS3DataLoader <- function() {`
`128`	`128`	`s3Contents <<- newS3Contents`
`129`	`129`	`}`
`130`	`130`	`if (s3BucketHasChanged \|\|`
`131`		`- !(targetVariable %in% names(df_list)) \|\|`
	`131`	`+ !(targetVariable %chin% names(df_list)) \|\|`
`132`	`132`	`nrow(df_list[[targetVariable]]) == 0) {`
`133`	`133`	`df_list[[targetVariable]] <<- getAllData(s3DataFetcher, targetVariable)`
`134`	`134`	`dataCreationDate <<- getCreationDate(s3DataFetcher)`