cmu-delphi
diff --git a/‎DESCRIPTION
+1 b/‎DESCRIPTION
+1
diff --git a/‎NAMESPACE
+14-1 b/‎NAMESPACE
+14-1
diff --git a/‎NEWS.md
+4 b/‎NEWS.md
+4
diff --git a/‎R/epi_df.R
+4-1 b/‎R/epi_df.R
+4-1
diff --git a/‎R/epiprocess-package.R
+12-1 b/‎R/epiprocess-package.R
+12-1
diff --git a/‎R/grouped_epi_archive.R
+1-1 b/‎R/grouped_epi_archive.R
+1-1
diff --git a/‎R/methods-epi_df.R
+84-27 b/‎R/methods-epi_df.R
+84-27
@@ -49,6 +49,7 @@ Imports:
     lifecycle (>= 1.0.1),
     lubridate,
     magrittr,
+    pkgconfig,
     purrr,
     rlang,
     slider,
 
@@ -1,6 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method("$<-",epi_df)
 S3method("[",epi_df)
+S3method("[<-",epi_df)
+S3method("[[<-",epi_df)
 S3method("names<-",epi_df)
 S3method(Summary,epi_df)
 S3method(arrange_canonical,default)
@@ -153,7 +156,6 @@ importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,all_of)
 importFrom(dplyr,arrange)
-importFrom(dplyr,bind_rows)
 importFrom(dplyr,c_across)
 importFrom(dplyr,dplyr_col_modify)
 importFrom(dplyr,dplyr_reconstruct)
@@ -165,6 +167,7 @@ importFrom(dplyr,group_by_drop_default)
 importFrom(dplyr,group_map)
 importFrom(dplyr,group_modify)
 importFrom(dplyr,group_vars)
+importFrom(dplyr,grouped_df)
 importFrom(dplyr,groups)
 importFrom(dplyr,if_all)
 importFrom(dplyr,if_any)
@@ -188,6 +191,7 @@ importFrom(lubridate,as.period)
 importFrom(lubridate,days)
 importFrom(lubridate,weeks)
 importFrom(magrittr,"%>%")
+importFrom(purrr,list_rbind)
 importFrom(purrr,map)
 importFrom(purrr,map_lgl)
 importFrom(rlang,"!!!")
@@ -243,7 +247,16 @@ importFrom(tools,toTitleCase)
 importFrom(tsibble,as_tsibble)
 importFrom(utils,capture.output)
 importFrom(utils,tail)
+importFrom(vctrs,"vec_slice<-")
 importFrom(vctrs,vec_cast)
 importFrom(vctrs,vec_data)
 importFrom(vctrs,vec_duplicate_any)
 importFrom(vctrs,vec_equal)
+importFrom(vctrs,vec_in)
+importFrom(vctrs,vec_order)
+importFrom(vctrs,vec_rbind)
+importFrom(vctrs,vec_recycle_common)
+importFrom(vctrs,vec_rep)
+importFrom(vctrs,vec_size)
+importFrom(vctrs,vec_slice)
+importFrom(vctrs,vec_sort)
@@ -46,6 +46,8 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
 ## Improvements
 - `revision_summary()` now supports all `time_type`s.
 - The compactification tolerance setting now works with integer-type columns.
+- Various functions are now faster, using faster variants of core operations and
+  avoiding reconstructing grouped `epi_df`s when unnecessary.
 
 ## Bug fixes
 
@@ -56,6 +58,8 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
   forecasts in that format.
 - Fixed large compactification tolerances potentially removing all versions of
   some observations in certain cases when activity was flat.
+- `[<-`, `[[<-`, and `$<-` now properly retain `epi_df`-ness when used on
+  grouped `epi_df`s.
 
 ## Cleanup
 
 
@@ -189,7 +189,10 @@ new_epi_df <- function(x = tibble::tibble(geo_value = character(), time_value =
 
   # Reorder columns (geo_value, time_value, ...)
   if (nrow(x) > 0) {
-    x <- x %>% relocate(all_of(c("geo_value", other_keys, "time_value")), .before = 1)
+    all_names <- names(x)
+    ukey_names <- c("geo_value", other_keys, "time_value")
+    value_names <- all_names[!all_names %in% ukey_names]
+    x <- x[c(ukey_names, value_names)]
   }
 
   # Apply epi_df class, attach metadata, and return
 
@@ -20,19 +20,30 @@
 #' @importFrom data.table key
 #' @importFrom data.table setkeyv
 #' @importFrom dplyr arrange
+#' @importFrom dplyr grouped_df
 #' @importFrom dplyr is_grouped_df
 #' @importFrom dplyr select
 #' @importFrom lifecycle deprecated
+#' @importFrom purrr list_rbind
 #' @importFrom rlang %||%
 #' @importFrom rlang is_bare_integerish
 #' @importFrom tools toTitleCase
+#' @importFrom vctrs vec_cast
 #' @importFrom vctrs vec_data
 #' @importFrom vctrs vec_equal
+#' @importFrom vctrs vec_in
+#' @importFrom vctrs vec_order
+#' @importFrom vctrs vec_rbind
+#' @importFrom vctrs vec_recycle_common
+#' @importFrom vctrs vec_rep
+#' @importFrom vctrs vec_slice
+#' @importFrom vctrs vec_slice<-
+#' @importFrom vctrs vec_sort
 ## usethis namespace: end
 NULL
 
 utils::globalVariables(c(
-  ".x", ".group_key", ".ref_time_value", "resid",
+  ".", ".x", ".group_key", ".ref_time_value", "resid",
   "fitted", ".response", "geo_value", "time_value",
   "value", ".real", "lag", "max_value", "min_value",
   "median_value", "spread", "rel_spread", "lag_to",
 
@@ -332,7 +332,7 @@ epix_slide.grouped_epi_archive <- function(
     comp_value <- .slide_comp(.data_group, .group_key, .version, ...)
 
     # If this wasn't a tidyeval computation, we still need to check the output
-    # types. We'll let `group_modify` and `vec_rbind` deal with checking for
+    # types. We'll let `vec_rbind` and `bind_rows` deal with checking for
     # type compatibility between the outputs.
     if (!used_data_masking && !(
       # vctrs considers data.frames to be vectors, but we still check
 
@@ -1,27 +1,38 @@
 #' Convert to tibble
 #'
-#' Converts an `epi_df` object into a tibble, dropping metadata and any
-#' grouping.
+#' Converts an `epi_df` object into a tibble, dropping metadata, any
+#' grouping, and any unrelated classes and attributes.
 #'
 #' Advanced: if you are working with a third-party package that uses
 #' `as_tibble()` on `epi_df`s but you actually want them to remain `epi_df`s,
 #' use `attr(your_epi_df, "decay_to_tibble") <- FALSE` beforehand.
 #'
 #' @param x an `epi_df`
-#' @inheritParams tibble::as_tibble
-#' @importFrom tibble as_tibble
+#' @param ... if present, forwarded to [`tibble::as_tibble`]
+#' @importFrom tibble as_tibble new_tibble
+#' @importFrom rlang dots_n
+#' @importFrom vctrs vec_data vec_size
 #' @export
 as_tibble.epi_df <- function(x, ...) {
   # Note that some versions of `tsibble` overwrite `as_tibble.grouped_df`, which
-  # also impacts grouped `epi_df`s don't rely on `NextMethod()`. Destructure
-  # first instead.
-  destructured <- tibble::as_tibble(vctrs::vec_data(x), ...)
+  # also impacts grouped `epi_df`s, so don't rely on `NextMethod()`. Destructure
+  # and redispatch instead.
+  destructured <- vec_data(x) # -> data.frame, dropping extra attrs
+  tbl <- if (dots_n(...) == 0 &&
+    is.null(pkgconfig::get_config("tibble::rownames"))) { # nolint: indentation_linter
+    # perf: new_tibble instead of as_tibble.data.frame which performs
+    # extra checks whose defaults should be redundant here:
+    new_tibble(destructured)
+    # (^ We don't need to provide nrow= as we have >0 columns.)
+  } else {
+    as_tibble(destructured, ...)
+  }
   if (attr(x, "decay_to_tibble") %||% TRUE) {
-    destructured
+    tbl
   } else {
     # We specially requested via attr not to decay epi_df-ness but to drop any
-    # grouping.
-    reclass(destructured, attr(x, "metadata"))
+    # grouping. (Miscellaneous attrs are also dropped.)
+    reclass(tbl, attr(x, "metadata"))
   }
 }
 
@@ -151,7 +162,30 @@ dplyr_reconstruct.epi_df <- function(data, template) {
   # keep any grouping that has been applied:
   res <- NextMethod()
 
-  col_names <- names(res)
+  reconstruct_light_edf(res, template)
+}
+
+#' Like `dplyr_reconstruct.epi_df` but not recomputing any grouping
+#'
+#' In the move to our current not-quite-proper/effective "implementation" of
+#' [`dplyr::dplyr_extending`] for `epi_df`s, we moved a lot of checks into
+#' `dplyr_reconstruct` and used it instead of `reclass()` in various
+#' operations to prevent operations from outputting invalid metadata/classes,
+#' instead of more carefully tailored, relevant checks. However, this actually
+#' introduced extra overhead due to `dplyr_reconstruct.epi_df()` passing off to
+#' `dplyr_reconstruct.grouped_df()` when grouped, which assumes that it needs
+#' to (or, for safety, should) recompute the groups, even when it'd be safe for
+#' it not to do so. In many operations, we're using `NextMethod()` to dispatch
+#' to `grouped_df` behavior if needed, and it should output something with valid
+#' groupings.
+#'
+#' This function serves the original purpose of performing `epi_df`-centric
+#' checks rather than just attaching potentially-incorrect metadata the way
+#' `reclass()` does, but without unnecessary `dplyr_reconstruct()` delegation.
+#'
+#' @keywords internal
+reconstruct_light_edf <- function(data, template) {
+  col_names <- names(data)
 
   # Duplicate columns, cli_abort
   dup_col_names <- col_names[duplicated(col_names)]
@@ -169,23 +203,23 @@ dplyr_reconstruct.epi_df <- function(data, template) {
   if (not_epi_df) {
     # If we're calling on an `epi_df` from one of our own functions, we need to
     # decay to a non-`epi_df` result. If `dplyr` is calling, `x` is a tibble,
-    # `res` is not an `epi_df` yet (but might, e.g., be a `grouped_df`), and we
+    # `data` is not an `epi_df` yet (but might, e.g., be a `grouped_df`), and we
     # simply need to skip adding the metadata & class. Current `decay_epi_df`
     # should work in both cases.
-    return(decay_epi_df(res))
+    return(decay_epi_df(data))
   }
 
-  res <- reclass(res, attr(template, "metadata"))
+  data <- reclass(data, attr(template, "metadata"))
 
   # XXX we may want verify the `geo_type` and `time_type` here. If it's
   # significant overhead, we may also want to keep this less strict version
   # around and implement some extra S3 methods that use it, when appropriate.
 
   # Amend additional metadata if some other_keys cols are dropped in the subset
   old_other_keys <- attr(template, "metadata")$other_keys
-  attr(res, "metadata")$other_keys <- old_other_keys[old_other_keys %in% col_names]
+  attr(data, "metadata")$other_keys <- old_other_keys[old_other_keys %in% col_names]
 
-  res
+  data
 }
 
 #' @export
@@ -196,19 +230,40 @@ dplyr_reconstruct.epi_df <- function(data, template) {
     return(res)
   }
 
-  dplyr::dplyr_reconstruct(res, x)
+  reconstruct_light_edf(res, x)
+}
+
+#' @export
+`[<-.epi_df` <- function(x, i, j, ..., value) {
+  res <- NextMethod()
+
+  reconstruct_light_edf(res, x)
+}
+
+#' @export
+`[[<-.epi_df` <- function(x, i, j, ..., value) {
+  res <- NextMethod()
+
+  reconstruct_light_edf(res, x)
+}
+
+#' @export
+`$<-.epi_df` <- function(x, name, value) {
+  res <- NextMethod()
+
+  reconstruct_light_edf(res, x)
 }
 
 #' @importFrom dplyr dplyr_col_modify
 #' @export
 dplyr_col_modify.epi_df <- function(data, cols) {
-  dplyr::dplyr_reconstruct(NextMethod(), data)
+  reconstruct_light_edf(NextMethod(), data)
 }
 
 #' @importFrom dplyr dplyr_row_slice
 #' @export
 dplyr_row_slice.epi_df <- function(data, i, ...) {
-  dplyr::dplyr_reconstruct(NextMethod(), data)
+  reconstruct_light_edf(NextMethod(), data)
 }
 
 #' @export
@@ -222,7 +277,7 @@ dplyr_row_slice.epi_df <- function(data, i, ...) {
     new_metadata[["other_keys"]] <- new_other_keys
   }
   result <- reclass(NextMethod(), new_metadata)
-  dplyr::dplyr_reconstruct(result, result)
+  reconstruct_light_edf(result, result)
 }
 
 #' @method group_by epi_df
@@ -251,7 +306,7 @@ ungroup.epi_df <- function(x, ...) {
 #' @param .keep Boolean; see [`dplyr::group_modify`]
 #' @export
 group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
-  dplyr::dplyr_reconstruct(NextMethod(), .data)
+  reconstruct_light_edf(NextMethod(), .data)
 }
 
 #' "Complete" an `epi_df`, adding missing rows and/or replacing `NA`s
@@ -331,7 +386,7 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
 #'   )
 #' @export
 complete.epi_df <- function(data, ..., fill = list(), explicit = TRUE) {
-  result <- dplyr::dplyr_reconstruct(NextMethod(), data)
+  result <- reconstruct_light_edf(NextMethod(), data)
   if ("time_value" %in% names(rlang::call_match(dots_expand = FALSE)[["..."]])) {
     attr(result, "metadata")$time_type <- guess_time_type(result$time_value)
   }
@@ -343,7 +398,7 @@ complete.epi_df <- function(data, ..., fill = list(), explicit = TRUE) {
 #' @param data an `epi_df`
 #' @export
 unnest.epi_df <- function(data, ...) {
-  dplyr::dplyr_reconstruct(NextMethod(), data)
+  reconstruct_light_edf(NextMethod(), data)
 }
 
 # Simple reclass function
@@ -402,7 +457,7 @@ arrange_row_canonical.default <- function(x, ...) {
 arrange_row_canonical.epi_df <- function(x, ...) {
   rlang::check_dots_empty()
   cols <- key_colnames(x)
-  x %>% dplyr::arrange(dplyr::across(dplyr::all_of(cols)))
+  x[vctrs::vec_order(x[cols]), ]
 }
 
 arrange_col_canonical <- function(x, ...) {
@@ -421,8 +476,10 @@ arrange_col_canonical.default <- function(x, ...) {
 #' @export
 arrange_col_canonical.epi_df <- function(x, ...) {
   rlang::check_dots_empty()
-  cols <- key_colnames(x)
-  x %>% dplyr::relocate(dplyr::all_of(cols), .before = 1)
+  all_names <- names(x)
+  key_names <- key_colnames(x)
+  val_names <- all_names[!all_names %in% key_names]
+  x[c(key_names, val_names)]
 }
 
 #' Group an `epi_df` object by default keys
@@ -432,7 +489,7 @@ arrange_col_canonical.epi_df <- function(x, ...) {
 #' @export
 group_epi_df <- function(x, exclude = character()) {
   cols <- key_colnames(x, exclude = exclude)
-  x %>% group_by(across(all_of(cols)))
+  reclass(grouped_df(x, cols), attr(x, "metadata"))
 }
 
 #' Aggregate an `epi_df` object