Skip to content

Commit ca98924

Browse files
authored
Merge pull request #607 from cmu-delphi/lcb/perf-tweaks
Minor performance improvements and enabling fixes
2 parents 382485d + 4296485 commit ca98924

11 files changed

+192
-72
lines changed

DESCRIPTION

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ Imports:
4949
lifecycle (>= 1.0.1),
5050
lubridate,
5151
magrittr,
52+
pkgconfig,
5253
purrr,
5354
rlang,
5455
slider,

NAMESPACE

+14-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
S3method("$<-",epi_df)
34
S3method("[",epi_df)
5+
S3method("[<-",epi_df)
6+
S3method("[[<-",epi_df)
47
S3method("names<-",epi_df)
58
S3method(Summary,epi_df)
69
S3method(arrange_canonical,default)
@@ -153,7 +156,6 @@ importFrom(dplyr,"%>%")
153156
importFrom(dplyr,across)
154157
importFrom(dplyr,all_of)
155158
importFrom(dplyr,arrange)
156-
importFrom(dplyr,bind_rows)
157159
importFrom(dplyr,c_across)
158160
importFrom(dplyr,dplyr_col_modify)
159161
importFrom(dplyr,dplyr_reconstruct)
@@ -165,6 +167,7 @@ importFrom(dplyr,group_by_drop_default)
165167
importFrom(dplyr,group_map)
166168
importFrom(dplyr,group_modify)
167169
importFrom(dplyr,group_vars)
170+
importFrom(dplyr,grouped_df)
168171
importFrom(dplyr,groups)
169172
importFrom(dplyr,if_all)
170173
importFrom(dplyr,if_any)
@@ -188,6 +191,7 @@ importFrom(lubridate,as.period)
188191
importFrom(lubridate,days)
189192
importFrom(lubridate,weeks)
190193
importFrom(magrittr,"%>%")
194+
importFrom(purrr,list_rbind)
191195
importFrom(purrr,map)
192196
importFrom(purrr,map_lgl)
193197
importFrom(rlang,"!!!")
@@ -243,7 +247,16 @@ importFrom(tools,toTitleCase)
243247
importFrom(tsibble,as_tsibble)
244248
importFrom(utils,capture.output)
245249
importFrom(utils,tail)
250+
importFrom(vctrs,"vec_slice<-")
246251
importFrom(vctrs,vec_cast)
247252
importFrom(vctrs,vec_data)
248253
importFrom(vctrs,vec_duplicate_any)
249254
importFrom(vctrs,vec_equal)
255+
importFrom(vctrs,vec_in)
256+
importFrom(vctrs,vec_order)
257+
importFrom(vctrs,vec_rbind)
258+
importFrom(vctrs,vec_recycle_common)
259+
importFrom(vctrs,vec_rep)
260+
importFrom(vctrs,vec_size)
261+
importFrom(vctrs,vec_slice)
262+
importFrom(vctrs,vec_sort)

NEWS.md

+4
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
4646
## Improvements
4747
- `revision_summary()` now supports all `time_type`s.
4848
- The compactification tolerance setting now works with integer-type columns.
49+
- Various functions are now faster, using faster variants of core operations and
50+
avoiding reconstructing grouped `epi_df`s when unnecessary.
4951

5052
## Bug fixes
5153

@@ -56,6 +58,8 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
5658
forecasts in that format.
5759
- Fixed large compactification tolerances potentially removing all versions of
5860
some observations in certain cases when activity was flat.
61+
- `[<-`, `[[<-`, and `$<-` now properly retain `epi_df`-ness when used on
62+
grouped `epi_df`s.
5963

6064
## Cleanup
6165

R/epi_df.R

+4-1
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,10 @@ new_epi_df <- function(x = tibble::tibble(geo_value = character(), time_value =
189189

190190
# Reorder columns (geo_value, time_value, ...)
191191
if (nrow(x) > 0) {
192-
x <- x %>% relocate(all_of(c("geo_value", other_keys, "time_value")), .before = 1)
192+
all_names <- names(x)
193+
ukey_names <- c("geo_value", other_keys, "time_value")
194+
value_names <- all_names[!all_names %in% ukey_names]
195+
x <- x[c(ukey_names, value_names)]
193196
}
194197

195198
# Apply epi_df class, attach metadata, and return

R/epiprocess-package.R

+12-1
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,30 @@
2020
#' @importFrom data.table key
2121
#' @importFrom data.table setkeyv
2222
#' @importFrom dplyr arrange
23+
#' @importFrom dplyr grouped_df
2324
#' @importFrom dplyr is_grouped_df
2425
#' @importFrom dplyr select
2526
#' @importFrom lifecycle deprecated
27+
#' @importFrom purrr list_rbind
2628
#' @importFrom rlang %||%
2729
#' @importFrom rlang is_bare_integerish
2830
#' @importFrom tools toTitleCase
31+
#' @importFrom vctrs vec_cast
2932
#' @importFrom vctrs vec_data
3033
#' @importFrom vctrs vec_equal
34+
#' @importFrom vctrs vec_in
35+
#' @importFrom vctrs vec_order
36+
#' @importFrom vctrs vec_rbind
37+
#' @importFrom vctrs vec_recycle_common
38+
#' @importFrom vctrs vec_rep
39+
#' @importFrom vctrs vec_slice
40+
#' @importFrom vctrs vec_slice<-
41+
#' @importFrom vctrs vec_sort
3142
## usethis namespace: end
3243
NULL
3344

3445
utils::globalVariables(c(
35-
".x", ".group_key", ".ref_time_value", "resid",
46+
".", ".x", ".group_key", ".ref_time_value", "resid",
3647
"fitted", ".response", "geo_value", "time_value",
3748
"value", ".real", "lag", "max_value", "min_value",
3849
"median_value", "spread", "rel_spread", "lag_to",

R/grouped_epi_archive.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ epix_slide.grouped_epi_archive <- function(
332332
comp_value <- .slide_comp(.data_group, .group_key, .version, ...)
333333

334334
# If this wasn't a tidyeval computation, we still need to check the output
335-
# types. We'll let `group_modify` and `vec_rbind` deal with checking for
335+
# types. We'll let `vec_rbind` and `bind_rows` deal with checking for
336336
# type compatibility between the outputs.
337337
if (!used_data_masking && !(
338338
# vctrs considers data.frames to be vectors, but we still check

R/methods-epi_df.R

+84-27
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,38 @@
11
#' Convert to tibble
22
#'
3-
#' Converts an `epi_df` object into a tibble, dropping metadata and any
4-
#' grouping.
3+
#' Converts an `epi_df` object into a tibble, dropping metadata, any
4+
#' grouping, and any unrelated classes and attributes.
55
#'
66
#' Advanced: if you are working with a third-party package that uses
77
#' `as_tibble()` on `epi_df`s but you actually want them to remain `epi_df`s,
88
#' use `attr(your_epi_df, "decay_to_tibble") <- FALSE` beforehand.
99
#'
1010
#' @param x an `epi_df`
11-
#' @inheritParams tibble::as_tibble
12-
#' @importFrom tibble as_tibble
11+
#' @param ... if present, forwarded to [`tibble::as_tibble`]
12+
#' @importFrom tibble as_tibble new_tibble
13+
#' @importFrom rlang dots_n
14+
#' @importFrom vctrs vec_data vec_size
1315
#' @export
1416
as_tibble.epi_df <- function(x, ...) {
1517
# Note that some versions of `tsibble` overwrite `as_tibble.grouped_df`, which
16-
# also impacts grouped `epi_df`s don't rely on `NextMethod()`. Destructure
17-
# first instead.
18-
destructured <- tibble::as_tibble(vctrs::vec_data(x), ...)
18+
# also impacts grouped `epi_df`s, so don't rely on `NextMethod()`. Destructure
19+
# and redispatch instead.
20+
destructured <- vec_data(x) # -> data.frame, dropping extra attrs
21+
tbl <- if (dots_n(...) == 0 &&
22+
is.null(pkgconfig::get_config("tibble::rownames"))) { # nolint: indentation_linter
23+
# perf: new_tibble instead of as_tibble.data.frame which performs
24+
# extra checks whose defaults should be redundant here:
25+
new_tibble(destructured)
26+
# (^ We don't need to provide nrow= as we have >0 columns.)
27+
} else {
28+
as_tibble(destructured, ...)
29+
}
1930
if (attr(x, "decay_to_tibble") %||% TRUE) {
20-
destructured
31+
tbl
2132
} else {
2233
# We specially requested via attr not to decay epi_df-ness but to drop any
23-
# grouping.
24-
reclass(destructured, attr(x, "metadata"))
34+
# grouping. (Miscellaneous attrs are also dropped.)
35+
reclass(tbl, attr(x, "metadata"))
2536
}
2637
}
2738

@@ -151,7 +162,30 @@ dplyr_reconstruct.epi_df <- function(data, template) {
151162
# keep any grouping that has been applied:
152163
res <- NextMethod()
153164

154-
col_names <- names(res)
165+
reconstruct_light_edf(res, template)
166+
}
167+
168+
#' Like `dplyr_reconstruct.epi_df` but not recomputing any grouping
169+
#'
170+
#' In the move to our current not-quite-proper/effective "implementation" of
171+
#' [`dplyr::dplyr_extending`] for `epi_df`s, we moved a lot of checks in
172+
#' `dplyr_reconstruct` and used it instead of `reclass()` in various
173+
#' operations to prevent operations from outputting invalid metadata/classes,
174+
#' instead of more careful tailored and relevant checks. However, this actually
175+
#' introduced extra overhead due to `dplyr_reconstruct.epi_df()` passing off to
176+
#' `dplyr_reconstruct.grouped_df()` when grouped, which assumes that it will
177+
#' need to / should for safety recompute the groups, even when it'd be safe for
178+
#' it not to do so. In many operations, we're using `NextMethod()` to dispatch
179+
#' to `grouped_df` behavior if needed, and it should output something with valid
180+
#' groupings.
181+
#'
182+
#' This function serves the original purpose of performing `epi_df`-centric
183+
#' checks rather than just throwing on potentially-incorrect metadata like
184+
#' `reclass()`, but without unnecessary `dplyr_reconstruct()` delegation.
185+
#'
186+
#' @keywords internal
187+
reconstruct_light_edf <- function(data, template) {
188+
col_names <- names(data)
155189

156190
# Duplicate columns, cli_abort
157191
dup_col_names <- col_names[duplicated(col_names)]
@@ -169,23 +203,23 @@ dplyr_reconstruct.epi_df <- function(data, template) {
169203
if (not_epi_df) {
170204
# If we're calling on an `epi_df` from one of our own functions, we need to
171205
# decay to a non-`epi_df` result. If `dplyr` is calling, `x` is a tibble,
172-
# `res` is not an `epi_df` yet (but might, e.g., be a `grouped_df`), and we
206+
# `data` is not an `epi_df` yet (but might, e.g., be a `grouped_df`), and we
173207
# simply need to skip adding the metadata & class. Current `decay_epi_df`
174208
# should work in both cases.
175-
return(decay_epi_df(res))
209+
return(decay_epi_df(data))
176210
}
177211

178-
res <- reclass(res, attr(template, "metadata"))
212+
data <- reclass(data, attr(template, "metadata"))
179213

180214
# XXX we may want verify the `geo_type` and `time_type` here. If it's
181215
# significant overhead, we may also want to keep this less strict version
182216
# around and implement some extra S3 methods that use it, when appropriate.
183217

184218
# Amend additional metadata if some other_keys cols are dropped in the subset
185219
old_other_keys <- attr(template, "metadata")$other_keys
186-
attr(res, "metadata")$other_keys <- old_other_keys[old_other_keys %in% col_names]
220+
attr(data, "metadata")$other_keys <- old_other_keys[old_other_keys %in% col_names]
187221

188-
res
222+
data
189223
}
190224

191225
#' @export
@@ -196,19 +230,40 @@ dplyr_reconstruct.epi_df <- function(data, template) {
196230
return(res)
197231
}
198232

199-
dplyr::dplyr_reconstruct(res, x)
233+
reconstruct_light_edf(res, x)
234+
}
235+
236+
#' @export
237+
`[<-.epi_df` <- function(x, i, j, ..., value) {
238+
res <- NextMethod()
239+
240+
reconstruct_light_edf(res, x)
241+
}
242+
243+
#' @export
244+
`[[<-.epi_df` <- function(x, i, j, ..., value) {
245+
res <- NextMethod()
246+
247+
reconstruct_light_edf(res, x)
248+
}
249+
250+
#' @export
251+
`$<-.epi_df` <- function(x, name, value) {
252+
res <- NextMethod()
253+
254+
reconstruct_light_edf(res, x)
200255
}
201256

202257
#' @importFrom dplyr dplyr_col_modify
203258
#' @export
204259
dplyr_col_modify.epi_df <- function(data, cols) {
205-
dplyr::dplyr_reconstruct(NextMethod(), data)
260+
reconstruct_light_edf(NextMethod(), data)
206261
}
207262

208263
#' @importFrom dplyr dplyr_row_slice
209264
#' @export
210265
dplyr_row_slice.epi_df <- function(data, i, ...) {
211-
dplyr::dplyr_reconstruct(NextMethod(), data)
266+
reconstruct_light_edf(NextMethod(), data)
212267
}
213268

214269
#' @export
@@ -222,7 +277,7 @@ dplyr_row_slice.epi_df <- function(data, i, ...) {
222277
new_metadata[["other_keys"]] <- new_other_keys
223278
}
224279
result <- reclass(NextMethod(), new_metadata)
225-
dplyr::dplyr_reconstruct(result, result)
280+
reconstruct_light_edf(result, result)
226281
}
227282

228283
#' @method group_by epi_df
@@ -251,7 +306,7 @@ ungroup.epi_df <- function(x, ...) {
251306
#' @param .keep Boolean; see [`dplyr::group_modify`]
252307
#' @export
253308
group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
254-
dplyr::dplyr_reconstruct(NextMethod(), .data)
309+
reconstruct_light_edf(NextMethod(), .data)
255310
}
256311

257312
#' "Complete" an `epi_df`, adding missing rows and/or replacing `NA`s
@@ -331,7 +386,7 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
331386
#' )
332387
#' @export
333388
complete.epi_df <- function(data, ..., fill = list(), explicit = TRUE) {
334-
result <- dplyr::dplyr_reconstruct(NextMethod(), data)
389+
result <- reconstruct_light_edf(NextMethod(), data)
335390
if ("time_value" %in% names(rlang::call_match(dots_expand = FALSE)[["..."]])) {
336391
attr(result, "metadata")$time_type <- guess_time_type(result$time_value)
337392
}
@@ -343,7 +398,7 @@ complete.epi_df <- function(data, ..., fill = list(), explicit = TRUE) {
343398
#' @param data an `epi_df`
344399
#' @export
345400
unnest.epi_df <- function(data, ...) {
346-
dplyr::dplyr_reconstruct(NextMethod(), data)
401+
reconstruct_light_edf(NextMethod(), data)
347402
}
348403

349404
# Simple reclass function
@@ -402,7 +457,7 @@ arrange_row_canonical.default <- function(x, ...) {
402457
arrange_row_canonical.epi_df <- function(x, ...) {
403458
rlang::check_dots_empty()
404459
cols <- key_colnames(x)
405-
x %>% dplyr::arrange(dplyr::across(dplyr::all_of(cols)))
460+
x[vctrs::vec_order(x[cols]), ]
406461
}
407462

408463
arrange_col_canonical <- function(x, ...) {
@@ -421,8 +476,10 @@ arrange_col_canonical.default <- function(x, ...) {
421476
#' @export
422477
arrange_col_canonical.epi_df <- function(x, ...) {
423478
rlang::check_dots_empty()
424-
cols <- key_colnames(x)
425-
x %>% dplyr::relocate(dplyr::all_of(cols), .before = 1)
479+
all_names <- names(x)
480+
key_names <- key_colnames(x)
481+
val_names <- all_names[!all_names %in% key_names]
482+
x[c(key_names, val_names)]
426483
}
427484

428485
#' Group an `epi_df` object by default keys
@@ -432,7 +489,7 @@ arrange_col_canonical.epi_df <- function(x, ...) {
432489
#' @export
433490
group_epi_df <- function(x, exclude = character()) {
434491
cols <- key_colnames(x, exclude = exclude)
435-
x %>% group_by(across(all_of(cols)))
492+
reclass(grouped_df(x, cols), attr(x, "metadata"))
436493
}
437494

438495
#' Aggregate an `epi_df` object

0 commit comments

Comments
 (0)