Skip to content

Commit 59be1eb

Browse files
authored
Merge pull request #259 from cmu-delphi/ndefries/epix-slide-versions
Allow `epix_slide` to access version history if desired
2 parents 219c7a0 + 8b356d6 commit 59be1eb

12 files changed

+678
-75
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Authors@R: c(
66
person("Jacob", "Bien", role = "ctb"),
77
person("Logan", "Brooks", role = "aut"),
88
person("Rafael", "Catoia", role = "ctb"),
9+
person("Nat", "DeFries", role = "ctb"),
910
person("Daniel", "McDonald", role = "aut"),
1011
person("Rachel", "Lobay", role = "ctb"),
1112
person("Ken", "Mawer", role = "ctb"),

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ S3method(dplyr_col_modify,col_modify_recorder_df)
1111
S3method(dplyr_col_modify,epi_df)
1212
S3method(dplyr_reconstruct,epi_df)
1313
S3method(dplyr_row_slice,epi_df)
14+
S3method(epix_truncate_versions_after,epi_archive)
15+
S3method(epix_truncate_versions_after,grouped_epi_archive)
1416
S3method(group_by,epi_archive)
1517
S3method(group_by,epi_df)
1618
S3method(group_by,grouped_epi_archive)
@@ -38,6 +40,7 @@ export(epi_slide)
3840
export(epix_as_of)
3941
export(epix_merge)
4042
export(epix_slide)
43+
export(epix_truncate_versions_after)
4144
export(filter)
4245
export(group_by)
4346
export(group_modify)

NEWS.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,14 @@ development versions. A ".9999" suffix indicates a development version.
5858
* `epi_slide` and `epix_slide` now raise an error rather than silently filtering
5959
out `ref_time_values` that don't meet their expectations.
6060

61+
## New features:
62+
63+
* `epix_slide` and `<epi_archive>$slide` have a new parameter `all_versions`. With
64+
`all_versions=TRUE`, `epix_slide` will pass a filtered `epi_archive` to each
65+
computation rather than an `epi_df` snapshot. This enables, e.g., performing
66+
pseudoprospective forecasts with a revision-aware forecaster using nested
67+
`epix_slide` operations.
68+
6169
## Improvements:
6270

6371
* Added `dplyr::group_by` and `dplyr::ungroup` S3 methods for `epi_archive`

R/archive.R

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ epi_archive =
452452
#' @description Generates a snapshot in `epi_df` format as of a given version.
453453
#' See the documentation for the wrapper function [`epix_as_of()`] for details.
454454
#' @importFrom data.table between key
455-
as_of = function(max_version, min_time_value = -Inf) {
455+
as_of = function(max_version, min_time_value = -Inf, all_versions = FALSE) {
456456
# Self max version and other keys
457457
other_keys = setdiff(key(self$DT),
458458
c("geo_value", "time_value", "version"))
@@ -472,12 +472,23 @@ epi_archive =
472472
if (max_version > self$versions_end) {
473473
Abort("`max_version` must be at most `self$versions_end`.")
474474
}
475+
if (!rlang::is_bool(all_versions)) {
476+
Abort("`all_versions` must be TRUE or FALSE.")
477+
}
475478
if (!is.na(self$clobberable_versions_start) && max_version >= self$clobberable_versions_start) {
476479
Warn('Getting data as of some recent version which could still be overwritten (under routine circumstances) without assigning a new version number (a.k.a. "clobbered"). Thus, the snapshot that we produce here should not be expected to be reproducible later. See `?epi_archive` for more info and `?epix_as_of` on how to muffle.',
477480
class="epiprocess__snapshot_as_of_clobberable_version")
478481
}
479482

480483
# Filter by version and return
484+
if (all_versions) {
485+
result = epix_truncate_versions_after(self, max_version)
486+
# `self` has already been `clone`d in `epix_truncate_versions_after`
487+
# so we can modify the new archive's DT directly.
488+
result$DT = result$DT[time_value >= min_time_value, ]
489+
return(result)
490+
}
491+
481492
return(
482493
# Make sure to use data.table ways of filtering and selecting
483494
self$DT[time_value >= min_time_value &
@@ -559,6 +570,38 @@ epi_archive =
559570
return (invisible(self))
560571
},
561572
#####
573+
#' @description Filter to keep only older versions, mutating the archive by
574+
#' potentially reseating, but not modifying in place, some of its fields. `DT`
575+
#' is likely, but not guaranteed, to be copied. Returns the mutated archive
576+
#' [invisibly][base::invisible].
577+
#' @param x as in [`epix_truncate_versions_after`]
578+
#' @param max_version as in [`epix_truncate_versions_after`]
579+
truncate_versions_after = function(max_version) {
# Drops every row of `self$DT` whose `version` is greater than
# `max_version`, then updates the archive's version metadata to match.
# Mutates `self` and returns it invisibly.
#
# Argument validation: `max_version` must be a single, non-NA value with
# the same `class` and `typeof` as `self$DT$version`, and must not exceed
# `self$versions_end`. Each violated condition aborts via `Abort`.
580+
if (length(max_version) != 1) {
581+
Abort("`max_version` cannot be a vector.")
582+
}
583+
if (is.na(max_version)) {
584+
Abort("`max_version` must not be NA.")
585+
}
586+
if (!identical(class(max_version), class(self$DT$version)) ||
587+
!identical(typeof(max_version), typeof(self$DT$version))) {
588+
Abort("`max_version` and `DT$version` must have same `class` and `typeof`.")
589+
}
590+
if (max_version > self$versions_end) {
591+
Abort("`max_version` must be at most `self$versions_end`.")
592+
}
# Reseat `self$DT` with the filtered rows; all columns are kept, selected
# by name (`with=FALSE` makes `colnames(self$DT)` be treated as names).
593+
self$DT <- self$DT[self$DT$version <= max_version, colnames(self$DT), with=FALSE]
594+
# (^ this filter operation seems to always copy the DT, even if it
595+
# keeps every entry; we don't guarantee this behavior in
596+
# documentation, though, so we could change to alias in this case)
# If the clobberable range began strictly after the new cutoff, none of
# the retained versions fall inside it, so the marker is reset to NA.
597+
if (!is.na(self$clobberable_versions_start) &&
598+
self$clobberable_versions_start > max_version) {
599+
self$clobberable_versions_start <- NA
600+
}
# The archive's `versions_end` becomes the cutoff itself.
601+
self$versions_end <- max_version
# Return the mutated archive without auto-printing it.
602+
return (invisible(self))
603+
},
604+
#####
562605
#' @description Merges another `epi_archive` with the current one, mutating the
563606
#' current one by reseating its `DT` and several other fields, but avoiding
564607
#' mutation of the old `DT`; returns the current archive
@@ -597,7 +640,7 @@ epi_archive =
597640
slide = function(f, ..., before, ref_time_values,
598641
time_step, new_col_name = "slide_value",
599642
as_list_col = FALSE, names_sep = "_",
600-
all_rows = FALSE) {
643+
all_rows = FALSE, all_versions = FALSE) {
601644
# For an "ungrouped" slide, treat all rows as belonging to one big
602645
# group (group by 0 vars), like `dplyr::summarize`, and let the
603646
# resulting `grouped_epi_archive` handle the slide:
@@ -606,7 +649,7 @@ epi_archive =
606649
before = before, ref_time_values = ref_time_values,
607650
time_step = time_step, new_col_name = new_col_name,
608651
as_list_col = as_list_col, names_sep = names_sep,
609-
all_rows = all_rows
652+
all_rows = all_rows, all_versions = all_versions
610653
) %>%
611654
# We want a slide on ungrouped archives to output something
612655
# ungrouped, rather than retaining the trivial (0-variable)

0 commit comments

Comments
 (0)