Skip to content

Commit 1f2bd85

Browse files
committed
feat(filter.epi_archive): also guard against using measurement columns
1 parent 9a4a601 commit 1f2bd85

File tree

2 files changed

+52
-33
lines changed

2 files changed

+52
-33
lines changed

R/methods-epi_archive.R

+34-19
Original file line numberDiff line numberDiff line change
@@ -998,13 +998,15 @@ dplyr_col_modify.col_modify_recorder_df <- function(data, cols) {
998998
#'
999999
#' @details
10001000
#'
1001-
#' By default, using the `version` column is disabled as it's easy to
1002-
#' get unexpected results. See if either [`epix_as_of`] or [`epix_slide`]
1003-
#' works as an alternative. If they don't cover your use case, then you can
1004-
#' set `.format_aware = TRUE` to enable usage of the `version` column, but be
1005-
#' careful to:
1006-
#' * Factor in that `.data$DT` may be using a "compact" format based on diffing
1007-
#' consecutive versions; see details of [`as_epi_archive`]
1001+
#' By default, using the `version` column or measurement columns is disabled as
1002+
#' it's easy to get unexpected results. See if either [`epix_as_of`] or
1003+
#' [`epix_slide`] works as an alternative. If they don't cover your use case,
1004+
#' then you can set `.format_aware = TRUE` to enable usage of these columns, but
1005+
#' be careful to:
1006+
#' * Factor in that `.data$DT` may have been converted into a compact format
1007+
#' based on diffing consecutive versions, and the last version of each
1008+
#' observation in `.data$DT` will always be carried forward to future
1009+
#' `version`s`; see details of [`as_epi_archive`].
10081010
#' * Set `clobberable_versions_start` and `versions_end` of the result
10091011
#' appropriately after the `filter` call. They will be initialized with the
10101012
#' same values as in `.data`.
@@ -1024,13 +1026,14 @@ dplyr_col_modify.col_modify_recorder_df <- function(data, cols) {
10241026
#' archive_cases_dv_subset %>%
10251027
#' filter(as.POSIXlt(time_value)$wday == 6L)
10261028
#'
1027-
#' # Filtering involving versions requires extra care. See epix_as_of and
1028-
#' # epix_slide instead for some common operations. One semi-common operation
1029-
#' # that ends up being fairly simple is treating observations as finalized
1030-
#' # after some amount of time, and ignoring any revisions that were made after
1031-
#' # that point:
1029+
#' # Filtering involving the `version` column or measurement columns requires
1030+
#' # extra care. See epix_as_of and epix_slide instead for some common
1031+
#' # operations. One semi-common operation that ends up being fairly simple is
1032+
#' # treating observations as finalized after some amount of time, and ignoring
1033+
#' # any revisions that were made after that point:
10321034
#' archive_cases_dv_subset %>%
1033-
#' filter(version <= time_value + as.difftime(60, units = "days"),
1035+
#' filter(
1036+
#' version <= time_value + as.difftime(60, units = "days"),
10341037
#' .format_aware = TRUE
10351038
#' )
10361039
#'
@@ -1041,25 +1044,37 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
10411044
out_tbl <- in_tbl %>%
10421045
filter(..., .by = .by)
10431046
} else {
1047+
measurement_colnames <- setdiff(names(.data$DT), key_colnames(.data))
1048+
forbidden_colnames <- c("version", measurement_colnames)
10441049
out_tbl <- in_tbl %>%
10451050
filter(
10461051
# Add our own fake filter arg to the user's ..., to update the data mask
10471052
# to prevent `version` column usage.
10481053
{
10491054
# We should be evaluating inside the data mask. To disable both
1050-
# `version` and `.data$version`, we need to go to the data mask's
1051-
# ------
1055+
# `version` and `.data$version` etc., we need to go to the ancestor
1056+
# environment containing the data mask's column bindings. This is
1057+
# likely just the parent env, but search to make sure, in a way akin
1058+
# to `<<-`:
10521059
e <- environment()
10531060
while (!identical(e, globalenv()) && !identical(e, emptyenv())) {
10541061
if ("version" %in% names(e)) {
1055-
# "version" is expected to be an active binding, and directly
1056-
# assigning over it has issues; explicitly `rm` first.
1057-
rm(list = "version", envir = e)
1062+
# This is where the column bindings are. Replace the forbidden ones.
1063+
# They are expected to be active bindings, so directly
1064+
# assigning has issues; `rm` first.
1065+
rm(list = forbidden_colnames, envir = e)
10581066
delayedAssign("version", cli::cli_abort(c(
1059-
"Using `version` in `filter` may produce unexpected results.",
1067+
"Using `version` in `filter.epi_archive` may produce unexpected results.",
10601068
">" = "See if `epix_as_of` or `epix_slide` would work instead.",
10611069
">" = "If not, see `?filter.epi_archive` details for how to proceed."
10621070
)), assign.env = e)
1071+
for (measurement_colname in measurement_colnames) {
1072+
delayedAssign(measurement_colname, cli::cli_abort(c(
1073+
"Using `{format_varname(measurement_colname)}`
1074+
in `filter.epi_archive` may produce unexpected results.",
1075+
">" = "See `?filter.epi_archive` details for how to proceed."
1076+
)), assign.env = e)
1077+
}
10631078
break
10641079
}
10651080
e <- parent.env(e)

man/filter.epi_archive.Rd

+18-14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)