Skip to content

Allow epi_slide to access ref_time_value #318

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jun 16, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,15 @@ importFrom(data.table,key)
importFrom(data.table,set)
importFrom(data.table,setkeyv)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
importFrom(dplyr,dplyr_col_modify)
importFrom(dplyr,dplyr_reconstruct)
importFrom(dplyr,dplyr_row_slice)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,group_by_drop_default)
importFrom(dplyr,group_modify)
importFrom(dplyr,group_vars)
importFrom(dplyr,groups)
importFrom(dplyr,mutate)
importFrom(dplyr,relocate)
Expand Down
56 changes: 49 additions & 7 deletions R/slide.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@
#' through the `new_col_name` argument.
#'
#' @importFrom lubridate days weeks
#' @importFrom rlang .data .env !! enquo enquos sym
#' @importFrom dplyr bind_rows group_vars filter select
#' @importFrom rlang .data .env !! enquo enquos sym quo_set_env env
#' @export
#' @examples
#' # slide a 7-day trailing average formula on cases
Expand Down Expand Up @@ -158,11 +159,8 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,

# Check that `f` takes enough args
if (!missing(f) && is.function(f)) {
check_sufficient_f_args(f)
check_sufficient_f_args(f, 3L)
}

# Arrange by increasing time_value
x = arrange(x, time_value)

if (missing(ref_time_values)) {
ref_time_values = unique(x$time_value)
Expand Down Expand Up @@ -223,6 +221,35 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
after <- time_step(after)
}

inrange_time_values_not_in_x = ref_time_values - before
inrange_time_values_not_in_x <- inrange_time_values_not_in_x[!(inrange_time_values_not_in_x %in% unique(x$time_value))]

# Do set up to let us recover `ref_time_value`s later.
# A helper column marking real observations.
x$.real = TRUE

# Create df containing phony data. Df has the same columns and attributes as
# `x`, but filled with `NA`s aside from grouping columns. Number of rows is
# equal to the number of `inrange_time_values_not_in_x` we have * the
# number of unique levels seen in the grouping columns.
before_time_values_df = data.frame(time_value=inrange_time_values_not_in_x)
if (length(group_vars(x)) != 0) {
before_time_values_df = dplyr::cross_join(
# Get unique combinations of grouping columns seen in real data.
unique(x[, group_vars(x)]),
before_time_values_df
)
}
# Automatically fill in all other columns from `x` with `NA`s, and carry
# attributes over to new df.
before_time_values_df <- bind_rows(x[0,], before_time_values_df)
before_time_values_df$.real <- FALSE

x <- bind_rows(before_time_values_df, x)

# Arrange by increasing time_value
x = arrange(x, time_value)

# Now set up starts and stops for sliding/hopping
time_range = range(unique(x$time_value))
starts = in_range(ref_time_values - before, time_range)
Expand Down Expand Up @@ -318,9 +345,15 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,

# If f is not missing, then just go ahead, slide by group
if (!missing(f)) {
if (rlang::is_formula(f)) f = as_slide_computation(f)
f_rtv_wrapper = function(x, g, ...) {
ref_time_value = min(x$time_value) + before
x <- x[x$.real,]
f(x, g, ref_time_value, ...)
}
x = x %>%
group_modify(slide_one_grp,
f = f, ...,
f = f_rtv_wrapper, ...,
starts = starts,
stops = stops,
time_values = ref_time_values,
Expand All @@ -340,7 +373,12 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
}

quo = quos[[1]]
f = function(x, quo, ...) rlang::eval_tidy(quo, x)
f = function(.x, .group_key, quo, ...) {
.ref_time_value = min(.x$time_value) + before
.x <- .x[.x$.real,]
quo = quo_set_env(quo, env())
rlang::eval_tidy(quo, .x)
}
new_col = sym(names(rlang::quos_auto_name(quos)))

x = x %>%
Expand All @@ -358,5 +396,9 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
if (!as_list_col) {
x = unnest(x, !!new_col, names_sep = names_sep)
}

# Drop helper column `.real`.
x$.real <- NULL

return(x)
}
167 changes: 163 additions & 4 deletions tests/testthat/test-epi_slide.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@ grouped = dplyr::bind_rows(
as_epi_df() %>%
group_by(geo_value)

f = function(x, g) dplyr::tibble(value=mean(x$value), count=length(x$value))
small_x = dplyr::bind_rows(
dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value=11:15),
dplyr::tibble(geo_value = "al", time_value = d + 1:5, value=-(1:5))
) %>%
as_epi_df(as_of = d + 6) %>%
group_by(geo_value)


f = function(x, g, t) dplyr::tibble(value=mean(x$value), count=length(x$value))

## --- These cases generate errors (or not): ---
test_that("`before` and `after` are both vectors of length 1", {
Expand Down Expand Up @@ -88,13 +96,164 @@ test_that("these doesn't produce an error; the error appears only if the ref tim
})

test_that("epi_slide alerts if the provided f doesn't take enough args", {
f_xg = function(x, g) dplyr::tibble(value=mean(x$value), count=length(x$value))
f_xgt = function(x, g, t) dplyr::tibble(value=mean(x$value), count=length(x$value))
# If `regexp` is NA, asserts that there should be no errors/messages.
expect_error(epi_slide(grouped, f_xg, before = 1L, ref_time_values = d+1), regexp = NA)
expect_warning(epi_slide(grouped, f_xg, before = 1L, ref_time_values = d+1), regexp = NA)
expect_error(epi_slide(grouped, f_xgt, before = 1L, ref_time_values = d+1), regexp = NA)
expect_warning(epi_slide(grouped, f_xgt, before = 1L, ref_time_values = d+1), regexp = NA)

f_x_dots = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value))
expect_warning(epi_slide(grouped, f_x_dots, before = 1L, ref_time_values = d+1),
regexp = "positional arguments before the `...` args",
class = "check_sufficient_f_args__f_needs_min_args_before_dots")
})

test_that("basic grouped epi_slide computation produces expected output", {
expected_output = dplyr::bind_rows(
dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=cumsum(11:15)),
dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=cumsum(-(1:5)))
) %>%
group_by(geo_value) %>%
as_epi_df(as_of = d + 6)

# formula
result1 <- epi_slide(small_x, f = ~sum(.x$value), before=50)
expect_identical(result1, expected_output)

# function
result2 <- epi_slide(small_x, f = function(x, g, t) sum(x$value), before=50)
expect_identical(result2, expected_output)

# dots
result3 <- epi_slide(small_x, slide_value = sum(value), before=50)
expect_identical(result3, expected_output)
})

test_that("ungrouped epi_slide computation completes successfully", {
expect_error(
small_x %>%
ungroup() %>%
epi_slide(before = 2,
slide_value = sum(.x$value)),
regexp=NA
)
})

test_that("basic ungrouped epi_slide computation produces expected output", {
expected_output = dplyr::bind_rows(
dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=cumsum(11:15))
) %>%
as_epi_df(as_of = d + 6)

result1 <- small_x %>%
ungroup() %>%
filter(geo_value == "ak") %>%
epi_slide(before = 50,
slide_value = sum(.x$value))
expect_identical(result1, expected_output)
})

test_that("epi_slide computation via formula can use ref_time_value", {
expected_output = dplyr::bind_rows(
dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=as.double(d + 1:5)),
dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=as.double(d + 1:5))
) %>%
group_by(geo_value) %>%
as_epi_df(as_of = d + 6)

result1 <- small_x %>%
epi_slide(f = ~ .ref_time_value,
before = 50)

expect_identical(result1, expected_output)

result2 <- small_x %>%
epi_slide(f = ~ .z,
before = 50)

expect_identical(result2, expected_output)

result3 <- small_x %>%
epi_slide(f = ~ ..3,
before = 50)

expect_identical(result3, expected_output)
})

test_that("epi_slide computation via function can use ref_time_value", {
expected_output = dplyr::bind_rows(
dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=as.double(d + 1:5)),
dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=as.double(d + 1:5))
) %>%
group_by(geo_value) %>%
as_epi_df(as_of = d + 6)

result1 <- small_x %>%
epi_slide(f = function(x, g, t) t,
before = 2)

expect_identical(result1, expected_output)
})

test_that("epi_slide computation via dots can use ref_time_value and group", {
# ref_time_value
expected_output = dplyr::bind_rows(
dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=as.double(d + 1:5)),
dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=as.double(d + 1:5))
) %>%
group_by(geo_value) %>%
as_epi_df(as_of = d + 6)

result1 <- small_x %>%
epi_slide(before = 50,
slide_value = .ref_time_value)

expect_identical(result1, expected_output)

result2 <- small_x %>%
epi_slide(before = 50,
slide_value = .env$.ref_time_value)

expect_identical(result2, expected_output)

# group_key
# Use group_key column
expected_output = dplyr::bind_rows(
dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value="ak"),
dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value="al")
) %>%
group_by(geo_value) %>%
as_epi_df(as_of = d + 6)

result3 <- small_x %>%
epi_slide(before = 2,
slide_value = .group_key$geo_value)

expect_identical(result3, expected_output)

# Use entire group_key object
expected_output = dplyr::bind_rows(
dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=1L),
dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=1L)
) %>%
group_by(geo_value) %>%
as_epi_df(as_of = d + 6)

result4 <- small_x %>%
epi_slide(before = 2,
slide_value = nrow(.group_key))

expect_identical(result4, expected_output)
})

test_that("epi_slide computation via dots outputs the same result using col names and the data var", {
expected_output <- small_x %>%
epi_slide(before = 2,
slide_value = max(time_value)) %>%
as_epi_df(as_of = d + 6)

result1 <- small_x %>%
epi_slide(before = 2,
slide_value = max(.x$time_value))

expect_identical(result1, expected_output)
})