@@ -261,58 +261,130 @@ epi_slide <- function(
261
261
# Check for duplicated time values within groups
262
262
assert(check_ukey_unique(ungroup(.x ), c(group_vars(.x ), " time_value" )))
263
263
264
- # Begin handling completion. This will create a complete time index between
265
- # the smallest and largest time values in the data. This is used to ensure
266
- # that the slide function is called with a complete window of data. Each slide
267
- # group will filter this down to between its min and max time values. We also
268
- # mark which dates were in the data and which were added by our completion.
269
- date_seq_list <- full_date_seq(.x , window_args $ before , window_args $ after , time_type )
270
- .x $ .real <- TRUE
264
+ # # Begin handling completion. This will create a complete time index between
265
+ # # the smallest and largest time values in the data. This is used to ensure
266
+ # # that the slide function is called with a complete window of data. Each slide
267
+ # # group will filter this down to between its min and max time values. We also
268
+ # # mark which dates were in the data and which were added by our completion.
269
+ # date_seq_list <- full_date_seq(.x, window_args$before, window_args$after, time_type)
270
+ # .x$.real <- TRUE
271
271
272
- # Create a wrapper that calculates and passes `.ref_time_value` to the
273
- # computation. `i` is contained in the `slide_comp_wrapper_factory`
274
- # environment such that when called within `slide_one_grp` `i` advances
275
- # through the list of reference time values within a group and then resets
276
- # back to 1 when switching groups.
277
- slide_comp_wrapper_factory <- function (kept_ref_time_values ) {
278
- i <- 1L
279
- slide_comp_wrapper <- function (.x , .group_key , ... ) {
280
- .ref_time_value <- kept_ref_time_values [[i ]]
281
- i <<- i + 1L
282
- .slide_comp(.x , .group_key , .ref_time_value , ... )
272
+ # # Create a wrapper that calculates and passes `.ref_time_value` to the
273
+ # # computation. `i` is contained in the `slide_comp_wrapper_factory`
274
+ # # environment such that when called within `slide_one_grp` `i` advances
275
+ # # through the list of reference time values within a group and then resets
276
+ # # back to 1 when switching groups.
277
+ # slide_comp_wrapper_factory <- function(kept_ref_time_values) {
278
+ # i <- 1L
279
+ # slide_comp_wrapper <- function(.x, .group_key, ...) {
280
+ # .ref_time_value <- kept_ref_time_values[[i]]
281
+ # i <<- i + 1L
282
+ # .slide_comp(.x, .group_key, .ref_time_value, ...)
283
+ # }
284
+ # slide_comp_wrapper
285
+ # }
286
+
287
+ # # - If .x is not grouped, then the trivial group is applied:
288
+ # # https://dplyr.tidyverse.org/reference/group_map.html
289
+ # # - We create a lambda that forwards the necessary slide arguments to
290
+ # # `epi_slide_one_group`.
291
+ # # - `...` from top of `epi_slide` are forwarded to `.f` here through
292
+ # # group_modify and through the lambda.
293
+ # result <- group_map(
294
+ # .x,
295
+ # .f = function(.data_group, .group_key, ...) {
296
+ # epi_slide_one_group(
297
+ # .data_group, .group_key, ...,
298
+ # .slide_comp_factory = slide_comp_wrapper_factory,
299
+ # .before = window_args$before,
300
+ # .after = window_args$after,
301
+ # .ref_time_values = .ref_time_values,
302
+ # .all_rows = .all_rows,
303
+ # .new_col_name = .new_col_name,
304
+ # .used_data_masking = used_data_masking,
305
+ # .time_type = time_type,
306
+ # .date_seq_list = date_seq_list
307
+ # )
308
+ # },
309
+ # ...,
310
+ # .keep = TRUE
311
+ # ) %>%
312
+ # list_rbind() %>%
313
+ # `[`(.$.real, names(.) != ".real") %>%
314
+ # arrange_col_canonical() %>%
315
+ # group_by(!!!.x_orig_groups)
316
+ before_n_steps <- time_delta_to_n_steps(window_args $ before , time_type )
317
+ after_n_steps <- time_delta_to_n_steps(window_args $ after , time_type )
318
+ unit_step <- unit_time_delta(time_type , format = " fast" )
319
+ simple_hop <- time_slide_to_simple_hop(.slide_comp = .slide_comp , ... , .before_n_steps = before_n_steps , .after_n_steps = after_n_steps )
320
+ result <- .x %> %
321
+ group_modify(function (grp_data , grp_key ) {
322
+ out_time_values <- ref_time_values_to_out_time_values(grp_data , .ref_time_values )
323
+ res <- grp_data
324
+ slide_values <- slide_window(grp_data , grp_key , simple_hop , before_n_steps , after_n_steps , unit_step , time_type , out_time_values )
325
+ # FIXME check, de-dupe, simplify, refactor, ...
326
+ if (.all_rows ) {
327
+ new_slide_values <- vec_cast(rep(NA , nrow(res )), slide_values )
328
+ vec_slice(new_slide_values , vec_match(out_time_values , res $ time_value )) <- slide_values
329
+ slide_values <- new_slide_values
330
+ } else {
331
+ res <- vec_slice(res , vec_match(out_time_values , res $ time_value ))
332
+ }
333
+
334
+ if (is.null(.new_col_name )) {
335
+ if (inherits(slide_values , " data.frame" )) {
336
+ # Sometimes slide_values can parrot back columns already in `res`; allow
337
+ # this, but balk if a column has the same name as one in `res` but a
338
+ # different value:
339
+ comp_nms <- names(slide_values )
340
+ overlaps_existing_names <- comp_nms %in% names(res )
341
+ for (comp_i in which(overlaps_existing_names )) {
342
+ if (! identical(slide_values [[comp_i ]], res [[comp_nms [[comp_i ]]]])) {
343
+ lines <- c(
344
+ cli :: format_error(c(
345
+ " New column and old column clash" ,
346
+ " x" = " slide computation output included a
347
+ {format_varname(comp_nms[[comp_i]])} column, but `.x` already had a
348
+ {format_varname(comp_nms[[comp_i]])} column with differing values" ,
349
+ " Here are examples of differing values, where the grouping variables were
350
+ {format_tibble_row(.group_key)}:"
351
+ )),
352
+ capture.output(print(waldo :: compare(
353
+ res [[comp_nms [[comp_i ]]]], slide_values [[comp_i ]],
354
+ x_arg = rlang :: expr_deparse(dplyr :: expr(`$`(!! " existing" , !! sym(comp_nms [[comp_i ]])))), # nolint: object_usage_linter
355
+ y_arg = rlang :: expr_deparse(dplyr :: expr(`$`(!! " comp_value" , !! sym(comp_nms [[comp_i ]])))) # nolint: object_usage_linter
356
+ ))),
357
+ cli :: format_message(c(
358
+ " >" = " You likely want to rename or remove this column from your slide
359
+ computation's output, or debug why it has a different value."
360
+ ))
361
+ )
362
+ rlang :: abort(paste(collapse = " \n " , lines ),
363
+ class = " epiprocess__epi_slide_output_vs_existing_column_conflict"
364
+ )
365
+ }
366
+ }
367
+ # Unpack into separate columns (without name prefix). If there are
368
+ # columns duplicating existing columns, de-dupe and order them as if they
369
+ # didn't exist in slide_values.
370
+ res <- dplyr :: bind_cols(res , slide_values [! overlaps_existing_names ])
371
+ } else {
372
+ # Apply default name (to vector or packed data.frame-type column):
373
+ if (" slide_value" %in% names(res )) {
374
+ cli_abort(c(" Cannot guess a good column name for your output" ,
375
+ " x" = " `slide_value` already exists in `.x`" ,
376
+ " >" = " Please provide a `.new_col_name`."
377
+ ))
378
+ }
379
+ res [[" slide_value" ]] <- slide_values
283
380
}
284
- slide_comp_wrapper
381
+ } else {
382
+ # Vector or packed data.frame-type column (note: overlaps with existing
383
+ # column names should already be forbidden by earlier validation):
384
+ res [[.new_col_name ]] <- slide_values
285
385
}
286
-
287
- # - If .x is not grouped, then the trivial group is applied:
288
- # https://dplyr.tidyverse.org/reference/group_map.html
289
- # - We create a lambda that forwards the necessary slide arguments to
290
- # `epi_slide_one_group`.
291
- # - `...` from top of `epi_slide` are forwarded to `.f` here through
292
- # group_modify and through the lambda.
293
- result <- group_map(
294
- .x ,
295
- .f = function (.data_group , .group_key , ... ) {
296
- epi_slide_one_group(
297
- .data_group , .group_key , ... ,
298
- .slide_comp_factory = slide_comp_wrapper_factory ,
299
- .before = window_args $ before ,
300
- .after = window_args $ after ,
301
- .ref_time_values = .ref_time_values ,
302
- .all_rows = .all_rows ,
303
- .new_col_name = .new_col_name ,
304
- .used_data_masking = used_data_masking ,
305
- .time_type = time_type ,
306
- .date_seq_list = date_seq_list
307
- )
308
- },
309
- ... ,
310
- .keep = TRUE
311
- ) %> %
312
- list_rbind() %> %
313
- `[`(. $ .real , names(. ) != " .real" ) %> %
314
- arrange_col_canonical() %> %
315
- group_by(!!! .x_orig_groups )
386
+ res
387
+ })
316
388
317
389
# If every group in epi_slide_one_group takes the
318
390
# length(available_ref_time_values) == 0 branch then we end up here.
0 commit comments