86
86
# ' @seealso [`epi_slide_opt`] [`epi_slide_mean`] [`epi_slide_sum`]
87
87
# ' @examples
88
88
# ' # slide a 7-day trailing average formula on cases
89
- # ' # This and other simple sliding means are much faster to do using
90
- # ' # the `epi_slide_mean` function instead.
89
+ # ' # Simple sliding means and sums are much faster to do using
90
+ # ' # the `epi_slide_mean` and `epi_slide_sum` functions instead.
91
91
# ' jhu_csse_daily_subset %>%
92
92
# ' group_by(geo_value) %>%
93
93
# ' epi_slide(cases_7dav = mean(cases), before = 6) %>%
@@ -377,7 +377,8 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
377
377
# '
378
378
# ' @importFrom dplyr bind_rows mutate %>% arrange tibble select
379
379
# ' @importFrom rlang enquo quo_get_expr as_label expr_label caller_arg
380
- # ' @importFrom purrr map map_lgl
380
+ # ' @importFrom tidyselect eval_select
381
+ # ' @importFrom purrr map map_lgl
381
382
# ' @importFrom data.table frollmean frollsum frollapply
382
383
# ' @importFrom lubridate as.period
383
384
# ' @importFrom checkmate assert_function
@@ -390,50 +391,50 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
390
391
# ' group_by(geo_value) %>%
391
392
# ' epi_slide_opt(
392
393
# ' cases,
393
- # ' f = data.table::frollmean, new_col_name = "cases_7dav", names_sep = NULL, before = 6
394
+ # ' f = data.table::frollmean, before = 6
394
395
# ' ) %>%
395
- # ' # Remove a nonessential var. to ensure new col is printed
396
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
396
+ # ' # Remove a nonessential var. to ensure new col is printed, and rename new col
397
+ # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases ) %>%
397
398
# ' ungroup()
398
399
# '
399
400
# ' # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed
400
401
# ' # and accuracy, and to allow partially-missing windows.
401
402
# ' jhu_csse_daily_subset %>%
402
403
# ' group_by(geo_value) %>%
403
- # ' epi_slide_opt(cases,
404
- # ' f = data.table::frollmean ,
405
- # ' new_col_name = "cases_7dav", names_sep = NULL , before = 6,
404
+ # ' epi_slide_opt(
405
+ # ' cases ,
406
+ # ' f = data.table::frollmean , before = 6,
406
407
# ' # `frollmean` options
407
408
# ' na.rm = TRUE, algo = "exact", hasNA = TRUE
408
409
# ' ) %>%
409
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
410
+ # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases ) %>%
410
411
# ' ungroup()
411
412
# '
412
413
# ' # slide a 7-day leading average
413
414
# ' jhu_csse_daily_subset %>%
414
415
# ' group_by(geo_value) %>%
415
416
# ' epi_slide_opt(
416
417
# ' cases,
417
- # ' f = slider::slide_mean, new_col_name = "cases_7dav", names_sep = NULL, after = 6
418
+ # ' f = slider::slide_mean, after = 6
418
419
# ' ) %>%
419
420
# ' # Remove a nonessential var. to ensure new col is printed
420
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
421
+ # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases ) %>%
421
422
# ' ungroup()
422
423
# '
423
424
# ' # slide a 7-day centre-aligned sum. This can also be done with `epi_slide_sum`
424
425
# ' jhu_csse_daily_subset %>%
425
426
# ' group_by(geo_value) %>%
426
427
# ' epi_slide_opt(
427
428
# ' cases,
428
- # ' f = data.table::frollsum, new_col_name = "cases_7dav", names_sep = NULL, before = 3, after = 3
429
+ # ' f = data.table::frollsum, before = 3, after = 3
429
430
# ' ) %>%
430
431
# ' # Remove a nonessential var. to ensure new col is printed
431
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
432
+ # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases ) %>%
432
433
# ' ungroup()
433
434
epi_slide_opt <- function (x , col_names , f , ... , before , after , ref_time_values ,
434
435
time_step ,
435
- new_col_name = " slide_value " , as_list_col = NULL ,
436
- names_sep = " _ " , all_rows = FALSE ) {
436
+ new_col_name = NULL , as_list_col = NULL ,
437
+ names_sep = NULL , all_rows = FALSE ) {
437
438
assert_class(x , " epi_df" )
438
439
439
440
if (nrow(x ) == 0L ) {
@@ -443,15 +444,27 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values,
443
444
" i" = " If this computation is occuring within an `epix_slide` call,
444
445
check that `epix_slide` `ref_time_values` argument was set appropriately"
445
446
),
446
- class = " epiprocess__epi_slide_mean__0_row_input " ,
447
+ class = " epiprocess__epi_slide_opt__0_row_input " ,
447
448
epiprocess__x = x
448
449
)
449
450
}
450
451
451
452
if (! is.null(as_list_col )) {
452
453
cli_abort(
453
- " `as_list_col` is not supported for `epi_slide_mean`" ,
454
- class = " epiproces__epi_slide_mean__list_not_supported"
454
+ " `as_list_col` is not supported for `epi_slide_[opt/mean/sum]`" ,
455
+ class = " epiprocess__epi_slide_opt__list_not_supported"
456
+ )
457
+ }
458
+ if (! is.null(new_col_name )) {
459
+ cli_abort(
460
+ " `new_col_name` is not supported for `epi_slide_[opt/mean/sum]`" ,
461
+ class = " epiprocess__epi_slide_opt__new_name_not_supported"
462
+ )
463
+ }
464
+ if (! is.null(names_sep )) {
465
+ cli_abort(
466
+ " `names_sep` is not supported for `epi_slide_[opt/mean/sum]`" ,
467
+ class = " epiprocess__epi_slide_opt__name_sep_not_supported"
455
468
)
456
469
}
457
470
@@ -543,48 +556,16 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values,
543
556
# `before` and `after` params.
544
557
window_size <- before + after + 1L
545
558
546
- col_names_quo <- enquo(col_names )
547
- col_names_chr <- as.character(rlang :: quo_get_expr(col_names_quo ))
548
- if (startsWith(rlang :: as_label(col_names_quo ), " c(" )) {
549
- # List or vector of col names. We need to drop the first element since it
550
- # will be either "c" (if built as a vector) or "list" (if built as a
551
- # list).
552
- col_names_chr <- col_names_chr [- 1 ]
553
- } else if (startsWith(rlang :: as_label(col_names_quo ), " list(" )) {
554
- cli_abort(
555
- " `col_names` must be a single tidy column name or a vector
556
- (`c()`) of tidy column names" ,
557
- class = " epiprocess__epi_slide_mean__col_names_in_list" ,
558
- epiprocess__col_names = col_names_chr
559
- )
560
- }
561
- # If single column name, do nothing.
562
-
563
- if (is.null(names_sep )) {
564
- if (length(new_col_name ) != length(col_names_chr )) {
565
- cli_abort(
566
- c(
567
- " `new_col_name` must be the same length as `col_names` when
568
- `names_sep` is NULL to avoid duplicate output column names."
569
- ),
570
- class = " epiprocess__epi_slide_mean__col_names_length_mismatch" ,
571
- epiprocess__new_col_name = new_col_name ,
572
- epiprocess__col_names = col_names_chr
573
- )
574
- }
575
- result_col_names <- new_col_name
576
- } else {
577
- if (length(new_col_name ) != 1L && length(new_col_name ) != length(col_names_chr )) {
578
- cli_abort(
579
- " `new_col_name` must be either length 1 or the same length as `col_names`." ,
580
- class = " epiprocess__epi_slide_mean__col_names_length_mismatch_and_not_one" ,
581
- epiprocess__new_col_name = new_col_name ,
582
- epiprocess__col_names = col_names_chr
583
- )
584
- }
585
- result_col_names <- paste(new_col_name , col_names_chr , sep = names_sep )
586
- }
587
-
559
+ # The position of a given column can be differ between input `x` and
560
+ # `.data_group` since the grouping step by default drops grouping columns.
561
+ # To avoid rerunning `eval_select` for every `.data_group`, convert
562
+ # positions of user-provided `col_names` into string column names. We avoid
563
+ # using `names(pos)` directly for robustness and in case we later want to
564
+ # allow users to rename fields via tidyselection.
565
+ pos <- eval_select(rlang :: enquo(col_names ), data = x , allow_rename = FALSE )
566
+ col_names_chr <- names(x )[pos ]
567
+ # Always rename results to "slide_value_<original column name>".
568
+ result_col_names <- paste0(" slide_value_" , col_names_chr )
588
569
slide_one_grp <- function (.data_group , .group_key , ... ) {
589
570
missing_times <- all_dates [! (all_dates %in% .data_group $ time_value )]
590
571
@@ -600,19 +581,19 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values,
600
581
# If a group contains duplicate time values, `frollmean` will still only
601
582
# use the last `k` obs. It isn't looking at dates, it just goes in row
602
583
# order. So if the computation is aggregating across multiple obs for the
603
- # same date, `epi_slide_mean` will produce incorrect results; `epi_slide`
604
- # should be used instead.
584
+ # same date, `epi_slide_opt` and derivates will produce incorrect
585
+ # results; `epi_slide` should be used instead.
605
586
if (anyDuplicated(.data_group $ time_value ) != 0L ) {
606
587
cli_abort(
607
588
c(
608
- " group contains duplicate time values. Using `epi_slide_mean ` on this
589
+ " group contains duplicate time values. Using `epi_slide_[opt/mean/sum] ` on this
609
590
group will result in incorrect results" ,
610
591
" i" = " Please change the grouping structure of the input data so that
611
592
each group has non-duplicate time values (e.g. `x %>% group_by(geo_value)
612
- %>% epi_slide_mean `)" ,
593
+ %>% epi_slide_opt(f = frollmean) `)" ,
613
594
" i" = " Use `epi_slide` to aggregate across groups"
614
595
),
615
- class = " epiprocess__epi_slide_mean__duplicate_time_values " ,
596
+ class = " epiprocess__epi_slide_opt__duplicate_time_values " ,
616
597
epiprocess__data_group = .data_group ,
617
598
epiprocess__group_key = .group_key
618
599
)
@@ -624,7 +605,7 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values,
624
605
" i" = c(" Input data may contain `time_values` closer together than the
625
606
expected `time_step` size" )
626
607
),
627
- class = " epiprocess__epi_slide_mean__unexpected_row_number " ,
608
+ class = " epiprocess__epi_slide_opt__unexpected_row_number " ,
628
609
epiprocess__data_group = .data_group ,
629
610
epiprocess__group_key = .group_key
630
611
)
@@ -669,7 +650,7 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values,
669
650
}
670
651
671
652
if (! is_epi_df(result )) {
672
- # `all_rows`handling strip epi_df format and metadata.
653
+ # `all_rows`handling strips epi_df format and metadata.
673
654
# Restore them.
674
655
result <- reclass(result , attributes(x )$ metadata )
675
656
}
@@ -700,50 +681,51 @@ epi_slide_opt <- function(x, col_names, f, ..., before, after, ref_time_values,
700
681
# ' # slide a 7-day trailing average formula on cases
701
682
# ' jhu_csse_daily_subset %>%
702
683
# ' group_by(geo_value) %>%
703
- # ' epi_slide_mean(cases, new_col_name = "cases_7dav", names_sep = NULL, before = 6) %>%
684
+ # ' epi_slide_mean(cases, before = 6) %>%
704
685
# ' # Remove a nonessential var. to ensure new col is printed
705
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
686
+ # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases ) %>%
706
687
# ' ungroup()
707
688
# '
708
689
# ' # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed
709
690
# ' # and accuracy, and to allow partially-missing windows.
710
691
# ' jhu_csse_daily_subset %>%
711
692
# ' group_by(geo_value) %>%
712
- # ' epi_slide_mean(cases,
713
- # ' new_col_name = "cases_7dav", names_sep = NULL, before = 6,
693
+ # ' epi_slide_mean(
694
+ # ' cases,
695
+ # ' before = 6,
714
696
# ' # `frollmean` options
715
697
# ' na.rm = TRUE, algo = "exact", hasNA = TRUE
716
698
# ' ) %>%
717
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
699
+ # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases ) %>%
718
700
# ' ungroup()
719
701
# '
720
702
# ' # slide a 7-day leading average
721
703
# ' jhu_csse_daily_subset %>%
722
704
# ' group_by(geo_value) %>%
723
- # ' epi_slide_mean(cases, new_col_name = "cases_7dav", names_sep = NULL, after = 6) %>%
705
+ # ' epi_slide_mean(cases, after = 6) %>%
724
706
# ' # Remove a nonessential var. to ensure new col is printed
725
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
707
+ # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases ) %>%
726
708
# ' ungroup()
727
709
# '
728
710
# ' # slide a 7-day centre-aligned average
729
711
# ' jhu_csse_daily_subset %>%
730
712
# ' group_by(geo_value) %>%
731
- # ' epi_slide_mean(cases, new_col_name = "cases_7dav", names_sep = NULL, before = 3, after = 3) %>%
713
+ # ' epi_slide_mean(cases, before = 3, after = 3) %>%
732
714
# ' # Remove a nonessential var. to ensure new col is printed
733
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
715
+ # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases ) %>%
734
716
# ' ungroup()
735
717
# '
736
718
# ' # slide a 14-day centre-aligned average
737
719
# ' jhu_csse_daily_subset %>%
738
720
# ' group_by(geo_value) %>%
739
- # ' epi_slide_mean(cases, new_col_name = "cases_14dav", names_sep = NULL, before = 6, after = 7) %>%
721
+ # ' epi_slide_mean(cases, before = 6, after = 7) %>%
740
722
# ' # Remove a nonessential var. to ensure new col is printed
741
- # ' dplyr::select(geo_value, time_value, cases, cases_14dav) %>%
723
+ # ' dplyr::select(geo_value, time_value, cases, cases_14dav = slide_value_cases ) %>%
742
724
# ' ungroup()
743
725
epi_slide_mean <- function (x , col_names , ... , before , after , ref_time_values ,
744
726
time_step ,
745
- new_col_name = " slide_value " , as_list_col = NULL ,
746
- names_sep = " _ " , all_rows = FALSE ) {
727
+ new_col_name = NULL , as_list_col = NULL ,
728
+ names_sep = NULL , all_rows = FALSE ) {
747
729
epi_slide_opt(
748
730
x = x ,
749
731
col_names = {{ col_names }},
@@ -783,14 +765,14 @@ epi_slide_mean <- function(x, col_names, ..., before, after, ref_time_values,
783
765
# ' # slide a 7-day trailing sum formula on cases
784
766
# ' jhu_csse_daily_subset %>%
785
767
# ' group_by(geo_value) %>%
786
- # ' epi_slide_sum(cases, new_col_name = "cases_7dsum", names_sep = NULL, before = 6) %>%
768
+ # ' epi_slide_sum(cases, before = 6) %>%
787
769
# ' # Remove a nonessential var. to ensure new col is printed
788
- # ' dplyr::select(geo_value, time_value, cases, cases_7dsum) %>%
770
+ # ' dplyr::select(geo_value, time_value, cases, cases_7dsum = slide_value_cases ) %>%
789
771
# ' ungroup()
790
772
epi_slide_sum <- function (x , col_names , ... , before , after , ref_time_values ,
791
773
time_step ,
792
- new_col_name = " slide_value " , as_list_col = NULL ,
793
- names_sep = " _ " , all_rows = FALSE ) {
774
+ new_col_name = NULL , as_list_col = NULL ,
775
+ names_sep = NULL , all_rows = FALSE ) {
794
776
epi_slide_opt(
795
777
x = x ,
796
778
col_names = {{ col_names }},
@@ -859,7 +841,7 @@ full_date_seq <- function(x, before, after, time_step) {
859
841
" i" = c(" The input data's `time_type` was probably `custom` or `day-time`.
860
842
These require also passing a `time_step` function." )
861
843
),
862
- class = " epiprocess__epi_slide_mean__unmappable_time_type " ,
844
+ class = " epiprocess__full_date_seq__unmappable_time_type " ,
863
845
epiprocess__time_type = ttype
864
846
)
865
847
}
0 commit comments