@@ -790,6 +790,8 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'                        as.Date("2020-06-15"),
 #'                        by = "1 day")
 #'
+#' # A simple (but not very useful) example (see the archive vignette for a more
+#' # realistic one):
 #' archive_cases_dv_subset %>%
 #'   group_by(geo_value) %>%
 #'   epix_slide(f = ~ mean(.x$case_rate_7d_av),
@@ -801,39 +803,71 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #' # values. The actual number of `time_value`s in each computation depends on
 #' # the reporting latency of the signal and `time_value` range covered by the
 #' # archive (2020-06-01 -- 2021-11-30 in this example). In this case, we have
-#' # 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically discarded
-#' # 1 `time_value`, for ref time 2020-06-02
-#' # 2 `time_value`s, for the rest of the results
-#' # never 3 `time_value`s, due to data latency
-#'
-#'
+#' # * 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically
+#' #   discarded
+#' # * 1 `time_value`, for ref time 2020-06-02
+#' # * 2 `time_value`s, for the rest of the results
+#' # * never the 3 `time_value`s we would get from `epi_slide`, since, because
+#' #   of data latency, we'll never have an observation
+#' #   `time_value == ref_time_value` as of `ref_time_value`.
+#' # The example below shows this type of behavior in more detail.
+#'
+#' # Examining characteristics of the data passed to each computation with
+#' # `all_versions=FALSE`.
+#' archive_cases_dv_subset %>%
+#'   group_by(geo_value) %>%
+#'   epix_slide(
+#'     function(x, g) {
+#'       tibble(
+#'         time_range = if(nrow(x) == 0L) {
+#'           "0 `time_value`s"
+#'         } else {
+#'           sprintf("%s -- %s", min(x$time_value), max(x$time_value))
+#'         },
+#'         n = nrow(x),
+#'         class1 = class(x)[[1L]]
+#'       )
+#'     },
+#'     before = 5, all_versions = FALSE,
+#'     ref_time_values = ref_time_values, names_sep=NULL) %>%
+#'   ungroup() %>%
+#'   arrange(geo_value, time_value)
 #'
 #' # --- Advanced: ---
 #'
 #' # `epix_slide` with `all_versions=FALSE` (the default) applies a
 #' # version-unaware computation to several versions of the data. We can also
 #' # use `all_versions=TRUE` to apply a version-*aware* computation to several
-#' # versions of the data. In this case, each computation should expect an
+#' # versions of the data, again looking at characteristics of the data passed
+#' # to each computation. In this case, each computation should expect an
 #' # `epi_archive` containing the relevant version data:
 #'
 #' archive_cases_dv_subset %>%
 #'   group_by(geo_value) %>%
 #'   epix_slide(
 #'     function(x, g) {
 #'       tibble(
-#'         versions_end = max(x$versions_end),
+#'         versions_start = if (nrow(x$DT) == 0L) {
+#'           "NA (0 rows)"
+#'         } else {
+#'           toString(min(x$DT$version))
+#'         },
+#'         versions_end = x$versions_end,
 #'         time_range = if(nrow(x$DT) == 0L) {
 #'           "0 `time_value`s"
 #'         } else {
 #'           sprintf("%s -- %s", min(x$DT$time_value), max(x$DT$time_value))
 #'         },
+#'         n = nrow(x$DT),
 #'         class1 = class(x)[[1L]]
 #'       )
 #'     },
-#'     before = 2 , all_versions = TRUE,
+#'     before = 5 , all_versions = TRUE,
 #'     ref_time_values = ref_time_values, names_sep=NULL) %>%
 #'   ungroup() %>%
-#'   arrange(geo_value, time_value)
+#'   # Focus on one geo_value so we can better see the columns above:
+#'   filter(geo_value == "ca") %>%
+#'   select(-geo_value)
 #'
 #' @importFrom rlang enquo !!!
 #' @export
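The examples added in this hunk also rely on a `ref_time_values` object and package attachments defined earlier in the same roxygen `@examples` block; only the tail of that setup (`as.Date("2020-06-15"), by = "1 day")`) appears in the context lines above. A minimal sketch of that setup in R follows. The start date 2020-06-01 is inferred from the comments in the diff and the exact spelling of the original setup is an assumption, not the commit's literal code:

# Sketch only -- not part of the diff above.
library(dplyr)      # %>%, group_by(), filter(), select(), arrange(), ungroup()
library(tibble)     # tibble() used inside the slide computations
library(epiprocess) # epix_slide() and the archive_cases_dv_subset dataset

# Assumed reconstruction of the reference time points used by the examples
# (start date taken from the "ref time 2020-06-01" comment, end date from the
# context line "as.Date(\"2020-06-15\")"):
ref_time_values <- seq(as.Date("2020-06-01"),
                       as.Date("2020-06-15"),
                       by = "1 day")

With this setup in place, the `all_versions=FALSE` and `all_versions=TRUE` pipelines added by the hunk can be run as written.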