Skip to content

Commit 6b8fad8

Browse files
authored
Merge pull request #284 from cmu-delphi/ndefries/all-versions/examples
add epix_slide example comparing same computation with all_vers FALSE
2 parents 527c1bb + a82e2f2 commit 6b8fad8

File tree

2 files changed

+87
-19
lines changed

2 files changed

+87
-19
lines changed

R/methods-epi_archive.R

+44-10
Original file line numberDiff line numberDiff line change
@@ -790,6 +790,8 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
790790
#' as.Date("2020-06-15"),
791791
#' by = "1 day")
792792
#'
793+
#' # A simple (but not very useful) example (see the archive vignette for a more
794+
#' # realistic one):
793795
#' archive_cases_dv_subset %>%
794796
#' group_by(geo_value) %>%
795797
#' epix_slide(f = ~ mean(.x$case_rate_7d_av),
@@ -801,39 +803,71 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
801803
#' # values. The actual number of `time_value`s in each computation depends on
802804
#' # the reporting latency of the signal and `time_value` range covered by the
803805
#' # archive (2020-06-01 -- 2021-11-30 in this example). In this case, we have
804-
#' # 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically discarded
805-
#' # 1 `time_value`, for ref time 2020-06-02
806-
#' # 2 `time_value`s, for the rest of the results
807-
#' # never 3 `time_value`s, due to data latency
808-
#'
809-
#'
806+
#' # * 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically
807+
#' # discarded
808+
#' # * 1 `time_value`, for ref time 2020-06-02
809+
#' # * 2 `time_value`s, for the rest of the results
810+
#' # * never the 3 `time_value`s we would get from `epi_slide`: because of
811+
#' # data latency, we'll never have an observation with
812+
#' # `time_value == ref_time_value` as of `ref_time_value`.
813+
#' # The example below shows this type of behavior in more detail.
814+
#'
815+
#' # Examining characteristics of the data passed to each computation with
816+
#' # `all_versions=FALSE`.
817+
#' archive_cases_dv_subset %>%
818+
#' group_by(geo_value) %>%
819+
#' epix_slide(
820+
#' function(x, g) {
821+
#' tibble(
822+
#' time_range = if(nrow(x) == 0L) {
823+
#' "0 `time_value`s"
824+
#' } else {
825+
#' sprintf("%s -- %s", min(x$time_value), max(x$time_value))
826+
#' },
827+
#' n = nrow(x),
828+
#' class1 = class(x)[[1L]]
829+
#' )
830+
#' },
831+
#' before = 5, all_versions = FALSE,
832+
#' ref_time_values = ref_time_values, names_sep=NULL) %>%
833+
#' ungroup() %>%
834+
#' arrange(geo_value, time_value)
810835
#'
811836
#' # --- Advanced: ---
812837
#'
813838
#' # `epix_slide` with `all_versions=FALSE` (the default) applies a
814839
#' # version-unaware computation to several versions of the data. We can also
815840
#' # use `all_versions=TRUE` to apply a version-*aware* computation to several
816-
#' # versions of the data. In this case, each computation should expect an
841+
#' # versions of the data, again looking at characteristics of the data passed
842+
#' # to each computation. In this case, each computation should expect an
817843
#' # `epi_archive` containing the relevant version data:
818844
#'
819845
#' archive_cases_dv_subset %>%
820846
#' group_by(geo_value) %>%
821847
#' epix_slide(
822848
#' function(x, g) {
823849
#' tibble(
824-
#' versions_end = max(x$versions_end),
850+
#' versions_start = if (nrow(x$DT) == 0L) {
851+
#' "NA (0 rows)"
852+
#' } else {
853+
#' toString(min(x$DT$version))
854+
#' },
855+
#' versions_end = x$versions_end,
825856
#' time_range = if(nrow(x$DT) == 0L) {
826857
#' "0 `time_value`s"
827858
#' } else {
828859
#' sprintf("%s -- %s", min(x$DT$time_value), max(x$DT$time_value))
829860
#' },
861+
#' n = nrow(x$DT),
830862
#' class1 = class(x)[[1L]]
831863
#' )
832864
#' },
833-
#' before = 2, all_versions = TRUE,
865+
#' before = 5, all_versions = TRUE,
834866
#' ref_time_values = ref_time_values, names_sep=NULL) %>%
835867
#' ungroup() %>%
836-
#' arrange(geo_value, time_value)
868+
#' # Focus on one geo_value so we can better see the columns above:
869+
#' filter(geo_value == "ca") %>%
870+
#' select(-geo_value)
837871
#'
838872
#' @importFrom rlang enquo !!!
839873
#' @export

man/epix_slide.Rd

+43-9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)