@@ -790,6 +790,8 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #' as.Date("2020-06-15"),
 #' by = "1 day")
 #'
+#' # A simple (but not very useful) example (see the archive vignette for a more
+#' # realistic one):
 #' archive_cases_dv_subset %>%
 #'   group_by(geo_value) %>%
 #'   epix_slide(f = ~ mean(.x$case_rate_7d_av),
@@ -801,39 +803,71 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #' # values. The actual number of `time_value`s in each computation depends on
 #' # the reporting latency of the signal and `time_value` range covered by the
 #' # archive (2020-06-01 -- 2021-11-30 in this example). In this case, we have
-#' # 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically discarded
-#' # 1 `time_value`, for ref time 2020-06-02
-#' # 2 `time_value`s, for the rest of the results
-#' # never 3 `time_value`s, due to data latency
-#'
-#'
+#' # * 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically
+#' #   discarded
+#' # * 1 `time_value`, for ref time 2020-06-02
+#' # * 2 `time_value`s, for the rest of the results
+#' # * never the 3 `time_value`s we would get from `epi_slide`, since, because
+#' #   of data latency, we'll never have an observation with
+#' #   `time_value == ref_time_value` as of `ref_time_value`.
+#' # The example below shows this type of behavior in more detail.
+#'
+#' # Examining characteristics of the data passed to each computation with
+#' # `all_versions=FALSE`.
+#' archive_cases_dv_subset %>%
+#'   group_by(geo_value) %>%
+#'   epix_slide(
+#'     function(x, g) {
+#'       tibble(
+#'         time_range = if(nrow(x) == 0L) {
+#'           "0 `time_value`s"
+#'         } else {
+#'           sprintf("%s -- %s", min(x$time_value), max(x$time_value))
+#'         },
+#'         n = nrow(x),
+#'         class1 = class(x)[[1L]]
+#'       )
+#'     },
+#'     before = 5, all_versions = FALSE,
+#'     ref_time_values = ref_time_values, names_sep=NULL) %>%
+#'   ungroup() %>%
+#'   arrange(geo_value, time_value)
 #'
 #' # --- Advanced: ---
 #'
 #' # `epix_slide` with `all_versions=FALSE` (the default) applies a
 #' # version-unaware computation to several versions of the data. We can also
 #' # use `all_versions=TRUE` to apply a version-*aware* computation to several
-#' # versions of the data. In this case, each computation should expect an
+#' # versions of the data, again looking at characteristics of the data passed
+#' # to each computation. In this case, each computation should expect an
 #' # `epi_archive` containing the relevant version data:
 #'
 #' archive_cases_dv_subset %>%
 #'   group_by(geo_value) %>%
 #'   epix_slide(
 #'     function(x, g) {
 #'       tibble(
-#'         versions_end = max(x$versions_end),
+#'         versions_start = if (nrow(x$DT) == 0L) {
+#'           "NA (0 rows)"
+#'         } else {
+#'           toString(min(x$DT$version))
+#'         },
+#'         versions_end = x$versions_end,
 #'         time_range = if(nrow(x$DT) == 0L) {
 #'           "0 `time_value`s"
 #'         } else {
 #'           sprintf("%s -- %s", min(x$DT$time_value), max(x$DT$time_value))
 #'         },
+#'         n = nrow(x$DT),
 #'         class1 = class(x)[[1L]]
 #'       )
 #'     },
-#'     before = 2 , all_versions = TRUE,
+#'     before = 5, all_versions = TRUE,
 #'     ref_time_values = ref_time_values, names_sep=NULL) %>%
 #'   ungroup() %>%
-#'   arrange(geo_value, time_value)
+#'   # Focus on one geo_value so we can better see the columns above:
+#'   filter(geo_value == "ca") %>%
+#'   select(-geo_value)
 #'
 #' @importFrom rlang enquo !!!
 #' @export
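
For anyone trying the latency behavior described above outside the package docs, here is a minimal standalone sketch. It builds a toy archive whose rows first become available one day after their `time_value` (one day of reporting latency), so a computation with `before = 2` sees at most 2 `time_value`s and never a row with `time_value == ref_time_value`. The toy data values are made up for illustration, and the snippet assumes the same epiprocess API (`as_epi_archive`, grouped `epix_slide`) used in the examples above.

library(dplyr)
library(tibble)
library(epiprocess)

# Toy archive: one location, observations for 2020-06-01..2020-06-03, each row
# first reported (versioned) one day after its time_value.
toy_archive <- tibble(
  geo_value  = "ak",                        # made-up location
  time_value = as.Date("2020-06-01") + 0:2,
  version    = as.Date("2020-06-02") + 0:2, # one day of reporting latency
  value      = c(1, 2, 3)                   # made-up signal values
) %>%
  as_epi_archive(geo_type = "state", time_type = "day")

# Count the time_values visible to each computation. The expected pattern
# mirrors the comments above: the empty 2020-06-01 snapshot should be
# discarded, then n == 1 for 2020-06-02 and n == 2 afterwards -- never 3,
# even though before = 2 spans three time_values.
toy_archive %>%
  group_by(geo_value) %>%
  epix_slide(
    function(x, g) tibble(n = nrow(x)),
    before = 2, all_versions = FALSE,
    ref_time_values = as.Date("2020-06-01") + 0:3,
    names_sep = NULL
  )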