@@ -537,7 +537,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
537537# '
538538# ' @template basic-slide-params
539539# ' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column
540- # ' name(e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
540+ # ' name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
541541# ' [other tidy-select expression][tidyselect::language], or a vector of
542542# ' characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
543543# ' they were positions in the data frame, so expressions like `x:y` can be
@@ -559,13 +559,40 @@ get_before_after_from_window <- function(window_size, align, time_type) {
559559# ' `epi_slide_mean` and `epi_slide_sum`) take care of window completion
560560# ' automatically to prevent associated errors.
561561# ' @param ... Additional arguments to pass to the slide computation `.f`, for
562- # ' example, `algo` or `na.rm` in data.table functions. You don't need to
563- # ' specify `.x`, `.window_size`, or `.align` (or `before`/`after` for slider
564- # ' functions).
562+ # ' example, `algo` or `na.rm` in data.table functions. You don't need to
563+ # ' specify `.x`, `.window_size`, or `.align` (or `before`/`after` for slider
564+ # ' functions).
565+ # ' @param .prefix Optional [`glue::glue`] format string; name the slide result
566+ # ' column(s) by attaching this prefix to the corresponding input column(s).
567+ # ' Some shorthand is supported for basing the output names on `.window_size`
568+ # ' or other arguments; see "Prefix and suffix shorthand" below.
569+ # ' @param .suffix Optional [`glue::glue`] format string; like `.prefix`. The
570+ # ' default naming behavior is equivalent to `.suffix =
571+ # ' "_{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"`. Can be used in combination
572+ # ' with `.prefix`.
573+ # ' @param .new_col_names Optional character vector with length matching the
574+ # ' number of input columns from `.col_names`; name the slide result column(s)
575+ # ' with these names. Cannot be used in combination with `.prefix` and/or
576+ # ' `.suffix`.
577+ # '
578+ # ' @section Prefix and suffix shorthand:
579+ # '
580+ # ' [`glue::glue`] format strings specially interpret content within curly
581+ # ' braces. E.g., `glue::glue("ABC{2 + 2}")` evaluates to `"ABC4"`. For `.prefix`
582+ # ' and `.suffix`, we provide `glue` with some additional variable bindings:
583+ # '
584+ # ' - `{.n}` will be the number of time steps in the computation
585+ # ' corresponding to the `.window_size`.
586+ # ' - `{.time_unit_abbr}` will be a lower-case letter corresponding to the
587+ # ' `time_type` of `.x`
588+ # ' - `{.align_abbr}` will be `""` if `.align` is the default of `"right"`;
589+ # ' otherwise, it will be the first letter of `.align`
590+ # ' - `{.f_abbr}` will be a short string based on what `.f`
565591# '
566592# ' @importFrom dplyr bind_rows mutate %>% arrange tibble select all_of
567- # ' @importFrom rlang enquo expr_label caller_arg
593+ # ' @importFrom rlang enquo expr_label caller_arg quo_get_env
568594# ' @importFrom tidyselect eval_select
595+ # ' @importFrom glue glue
569596# ' @importFrom purrr map map_lgl
570597# ' @importFrom data.table frollmean frollsum frollapply
571598# ' @importFrom lubridate as.period
@@ -577,8 +604,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
577604# ' # Compute a 7-day trailing average on cases.
578605# ' cases_deaths_subset %>%
579606# ' group_by(geo_value) %>%
580- # ' epi_slide_opt(cases, .f = data.table::frollmean, .window_size = 7) %>%
581- # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases)
607+ # ' epi_slide_opt(cases, .f = data.table::frollmean, .window_size = 7)
582608# '
583609# ' # Same as above, but adjust `frollmean` settings for speed, accuracy, and
584610# ' # to allow partially-missing windows.
@@ -588,11 +614,11 @@ get_before_after_from_window <- function(window_size, align, time_type) {
588614# ' cases,
589615# ' .f = data.table::frollmean, .window_size = 7,
590616# ' algo = "exact", hasNA = TRUE, na.rm = TRUE
591- # ' ) %>%
592- # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases)
617+ # ' )
593618epi_slide_opt <- function (
594619 .x , .col_names , .f , ... ,
595620 .window_size = NULL , .align = c(" right" , " center" , " left" ),
621+ .prefix = NULL , .suffix = NULL , .new_col_names = NULL ,
596622 .ref_time_values = NULL , .all_rows = FALSE ) {
597623 assert_class(.x , " epi_df" )
598624
@@ -620,7 +646,7 @@ epi_slide_opt <- function(
620646 if (" new_col_name" %in% provided_args || " .new_col_name" %in% provided_args ) {
621647 cli :: cli_abort(
622648 " epi_slide_opt: the argument `new_col_name` is not supported for `epi_slide_opt`. If you want to customize
623- the output column names, use `dplyr::rename` after the slide ." ,
649+ the output column names, use `.prefix =`, `.suffix =`, or `.new_col_**names** =` ." ,
624650 class = " epiprocess__epi_slide_opt__new_name_not_supported"
625651 )
626652 }
@@ -644,21 +670,37 @@ epi_slide_opt <- function(
644670 )
645671 }
646672
673+ # The position of a given column can be differ between input `.x` and
674+ # `.data_group` since the grouping step by default drops grouping columns.
675+ # To avoid rerunning `eval_select` for every `.data_group`, convert
676+ # positions of user-provided `col_names` into string column names. We avoid
677+ # using `names(pos)` directly for robustness and in case we later want to
678+ # allow users to rename fields via tidyselection.
679+ col_names_quo <- enquo(.col_names )
680+ pos <- eval_select(col_names_quo , data = .x , allow_rename = FALSE )
681+ col_names_chr <- names(.x )[pos ]
682+
647683 # Check that slide function `.f` is one of those short-listed from
648684 # `data.table` and `slider` (or a function that has the exact same
649685 # definition, e.g. if the function has been reexported or defined
650686 # locally).
651- if (any(map_lgl(
652- list (frollmean , frollsum , frollapply ),
653- ~ identical(.f , .x )
654- ))) {
655- f_from_package <- " data.table"
656- } else if (any(map_lgl(
657- list (slide_sum , slide_prod , slide_mean , slide_min , slide_max , slide_all , slide_any ),
658- ~ identical(.f , .x )
659- ))) {
660- f_from_package <- " slider"
661- } else {
687+ f_possibilities <-
688+ tibble :: tribble(
689+ ~ f , ~ package , ~ abbr ,
690+ frollmean , " data.table" , " av" ,
691+ frollsum , " data.table" , " sum" ,
692+ frollapply , " data.table" , " slide" ,
693+ slide_sum , " slider" , " sum" ,
694+ slide_prod , " slider" , " prod" ,
695+ slide_mean , " slider" , " av" ,
696+ slide_min , " slider" , " min" ,
697+ slide_max , " slider" , " max" ,
698+ slide_all , " slider" , " all" ,
699+ slide_any , " slider" , " any" ,
700+ )
701+ f_info <- f_possibilities %> %
702+ filter(map_lgl(.data $ f , ~ identical(.f , .x )))
703+ if (nrow(f_info ) == 0L ) {
662704 # `f` is from somewhere else and not supported
663705 cli_abort(
664706 c(
@@ -672,6 +714,7 @@ epi_slide_opt <- function(
672714 epiprocess__f = .f
673715 )
674716 }
717+ f_from_package <- f_info $ package
675718
676719 user_provided_rtvs <- ! is.null(.ref_time_values )
677720 if (! user_provided_rtvs ) {
@@ -702,22 +745,72 @@ epi_slide_opt <- function(
702745 validate_slide_window_arg(.window_size , time_type )
703746 window_args <- get_before_after_from_window(.window_size , .align , time_type )
704747
748+ # Handle output naming
749+ if ((! is.null(.prefix ) || ! is.null(.suffix )) && ! is.null(.new_col_names )) {
750+ cli_abort(
751+ " Can't use both .prefix/.suffix and .new_col_names at the same time." ,
752+ class = " epiprocess__epi_slide_opt_incompatible_naming_args"
753+ )
754+ }
755+ assert_string(.prefix , null.ok = TRUE )
756+ assert_string(.suffix , null.ok = TRUE )
757+ assert_character(.new_col_names , len = length(col_names_chr ), null.ok = TRUE )
758+ if (is.null(.prefix ) && is.null(.suffix ) && is.null(.new_col_names )) {
759+ .suffix <- " _{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"
760+ # ^ does not account for any arguments specified to underlying functions via
761+ # `...` such as `na.rm =`, nor does it distinguish between functions from
762+ # different packages accomplishing the same type of computation. Those are
763+ # probably only set one way per task, so this probably produces cleaner
764+ # names without clashes (though maybe some confusion if switching between
765+ # code with different settings).
766+ }
767+ if (! is.null(.prefix ) || ! is.null(.suffix )) {
768+ .prefix <- .prefix %|| % " "
769+ .suffix <- .suffix %|| % " "
770+ if (identical(.window_size , Inf )) {
771+ n <- " running_"
772+ time_unit_abbr <- " "
773+ align_abbr <- " "
774+ } else {
775+ n <- time_delta_to_n_steps(.window_size , time_type )
776+ time_unit_abbr <- time_type_unit_abbr(time_type )
777+ align_abbr <- c(right = " " , center = " c" , left = " l" )[[.align ]]
778+ }
779+ glue_env <- rlang :: env(
780+ .n = n ,
781+ .time_unit_abbr = time_unit_abbr ,
782+ .align_abbr = align_abbr ,
783+ .f_abbr = f_info $ abbr ,
784+ quo_get_env(col_names_quo )
785+ )
786+ .new_col_names <- unclass(
787+ glue(.prefix , .envir = glue_env ) +
788+ col_names_chr +
789+ glue(.suffix , .envir = glue_env )
790+ )
791+ } else {
792+ # `.new_col_names` was provided by user; we don't need to do anything.
793+ }
794+ if (any(.new_col_names %in% names(.x ))) {
795+ cli_abort(c(
796+ " Naming conflict between new columns and existing columns" ,
797+ " x" = " Overlapping names: {format_varnames(intersect(.new_col_names, names(.x)))}"
798+ ), class = " epiprocess__epi_slide_opt_old_new_name_conflict" )
799+ }
800+ if (anyDuplicated(.new_col_names )) {
801+ cli_abort(c(
802+ " New column names contain duplicates" ,
803+ " x" = " Duplicated names: {format_varnames(unique(.new_col_names[duplicated(.new_col_names)]))}"
804+ ), class = " epiprocess__epi_slide_opt_new_name_duplicated" )
805+ }
806+ result_col_names <- .new_col_names
807+
705808 # Make a complete date sequence between min(.x$time_value) and max(.x$time_value).
706809 date_seq_list <- full_date_seq(.x , window_args $ before , window_args $ after , time_type )
707810 all_dates <- date_seq_list $ all_dates
708811 pad_early_dates <- date_seq_list $ pad_early_dates
709812 pad_late_dates <- date_seq_list $ pad_late_dates
710813
711- # The position of a given column can be differ between input `.x` and
712- # `.data_group` since the grouping step by default drops grouping columns.
713- # To avoid rerunning `eval_select` for every `.data_group`, convert
714- # positions of user-provided `col_names` into string column names. We avoid
715- # using `names(pos)` directly for robustness and in case we later want to
716- # allow users to rename fields via tidyselection.
717- pos <- eval_select(enquo(.col_names ), data = .x , allow_rename = FALSE )
718- col_names_chr <- names(.x )[pos ]
719- # Always rename results to "slide_value_<original column name>".
720- result_col_names <- paste0(" slide_value_" , col_names_chr )
721814 slide_one_grp <- function (.data_group , .group_key , ... ) {
722815 missing_times <- all_dates [! (all_dates %in% .data_group $ time_value )]
723816 # `frollmean` requires a full window to compute a result. Add NA values
@@ -827,8 +920,7 @@ epi_slide_opt <- function(
827920# ' # Compute a 7-day trailing average on cases.
828921# ' cases_deaths_subset %>%
829922# ' group_by(geo_value) %>%
830- # ' epi_slide_mean(cases, .window_size = 7) %>%
831- # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases)
923+ # ' epi_slide_mean(cases, .window_size = 7)
832924# '
833925# ' # Same as above, but adjust `frollmean` settings for speed, accuracy, and
834926# ' # to allow partially-missing windows.
@@ -838,11 +930,11 @@ epi_slide_opt <- function(
838930# ' cases,
839931# ' .window_size = 7,
840932# ' na.rm = TRUE, algo = "exact", hasNA = TRUE
841- # ' ) %>%
842- # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases)
933+ # ' )
843934epi_slide_mean <- function (
844935 .x , .col_names , ... ,
845936 .window_size = NULL , .align = c(" right" , " center" , " left" ),
937+ .prefix = NULL , .suffix = NULL , .new_col_names = NULL ,
846938 .ref_time_values = NULL , .all_rows = FALSE ) {
847939 # Deprecated argument handling
848940 provided_args <- rlang :: call_args_names(rlang :: call_match())
@@ -885,6 +977,9 @@ epi_slide_mean <- function(
885977 ... ,
886978 .window_size = .window_size ,
887979 .align = .align ,
980+ .prefix = .prefix ,
981+ .suffix = .suffix ,
982+ .new_col_names = .new_col_names ,
888983 .ref_time_values = .ref_time_values ,
889984 .all_rows = .all_rows
890985 )
@@ -899,11 +994,11 @@ epi_slide_mean <- function(
899994# ' # Compute a 7-day trailing sum on cases.
900995# ' cases_deaths_subset %>%
901996# ' group_by(geo_value) %>%
902- # ' epi_slide_sum(cases, .window_size = 7) %>%
903- # ' dplyr::select(geo_value, time_value, cases, cases_7dsum = slide_value_cases)
997+ # ' epi_slide_sum(cases, .window_size = 7)
904998epi_slide_sum <- function (
905999 .x , .col_names , ... ,
9061000 .window_size = NULL , .align = c(" right" , " center" , " left" ),
1001+ .prefix = NULL , .suffix = NULL , .new_col_names = NULL ,
9071002 .ref_time_values = NULL , .all_rows = FALSE ) {
9081003 # Deprecated argument handling
9091004 provided_args <- rlang :: call_args_names(rlang :: call_match())
@@ -945,6 +1040,9 @@ epi_slide_sum <- function(
9451040 ... ,
9461041 .window_size = .window_size ,
9471042 .align = .align ,
1043+ .prefix = .prefix ,
1044+ .suffix = .suffix ,
1045+ .new_col_names = .new_col_names ,
9481046 .ref_time_values = .ref_time_values ,
9491047 .all_rows = .all_rows
9501048 )
0 commit comments