@@ -13,30 +13,34 @@ epi_recipe <- function(x, ...) {
1313}
1414
1515
16- # ' @rdname epi_recipe
17- # ' @export
18- epi_recipe.default <- function (x , ... ) {
19- cli_abort(paste(
20- " `x` must be an {.cls epi_df} or a {.cls formula}," ,
21- " not a {.cls {class(x)[[1]]}}."
22- ))
23- }
2416
2517# ' @rdname epi_recipe
2618# ' @inheritParams recipes::recipe
2719# ' @param roles A character string (the same length of `vars`) that
2820# ' describes a single role that the variable will take. This value could be
2921# ' anything but common roles are `"outcome"`, `"predictor"`,
3022# ' `"time_value"`, and `"geo_value"`
23+ # ' @param reference_date Either a date of the same class as the `time_value`
24+ # ' column in the `epi_df` or `NULL`. If a date, it gives the date to which all
25+ # ' operations are relative. Typically, in real-time tasks this is the date that
26+ # ' the model is created (and presumably trained). In forecasting, this is
27+ # ' often the same as the most recent date of
28+ # ' data availability, but when data is "latent" (reported after the date to
29+ # ' which it corresponds), or if performing a nowcast, the `reference_date` may
30+ # ' be later than this. Setting `reference_date`
31+ # ' to a value BEFORE the most recent data is not a true "forecast",
32+ # ' because future data is being used to create the model, but this may be
33+ # ' reasonable in model building, nowcasting (predicting finalized values from
34+ # ' preliminary data), or if producing a backcast. If `NULL`, it will be set
35+ # ' to the `as_of` date of the `epi_df`.
3136# ' @param ... Further arguments passed to or from other methods (not currently
3237# ' used).
3338# ' @param formula A model formula. No in-line functions should be used here
3439# ' (e.g. `log(x)`, `x:y`, etc.) and minus signs are not allowed. These types of
3540# ' transformations should be enacted using `step` functions in this package.
3641# ' Dots are allowed as are simple multivariate outcome terms (i.e. no need for
3742# ' `cbind`; see Examples).
38- # ' @param x,data A data frame, tibble, or epi_df of the *template* data set
39- # ' (see below). This is always coerced to the first row to avoid memory issues
43+ # ' @param x,data An epi_df of the *template* data set (see below).
4044# ' @inherit recipes::recipe return
4145# '
4246# ' @export
@@ -56,100 +60,107 @@ epi_recipe.default <- function(x, ...) {
5660# ' step_naomit(all_outcomes(), skip = TRUE)
5761# '
5862# ' r
59- epi_recipe.epi_df <-
60- function (x , formula = NULL , ... , vars = NULL , roles = NULL ) {
61- attr(x , " decay_to_tibble" ) <- FALSE
62- if (! is.null(formula )) {
63- if (! is.null(vars )) {
64- cli_abort(paste0(
65- " This `vars` specification will be ignored " ,
63+ epi_recipe.epi_df <- function (x ,
64+ reference_date = NULL ,
65+ formula = NULL ,
66+ ... ,
67+ vars = NULL ,
68+ roles = NULL ) {
69+ attr(x , " decay_to_tibble" ) <- FALSE
70+ if (! is.null(formula )) {
71+ if (! is.null(vars )) {
72+ cli_abort(paste0(
73+ " This `vars` specification will be ignored " ,
74+ " when a formula is used"
75+ ))
76+ }
77+ if (! is.null(roles )) {
78+ cli_abort(
79+ paste0(
80+ " This `roles` specification will be ignored " ,
6681 " when a formula is used"
67- ))
68- }
69- if (! is.null(roles )) {
70- cli_abort(
71- paste0(
72- " This `roles` specification will be ignored " ,
73- " when a formula is used"
74- )
7582 )
76- }
77-
78- obj <- epi_recipe.formula(formula , x , ... )
79- return (obj )
80- }
81- if (is.null(vars )) vars <- colnames(x )
82- if (any(table(vars ) > 1 )) {
83- cli_abort(" `vars` should have unique members" )
84- }
85- if (any(! (vars %in% colnames(x )))) {
86- cli_abort(" 1 or more elements of `vars` are not in the data" )
83+ )
8784 }
8885
89- keys <- key_colnames(x ) # we know x is an epi_df
86+ obj <- epi_recipe.formula(formula , x , ... )
87+ return (obj )
88+ }
89+ if (is.null(vars )) vars <- colnames(x )
90+ if (any(table(vars ) > 1 )) {
91+ cli_abort(" `vars` should have unique members" )
92+ }
93+ if (any(! (vars %in% colnames(x )))) {
94+ cli_abort(" 1 or more elements of `vars` are not in the data" )
95+ }
9096
91- var_info <- tibble(variable = vars )
92- key_roles <- c(" geo_value" , rep(" key" , length(keys ) - 2 ), " time_value" )
97+ keys <- key_colnames(x ) # we know x is an epi_df
9398
94- # # Check and add roles when available
95- if (! is.null(roles )) {
96- if (length(roles ) != length(vars )) {
97- cli_abort(paste0(
98- " The number of roles should be the same as the number of " ,
99- " variables."
100- ))
101- }
102- var_info $ role <- roles
103- } else {
104- var_info <- var_info %> % filter(! (variable %in% keys ))
105- var_info $ role <- " raw"
106- }
107- # # Now we add the keys when necessary
108- var_info <- dplyr :: union(
109- var_info ,
110- tibble :: tibble(variable = keys , role = key_roles )
111- )
99+ var_info <- tibble(variable = vars )
100+ key_roles <- c(" geo_value" , rep(" key" , length(keys ) - 2 ), " time_value" )
112101
113- # # Add types
114- var_info <- full_join(recipes ::: get_types(x ), var_info , by = " variable" )
115- var_info $ source <- " original"
116-
117- # # arrange to easy order
118- var_info <- var_info %> %
119- arrange(factor (
120- role ,
121- levels = union(
122- c(" predictor" , " outcome" , " time_value" , " geo_value" , " key" ),
123- unique(role )
124- ) # anything else
102+ # # Check and add roles when available
103+ if (! is.null(roles )) {
104+ if (length(roles ) != length(vars )) {
105+ cli_abort(paste0(
106+ " The number of roles should be the same as the number of " ,
107+ " variables."
125108 ))
126-
127- # # Return final object of class `recipe`
128- out <- list (
129- var_info = var_info ,
130- term_info = var_info ,
131- steps = NULL ,
132- template = x [1 , ],
133- max_time_value = max(x $ time_value ),
134- levels = NULL ,
135- retained = NA
136- )
137- class(out ) <- c(" epi_recipe" , " recipe" )
138- out
109+ }
110+ var_info $ role <- roles
111+ } else {
112+ var_info <- var_info %> % filter(! (variable %in% keys ))
113+ var_info $ role <- " raw"
139114 }
115+ # # Now we add the keys when necessary
116+ var_info <- dplyr :: union(
117+ var_info ,
118+ tibble :: tibble(variable = keys , role = key_roles )
119+ )
120+
121+ # # Add types
122+ var_info <- full_join(recipes ::: get_types(x ), var_info , by = " variable" )
123+ var_info $ source <- " original"
124+
125+ # # arrange to easy order
126+ var_info <- var_info %> %
127+ arrange(factor (
128+ role ,
129+ levels = union(
130+ c(" predictor" , " outcome" , " time_value" , " geo_value" , " key" ),
131+ unique(role )
132+ ) # anything else
133+ ))
134+
135+ # # Return final object of class `recipe`
136+ max_time_value <- max(x $ time_value )
137+ reference_date <- reference_date %|| % attr(x , " metadata" )$ as_of
138+ out <- list (
139+ var_info = var_info ,
140+ term_info = var_info ,
141+ steps = NULL ,
142+ template = x [1 , ],
143+ max_time_value = max_time_value ,
144+ reference_date = reference_date ,
145+ levels = NULL ,
146+ retained = NA
147+ )
148+ class(out ) <- c(" epi_recipe" , " recipe" )
149+ out
150+ }
140151
141152
142153# ' @rdname epi_recipe
143154# ' @export
144- epi_recipe.formula <- function (formula , data , ... ) {
155+ epi_recipe.formula <- function (formula , data , reference_date = NULL , ... ) {
145156 # we ensure that there's only 1 row in the template
146157 data <- data [1 , ]
147158 # check for minus:
148159 if (! epiprocess :: is_epi_df(data )) {
149- cli_abort(paste(
150- " `epi_recipe()` has been called with a non-{.cls epi_df} object." ,
151- " Use `recipe()` instead."
152- ))
160+ cli_abort(
161+ " `epi_recipe()` has been called with a non-{.cls epi_df} object.
162+ Use `recipe()` instead."
163+ )
153164 }
154165
155166 attr(data , " decay_to_tibble" ) <- FALSE
0 commit comments