@@ -179,10 +179,10 @@ step_climate <-
179179 arg_is_lgl_scalar(skip )
180180
181181 time_aggr <- switch (time_type ,
182- epiweek = lubridate :: epiweek ,
183- week = lubridate :: isoweek ,
182+ epiweek = epiweek_leap ,
183+ week = isoweek_leap ,
184184 month = lubridate :: month ,
185- day = lubridate :: yday
185+ day = yday_leap
186186 )
187187
188188 recipes :: add_step(
@@ -258,22 +258,32 @@ prep.step_climate <- function(x, training, info = NULL, ...) {
258258 wts <- wts %|| % rep(1 , nrow(training ))
259259
260260 modulus <- switch (x $ time_type ,
261- epiweek = 53L ,
262- week = 53L ,
261+ epiweek = 52L , # only sometimes true
262+ week = 52L ,
263263 month = 12L ,
264- day = 365L
264+ day = 365L # only sometimes true
265265 )
266266
267267 fn <- switch (x $ center_method ,
268268 mean = function (x , w ) stats :: weighted.mean(x , w , na.rm = TRUE ),
269269 median = function (x , w ) median(x , na.rm = TRUE )
270270 )
271-
272- climate_table <- training %> %
271+ # suppose it's week 52, and there is no week 53 this year; then
272+ # as originally written for 1 week ahead this grabs from week 52+1 %% 53
273+ # which will be week 53, not week 1.
274+ ahead_period <- switch (x $ time_type ,
275+ epiweek = lubridate :: weeks(x $ forecast_ahead ),
276+ week = lubridate :: weeks(x $ forecast_ahead ),
277+ month = months(x $ forecast_ahead ),
278+ day = lubridate :: days(x $ forecast_ahead ),
279+ )
280+ climate_table <-
281+ training %> %
273282 mutate(
274- .idx = x $ time_aggr(time_value ), .weights = wts ,
275- .idx = (.idx - x $ forecast_ahead ) %% modulus ,
276- .idx = dplyr :: case_when(.idx == 0 ~ modulus , TRUE ~ .idx )
283+ # subtracts a month w/o rollover (usual behavior on weeks/days)
284+ .idx = time_value %m - % ahead_period ,
285+ .idx = x $ time_aggr(.idx ),
286+ .weights = wts
277287 ) %> %
278288 select(.idx , .weights , all_of(c(col_names , x $ epi_keys ))) %> %
279289 tidyr :: pivot_longer(all_of(unname(col_names ))) %> %
@@ -335,18 +345,75 @@ print.step_climate <- function(x, width = max(20, options()$width - 30), ...) {
335345# ' @param window_size the number of .idx entries before and after to include in
336346# ' the aggregation
337347# ' @param modulus the maximum value of `.idx`
348+ # ' @importFrom lubridate %m-%
338349roll_modular_multivec <- function (col , .idx , weights , aggr , window_size , modulus ) {
339350 tib <- tibble(col = col , weights = weights , .idx = .idx ) | >
340351 arrange(.idx ) | >
341352 tidyr :: nest(data = c(col , weights ), .by = .idx )
342- out <- double(nrow( tib ) )
353+ out <- double(modulus + 1 )
343354 for (iter in seq_along(out )) {
355+ # +1 from 1-indexing
344356 entries <- (iter - window_size ): (iter + window_size ) %% modulus
345357 entries [entries == 0 ] <- modulus
358+ # note that because we are 1-indexing, we're looking for indices that are 1
359+ # larger than the actual day/week in the year
360+ if (modulus == 365 ) {
361+ # we need to grab just the window around the leap day on the leap day
362+ if (iter == 366 ) {
363+ # there's an extra data point in front of the leap day
364+ entries <- (59 - window_size ): (59 + window_size - 1 ) %% modulus
365+ entries [entries == 0 ] <- modulus
366+ # adding in the leap day itself
367+ entries <- c(entries , 999 )
368+ } else if ((59 %in% entries ) || (60 %in% entries )) {
369+ # if we're on the Feb/March boundary for daily data, we need to add in the
370+ # leap day data
371+ entries <- c(entries , 999 )
372+ }
373+ } else if (modulus == 52 ) {
374+ # we need to grab just the window around the leap week on the leap week
375+ if (iter == 53 ) {
376+ entries <- (53 - window_size ): (53 + window_size - 1 ) %% 52
377+ entries [entries == 0 ] <- 52
378+ entries <- c(entries , 999 )
379+ } else if ((52 %in% entries ) || (1 %in% entries )) {
380+ # if we're on the year boundary for weekly data, we need to add in the
381+ # leap week data (which is the extra week at the end)
382+ entries <- c(entries , 999 )
383+ }
384+ }
346385 out [iter ] <- with(
347- purrr :: list_rbind(tib $ data [ entries ] ),
386+ purrr :: list_rbind(tib % > % filter( .idx %in% entries ) % > % pull( data ) ),
348387 aggr(col , weights )
349388 )
350389 }
351- tibble(.idx = unique(tib $ .idx ), climate_pred = out )
390+ tibble(.idx = unique(tib $ .idx ), climate_pred = out [seq_len(nrow(tib ))])
391+ }
392+
393+
394+ # ' a function that assigns Feb 29th to 999, and aligns all other dates the same
395+ # ' number in the year, regardless of whether it's a leap year
396+ # ' @keywords internal
397+ # ' @importFrom lubridate yday month leap_year
398+ yday_leap <- function (time_value ) {
399+ dplyr :: case_when(
400+ ! leap_year(time_value ) ~ yday(time_value ),
401+ leap_day(time_value ) ~ 999 ,
402+ TRUE ~ yday(time_value ) - as.numeric(month(time_value ) > 2L )
403+ )
404+ }
405+ leap_day <- function (x ) lubridate :: month(x ) == 2 & lubridate :: day(x ) == 29
406+ # ' epiweek, but it assigns week 53 the value of 999 instead so it mirrors the assignments in yday_leap
407+ # ' @keywords internal
408+ epiweek_leap <- function (time_value ) {
409+ week_values <- lubridate :: epiweek(time_value )
410+ week_values [week_values == 53 ] <- 999
411+ week_values
412+ }
413+ # ' isoweek, but it assigns week 53 the value of 999 instead so it mirrors the assignments in yday_leap
414+ # ' @keywords internal
415+ isoweek_leap <- function (time_value ) {
416+ week_values <- lubridate :: isoweek(time_value )
417+ week_values [week_values == 53 ] <- 999
418+ week_values
352419}
0 commit comments