Skip to content

Commit 47eb129

Browse files
authored
Merge pull request #599 from cmu-delphi/lcb/key_colnames-revision_summary-age_agg-updates
Update `epi_df.Rmd` rate aggregation, `key_colnames()`, `revision_summary()`
2 parents cc5e742 + 7571bf3 commit 47eb129

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1988
-618
lines changed

DESCRIPTION

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: epiprocess
22
Type: Package
33
Title: Tools for basic signal processing in epidemiology
4-
Version: 0.10.1
4+
Version: 0.10.3
55
Authors@R: c(
66
person("Jacob", "Bien", role = "ctb"),
77
person("Logan", "Brooks", , "lcbrooks+github@andrew.cmu.edu", role = c("aut", "cre")),
@@ -56,13 +56,15 @@ Imports:
5656
tibble,
5757
tidyr,
5858
tidyselect (>= 1.2.0),
59+
tools,
5960
tsibble,
6061
utils,
6162
vctrs,
6263
waldo
6364
Suggests:
6465
devtools,
6566
epidatr,
67+
epipredict,
6668
here,
6769
knitr,
6870
outbreaks,
@@ -76,6 +78,7 @@ Remotes:
7678
cmu-delphi/delphidocs,
7779
cmu-delphi/epidatasets,
7880
cmu-delphi/epidatr,
81+
cmu-delphi/epipredict,
7982
glmgen/genlasso,
8083
reconverse/outbreaks
8184
Config/Needs/website: cmu-delphi/delphidocs
@@ -103,5 +106,6 @@ Collate:
103106
'reexports.R'
104107
'revision_analysis.R'
105108
'slide.R'
109+
'time-utils.R'
106110
'utils.R'
107111
'utils_pipe.R'

NAMESPACE

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ S3method(guess_period,Date)
3939
S3method(guess_period,POSIXt)
4040
S3method(guess_period,default)
4141
S3method(key_colnames,data.frame)
42-
S3method(key_colnames,default)
4342
S3method(key_colnames,epi_archive)
4443
S3method(key_colnames,epi_df)
44+
S3method(key_colnames,tbl_ts)
4545
S3method(mean,epi_df)
4646
S3method(print,epi_archive)
4747
S3method(print,epi_df)
@@ -130,6 +130,8 @@ importFrom(cli,cli_li)
130130
importFrom(cli,cli_vec)
131131
importFrom(cli,cli_warn)
132132
importFrom(cli,format_message)
133+
importFrom(cli,pluralize)
134+
importFrom(cli,qty)
133135
importFrom(data.table,":=")
134136
importFrom(data.table,address)
135137
importFrom(data.table,as.data.table)
@@ -194,6 +196,8 @@ importFrom(rlang,arg_match)
194196
importFrom(rlang,caller_arg)
195197
importFrom(rlang,caller_env)
196198
importFrom(rlang,check_dots_empty)
199+
importFrom(rlang,check_dots_empty0)
200+
importFrom(rlang,dots_n)
197201
importFrom(rlang,enquo)
198202
importFrom(rlang,enquos)
199203
importFrom(rlang,env)
@@ -231,8 +235,11 @@ importFrom(tidyr,unnest)
231235
importFrom(tidyselect,any_of)
232236
importFrom(tidyselect,eval_select)
233237
importFrom(tidyselect,starts_with)
238+
importFrom(tools,toTitleCase)
234239
importFrom(tsibble,as_tsibble)
235240
importFrom(utils,capture.output)
236241
importFrom(utils,tail)
242+
importFrom(vctrs,vec_cast)
237243
importFrom(vctrs,vec_data)
244+
importFrom(vctrs,vec_detect_missing)
238245
importFrom(vctrs,vec_equal)

NEWS.md

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,37 @@
22

33
Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicate PR's.
44

5-
# epiprocess 0.10
5+
# epiprocess 0.11
66

77
## Breaking changes
8-
8+
- In `revision_summary()`:
9+
- Output now uses the name `lag_near_latest` instead of `time_near_latest`. To
10+
migrate, update references to `time_near_latest` to `lag_near_latest`.
11+
- `revision_summary(epi_arch)` without specifying the measurement column to
12+
analyze in `...` will no longer attempt to guess which one you intended if
13+
there are multiple possibilities to choose from (#571). If you attempt a
14+
complicated tidyselection that selects zero columns, this is also now an
15+
error. If you encounter such errors, manually specify the measurement column
16+
in `...`.
17+
- `min_waiting_period` now defines a nonstrict inequality instead of a strict
18+
one. To obtain the old bounds, bump the `min_waiting_period` up to the next
19+
possible value for your `time_type`.
20+
- In `key_colnames()`:
21+
- On regular (non-`epi_df`) data frames, now requires manual specification of
22+
`geo_keys`, `other_keys`, and `time_keys`.
23+
- The `extra_keys` argument has been deprecated and replaced with
24+
`other_keys`.
925

1026
## Improvements
11-
27+
- `revision_summary()` now supports all `time_type`s.
1228

1329
## Bug fixes
1430

31+
- Fixed aggregation of age-group-specific rates to overall rates in `epi_df` vignette (#587).
32+
- Fixed `key_colnames()` omitting some key columns on `epi_archive`s (#565).
33+
- Fixed `epi_archive` compactification raising an error on certain value column
34+
classes such as `"distribution"` (#541); it's now easier to form an archive of
35+
forecasts in that format.
1536

1637
## Cleanup
1738

R/archive.R

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -376,22 +376,19 @@ removed_by_compactify <- function(df, keys, tolerance) {
376376
#' [`dplyr::near`], otherwise it uses equality. `NA`'s and `NaN`'s are
377377
#' considered equal to themselves and each other.
378378
#' @importFrom dplyr lag if_else near
379+
#' @importFrom vctrs vec_detect_missing vec_equal
379380
#' @keywords internal
380381
is_locf <- function(vec, tolerance) { # nolint: object_usage_linter
381-
lag_vec <- dplyr::lag(vec)
382-
if (typeof(vec) == "double") {
382+
lag_vec <- lag(vec, 1L)
383+
if (inherits(vec, "numeric")) { # (no matrix/array/general support)
383384
res <- if_else(
384385
!is.na(vec) & !is.na(lag_vec),
385386
near(vec, lag_vec, tol = tolerance),
386387
is.na(vec) & is.na(lag_vec)
387388
)
388389
return(res)
389390
} else {
390-
res <- if_else(
391-
!is.na(vec) & !is.na(lag_vec),
392-
vec == lag_vec,
393-
is.na(vec) & is.na(lag_vec)
394-
)
391+
res <- vec_equal(vec, lag_vec, na_equal = TRUE)
395392
return(res)
396393
}
397394
}

R/epiprocess-package.R

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#' @importFrom checkmate check_names
1515
#' @importFrom checkmate test_subset test_set_equal vname
1616
#' @importFrom cli cli_abort cli_warn
17+
#' @importFrom cli pluralize
18+
#' @importFrom cli qty
1719
#' @importFrom data.table as.data.table
1820
#' @importFrom data.table key
1921
#' @importFrom data.table setkeyv
@@ -23,6 +25,7 @@
2325
#' @importFrom lifecycle deprecated
2426
#' @importFrom rlang %||%
2527
#' @importFrom rlang is_bare_integerish
28+
#' @importFrom tools toTitleCase
2629
#' @importFrom vctrs vec_data
2730
#' @importFrom vctrs vec_equal
2831
## usethis namespace: end
@@ -32,6 +35,6 @@ utils::globalVariables(c(
3235
".x", ".group_key", ".ref_time_value", "resid",
3336
"fitted", ".response", "geo_value", "time_value",
3437
"value", ".real", "lag", "max_value", "min_value",
35-
"median_value", "spread", "rel_spread", "time_to",
36-
"time_near_latest", "n_revisions", "min_lag", "max_lag"
38+
"median_value", "spread", "rel_spread", "lag_to",
39+
"lag_near_latest", "n_revisions", "min_lag", "max_lag"
3740
))

R/key_colnames.R

Lines changed: 108 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,133 @@
1-
#' Grab any keys associated to an epi_df
1+
#' Get names of columns that form a (unique) key associated with an object
22
#'
3-
#' @param x a data.frame, tibble, or epi_df
3+
#' This is entirely based on metadata and arguments passed; there are no
4+
#' explicit checks that the key actually is unique in any associated data
5+
#' structures.
6+
#'
7+
#' @param x an object, often a data frame or something similar. `{epiprocess}`
8+
#' includes implementations for [`epi_df`]s, [`epi_archive`]s,
9+
#' [`tsibble::tsibble`]s, and other data frames (including
10+
#' [`tibble::tibble`]s); other packages, like `{epipredict}`, can add more.
411
#' @param ... additional arguments passed on to methods
5-
#' @param other_keys an optional character vector of other keys to include
6-
#' @param exclude an optional character vector of keys to exclude
7-
#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`.
12+
#' @param geo_keys,other_keys,time_keys character vectors, sometimes optional;
13+
#' which variables (if any) should be considered as part of a unique
14+
#' key/identifier for data in `x`, dealing respectively with the associated
15+
#' geographical region, demographic/strain/other information needed in
16+
#' addition to the geographical region to identify individual time series in
17+
#' `x`, and time interval during which associated events occurred.
18+
#'
19+
#' Mandatory if `x` is a regular `data.frame` or `tibble`. Optional if `x` is
20+
#' an `epi_df`; the defaults are `"geo_value"`, the `epi_df`'s `other_keys`
21+
#' metadata, and `"time_value"`, respectively; if you provide these manually,
22+
#' they must match the defaults. (This behavior is to enable consistent and
23+
#' sane results when you can't guarantee whether `x` is an `epi_df` or just a
24+
#' `tibble`/`data.frame`. You don't need to use it if you know that `x` is
25+
#' definitely an `epi_df`.) Not accepted when `x` is a `tsibble` or an
26+
#' `epi_archive`.
27+
#' @param exclude an optional character vector of key column names to exclude
28+
#' from the result
29+
#' @return character vector
830
#' @keywords internal
931
#' @export
10-
key_colnames <- function(x, ...) {
11-
UseMethod("key_colnames")
12-
}
13-
14-
#' @rdname key_colnames
15-
#' @method key_colnames default
16-
#' @export
17-
key_colnames.default <- function(x, ...) {
18-
character(0L)
32+
key_colnames <- function(x, ..., exclude = character()) {
33+
provided_args <- rlang::call_args_names(rlang::call_match())
34+
if ("extra_keys" %in% provided_args) {
35+
lifecycle::deprecate_soft("0.9.6", "key_colnames(extra_keys=)", "key_colnames(other_keys=)")
36+
redispatch <- function(..., extra_keys) {
37+
key_colnames(..., other_keys = extra_keys)
38+
}
39+
redispatch(x, ..., exclude = exclude)
40+
} else {
41+
UseMethod("key_colnames")
42+
}
1943
}
2044

2145
#' @rdname key_colnames
46+
#' @importFrom rlang check_dots_empty0
2247
#' @method key_colnames data.frame
2348
#' @export
24-
key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) {
49+
key_colnames.data.frame <- function(x, ...,
50+
geo_keys,
51+
other_keys,
52+
time_keys,
53+
exclude = character()) {
54+
check_dots_empty0(...)
55+
assert_character(geo_keys)
56+
assert_character(time_keys)
2557
assert_character(other_keys)
2658
assert_character(exclude)
27-
nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude)
28-
intersect(nm, colnames(x))
59+
keys <- c(geo_keys, other_keys, time_keys)
60+
if (!all(keys %in% names(x))) {
61+
cli_abort(c(
62+
"Some of the specified key columns aren't present in `x`",
63+
"i" = "Specified keys: {format_varnames(keys)}",
64+
"i" = "Columns of x: {format_varnames(names(x))}",
65+
"x" = "Missing keys: {format_varnames(setdiff(keys, names(x)))}"
66+
), class = "epiprocess__key_colnames__keys_not_in_colnames")
67+
}
68+
setdiff(keys, exclude)
2969
}
3070

3171
#' @rdname key_colnames
3272
#' @method key_colnames epi_df
3373
#' @export
34-
key_colnames.epi_df <- function(x, exclude = character(0L), ...) {
74+
key_colnames.epi_df <- function(x, ...,
75+
geo_keys = "geo_value",
76+
other_keys = attr(x, "metadata")$other_keys,
77+
time_keys = "time_value",
78+
exclude = character()) {
79+
check_dots_empty0(...)
80+
if (!identical(geo_keys, "geo_value")) {
81+
cli_abort('If `x` is an `epi_df`, then `geo_keys` must be `"geo_value"`',
82+
class = "epiprocess__key_colnames__mismatched_geo_keys"
83+
)
84+
}
85+
if (!identical(time_keys, "time_value")) {
86+
cli_abort('If `x` is an `epi_df`, then `time_keys` must be `"time_value"`',
87+
class = "epiprocess__key_colnames__mismatched_time_keys"
88+
)
89+
}
90+
expected_other_keys <- attr(x, "metadata")$other_keys
91+
if (!identical(other_keys, expected_other_keys)) {
92+
cli_abort(c(
93+
"The provided `other_keys` argument didn't match the `other_keys` of `x`",
94+
"*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
95+
"*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
96+
"i" = "If you know that `x` will always be an `epi_df` and
97+
resolve this discrepancy by adjusting the metadata of `x`, you
98+
shouldn't have to pass `other_keys =` here anymore,
99+
unless you want to continue to perform this check."
100+
), class = "epiprocess__key_colnames__mismatched_other_keys")
101+
}
35102
assert_character(exclude)
36-
other_keys <- attr(x, "metadata")$other_keys
37103
setdiff(c("geo_value", other_keys, "time_value"), exclude)
38104
}
39105

106+
#' @rdname key_colnames
107+
#' @method key_colnames tbl_ts
108+
#' @export
109+
key_colnames.tbl_ts <- function(x, ..., exclude = character()) {
110+
check_dots_empty0(...)
111+
assert_character(exclude)
112+
idx <- tsibble::index_var(x)
113+
idx2 <- tsibble::index2_var(x)
114+
if (!identical(idx, idx2)) {
115+
cli_abort(c(
116+
"`x` is in the middle of a re-indexing operation with `index_by()`; it's unclear
117+
whether we should output the old unique key or the new unique key-to-be",
118+
"i" = "Old index: {format_varname(idx)}",
119+
"i" = "Pending new index: {format_varname(idx2)}",
120+
"Please complete (e.g., with `summarise()`) or remove the re-indexing operation."
121+
), class = "epiprocess__key_colnames__incomplete_reindexing_operation")
122+
}
123+
setdiff(c(tsibble::key_vars(x), idx), exclude)
124+
}
125+
40126
#' @rdname key_colnames
41127
#' @method key_colnames epi_archive
42128
#' @export
43-
key_colnames.epi_archive <- function(x, exclude = character(0L), ...) {
129+
key_colnames.epi_archive <- function(x, ..., exclude = character()) {
130+
check_dots_empty0(...)
44131
assert_character(exclude)
45-
other_keys <- attr(x, "metadata")$other_keys
46-
setdiff(c("geo_value", other_keys, "time_value"), exclude)
132+
setdiff(c("geo_value", x$other_keys, "time_value", "version"), exclude)
47133
}

0 commit comments

Comments
 (0)