|
1 | | -#' Grab any keys associated to an epi_df |
| 1 | +#' Get names of columns that form a (unique) key associated with an object |
2 | 2 | #' |
3 | | -#' @param x a data.frame, tibble, or epi_df |
| 3 | +#' This is entirely based on metadata and arguments passed; there are no |
| 4 | +#' explicit checks that the key actually is unique in any associated data |
| 5 | +#' structures. |
| 6 | +#' |
| 7 | +#' @param x an object, often a data frame or something similar. `{epiprocess}` |
| 8 | +#' includes implementations for [`epi_df`]s, [`epi_archive`]s, |
| 9 | +#' [`tsibble::tsibble`]s, and other data frames (including |
| 10 | +#' [`tibble::tibble`]s); other packages, like `{epipredict}`, can add more. |
4 | 11 | #' @param ... additional arguments passed on to methods |
5 | | -#' @param other_keys an optional character vector of other keys to include |
6 | | -#' @param exclude an optional character vector of keys to exclude |
7 | | -#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`. |
| 12 | +#' @param geo_keys,other_keys,time_keys character vectors, sometimes optional; |
| 13 | +#' which variables (if any) should be considered as part of a unique |
| 14 | +#' key/identifier for data in `x`, dealing respectively with the associated |
| 15 | +#' geographical region, demographic/strain/other information needed in |
| 16 | +#' addition to the geographical region to identify individual time series in |
| 17 | +#' `x`, and time interval during which associated events occurred. |
| 18 | +#' |
| 19 | +#' Mandatory if `x` is a regular `data.frame` or `tibble`. Optional if `x` is |
| 20 | +#' an `epi_df`; the defaults are `"geo_value"`, the `epi_df`'s `other_keys` |
| 21 | +#' metadata, and `"time_value"`, respectively; if you provide these manually, |
| 22 | +#' they must match the defaults. (This behavior is to enable consistent and |
| 23 | +#' sane results when you can't guarantee whether `x` is an `epi_df` or just a |
| 24 | +#' `tibble`/`data.frame`. You don't need to use it if you know that `x` is |
| 25 | +#' definitely an `epi_df`.) Not accepted when `x` is a `tsibble` or an |
| 26 | +#' `epi_archive`. |
| 27 | +#' @param exclude an optional character vector of key column names to exclude |
| 28 | +#' from the result |
| 29 | +#' @return character vector |
8 | 30 | #' @keywords internal |
9 | 31 | #' @export |
10 | | -key_colnames <- function(x, ...) { |
11 | | - UseMethod("key_colnames") |
12 | | -} |
13 | | - |
14 | | -#' @rdname key_colnames |
15 | | -#' @method key_colnames default |
16 | | -#' @export |
17 | | -key_colnames.default <- function(x, ...) { |
18 | | - character(0L) |
| 32 | +key_colnames <- function(x, ..., exclude = character()) { |
| 33 | + provided_args <- rlang::call_args_names(rlang::call_match()) |
| 34 | + if ("extra_keys" %in% provided_args) { |
| 35 | + lifecycle::deprecate_soft("0.9.6", "key_colnames(extra_keys=)", "key_colnames(other_keys=)") |
| 36 | + redispatch <- function(..., extra_keys) { |
| 37 | + key_colnames(..., other_keys = extra_keys) |
| 38 | + } |
| 39 | + redispatch(x, ..., exclude = exclude) |
| 40 | + } else { |
| 41 | + UseMethod("key_colnames") |
| 42 | + } |
19 | 43 | } |
20 | 44 |
|
21 | 45 | #' @rdname key_colnames |
| 46 | +#' @importFrom rlang check_dots_empty0 |
22 | 47 | #' @method key_colnames data.frame |
23 | 48 | #' @export |
24 | | -key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) { |
| 49 | +key_colnames.data.frame <- function(x, ..., |
| 50 | + geo_keys, |
| 51 | + other_keys, |
| 52 | + time_keys, |
| 53 | + exclude = character()) { |
| 54 | + check_dots_empty0(...) |
| 55 | + assert_character(geo_keys) |
| 56 | + assert_character(time_keys) |
25 | 57 | assert_character(other_keys) |
26 | 58 | assert_character(exclude) |
27 | | - nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude) |
28 | | - intersect(nm, colnames(x)) |
| 59 | + keys <- c(geo_keys, other_keys, time_keys) |
| 60 | + if (!all(keys %in% names(x))) { |
| 61 | + cli_abort(c( |
| 62 | + "Some of the specified key columns aren't present in `x`", |
| 63 | + "i" = "Specified keys: {format_varnames(keys)}", |
| 64 | + "i" = "Columns of x: {format_varnames(names(x))}", |
| 65 | + "x" = "Missing keys: {format_varnames(setdiff(keys, names(x)))}" |
| 66 | + ), class = "epiprocess__key_colnames__keys_not_in_colnames") |
| 67 | + } |
| 68 | + setdiff(keys, exclude) |
29 | 69 | } |
30 | 70 |
|
31 | 71 | #' @rdname key_colnames |
32 | 72 | #' @method key_colnames epi_df |
33 | 73 | #' @export |
34 | | -key_colnames.epi_df <- function(x, exclude = character(0L), ...) { |
| 74 | +key_colnames.epi_df <- function(x, ..., |
| 75 | + geo_keys = "geo_value", |
| 76 | + other_keys = attr(x, "metadata")$other_keys, |
| 77 | + time_keys = "time_value", |
| 78 | + exclude = character()) { |
| 79 | + check_dots_empty0(...) |
| 80 | + if (!identical(geo_keys, "geo_value")) { |
| 81 | + cli_abort('If `x` is an `epi_df`, then `geo_keys` must be `"geo_value"`', |
| 82 | + class = "epiprocess__key_colnames__mismatched_geo_keys" |
| 83 | + ) |
| 84 | + } |
| 85 | + if (!identical(time_keys, "time_value")) { |
| 86 | + cli_abort('If `x` is an `epi_df`, then `time_keys` must be `"time_value"`', |
| 87 | + class = "epiprocess__key_colnames__mismatched_time_keys" |
| 88 | + ) |
| 89 | + } |
| 90 | + expected_other_keys <- attr(x, "metadata")$other_keys |
| 91 | + if (!identical(other_keys, expected_other_keys)) { |
| 92 | + cli_abort(c( |
| 93 | + "The provided `other_keys` argument didn't match the `other_keys` of `x`", |
| 94 | + "*" = "`other_keys` was {format_chr_with_quotes(other_keys)}", |
| 95 | + "*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}", |
| 96 | + "i" = "If you know that `x` will always be an `epi_df` and |
| 97 | + resolve this discrepancy by adjusting the metadata of `x`, you |
| 98 | + shouldn't have to pass `other_keys =` here anymore, |
| 99 | + unless you want to continue to perform this check." |
| 100 | + ), class = "epiprocess__key_colnames__mismatched_other_keys") |
| 101 | + } |
35 | 102 | assert_character(exclude) |
36 | | - other_keys <- attr(x, "metadata")$other_keys |
37 | 103 | setdiff(c("geo_value", other_keys, "time_value"), exclude) |
38 | 104 | } |
39 | 105 |
|
| 106 | +#' @rdname key_colnames |
| 107 | +#' @method key_colnames tbl_ts |
| 108 | +#' @export |
| 109 | +key_colnames.tbl_ts <- function(x, ..., exclude = character()) { |
| 110 | + check_dots_empty0(...) |
| 111 | + assert_character(exclude) |
| 112 | + idx <- tsibble::index_var(x) |
| 113 | + idx2 <- tsibble::index2_var(x) |
| 114 | + if (!identical(idx, idx2)) { |
| 115 | + cli_abort(c( |
| 116 | + "`x` is in the middle of a re-indexing operation with `index_by()`; it's unclear |
| 117 | + whether we should output the old unique key or the new unique key-to-be", |
| 118 | + "i" = "Old index: {format_varname(idx)}", |
| 119 | + "i" = "Pending new index: {format_varname(idx2)}", |
| 120 | + "Please complete (e.g., with `summarise()`) or remove the re-indexing operation." |
| 121 | + ), class = "epiprocess__key_colnames__incomplete_reindexing_operation") |
| 122 | + } |
| 123 | + setdiff(c(tsibble::key_vars(x), idx), exclude) |
| 124 | +} |
| 125 | + |
40 | 126 | #' @rdname key_colnames |
41 | 127 | #' @method key_colnames epi_archive |
42 | 128 | #' @export |
43 | | -key_colnames.epi_archive <- function(x, exclude = character(0L), ...) { |
| 129 | +key_colnames.epi_archive <- function(x, ..., exclude = character()) { |
| 130 | + check_dots_empty0(...) |
44 | 131 | assert_character(exclude) |
45 | | - other_keys <- attr(x, "metadata")$other_keys |
46 | | - setdiff(c("geo_value", other_keys, "time_value"), exclude) |
| 132 | + setdiff(c("geo_value", x$other_keys, "time_value", "version"), exclude) |
47 | 133 | } |
0 commit comments