Skip to content

Commit ad70c69

Browse files
committed
minor refactor to make col name subs accessible
1 parent e3feab6 commit ad70c69

File tree

11 files changed

+122
-107
lines changed

11 files changed

+122
-107
lines changed

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ export(epix_merge)
6161
export(epix_slide)
6262
export(epix_truncate_versions_after)
6363
export(filter)
64+
export(geo_column_names)
6465
export(group_by)
6566
export(group_modify)
6667
export(growth_rate)
@@ -75,9 +76,11 @@ export(next_after)
7576
export(relocate)
7677
export(rename)
7778
export(slice)
79+
export(time_column_names)
7880
export(ungroup)
7981
export(unnest)
8082
export(validate_epi_archive)
83+
export(version_column_names)
8184
importFrom(checkmate,anyInfinite)
8285
importFrom(checkmate,anyMissing)
8386
importFrom(checkmate,assert)

R/archive.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -458,9 +458,9 @@ as_epi_archive <- function(
458458
versions_end = NULL, ...) {
459459
assert_data_frame(x)
460460
x <- rename(x, ...)
461-
x <- guess_time_column_name(x)
462-
x <- guess_geo_column_name(x)
463-
x <- guess_version_column_name(x)
461+
x <- guess_column_name(x, "time_value", time_column_names())
462+
x <- guess_column_name(x, "geo_value", geo_column_names())
463+
x <- guess_column_name(x, "version", version_column_names())
464464
if (!test_subset(c("geo_value", "time_value", "version"), names(x))) {
465465
cli_abort(
466466
"Columns `geo_value`, `time_value`, and `version` must be present in `x`."

R/epi_df.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,8 @@ as_epi_df.tbl_df <- function(x, geo_type, time_type, as_of,
268268
...) {
269269
# possible standard substitutions for time_value
270270
x <- rename(x, ...)
271-
x <- guess_time_column_name(x)
272-
x <- guess_geo_column_name(x)
271+
x <- guess_column_name(x, "time_value", time_column_names())
272+
x <- guess_column_name(x, "geo_value", geo_column_names())
273273
if (!test_subset(c("geo_value", "time_value"), names(x))) {
274274
cli_abort(
275275
"Either columns `geo_value` and `time_value` must be present in `x`, or related columns (see the internal

R/utils.R

Lines changed: 55 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -459,102 +459,73 @@ upcase_snake_case <- function(vec) {
459459
c(vec, upper_vec)
460460
}
461461

462-
#' rename potential time_value columns
463-
#' @keywords internal
464-
guess_time_column_name <- function(x, substitutions = NULL) {
465-
if (!("time_value" %in% names(x))) {
466-
if (is.null(substitutions)) {
467-
substitutions <- c(
468-
time_value = "date",
469-
time_value = "time",
470-
time_value = "datetime",
471-
time_value = "dateTime",
472-
tmie_value = "date_time",
473-
time_value = "target_date",
474-
time_value = "week",
475-
time_value = "epiweek",
476-
time_value = "month",
477-
time_value = "mon",
478-
time_value = "year",
479-
time_value = "yearmon",
480-
time_value = "yearmonth",
481-
time_value = "yearMon",
482-
time_value = "yearMonth",
483-
time_value = "dates",
484-
time_value = "time_values",
485-
time_value = "target_dates"
486-
)
487-
substitutions <- upcase_snake_case(substitutions)
488-
}
489-
strsplit(substitutions, "_") %>%
490-
map(function(name) paste0(toupper(substr(name, 1, 1)), substr(name, 2, nchar(name)), collapse = "_")) %>%
491-
unlist()
492-
x <- tryCatch(x %>% rename(any_of(substitutions)),
493-
error = function(cond) {
494-
cli_abort("{names(x)[names(x) %in% substitutions]} are both/all valid substitutions.
495-
Either `rename` some yourself or drop some.")
496-
}
497-
)
498-
if (any(substitutions != "")) {
499-
cli_inform("inferring `time_value` column.")
500-
}
501-
}
502-
return(x)
462+
#' potential time_value columns
463+
#' @description
464+
#' the full list of potential substitutions for the `time_value` column name:
465+
#' `r time_column_names()`
466+
#' @export
467+
time_column_names <- function() {
468+
substitutions <- c(
469+
"time_value", "date", "time", "datetime", "dateTime", "date_time", "target_date",
470+
"week", "epiweek", "month", "mon", "year", "yearmon", "yearmonth",
471+
"yearMon", "yearMonth", "dates", "time_values", "target_dates", "time_Value"
472+
)
473+
substitutions <- upcase_snake_case(substitutions)
474+
names(substitutions) <- rep("time_value", length(substitutions))
475+
return(substitutions)
476+
}
477+
#
478+
#' potential geo_value columns
479+
#' @description
480+
#' the full list of potential substitutions for the `geo_value` column name:
481+
#' `r geo_column_names()`
482+
#' @export
483+
geo_column_names <- function() {
484+
substitutions <- c(
485+
"geo_value", "geo_values", "geo_id", "geos", "location", "jurisdiction", "fips", "zip",
486+
"county", "hrr", "msa", "state", "province", "nation", "states",
487+
"provinces", "counties", "geo_Value"
488+
)
489+
substitutions <- upcase_snake_case(substitutions)
490+
names(substitutions) <- rep("geo_value", length(substitutions))
491+
return(substitutions)
503492
}
504493

494+
#' potential version columns
495+
#' @description
496+
#' the full list of potential substitutions for the `version` column name:
497+
#' `r version_column_names()`
498+
#' @export
499+
version_column_names <- function() {
500+
substitutions <- c(
501+
"version", "issue", "release"
502+
)
503+
substitutions <- upcase_snake_case(substitutions)
504+
names(substitutions) <- rep("version", length(substitutions))
505+
return(substitutions)
506+
}
505507

508+
#' rename potential time_value columns
509+
#'
510+
#' @description
511+
#' potentially renames
512+
#' @param x the tibble to potentially rename
513+
#' @param substitions a named vector. the potential substitions, with every name `time_value`
506514
#' @keywords internal
507-
guess_geo_column_name <- function(x, substitutions = NULL) {
508-
if (!("time_value" %in% names(x))) {
509-
substitutions <- substitutions %||% c(
510-
geo_value = "geo_values",
511-
geo_value = "geo_id",
512-
geo_value = "geos",
513-
geo_value = "location",
514-
geo_value = "jurisdiction",
515-
geo_value = "fips",
516-
geo_value = "zip",
517-
geo_value = "county",
518-
geo_value = "hrr",
519-
geo_value = "msa",
520-
geo_value = "state",
521-
geo_value = "province",
522-
geo_value = "nation",
523-
geo_value = "states",
524-
geo_value = "provinces",
525-
geo_value = "counties"
526-
)
527-
substitutions <- upcase_snake_case(substitutions)
515+
guess_column_name <- function(x, column_name, substitutions) {
516+
if (!(column_name %in% names(x))) {
528517
x <- tryCatch(x %>% rename(any_of(substitutions)),
529518
error = function(cond) {
530519
cli_abort("{names(x)[names(x) %in% substitutions]} are both/all valid substitutions.
531520
Either `rename` some yourself or drop some.")
532521
}
533522
)
534-
if (any(substitutions != "")) {
535-
cli_inform("inferring `geo_value` column.")
523+
# if none of the names are in substitutions, and `column_name` isn't a column, we're missing a relevant column
524+
if (!any(names(x) %in% substitutions)) {
525+
cli_abort("There is no {column_name} column or similar name. See e.g. [`time_column_name()`] for a complete list")
536526
}
537-
}
538-
return(x)
539-
}
540-
541-
guess_version_column_name <- function(x, substitutions = NULL) {
542-
if (!("version" %in% names(x))) {
543-
if (is.null(substitutions)) {
544-
substitutions <- c(
545-
version = "issue",
546-
version = "release"
547-
)
548-
substitutions <- upcase_snake_case(substitutions)
549-
}
550-
x <- tryCatch(x %>% rename(any_of(substitutions)),
551-
error = function(cond) {
552-
cli_abort("{names(x)[names(x) %in% substitutions]} are both/all valid substitutions.
553-
Either `rename` some yourself or drop some.")
554-
}
555-
)
556527
if (any(substitutions != "")) {
557-
cli_inform("inferring `version` column.")
528+
cli_inform("inferring {column_name} column.")
558529
}
559530
}
560531
return(x)

man/geo_column_names.Rd

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/guess_column_name.Rd

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/guess_time_column_name.Rd

Lines changed: 0 additions & 12 deletions
This file was deleted.

man/time_column_names.Rd

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/version_column_names.Rd

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-archive.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ dt <- archive_cases_dv_subset$DT
88

99
test_that("data.frame must contain geo_value, time_value and version columns", {
1010
expect_error(as_epi_archive(select(dt, -geo_value), compactify = FALSE),
11-
regexp = "Columns `geo_value`, `time_value`, and `version` must be present in `x`."
11+
regexp = "There is no geo_value column or similar name"
1212
)
13-
expect_error(as_epi_archive(select(dt, -time_value), compactify = FALSE),
14-
regexp = "Columns `geo_value`, `time_value`, and `version` must be present in `x`."
13+
expect_error(expect_message(as_epi_archive(select(dt, -time_value), compactify = FALSE)),
14+
regexp = "There is no time_value column or similar name"
1515
)
1616
expect_error(as_epi_archive(select(dt, -version), compactify = FALSE),
17-
regexp = "Columns `geo_value`, `time_value`, and `version` must be present in `x`."
17+
regexp = "There is no version column or similar name"
1818
)
1919
})
2020

0 commit comments

Comments
 (0)