Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,4 @@ Copyright: file COPYRIGHTS
Encoding: UTF-8
Language: en-US
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3.9000
RoxygenNote: 7.3.2
55 changes: 32 additions & 23 deletions R/col_types.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
#' @param .delim The delimiter to use when parsing. If the `delim` argument is
#' used in the call to `vroom()`, it takes precedence over the one specified in
#' `col_types`.
#' @param na Character vector of strings to interpret as missing values for the
#' column. If `NULL`, the column uses the missing values specified in the
#' `na` argument in the call to `vroom()`; otherwise, the missing values
#' specified here take precedence. Set this option to `character()` to
#' indicate no missing values for the column.
#' @export
#' @aliases col_types
#' @examples
Expand Down Expand Up @@ -207,7 +212,11 @@ format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_co
vapply(seq_along(cols),
function(i) {
col_funs <- sub("^collector_", "col_", class(cols[[i]])[[1]])
args <- vapply(cols[[i]], deparse2, character(1), sep = "\n ")
args <- cols[[i]]
if (is.null(args[["na"]])) {
args[["na"]] <- NULL
}
args <- vapply(args, deparse2, character(1), sep = "\n ")
args <- paste(names(args), args, sep = " = ", collapse = ", ")

col_funs <- paste0(col_funs, "(", args, ")")
Expand Down Expand Up @@ -624,32 +633,32 @@ color_type <- function(type) {

#' @rdname cols
#' @export
col_logical <- function(...) {
collector("logical", ...)
col_logical <- function(na = NULL, ...) {
collector("logical", na = na, ...)
}

#' @rdname cols
#' @export
col_integer <- function(...) {
collector("integer", ...)
col_integer <- function(na = NULL, ...) {
collector("integer", na = na, ...)
}

#' @rdname cols
#' @export
col_big_integer <- function(...) {
collector("big_integer", ...)
col_big_integer <- function(na = NULL, ...) {
collector("big_integer", na = na, ...)
}

#' @rdname cols
#' @export
col_double <- function(...) {
collector("double", ...)
col_double <- function(na = NULL, ...) {
collector("double", na = na, ...)
}

#' @rdname cols
#' @export
col_character <- function(...) {
collector("character", ...)
col_character <- function(na = NULL, ...) {
collector("character", na = na, ...)
}

#' @rdname cols
Expand All @@ -660,38 +669,38 @@ col_skip <- function(...) {

#' @rdname cols
#' @export
col_number <- function(...) {
collector("number", ...)
col_number <- function(na = NULL, ...) {
collector("number", na = na, ...)
}

#' @rdname cols
#' @export
col_guess <- function(...) {
collector("guess", ...)
col_guess <- function(na = NULL, ...) {
collector("guess", na = na, ...)
}

#' @inheritParams readr::col_factor
#' @rdname cols
#' @export
col_factor <- function(levels = NULL, ordered = FALSE, include_na = FALSE, ...) {
collector("factor", levels = levels, ordered = ordered, include_na = include_na, ...)
col_factor <- function(levels = NULL, ordered = FALSE, include_na = FALSE, na = NULL, ...) {
collector("factor", levels = levels, ordered = ordered, include_na = include_na, na = na, ...)
}

#' @inheritParams readr::col_datetime
#' @rdname cols
#' @export
col_datetime <- function(format = "", ...) {
collector("datetime", format = format, ...)
col_datetime <- function(format = "", na = NULL, ...) {
collector("datetime", format = format, na = na, ...)
}

#' @rdname cols
#' @export
col_date <- function(format = "", ...) {
collector("date", format = format, ...)
col_date <- function(format = "", na = NULL, ...) {
collector("date", format = format, na = na, ...)
}

#' @rdname cols
#' @export
col_time <- function(format = "", ...) {
collector("time", format = format, ...)
col_time <- function(format = "", na = NULL, ...) {
collector("time", format = format, na = na, ...)
}
4 changes: 3 additions & 1 deletion R/generator.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ gen_tbl <- function(rows, cols = NULL, col_types = NULL, locale = default_locale
specs$cols[[i]] <- do.call(paste0("col_", type), list())
}
fun_nme <- paste0("gen_", type)
res[[i]] <- do.call(fun_nme, c(rows, specs$cols[[i]]))
args <- specs$cols[[i]]
args[["na"]] <- NULL
res[[i]] <- do.call(fun_nme, c(rows, args))
}

if (missing > 0) {
Expand Down
28 changes: 17 additions & 11 deletions man/cols.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions man/vroom_fwf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions src/collectors.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class collector {
type_(derive_type(cpp11::strings(data_.attr("class"))[0])),
altrep_(altrep) {}
// Column type of this collector, derived once in the constructor from the
// first class of the collector object.
column_type type() const { return type_; }
// Returns the "na" element of the collector spec as a raw SEXP.
// NOTE(review): appears to be R NULL when the column sets no override of its
// own; check with Rf_isNull() before converting to strings -- confirm.
SEXP na() const { return data_["na"]; }
// Returns the stored name_ value as a raw SEXP.
SEXP name() const { return name_; }
// Looks up the element called `nme` in the underlying collector data.
SEXP operator[](const char* nme) { return data_[nme]; }
bool use_altrep() {
Expand Down Expand Up @@ -170,6 +171,8 @@ inline collectors resolve_collectors(
std::string my_col_type = cpp11::strings(my_collector.attr("class"))[0];

if (my_col_type == "collector_guess") {
auto my_col_na = my_collector["na"];
auto my_col_na_res = Rf_isNull(my_col_na) ? na : my_col_na;
cpp11::writable::strings col_vals(guess_num);
for (R_xlen_t j = 0; j < guess_num - 1; ++j) {
size_t row = j * guess_step;
Expand All @@ -187,10 +190,10 @@ inline collectors resolve_collectors(
locale_info->encoder_.makeSEXP(str.begin(), str.end(), true);
}

auto type = guess_type__(col_vals, na, locale_info.get(), false);
auto type = guess_type__(col_vals, my_col_na_res, locale_info.get(), false);
auto fun_name = std::string("col_") + type;
auto col_type = vroom[fun_name.c_str()];
my_collectors[col] = col_type();
my_collectors[col] = col_type("na"_nm = my_col_na);
}
}

Expand Down
5 changes: 4 additions & 1 deletion src/columns.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,14 @@ inline cpp11::list create_columns(
continue;
}

auto col_na = collector.na();
auto col_na_res = Rf_isNull(col_na) ? na : cpp11::strings(col_na);

// This is deleted in the finalizers when the vectors are GC'd by R
auto info = new vroom_vec_info{
idx->get_column(col),
num_threads,
std::make_shared<cpp11::strings>(na),
std::make_shared<cpp11::strings>(col_na_res),
locale_info,
*errors,
std::string()};
Expand Down
33 changes: 33 additions & 0 deletions tests/testthat/test-col-na.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# A column that declares its own `na` uses it instead of the global `na`;
# a column that declares none keeps using the global value.
test_that("collector-level na overrides global na", {
  csv <- "a,b,c\na,foo,REFUSED\nb,REFUSED,NA\nOMITTED,bar,OMITTED\n"

  # `a` and `b` each name their own missing-value marker; `c` names none.
  spec <- cols(
    a = col_character(na = "OMITTED"),
    b = col_character(na = "REFUSED"),
    c = col_character()
  )

  # Each marker turns into NA only in the column it applies to.
  expected <- tibble::tibble(
    a = c("a", "b", NA),
    b = c("foo", NA, "bar"),
    c = c("REFUSED", NA, "OMITTED"),
  )

  test_vroom(csv, col_types = spec, na = "NA", equals = expected)
})

# The per-column `na` is also applied while guessing: once its marker counts
# as missing, the remaining values of the column decide the guessed type.
test_that("collector-level na works with col_guess", {
  csv <- "a,b,c\n1,1.1,REFUSED\n2,REFUSED,NA\nOMITTED,2.2,OMITTED\n"

  # `a` and `b` override the missing-value marker; `c` relies on the global one.
  spec <- cols(
    a = col_guess(na = "OMITTED"),
    b = col_guess(na = "REFUSED"),
    c = col_guess(),
  )

  # `a` and `b` come back numeric; `c` stays character because its other
  # values are not numbers.
  expected <- tibble::tibble(
    a = c(1, 2, NA),
    b = c(1.1, NA, 2.2),
    c = c("REFUSED", NA, "OMITTED"),
  )

  test_vroom(csv, col_types = spec, na = "NA", equals = expected)
})