Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp/src/arrow/util/converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,9 @@ struct MakeConverterImpl {
DICTIONARY_CASE(FloatType);
DICTIONARY_CASE(DoubleType);
DICTIONARY_CASE(BinaryType);
DICTIONARY_CASE(LargeBinaryType);
DICTIONARY_CASE(StringType);
DICTIONARY_CASE(LargeStringType);
DICTIONARY_CASE(FixedSizeBinaryType);
#undef DICTIONARY_CASE
default:
Expand Down
26 changes: 24 additions & 2 deletions r/src/array_to_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,9 @@ class Converter_Dictionary : public Converter {
case Type::UINT16:
case Type::INT16:
case Type::INT32:
// TODO: also add int64, uint32, uint64 downcasts, if possible
case Type::UINT32:
case Type::INT64:
case Type::UINT64:
break;
default:
cpp11::stop("Cannot convert Dictionary Array of type `%s` to R",
Expand All @@ -612,6 +614,16 @@ class Converter_Dictionary : public Converter {
dictionary_ = CreateEmptyArray(dict_type.value_type());
}
}

// R factors store their codes in 32-bit integers, so dictionary arrays with
// more levels than that cannot be represented safely.
if (dictionary_->length() > std::numeric_limits<int>::max()) {
const auto& dict_type = checked_cast<const DictionaryType&>(*chunked_array->type());
cpp11::stop(
"Cannot convert Dictionary Array of type `%s` to R: dictionary has "
"more levels than an R factor can represent",
dict_type.ToString().c_str());
}
}

SEXP Allocate(R_xlen_t n) const {
Expand Down Expand Up @@ -653,6 +665,15 @@ class Converter_Dictionary : public Converter {
case Type::INT32:
return Ingest_some_nulls_Impl<arrow::Int32Type>(data, array, start, n,
chunk_index);
case Type::UINT32:
return Ingest_some_nulls_Impl<arrow::UInt32Type>(data, array, start, n,
chunk_index);
case Type::INT64:
return Ingest_some_nulls_Impl<arrow::Int64Type>(data, array, start, n,
chunk_index);
case Type::UINT64:
return Ingest_some_nulls_Impl<arrow::UInt64Type>(data, array, start, n,
chunk_index);
default:
break;
}
Expand Down Expand Up @@ -704,7 +725,8 @@ class Converter_Dictionary : public Converter {
// TODO (npr): this coercion should be optional, "dictionariesAsFactors" ;)
// Alternative: preserve the logical type of the dictionary values
// (e.g. if dict is timestamp, return a POSIXt R vector, not factor)
if (dictionary_->type_id() != Type::STRING) {
if (dictionary_->type_id() != Type::STRING &&
dictionary_->type_id() != Type::LARGE_STRING) {
cpp11::safe[Rf_warning]("Coercing dictionary values to R character factor levels");
}

Expand Down
16 changes: 8 additions & 8 deletions r/tests/testthat/test-Table.R
Original file line number Diff line number Diff line change
Expand Up @@ -357,18 +357,18 @@ test_that("Table handles null type (ARROW-7064)", {

test_that("Can create table with specific dictionary types", {
fact <- example_data[, "fct"]
int_types <- c(int8(), int16(), int32(), int64())
# TODO: test uint types when format allows
# uint_types <- c(uint8(), uint16(), uint32(), uint64()) # nolint
for (i in int_types) {
index_types <- c(int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64())
for (i in index_types) {
sch <- schema(fct = dictionary(i, utf8()))
tab <- Table$create(fact, schema = sch)
expect_equal(sch, tab$schema)
if (i != int64()) {
# TODO: same downcast to int32 as we do for int64() type elsewhere
expect_equal_data_frame(tab, fact)
}
expect_equal_data_frame(tab, fact)
}

# large_utf8 values exercise the non-ALTREP conversion path
tab_large <- Table$create(fact, schema = schema(fct = dictionary(uint32(), large_utf8())))
expect_equal(tab_large$schema, schema(fct = dictionary(uint32(), large_utf8())))
expect_equal_data_frame(tab_large, fact)
})

test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {
Expand Down
Loading