Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 49 additions & 48 deletions dwio/nimble/velox/selective/VariableLengthColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,51 @@ bool estimateMaterializedSizeImpl(
return true;
}

// Recovers the offsets and sizes buffers of `vector` for in-place reuse.
//
// For each of the two buffers: if the buffer is present and reports
// isMutable(), the matching out-parameter is assigned the result of
// mutableOffsets(size) / mutableSizes(size). Otherwise the out-parameter is
// left exactly as the caller passed it in.
void tryReuseArrayVectorBase(
    velox::ArrayVectorBase& vector,
    velox::vector_size_t size,
    velox::BufferPtr& offsets,
    velox::BufferPtr& sizes) {
  if (const auto& currentOffsets = vector.offsets();
      currentOffsets && currentOffsets->isMutable()) {
    offsets = vector.mutableOffsets(size);
  }
  if (const auto& currentSizes = vector.sizes();
      currentSizes && currentSizes->isMutable()) {
    sizes = vector.mutableSizes(size);
  }
}

// Returns `vector` itself, resized to 0, when it can be reused as a write
// target; returns nullptr otherwise.
//
// A vector is reused only if all of the following hold:
//   - the pointer is non-null,
//   - its encoding is neither CONSTANT nor DICTIONARY,
//   - use_count() of the passed-in pointer is exactly 1.
velox::VectorPtr tryReuseWritableVector(const velox::VectorPtr& vector) {
  if (!vector) {
    return nullptr;
  }
  const auto encoding = vector->encoding();
  if (encoding == velox::VectorEncoding::Simple::CONSTANT ||
      encoding == velox::VectorEncoding::Simple::DICTIONARY) {
    return nullptr;
  }
  if (vector.use_count() != 1) {
    return nullptr;
  }
  vector->resize(0);
  return vector;
}

// Array variant of the reuse helpers.
//
// Delegates offsets/sizes recovery to tryReuseArrayVectorBase, then
// overwrites `elements` with the result of tryReuseWritableVector on the
// array's elements vector. Unlike `offsets` and `sizes`, `elements` is always
// assigned: it becomes nullptr when the elements vector is not reusable.
void tryReuseArrayVector(
    velox::ArrayVector& vector,
    velox::vector_size_t size,
    velox::BufferPtr& offsets,
    velox::BufferPtr& sizes,
    velox::VectorPtr& elements) {
  tryReuseArrayVectorBase(vector, size, offsets, sizes);
  const auto& currentElements = vector.elements();
  elements = tryReuseWritableVector(currentElements);
}

// Map variant of the reuse helpers.
//
// Delegates offsets/sizes recovery to tryReuseArrayVectorBase, then
// overwrites `keys` and `values` with the result of tryReuseWritableVector on
// the map's key and value vectors respectively. Both are always assigned and
// become nullptr when the corresponding child vector is not reusable.
void tryReuseMapVector(
    velox::MapVector& vector,
    velox::vector_size_t size,
    velox::BufferPtr& offsets,
    velox::BufferPtr& sizes,
    velox::VectorPtr& keys,
    velox::VectorPtr& values) {
  tryReuseArrayVectorBase(vector, size, offsets, sizes);
  const auto& currentKeys = vector.mapKeys();
  keys = tryReuseWritableVector(currentKeys);
  const auto& currentValues = vector.mapValues();
  values = tryReuseWritableVector(currentValues);
}

velox::DictionaryVector<velox::ComplexType>* prepareDictionaryArrayResult(
velox::VectorPtr& result,
const velox::TypePtr& type,
Expand All @@ -78,29 +123,11 @@ velox::DictionaryVector<velox::ComplexType>* prepareDictionaryArrayResult(
alphabet = dictionaryVector->valueVector();
auto arrayVector = alphabet->as<velox::ArrayVector>();
if (arrayVector) {
if (arrayVector->sizes() && arrayVector->sizes()->isMutable()) {
sizes = arrayVector->mutableSizes(size);
}
if (arrayVector->offsets() && arrayVector->offsets()->isMutable()) {
offsets = arrayVector->mutableOffsets(size);
}
if (arrayVector->elements() && arrayVector->elements().unique()) {
elements = arrayVector->elements();
elements->resize(0);
}
tryReuseArrayVector(*arrayVector, size, offsets, sizes, elements);
}
}
} else if (auto arrayVector = result->as<velox::ArrayVector>()) {
if (arrayVector->sizes() && arrayVector->sizes()->isMutable()) {
sizes = arrayVector->mutableSizes(size);
}
if (arrayVector->offsets() && arrayVector->offsets()->isMutable()) {
offsets = arrayVector->mutableOffsets(size);
}
if (arrayVector->elements() && arrayVector->elements().unique()) {
elements = arrayVector->elements();
elements->resize(0);
}
tryReuseArrayVector(*arrayVector, size, offsets, sizes, elements);
}
}

Expand Down Expand Up @@ -171,37 +198,11 @@ velox::DictionaryVector<velox::ComplexType>* prepareDictionaryMapResult(
alphabet = dictionaryVector->valueVector();
auto mapVector = alphabet->as<velox::MapVector>();
if (mapVector) {
if (mapVector->sizes() && mapVector->sizes()->isMutable()) {
sizes = mapVector->mutableSizes(size);
}
if (mapVector->offsets() && mapVector->offsets()->isMutable()) {
offsets = mapVector->mutableOffsets(size);
}
if (mapVector->mapKeys() && mapVector->mapKeys().unique()) {
keys = mapVector->mapKeys();
keys->resize(0);
}
if (mapVector->mapValues() && mapVector->mapValues().unique()) {
elements = mapVector->mapValues();
elements->resize(0);
}
tryReuseMapVector(*mapVector, size, offsets, sizes, keys, elements);
}
}
} else if (auto mapVector = result->as<velox::MapVector>()) {
if (mapVector->sizes() && mapVector->sizes()->isMutable()) {
sizes = mapVector->mutableSizes(size);
}
if (mapVector->offsets() && mapVector->offsets()->isMutable()) {
offsets = mapVector->mutableOffsets(size);
}
if (mapVector->mapKeys() && mapVector->mapKeys().unique()) {
keys = mapVector->mapKeys();
keys->resize(0);
}
if (mapVector->mapValues() && mapVector->mapValues().unique()) {
elements = mapVector->mapValues();
elements->resize(0);
}
tryReuseMapVector(*mapVector, size, offsets, sizes, keys, elements);
}
}

Expand Down
38 changes: 34 additions & 4 deletions dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,15 @@
#include <gtest/gtest.h>

namespace facebook::nimble {
namespace {

using namespace facebook::velox;

// Filter mode selector used by the tests in this file.
// NOTE(review): the exact meaning of kKeep / kDrop is not visible in this
// chunk — confirm against the tests that consume it.
enum FilterType { kNone, kKeep, kDrop };
// fmt `format_as` hook: a FilterType is formatted as the value returned by
// fmt::underlying(filterType).
auto format_as(FilterType filterType) {
return fmt::underlying(filterType);
}

namespace {

using namespace facebook::velox;

// This test suite covers the basic and mostly single batch test cases, as well
// as some corner cases that are hard to cover in randomized tests. We rely on
// E2EFilterTest for more comprehensive tests with multi stripes and multi
Expand Down Expand Up @@ -1006,6 +1005,37 @@ TEST_F(SelectiveNimbleReaderTest, arrayWithOffsetsLastRunFilteredOutAfterRead) {
{{{1}}, {{1}}}, {false, true}, {1, 1}, std::nullopt, true);
}

// Round-trips a MAP(BIGINT, ARRAY(BIGINT)) column that is written both as a
// flat map and as a dictionary array column, then reads it back and validates
// it against the input.
// NOTE(review): judging by the test name, the intent is to cover reuse of a
// previously returned result whose array values are null-only — confirm.
TEST_F(SelectiveNimbleReaderTest, arrayWithOffsetsReuseNullResult) {
// Single column "c0": 4 map rows, each with 2 entries (offsets 0/2/4/6,
// sizes all 2). Keys alternate 1, 2. Every value under key 1 is an array
// containing only nulls; values under key 2 are {3}, {3}, {3}, {4}.
auto vector = makeRowVector({
std::make_shared<MapVector>(
pool(),
MAP(BIGINT(), ARRAY(BIGINT())),
nullptr,
4,
makeIndices({0, 2, 4, 6}),
makeIndices({2, 2, 2, 2}),
makeFlatVector<int64_t>({1, 2, 1, 2, 1, 2, 1, 2}),
makeNullableArrayVector<int64_t>({
{std::nullopt, std::nullopt},
{std::optional(3)},
{std::nullopt, std::nullopt},
{std::optional(3)},
{std::nullopt},
{std::optional(3)},
{std::nullopt},
{std::optional(4)},
})),
});
VeloxWriterOptions writerOptions;
// "c0" is registered as both a flat map column and a dictionary array
// column for the writer.
writerOptions.flatMapColumns = {"c0"};
writerOptions.dictionaryArrayColumns = {"c0"};
auto fileContent = test::createNimbleFile(*rootPool(), vector, writerOptions);
// Scan spec selects every child field of the row type; no filters are added
// here.
auto scanSpec = std::make_shared<common::ScanSpec>("root");
scanSpec->addAllChildFields(*vector->type());
auto readers = makeReaders(vector, fileContent, scanSpec);
// The predicate accepts every row.
// NOTE(review): the literal 2 is presumably the read batch size, so the 4
// rows would be read in two batches — confirm against validate()'s signature.
validate(*vector, *readers.rowReader, 2, [](auto) { return true; });
}

TEST_F(SelectiveNimbleReaderTest, slidingWindowMapSubfieldPruning) {
common::BigintRange keyFilter(2, 2, false);
checkSlidingWindowMap(
Expand Down
Loading