Factor the "reuse the previous result's buffers" logic of the dictionary
array/map readers into shared helpers, and stop reusing child vectors that
are constant- or dictionary-encoded.

dwio/nimble/velox/selective/VariableLengthColumnReader.cpp
  * tryReuseArrayVectorBase(): fetches the offsets and sizes buffers of an
    existing vector through mutableOffsets(size) / mutableSizes(size), each
    one only when the buffer is present and isMutable(). An output whose
    buffer does not qualify is left untouched.
  * tryReuseWritableVector(): returns the given child vector, resized to 0,
    when it is non-null, its encoding is neither CONSTANT nor DICTIONARY,
    and use_count() == 1. Returns nullptr in every other case.
  * tryReuseArrayVector() / tryReuseMapVector(): combine the two helpers
    above for the elements vector, respectively the keys and values vectors.
  * prepareDictionaryArrayResult() / prepareDictionaryMapResult(): the four
    hand-written copies of this logic are replaced by calls to the helpers.
    The removed code accepted any child with unique() ownership; the
    encoding check is new behaviour.
  * NOTE(review): the removed code left elements/keys untouched when a child
    was not reusable; the helpers now assign nullptr instead. That is
    equivalent only if the callers' locals start out null - confirm against
    the full function bodies, which this patch does not show.

dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp
  * FilterType and format_as() move inside the anonymous namespace.
  * New test arrayWithOffsetsReuseNullResult: writes a 4-row
    MAP(BIGINT, ARRAY(BIGINT)) column "c0" configured both as a flat map
    column and as a dictionary array column, then reads it back through
    validate() (third argument 2 - presumably the batch size, confirm).

Notes on this copy of the patch
  * Line breaks come from the +/- markers; context-line breaks and all
    indentation are reconstructed. The line counts agree with the hunk
    headers. Indentation depth of context lines is a best-effort guess.
  * In the "-22,16 +22,15" hunk the header implies 12 context lines, one
    fewer than the conventional layout. The blank line between the enum and
    format_as() was dropped to match; that choice is a guess - verify
    against the target file.
  * Template argument lists that had been lost were restored from the
    surrounding code: as<velox::ArrayVector>(), as<velox::MapVector>(),
    std::make_shared<MapVector>, makeFlatVector<int64_t>,
    makeNullableArrayVector<int64_t>, std::optional<int64_t>,
    std::make_shared<common::ScanSpec>. DictionaryVector<velox::ComplexType>
    is assumed from Velox convention - confirm.
  * The header name after "#include" in the test-file hunk could not be
    recovered and is left bare; restore it from the target file before
    applying.

diff --git a/dwio/nimble/velox/selective/VariableLengthColumnReader.cpp b/dwio/nimble/velox/selective/VariableLengthColumnReader.cpp
index 052fbba9..26971b67 100644
--- a/dwio/nimble/velox/selective/VariableLengthColumnReader.cpp
+++ b/dwio/nimble/velox/selective/VariableLengthColumnReader.cpp
@@ -53,6 +53,51 @@ bool estimateMaterializedSizeImpl(
   return true;
 }
 
+void tryReuseArrayVectorBase(
+    velox::ArrayVectorBase& vector,
+    velox::vector_size_t size,
+    velox::BufferPtr& offsets,
+    velox::BufferPtr& sizes) {
+  if (vector.offsets() && vector.offsets()->isMutable()) {
+    offsets = vector.mutableOffsets(size);
+  }
+  if (vector.sizes() && vector.sizes()->isMutable()) {
+    sizes = vector.mutableSizes(size);
+  }
+}
+
+velox::VectorPtr tryReuseWritableVector(const velox::VectorPtr& vector) {
+  if (vector && vector->encoding() != velox::VectorEncoding::Simple::CONSTANT &&
+      vector->encoding() != velox::VectorEncoding::Simple::DICTIONARY &&
+      vector.use_count() == 1) {
+    vector->resize(0);
+    return vector;
+  }
+  return nullptr;
+}
+
+void tryReuseArrayVector(
+    velox::ArrayVector& vector,
+    velox::vector_size_t size,
+    velox::BufferPtr& offsets,
+    velox::BufferPtr& sizes,
+    velox::VectorPtr& elements) {
+  tryReuseArrayVectorBase(vector, size, offsets, sizes);
+  elements = tryReuseWritableVector(vector.elements());
+}
+
+void tryReuseMapVector(
+    velox::MapVector& vector,
+    velox::vector_size_t size,
+    velox::BufferPtr& offsets,
+    velox::BufferPtr& sizes,
+    velox::VectorPtr& keys,
+    velox::VectorPtr& values) {
+  tryReuseArrayVectorBase(vector, size, offsets, sizes);
+  keys = tryReuseWritableVector(vector.mapKeys());
+  values = tryReuseWritableVector(vector.mapValues());
+}
+
 velox::DictionaryVector<velox::ComplexType>* prepareDictionaryArrayResult(
     velox::VectorPtr& result,
     const velox::TypePtr& type,
@@ -78,29 +123,11 @@ velox::DictionaryVector<velox::ComplexType>* prepareDictionaryArrayResult(
         alphabet = dictionaryVector->valueVector();
         auto arrayVector = alphabet->as<velox::ArrayVector>();
         if (arrayVector) {
-          if (arrayVector->sizes() && arrayVector->sizes()->isMutable()) {
-            sizes = arrayVector->mutableSizes(size);
-          }
-          if (arrayVector->offsets() && arrayVector->offsets()->isMutable()) {
-            offsets = arrayVector->mutableOffsets(size);
-          }
-          if (arrayVector->elements() && arrayVector->elements().unique()) {
-            elements = arrayVector->elements();
-            elements->resize(0);
-          }
+          tryReuseArrayVector(*arrayVector, size, offsets, sizes, elements);
         }
       }
     } else if (auto arrayVector = result->as<velox::ArrayVector>()) {
-      if (arrayVector->sizes() && arrayVector->sizes()->isMutable()) {
-        sizes = arrayVector->mutableSizes(size);
-      }
-      if (arrayVector->offsets() && arrayVector->offsets()->isMutable()) {
-        offsets = arrayVector->mutableOffsets(size);
-      }
-      if (arrayVector->elements() && arrayVector->elements().unique()) {
-        elements = arrayVector->elements();
-        elements->resize(0);
-      }
+      tryReuseArrayVector(*arrayVector, size, offsets, sizes, elements);
     }
   }
 
@@ -171,37 +198,11 @@ velox::DictionaryVector<velox::ComplexType>* prepareDictionaryMapResult(
         alphabet = dictionaryVector->valueVector();
         auto mapVector = alphabet->as<velox::MapVector>();
         if (mapVector) {
-          if (mapVector->sizes() && mapVector->sizes()->isMutable()) {
-            sizes = mapVector->mutableSizes(size);
-          }
-          if (mapVector->offsets() && mapVector->offsets()->isMutable()) {
-            offsets = mapVector->mutableOffsets(size);
-          }
-          if (mapVector->mapKeys() && mapVector->mapKeys().unique()) {
-            keys = mapVector->mapKeys();
-            keys->resize(0);
-          }
-          if (mapVector->mapValues() && mapVector->mapValues().unique()) {
-            elements = mapVector->mapValues();
-            elements->resize(0);
-          }
+          tryReuseMapVector(*mapVector, size, offsets, sizes, keys, elements);
         }
       }
     } else if (auto mapVector = result->as<velox::MapVector>()) {
-      if (mapVector->sizes() && mapVector->sizes()->isMutable()) {
-        sizes = mapVector->mutableSizes(size);
-      }
-      if (mapVector->offsets() && mapVector->offsets()->isMutable()) {
-        offsets = mapVector->mutableOffsets(size);
-      }
-      if (mapVector->mapKeys() && mapVector->mapKeys().unique()) {
-        keys = mapVector->mapKeys();
-        keys->resize(0);
-      }
-      if (mapVector->mapValues() && mapVector->mapValues().unique()) {
-        elements = mapVector->mapValues();
-        elements->resize(0);
-      }
+      tryReuseMapVector(*mapVector, size, offsets, sizes, keys, elements);
     }
   }
 
diff --git a/dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp b/dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp
index 08371f9e..1409e86a 100644
--- a/dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp
+++ b/dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp
@@ -22,16 +22,15 @@
 #include
 
 namespace facebook::nimble {
+namespace {
+
+using namespace facebook::velox;
 
 enum FilterType { kNone, kKeep, kDrop };
 auto format_as(FilterType filterType) {
   return fmt::underlying(filterType);
 }
 
-namespace {
-
-using namespace facebook::velox;
-
 // This test suite covers the basic and mostly single batch test cases, as well
 // as some corner cases that are hard to cover in randomized tests.  We rely on
 // E2EFilterTest for more comprehensive tests with multi stripes and multi
@@ -1006,6 +1005,37 @@ TEST_F(SelectiveNimbleReaderTest, arrayWithOffsetsLastRunFilteredOutAfterRead) {
       {{{1}}, {{1}}}, {false, true}, {1, 1}, std::nullopt, true);
 }
 
+TEST_F(SelectiveNimbleReaderTest, arrayWithOffsetsReuseNullResult) {
+  auto vector = makeRowVector({
+      std::make_shared<MapVector>(
+          pool(),
+          MAP(BIGINT(), ARRAY(BIGINT())),
+          nullptr,
+          4,
+          makeIndices({0, 2, 4, 6}),
+          makeIndices({2, 2, 2, 2}),
+          makeFlatVector<int64_t>({1, 2, 1, 2, 1, 2, 1, 2}),
+          makeNullableArrayVector<int64_t>({
+              {std::nullopt, std::nullopt},
+              {std::optional<int64_t>(3)},
+              {std::nullopt, std::nullopt},
+              {std::optional<int64_t>(3)},
+              {std::nullopt},
+              {std::optional<int64_t>(3)},
+              {std::nullopt},
+              {std::optional<int64_t>(4)},
+          })),
+  });
+  VeloxWriterOptions writerOptions;
+  writerOptions.flatMapColumns = {"c0"};
+  writerOptions.dictionaryArrayColumns = {"c0"};
+  auto fileContent = test::createNimbleFile(*rootPool(), vector, writerOptions);
+  auto scanSpec = std::make_shared<common::ScanSpec>("root");
+  scanSpec->addAllChildFields(*vector->type());
+  auto readers = makeReaders(vector, fileContent, scanSpec);
+  validate(*vector, *readers.rowReader, 2, [](auto) { return true; });
+}
+
 TEST_F(SelectiveNimbleReaderTest, slidingWindowMapSubfieldPruning) {
   common::BigintRange keyFilter(2, 2, false);
   checkSlidingWindowMap(