Skip to content

Commit bf44e45

Browse files
helfmanfacebook-github-bot
authored andcommitted
Back out "Free up memory in DictionaryEncoding::encode faster" (#175)
Summary: Pull Request resolved: #175 The diff above will create a big memory bandwidth regression. Nimble Buffers are not meant to be short lived. Each buffer allocates a very big chunk of memory inside. The proper fix (which was planned to be implemented as part of the compression fallback change) was to have two buffers to be used at the time of encodings. One for temp memory, like this (which will be dicarded after all encodings are done), and one for holding the encoding results. Reverting this change for now. Reviewed By: sdruzkin Differential Revision: D75824819 fbshipit-source-id: f40fd2311dc456191b3822188d1deb770dc29f2d
1 parent b08bc8f commit bf44e45

File tree

1 file changed

+27
-31
lines changed

1 file changed

+27
-31
lines changed

dwio/nimble/encodings/DictionaryEncoding.h

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -201,40 +201,36 @@ std::string_view DictionaryEncoding<T>::encode(
201201
std::span<const physicalType> values,
202202
Buffer& buffer) {
203203
const uint32_t valueCount = values.size();
204-
Buffer tempBuffer{buffer.getMemoryPool()};
205-
std::string_view serializedAlphabet;
206-
std::string_view serializedIndices;
207-
208-
// scope alphabet + indicies population to free up memory faster
209-
{
210-
const uint32_t alphabetCount = selection.statistics().uniqueCounts().size();
211-
folly::F14FastMap<physicalType, uint32_t> alphabetMapping;
212-
alphabetMapping.reserve(alphabetCount);
213-
{
214-
Vector<physicalType> alphabet{&buffer.getMemoryPool()};
215-
alphabet.reserve(alphabetCount);
216-
uint32_t index = 0;
217-
for (const auto& pair : selection.statistics().uniqueCounts()) {
218-
alphabet.push_back(pair.first);
219-
alphabetMapping.emplace(pair.first, index++);
220-
}
221-
serializedAlphabet = selection.template encodeNested<physicalType>(
222-
EncodingIdentifiers::Dictionary::Alphabet, {alphabet}, tempBuffer);
223-
}
204+
const uint32_t alphabetCount = selection.statistics().uniqueCounts().size();
205+
206+
folly::F14FastMap<physicalType, uint32_t> alphabetMapping;
207+
alphabetMapping.reserve(alphabetCount);
208+
Vector<physicalType> alphabet{&buffer.getMemoryPool()};
209+
alphabet.reserve(alphabetCount);
210+
uint32_t index = 0;
211+
for (const auto& pair : selection.statistics().uniqueCounts()) {
212+
alphabet.push_back(pair.first);
213+
alphabetMapping.emplace(pair.first, index++);
214+
}
224215

225-
Vector<uint32_t> indices{&buffer.getMemoryPool()};
226-
indices.reserve(valueCount);
227-
for (const auto& value : values) {
228-
auto it = alphabetMapping.find(value);
229-
NIMBLE_DASSERT(
230-
it != alphabetMapping.end(),
231-
"Statistics corruption. Missing alphabet entry.");
232-
indices.push_back(it->second);
233-
}
234-
serializedIndices = selection.template encodeNested<uint32_t>(
235-
EncodingIdentifiers::Dictionary::Indices, {indices}, tempBuffer);
216+
Vector<uint32_t> indices{&buffer.getMemoryPool()};
217+
indices.reserve(valueCount);
218+
for (const auto& value : values) {
219+
auto it = alphabetMapping.find(value);
220+
NIMBLE_DASSERT(
221+
it != alphabetMapping.end(),
222+
"Statistics corruption. Missing alphabet entry.");
223+
indices.push_back(it->second);
236224
}
237225

226+
Buffer tempBuffer{buffer.getMemoryPool()};
227+
std::string_view serializedAlphabet =
228+
selection.template encodeNested<physicalType>(
229+
EncodingIdentifiers::Dictionary::Alphabet, {alphabet}, tempBuffer);
230+
std::string_view serializedIndices =
231+
selection.template encodeNested<uint32_t>(
232+
EncodingIdentifiers::Dictionary::Indices, {indices}, tempBuffer);
233+
238234
const uint32_t encodingSize = Encoding::kPrefixSize + 4 +
239235
serializedAlphabet.size() + serializedIndices.size();
240236
char* reserved = buffer.reserve(encodingSize);

0 commit comments

Comments
 (0)