2222#include " dwio/nimble/tablet/Constants.h"
2323#include " dwio/nimble/velox/ChunkedStream.h"
2424#include " dwio/nimble/velox/EncodingLayoutTree.h"
25+ #include " dwio/nimble/velox/FlushPolicy.h"
2526#include " dwio/nimble/velox/SchemaSerialization.h"
2627#include " dwio/nimble/velox/StatsGenerated.h"
2728#include " dwio/nimble/velox/VeloxReader.h"
@@ -289,7 +290,7 @@ std::vector<velox::RowVectorPtr> generateBatches(
289290 velox::VectorFuzzer fuzzer (
290291 {.vectorSize = size, .nullRatio = 0.1 }, &pool, seed);
291292 std::vector<velox::RowVectorPtr> batches;
292-
293+ batches. reserve (batchCount);
293294 for (size_t i = 0 ; i < batchCount; ++i) {
294295 batches.push_back (fuzzer.fuzzInputFlatRow (type));
295296 }
@@ -1951,6 +1952,108 @@ TEST_F(VeloxWriterTests, RawSizeWritten) {
19511952 ASSERT_EQ (expectedRawSize, rawSize);
19521953}
19531954
/// Parameters for a single ChunkFlushPolicyTest run: the writer and
/// flush-policy settings to use, followed by the stripe and chunk counts the
/// resulting file is expected to contain.
struct ChunkFlushPolicyTestCase {
  /// Number of fuzzed input batches written by the test.
  const size_t batchCount{20};
  /// When true the test installs nimble::ChunkFlushPolicy and turns on writer
  /// chunking; when false it installs nimble::StripeRawSizeFlushPolicy.
  const bool enableChunking{true};
  /// Passed to whichever flush policy is selected (256 << 10 == 256KB).
  const uint64_t targetStripeSizeBytes{256 << 10};
  /// Forwarded to ChunkFlushPolicyConfig::writerMaxMemoryBytes (80KB).
  const uint64_t writerMaxMemoryBytes{80 << 10};
  /// Forwarded to ChunkFlushPolicyConfig::writerMinMemoryBytes (75KB).
  const uint64_t writerMinMemoryBytes{75 << 10};
  /// Forwarded to ChunkFlushPolicyConfig::compressionRatio.
  const double compressionRatio{1.0};
  /// Forwarded to VeloxWriterOptions::minStreamChunkRawSize.
  const uint32_t minStreamChunkRawSize{100};
  /// Stripe count the written file is expected to report.
  const uint32_t expectedStripeCount{0};
  /// Expected largest chunk count found in any loaded stream of any stripe.
  const uint32_t expectedMaxChunkCount{0};
  /// Expected smallest chunk count found in any loaded stream of any stripe.
  const uint32_t expectedMinChunkCount{0};
};
1967+
1968+ class ChunkFlushPolicyTest
1969+ : public VeloxWriterTests,
1970+ public ::testing::WithParamInterface<ChunkFlushPolicyTestCase> {};
1971+
1972+ TEST_P (ChunkFlushPolicyTest, ChunkFlushPolicyIntegration) {
1973+ auto type = velox::ROW (
1974+ {{" BIGINT" , velox::BIGINT ()}, {" SMALLINT" , velox::SMALLINT ()}});
1975+ nimble::VeloxWriterOptions writerOptions{
1976+ .minStreamChunkRawSize = GetParam ().minStreamChunkRawSize ,
1977+ .flushPolicyFactory = GetParam ().enableChunking
1978+ ? []() -> std::unique_ptr<nimble::FlushPolicy> {
1979+ return std::make_unique<nimble::ChunkFlushPolicy>(
1980+ nimble::ChunkFlushPolicyConfig{
1981+ .writerMaxMemoryBytes = GetParam ().writerMaxMemoryBytes ,
1982+ .writerMinMemoryBytes = GetParam ().writerMinMemoryBytes ,
1983+ .targetStripeSizeBytes = GetParam ().targetStripeSizeBytes ,
1984+ .compressionRatio =
1985+ GetParam ().compressionRatio ,
1986+ });
1987+ }
1988+ : []() -> std::unique_ptr<nimble::FlushPolicy> {
1989+ return std::make_unique<nimble::StripeRawSizeFlushPolicy>(
1990+ GetParam ().targetStripeSizeBytes );
1991+ },
1992+ .enableChunking = GetParam ().enableChunking ,
1993+ };
1994+
1995+ std::string file;
1996+ auto writeFile = std::make_unique<velox::InMemoryWriteFile>(&file);
1997+
1998+ nimble::VeloxWriter writer (
1999+ *rootPool_, type, std::move (writeFile), std::move (writerOptions));
2000+ auto batches = generateBatches (
2001+ type,
2002+ GetParam ().batchCount ,
2003+ /* size=*/ 4000 ,
2004+ /* seed=*/ 20221110 ,
2005+ *leafPool_);
2006+
2007+ for (const auto & batch : batches) {
2008+ writer.write (batch);
2009+ }
2010+ writer.close ();
2011+
2012+ velox::InMemoryReadFile readFile (file);
2013+ auto selector = std::make_shared<velox::dwio::common::ColumnSelector>(type);
2014+ nimble::VeloxReader reader (*leafPool_, &readFile, std::move (selector));
2015+
2016+ // Verify stripe count
2017+ auto expectedStripeCount = GetParam ().expectedStripeCount ;
2018+ auto actualStripeCount = reader.tabletReader ().stripeCount ();
2019+ EXPECT_EQ (expectedStripeCount, actualStripeCount);
2020+
2021+ // Verify chunk count
2022+ auto chunkCountPair = [&]() {
2023+ nimble::TabletReader tablet{*leafPool_, &readFile};
2024+ uint32_t maxChunkCount = 0 ;
2025+ uint32_t minChunkCount = std::numeric_limits<uint32_t >::max ();
2026+
2027+ for (uint32_t index = 0 ; index < actualStripeCount; ++index) {
2028+ auto stripeIdentifier = tablet.getStripeIdentifier (index);
2029+ auto streamCount = tablet.streamCount (stripeIdentifier);
2030+
2031+ std::vector<uint32_t > streamIds (streamCount);
2032+ std::iota (streamIds.begin (), streamIds.end (), 0 );
2033+ auto streamLoaders = tablet.load (stripeIdentifier, streamIds);
2034+
2035+ for (auto & streamLoader : streamLoaders) {
2036+ if (!streamLoader) {
2037+ continue ;
2038+ }
2039+ nimble::InMemoryChunkedStream chunked{
2040+ *leafPool_, std::move (streamLoader)};
2041+ uint32_t chunkCount = 0 ;
2042+ while (chunked.hasNext ()) {
2043+ chunked.nextChunk ();
2044+ chunkCount++;
2045+ }
2046+ maxChunkCount = std::max (maxChunkCount, chunkCount);
2047+ minChunkCount = std::min (minChunkCount, chunkCount);
2048+ }
2049+ }
2050+ return std::make_pair (maxChunkCount, minChunkCount);
2051+ };
2052+ auto [maxChunkCount, minChunkCount] = chunkCountPair ();
2053+ EXPECT_EQ (GetParam ().expectedMaxChunkCount , maxChunkCount);
2054+ EXPECT_EQ (GetParam ().expectedMinChunkCount , minChunkCount);
2055+ }
2056+
19542057INSTANTIATE_TEST_CASE_P (
19552058 StripeRawSizeFlushPolicyTestSuite,
19562059 StripeRawSizeFlushPolicyTest,
@@ -1975,4 +2078,91 @@ INSTANTIATE_TEST_CASE_P(
19752078 .batchCount = 100 ,
19762079 .rawStripeSize = 256 << 20 ,
19772080 .stripeCount = 1 }));
2081+
2082+ INSTANTIATE_TEST_CASE_P (
2083+ ChunkFlushPolicyTestSuite,
2084+ ChunkFlushPolicyTest,
2085+ ::testing::Values (
2086+ // Base case (no chunking, RawStripeSizeFlushPolicy)
2087+ ChunkFlushPolicyTestCase{
2088+ .batchCount = 20 ,
2089+ .enableChunking = false ,
2090+ .targetStripeSizeBytes = 250 << 10 , // 250KB
2091+ .writerMaxMemoryBytes = 80 << 10 ,
2092+ .writerMinMemoryBytes = 75 << 10 ,
2093+ .compressionRatio = 1.0 ,
2094+ .minStreamChunkRawSize = 100 ,
2095+ .expectedStripeCount = 4 ,
2096+ .expectedMaxChunkCount = 1 ,
2097+ .expectedMinChunkCount = 1 ,
2098+ },
2099+ // Base case with default settings (has chunking)
2100+ ChunkFlushPolicyTestCase{
2101+ .batchCount = 20 ,
2102+ .enableChunking = true ,
2103+ .targetStripeSizeBytes = 250 << 10 , // 250KB
2104+ .writerMaxMemoryBytes = 80 << 10 ,
2105+ .writerMinMemoryBytes = 75 << 10 ,
2106+ .compressionRatio = 1.0 ,
2107+ .minStreamChunkRawSize = 100 ,
2108+ .expectedStripeCount = 3 ,
2109+ .expectedMaxChunkCount = 7 ,
2110+ .expectedMinChunkCount = 3 ,
2111+ },
2112+ // High memory regression threshold
2113+ // Produces file identical to RawStripeSizeFlushPolicy
2114+ ChunkFlushPolicyTestCase{
2115+ .batchCount = 20 ,
2116+ .enableChunking = true ,
2117+ .targetStripeSizeBytes = 256 << 10 ,
2118+ .writerMaxMemoryBytes = 500 << 10 , // +420KB
2119+ .writerMinMemoryBytes = 495 << 10 , // +420KB
2120+ .compressionRatio = 1.0 ,
2121+ .minStreamChunkRawSize = 100 ,
2122+ .expectedStripeCount = 4 ,
2123+ .expectedMaxChunkCount = 1 ,
2124+ .expectedMinChunkCount = 1 ,
2125+ },
2126+ // Low memory regression threshold
2127+ // Produces file with more chunks per stripe
2128+ ChunkFlushPolicyTestCase{
2129+ .batchCount = 20 ,
2130+ .enableChunking = true ,
2131+ .targetStripeSizeBytes = 256 << 10 ,
2132+ .writerMaxMemoryBytes = 40 << 10 , // -40KB
2133+ .writerMinMemoryBytes = 35 << 10 , // -40KB
2134+ .compressionRatio = 1.0 ,
2135+ .minStreamChunkRawSize = 100 ,
2136+ .expectedStripeCount = 3 ,
2137+ .expectedMaxChunkCount = 8 ,
2138+ .expectedMinChunkCount = 4 ,
2139+ },
2140+ // High target stripe size bytes (with disabled memory pressure
2141+ // optimization) produces fewer stripes. Single chunks.
2142+ ChunkFlushPolicyTestCase{
2143+ .batchCount = 20 ,
2144+ .enableChunking = true ,
2145+ .targetStripeSizeBytes = 900 << 10 , // +900KB
2146+ .writerMaxMemoryBytes = 2 << 20 , // +2MB
2147+ .writerMinMemoryBytes = 1 << 20 , // +1MB
2148+ .compressionRatio = 1.0 ,
2149+ .minStreamChunkRawSize = 100 ,
2150+ .expectedStripeCount = 1 , // -2 stripes
2151+ .expectedMaxChunkCount = 1 ,
2152+ .expectedMinChunkCount = 1 ,
2153+ },
2154+ // Low target stripe size bytes (with disabled memory pressure
2155+ // optimization) produces more stripes. Single chunks.
2156+ ChunkFlushPolicyTestCase{
2157+ .batchCount = 20 ,
2158+ .enableChunking = true ,
2159+ .targetStripeSizeBytes = 90 << 10 , // -160KB
2160+ .writerMaxMemoryBytes = 2 << 20 , // +2MB
2161+ .writerMinMemoryBytes = 1 << 20 , // +1MB
2162+ .compressionRatio = 1.0 ,
2163+ .minStreamChunkRawSize = 100 ,
2164+ .expectedStripeCount = 7 , // +6 stripes
2165+ .expectedMaxChunkCount = 1 ,
2166+ .expectedMinChunkCount = 1 ,
2167+ }));
19782168} // namespace facebook
0 commit comments