Skip to content

Commit c261c9e

Browse files
committed
add top-memory-consumers option in cli
1 parent 3e30f77 commit c261c9e

File tree

2 files changed

+29
-3
lines changed

2 files changed

+29
-3
lines changed

datafusion-cli/src/main.rs

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,16 @@
1717

1818
use std::collections::HashMap;
1919
use std::env;
20+
use std::num::NonZeroUsize;
2021
use std::path::Path;
2122
use std::process::ExitCode;
2223
use std::sync::{Arc, LazyLock};
2324

2425
use datafusion::error::{DataFusionError, Result};
2526
use datafusion::execution::context::SessionConfig;
26-
use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, MemoryPool};
27+
use datafusion::execution::memory_pool::{
28+
FairSpillPool, GreedyMemoryPool, MemoryPool, TrackConsumersPool,
29+
};
2730
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
2831
use datafusion::execution::DiskManager;
2932
use datafusion::prelude::SessionContext;
@@ -118,6 +121,13 @@ struct Args {
118121
)]
119122
mem_pool_type: PoolType,
120123

124+
#[clap(
125+
long,
126+
help = "The number of top memory consumers to display when query fails due to memory exhaustion. If you don't want to track MemoryConsumers, set this value to 0",
127+
default_value = "3"
128+
)]
129+
top_memory_consumers: usize,
130+
121131
#[clap(
122132
long,
123133
help = "The max number of rows to display for 'Table' format\n[possible values: numbers(0/10/...), inf(no limit)]",
@@ -169,9 +179,22 @@ async fn main_inner() -> Result<()> {
169179
if let Some(memory_limit) = args.memory_limit {
170180
// set memory pool type
171181
let pool: Arc<dyn MemoryPool> = match args.mem_pool_type {
172-
PoolType::Fair => Arc::new(FairSpillPool::new(memory_limit)),
173-
PoolType::Greedy => Arc::new(GreedyMemoryPool::new(memory_limit)),
182+
PoolType::Fair if args.top_memory_consumers == 0 => {
183+
Arc::new(FairSpillPool::new(memory_limit))
184+
}
185+
PoolType::Fair => Arc::new(TrackConsumersPool::new(
186+
FairSpillPool::new(memory_limit),
187+
NonZeroUsize::new(args.top_memory_consumers).unwrap(),
188+
)),
189+
PoolType::Greedy if args.top_memory_consumers == 0 => {
190+
Arc::new(GreedyMemoryPool::new(memory_limit))
191+
}
192+
PoolType::Greedy => Arc::new(TrackConsumersPool::new(
193+
GreedyMemoryPool::new(memory_limit),
194+
NonZeroUsize::new(args.top_memory_consumers).unwrap(),
195+
)),
174196
};
197+
175198
rt_builder = rt_builder.with_memory_pool(pool)
176199
}
177200

docs/source/user-guide/cli/usage.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ OPTIONS:
5757
--mem-pool-type <MEM_POOL_TYPE>
5858
Specify the memory pool type 'greedy' or 'fair', default to 'greedy'
5959

60+
--top-memory-consumers <TOP_MEMORY_CONSUMERS>
61+
The number of top memory consumers to display when query fails due to memory exhaustion. If you don't want to track MemoryConsumers, set this value to 0
62+
6063
-d, --disk-limit <DISK_LIMIT>
6164
Available disk space for spilling queries (e.g. '10g'), default to None (uses DataFusion's default value of '100g')
6265

0 commit comments

Comments
 (0)