Skip to content

Commit fb8eeb2

Browse files
Expose remaining parquet config options into ConfigOptions (try 2) (#4427)
* Expose remaining parquet config options into ConfigOptions (try 2) * fix: doctests * Update config doc * Update docs/source/user-guide/configs.md Co-authored-by: Dan Harris <[email protected]> * Update configuration docs Co-authored-by: Dan Harris <[email protected]>
1 parent 09aea09 commit fb8eeb2

File tree

17 files changed

+315
-152
lines changed

17 files changed

+315
-152
lines changed

benchmarks/src/bin/tpch.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,8 @@ async fn get_table(
396396
}
397397
"parquet" => {
398398
let path = format!("{}/{}", path, table);
399-
let format = ParquetFormat::default().with_enable_pruning(true);
399+
let format = ParquetFormat::new(ctx.config_options())
400+
.with_enable_pruning(Some(true));
400401

401402
(Arc::new(format), path, DEFAULT_PARQUET_EXTENSION)
402403
}

datafusion-examples/examples/flight_server.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ impl FlightService for FlightServiceImpl {
6767
) -> Result<Response<SchemaResult>, Status> {
6868
let request = request.into_inner();
6969

70-
let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()));
70+
let config = SessionConfig::new();
71+
let listing_options =
72+
ListingOptions::new(Arc::new(ParquetFormat::new(config.config_options())));
7173
let table_path =
7274
ListingTableUrl::parse(&request.path[0]).map_err(to_tonic_err)?;
7375

datafusion-examples/examples/parquet_sql_multiple_files.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ async fn main() -> Result<()> {
3232
let testdata = datafusion::test_util::parquet_test_data();
3333

3434
// Configure listing options
35-
let file_format = ParquetFormat::default().with_enable_pruning(true);
35+
let file_format =
36+
ParquetFormat::new(ctx.config_options()).with_enable_pruning(Some(true));
3637
let listing_options = ListingOptions::new(Arc::new(file_format))
3738
.with_file_extension(FileType::PARQUET.get_ext());
3839

datafusion/core/src/config.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,16 @@ pub const OPT_PARQUET_REORDER_FILTERS: &str =
6161
pub const OPT_PARQUET_ENABLE_PAGE_INDEX: &str =
6262
"datafusion.execution.parquet.enable_page_index";
6363

64+
/// Configuration option "datafusion.execution.parquet.pruning"
///
/// Boolean option. When true, the parquet reader attempts to skip entire row
/// groups based on the query predicate and the min/max metadata stored in the
/// parquet file.
65+
pub const OPT_PARQUET_ENABLE_PRUNING: &str = "datafusion.execution.parquet.pruning";
66+
67+
/// Configuration option "datafusion.execution.parquet.skip_metadata"
///
/// Boolean option. When true, the parquet reader skips the optional embedded
/// metadata that may be present in the file schema, which can help avoid schema
/// conflicts across files whose types are compatible but whose metadata differs.
68+
pub const OPT_PARQUET_SKIP_METADATA: &str = "datafusion.execution.parquet.skip_metadata";
69+
70+
/// Configuration option "datafusion.execution.parquet.metadata_size_hint"
///
/// Optional `UInt64` option. When specified, the parquet reader optimistically
/// fetches the last `size_hint` bytes of the file; when not specified, two
/// reads are needed to locate and load the metadata.
71+
pub const OPT_PARQUET_METADATA_SIZE_HINT: &str =
72+
"datafusion.execution.parquet.metadata_size_hint";
73+
6474
/// Configuration option "datafusion.optimizer.skip_failed_rules"
6575
pub const OPT_OPTIMIZER_SKIP_FAILED_RULES: &str =
6676
"datafusion.optimizer.skip_failed_rules";
@@ -255,6 +265,29 @@ impl BuiltInConfigs {
255265
to reduce the number of rows decoded.",
256266
false,
257267
),
268+
ConfigDefinition::new_bool(
269+
OPT_PARQUET_ENABLE_PRUNING,
270+
"If true, the parquet reader attempts to skip entire row groups based \
271+
on the predicate in the query and the metadata (min/max values) stored in \
272+
the parquet file.",
273+
true,
274+
),
275+
ConfigDefinition::new_bool(
276+
OPT_PARQUET_SKIP_METADATA,
277+
"If true, the parquet reader skips the optional embedded metadata that may be in \
278+
the file Schema. This setting can help avoid schema conflicts when querying \
279+
multiple parquet files with schemas containing compatible types but different metadata.",
280+
true,
281+
),
282+
ConfigDefinition::new(
283+
OPT_PARQUET_METADATA_SIZE_HINT,
284+
"If specified, the parquet reader will try and fetch the last `size_hint` \
285+
bytes of the parquet file optimistically. If not specified, two reads are required: \
286+
One read to fetch the 8-byte parquet footer and \
287+
another to fetch the metadata length encoded in the footer.",
288+
DataType::UInt64,
289+
ScalarValue::UInt64(None),
290+
),
258291
ConfigDefinition::new_bool(
259292
OPT_OPTIMIZER_SKIP_FAILED_RULES,
260293
"When set to true, the logical plan optimizer will produce warning \
@@ -424,6 +457,12 @@ impl ConfigOptions {
424457
get_conf_value!(self, UInt64, key, "u64")
425458
}
426459

460+
/// Get a u64 configuration option as a usize.
///
/// Looks up `key` through the `get_conf_value!` macro, requesting the `UInt64`
/// variant, and then narrows the result to `usize` with `try_into`.
///
/// Returns `None` when the macro yields no value, or when the stored value
/// cannot be represented as a `usize` on the current target (the conversion
/// error is discarded rather than reported).
461+
pub fn get_usize(&self, key: &str) -> Option<usize> {
462+
// NOTE(review): presumably yields `Option<u64>`; the macro is defined outside this view — confirm.
let v = get_conf_value!(self, UInt64, key, "usize");
463+
// A failed narrowing conversion is mapped to `None` via `.ok()`.
v.and_then(|v| v.try_into().ok())
464+
}
465+
427466
/// get a string configuration option
428467
pub fn get_string(&self, key: &str) -> Option<String> {
429468
get_conf_value!(self, Utf8, key, "string")

0 commit comments

Comments
 (0)