-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Expose remaining parquet config options into ConfigOptions (try 2) #4427
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ed1030b
497bbc2
42f1598
185a0e3
53ff1be
2ce9f09
305e4d3
92d117d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,6 +61,16 @@ pub const OPT_PARQUET_REORDER_FILTERS: &str = | |
pub const OPT_PARQUET_ENABLE_PAGE_INDEX: &str = | ||
"datafusion.execution.parquet.enable_page_index"; | ||
|
||
/// Configuration option "datafusion.execution.parquet.pruning" | ||
/// | ||
/// Boolean option, registered with a default of `true`. When true, the parquet | ||
/// reader attempts to skip entire row groups based on the predicate in the | ||
/// query and the metadata (min/max values) stored in the parquet file. | ||
pub const OPT_PARQUET_ENABLE_PRUNING: &str = "datafusion.execution.parquet.pruning"; | ||
|
||
/// Configuration option "datafusion.execution.parquet.skip_metadata" | ||
/// | ||
/// Boolean option, registered with a default of `true`. When true, the parquet | ||
/// reader skips the optional embedded metadata that may be in the file schema, | ||
/// which can help avoid schema conflicts when querying multiple parquet files | ||
/// whose schemas contain compatible types but different metadata. | ||
pub const OPT_PARQUET_SKIP_METADATA: &str = "datafusion.execution.parquet.skip_metadata"; | ||
|
||
/// Configuration option "datafusion.execution.parquet.metadata_size_hint" | ||
/// | ||
/// Registered as a `DataType::UInt64` option whose default is | ||
/// `ScalarValue::UInt64(None)`, i.e. unset. When a value is specified, the | ||
/// parquet reader tries to fetch the last `size_hint` bytes of the parquet | ||
/// file optimistically; when it is not specified, two reads are required, | ||
/// the first of which fetches the 8-byte parquet footer. | ||
pub const OPT_PARQUET_METADATA_SIZE_HINT: &str = | ||
"datafusion.execution.parquet.metadata_size_hint"; | ||
|
||
/// Configuration option "datafusion.optimizer.skip_failed_rules" | ||
/// | ||
/// Boolean option (it is registered through `ConfigDefinition::new_bool`). | ||
pub const OPT_OPTIMIZER_SKIP_FAILED_RULES: &str = | ||
"datafusion.optimizer.skip_failed_rules"; | ||
|
@@ -255,6 +265,29 @@ impl BuiltInConfigs { | |
to reduce the number of rows decoded.", | ||
false, | ||
), | ||
ConfigDefinition::new_bool( | ||
OPT_PARQUET_ENABLE_PRUNING, | ||
"If true, the parquet reader attempts to skip entire row groups based \ | ||
on the predicate in the query and the metadata (min/max values) stored in \ | ||
the parquet file.", | ||
true, | ||
), | ||
ConfigDefinition::new_bool( | ||
OPT_PARQUET_SKIP_METADATA, | ||
"If true, the parquet reader skip the optional embedded metadata that may be in \ | ||
the file Schema. This setting can help avoid schema conflicts when querying \ | ||
multiple parquet files with schemas containing compatible types but different metadata.", | ||
true, | ||
), | ||
ConfigDefinition::new( | ||
OPT_PARQUET_METADATA_SIZE_HINT, | ||
"If specified, the parquet reader will try and fetch the last `size_hint` \ | ||
bytes of the parquet file optimistically. If not specified, two read are required: \ | ||
One read to fetch the 8-byte parquet footer and \ | ||
another to fetch the metadata length encoded in the footer.", | ||
DataType::UInt64, | ||
ScalarValue::UInt64(None), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As mentioned by @thinkharderdev on #3885 (comment), we should probably change this default to something reasonable (like 64K), but I would rather do that in a follow-on PR. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Filed #4459 to track. |
||
), | ||
ConfigDefinition::new_bool( | ||
OPT_OPTIMIZER_SKIP_FAILED_RULES, | ||
"When set to true, the logical plan optimizer will produce warning \ | ||
|
@@ -424,6 +457,12 @@ impl ConfigOptions { | |
get_conf_value!(self, UInt64, key, "u64") | ||
} | ||
|
||
/// Get a u64 configuration option as a `usize`. | ||
/// | ||
/// The value for `key` is read with `get_conf_value!` using the `UInt64` | ||
/// variant and is then converted with `try_into`. Returns `None` when that | ||
/// lookup yields `None` or when the conversion to `usize` fails. | ||
pub fn get_usize(&self, key: &str) -> Option<usize> { | ||
let v = get_conf_value!(self, UInt64, key, "usize"); | ||
// A failed conversion is mapped to `None` by `.ok()` rather than panicking. | ||
v.and_then(|v| v.try_into().ok()) | ||
} | ||
|
||
/// get a string configuration option | ||
pub fn get_string(&self, key: &str) -> Option<String> { | ||
get_conf_value!(self, Utf8, key, "string") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As the parquet format now reads defaults from `ConfigOptions`, they need to be passed to the constructor.