-
Notifications
You must be signed in to change notification settings - Fork 1k
parquet-rewrite: add write_batch_size and compression_level config #8642
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,48 +47,6 @@ use parquet::{ | |
| }, | ||
| }; | ||
|
|
||
| #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)] | ||
| enum CompressionArgs { | ||
| /// No compression. | ||
| None, | ||
|
|
||
| /// Snappy | ||
| Snappy, | ||
|
|
||
| /// GZip | ||
| Gzip, | ||
|
|
||
| /// LZO | ||
| Lzo, | ||
|
|
||
| /// Brotli | ||
| Brotli, | ||
|
|
||
| /// LZ4 | ||
| Lz4, | ||
|
|
||
| /// Zstd | ||
| Zstd, | ||
|
|
||
| /// LZ4 Raw | ||
| Lz4Raw, | ||
| } | ||
|
|
||
| impl From<CompressionArgs> for Compression { | ||
| fn from(value: CompressionArgs) -> Self { | ||
| match value { | ||
| CompressionArgs::None => Self::UNCOMPRESSED, | ||
| CompressionArgs::Snappy => Self::SNAPPY, | ||
| CompressionArgs::Gzip => Self::GZIP(Default::default()), | ||
| CompressionArgs::Lzo => Self::LZO, | ||
| CompressionArgs::Brotli => Self::BROTLI(Default::default()), | ||
| CompressionArgs::Lz4 => Self::LZ4, | ||
| CompressionArgs::Zstd => Self::ZSTD(Default::default()), | ||
| CompressionArgs::Lz4Raw => Self::LZ4_RAW, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)] | ||
| enum EncodingArgs { | ||
| /// Default byte encoding. | ||
|
|
@@ -216,8 +174,8 @@ struct Args { | |
| output: String, | ||
|
|
||
| /// Compression used for all columns. | ||
| #[clap(long, value_enum)] | ||
| compression: Option<CompressionArgs>, | ||
| #[clap(long)] | ||
| compression: Option<Compression>, | ||
|
||
|
|
||
| /// Encoding used for all columns, if dictionary is not enabled. | ||
| #[clap(long, value_enum)] | ||
|
|
@@ -286,6 +244,10 @@ struct Args { | |
| #[clap(long)] | ||
| writer_version: Option<WriterVersionArgs>, | ||
|
|
||
| /// Sets write batch size. | ||
| #[clap(long)] | ||
| write_batch_size: Option<usize>, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Without batch-size, a small |
||
|
|
||
| /// Sets whether to coerce Arrow types to match Parquet specification | ||
| #[clap(long)] | ||
| coerce_types: Option<bool>, | ||
|
|
@@ -314,7 +276,7 @@ fn main() { | |
|
|
||
| let mut writer_properties_builder = WriterProperties::builder().set_key_value_metadata(kv_md); | ||
| if let Some(value) = args.compression { | ||
| writer_properties_builder = writer_properties_builder.set_compression(value.into()); | ||
| writer_properties_builder = writer_properties_builder.set_compression(value); | ||
| } | ||
|
|
||
| // setup encoding | ||
|
|
@@ -382,6 +344,9 @@ fn main() { | |
| if let Some(value) = args.coerce_types { | ||
| writer_properties_builder = writer_properties_builder.set_coerce_types(value); | ||
| } | ||
| if let Some(value) = args.write_batch_size { | ||
| writer_properties_builder = writer_properties_builder.set_write_batch_size(value); | ||
| } | ||
| let writer_properties = writer_properties_builder.build(); | ||
| let mut parquet_writer = ArrowWriter::try_new( | ||
| File::create(&args.output).expect("Unable to open output file"), | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One point of this structure is to provide documentation. With your change, the help output now looks like:
whereas before, the compression help was:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know, clap might already handle this well?