Skip to content

Commit fb080a2

Browse files
committed
remove unused max_statistics_size field
1 parent 3d6002a commit fb080a2

File tree

9 files changed

+36
-184
lines changed

9 files changed

+36
-184
lines changed

datafusion/common/src/config.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -446,10 +446,6 @@ config_namespace! {
446446
/// default parquet writer setting
447447
pub statistics_enabled: Option<String>, default = Some("page".into())
448448

449-
/// (writing) Sets max statistics size for any column. If NULL, uses
450-
/// default parquet writer setting
451-
pub max_statistics_size: Option<usize>, default = Some(4096)
452-
453449
/// (writing) Target maximum number of rows in each row group (defaults to 1M
454450
/// rows). Writing larger row groups requires more memory to write, but
455451
/// can get better compression and be faster to read.
@@ -1621,10 +1617,6 @@ config_namespace_with_hashmap! {
16211617
/// Sets bloom filter number of distinct values. If NULL, uses
16221618
/// default parquet options
16231619
pub bloom_filter_ndv: Option<u64>, default = None
1624-
1625-
/// Sets max statistics size for the column path. If NULL, uses
1626-
/// default parquet options
1627-
pub max_statistics_size: Option<usize>, default = None
16281620
}
16291621
}
16301622

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use parquet::{
2626
basic::{BrotliLevel, GzipLevel, ZstdLevel},
2727
file::properties::{
2828
EnabledStatistics, WriterProperties, WriterPropertiesBuilder, WriterVersion,
29-
DEFAULT_MAX_STATISTICS_SIZE, DEFAULT_STATISTICS_ENABLED,
29+
DEFAULT_STATISTICS_ENABLED,
3030
},
3131
format::KeyValue,
3232
schema::types::ColumnPath,
@@ -129,11 +129,6 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
129129
builder =
130130
builder.set_column_bloom_filter_ndv(path.clone(), bloom_filter_ndv);
131131
}
132-
133-
if let Some(max_statistics_size) = options.max_statistics_size {
134-
builder =
135-
builder.set_column_max_statistics_size(path, max_statistics_size);
136-
}
137132
}
138133

139134
Ok(builder)
@@ -154,7 +149,6 @@ impl ParquetOptions {
154149
dictionary_enabled,
155150
dictionary_page_size_limit,
156151
statistics_enabled,
157-
max_statistics_size,
158152
max_row_group_size,
159153
created_by,
160154
column_index_truncate_length,
@@ -190,9 +184,6 @@ impl ParquetOptions {
190184
.and_then(|s| parse_statistics_string(s).ok())
191185
.unwrap_or(DEFAULT_STATISTICS_ENABLED),
192186
)
193-
.set_max_statistics_size(
194-
max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE),
195-
)
196187
.set_max_row_group_size(*max_row_group_size)
197188
.set_created_by(created_by.clone())
198189
.set_column_index_truncate_length(*column_index_truncate_length)
@@ -395,7 +386,6 @@ mod tests {
395386
compression: Some("zstd(22)".into()),
396387
dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v),
397388
statistics_enabled: Some("none".into()),
398-
max_statistics_size: Some(72),
399389
encoding: Some("RLE".into()),
400390
bloom_filter_enabled: Some(true),
401391
bloom_filter_fpp: Some(0.72),
@@ -419,7 +409,6 @@ mod tests {
419409
dictionary_enabled: Some(!defaults.dictionary_enabled.unwrap_or(false)),
420410
dictionary_page_size_limit: 42,
421411
statistics_enabled: Some("chunk".into()),
422-
max_statistics_size: Some(42),
423412
max_row_group_size: 42,
424413
created_by: "wordy".into(),
425414
column_index_truncate_length: Some(42),
@@ -473,7 +462,6 @@ mod tests {
473462
),
474463
bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp),
475464
bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv),
476-
max_statistics_size: Some(props.max_statistics_size(&col)),
477465
}
478466
}
479467

@@ -523,7 +511,6 @@ mod tests {
523511
compression: default_col_props.compression,
524512
dictionary_enabled: default_col_props.dictionary_enabled,
525513
statistics_enabled: default_col_props.statistics_enabled,
526-
max_statistics_size: default_col_props.max_statistics_size,
527514
bloom_filter_on_write: default_col_props
528515
.bloom_filter_enabled
529516
.unwrap_or_default(),

datafusion/proto-common/proto/datafusion_common.proto

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -472,10 +472,6 @@ message ParquetColumnOptions {
472472
oneof bloom_filter_ndv_opt {
473473
uint64 bloom_filter_ndv = 7;
474474
}
475-
476-
oneof max_statistics_size_opt {
477-
uint32 max_statistics_size = 8;
478-
}
479475
}
480476

481477
message ParquetOptions {
@@ -513,10 +509,6 @@ message ParquetOptions {
513509
string statistics_enabled = 13;
514510
}
515511

516-
oneof max_statistics_size_opt {
517-
uint64 max_statistics_size = 14;
518-
}
519-
520512
oneof column_index_truncate_length_opt {
521513
uint64 column_index_truncate_length = 17;
522514
}

datafusion/proto-common/src/from_proto/mod.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -924,12 +924,6 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions {
924924
protobuf::parquet_options::StatisticsEnabledOpt::StatisticsEnabled(v) => Some(v),
925925
})
926926
.unwrap_or(None),
927-
max_statistics_size: value
928-
.max_statistics_size_opt.as_ref()
929-
.map(|opt| match opt {
930-
protobuf::parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => Some(*v as usize),
931-
})
932-
.unwrap_or(None),
933927
max_row_group_size: value.max_row_group_size as usize,
934928
created_by: value.created_by.clone(),
935929
column_index_truncate_length: value
@@ -984,12 +978,6 @@ impl TryFrom<&protobuf::ParquetColumnOptions> for ParquetColumnOptions {
984978
protobuf::parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled(v) => Some(v),
985979
})
986980
.unwrap_or(None),
987-
max_statistics_size: value
988-
.max_statistics_size_opt
989-
.map(|opt| match opt {
990-
protobuf::parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => Some(v as usize),
991-
})
992-
.unwrap_or(None),
993981
encoding: value
994982
.encoding_opt.clone()
995983
.map(|opt| match opt {

datafusion/proto-common/src/generated/pbjson.rs

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4448,9 +4448,6 @@ impl serde::Serialize for ParquetColumnOptions {
44484448
if self.bloom_filter_ndv_opt.is_some() {
44494449
len += 1;
44504450
}
4451-
if self.max_statistics_size_opt.is_some() {
4452-
len += 1;
4453-
}
44544451
let mut struct_ser = serializer.serialize_struct("datafusion_common.ParquetColumnOptions", len)?;
44554452
if let Some(v) = self.bloom_filter_enabled_opt.as_ref() {
44564453
match v {
@@ -4503,13 +4500,6 @@ impl serde::Serialize for ParquetColumnOptions {
45034500
}
45044501
}
45054502
}
4506-
if let Some(v) = self.max_statistics_size_opt.as_ref() {
4507-
match v {
4508-
parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => {
4509-
struct_ser.serialize_field("maxStatisticsSize", v)?;
4510-
}
4511-
}
4512-
}
45134503
struct_ser.end()
45144504
}
45154505
}
@@ -4532,8 +4522,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
45324522
"bloomFilterFpp",
45334523
"bloom_filter_ndv",
45344524
"bloomFilterNdv",
4535-
"max_statistics_size",
4536-
"maxStatisticsSize",
45374525
];
45384526

45394527
#[allow(clippy::enum_variant_names)]
@@ -4545,7 +4533,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
45454533
StatisticsEnabled,
45464534
BloomFilterFpp,
45474535
BloomFilterNdv,
4548-
MaxStatisticsSize,
45494536
}
45504537
impl<'de> serde::Deserialize<'de> for GeneratedField {
45514538
fn deserialize<D>(deserializer: D) -> std::result::Result<GeneratedField, D::Error>
@@ -4574,7 +4561,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
45744561
"statisticsEnabled" | "statistics_enabled" => Ok(GeneratedField::StatisticsEnabled),
45754562
"bloomFilterFpp" | "bloom_filter_fpp" => Ok(GeneratedField::BloomFilterFpp),
45764563
"bloomFilterNdv" | "bloom_filter_ndv" => Ok(GeneratedField::BloomFilterNdv),
4577-
"maxStatisticsSize" | "max_statistics_size" => Ok(GeneratedField::MaxStatisticsSize),
45784564
_ => Err(serde::de::Error::unknown_field(value, FIELDS)),
45794565
}
45804566
}
@@ -4601,7 +4587,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
46014587
let mut statistics_enabled_opt__ = None;
46024588
let mut bloom_filter_fpp_opt__ = None;
46034589
let mut bloom_filter_ndv_opt__ = None;
4604-
let mut max_statistics_size_opt__ = None;
46054590
while let Some(k) = map_.next_key()? {
46064591
match k {
46074592
GeneratedField::BloomFilterEnabled => {
@@ -4646,12 +4631,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
46464631
}
46474632
bloom_filter_ndv_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv(x.0));
46484633
}
4649-
GeneratedField::MaxStatisticsSize => {
4650-
if max_statistics_size_opt__.is_some() {
4651-
return Err(serde::de::Error::duplicate_field("maxStatisticsSize"));
4652-
}
4653-
max_statistics_size_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(x.0));
4654-
}
46554634
}
46564635
}
46574636
Ok(ParquetColumnOptions {
@@ -4662,7 +4641,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
46624641
statistics_enabled_opt: statistics_enabled_opt__,
46634642
bloom_filter_fpp_opt: bloom_filter_fpp_opt__,
46644643
bloom_filter_ndv_opt: bloom_filter_ndv_opt__,
4665-
max_statistics_size_opt: max_statistics_size_opt__,
46664644
})
46674645
}
46684646
}
@@ -4946,9 +4924,6 @@ impl serde::Serialize for ParquetOptions {
49464924
if self.statistics_enabled_opt.is_some() {
49474925
len += 1;
49484926
}
4949-
if self.max_statistics_size_opt.is_some() {
4950-
len += 1;
4951-
}
49524927
if self.column_index_truncate_length_opt.is_some() {
49534928
len += 1;
49544929
}
@@ -5063,15 +5038,6 @@ impl serde::Serialize for ParquetOptions {
50635038
}
50645039
}
50655040
}
5066-
if let Some(v) = self.max_statistics_size_opt.as_ref() {
5067-
match v {
5068-
parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => {
5069-
#[allow(clippy::needless_borrow)]
5070-
#[allow(clippy::needless_borrows_for_generic_args)]
5071-
struct_ser.serialize_field("maxStatisticsSize", ToString::to_string(&v).as_str())?;
5072-
}
5073-
}
5074-
}
50755041
if let Some(v) = self.column_index_truncate_length_opt.as_ref() {
50765042
match v {
50775043
parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(v) => {
@@ -5158,8 +5124,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
51585124
"dictionaryEnabled",
51595125
"statistics_enabled",
51605126
"statisticsEnabled",
5161-
"max_statistics_size",
5162-
"maxStatisticsSize",
51635127
"column_index_truncate_length",
51645128
"columnIndexTruncateLength",
51655129
"encoding",
@@ -5194,7 +5158,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
51945158
Compression,
51955159
DictionaryEnabled,
51965160
StatisticsEnabled,
5197-
MaxStatisticsSize,
51985161
ColumnIndexTruncateLength,
51995162
Encoding,
52005163
BloomFilterFpp,
@@ -5243,7 +5206,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
52435206
"compression" => Ok(GeneratedField::Compression),
52445207
"dictionaryEnabled" | "dictionary_enabled" => Ok(GeneratedField::DictionaryEnabled),
52455208
"statisticsEnabled" | "statistics_enabled" => Ok(GeneratedField::StatisticsEnabled),
5246-
"maxStatisticsSize" | "max_statistics_size" => Ok(GeneratedField::MaxStatisticsSize),
52475209
"columnIndexTruncateLength" | "column_index_truncate_length" => Ok(GeneratedField::ColumnIndexTruncateLength),
52485210
"encoding" => Ok(GeneratedField::Encoding),
52495211
"bloomFilterFpp" | "bloom_filter_fpp" => Ok(GeneratedField::BloomFilterFpp),
@@ -5290,7 +5252,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
52905252
let mut compression_opt__ = None;
52915253
let mut dictionary_enabled_opt__ = None;
52925254
let mut statistics_enabled_opt__ = None;
5293-
let mut max_statistics_size_opt__ = None;
52945255
let mut column_index_truncate_length_opt__ = None;
52955256
let mut encoding_opt__ = None;
52965257
let mut bloom_filter_fpp_opt__ = None;
@@ -5449,12 +5410,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
54495410
}
54505411
statistics_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_options::StatisticsEnabledOpt::StatisticsEnabled);
54515412
}
5452-
GeneratedField::MaxStatisticsSize => {
5453-
if max_statistics_size_opt__.is_some() {
5454-
return Err(serde::de::Error::duplicate_field("maxStatisticsSize"));
5455-
}
5456-
max_statistics_size_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(x.0));
5457-
}
54585413
GeneratedField::ColumnIndexTruncateLength => {
54595414
if column_index_truncate_length_opt__.is_some() {
54605415
return Err(serde::de::Error::duplicate_field("columnIndexTruncateLength"));
@@ -5505,7 +5460,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
55055460
compression_opt: compression_opt__,
55065461
dictionary_enabled_opt: dictionary_enabled_opt__,
55075462
statistics_enabled_opt: statistics_enabled_opt__,
5508-
max_statistics_size_opt: max_statistics_size_opt__,
55095463
column_index_truncate_length_opt: column_index_truncate_length_opt__,
55105464
encoding_opt: encoding_opt__,
55115465
bloom_filter_fpp_opt: bloom_filter_fpp_opt__,

0 commit comments

Comments
 (0)