Skip to content

Commit 2fe785b

Browse files
committed
remove unused max_statistics_size field
1 parent 00a66bd commit 2fe785b

File tree

9 files changed

+1
-144
lines changed

9 files changed

+1
-144
lines changed

datafusion/common/src/config.rs

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -459,10 +459,6 @@ config_namespace! {
459459
/// default parquet writer setting
460460
pub statistics_enabled: Option<String>, transform = str::to_lowercase, default = Some("page".into())
461461

462-
/// (writing) Sets max statistics size for any column. If NULL, uses
463-
/// default parquet writer setting
464-
pub max_statistics_size: Option<usize>, default = Some(4096)
465-
466462
/// (writing) Target maximum number of rows in each row group (defaults to 1M
467463
/// rows). Writing larger row groups requires more memory to write, but
468464
/// can get better compression and be faster to read.
@@ -1653,10 +1649,6 @@ config_namespace_with_hashmap! {
16531649
/// Sets bloom filter number of distinct values. If NULL, uses
16541650
/// default parquet options
16551651
pub bloom_filter_ndv: Option<u64>, default = None
1656-
1657-
/// Sets max statistics size for the column path. If NULL, uses
1658-
/// default parquet options
1659-
pub max_statistics_size: Option<usize>, default = None
16601652
}
16611653
}
16621654

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use parquet::{
2626
basic::{BrotliLevel, GzipLevel, ZstdLevel},
2727
file::properties::{
2828
EnabledStatistics, WriterProperties, WriterPropertiesBuilder, WriterVersion,
29-
DEFAULT_MAX_STATISTICS_SIZE, DEFAULT_STATISTICS_ENABLED,
29+
DEFAULT_STATISTICS_ENABLED,
3030
},
3131
format::KeyValue,
3232
schema::types::ColumnPath,
@@ -129,11 +129,6 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
129129
builder =
130130
builder.set_column_bloom_filter_ndv(path.clone(), bloom_filter_ndv);
131131
}
132-
133-
if let Some(max_statistics_size) = options.max_statistics_size {
134-
builder =
135-
builder.set_column_max_statistics_size(path, max_statistics_size);
136-
}
137132
}
138133

139134
Ok(builder)
@@ -154,7 +149,6 @@ impl ParquetOptions {
154149
dictionary_enabled,
155150
dictionary_page_size_limit,
156151
statistics_enabled,
157-
max_statistics_size,
158152
max_row_group_size,
159153
created_by,
160154
column_index_truncate_length,
@@ -190,9 +184,6 @@ impl ParquetOptions {
190184
.and_then(|s| parse_statistics_string(s).ok())
191185
.unwrap_or(DEFAULT_STATISTICS_ENABLED),
192186
)
193-
.set_max_statistics_size(
194-
max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE),
195-
)
196187
.set_max_row_group_size(*max_row_group_size)
197188
.set_created_by(created_by.clone())
198189
.set_column_index_truncate_length(*column_index_truncate_length)
@@ -395,7 +386,6 @@ mod tests {
395386
compression: Some("zstd(22)".into()),
396387
dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v),
397388
statistics_enabled: Some("none".into()),
398-
max_statistics_size: Some(72),
399389
encoding: Some("RLE".into()),
400390
bloom_filter_enabled: Some(true),
401391
bloom_filter_fpp: Some(0.72),
@@ -419,7 +409,6 @@ mod tests {
419409
dictionary_enabled: Some(!defaults.dictionary_enabled.unwrap_or(false)),
420410
dictionary_page_size_limit: 42,
421411
statistics_enabled: Some("chunk".into()),
422-
max_statistics_size: Some(42),
423412
max_row_group_size: 42,
424413
created_by: "wordy".into(),
425414
column_index_truncate_length: Some(42),
@@ -473,7 +462,6 @@ mod tests {
473462
),
474463
bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp),
475464
bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv),
476-
max_statistics_size: Some(props.max_statistics_size(&col)),
477465
}
478466
}
479467

@@ -523,7 +511,6 @@ mod tests {
523511
compression: default_col_props.compression,
524512
dictionary_enabled: default_col_props.dictionary_enabled,
525513
statistics_enabled: default_col_props.statistics_enabled,
526-
max_statistics_size: default_col_props.max_statistics_size,
527514
bloom_filter_on_write: default_col_props
528515
.bloom_filter_enabled
529516
.unwrap_or_default(),

datafusion/proto-common/proto/datafusion_common.proto

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -473,10 +473,6 @@ message ParquetColumnOptions {
473473
oneof bloom_filter_ndv_opt {
474474
uint64 bloom_filter_ndv = 7;
475475
}
476-
477-
oneof max_statistics_size_opt {
478-
uint32 max_statistics_size = 8;
479-
}
480476
}
481477

482478
message ParquetOptions {
@@ -514,10 +510,6 @@ message ParquetOptions {
514510
string statistics_enabled = 13;
515511
}
516512

517-
oneof max_statistics_size_opt {
518-
uint64 max_statistics_size = 14;
519-
}
520-
521513
oneof column_index_truncate_length_opt {
522514
uint64 column_index_truncate_length = 17;
523515
}

datafusion/proto-common/src/from_proto/mod.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -926,12 +926,6 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions {
926926
protobuf::parquet_options::StatisticsEnabledOpt::StatisticsEnabled(v) => Some(v),
927927
})
928928
.unwrap_or(None),
929-
max_statistics_size: value
930-
.max_statistics_size_opt.as_ref()
931-
.map(|opt| match opt {
932-
protobuf::parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => Some(*v as usize),
933-
})
934-
.unwrap_or(None),
935929
max_row_group_size: value.max_row_group_size as usize,
936930
created_by: value.created_by.clone(),
937931
column_index_truncate_length: value
@@ -986,12 +980,6 @@ impl TryFrom<&protobuf::ParquetColumnOptions> for ParquetColumnOptions {
986980
protobuf::parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled(v) => Some(v),
987981
})
988982
.unwrap_or(None),
989-
max_statistics_size: value
990-
.max_statistics_size_opt
991-
.map(|opt| match opt {
992-
protobuf::parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => Some(v as usize),
993-
})
994-
.unwrap_or(None),
995983
encoding: value
996984
.encoding_opt.clone()
997985
.map(|opt| match opt {

datafusion/proto-common/src/generated/pbjson.rs

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4466,9 +4466,6 @@ impl serde::Serialize for ParquetColumnOptions {
44664466
if self.bloom_filter_ndv_opt.is_some() {
44674467
len += 1;
44684468
}
4469-
if self.max_statistics_size_opt.is_some() {
4470-
len += 1;
4471-
}
44724469
let mut struct_ser = serializer.serialize_struct("datafusion_common.ParquetColumnOptions", len)?;
44734470
if let Some(v) = self.bloom_filter_enabled_opt.as_ref() {
44744471
match v {
@@ -4521,13 +4518,6 @@ impl serde::Serialize for ParquetColumnOptions {
45214518
}
45224519
}
45234520
}
4524-
if let Some(v) = self.max_statistics_size_opt.as_ref() {
4525-
match v {
4526-
parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => {
4527-
struct_ser.serialize_field("maxStatisticsSize", v)?;
4528-
}
4529-
}
4530-
}
45314521
struct_ser.end()
45324522
}
45334523
}
@@ -4550,8 +4540,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
45504540
"bloomFilterFpp",
45514541
"bloom_filter_ndv",
45524542
"bloomFilterNdv",
4553-
"max_statistics_size",
4554-
"maxStatisticsSize",
45554543
];
45564544

45574545
#[allow(clippy::enum_variant_names)]
@@ -4563,7 +4551,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
45634551
StatisticsEnabled,
45644552
BloomFilterFpp,
45654553
BloomFilterNdv,
4566-
MaxStatisticsSize,
45674554
}
45684555
impl<'de> serde::Deserialize<'de> for GeneratedField {
45694556
fn deserialize<D>(deserializer: D) -> std::result::Result<GeneratedField, D::Error>
@@ -4592,7 +4579,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
45924579
"statisticsEnabled" | "statistics_enabled" => Ok(GeneratedField::StatisticsEnabled),
45934580
"bloomFilterFpp" | "bloom_filter_fpp" => Ok(GeneratedField::BloomFilterFpp),
45944581
"bloomFilterNdv" | "bloom_filter_ndv" => Ok(GeneratedField::BloomFilterNdv),
4595-
"maxStatisticsSize" | "max_statistics_size" => Ok(GeneratedField::MaxStatisticsSize),
45964582
_ => Err(serde::de::Error::unknown_field(value, FIELDS)),
45974583
}
45984584
}
@@ -4619,7 +4605,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
46194605
let mut statistics_enabled_opt__ = None;
46204606
let mut bloom_filter_fpp_opt__ = None;
46214607
let mut bloom_filter_ndv_opt__ = None;
4622-
let mut max_statistics_size_opt__ = None;
46234608
while let Some(k) = map_.next_key()? {
46244609
match k {
46254610
GeneratedField::BloomFilterEnabled => {
@@ -4664,12 +4649,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
46644649
}
46654650
bloom_filter_ndv_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv(x.0));
46664651
}
4667-
GeneratedField::MaxStatisticsSize => {
4668-
if max_statistics_size_opt__.is_some() {
4669-
return Err(serde::de::Error::duplicate_field("maxStatisticsSize"));
4670-
}
4671-
max_statistics_size_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(x.0));
4672-
}
46734652
}
46744653
}
46754654
Ok(ParquetColumnOptions {
@@ -4680,7 +4659,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions {
46804659
statistics_enabled_opt: statistics_enabled_opt__,
46814660
bloom_filter_fpp_opt: bloom_filter_fpp_opt__,
46824661
bloom_filter_ndv_opt: bloom_filter_ndv_opt__,
4683-
max_statistics_size_opt: max_statistics_size_opt__,
46844662
})
46854663
}
46864664
}
@@ -4964,9 +4942,6 @@ impl serde::Serialize for ParquetOptions {
49644942
if self.statistics_enabled_opt.is_some() {
49654943
len += 1;
49664944
}
4967-
if self.max_statistics_size_opt.is_some() {
4968-
len += 1;
4969-
}
49704945
if self.column_index_truncate_length_opt.is_some() {
49714946
len += 1;
49724947
}
@@ -5081,15 +5056,6 @@ impl serde::Serialize for ParquetOptions {
50815056
}
50825057
}
50835058
}
5084-
if let Some(v) = self.max_statistics_size_opt.as_ref() {
5085-
match v {
5086-
parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => {
5087-
#[allow(clippy::needless_borrow)]
5088-
#[allow(clippy::needless_borrows_for_generic_args)]
5089-
struct_ser.serialize_field("maxStatisticsSize", ToString::to_string(&v).as_str())?;
5090-
}
5091-
}
5092-
}
50935059
if let Some(v) = self.column_index_truncate_length_opt.as_ref() {
50945060
match v {
50955061
parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(v) => {
@@ -5176,8 +5142,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
51765142
"dictionaryEnabled",
51775143
"statistics_enabled",
51785144
"statisticsEnabled",
5179-
"max_statistics_size",
5180-
"maxStatisticsSize",
51815145
"column_index_truncate_length",
51825146
"columnIndexTruncateLength",
51835147
"encoding",
@@ -5212,7 +5176,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
52125176
Compression,
52135177
DictionaryEnabled,
52145178
StatisticsEnabled,
5215-
MaxStatisticsSize,
52165179
ColumnIndexTruncateLength,
52175180
Encoding,
52185181
BloomFilterFpp,
@@ -5261,7 +5224,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
52615224
"compression" => Ok(GeneratedField::Compression),
52625225
"dictionaryEnabled" | "dictionary_enabled" => Ok(GeneratedField::DictionaryEnabled),
52635226
"statisticsEnabled" | "statistics_enabled" => Ok(GeneratedField::StatisticsEnabled),
5264-
"maxStatisticsSize" | "max_statistics_size" => Ok(GeneratedField::MaxStatisticsSize),
52655227
"columnIndexTruncateLength" | "column_index_truncate_length" => Ok(GeneratedField::ColumnIndexTruncateLength),
52665228
"encoding" => Ok(GeneratedField::Encoding),
52675229
"bloomFilterFpp" | "bloom_filter_fpp" => Ok(GeneratedField::BloomFilterFpp),
@@ -5308,7 +5270,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
53085270
let mut compression_opt__ = None;
53095271
let mut dictionary_enabled_opt__ = None;
53105272
let mut statistics_enabled_opt__ = None;
5311-
let mut max_statistics_size_opt__ = None;
53125273
let mut column_index_truncate_length_opt__ = None;
53135274
let mut encoding_opt__ = None;
53145275
let mut bloom_filter_fpp_opt__ = None;
@@ -5467,12 +5428,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
54675428
}
54685429
statistics_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_options::StatisticsEnabledOpt::StatisticsEnabled);
54695430
}
5470-
GeneratedField::MaxStatisticsSize => {
5471-
if max_statistics_size_opt__.is_some() {
5472-
return Err(serde::de::Error::duplicate_field("maxStatisticsSize"));
5473-
}
5474-
max_statistics_size_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(x.0));
5475-
}
54765431
GeneratedField::ColumnIndexTruncateLength => {
54775432
if column_index_truncate_length_opt__.is_some() {
54785433
return Err(serde::de::Error::duplicate_field("columnIndexTruncateLength"));
@@ -5523,7 +5478,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions {
55235478
compression_opt: compression_opt__,
55245479
dictionary_enabled_opt: dictionary_enabled_opt__,
55255480
statistics_enabled_opt: statistics_enabled_opt__,
5526-
max_statistics_size_opt: max_statistics_size_opt__,
55275481
column_index_truncate_length_opt: column_index_truncate_length_opt__,
55285482
encoding_opt: encoding_opt__,
55295483
bloom_filter_fpp_opt: bloom_filter_fpp_opt__,

datafusion/proto-common/src/generated/prost.rs

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -664,10 +664,6 @@ pub struct ParquetColumnOptions {
664664
pub bloom_filter_ndv_opt: ::core::option::Option<
665665
parquet_column_options::BloomFilterNdvOpt,
666666
>,
667-
#[prost(oneof = "parquet_column_options::MaxStatisticsSizeOpt", tags = "8")]
668-
pub max_statistics_size_opt: ::core::option::Option<
669-
parquet_column_options::MaxStatisticsSizeOpt,
670-
>,
671667
}
672668
/// Nested message and enum types in `ParquetColumnOptions`.
673669
pub mod parquet_column_options {
@@ -706,11 +702,6 @@ pub mod parquet_column_options {
706702
#[prost(uint64, tag = "7")]
707703
BloomFilterNdv(u64),
708704
}
709-
#[derive(Clone, Copy, PartialEq, ::prost::Oneof)]
710-
pub enum MaxStatisticsSizeOpt {
711-
#[prost(uint32, tag = "8")]
712-
MaxStatisticsSize(u32),
713-
}
714705
}
715706
#[derive(Clone, PartialEq, ::prost::Message)]
716707
pub struct ParquetOptions {
@@ -785,10 +776,6 @@ pub struct ParquetOptions {
785776
pub statistics_enabled_opt: ::core::option::Option<
786777
parquet_options::StatisticsEnabledOpt,
787778
>,
788-
#[prost(oneof = "parquet_options::MaxStatisticsSizeOpt", tags = "14")]
789-
pub max_statistics_size_opt: ::core::option::Option<
790-
parquet_options::MaxStatisticsSizeOpt,
791-
>,
792779
#[prost(oneof = "parquet_options::ColumnIndexTruncateLengthOpt", tags = "17")]
793780
pub column_index_truncate_length_opt: ::core::option::Option<
794781
parquet_options::ColumnIndexTruncateLengthOpt,
@@ -823,11 +810,6 @@ pub mod parquet_options {
823810
StatisticsEnabled(::prost::alloc::string::String),
824811
}
825812
#[derive(Clone, Copy, PartialEq, ::prost::Oneof)]
826-
pub enum MaxStatisticsSizeOpt {
827-
#[prost(uint64, tag = "14")]
828-
MaxStatisticsSize(u64),
829-
}
830-
#[derive(Clone, Copy, PartialEq, ::prost::Oneof)]
831813
pub enum ColumnIndexTruncateLengthOpt {
832814
#[prost(uint64, tag = "17")]
833815
ColumnIndexTruncateLength(u64),

datafusion/proto-common/src/to_proto/mod.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,6 @@ impl TryFrom<&ParquetOptions> for protobuf::ParquetOptions {
820820
dictionary_enabled_opt: value.dictionary_enabled.map(protobuf::parquet_options::DictionaryEnabledOpt::DictionaryEnabled),
821821
dictionary_page_size_limit: value.dictionary_page_size_limit as u64,
822822
statistics_enabled_opt: value.statistics_enabled.clone().map(protobuf::parquet_options::StatisticsEnabledOpt::StatisticsEnabled),
823-
max_statistics_size_opt: value.max_statistics_size.map(|v| protobuf::parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v as u64)),
824823
max_row_group_size: value.max_row_group_size as u64,
825824
created_by: value.created_by.clone(),
826825
column_index_truncate_length_opt: value.column_index_truncate_length.map(|v| protobuf::parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(v as u64)),
@@ -857,11 +856,6 @@ impl TryFrom<&ParquetColumnOptions> for protobuf::ParquetColumnOptions {
857856
.statistics_enabled
858857
.clone()
859858
.map(protobuf::parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled),
860-
max_statistics_size_opt: value.max_statistics_size.map(|v| {
861-
protobuf::parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(
862-
v as u32,
863-
)
864-
}),
865859
encoding_opt: value
866860
.encoding
867861
.clone()

0 commit comments

Comments
 (0)