Skip to content

Commit

Permalink
Merge pull request #8981 from youngsofun/fmt
Browse files Browse the repository at this point in the history
feat(format): better checking of format options.
  • Loading branch information
mergify[bot] authored Nov 26, 2022
2 parents d31587e + a121928 commit f4a4ea8
Show file tree
Hide file tree
Showing 37 changed files with 357 additions and 728 deletions.
134 changes: 3 additions & 131 deletions src/common/io/src/format_settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,145 +13,17 @@
// limitations under the License.

use chrono_tz::Tz;
use common_exception::ErrorCode;
use common_exception::Result;

use crate::consts::*;

// Fixed formatting rules for values nested inside a struct/array.
// When such a value is represented as a string in CSV/TSV/JSON/...,
// the output should be compatible with the format used in SQL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NestedFormatSettings {
// Byte sequences for special values; judging by the field names these are
// boolean true/false, NULL, NaN and infinity (confirm against the users).
pub true_bytes: Vec<u8>,
pub false_bytes: Vec<u8>,
pub null_bytes: Vec<u8>,
pub nan_bytes: Vec<u8>,
pub inf_bytes: Vec<u8>,
// Single-byte quote character.
pub quote_char: u8,
}

impl Default for NestedFormatSettings {
    // Builds the default nested settings: numeric-style true/false, upper-case
    // NULL, lower-case NaN/inf constants, and a single quote as `quote_char`.
    fn default() -> Self {
        let quote_char = b'\'';
        Self {
            quote_char,
            inf_bytes: INF_BYTES_LOWER.as_bytes().to_owned(),
            nan_bytes: NAN_BYTES_LOWER.as_bytes().to_owned(),
            null_bytes: NULL_BYTES_UPPER.as_bytes().to_owned(),
            false_bytes: FALSE_BYTES_NUM.as_bytes().to_owned(),
            true_bytes: TRUE_BYTES_NUM.as_bytes().to_owned(),
        }
    }
}

// Formatting options: a time zone, the settings applied to nested values,
// and the byte-level options of the outer format.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FormatSettings {
// both: presumably shared by nested ("inner") and top-level ("outer")
// values alike -- TODO confirm.
pub timezone: Tz,

// inner: settings for values nested inside a struct/array.
pub nested: NestedFormatSettings,

// outer: options for top-level values. The first five are the byte
// sequences for special values (by name: true/false, NULL, NaN, infinity).
pub true_bytes: Vec<u8>,
pub false_bytes: Vec<u8>,
pub null_bytes: Vec<u8>,
pub nan_bytes: Vec<u8>,
pub inf_bytes: Vec<u8>,
// Single-byte quote character.
pub quote_char: u8,
// Optional single-byte escape character; `None` means no escape byte is set.
pub escape: Option<u8>,

// Delimiters are stored as byte sequences, so they may be longer than one byte.
pub record_delimiter: Vec<u8>,
pub field_delimiter: Vec<u8>,
pub empty_as_default: bool,

pub json_quote_denormals: bool,
pub json_escape_forward_slashes: bool,
pub ident_case_sensitive: bool,

// NOTE(review): `parse_row_tag` falls back to "row" for an empty option,
// while the constructors in this file leave this empty -- confirm intent.
pub row_tag: Vec<u8>,
}

impl FormatSettings {
pub fn parse_escape(option: &str, default: Option<u8>) -> Option<u8> {
if option.is_empty() {
default
} else {
Some(option.as_bytes()[0])
}
}

pub fn parse_quote(option: &str) -> Result<u8> {
if option.len() != 1 {
Err(ErrorCode::InvalidArgument(
"quote_char can only contain one char",
))
} else {
Ok(option.as_bytes()[0])
}
}

pub fn parse_row_tag(option: &str) -> Result<Vec<u8>> {
if option.is_empty() {
return Ok(vec![b'r', b'o', b'w']);
}
Ok(Vec::from(option))
}

pub fn for_values_parsing() -> Self {
Self {
timezone: "UTC".parse::<Tz>().unwrap(),
nested: Default::default(),

true_bytes: TRUE_BYTES_LOWER.as_bytes().to_vec(),
false_bytes: FALSE_BYTES_LOWER.as_bytes().to_vec(),
null_bytes: NULL_BYTES_UPPER.as_bytes().to_vec(),
nan_bytes: NAN_BYTES_LOWER.as_bytes().to_vec(),
inf_bytes: INF_BYTES_LOWER.as_bytes().to_vec(),
quote_char: b'\'',
escape: Some(b'\\'),

record_delimiter: vec![b'\n'],
field_delimiter: vec![b'\t'],

// not used
empty_as_default: true,
json_quote_denormals: false,
json_escape_forward_slashes: true,
ident_case_sensitive: false,
row_tag: vec![],
}
}

fn tsv_default() -> Self {
Self {
timezone: "UTC".parse::<Tz>().unwrap(),
nested: Default::default(),

true_bytes: TRUE_BYTES_NUM.as_bytes().to_vec(),
false_bytes: FALSE_BYTES_NUM.as_bytes().to_vec(),
nan_bytes: NAN_BYTES_LOWER.as_bytes().to_vec(),
inf_bytes: INF_BYTES_LOWER.as_bytes().to_vec(),
null_bytes: NULL_BYTES_ESCAPE.as_bytes().to_vec(),
quote_char: b'\'',
escape: Some(b'\\'),

record_delimiter: vec![b'\n'],
field_delimiter: vec![b'\t'],

// not used
empty_as_default: true,
json_quote_denormals: false,
json_escape_forward_slashes: true,
ident_case_sensitive: false,
row_tag: vec![],
}
}
}

// only used for tests
impl Default for FormatSettings {
fn default() -> Self {
// NOTE(review): two tail expressions follow back to back: a call to
// `FormatSettings::tsv_default()` and a `Self { .. }` literal that sets only
// `timezone`. This looks like the removed and added lines of a diff shown
// together; confirm which one is intended before relying on this body.
FormatSettings::tsv_default()
Self {
timezone: "UTC".parse::<Tz>().unwrap(),
}
}
}
34 changes: 0 additions & 34 deletions src/query/datavalues/src/types/serializations/helper/csv.rs

This file was deleted.

143 changes: 0 additions & 143 deletions src/query/datavalues/src/types/serializations/helper/json.rs

This file was deleted.

4 changes: 0 additions & 4 deletions src/query/datavalues/src/types/serializations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ mod array;
mod boolean;
mod const_;
mod date;
pub mod helper;
mod null;
mod nullable;
mod number;
Expand All @@ -34,9 +33,6 @@ use common_io::prelude::FormatSettings;
pub use const_::ConstSerializer;
pub use date::DateSerializer;
use enum_dispatch::enum_dispatch;
pub use helper::csv::write_csv_string;
pub use helper::escape::write_escaped_string;
pub use helper::json::write_json_string;
pub use null::NullSerializer;
pub use nullable::NullableSerializer;
pub use number::NumberSerializer;
Expand Down
Loading

1 comment on commit f4a4ea8

@vercel
Copy link

@vercel vercel bot commented on f4a4ea8 Nov 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

databend – ./

databend-git-main-databend.vercel.app
databend.vercel.app
databend-databend.vercel.app
databend.rs

Please sign in to comment.