Skip to content

Commit 8ed55be

Browse files
committed
feat: add validation support for the Nested(...) type
1 parent 4998ec1 commit 8ed55be

File tree

3 files changed

+195
-44
lines changed

3 files changed

+195
-44
lines changed

src/rowbinary/validation.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::{row::RowKind, row_metadata::RowMetadata, Row};
1+
use crate::{Row, row::RowKind, row_metadata::RowMetadata};
22
use clickhouse_types::data_types::{Column, DataTypeNode, DecimalType, EnumType};
33
use std::collections::HashMap;
44
use std::fmt::Display;
@@ -331,10 +331,10 @@ impl<'caller, R: Row> SchemaValidator<R> for Option<InnerDataTypeValidator<'_, '
331331
let (full_name, full_data_type) =
332332
inner.root.get_current_column_name_and_type();
333333
panic!(
334-
"While processing column {full_name} defined as {full_data_type}: \
334+
"While processing column {full_name} defined as {full_data_type}: \
335335
Variant identifier {value} is out of bounds, max allowed index is {}",
336-
possible_types.len() - 1
337-
);
336+
possible_types.len() - 1
337+
);
338338
}
339339
let data_type = &possible_types[*value as usize];
340340
validate_impl(inner.root, data_type, &serde_type, true)
@@ -548,6 +548,10 @@ fn validate_impl<'serde, 'caller, R: Row>(
548548
root,
549549
kind: InnerDataTypeValidatorKind::Array(&DataTypeNode::LineString),
550550
}),
551+
DataTypeNode::Nested { as_tuple, .. } => Some(InnerDataTypeValidator {
552+
root,
553+
kind: InnerDataTypeValidatorKind::Array(as_tuple),
554+
}),
551555
_ => root.panic_on_schema_mismatch(data_type, serde_type, is_inner),
552556
},
553557
SerdeType::Tuple(len) => match data_type {

tests/it/nested.rs

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,110 @@ async fn smoke() {
5050

5151
assert_eq!(row, original_row);
5252
}
53+
54+
#[tokio::test]
55+
async fn no_flatten() {
56+
let client = prepare_database!().with_option("flatten_nested", "0");
57+
58+
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Row)]
59+
struct MyRow {
60+
no: i32,
61+
items: Vec<(String, u32)>,
62+
}
63+
64+
// `flatten_nested = 0` disables flattening of nested columns, so a `Nested(...)` column is
65+
// stored as a single array of tuples instead of as one separate array per nested field.
66+
67+
client
68+
.query(
69+
"
70+
CREATE TABLE test(
71+
no Int32,
72+
items Nested(
73+
name String,
74+
count UInt32
75+
)
76+
)
77+
ENGINE = MergeTree ORDER BY no
78+
",
79+
)
80+
.execute()
81+
.await
82+
.unwrap();
83+
84+
let original_row = MyRow {
85+
no: 42,
86+
items: vec![("foo".into(), 1), ("bar".into(), 5)],
87+
};
88+
89+
let mut insert = client.insert::<MyRow>("test").await.unwrap();
90+
insert.write(&original_row).await.unwrap();
91+
insert.end().await.unwrap();
92+
93+
let row = client
94+
.query("SELECT ?fields FROM test")
95+
.fetch_one::<MyRow>()
96+
.await
97+
.unwrap();
98+
99+
assert_eq!(row, original_row);
100+
}
101+
102+
#[tokio::test]
103+
async fn doubly_flattened() {
104+
let client = prepare_database!();
105+
106+
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Row)]
107+
struct MyRow {
108+
no: i32,
109+
#[serde(rename = "items.names")]
110+
items_names: Vec<Vec<(String, String)>>,
111+
#[serde(rename = "items.count")]
112+
items_count: Vec<u32>,
113+
}
114+
115+
// Only the first level is flattened; any more deeply nested columns are stored as an array
116+
// of tuples, so the table ends up with the following columns:
117+
// - `no Int32`
118+
// - `items.names Array(Nested(first String, last String))`
119+
// (i.e. `Array(Array(Tuple(first String, last String)))`)
120+
// - `items.count Array(UInt32)`
121+
122+
client
123+
.query(
124+
"
125+
CREATE TABLE test(
126+
no Int32,
127+
items Nested(
128+
names Nested(first String, last String),
129+
count UInt32
130+
)
131+
)
132+
ENGINE = MergeTree ORDER BY no
133+
",
134+
)
135+
.execute()
136+
.await
137+
.unwrap();
138+
139+
let original_row = MyRow {
140+
no: 42,
141+
items_names: vec![
142+
vec![("foo".into(), "foo".into())],
143+
vec![("bar".into(), "bar".into())],
144+
],
145+
items_count: vec![1, 5],
146+
};
147+
148+
let mut insert = client.insert::<MyRow>("test").await.unwrap();
149+
insert.write(&original_row).await.unwrap();
150+
insert.end().await.unwrap();
151+
152+
let row = client
153+
.query("SELECT ?fields FROM test")
154+
.fetch_one::<MyRow>()
155+
.await
156+
.unwrap();
157+
158+
assert_eq!(row, original_row);
159+
}

types/src/data_types.rs

Lines changed: 80 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,12 @@ pub enum DataTypeNode {
102102
Polygon,
103103
MultiPolygon,
104104

105-
Nested(Vec<Column>),
105+
Nested {
106+
columns: Vec<Column>,
107+
// Hack: the types from `columns` are duplicated here as a single tuple node, so that the
108+
// data for this column can be validated as an array of tuples.
109+
as_tuple: Box<DataTypeNode>,
110+
},
106111
}
107112

108113
impl DataTypeNode {
@@ -159,7 +164,7 @@ impl DataTypeNode {
159164
str if str.starts_with("Tuple") => parse_tuple(str),
160165
str if str.starts_with("Variant") => parse_variant(str),
161166

162-
str if str.starts_with("Nested") => Ok(Self::Nested(parse_nested(str)?)),
167+
str if str.starts_with("Nested") => parse_nested(str),
163168

164169
// ...
165170
str => Err(TypesError::TypeParsingError(format!(
@@ -280,7 +285,7 @@ impl Display for DataTypeNode {
280285
MultiLineString => write!(f, "MultiLineString"),
281286
Polygon => write!(f, "Polygon"),
282287
MultiPolygon => write!(f, "MultiPolygon"),
283-
Nested(columns) => {
288+
Nested { columns, .. } => {
284289
write!(f, "Nested(")?;
285290
for (i, column) in columns.iter().enumerate() {
286291
if i > 0 {
@@ -840,7 +845,7 @@ fn parse_enum_values_map(input: &str) -> Result<HashMap<i16, String>, TypesError
840845
.collect::<HashMap<i16, String>>())
841846
}
842847

843-
fn parse_nested(mut input: &str) -> Result<Vec<Column>, TypesError> {
848+
fn parse_nested(mut input: &str) -> Result<DataTypeNode, TypesError> {
844849
/// Removes the prefix `prefix` from `input`.
845850
fn parse_str(input: &mut &str, prefix: &str) -> Result<(), TypesError> {
846851
if input.starts_with(prefix) {
@@ -901,15 +906,18 @@ fn parse_nested(mut input: &str) -> Result<Vec<Column>, TypesError> {
901906
parse_str(&mut input, "Nested(")?;
902907

903908
let mut columns = Vec::new();
909+
let mut types = Vec::new();
910+
904911
while !input.starts_with(')') {
905912
let name = parse_identifier(&mut input)?;
906913
parse_str(&mut input, " ")?;
907914
let data_type = parse_inner_type(&mut input)?;
908915

909916
columns.push(Column {
910917
name: name.to_string(),
911-
data_type,
918+
data_type: data_type.clone(),
912919
});
920+
types.push(data_type);
913921

914922
if input.starts_with(',') {
915923
parse_str(&mut input, ", ")?;
@@ -923,7 +931,10 @@ fn parse_nested(mut input: &str) -> Result<Vec<Column>, TypesError> {
923931
}
924932

925933
parse_str(&mut input, ")")?;
926-
Ok(columns)
934+
Ok(DataTypeNode::Nested {
935+
columns,
936+
as_tuple: Box::new(DataTypeNode::Tuple(types)),
937+
})
927938
}
928939

929940
#[cfg(test)]
@@ -1580,50 +1591,73 @@ mod tests {
15801591
fn test_data_type_new_nested() {
15811592
assert_eq!(
15821593
DataTypeNode::new("Nested(foo UInt8)").unwrap(),
1583-
DataTypeNode::Nested(vec![Column::new("foo".to_string(), DataTypeNode::UInt8)])
1594+
DataTypeNode::Nested {
1595+
columns: vec![Column::new("foo".to_string(), DataTypeNode::UInt8)],
1596+
as_tuple: Box::new(DataTypeNode::Tuple(vec![DataTypeNode::UInt8])),
1597+
}
15841598
);
15851599
assert_eq!(
15861600
DataTypeNode::new("Nested(foo UInt8, bar String)").unwrap(),
1587-
DataTypeNode::Nested(vec![
1588-
Column::new("foo".to_string(), DataTypeNode::UInt8),
1589-
Column::new("bar".to_string(), DataTypeNode::String),
1590-
])
1601+
DataTypeNode::Nested {
1602+
columns: vec![
1603+
Column::new("foo".to_string(), DataTypeNode::UInt8),
1604+
Column::new("bar".to_string(), DataTypeNode::String),
1605+
],
1606+
as_tuple: Box::new(DataTypeNode::Tuple(vec![
1607+
DataTypeNode::UInt8,
1608+
DataTypeNode::String,
1609+
])),
1610+
}
15911611
);
15921612
assert_eq!(
15931613
DataTypeNode::new("Nested(foo UInt8, `bar` String)").unwrap(),
1594-
DataTypeNode::Nested(vec![
1595-
Column::new("foo".to_string(), DataTypeNode::UInt8),
1596-
Column::new("bar".to_string(), DataTypeNode::String),
1597-
])
1614+
DataTypeNode::Nested {
1615+
columns: vec![
1616+
Column::new("foo".to_string(), DataTypeNode::UInt8),
1617+
Column::new("bar".to_string(), DataTypeNode::String),
1618+
],
1619+
as_tuple: Box::new(DataTypeNode::Tuple(vec![
1620+
DataTypeNode::UInt8,
1621+
DataTypeNode::String,
1622+
])),
1623+
}
15981624
);
15991625
assert_eq!(
16001626
DataTypeNode::new("Nested(foo UInt8, `b a r` String)").unwrap(),
1601-
DataTypeNode::Nested(vec![
1602-
Column::new("foo".to_string(), DataTypeNode::UInt8),
1603-
Column::new("b a r".to_string(), DataTypeNode::String),
1604-
])
1627+
DataTypeNode::Nested {
1628+
columns: vec![
1629+
Column::new("foo".to_string(), DataTypeNode::UInt8),
1630+
Column::new("b a r".to_string(), DataTypeNode::String),
1631+
],
1632+
as_tuple: Box::new(DataTypeNode::Tuple(vec![
1633+
DataTypeNode::UInt8,
1634+
DataTypeNode::String,
1635+
])),
1636+
}
16051637
);
1638+
1639+
let foo = DataTypeNode::Enum(EnumType::Enum8, HashMap::from([(1, "f\\'(".to_string())]));
1640+
let baz = DataTypeNode::Tuple(vec![DataTypeNode::Enum(
1641+
EnumType::Enum8,
1642+
HashMap::from([(1, "f\\'()".to_string())]),
1643+
)]);
1644+
let bar = DataTypeNode::Nested {
1645+
columns: vec![Column::new("baz".to_string(), baz.clone())],
1646+
as_tuple: Box::new(DataTypeNode::Tuple(vec![baz])),
1647+
};
1648+
16061649
assert_eq!(
16071650
DataTypeNode::new(
1608-
"Nested(foo Enum8('f\\'(' = 1), `b a r` Nested(bar Tuple(Enum8('f\\'()' = 1))))"
1651+
"Nested(foo Enum8('f\\'(' = 1), `b a r` Nested(baz Tuple(Enum8('f\\'()' = 1))))"
16091652
)
16101653
.unwrap(),
1611-
DataTypeNode::Nested(vec![
1612-
Column::new(
1613-
"foo".to_string(),
1614-
DataTypeNode::Enum(EnumType::Enum8, HashMap::from([(1, "f\\'(".to_string())]),)
1615-
),
1616-
Column::new(
1617-
"b a r".to_string(),
1618-
DataTypeNode::Nested(vec![Column::new(
1619-
"bar".to_string(),
1620-
DataTypeNode::Tuple(vec![DataTypeNode::Enum(
1621-
EnumType::Enum8,
1622-
HashMap::from([(1, "f\\'()".to_string())]),
1623-
)]),
1624-
)])
1625-
),
1626-
])
1654+
DataTypeNode::Nested {
1655+
columns: vec![
1656+
Column::new("foo".to_string(), foo.clone()),
1657+
Column::new("b a r".to_string(), bar.clone()),
1658+
],
1659+
as_tuple: Box::new(DataTypeNode::Tuple(vec![foo, bar])),
1660+
}
16271661
);
16281662

16291663
assert!(DataTypeNode::new("Nested").is_err());
@@ -1744,10 +1778,16 @@ mod tests {
17441778
"Variant(UInt8, Bool)"
17451779
);
17461780
assert_eq!(
1747-
DataTypeNode::Nested(vec![
1748-
Column::new("foo".to_string(), DataTypeNode::UInt8),
1749-
Column::new("bar".to_string(), DataTypeNode::String),
1750-
])
1781+
DataTypeNode::Nested {
1782+
columns: vec![
1783+
Column::new("foo".to_string(), DataTypeNode::UInt8),
1784+
Column::new("bar".to_string(), DataTypeNode::String),
1785+
],
1786+
as_tuple: Box::new(DataTypeNode::Tuple(vec![
1787+
DataTypeNode::UInt8,
1788+
DataTypeNode::String
1789+
])),
1790+
}
17511791
.to_string(),
17521792
"Nested(foo UInt8, bar String)"
17531793
);

0 commit comments

Comments
 (0)