Skip to content

Commit 9951588

Browse files
authored
feat: variant data type support (#170)
1 parent a9cb053 commit 9951588

File tree

10 files changed

+398
-16
lines changed

10 files changed

+398
-16
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77
<!-- next-header -->
88

99
## [Unreleased] - ReleaseDate
10+
### Added
11+
- [Variant data type](https://clickhouse.com/docs/en/sql-reference/data-types/variant) support ([#170]).
12+
13+
[#170]: https://github.com/ClickHouse/clickhouse-rs/pull/170
1014

1115
## [0.13.1] - 2024-10-21
1216
### Added

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ required-features = ["rustls-tls"]
5353
name = "data_types_derive_simple"
5454
required-features = ["time", "uuid"]
5555

56+
[[example]]
57+
name = "data_types_variant"
58+
required-features = ["time"]
59+
5660
[profile.release]
5761
debug = true
5862

README.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,13 +440,36 @@ How to choose between all these features? Here are some considerations:
440440
}
441441
```
442442
</details>
443+
* `Variant` data type is supported as a Rust enum. As the inner Variant types are _always_ sorted alphabetically, Rust enum variants must be defined in _exactly_ the same order as the types appear in the data type; their names are irrelevant, only the order of the types matters. The following example has a column defined as `Variant(Array(UInt16), Bool, Date, String, UInt32)`:
444+
<details>
445+
<summary>Example</summary>
446+
447+
```rust,ignore
448+
#[derive(Serialize, Deserialize)]
449+
enum MyRowVariant {
450+
Array(Vec<u16>),
451+
Boolean(bool),
452+
#[serde(with = "clickhouse::serde::time::date")]
453+
Date(time::Date),
454+
String(String),
455+
UInt32(u32),
456+
}
457+
458+
#[derive(Row, Serialize, Deserialize)]
459+
struct MyRow {
460+
id: u64,
461+
var: MyRowVariant,
462+
}
463+
```
464+
</details>
443465
* [New `JSON` data type](https://clickhouse.com/docs/en/sql-reference/data-types/newjson) is currently supported as a string when using ClickHouse 24.10+. See [this example](examples/data_types_new_json.rs) for more details.
444-
* `Variant`, `Dynamic` types are not supported for now.
466+
* `Dynamic` data type is not supported for now.
445467
446468
See also the additional examples:
447469
448470
* [Simpler ClickHouse data types](examples/data_types_derive_simple.rs)
449471
* [Container-like ClickHouse data types](examples/data_types_derive_containers.rs)
472+
* [Variant data type](examples/data_types_variant.rs)
450473
451474
## Mocking
452475
The crate provides utils for mocking CH server and testing DDL, `SELECT`, `INSERT` and `WATCH` queries.

examples/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ If something is missing, or you found a mistake in one of these examples, please
1919

2020
- [data_types_derive_simple.rs](data_types_derive_simple.rs) - deriving simpler ClickHouse data types in a struct. Required cargo features: `time`, `uuid`.
2121
- [data_types_derive_containers.rs](data_types_derive_containers.rs) - deriving container-like (Array, Tuple, Map, Nested, Geo) ClickHouse data types in a struct.
22+
- [data_types_variant.rs](data_types_variant.rs) - working with the [Variant data type](https://clickhouse.com/docs/en/sql-reference/data-types/variant).
2223
- [data_types_new_json.rs](data_types_new_json.rs) - working with the [new JSON data type](https://clickhouse.com/docs/en/sql-reference/data-types/newjson) as a String.
2324

2425
### Special cases

examples/data_types_variant.rs

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
use clickhouse_derive::Row;
2+
use serde::{Deserialize, Serialize};
3+
4+
use clickhouse::sql::Identifier;
5+
use clickhouse::{error::Result, Client};
6+
7+
// See also: https://clickhouse.com/docs/en/sql-reference/data-types/variant
8+
9+
#[tokio::main]
10+
async fn main() -> Result<()> {
11+
let table_name = "chrs_data_types_variant";
12+
let client = Client::default().with_url("http://localhost:8123");
13+
14+
// No matter the order of the definition on the Variant types in the DDL, this particular Variant will always be sorted as follows:
15+
// Variant(Array(UInt16), Bool, FixedString(6), Float32, Float64, Int128, Int16, Int32, Int64, Int8, String, UInt128, UInt16, UInt32, UInt64, UInt8)
16+
client
17+
.query(
18+
"
19+
CREATE OR REPLACE TABLE ?
20+
(
21+
`id` UInt64,
22+
`var` Variant(
23+
Array(UInt16),
24+
Bool,
25+
Date,
26+
FixedString(6),
27+
Float32, Float64,
28+
Int128, Int16, Int32, Int64, Int8,
29+
String,
30+
UInt128, UInt16, UInt32, UInt64, UInt8
31+
)
32+
)
33+
ENGINE = MergeTree
34+
ORDER BY id",
35+
)
36+
.bind(Identifier(table_name))
37+
.with_option("allow_experimental_variant_type", "1")
38+
// This is required only if we are mixing similar types in the Variant definition
39+
// In this case, this is various Int/UInt types, Float32/Float64, and String/FixedString
40+
// Omit this option if there are no similar types in the definition
41+
.with_option("allow_suspicious_variant_types", "1")
42+
.execute()
43+
.await?;
44+
45+
let mut insert = client.insert(table_name)?;
46+
let rows_to_insert = get_rows();
47+
for row in rows_to_insert {
48+
insert.write(&row).await?;
49+
}
50+
insert.end().await?;
51+
52+
let rows = client
53+
.query("SELECT ?fields FROM ?")
54+
.bind(Identifier(table_name))
55+
.fetch_all::<MyRow>()
56+
.await?;
57+
58+
println!("{rows:#?}");
59+
Ok(())
60+
}
61+
62+
fn get_rows() -> Vec<MyRow> {
63+
vec![
64+
MyRow {
65+
id: 1,
66+
var: MyRowVariant::Array(vec![1, 2]),
67+
},
68+
MyRow {
69+
id: 2,
70+
var: MyRowVariant::Boolean(true),
71+
},
72+
MyRow {
73+
id: 3,
74+
var: MyRowVariant::Date(
75+
time::Date::from_calendar_date(2021, time::Month::January, 1).unwrap(),
76+
),
77+
},
78+
MyRow {
79+
id: 4,
80+
var: MyRowVariant::FixedString(*b"foobar"),
81+
},
82+
MyRow {
83+
id: 5,
84+
var: MyRowVariant::Float32(100.5),
85+
},
86+
MyRow {
87+
id: 6,
88+
var: MyRowVariant::Float64(200.1),
89+
},
90+
MyRow {
91+
id: 7,
92+
var: MyRowVariant::Int8(2),
93+
},
94+
MyRow {
95+
id: 8,
96+
var: MyRowVariant::Int16(3),
97+
},
98+
MyRow {
99+
id: 9,
100+
var: MyRowVariant::Int32(4),
101+
},
102+
MyRow {
103+
id: 10,
104+
var: MyRowVariant::Int64(5),
105+
},
106+
MyRow {
107+
id: 11,
108+
var: MyRowVariant::Int128(6),
109+
},
110+
MyRow {
111+
id: 12,
112+
var: MyRowVariant::String("my_string".to_string()),
113+
},
114+
MyRow {
115+
id: 13,
116+
var: MyRowVariant::UInt8(7),
117+
},
118+
MyRow {
119+
id: 14,
120+
var: MyRowVariant::UInt16(8),
121+
},
122+
MyRow {
123+
id: 15,
124+
var: MyRowVariant::UInt32(9),
125+
},
126+
MyRow {
127+
id: 16,
128+
var: MyRowVariant::UInt64(10),
129+
},
130+
MyRow {
131+
id: 17,
132+
var: MyRowVariant::UInt128(11),
133+
},
134+
]
135+
}
136+
137+
// As the inner Variant types are _always_ sorted alphabetically,
138+
// Rust enum variants should be defined in the _exactly_ same order as it is in the data type;
139+
// their names are irrelevant, only the order of the types matters.
140+
// This enum represents Variant(Array(UInt16), Bool, Date, FixedString(6), Float32, Float64, Int128, Int16, Int32, Int64, Int8, String, UInt128, UInt16, UInt32, UInt64, UInt8)
141+
#[derive(Debug, PartialEq, Serialize, Deserialize)]
142+
enum MyRowVariant {
143+
Array(Vec<i16>),
144+
Boolean(bool),
145+
// attributes should work in this case, too
146+
#[serde(with = "clickhouse::serde::time::date")]
147+
Date(time::Date),
148+
// NB: by default, fetched as raw bytes
149+
FixedString([u8; 6]),
150+
Float32(f32),
151+
Float64(f64),
152+
Int128(i128),
153+
Int16(i16),
154+
Int32(i32),
155+
Int64(i64),
156+
Int8(i8),
157+
String(String),
158+
UInt128(u128),
159+
UInt16(i16),
160+
UInt32(u32),
161+
UInt64(u64),
162+
UInt8(i8),
163+
}
164+
165+
/// One row of the example table: the `id` column plus the `var` Variant column.
#[derive(Debug, PartialEq, Row, Serialize, Deserialize)]
166+
struct MyRow {
167+
id: u64,
168+
var: MyRowVariant,
169+
}

src/error.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,16 @@ pub enum Error {
3232
InvalidUtf8Encoding(#[from] Utf8Error),
3333
#[error("tag for enum is not valid")]
3434
InvalidTagEncoding(usize),
35+
#[error("max number of types in the Variant data type is 255, got {0}")]
36+
VariantDiscriminatorIsOutOfBound(usize),
3537
#[error("a custom error message from serde: {0}")]
3638
Custom(String),
3739
#[error("bad response: {0}")]
3840
BadResponse(String),
3941
#[error("timeout expired")]
4042
TimedOut,
43+
#[error("unsupported: {0}")]
44+
Unsupported(String),
4145
}
4246

4347
assert_impl_all!(Error: StdError, Send, Sync);

src/rowbinary/de.rs

Lines changed: 64 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
use std::{convert::TryFrom, mem, str};
22

3+
use crate::error::{Error, Result};
34
use bytes::Buf;
5+
use serde::de::{EnumAccess, VariantAccess};
46
use serde::{
57
de::{DeserializeSeed, Deserializer, SeqAccess, Visitor},
68
Deserialize,
79
};
810

9-
use crate::error::{Error, Result};
10-
1111
/// Deserializes a value from `input` with a row encoded in `RowBinary`.
1212
///
1313
/// It accepts _a reference to_ a byte slice because it somehow leads to a more
@@ -146,14 +146,73 @@ impl<'cursor, 'data> Deserializer<'data> for &mut RowBinaryDeserializer<'cursor,
146146
visitor.visit_byte_buf(self.read_vec(size)?)
147147
}
148148

149+
/// Reads an identifier as a single `u8` by delegating to `deserialize_u8`.
///
/// NOTE(review): presumably this is what serde's derived enum code calls to read
/// the variant tag handed out by `deserialize_enum` — confirm against serde_derive.
#[inline]
150+
fn deserialize_identifier<V: Visitor<'data>>(self, visitor: V) -> Result<V::Value> {
151+
self.deserialize_u8(visitor)
152+
}
153+
149154
#[inline]
150155
fn deserialize_enum<V: Visitor<'data>>(
151156
self,
152-
name: &'static str,
157+
_name: &'static str,
153158
_variants: &'static [&'static str],
154-
_visitor: V,
159+
visitor: V,
155160
) -> Result<V::Value> {
156-
panic!("enums are unsupported: `{name}`");
161+
struct Access<'de, 'cursor, 'data> {
162+
deserializer: &'de mut RowBinaryDeserializer<'cursor, 'data>,
163+
}
164+
struct VariantDeserializer<'de, 'cursor, 'data> {
165+
deserializer: &'de mut RowBinaryDeserializer<'cursor, 'data>,
166+
}
167+
impl<'data> VariantAccess<'data> for VariantDeserializer<'_, '_, 'data> {
168+
type Error = Error;
169+
170+
fn unit_variant(self) -> Result<()> {
171+
Err(Error::Unsupported("unit variants".to_string()))
172+
}
173+
174+
fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
175+
where
176+
T: DeserializeSeed<'data>,
177+
{
178+
DeserializeSeed::deserialize(seed, &mut *self.deserializer)
179+
}
180+
181+
fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value>
182+
where
183+
V: Visitor<'data>,
184+
{
185+
self.deserializer.deserialize_tuple(len, visitor)
186+
}
187+
188+
fn struct_variant<V>(
189+
self,
190+
fields: &'static [&'static str],
191+
visitor: V,
192+
) -> Result<V::Value>
193+
where
194+
V: Visitor<'data>,
195+
{
196+
self.deserializer.deserialize_tuple(fields.len(), visitor)
197+
}
198+
}
199+
200+
impl<'de, 'cursor, 'data> EnumAccess<'data> for Access<'de, 'cursor, 'data> {
201+
type Error = Error;
202+
type Variant = VariantDeserializer<'de, 'cursor, 'data>;
203+
204+
fn variant_seed<T>(self, seed: T) -> Result<(T::Value, Self::Variant), Self::Error>
205+
where
206+
T: DeserializeSeed<'data>,
207+
{
208+
let value = seed.deserialize(&mut *self.deserializer)?;
209+
let deserializer = VariantDeserializer {
210+
deserializer: self.deserializer,
211+
};
212+
Ok((value, deserializer))
213+
}
214+
}
215+
visitor.visit_enum(Access { deserializer: self })
157216
}
158217

159218
#[inline]
@@ -222,11 +281,6 @@ impl<'cursor, 'data> Deserializer<'data> for &mut RowBinaryDeserializer<'cursor,
222281
self.deserialize_tuple(fields.len(), visitor)
223282
}
224283

225-
#[inline]
226-
fn deserialize_identifier<V: Visitor<'data>>(self, _visitor: V) -> Result<V::Value> {
227-
panic!("identifiers are unsupported");
228-
}
229-
230284
#[inline]
231285
fn deserialize_newtype_struct<V: Visitor<'data>>(
232286
self,

src/rowbinary/ser.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,26 @@ impl<'a, B: BufMut> Serializer for &'a mut RowBinarySerializer<B> {
134134
#[inline]
135135
fn serialize_newtype_variant<T: Serialize + ?Sized>(
136136
self,
137-
name: &'static str,
138-
_variant_index: u32,
139-
variant: &'static str,
140-
_value: &T,
137+
_name: &'static str,
138+
variant_index: u32,
139+
_variant: &'static str,
140+
value: &T,
141141
) -> Result<()> {
142-
panic!("newtype variant types are unsupported: `{name}::{variant}`");
142+
// TODO:
143+
// - Now this code implicitly allows using enums at the top level.
144+
// However, instead of a more descriptive panic, it ends with a "not enough data." error.
145+
// - Also, it produces an unclear message for a forgotten `serde_repr` (Enum8 and Enum16).
146+
// See https://github.com/ClickHouse/clickhouse-rs/pull/170#discussion_r1848549636
147+
148+
// Max number of types in the Variant data type is 255
149+
// See also: https://github.com/ClickHouse/ClickHouse/issues/54864
150+
if variant_index > 255 {
151+
return Err(Error::VariantDiscriminatorIsOutOfBound(
152+
variant_index as usize,
153+
));
154+
}
155+
self.buffer.put_u8(variant_index as u8);
156+
value.serialize(self)
143157
}
144158

145159
#[inline]

tests/it/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ mod query;
6666
mod time;
6767
mod user_agent;
6868
mod uuid;
69+
mod variant;
6970
mod watch;
7071

7172
const HOST: &str = "localhost:8123";

0 commit comments

Comments
 (0)