Skip to content

Commit 23fbfed

Browse files
committed
More bson deserialization work
1 parent 9c22595 commit 23fbfed

File tree

9 files changed

+363
-68
lines changed

9 files changed

+363
-68
lines changed

crates/core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ features = []
3434

3535

3636
[features]
37-
default = ["getrandom"]
37+
default = ["getrandom", "loadable_extension"]
3838

3939
loadable_extension = ["sqlite_nostd/loadable_extension"]
4040
static = ["sqlite_nostd/static"]

crates/core/src/bson/de.rs

Lines changed: 226 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,97 @@
1+
use core::assert_matches::debug_assert_matches;
2+
13
use serde::{
2-
de::{self, DeserializeSeed, MapAccess, Visitor},
4+
de::{
5+
self, DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, VariantAccess,
6+
Visitor,
7+
},
38
forward_to_deserialize_any,
49
};
510

611
use super::{
12+
error::ErrorKind,
713
parser::{ElementType, Parser},
814
BsonError,
915
};
1016

1117
pub struct Deserializer<'de> {
1218
parser: Parser<'de>,
13-
is_outside_of_document: bool,
14-
pending_value_type: Option<ElementType>,
15-
consumed_name: bool,
19+
position: DeserializerPosition,
20+
}
21+
22+
#[derive(Clone, Debug)]
23+
enum DeserializerPosition {
24+
/// The deserializer is outside of the initial document header.
25+
OutsideOfDocument,
26+
/// The deserializer expects the beginning of a key-value pair, or the end of the current
27+
/// document.
28+
BeforeTypeOrAtEndOfDocument,
29+
/// The deserializer has read past the type of a key-value pair, but did not scan the name yet.
30+
BeforeName { pending_type: ElementType },
31+
/// Read type and name of a key-value pair, position is before the value now.
32+
BeforeValue { pending_type: ElementType },
1633
}
1734

1835
impl<'de> Deserializer<'de> {
1936
pub fn outside_of_document(parser: Parser<'de>) -> Self {
2037
Self {
2138
parser,
22-
is_outside_of_document: true,
23-
pending_value_type: None,
24-
consumed_name: false,
39+
position: DeserializerPosition::OutsideOfDocument,
40+
}
41+
}
42+
43+
/// Prepares to read a value without actually reading it, returning the [ElementType] of the
44+
/// upcoming key-value pair.
45+
fn prepare_to_read_value(&mut self) -> Result<ElementType, BsonError> {
46+
match self.position.clone() {
47+
DeserializerPosition::OutsideOfDocument => {
48+
// The next value we're reading is a document
49+
self.position = DeserializerPosition::BeforeValue {
50+
pending_type: ElementType::Document,
51+
};
52+
Ok(ElementType::Document)
53+
}
54+
DeserializerPosition::BeforeValue { pending_type } => Ok(pending_type),
55+
DeserializerPosition::BeforeTypeOrAtEndOfDocument { .. } => {
56+
Err(self.parser.error(ErrorKind::InvalidStateExpectedType))
57+
}
58+
DeserializerPosition::BeforeName { .. } => {
59+
Err(self.parser.error(ErrorKind::InvalidStateExpectedName))
60+
}
61+
}
62+
}
63+
64+
/// If the deserializer is in a suitable position, read the upcoming key.
65+
fn read_entry_key(&mut self) -> Result<Option<&'de str>, BsonError> {
66+
match self.position.clone() {
67+
DeserializerPosition::BeforeName { pending_type } => {
68+
self.position = DeserializerPosition::BeforeValue {
69+
pending_type: pending_type,
70+
};
71+
Ok(Some(self.parser.read_cstr()?))
72+
}
73+
_ => Ok(None),
74+
}
75+
}
76+
77+
fn object_reader(&mut self) -> Result<BsonObject<'de>, BsonError> {
78+
let parser = self.parser.document_scope()?;
79+
let deserializer = Deserializer {
80+
parser,
81+
position: DeserializerPosition::BeforeTypeOrAtEndOfDocument,
82+
};
83+
Ok(BsonObject { de: deserializer })
84+
}
85+
86+
fn advance_to_next_name(&mut self) -> Result<Option<()>, BsonError> {
87+
if self.parser.end_document()? {
88+
return Ok(None);
2589
}
90+
91+
self.position = DeserializerPosition::BeforeName {
92+
pending_type: self.parser.read_element_type()?,
93+
};
94+
Ok(Some(()))
2695
}
2796
}
2897

@@ -33,91 +102,185 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
33102
where
34103
V: Visitor<'de>,
35104
{
36-
// BSON always start with a document, so we need this for the outermost visit_map.
37-
if self.is_outside_of_document {
38-
self.parser = self.parser.document_scope()?;
39-
self.is_outside_of_document = false;
105+
let element_type = self.prepare_to_read_value()?;
40106

41-
let object = BsonObject { de: self };
42-
return visitor.visit_map(object);
107+
match element_type {
108+
ElementType::Double => visitor.visit_f64(self.parser.read_double()?),
109+
ElementType::String => visitor.visit_borrowed_str(self.parser.read_string()?),
110+
ElementType::Document => {
111+
let object = self.object_reader()?;
112+
visitor.visit_map(object)
113+
}
114+
ElementType::Array => {
115+
let object = self.object_reader()?;
116+
visitor.visit_seq(object)
117+
}
118+
ElementType::Binary => {
119+
let (_, bytes) = self.parser.read_binary()?;
120+
visitor.visit_borrowed_bytes(bytes)
121+
}
122+
ElementType::ObjectId => todo!(),
123+
ElementType::Boolean => visitor.visit_bool(self.parser.read_bool()?),
124+
ElementType::DatetimeUtc => todo!(),
125+
ElementType::Null | ElementType::Undefined => visitor.visit_none(),
126+
ElementType::Int32 => visitor.visit_i32(self.parser.read_int32()?),
127+
ElementType::Int64 => visitor.visit_i64(self.parser.read_int64()?),
128+
ElementType::Timestamp => todo!(),
43129
}
130+
}
44131

45-
if !self.consumed_name {
46-
self.consumed_name = true;
47-
// We've read an element type, but not the associated name. Do that now.
48-
return visitor.visit_borrowed_str(self.parser.read_cstr()?);
49-
}
132+
fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
133+
where
134+
V: Visitor<'de>,
135+
{
136+
let Some(key) = self.read_entry_key()? else {
137+
return Err(self.parser.error(ErrorKind::InvalidStateExpectedName));
138+
};
50139

51-
if let Some(element_type) = self.pending_value_type.take() {
52-
return match element_type {
53-
ElementType::Double => visitor.visit_f64(self.parser.read_double()?),
54-
ElementType::String => visitor.visit_borrowed_str(self.parser.read_string()?),
55-
ElementType::Document => {
56-
let parser = self.parser.document_scope()?;
57-
let mut deserializer = Deserializer {
58-
parser,
59-
is_outside_of_document: false,
60-
pending_value_type: None,
61-
consumed_name: false,
62-
};
63-
let object = BsonObject {
64-
de: &mut deserializer,
65-
};
66-
67-
visitor.visit_map(object)
68-
}
69-
ElementType::Array => todo!(),
70-
ElementType::Binary => {
71-
let (_, bytes) = self.parser.read_binary()?;
72-
visitor.visit_borrowed_bytes(bytes)
73-
}
74-
ElementType::ObjectId => todo!(),
75-
ElementType::Boolean => visitor.visit_bool(self.parser.read_bool()?),
76-
ElementType::DatetimeUtc => todo!(),
77-
ElementType::Null | ElementType::Undefined => visitor.visit_none(),
78-
ElementType::Int32 => visitor.visit_i32(self.parser.read_int32()?),
79-
ElementType::Int64 => visitor.visit_i64(self.parser.read_int64()?),
80-
ElementType::Timestamp => todo!(),
81-
};
140+
visitor.visit_borrowed_str(key)
141+
}
142+
143+
fn deserialize_enum<V>(
144+
self,
145+
name: &'static str,
146+
variants: &'static [&'static str],
147+
visitor: V,
148+
) -> Result<V::Value, Self::Error>
149+
where
150+
V: Visitor<'de>,
151+
{
152+
let kind = self.prepare_to_read_value()?;
153+
match kind {
154+
ElementType::String => {
155+
visitor.visit_enum(self.parser.read_string()?.into_deserializer())
156+
}
157+
ElementType::Document => {
158+
let mut object = self.object_reader()?;
159+
visitor.visit_enum(object)
160+
}
161+
_ => Err(self.parser.error(ErrorKind::ExpectedEnum { actual: kind })),
82162
}
163+
}
83164

84-
todo!()
165+
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
166+
where
167+
V: Visitor<'de>,
168+
{
169+
let kind = self.prepare_to_read_value()?;
170+
match kind {
171+
ElementType::Null => visitor.visit_none(),
172+
_ => visitor.visit_some(self),
173+
}
85174
}
86175

87176
forward_to_deserialize_any! {
88177
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
89-
bytes byte_buf option unit unit_struct newtype_struct seq tuple
90-
tuple_struct map struct enum identifier ignored_any
178+
bytes byte_buf unit unit_struct newtype_struct seq tuple
179+
tuple_struct map struct ignored_any
91180
}
92181
}
93-
struct BsonObject<'a, 'de: 'a> {
94-
de: &'a mut Deserializer<'de>,
182+
183+
struct BsonObject<'de> {
184+
de: Deserializer<'de>,
95185
}
96186

97-
impl<'de, 'a> MapAccess<'de> for BsonObject<'a, 'de> {
187+
impl<'de> MapAccess<'de> for BsonObject<'de> {
98188
type Error = BsonError;
99189

100190
fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>, Self::Error>
101191
where
102192
K: DeserializeSeed<'de>,
103193
{
104-
if self.de.parser.end_document()? {
194+
if let None = self.de.advance_to_next_name()? {
195+
return Ok(None);
196+
}
197+
Ok(Some(seed.deserialize(&mut self.de)?))
198+
}
199+
200+
fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value, Self::Error>
201+
where
202+
V: DeserializeSeed<'de>,
203+
{
204+
seed.deserialize(&mut self.de)
205+
}
206+
}
207+
208+
impl<'de> SeqAccess<'de> for BsonObject<'de> {
209+
type Error = BsonError;
210+
211+
fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
212+
where
213+
T: DeserializeSeed<'de>,
214+
{
215+
// Array elements are encoded as an object like `{"0": value, "1": another}`
216+
if let None = self.de.advance_to_next_name()? {
105217
return Ok(None);
106218
}
107219

108-
self.de.pending_value_type = Some(self.de.parser.read_element_type()?);
109-
self.de.consumed_name = false;
220+
// Skip name
221+
debug_assert_matches!(self.de.position, DeserializerPosition::BeforeName { .. });
222+
self.de.read_entry_key()?;
110223

111-
Ok(Some(seed.deserialize(&mut *self.de)?))
224+
// And deserialize value!
225+
Ok(Some(seed.deserialize(&mut self.de)?))
112226
}
227+
}
113228

114-
fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value, Self::Error>
229+
impl<'de> EnumAccess<'de> for BsonObject<'de> {
230+
type Error = BsonError;
231+
type Variant = Self;
232+
233+
fn variant_seed<V>(mut self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
115234
where
116235
V: DeserializeSeed<'de>,
117236
{
118-
debug_assert!(self.de.consumed_name);
119-
debug_assert!(self.de.pending_value_type.is_some());
237+
if let None = self.de.advance_to_next_name()? {
238+
return Err(self
239+
.de
240+
.parser
241+
.error(ErrorKind::UnexpectedEndOfDocumentForEnumVariant));
242+
}
243+
244+
let value = seed.deserialize(&mut self.de)?;
245+
Ok((value, self))
246+
}
247+
}
248+
249+
impl<'de> VariantAccess<'de> for BsonObject<'de> {
250+
type Error = BsonError;
251+
252+
fn unit_variant(self) -> Result<(), Self::Error> {
253+
// Unit variants are encoded as simple string values, which are handled directly in
254+
// Deserializer::deserialize_enum.
255+
Err(self.de.parser.error(ErrorKind::ExpectedString))
256+
}
257+
258+
fn newtype_variant_seed<T>(mut self, seed: T) -> Result<T::Value, Self::Error>
259+
where
260+
T: DeserializeSeed<'de>,
261+
{
262+
// Newtype variants are represented as `{ NAME: VALUE }`, so we just have to deserialize the
263+
// value here.
264+
seed.deserialize(&mut self.de)
265+
}
120266

121-
seed.deserialize(&mut *self.de)
267+
fn tuple_variant<V>(mut self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
268+
where
269+
V: Visitor<'de>,
270+
{
271+
// Tuple variants are represented as `{ NAME: VALUES[] }`, so we deserialize the array here.
272+
de::Deserializer::deserialize_seq(&mut self.de, visitor)
273+
}
274+
275+
fn struct_variant<V>(
276+
mut self,
277+
fields: &'static [&'static str],
278+
visitor: V,
279+
) -> Result<V::Value, Self::Error>
280+
where
281+
V: Visitor<'de>,
282+
{
283+
// Struct variants are represented as `{ NAME: { ... } }`, so we deserialize the struct.
284+
de::Deserializer::deserialize_map(&mut self.de, visitor)
122285
}
123286
}

crates/core/src/bson/error.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ use serde::de::{self, StdError};
88

99
use crate::error::SQLiteError;
1010

11+
use super::parser::ElementType;
12+
1113
#[derive(Debug)]
1214
pub struct BsonError {
1315
/// Using a [Box] here keeps the size of this type as small, which makes results of this error
@@ -31,6 +33,12 @@ pub enum ErrorKind {
3133
UnexpectedEoF,
3234
InvalidEndOfDocument,
3335
InvalidSize,
36+
InvalidStateExpectedType,
37+
InvalidStateExpectedName,
38+
InvalidStateExpectedValue,
39+
ExpectedEnum { actual: ElementType },
40+
ExpectedString,
41+
UnexpectedEndOfDocumentForEnumVariant,
3442
}
3543

3644
impl BsonError {

0 commit comments

Comments
 (0)