Skip to content

Commit 898de16

Browse files
committed
Properly read data from op entry
1 parent a19dd0b commit 898de16

File tree

8 files changed

+300
-49
lines changed

8 files changed

+300
-49
lines changed

crates/core/src/bson/de.rs

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ enum DeserializerPosition {
3333
}
3434

3535
impl<'de> Deserializer<'de> {
36+
/// When used as a name hint to [de::Deserializer::deserialize_enum], the BSON deserializer will
37+
/// report documents as a byte array view instead of parsing them.
38+
///
39+
/// This is used as an internal optimization when we want to keep a reference to a BSON sub-
40+
/// document without actually inspecting the structure of that document.
41+
pub const SPECIAL_CASE_EMBEDDED_DOCUMENT: &'static str = "\0SpecialCaseEmbedDoc";
42+
3643
pub fn outside_of_document(parser: Parser<'de>) -> Self {
3744
Self {
3845
parser,
@@ -74,13 +81,13 @@ impl<'de> Deserializer<'de> {
7481
}
7582
}
7683

77-
fn object_reader(&mut self) -> Result<BsonObject<'de>, BsonError> {
84+
fn object_reader(&mut self) -> Result<Deserializer<'de>, BsonError> {
7885
let parser = self.parser.document_scope()?;
7986
let deserializer = Deserializer {
8087
parser,
8188
position: DeserializerPosition::BeforeTypeOrAtEndOfDocument,
8289
};
83-
Ok(BsonObject { de: deserializer })
90+
Ok(deserializer)
8491
}
8592

8693
fn advance_to_next_name(&mut self) -> Result<Option<()>, BsonError> {
@@ -98,6 +105,10 @@ impl<'de> Deserializer<'de> {
98105
impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
99106
type Error = BsonError;
100107

108+
fn is_human_readable(&self) -> bool {
109+
false
110+
}
111+
101112
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
102113
where
103114
V: Visitor<'de>,
@@ -108,12 +119,12 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
108119
ElementType::Double => visitor.visit_f64(self.parser.read_double()?),
109120
ElementType::String => visitor.visit_borrowed_str(self.parser.read_string()?),
110121
ElementType::Document => {
111-
let object = self.object_reader()?;
112-
visitor.visit_map(object)
122+
let mut object = self.object_reader()?;
123+
visitor.visit_map(&mut object)
113124
}
114125
ElementType::Array => {
115-
let object = self.object_reader()?;
116-
visitor.visit_seq(object)
126+
let mut object = self.object_reader()?;
127+
visitor.visit_seq(&mut object)
117128
}
118129
ElementType::Binary => {
119130
let (_, bytes) = self.parser.read_binary()?;
@@ -150,13 +161,26 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
150161
V: Visitor<'de>,
151162
{
152163
let kind = self.prepare_to_read_value()?;
164+
165+
// With this special name, the visitor indicates that it doesn't actually want to read an
166+
// enum, it wants to read values regularly. Except that a document appearing at this
167+
// position should not be parsed, it should be forwarded as an embedded byte array.
168+
if name == Deserializer::SPECIAL_CASE_EMBEDDED_DOCUMENT {
169+
return if matches!(kind, ElementType::Document) {
170+
let object = self.object_reader()?;
171+
visitor.visit_borrowed_bytes(object.parser.remaining())
172+
} else {
173+
self.deserialize_any(visitor)
174+
};
175+
}
176+
153177
match kind {
154178
ElementType::String => {
155179
visitor.visit_enum(self.parser.read_string()?.into_deserializer())
156180
}
157181
ElementType::Document => {
158182
let mut object = self.object_reader()?;
159-
visitor.visit_enum(object)
183+
visitor.visit_enum(&mut object)
160184
}
161185
_ => Err(self.parser.error(ErrorKind::ExpectedEnum { actual: kind })),
162186
}
@@ -180,96 +204,91 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
180204
}
181205
}
182206

183-
struct BsonObject<'de> {
184-
de: Deserializer<'de>,
185-
}
186-
187-
impl<'de> MapAccess<'de> for BsonObject<'de> {
207+
impl<'de> MapAccess<'de> for Deserializer<'de> {
188208
type Error = BsonError;
189209

190210
fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>, Self::Error>
191211
where
192212
K: DeserializeSeed<'de>,
193213
{
194-
if let None = self.de.advance_to_next_name()? {
214+
if let None = self.advance_to_next_name()? {
195215
return Ok(None);
196216
}
197-
Ok(Some(seed.deserialize(&mut self.de)?))
217+
Ok(Some(seed.deserialize(self)?))
198218
}
199219

200220
fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value, Self::Error>
201221
where
202222
V: DeserializeSeed<'de>,
203223
{
204-
seed.deserialize(&mut self.de)
224+
seed.deserialize(self)
205225
}
206226
}
207227

208-
impl<'de> SeqAccess<'de> for BsonObject<'de> {
228+
impl<'de> SeqAccess<'de> for Deserializer<'de> {
209229
type Error = BsonError;
210230

211231
fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
212232
where
213233
T: DeserializeSeed<'de>,
214234
{
215235
// Array elements are encoded as an object like `{"0": value, "1": another}`
216-
if let None = self.de.advance_to_next_name()? {
236+
if let None = self.advance_to_next_name()? {
217237
return Ok(None);
218238
}
219239

220240
// Skip name
221-
debug_assert_matches!(self.de.position, DeserializerPosition::BeforeName { .. });
222-
self.de.read_entry_key()?;
241+
debug_assert_matches!(self.position, DeserializerPosition::BeforeName { .. });
242+
self.read_entry_key()?;
223243

224244
// And deserialize value!
225-
Ok(Some(seed.deserialize(&mut self.de)?))
245+
Ok(Some(seed.deserialize(self)?))
226246
}
227247
}
228248

229-
impl<'de> EnumAccess<'de> for BsonObject<'de> {
249+
impl<'a, 'de> EnumAccess<'de> for &'a mut Deserializer<'de> {
230250
type Error = BsonError;
231251
type Variant = Self;
232252

233-
fn variant_seed<V>(mut self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
253+
fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
234254
where
235255
V: DeserializeSeed<'de>,
236256
{
237-
if let None = self.de.advance_to_next_name()? {
257+
if let None = self.advance_to_next_name()? {
238258
return Err(self
239-
.de
240259
.parser
241260
.error(ErrorKind::UnexpectedEndOfDocumentForEnumVariant));
242261
}
243262

244-
let value = seed.deserialize(&mut self.de)?;
263+
let value = seed.deserialize(&mut *self)?;
245264
Ok((value, self))
246265
}
247266
}
248267

249-
impl<'de> VariantAccess<'de> for BsonObject<'de> {
268+
impl<'a, 'de> VariantAccess<'de> for &'a mut Deserializer<'de> {
250269
type Error = BsonError;
251270

252271
fn unit_variant(self) -> Result<(), Self::Error> {
253272
// Unit variants are encoded as simple string values, which are handled directly in
254273
// Deserializer::deserialize_enum.
255-
Err(self.de.parser.error(ErrorKind::ExpectedString))
274+
Err(self.parser.error(ErrorKind::ExpectedString))
256275
}
257276

258-
fn newtype_variant_seed<T>(mut self, seed: T) -> Result<T::Value, Self::Error>
277+
fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
259278
where
260279
T: DeserializeSeed<'de>,
261280
{
262281
// Newtype variants are represented as `{ NAME: VALUE }`, so we just have to deserialize the
263282
// value here.
264-
seed.deserialize(&mut self.de)
283+
seed.deserialize(self)
265284
}
266285

267-
fn tuple_variant<V>(mut self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
286+
fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
268287
where
269288
V: Visitor<'de>,
270289
{
271290
// Tuple variants are represented as `{ NAME: VALUES[] }`, so we deserialize the array here.
272-
de::Deserializer::deserialize_seq(&mut self.de, visitor)
291+
de::Deserializer::deserialize_seq(self, visitor)
273292
}
274293

275294
fn struct_variant<V>(
@@ -281,6 +300,6 @@ impl<'de> VariantAccess<'de> for BsonObject<'de> {
281300
V: Visitor<'de>,
282301
{
283302
// Struct variants are represented as `{ NAME: { ... } }`, so we deserialize the struct.
284-
de::Deserializer::deserialize_map(&mut self.de, visitor)
303+
de::Deserializer::deserialize_map(self, visitor)
285304
}
286305
}

crates/core/src/bson/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
use de::Deserializer;
1+
pub use de::Deserializer;
22
pub use error::BsonError;
33
use parser::Parser;
44
use serde::Deserialize;
5+
pub use writer::BsonWriter;
56

67
mod de;
78
mod error;
89
mod parser;
10+
mod writer;
911

1012
/// Deserializes BSON [bytes] into a structure [T].
1113
pub fn from_bytes<'de, T: Deserialize<'de>>(bytes: &'de [u8]) -> Result<T, BsonError> {

crates/core/src/bson/parser.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ impl<'de> Parser<'de> {
162162
false
163163
})
164164
}
165+
166+
pub fn remaining(&self) -> &'de [u8] {
167+
self.remaining_input
168+
}
165169
}
166170

167171
#[repr(transparent)]

crates/core/src/bson/writer.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
use alloc::vec::Vec;
2+
use bytes::BufMut;
3+
4+
use super::parser::ElementType;
5+
6+
pub struct BsonWriter {
7+
output: Vec<u8>,
8+
}
9+
10+
impl BsonWriter {
11+
pub fn new() -> Self {
12+
let mut data = Vec::<u8>::new();
13+
data.put_i32_le(0); // Total document size, filled out later.
14+
15+
Self { output: data }
16+
}
17+
18+
fn put_entry(&mut self, kind: ElementType, name: &str) {
19+
self.output.push(kind as i8 as u8);
20+
let bytes = name.as_bytes();
21+
self.output.put_slice(bytes);
22+
self.output.push(0);
23+
}
24+
25+
/// Appends a string element called `name` whose payload is `value`.
///
/// BSON encodes a string as a little-endian `int32` byte count, followed by the UTF-8 bytes
/// and a terminating NUL byte. The byte count includes that terminating NUL.
pub fn put_str(&mut self, name: &str, value: &str) {
    self.put_entry(ElementType::String, name);

    // The payload is the value (the name has already been written by `put_entry`), and the
    // length prefix has to account for the trailing NUL byte as well.
    let bytes = value.as_bytes();
    self.output.put_i32_le((bytes.len() + 1) as i32);
    self.output.put_slice(bytes);
    self.output.push(0);
}
33+
34+
pub fn put_float(&mut self, name: &str, value: f64) {
35+
self.put_entry(ElementType::Double, name);
36+
self.output.put_f64_le(value);
37+
}
38+
39+
pub fn put_int(&mut self, name: &str, value: i64) {
40+
self.put_entry(ElementType::Int64, name);
41+
self.output.put_i64_le(value);
42+
}
43+
44+
pub fn finish(mut self) -> Vec<u8> {
45+
self.output.push(0);
46+
let length = self.output.len() as i32;
47+
48+
let length_field = &mut self.output[0..4];
49+
length_field.copy_from_slice(&length.to_le_bytes());
50+
self.output
51+
}
52+
}

0 commit comments

Comments
 (0)