Skip to content

Commit 924acb7

Browse files
committed
Add reserved name #content to flatten element hierarchies.
1 parent 1d117c1 commit 924acb7

File tree

2 files changed

+147
-34
lines changed

2 files changed

+147
-34
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0"
88
repository = "https://github.com/adamreichold/serde-roxmltree"
99
documentation = "https://docs.rs/serde-roxmltree"
1010
readme = "README.md"
11-
version = "0.8.4"
11+
version = "0.9.0"
1212
edition = "2021"
1313

1414
[dependencies]

src/lib.rs

Lines changed: 146 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@
9797
//! # Ok::<(), Box<dyn std::error::Error>>(())
9898
//! ```
9999
//!
100-
//! The reserved name `$text` is used to directly refer to the text within an element:
100+
//! The reserved name `#content` is used to flatten one level of the hierarchy and
101+
//! revisit those nodes and attributes as if embedded inside another struct. This can
102+
//! be useful to handle inner text:
101103
//!
102104
//! ```
103105
//! use serde::Deserialize;
@@ -110,7 +112,7 @@
110112
//!
111113
//! #[derive(Deserialize)]
112114
//! struct Child {
113-
//! #[serde(rename = "$text")]
115+
//! #[serde(rename = "#content")]
114116
//! text: String,
115117
//! attribute: i32,
116118
//! }
@@ -122,6 +124,37 @@
122124
//! # Ok::<(), Box<dyn std::error::Error>>(())
123125
//! ```
124126
//!
127+
//! or partial alternatives:
128+
//!
129+
//! ```
130+
//! use serde::Deserialize;
131+
//! use serde_roxmltree::from_str;
132+
//!
133+
//! #[derive(Debug, PartialEq, Deserialize)]
134+
//! #[serde(rename_all = "lowercase")]
135+
//! enum Alternative {
136+
//! Float(f32),
137+
//! Integer(i32),
138+
//! }
139+
//!
140+
//! #[derive(Debug, PartialEq, Deserialize)]
141+
//! struct Record {
142+
//! #[serde(rename = "#content")]
143+
//! alternative: Alternative,
144+
//! string: String,
145+
//! }
146+
//!
147+
//! let record = from_str::<Record>("<record><float>42.0</float><string>foo</string></record>")?;
148+
//! assert_eq!(record.alternative, Alternative::Float(42.0));
149+
//! assert_eq!(record.string, "foo");
150+
//!
151+
//! let record = from_str::<Record>("<record><integer>23</integer><string>bar</string></record>")?;
152+
//! assert_eq!(record.alternative, Alternative::Integer(23));
153+
//! assert_eq!(record.string, "bar");
154+
//! #
155+
//! # Ok::<(), Box<dyn std::error::Error>>(())
156+
//! ```
157+
//!
125158
//! Optionally, attribute names can be prefixed by `@` to distinguish them from tag names:
126159
//!
127160
//! ```
@@ -285,7 +318,7 @@ pub trait Options: Sized {
285318

286319
/// Only visit child elements
287320
///
288-
/// Does not visit attributes and `$text`
321+
/// Does not visit attributes and `#content`
289322
/// to improve efficiency when these are irrelevant.
290323
fn only_children(self) -> OnlyChildren<Self> {
291324
OnlyChildren(PhantomData)
@@ -361,28 +394,21 @@ struct Deserializer<'de, 'input, 'temp, O> {
361394
enum Source<'de, 'input> {
362395
Node(Node<'de, 'input>),
363396
Attribute(Attribute<'de, 'input>),
364-
Text(&'de str),
397+
Content(Node<'de, 'input>),
365398
}
366399

367-
#[derive(Default)]
368-
struct Temp {
369-
visited: BitSet<usize>,
370-
buffer: String,
371-
}
372-
373-
impl<'de, 'input, O> Deserializer<'de, 'input, '_, O>
374-
where
375-
O: Options,
376-
{
377-
fn name(&mut self) -> &str {
378-
match &self.source {
379-
Source::Node(node) => {
400+
impl Source<'_, '_> {
401+
fn name<'a, O>(&'a self, buffer: &'a mut String) -> &'a str
402+
where
403+
O: Options,
404+
{
405+
match self {
406+
Self::Node(node) => {
380407
let tag_name = node.tag_name();
381408
let name = tag_name.name();
382409

383410
match tag_name.namespace() {
384411
Some(namespace) if O::NAMESPACES => {
385-
let buffer = &mut self.temp.buffer;
386412
buffer.clear();
387413

388414
buffer.reserve(namespace.len() + 2 + name.len());
@@ -398,12 +424,11 @@ where
398424
_ => name,
399425
}
400426
}
401-
Source::Attribute(attr) => {
427+
Self::Attribute(attr) => {
402428
let name = attr.name();
403429

404430
match attr.namespace() {
405431
Some(namespace) if O::NAMESPACES => {
406-
let buffer = &mut self.temp.buffer;
407432
buffer.clear();
408433

409434
if O::PREFIX_ATTR {
@@ -424,7 +449,6 @@ where
424449
}
425450
_ => {
426451
if O::PREFIX_ATTR {
427-
let buffer = &mut self.temp.buffer;
428452
buffer.clear();
429453

430454
buffer.reserve(1 + name.len());
@@ -439,14 +463,29 @@ where
439463
}
440464
}
441465
}
442-
Source::Text(_) => "$text",
466+
Self::Content(_) => "#content",
443467
}
444468
}
469+
}
470+
471+
#[derive(Default)]
472+
struct Temp {
473+
visited: BitSet<usize>,
474+
buffer: String,
475+
}
476+
477+
impl<'de, 'input, O> Deserializer<'de, 'input, '_, O>
478+
where
479+
O: Options,
480+
{
481+
fn name(&mut self) -> &str {
482+
self.source.name::<O>(&mut self.temp.buffer)
483+
}
445484

446485
fn node(&self) -> Result<&Node<'de, 'input>, Error> {
447486
match &self.source {
448-
Source::Node(node) => Ok(node),
449-
Source::Attribute(_) | Source::Text(_) => Err(Error::MissingNode),
487+
Source::Node(node) | Source::Content(node) => Ok(node),
488+
Source::Attribute(_) => Err(Error::MissingNode),
450489
}
451490
}
452491

@@ -471,9 +510,9 @@ where
471510

472511
let attributes = node.attributes().map(Source::Attribute);
473512

474-
let text = once(Source::Text(node.text().unwrap_or_default()));
513+
let content = once(Source::Content(*node));
475514

476-
Ok(children.chain(attributes).chain(text))
515+
Ok(children.chain(attributes).chain(content))
477516
}
478517

479518
fn siblings(&self) -> Result<impl Iterator<Item = Node<'de, 'de>>, Error> {
@@ -494,9 +533,8 @@ where
494533

495534
fn text(&self) -> &'de str {
496535
match self.source {
497-
Source::Node(node) => node.text().unwrap_or_default(),
536+
Source::Node(node) | Source::Content(node) => node.text().unwrap_or_default(),
498537
Source::Attribute(attr) => attr.value(),
499-
Source::Text(text) => text,
500538
}
501539
}
502540

@@ -733,7 +771,7 @@ where
733771
fn deserialize_enum<V>(
734772
self,
735773
_name: &'static str,
736-
_variants: &'static [&'static str],
774+
variants: &'static [&'static str],
737775
visitor: V,
738776
) -> Result<V::Value, Self::Error>
739777
where
@@ -742,12 +780,14 @@ where
742780
if O::ONLY_CHILDREN {
743781
visitor.visit_enum(EnumAccess {
744782
source: self.children()?,
783+
variants,
745784
temp: self.temp,
746785
options: PhantomData::<O>,
747786
})
748787
} else {
749788
visitor.visit_enum(EnumAccess {
750789
source: self.children_and_attributes()?,
790+
variants,
751791
temp: self.temp,
752792
options: PhantomData::<O>,
753793
})
@@ -874,6 +914,7 @@ where
874914
I: Iterator<Item = Source<'de, 'input>>,
875915
{
876916
source: I,
917+
variants: &'static [&'static str],
877918
temp: &'temp mut Temp,
878919
options: PhantomData<O>,
879920
}
@@ -890,7 +931,13 @@ where
890931
where
891932
V: de::DeserializeSeed<'de>,
892933
{
893-
let source = self.source.next().ok_or(Error::MissingChildOrAttribute)?;
934+
let source = self
935+
.source
936+
.find(|source| {
937+
self.variants
938+
.contains(&source.name::<O>(&mut self.temp.buffer))
939+
})
940+
.ok_or(Error::MissingChildOrAttribute)?;
894941

895942
let deserializer = Deserializer {
896943
source,
@@ -1059,7 +1106,7 @@ mod tests {
10591106
#[derive(Deserialize)]
10601107
struct Child {
10611108
attr: i32,
1062-
#[serde(rename = "$text")]
1109+
#[serde(rename = "#content")]
10631110
text: u64,
10641111
}
10651112

@@ -1174,6 +1221,72 @@ mod tests {
11741221
assert_eq!(val, Root::Bar(42));
11751222
}
11761223

1224+
#[test]
1225+
fn mixed_enum_and_struct_children() {
1226+
#[derive(Debug, PartialEq, Deserialize)]
1227+
enum Foobar {
1228+
Foo(u32),
1229+
Bar(i64),
1230+
}
1231+
1232+
#[derive(Deserialize)]
1233+
struct Root {
1234+
#[serde(rename = "#content")]
1235+
foobar: Foobar,
1236+
qux: f32,
1237+
}
1238+
1239+
let val = from_str::<Root>(r#"<root><qux>42.0</qux><Foo>23</Foo></root>"#).unwrap();
1240+
assert_eq!(val.foobar, Foobar::Foo(23));
1241+
assert_eq!(val.qux, 42.0);
1242+
}
1243+
1244+
#[test]
1245+
fn mixed_enum_and_repeated_struct_children() {
1246+
#[derive(Debug, PartialEq, Deserialize)]
1247+
enum Foobar {
1248+
Foo(u32),
1249+
Bar(i64),
1250+
}
1251+
1252+
#[derive(Deserialize)]
1253+
struct Root {
1254+
#[serde(rename = "#content")]
1255+
foobar: Foobar,
1256+
qux: Vec<f32>,
1257+
baz: String,
1258+
}
1259+
1260+
let val = from_str::<Root>(
1261+
r#"<root><Bar>42</Bar><qux>1.0</qux><baz>baz</baz><qux>2.0</qux><qux>3.0</qux></root>"#,
1262+
)
1263+
.unwrap();
1264+
assert_eq!(val.foobar, Foobar::Bar(42));
1265+
assert_eq!(val.qux, [1.0, 2.0, 3.0]);
1266+
assert_eq!(val.baz, "baz");
1267+
}
1268+
1269+
#[test]
1270+
fn repeated_enum_and_struct_children() {
1271+
#[derive(Debug, PartialEq, Deserialize)]
1272+
enum Foobar {
1273+
Foo(Vec<u32>),
1274+
Bar(i64),
1275+
}
1276+
1277+
#[derive(Deserialize)]
1278+
struct Root {
1279+
#[serde(rename = "#content")]
1280+
foobar: Foobar,
1281+
baz: String,
1282+
}
1283+
1284+
let val =
1285+
from_str::<Root>(r#"<root><Foo>42</Foo><baz>baz</baz><Foo>23</Foo></root>"#).unwrap();
1286+
assert_eq!(val.foobar, Foobar::Foo(vec![42, 23]));
1287+
assert_eq!(val.baz, "baz");
1288+
}
1289+
11771290
#[test]
11781291
fn borrowed_str() {
11791292
let doc = Document::parse("<root><child>foobar</child></root>").unwrap();
@@ -1309,11 +1422,11 @@ mod tests {
13091422
}
13101423

13111424
#[test]
1312-
fn only_children_skips_text() {
1425+
fn only_children_skips_content() {
13131426
#[derive(Deserialize)]
13141427
struct Root {
13151428
child: u64,
1316-
#[serde(rename = "$text")]
1429+
#[serde(rename = "#content")]
13171430
text: Option<String>,
13181431
}
13191432

0 commit comments

Comments
 (0)