Skip to content

Commit 874e369

Browse files
committed
Rewrite the driver module based on TendrilSink
Depends on servo/tendril#23
1 parent 996fff2 commit 874e369

16 files changed

+121
-169
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "html5ever"
4-
version = "0.2.11"
4+
version = "0.3.0"
55
authors = [ "The html5ever Project Developers" ]
66
license = "MIT / Apache-2.0"
77
repository = "https://github.com/servo/html5ever"
@@ -26,7 +26,7 @@ log = "0"
2626
phf = "0.7"
2727
string_cache = "0.2.0"
2828
mac = "0"
29-
tendril = "0.1.6"
29+
tendril = "0.2"
3030
heapsize = { version = "0.1.1", optional = true }
3131
heapsize_plugin = { version = "0.1.0", optional = true }
3232

capi/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ crate-type = ["staticlib"]
1111
[dependencies]
1212
libc = "0.2"
1313
string_cache = "0.2"
14-
tendril = "0.1.6"
1514

1615
[dependencies.html5ever]
1716
path = "../"

capi/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
extern crate libc;
1111
#[macro_use] extern crate string_cache;
12-
extern crate tendril;
1312
extern crate html5ever;
1413

1514
use libc::c_int;

capi/src/tokenizer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
use c_bool;
1313

14+
use html5ever::tendril::{StrTendril, SliceExt};
1415
use html5ever::tokenizer::{TokenSink, Token, Doctype, Tag, ParseError, DoctypeToken};
1516
use html5ever::tokenizer::{CommentToken, CharacterTokens, NullCharacterToken};
1617
use html5ever::tokenizer::{TagToken, StartTag, EndTag, EOFToken, Tokenizer};
@@ -20,7 +21,6 @@ use std::default::Default;
2021

2122
use libc::{c_void, c_int, size_t};
2223
use string_cache::Atom;
23-
use tendril::{StrTendril, SliceExt};
2424

2525
#[repr(C)]
2626
#[derive(Copy, Clone)]

examples/html2html.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,26 @@ extern crate html5ever;
2121
use std::io::{self, Write};
2222
use std::default::Default;
2323

24-
use tendril::{ByteTendril, ReadExt};
24+
use tendril::TendrilSink;
2525

2626
use html5ever::driver::ParseOpts;
2727
use html5ever::tree_builder::TreeBuilderOpts;
28-
use html5ever::{parse, one_input, serialize};
28+
use html5ever::{parse_document, serialize};
2929
use html5ever::rcdom::RcDom;
3030

3131
fn main() {
32-
let mut input = ByteTendril::new();
33-
io::stdin().read_to_tendril(&mut input).unwrap();
34-
let input = input.try_reinterpret().unwrap();
35-
let dom: RcDom = parse(one_input(input), ParseOpts {
32+
let opts = ParseOpts {
3633
tree_builder: TreeBuilderOpts {
3734
drop_doctype: true,
3835
..Default::default()
3936
},
4037
..Default::default()
41-
});
38+
};
39+
let stdin = io::stdin();
40+
let dom = parse_document(RcDom::default(), opts)
41+
.from_utf8()
42+
.read_from(&mut stdin.lock())
43+
.unwrap();
4244

4345
// The validator.nu HTML2HTML always prints a doctype at the very beginning.
4446
io::stdout().write_all(b"<!DOCTYPE html>\n")

examples/noop-tokenize.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@ use std::default::Default;
1717

1818
use tendril::{ByteTendril, ReadExt};
1919

20-
use html5ever::tokenizer::{TokenSink, Token};
21-
use html5ever::driver::{tokenize_to, one_input};
20+
use html5ever::tokenizer::{TokenSink, Token, Tokenizer};
2221

2322
struct Sink(Vec<Token>);
2423

@@ -35,5 +34,7 @@ fn main() {
3534
io::stdin().read_to_tendril(&mut input).unwrap();
3635
let input = input.try_reinterpret().unwrap();
3736

38-
tokenize_to(Sink(Vec::new()), one_input(input), Default::default());
37+
let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default());
38+
tok.feed(input);
39+
tok.end();
3940
}

examples/noop-tree-builder.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ use std::collections::HashMap;
1818
use std::borrow::Cow;
1919
use string_cache::QualName;
2020

21-
use tendril::{StrTendril, ByteTendril, ReadExt};
21+
use tendril::{StrTendril, TendrilSink};
2222

23-
use html5ever::{parse_to, one_input};
23+
use html5ever::parse_document;
2424
use html5ever::tokenizer::Attribute;
2525
use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText};
2626

@@ -39,6 +39,8 @@ impl Sink {
3939

4040
impl TreeSink for Sink {
4141
type Handle = usize;
42+
type Output = Self;
43+
fn finish(self) -> Self { self }
4244

4345
fn get_document(&mut self) -> usize {
4446
0
@@ -96,9 +98,9 @@ fn main() {
9698
next_id: 1,
9799
names: HashMap::new(),
98100
};
99-
100-
let mut input = ByteTendril::new();
101-
io::stdin().read_to_tendril(&mut input).unwrap();
102-
let input = input.try_reinterpret().unwrap();
103-
parse_to(sink, one_input(input), Default::default());
101+
let stdin = io::stdin();
102+
parse_document(sink, Default::default())
103+
.from_utf8()
104+
.read_from(&mut stdin.lock())
105+
.unwrap();
104106
}

examples/print-rcdom.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ use std::iter::repeat;
1818
use std::default::Default;
1919
use std::string::String;
2020

21-
use tendril::{ByteTendril, ReadExt};
22-
use html5ever::{parse, one_input};
21+
use tendril::TendrilSink;
22+
use html5ever::parse_document;
2323
use html5ever::rcdom::{Document, Doctype, Text, Comment, Element, RcDom, Handle};
2424

2525
// This is not proper HTML serialization, of course.
@@ -63,10 +63,11 @@ pub fn escape_default(s: &str) -> String {
6363
}
6464

6565
fn main() {
66-
let mut input = ByteTendril::new();
67-
io::stdin().read_to_tendril(&mut input).unwrap();
68-
let input = input.try_reinterpret().unwrap();
69-
let dom: RcDom = parse(one_input(input), Default::default());
66+
let stdin = io::stdin();
67+
let dom = parse_document(RcDom::default(), Default::default())
68+
.from_utf8()
69+
.read_from(&mut stdin.lock())
70+
.unwrap();
7071
walk(0, dom.document);
7172

7273
if !dom.errors.is_empty() {

examples/print-tree-actions.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ use std::collections::HashMap;
1818
use std::borrow::Cow;
1919
use string_cache::QualName;
2020

21-
use tendril::{ByteTendril, StrTendril, ReadExt};
21+
use tendril::{StrTendril, TendrilSink};
2222

23-
use html5ever::{parse_to, one_input};
2423
use html5ever::tokenizer::Attribute;
2524
use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText};
25+
use html5ever::parse_document;
2626

2727
struct Sink {
2828
next_id: usize,
@@ -39,6 +39,8 @@ impl Sink {
3939

4040
impl TreeSink for Sink {
4141
type Handle = usize;
42+
type Output = Self;
43+
fn finish(self) -> Self { self }
4244

4345
fn parse_error(&mut self, msg: Cow<'static, str>) {
4446
println!("Parse error: {}", msg);
@@ -143,9 +145,9 @@ fn main() {
143145
next_id: 1,
144146
names: HashMap::new(),
145147
};
146-
147-
let mut input = ByteTendril::new();
148-
io::stdin().read_to_tendril(&mut input).unwrap();
149-
let input = input.try_reinterpret().unwrap();
150-
parse_to(sink, one_input(input), Default::default());
148+
let stdin = io::stdin();
149+
parse_document(sink, Default::default())
150+
.from_utf8()
151+
.read_from(&mut stdin.lock())
152+
.unwrap();
151153
}

examples/tokenize.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ use std::default::Default;
1515

1616
use tendril::{ByteTendril, ReadExt};
1717

18-
use html5ever::tokenizer::{TokenSink, Token, TokenizerOpts, ParseError};
18+
use html5ever::tokenizer::{TokenSink, Tokenizer, Token, TokenizerOpts, ParseError};
1919
use html5ever::tokenizer::{CharacterTokens, NullCharacterToken, TagToken, StartTag, EndTag};
20-
use html5ever::driver::{tokenize_to, one_input};
2120

2221
#[derive(Copy, Clone)]
2322
struct TokenPrinter {
@@ -84,9 +83,12 @@ fn main() {
8483
let mut input = ByteTendril::new();
8584
io::stdin().read_to_tendril(&mut input).unwrap();
8685
let input = input.try_reinterpret().unwrap();
87-
tokenize_to(sink, one_input(input), TokenizerOpts {
86+
87+
let mut tok = Tokenizer::new(sink, TokenizerOpts {
8888
profile: true,
8989
.. Default::default()
9090
});
91+
tok.feed(input);
92+
tok.end();
9193
sink.is_char(false);
9294
}

0 commit comments

Comments
 (0)