Skip to content

Commit c8eb9b3

Browse files
committed
Use the proc_macro token model in the bootstrap grammar.
1 parent 5b05f5a commit c8eb9b3

File tree

7 files changed

+154
-150
lines changed

7 files changed

+154
-150
lines changed

build.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,22 @@ extern crate proc_quote;
99
mod generate;
1010
#[path = "src/parse_node.rs"]
1111
mod parse_node;
12+
#[path = "src/proc_macro.rs"]
13+
pub mod proc_macro;
1214
#[path = "src/scannerless.rs"]
1315
pub mod scannerless;
1416

1517
use std::env;
1618
use std::fs;
1719
use std::path::PathBuf;
1820

19-
// FIXME(eddyb) use `scannerless::Grammar` when that wrapper hack is fixed.
20-
type Grammar = grammer::Grammar<scannerless::Pat<&'static str>>;
2121
fn main() {
2222
println!("cargo:rerun-if-changed=build.rs");
2323

2424
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
2525

26-
let grammar: Grammar = grammer::grammar_grammar();
26+
let mut grammar = proc_macro::builtin();
27+
grammar.extend(grammer::grammar_grammar());
2728

2829
fs::write(
2930
&out_dir.join("parse_grammar.rs"),

macros/src/lib.rs

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,28 +7,16 @@ use proc_quote::ToTokens as _;
77

88
#[proc_macro]
99
pub fn scannerless_parser(input: TokenStream) -> TokenStream {
10-
// FIXME(eddyb) parse the `proc_macro` tokens instead of strings.
11-
gll::generate::rust::generate(
12-
&input
13-
.to_string()
14-
.parse::<gll::scannerless::Grammar>()
15-
.unwrap(),
16-
)
17-
.into_token_stream()
18-
.into()
10+
let grammar: gll::scannerless::Grammar = gll::parse_grammar(input.into()).unwrap();
11+
gll::generate::rust::generate(&grammar)
12+
.into_token_stream()
13+
.into()
1914
}
2015

2116
#[proc_macro]
2217
pub fn proc_macro_parser(input: TokenStream) -> TokenStream {
23-
// FIXME(eddyb) parse the `proc_macro` tokens instead of strings.
2418
let mut grammar = gll::proc_macro::builtin();
25-
grammar.extend(
26-
input
27-
.to_string()
28-
.parse::<gll::proc_macro::Grammar>()
29-
.unwrap()
30-
.0,
31-
);
19+
grammar.extend(gll::parse_grammar(input.into()).unwrap());
3220
gll::generate::rust::generate(&grammar)
3321
.into_token_stream()
3422
.into()

src/lib.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,12 @@ pub mod runtime;
1818
#[forbid(unsafe_code)]
1919
pub mod scannerless;
2020

21+
// HACK(eddyb) this contains impls for types in `proc_macro`, which depend on
22+
// `runtime`. Those parts of `runtime` should be moved to `grammer::input`.
23+
#[forbid(unsafe_code)]
24+
mod proc_macro_runtime;
25+
2126
#[forbid(unsafe_code)]
2227
mod parse_grammar;
28+
29+
pub use parse_grammar::parse_grammar;

src/parse_grammar.rs

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,88 @@
11
// HACK(eddyb) silence warnings from unused exports in the generated code.
22
#![allow(unused)]
3+
#![allow(non_camel_case_types)]
34

45
// HACK(eddyb) needed for bootstrapping.
56
use crate as gll;
67

78
include!(concat!(env!("OUT_DIR"), "/parse_grammar.rs"));
89

10+
use crate::proc_macro::{FlatToken, Span, TokenStream};
911
use crate::runtime;
1012
use crate::scannerless::Pat as SPat;
1113
use std::ops::Bound;
1214
use std::str::FromStr;
1315

14-
impl<Pat: From<SPat>> FromStr for crate::scannerless::WrapperHack<grammer::Grammar<Pat>> {
15-
type Err = runtime::ParseError<runtime::LineColumn>;
16-
fn from_str(src: &str) -> Result<Self, Self::Err> {
17-
let mut grammar = grammer::Grammar::new();
18-
Grammar::parse(src)?.with(|g| {
19-
for rule_def in g.one().unwrap().rules {
20-
let rule_def = rule_def.unwrap().one().unwrap();
21-
grammar.define(rule_def.name.source(), rule_def.rule.one().unwrap().lower());
22-
}
23-
});
24-
Ok(crate::scannerless::WrapperHack(grammar))
25-
}
16+
pub fn parse_grammar<Pat: From<SPat>>(
17+
stream: TokenStream,
18+
) -> Result<grammer::Grammar<Pat>, runtime::ParseError<Span>> {
19+
let mut grammar = grammer::Grammar::new();
20+
Grammar::parse(stream)?.with(|g| {
21+
for rule_def in g.one().unwrap().rules {
22+
let rule_def = rule_def.unwrap().one().unwrap();
23+
let name = match rule_def.name.source() {
24+
[FlatToken::Ident(ident)] => ident.to_string(),
25+
_ => unreachable!(),
26+
};
27+
grammar.define(&name, rule_def.rule.one().unwrap().lower());
28+
}
29+
});
30+
Ok(grammar)
2631
}
2732

28-
impl Or<'_, '_, &str> {
33+
impl Or<'_, '_, TokenStream> {
2934
fn lower<Pat: From<SPat>>(self) -> grammer::RuleWithNamedFields<Pat> {
3035
let mut rules = self.rules.map(|rule| rule.unwrap().one().unwrap().lower());
3136
let first = rules.next().unwrap();
3237
rules.fold(first, |a, b| a | b)
3338
}
3439
}
3540

36-
impl Concat<'_, '_, &str> {
41+
impl Concat<'_, '_, TokenStream> {
3742
fn lower<Pat: From<SPat>>(self) -> grammer::RuleWithNamedFields<Pat> {
3843
self.rules
3944
.map(|rule| rule.unwrap().one().unwrap().lower())
4045
.fold(grammer::empty(), |a, b| a + b)
4146
}
4247
}
4348

44-
impl Rule<'_, '_, &str> {
49+
impl Rule<'_, '_, TokenStream> {
4550
fn lower<Pat: From<SPat>>(self) -> grammer::RuleWithNamedFields<Pat> {
4651
let mut rule = self.rule.one().unwrap().lower();
4752
if let Some(modifier) = self.modifier {
4853
rule = modifier.one().unwrap().lower(rule);
4954
}
5055
if let Some(field) = self.field {
51-
rule = rule.field(field.source());
56+
let field = match field.source() {
57+
[FlatToken::Ident(ident)] => ident.to_string(),
58+
_ => unreachable!(),
59+
};
60+
rule = rule.field(&field);
5261
}
5362
rule
5463
}
5564
}
5665

57-
impl Primary<'_, '_, &str> {
66+
impl Primary<'_, '_, TokenStream> {
5867
fn lower<Pat: From<SPat>>(self) -> grammer::RuleWithNamedFields<Pat> {
5968
match self {
6069
Primary::Eat(pat) => grammer::eat(pat.one().unwrap().lower()),
6170
Primary::NegativeLookahead { pat } => {
6271
grammer::negative_lookahead(pat.one().unwrap().lower())
6372
}
64-
Primary::Call(name) => grammer::call(name.source()),
73+
Primary::Call(name) => {
74+
let name = match name.source() {
75+
[FlatToken::Ident(ident)] => ident.to_string(),
76+
_ => unreachable!(),
77+
};
78+
grammer::call(&name)
79+
}
6580
Primary::Group { or } => or.map_or_else(grammer::empty, |or| or.one().unwrap().lower()),
6681
}
6782
}
6883
}
6984

70-
impl Modifier<'_, '_, &str> {
85+
impl Modifier<'_, '_, TokenStream> {
7186
fn lower<Pat: From<SPat>>(
7287
self,
7388
rule: grammer::RuleWithNamedFields<Pat>,
@@ -90,7 +105,7 @@ impl Modifier<'_, '_, &str> {
90105
}
91106
}
92107

93-
impl SepKind<'_, '_, &str> {
108+
impl SepKind<'_, '_, TokenStream> {
94109
fn lower(&self) -> grammer::SepKind {
95110
match self {
96111
SepKind::Simple(_) => grammer::SepKind::Simple,
@@ -99,11 +114,14 @@ impl SepKind<'_, '_, &str> {
99114
}
100115
}
101116

102-
impl Pattern<'_, '_, &str> {
117+
impl Pattern<'_, '_, TokenStream> {
103118
fn lower(self) -> SPat {
104-
fn unescape<T>(handle: Handle<'_, '_, &str, T>) -> String {
119+
fn unescape<T>(handle: Handle<'_, '_, TokenStream, T>) -> String {
105120
let mut out = String::new();
106-
let s = handle.source();
121+
let s = match handle.source() {
122+
[FlatToken::Literal(lit)] => lit.to_string(),
123+
_ => unreachable!(),
124+
};
107125
let mut chars = s[1..s.len() - 1].chars();
108126
while let Some(c) = chars.next() {
109127
let c = match c {

src/proc_macro.rs

Lines changed: 13 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
use crate::generate::rust::RustInputPat;
22
use crate::generate::src::{quotable_to_src, quote, Src, ToSrc};
3-
use crate::runtime::{Input, InputMatch, Range};
43
use crate::scannerless::Pat as SPat;
5-
use grammer::{self, call, eat, MatchesEmpty, MaybeKnown};
6-
use indexing::{proof::Provable, Container, Index, Unknown};
4+
use grammer::{call, eat, MatchesEmpty, MaybeKnown};
75
pub use proc_macro2::{
86
Delimiter, Ident, LexError, Literal, Punct, Spacing, Span, TokenStream, TokenTree,
97
};
10-
use std::{ops, str::FromStr};
8+
use std::str::FromStr;
119

12-
pub type Grammar = crate::scannerless::WrapperHack<grammer::Grammar<Pat>>;
10+
pub type Grammar = grammer::Grammar<Pat>;
1311

1412
pub fn builtin() -> Grammar {
1513
let mut g = grammer::Grammar::new();
@@ -44,7 +42,7 @@ pub fn builtin() -> Grammar {
4442
ident | punct | literal | group('(', ')') | group('[', ']') | group('{', '}'),
4543
);
4644

47-
crate::scannerless::WrapperHack(g)
45+
g
4846
}
4947

5048
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -67,6 +65,13 @@ impl FromStr for Pat {
6765
}
6866
}
6967

68+
// FIXME(eddyb) perhaps support `TryFrom`/`TryInto` directly in `grammer`?
69+
impl From<&str> for Pat {
70+
fn from(s: &str) -> Self {
71+
s.parse().unwrap()
72+
}
73+
}
74+
7075
impl From<FlatTokenPat<String>> for Pat {
7176
fn from(pat: FlatTokenPat<String>) -> Self {
7277
Pat(vec![pat])
@@ -76,7 +81,7 @@ impl From<FlatTokenPat<String>> for Pat {
7681
impl From<SPat> for Pat {
7782
fn from(pat: SPat) -> Self {
7883
match pat {
79-
SPat::String(s) => s.parse().unwrap(),
84+
SPat::String(s) => s[..].into(),
8085
SPat::Range(..) => unimplemented!("char ranges are unsupported"),
8186
}
8287
}
@@ -185,7 +190,7 @@ impl FlatToken {
185190
}
186191
}
187192

188-
fn flatten(stream: TokenStream, out: &mut Vec<FlatToken>) {
193+
pub(crate) fn flatten(stream: TokenStream, out: &mut Vec<FlatToken>) {
189194
for tt in stream {
190195
let flat = match tt {
191196
TokenTree::Group(tt) => {
@@ -216,82 +221,3 @@ fn flatten(stream: TokenStream, out: &mut Vec<FlatToken>) {
216221
out.push(flat);
217222
}
218223
}
219-
220-
impl Input for TokenStream {
221-
type Container = Vec<FlatToken>;
222-
type Slice = [FlatToken];
223-
type SourceInfo = ops::Range<Span>;
224-
type SourceInfoPoint = Span;
225-
fn to_container(self) -> Self::Container {
226-
let mut out = vec![];
227-
flatten(self, &mut out);
228-
out
229-
}
230-
fn slice<'b, 'i>(
231-
input: &'b Container<'i, Self::Container>,
232-
range: Range<'i>,
233-
) -> &'b Self::Slice {
234-
&input[range.0]
235-
}
236-
fn source_info<'i>(
237-
input: &Container<'i, Self::Container>,
238-
range: Range<'i>,
239-
) -> Self::SourceInfo {
240-
// FIXME(eddyb) should be joining up spans, but the API
241-
// for that is still "semver-exempt" in `proc-macro2`.
242-
let last = range
243-
.nonempty()
244-
.map(|r| r.last().no_proof())
245-
.unwrap_or(range.past_the_end());
246-
Self::source_info_point(input, range.first())..Self::source_info_point(input, last)
247-
}
248-
fn source_info_point<'i>(
249-
input: &Container<'i, Self::Container>,
250-
index: Index<'i, Unknown>,
251-
) -> Self::SourceInfoPoint {
252-
// Try to get as much information as possible.
253-
let (before, after) = input.split_at(index);
254-
let before = &input[before];
255-
let after = &input[after];
256-
if let Some(first) = after.first() {
257-
first.span()
258-
} else if let Some(last) = before.last() {
259-
// Not correct but we're at the end of the input anyway.
260-
last.span()
261-
} else {
262-
// HACK(eddyb) last resort, make a span up
263-
// (a better option should exist)
264-
Span::call_site()
265-
}
266-
}
267-
}
268-
269-
impl InputMatch<&'static [FlatTokenPat<&'static str>]> for [FlatToken] {
270-
fn match_left(&self, &pat: &&[FlatTokenPat<&str>]) -> Option<usize> {
271-
if self
272-
.iter()
273-
.zip(pat)
274-
.take_while(|(t, p)| t.matches_pat(p))
275-
.count()
276-
== pat.len()
277-
{
278-
Some(pat.len())
279-
} else {
280-
None
281-
}
282-
}
283-
fn match_right(&self, &pat: &&[FlatTokenPat<&str>]) -> Option<usize> {
284-
if self
285-
.iter()
286-
.zip(pat)
287-
.rev()
288-
.take_while(|(t, p)| t.matches_pat(p))
289-
.count()
290-
== pat.len()
291-
{
292-
Some(pat.len())
293-
} else {
294-
None
295-
}
296-
}
297-
}

0 commit comments

Comments (0)