Skip to content

Commit 002ccf0

Browse files
authored
Merge pull request #113 from eddyb/bangless
Parse grammars with `proc_macro` tokens and remove "negative lookahead".
2 parents 5b05f5a + 2cb9732 commit 002ccf0

File tree

10 files changed

+156
-175
lines changed

10 files changed

+156
-175
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ doctest = false
3131
test = false
3232

3333
[patch.'crates-io']
34-
grammer = { git = "https://github.com/lykenware/grammer", rev = "e108acbe83271761538bf5b2ff2daeaaafa5919c" }
34+
grammer = { git = "https://github.com/lykenware/grammer", rev = "ed4fc4f9be9aa2c1a6c3245ba4e91684ddff5f2f" }
3535

3636
[workspace]
3737
members = [

build.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,22 @@ extern crate proc_quote;
99
mod generate;
1010
#[path = "src/parse_node.rs"]
1111
mod parse_node;
12+
#[path = "src/proc_macro.rs"]
13+
pub mod proc_macro;
1214
#[path = "src/scannerless.rs"]
1315
pub mod scannerless;
1416

1517
use std::env;
1618
use std::fs;
1719
use std::path::PathBuf;
1820

19-
// FIXME(eddyb) use `scannerless::Grammar` when that wrapper hack is fixed.
20-
type Grammar = grammer::Grammar<scannerless::Pat<&'static str>>;
2121
fn main() {
2222
println!("cargo:rerun-if-changed=build.rs");
2323

2424
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
2525

26-
let grammar: Grammar = grammer::grammar_grammar();
26+
let mut grammar = proc_macro::builtin();
27+
grammar.extend(grammer::grammar_grammar());
2728

2829
fs::write(
2930
&out_dir.join("parse_grammar.rs"),

macros/src/lib.rs

+5-17
Original file line numberDiff line numberDiff line change
@@ -7,28 +7,16 @@ use proc_quote::ToTokens as _;
77

88
#[proc_macro]
99
pub fn scannerless_parser(input: TokenStream) -> TokenStream {
10-
// FIXME(eddyb) parse the `proc_macro` tokens instead of strings.
11-
gll::generate::rust::generate(
12-
&input
13-
.to_string()
14-
.parse::<gll::scannerless::Grammar>()
15-
.unwrap(),
16-
)
17-
.into_token_stream()
18-
.into()
10+
let grammar: gll::scannerless::Grammar = gll::parse_grammar(input.into()).unwrap();
11+
gll::generate::rust::generate(&grammar)
12+
.into_token_stream()
13+
.into()
1914
}
2015

2116
#[proc_macro]
2217
pub fn proc_macro_parser(input: TokenStream) -> TokenStream {
23-
// FIXME(eddyb) parse the `proc_macro` tokens instead of strings.
2418
let mut grammar = gll::proc_macro::builtin();
25-
grammar.extend(
26-
input
27-
.to_string()
28-
.parse::<gll::proc_macro::Grammar>()
29-
.unwrap()
30-
.0,
31-
);
19+
grammar.extend(gll::parse_grammar(input.into()).unwrap());
3220
gll::generate::rust::generate(&grammar)
3321
.into_token_stream()
3422
.into()

src/generate/rust.rs

+2-8
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ impl<Pat> RuleTypeMethods for Rule<Pat> {
116116

117117
fn field_type(&self, path: &[usize]) -> Src {
118118
match self {
119-
Rule::Empty | Rule::Eat(_) | Rule::NegativeLookahead(_) => {
119+
Rule::Empty | Rule::Eat(_) => {
120120
assert_eq!(path, []);
121121
quote!(())
122122
}
@@ -199,7 +199,6 @@ impl<Pat: Ord + Hash + RustInputPat> RuleRuleMapMethods<Pat> for Rule<Pat> {
199199
match self {
200200
Rule::Empty => "".to_string(),
201201
Rule::Eat(pat) => pat.rust_matcher().to_pretty_string(),
202-
Rule::NegativeLookahead(pat) => format!("!{}", pat.rust_matcher().to_pretty_string()),
203202
Rule::Call(r) => r.clone(),
204203
Rule::Concat([left, right]) => format!(
205204
"({} {})",
@@ -246,7 +245,7 @@ impl<Pat: Ord + Hash + RustInputPat> RuleRuleMapMethods<Pat> for Rule<Pat> {
246245
rules: &RuleMap<'_, Pat>,
247246
) -> ParseNodeShape<ParseNodeKind> {
248247
match &**rc_self {
249-
Rule::Empty | Rule::Eat(_) | Rule::NegativeLookahead(_) => ParseNodeShape::Opaque,
248+
Rule::Empty | Rule::Eat(_) => ParseNodeShape::Opaque,
250249
Rule::Call(_) => unreachable!(),
251250
Rule::Concat([left, right]) => {
252251
ParseNodeShape::Split(left.parse_node_kind(rules), right.parse_node_kind(rules))
@@ -802,10 +801,6 @@ impl<Pat: Ord + Hash + RustInputPat> RuleGenerateMethods<Pat> for Rule<Pat> {
802801
let pat = pat.rust_matcher();
803802
check(quote!(let Some(_range) = p.input_consume_left(_range, &(#pat)))).apply(cont)
804803
}
805-
(Rule::NegativeLookahead(pat), _) => {
806-
let pat = pat.rust_matcher();
807-
check(quote!(!p.input_lookahead_left(_range, &(#pat)))).apply(cont)
808-
}
809804
(Rule::Call(r), _) => call(Rc::new(CodeLabel::NamedRule(r.clone()))).apply(cont),
810805
(Rule::Concat([left, right]), None) => {
811806
(left.generate_parse(None) + right.generate_parse(None)).apply(cont)
@@ -902,7 +897,6 @@ impl<Pat: Ord + Hash + RustInputPat> RuleGenerateMethods<Pat> for Rule<Pat> {
902897
match self {
903898
Rule::Empty
904899
| Rule::Eat(_)
905-
| Rule::NegativeLookahead(_)
906900
| Rule::Call(_)
907901
| Rule::RepeatMany(..)
908902
| Rule::RepeatMore(..) => {

src/lib.rs

+7
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,12 @@ pub mod runtime;
1818
#[forbid(unsafe_code)]
1919
pub mod scannerless;
2020

21+
// HACK(eddyb) this contains impls for types in `proc_macro`, which depend on
22+
// `runtime`. Those parts of `runtime` should be moved to `grammer::input`.
23+
#[forbid(unsafe_code)]
24+
mod proc_macro_runtime;
25+
2126
#[forbid(unsafe_code)]
2227
mod parse_grammar;
28+
29+
pub use parse_grammar::parse_grammar;

src/parse_grammar.rs

+40-25
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,85 @@
11
// HACK(eddyb) silence warnings from unused exports in the generated code.
22
#![allow(unused)]
3+
#![allow(non_camel_case_types)]
34

45
// HACK(eddyb) needed for bootstrapping.
56
use crate as gll;
67

78
include!(concat!(env!("OUT_DIR"), "/parse_grammar.rs"));
89

10+
use crate::proc_macro::{FlatToken, Span, TokenStream};
911
use crate::runtime;
1012
use crate::scannerless::Pat as SPat;
1113
use std::ops::Bound;
1214
use std::str::FromStr;
1315

14-
impl<Pat: From<SPat>> FromStr for crate::scannerless::WrapperHack<grammer::Grammar<Pat>> {
15-
type Err = runtime::ParseError<runtime::LineColumn>;
16-
fn from_str(src: &str) -> Result<Self, Self::Err> {
17-
let mut grammar = grammer::Grammar::new();
18-
Grammar::parse(src)?.with(|g| {
19-
for rule_def in g.one().unwrap().rules {
20-
let rule_def = rule_def.unwrap().one().unwrap();
21-
grammar.define(rule_def.name.source(), rule_def.rule.one().unwrap().lower());
22-
}
23-
});
24-
Ok(crate::scannerless::WrapperHack(grammar))
25-
}
16+
pub fn parse_grammar<Pat: From<SPat>>(
17+
stream: TokenStream,
18+
) -> Result<grammer::Grammar<Pat>, runtime::ParseError<Span>> {
19+
let mut grammar = grammer::Grammar::new();
20+
Grammar::parse(stream)?.with(|g| {
21+
for rule_def in g.one().unwrap().rules {
22+
let rule_def = rule_def.unwrap().one().unwrap();
23+
let name = match rule_def.name.source() {
24+
[FlatToken::Ident(ident)] => ident.to_string(),
25+
_ => unreachable!(),
26+
};
27+
grammar.define(&name, rule_def.rule.one().unwrap().lower());
28+
}
29+
});
30+
Ok(grammar)
2631
}
2732

28-
impl Or<'_, '_, &str> {
33+
impl Or<'_, '_, TokenStream> {
2934
fn lower<Pat: From<SPat>>(self) -> grammer::RuleWithNamedFields<Pat> {
3035
let mut rules = self.rules.map(|rule| rule.unwrap().one().unwrap().lower());
3136
let first = rules.next().unwrap();
3237
rules.fold(first, |a, b| a | b)
3338
}
3439
}
3540

36-
impl Concat<'_, '_, &str> {
41+
impl Concat<'_, '_, TokenStream> {
3742
fn lower<Pat: From<SPat>>(self) -> grammer::RuleWithNamedFields<Pat> {
3843
self.rules
3944
.map(|rule| rule.unwrap().one().unwrap().lower())
4045
.fold(grammer::empty(), |a, b| a + b)
4146
}
4247
}
4348

44-
impl Rule<'_, '_, &str> {
49+
impl Rule<'_, '_, TokenStream> {
4550
fn lower<Pat: From<SPat>>(self) -> grammer::RuleWithNamedFields<Pat> {
4651
let mut rule = self.rule.one().unwrap().lower();
4752
if let Some(modifier) = self.modifier {
4853
rule = modifier.one().unwrap().lower(rule);
4954
}
5055
if let Some(field) = self.field {
51-
rule = rule.field(field.source());
56+
let field = match field.source() {
57+
[FlatToken::Ident(ident)] => ident.to_string(),
58+
_ => unreachable!(),
59+
};
60+
rule = rule.field(&field);
5261
}
5362
rule
5463
}
5564
}
5665

57-
impl Primary<'_, '_, &str> {
66+
impl Primary<'_, '_, TokenStream> {
5867
fn lower<Pat: From<SPat>>(self) -> grammer::RuleWithNamedFields<Pat> {
5968
match self {
6069
Primary::Eat(pat) => grammer::eat(pat.one().unwrap().lower()),
61-
Primary::NegativeLookahead { pat } => {
62-
grammer::negative_lookahead(pat.one().unwrap().lower())
70+
Primary::Call(name) => {
71+
let name = match name.source() {
72+
[FlatToken::Ident(ident)] => ident.to_string(),
73+
_ => unreachable!(),
74+
};
75+
grammer::call(&name)
6376
}
64-
Primary::Call(name) => grammer::call(name.source()),
6577
Primary::Group { or } => or.map_or_else(grammer::empty, |or| or.one().unwrap().lower()),
6678
}
6779
}
6880
}
6981

70-
impl Modifier<'_, '_, &str> {
82+
impl Modifier<'_, '_, TokenStream> {
7183
fn lower<Pat: From<SPat>>(
7284
self,
7385
rule: grammer::RuleWithNamedFields<Pat>,
@@ -90,7 +102,7 @@ impl Modifier<'_, '_, &str> {
90102
}
91103
}
92104

93-
impl SepKind<'_, '_, &str> {
105+
impl SepKind<'_, '_, TokenStream> {
94106
fn lower(&self) -> grammer::SepKind {
95107
match self {
96108
SepKind::Simple(_) => grammer::SepKind::Simple,
@@ -99,11 +111,14 @@ impl SepKind<'_, '_, &str> {
99111
}
100112
}
101113

102-
impl Pattern<'_, '_, &str> {
114+
impl Pattern<'_, '_, TokenStream> {
103115
fn lower(self) -> SPat {
104-
fn unescape<T>(handle: Handle<'_, '_, &str, T>) -> String {
116+
fn unescape<T>(handle: Handle<'_, '_, TokenStream, T>) -> String {
105117
let mut out = String::new();
106-
let s = handle.source();
118+
let s = match handle.source() {
119+
[FlatToken::Literal(lit)] => lit.to_string(),
120+
_ => unreachable!(),
121+
};
107122
let mut chars = s[1..s.len() - 1].chars();
108123
while let Some(c) = chars.next() {
109124
let c = match c {

0 commit comments

Comments
 (0)