
Commit be9a44e

Merge #1111
1111: Add multi-byte token support in token tree to ast item list r=matklad a=edwin0cheng

As discussed in #1105, this PR implements all multi-byte tokens in the `ra_mbe` crate.

Co-authored-by: Edwin Cheng <[email protected]>
2 parents 7713416 + 6ff16c7 commit be9a44e

File tree

3 files changed: +139 -20 lines changed


Cargo.lock

Lines changed: 1 addition & 0 deletions

crates/ra_mbe/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -8,5 +8,5 @@ authors = ["rust-analyzer developers"]
 ra_syntax = { path = "../ra_syntax" }
 ra_parser = { path = "../ra_parser" }
 tt = { path = "../ra_tt", package = "ra_tt" }
-
+itertools = "0.8.0"
 rustc-hash = "1.0.0"

crates/ra_mbe/src/syntax_bridge.rs

Lines changed: 137 additions & 19 deletions
@@ -113,6 +113,51 @@ struct TtToken {
     text: SmolStr,
 }

+// Some helper functions
+fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
+    if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt {
+        return Some(pp);
+    }
+    None
+}
+
+struct TokenPeek<'a, I>
+where
+    I: Iterator<Item = &'a tt::TokenTree>,
+{
+    iter: itertools::MultiPeek<I>,
+}
+
+impl<'a, I> TokenPeek<'a, I>
+where
+    I: Iterator<Item = &'a tt::TokenTree>,
+{
+    fn next(&mut self) -> Option<&tt::TokenTree> {
+        self.iter.next()
+    }
+
+    fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> {
+        if p.spacing != tt::Spacing::Joint {
+            return None;
+        }
+
+        self.iter.reset_peek();
+        let p1 = to_punct(self.iter.peek()?)?;
+        Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint))
+    }
+
+    fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> {
+        self.current_punct2(p).and_then(|((p0, p1), last_joint)| {
+            if !last_joint {
+                None
+            } else {
+                let p2 = to_punct(*self.iter.peek()?)?;
+                Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint))
+            }
+        })
+    }
+}
+
 impl TtTokenSource {
     fn new(tt: &tt::Subtree) -> TtTokenSource {
         let mut res = TtTokenSource { tokens: Vec::new() };
@@ -121,38 +166,53 @@ impl TtTokenSource {
     }
     fn convert_subtree(&mut self, sub: &tt::Subtree) {
         self.push_delim(sub.delimiter, false);
-        sub.token_trees.iter().for_each(|tt| self.convert_tt(tt));
+        let mut peek = TokenPeek { iter: itertools::multipeek(sub.token_trees.iter()) };
+        while let Some(tt) = peek.iter.next() {
+            self.convert_tt(tt, &mut peek);
+        }
         self.push_delim(sub.delimiter, true)
     }
-    fn convert_tt(&mut self, tt: &tt::TokenTree) {
+
+    fn convert_tt<'a, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'a, I>)
+    where
+        I: Iterator<Item = &'a tt::TokenTree>,
+    {
         match tt {
-            tt::TokenTree::Leaf(token) => self.convert_token(token),
+            tt::TokenTree::Leaf(token) => self.convert_token(token, iter),
             tt::TokenTree::Subtree(sub) => self.convert_subtree(sub),
         }
     }
-    fn convert_token(&mut self, token: &tt::Leaf) {
+
+    fn convert_token<'a, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'a, I>)
+    where
+        I: Iterator<Item = &'a tt::TokenTree>,
+    {
         let tok = match token {
             tt::Leaf::Literal(l) => TtToken {
                 kind: SyntaxKind::INT_NUMBER, // FIXME
                 is_joint_to_next: false,
                 text: l.text.clone(),
             },
             tt::Leaf::Punct(p) => {
-                let kind = match p.char {
-                    // lexer may produce combpund tokens for these ones
-                    '.' => DOT,
-                    ':' => COLON,
-                    '=' => EQ,
-                    '!' => EXCL,
-                    '-' => MINUS,
-                    c => SyntaxKind::from_char(c).unwrap(),
-                };
-                let text = {
-                    let mut buf = [0u8; 4];
-                    let s: &str = p.char.encode_utf8(&mut buf);
-                    SmolStr::new(s)
-                };
-                TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text }
+                if let Some(tt) = Self::convert_multi_char_punct(p, iter) {
+                    tt
+                } else {
+                    let kind = match p.char {
+                        // lexer may produce combpund tokens for these ones
+                        '.' => DOT,
+                        ':' => COLON,
+                        '=' => EQ,
+                        '!' => EXCL,
+                        '-' => MINUS,
+                        c => SyntaxKind::from_char(c).unwrap(),
+                    };
+                    let text = {
+                        let mut buf = [0u8; 4];
+                        let s: &str = p.char.encode_utf8(&mut buf);
+                        SmolStr::new(s)
+                    };
+                    TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text }
+                }
             }
             tt::Leaf::Ident(ident) => {
                 let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT);
@@ -161,6 +221,64 @@ impl TtTokenSource {
         };
         self.tokens.push(tok)
     }
+
+    fn convert_multi_char_punct<'a, I>(
+        p: &tt::Punct,
+        iter: &mut TokenPeek<'a, I>,
+    ) -> Option<TtToken>
+    where
+        I: Iterator<Item = &'a tt::TokenTree>,
+    {
+        if let Some((m, is_joint_to_next)) = iter.current_punct3(p) {
+            if let Some((kind, text)) = match m {
+                ('<', '<', '=') => Some((SHLEQ, "<<=")),
+                ('>', '>', '=') => Some((SHREQ, ">>=")),
+                ('.', '.', '.') => Some((DOTDOTDOT, "...")),
+                ('.', '.', '=') => Some((DOTDOTEQ, "..=")),
+                _ => None,
+            } {
+                iter.next();
+                iter.next();
+                return Some(TtToken { kind, is_joint_to_next, text: text.into() });
+            }
+        }
+
+        if let Some((m, is_joint_to_next)) = iter.current_punct2(p) {
+            if let Some((kind, text)) = match m {
+                ('<', '<') => Some((SHL, "<<")),
+                ('>', '>') => Some((SHR, ">>")),
+
+                ('|', '|') => Some((PIPEPIPE, "||")),
+                ('&', '&') => Some((AMPAMP, "&&")),
+                ('%', '=') => Some((PERCENTEQ, "%=")),
+                ('*', '=') => Some((STAREQ, "*=")),
+                ('/', '=') => Some((SLASHEQ, "/=")),
+                ('^', '=') => Some((CARETEQ, "^=")),
+
+                ('&', '=') => Some((AMPEQ, "&=")),
+                ('|', '=') => Some((PIPEEQ, "|=")),
+                ('-', '=') => Some((MINUSEQ, "-=")),
+                ('+', '=') => Some((PLUSEQ, "+=")),
+                ('>', '=') => Some((GTEQ, ">=")),
+                ('<', '=') => Some((LTEQ, "<=")),
+
+                ('-', '>') => Some((THIN_ARROW, "->")),
+                ('!', '=') => Some((NEQ, "!=")),
+                ('=', '>') => Some((FAT_ARROW, "=>")),
+                ('=', '=') => Some((EQEQ, "==")),
+                ('.', '.') => Some((DOTDOT, "..")),
+                (':', ':') => Some((COLONCOLON, "::")),
+
+                _ => None,
+            } {
+                iter.next();
+                return Some(TtToken { kind, is_joint_to_next, text: text.into() });
+            }
+        }
+
+        None
+    }
+
     fn push_delim(&mut self, d: tt::Delimiter, closing: bool) {
         let (kinds, texts) = match d {
             tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"),
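
To make the gluing logic above easier to follow, here is a small, self-contained sketch of the same idea. It is illustrative only: the Punct and Spacing types and the glue function below are simplified stand-ins, not the ra_mbe or ra_tt API. The control flow mirrors TokenPeek and convert_multi_char_punct: peek ahead with itertools::multipeek, try a three-character token first, then a two-character token, and otherwise fall back to the single character.

// Illustrative sketch only: Punct and Spacing stand in for tt::Punct / tt::Spacing;
// the real code also carries SyntaxKind and SmolStr text, omitted here for brevity.
use itertools::multipeek;

#[derive(Clone, Copy, PartialEq)]
enum Spacing {
    Alone,
    Joint,
}

#[derive(Clone, Copy)]
struct Punct {
    ch: char,
    spacing: Spacing,
}

// Glue runs of `Joint` punctuation into multi-byte tokens, longest match first.
fn glue(puncts: &[Punct]) -> Vec<String> {
    let mut out = Vec::new();
    let mut iter = multipeek(puncts.iter());
    while let Some(p) = iter.next() {
        if p.spacing == Spacing::Joint {
            iter.reset_peek();
            if let Some(p1) = iter.peek().copied() {
                // Try a three-character token such as `<<=` or `..=` first.
                if p1.spacing == Spacing::Joint {
                    if let Some(p2) = iter.peek().copied() {
                        let three: String = [p.ch, p1.ch, p2.ch].iter().collect();
                        if ["<<=", ">>=", "...", "..="].contains(&three.as_str()) {
                            iter.next(); // consume the second char
                            iter.next(); // consume the third char
                            out.push(three);
                            continue;
                        }
                    }
                }
                // Fall back to a two-character token such as `->`, `==`, or `::`.
                let two: String = [p.ch, p1.ch].iter().collect();
                if ["<<", ">>", "->", "=>", "==", "..", "::"].contains(&two.as_str()) {
                    iter.next(); // consume the second char
                    out.push(two);
                    continue;
                }
            }
        }
        // No multi-byte match: emit the single character, as before this PR.
        out.push(p.ch.to_string());
    }
    out
}

fn main() {
    // `>>=` reaches the bridge as three joint puncts and becomes one token.
    let input = [
        Punct { ch: '>', spacing: Spacing::Joint },
        Punct { ch: '>', spacing: Spacing::Joint },
        Punct { ch: '=', spacing: Spacing::Alone },
    ];
    assert_eq!(glue(&input), vec![">>=".to_string()]);
}

Trying the three-character combinations before the two-character ones is what keeps a sequence like `>>=` from being split into `>>` followed by `=`.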
