Skip to content

Commit f1e1046

Browse files
authored
Refactor fstrings (RustPython#4188)
1 parent 273ffd9 commit f1e1046

File tree

2 files changed

+126
-118
lines changed

2 files changed

+126
-118
lines changed

compiler/parser/src/fstring.rs

+125-117
Original file line numberDiff line numberDiff line change
@@ -4,67 +4,64 @@ use crate::{
44
error::{FStringError, FStringErrorType, ParseError},
55
parser::parse_expression,
66
};
7-
use itertools::Itertools;
87
use std::{iter, mem, str};
98

10-
struct FStringParser<'a> {
11-
chars: iter::Peekable<str::Chars<'a>>,
9+
struct FStringParser {
1210
str_location: Location,
13-
recurse_lvl: u8,
1411
}
1512

16-
impl<'a> FStringParser<'a> {
17-
fn new(source: &'a str, str_location: Location, recurse_lvl: u8) -> Self {
18-
Self {
19-
chars: source.chars().peekable(),
20-
str_location,
21-
recurse_lvl,
22-
}
13+
impl FStringParser {
14+
fn new(str_location: Location) -> Self {
15+
Self { str_location }
2316
}
2417

2518
#[inline]
2619
fn expr(&self, node: ExprKind) -> Expr {
2720
Expr::new(self.str_location, node)
2821
}
2922

30-
fn parse_formatted_value(&mut self) -> Result<Vec<Expr>, FStringErrorType> {
31-
let mut expression = String::new();
23+
fn parse_formatted_value<'a>(
24+
&mut self,
25+
mut chars: iter::Peekable<str::Chars<'a>>,
26+
nested: u8,
27+
) -> Result<(Vec<Expr>, iter::Peekable<str::Chars<'a>>), FStringErrorType> {
28+
let mut expression = String::from("{");
3229
let mut spec = None;
3330
let mut delims = Vec::new();
3431
let mut conversion = ConversionFlag::None;
3532
let mut self_documenting = false;
3633
let mut trailing_seq = String::new();
3734

38-
while let Some(ch) = self.chars.next() {
35+
while let Some(ch) = chars.next() {
3936
match ch {
4037
// This can be integrated better with the remaining code, but it is OK as a starting point.
4138
// In general, I would tokenize the f-strings here to avoid this peeking.
42-
'!' if self.chars.peek() == Some(&'=') => {
39+
'!' if chars.peek() == Some(&'=') => {
4340
expression.push_str("!=");
44-
self.chars.next();
41+
chars.next();
4542
}
4643

47-
'=' if self.chars.peek() == Some(&'=') => {
44+
'=' if chars.peek() == Some(&'=') => {
4845
expression.push_str("==");
49-
self.chars.next();
46+
chars.next();
5047
}
5148

52-
'>' if self.chars.peek() == Some(&'=') => {
49+
'>' if chars.peek() == Some(&'=') => {
5350
expression.push_str(">=");
54-
self.chars.next();
51+
chars.next();
5552
}
5653

57-
'<' if self.chars.peek() == Some(&'=') => {
54+
'<' if chars.peek() == Some(&'=') => {
5855
expression.push_str("<=");
59-
self.chars.next();
56+
chars.next();
6057
}
6158

62-
'!' if delims.is_empty() && self.chars.peek() != Some(&'=') => {
59+
'!' if delims.is_empty() && chars.peek() != Some(&'=') => {
6360
if expression.trim().is_empty() {
6461
return Err(EmptyExpression);
6562
}
6663

67-
conversion = match self.chars.next() {
64+
conversion = match chars.next() {
6865
Some('s') => ConversionFlag::Str,
6966
Some('a') => ConversionFlag::Ascii,
7067
Some('r') => ConversionFlag::Repr,
@@ -76,87 +73,34 @@ impl<'a> FStringParser<'a> {
7673
}
7774
};
7875

79-
if let Some(&peek) = self.chars.peek() {
76+
if let Some(&peek) = chars.peek() {
8077
if peek != '}' && peek != ':' {
81-
return Err(ExpectedRbrace);
78+
if expression[1..].trim().is_empty() {
79+
return Err(EmptyExpression);
80+
} else {
81+
return Err(ExpectedRbrace);
82+
}
8283
}
84+
} else if expression[1..].trim().is_empty() {
85+
return Err(EmptyExpression);
8386
} else {
8487
return Err(ExpectedRbrace);
8588
}
8689
}
8790

8891
// match a python 3.8 self documenting expression
8992
// format '{' PYTHON_EXPRESSION '=' FORMAT_SPECIFIER? '}'
90-
'=' if self.chars.peek() != Some(&'=') && delims.is_empty() => {
93+
'=' if chars.peek() != Some(&'=') && delims.is_empty() => {
9194
self_documenting = true;
9295
}
9396

9497
':' if delims.is_empty() => {
95-
let mut nested = 0;
96-
let mut spec_constructor = Vec::new();
97-
let mut constant_piece = String::new();
98-
let mut formatted_value_piece = String::new();
99-
while let Some(&next) = self.chars.peek() {
100-
match next {
101-
'{' if nested > 0 => {
102-
nested += 1;
103-
formatted_value_piece.push(next);
104-
}
105-
'}' if nested > 0 => {
106-
nested -= 1;
107-
if nested == 0 {
108-
formatted_value_piece.push(next);
109-
let values = FStringParser::new(
110-
&formatted_value_piece,
111-
Location::default(),
112-
&self.recurse_lvl + 1,
113-
)
114-
.parse()?;
115-
spec_constructor.push(values
116-
.into_iter()
117-
.exactly_one()
118-
.expect("Expected formatted value to produce exactly one expression.")
119-
);
120-
formatted_value_piece.clear();
121-
} else {
122-
formatted_value_piece.push(next);
123-
}
124-
}
125-
_ if nested > 0 => {
126-
formatted_value_piece.push(next);
127-
}
128-
'{' => {
129-
nested += 1;
130-
if !constant_piece.is_empty() {
131-
spec_constructor.push(self.expr(ExprKind::Constant {
132-
value: constant_piece.to_owned().into(),
133-
kind: None,
134-
}));
135-
constant_piece.clear();
136-
}
137-
formatted_value_piece.push(next);
138-
formatted_value_piece.push(' ');
139-
}
140-
'}' => break,
141-
_ => {
142-
constant_piece.push(next);
143-
}
144-
}
145-
self.chars.next();
146-
}
147-
if !constant_piece.is_empty() {
148-
spec_constructor.push(self.expr(ExprKind::Constant {
149-
value: constant_piece.to_owned().into(),
150-
kind: None,
151-
}));
152-
constant_piece.clear();
153-
}
154-
if nested > 0 {
155-
return Err(UnclosedLbrace);
156-
}
98+
let (parsed_spec, remaining_chars) = self.parse_spec(chars, nested)?;
99+
157100
spec = Some(Box::new(self.expr(ExprKind::JoinedStr {
158-
values: spec_constructor,
159-
})))
101+
values: parsed_spec,
102+
})));
103+
chars = remaining_chars;
160104
}
161105
'(' | '{' | '[' => {
162106
expression.push(ch);
@@ -181,13 +125,15 @@ impl<'a> FStringParser<'a> {
181125
expression.push(ch);
182126
}
183127
'}' => {
184-
if expression.is_empty() {
128+
if expression[1..].trim().is_empty() {
185129
return Err(EmptyExpression);
186130
}
131+
expression.push(ch);
132+
187133
let ret = if !self_documenting {
188134
vec![self.expr(ExprKind::FormattedValue {
189135
value: Box::new(
190-
parse_fstring_expr(&expression)
136+
parse_fstring_expr(&expression[1..expression.len() - 1])
191137
.map_err(|e| InvalidExpression(Box::new(e.error)))?,
192138
),
193139
conversion: conversion as _,
@@ -196,7 +142,9 @@ impl<'a> FStringParser<'a> {
196142
} else {
197143
vec![
198144
self.expr(ExprKind::Constant {
199-
value: Constant::Str(expression.clone() + "="),
145+
value: Constant::Str(
146+
expression[1..expression.len() - 1].to_owned() + "=",
147+
),
200148
kind: None,
201149
}),
202150
self.expr(ExprKind::Constant {
@@ -205,7 +153,7 @@ impl<'a> FStringParser<'a> {
205153
}),
206154
self.expr(ExprKind::FormattedValue {
207155
value: Box::new(
208-
parse_fstring_expr(&expression)
156+
parse_fstring_expr(&expression[1..expression.len() - 1])
209157
.map_err(|e| InvalidExpression(Box::new(e.error)))?,
210158
),
211159
conversion: (if conversion == ConversionFlag::None && spec.is_none()
@@ -218,11 +166,11 @@ impl<'a> FStringParser<'a> {
218166
}),
219167
]
220168
};
221-
return Ok(ret);
169+
return Ok((ret, chars));
222170
}
223171
'"' | '\'' => {
224172
expression.push(ch);
225-
for next in &mut self.chars {
173+
for next in &mut chars {
226174
expression.push(next);
227175
if next == ch {
228176
break;
@@ -236,48 +184,105 @@ impl<'a> FStringParser<'a> {
236184
if self_documenting {
237185
return Err(ExpectedRbrace);
238186
}
187+
239188
expression.push(ch);
240189
}
241190
}
242191
}
243192
Err(UnclosedLbrace)
244193
}
245194

246-
fn parse(mut self) -> Result<Vec<Expr>, FStringErrorType> {
247-
if self.recurse_lvl >= 2 {
195+
fn parse_spec<'a>(
196+
&mut self,
197+
mut chars: iter::Peekable<str::Chars<'a>>,
198+
nested: u8,
199+
) -> Result<(Vec<Expr>, iter::Peekable<str::Chars<'a>>), FStringErrorType> {
200+
let mut spec_constructor = Vec::new();
201+
let mut constant_piece = String::new();
202+
while let Some(&next) = chars.peek() {
203+
match next {
204+
'{' => {
205+
if !constant_piece.is_empty() {
206+
spec_constructor.push(self.expr(ExprKind::Constant {
207+
value: constant_piece.to_owned().into(),
208+
kind: None,
209+
}));
210+
constant_piece.clear();
211+
}
212+
let (parsed_expr, remaining_chars) = self.parse(chars, nested + 1)?;
213+
spec_constructor.extend(parsed_expr);
214+
chars = remaining_chars;
215+
continue;
216+
}
217+
'}' => {
218+
break;
219+
}
220+
_ => {
221+
constant_piece.push(next);
222+
}
223+
}
224+
chars.next();
225+
}
226+
if !constant_piece.is_empty() {
227+
spec_constructor.push(self.expr(ExprKind::Constant {
228+
value: constant_piece.to_owned().into(),
229+
kind: None,
230+
}));
231+
constant_piece.clear();
232+
}
233+
Ok((spec_constructor, chars))
234+
}
235+
236+
fn parse<'a>(
237+
&mut self,
238+
mut chars: iter::Peekable<str::Chars<'a>>,
239+
nested: u8,
240+
) -> Result<(Vec<Expr>, iter::Peekable<str::Chars<'a>>), FStringErrorType> {
241+
if nested >= 2 {
248242
return Err(ExpressionNestedTooDeeply);
249243
}
250244

251245
let mut content = String::new();
252246
let mut values = vec![];
253247

254-
while let Some(ch) = self.chars.next() {
248+
while let Some(&ch) = chars.peek() {
255249
match ch {
256250
'{' => {
257-
if let Some('{') = self.chars.peek() {
258-
self.chars.next();
259-
content.push('{');
260-
} else {
261-
if !content.is_empty() {
262-
values.push(self.expr(ExprKind::Constant {
263-
value: mem::take(&mut content).into(),
264-
kind: None,
265-
}));
251+
chars.next();
252+
if nested == 0 {
253+
if let Some('{') = chars.peek() {
254+
chars.next();
255+
content.push('{');
256+
continue;
266257
}
267-
268-
values.extend(self.parse_formatted_value()?);
269258
}
259+
if !content.is_empty() {
260+
values.push(self.expr(ExprKind::Constant {
261+
value: mem::take(&mut content).into(),
262+
kind: None,
263+
}));
264+
}
265+
266+
let (parsed_values, remaining_chars) =
267+
self.parse_formatted_value(chars, nested)?;
268+
values.extend(parsed_values);
269+
chars = remaining_chars;
270270
}
271271
'}' => {
272-
if let Some('}') = self.chars.peek() {
273-
self.chars.next();
272+
if nested > 0 {
273+
break;
274+
}
275+
chars.next();
276+
if let Some('}') = chars.peek() {
277+
chars.next();
274278
content.push('}');
275279
} else {
276280
return Err(UnopenedRbrace);
277281
}
278282
}
279283
_ => {
280284
content.push(ch);
285+
chars.next();
281286
}
282287
}
283288
}
@@ -289,7 +294,7 @@ impl<'a> FStringParser<'a> {
289294
}))
290295
}
291296

292-
Ok(values)
297+
Ok((values, chars))
293298
}
294299
}
295300

@@ -301,8 +306,9 @@ fn parse_fstring_expr(source: &str) -> Result<Expr, ParseError> {
301306
/// Parse an f-string from a string, located at a certain position in the source code.
302307
/// In case of an error, the error and its location are returned.
303308
pub fn parse_located_fstring(source: &str, location: Location) -> Result<Vec<Expr>, FStringError> {
304-
FStringParser::new(source, location, 0)
305-
.parse()
309+
FStringParser::new(location)
310+
.parse(source.chars().peekable(), 0)
311+
.map(|(e, _)| e)
306312
.map_err(|error| FStringError { error, location })
307313
}
308314

@@ -311,7 +317,9 @@ mod tests {
311317
use super::*;
312318

313319
fn parse_fstring(source: &str) -> Result<Vec<Expr>, FStringErrorType> {
314-
FStringParser::new(source, Location::default(), 0).parse()
320+
FStringParser::new(Location::default())
321+
.parse(source.chars().peekable(), 0)
322+
.map(|(e, _)| e)
315323
}
316324

317325
#[test]

0 commit comments

Comments
 (0)