-
Notifications
You must be signed in to change notification settings - Fork 40
Improve whitespace handling in Lexer & Parser, warn on bad whitespace around binops #1249
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
b16d3b2
2ea923a
92f2a69
3068244
1ddbdcc
5614617
deca3f4
0cde33d
dc0c30d
cfad3bb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -303,11 +303,11 @@ class Lexer(source: Source) extends Iterator[Token] { | |
| private def skipWhitespace(): Unit = | ||
| while !atEndOfInput do | ||
| currentChar match { | ||
| case ' ' | '\t' => advance() | ||
| case ' ' | '\t' => return // Stop here, let spaces be handled as a token | ||
| case '\n' => return // Stop here, let newline be handled as a token | ||
| case '\r' => | ||
| if nextChar == '\n' then return // Stop here for \r\n | ||
| else advance() // Treat standalone \r as whitespace | ||
| else advance() // Skip standalone \r | ||
| case _ => return | ||
| } | ||
jiribenes marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
|
|
@@ -351,6 +351,11 @@ class Lexer(source: Source) extends Iterator[Token] { | |
| advance() | ||
| } | ||
|
|
||
| private def advanceSpaces(): TokenKind = { | ||
| advanceWhile { (curr, _) => curr == ' ' || curr == '\t' } | ||
| TokenKind.Space | ||
| } | ||
|
|
||
| private def peekAhead(offset: Int): Char = | ||
| val targetIndex = position.offset + offset | ||
| if targetIndex < source.content.length then | ||
|
|
@@ -380,6 +385,8 @@ class Lexer(source: Source) extends Iterator[Token] { | |
| } | ||
|
|
||
| (currentChar, nextChar) match { | ||
| case (' ', _) => advanceSpaces() | ||
| case ('\t', _) => advanceSpaces() | ||
| case ('\n', _) => advanceWith(TokenKind.Newline) | ||
| case ('\r', '\n') => advance2With(TokenKind.Newline) | ||
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -114,6 +114,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
|
|
||
| // always points to the latest non-space position | ||
| var position: Int = 0 | ||
| spaces() // HACK: eat spaces before we begin! | ||
|
|
||
| def recover(tokenKind: TokenKind, tokenPosition: Int): TokenKind = tokenKind match { | ||
| case TokenKind.Error(err) => | ||
|
|
@@ -141,11 +142,19 @@ class Parser(tokens: Seq[Token], source: Source) { | |
|
|
||
| def peek: Token = tokens(position).failOnErrorToken(position) | ||
|
|
||
| /** | ||
| * Negative lookahead | ||
| */ | ||
| def lookbehind(offset: Int): Token = | ||
| tokens(position - offset) | ||
| def sawNewlineLast: Boolean = { | ||
| @tailrec | ||
| def go(position: Int): Boolean = | ||
| if position < 0 then fail("Unexpected start of file") | ||
|
|
||
| tokens(position).failOnErrorToken(position) match { | ||
| case token if isSpace(token.kind) && token.kind != Newline => go(position - 1) | ||
| case token if token.kind == Newline => true | ||
| case _ => false | ||
| } | ||
|
|
||
| go(position - 1) | ||
| } | ||
|
Comment on lines
-144
to
+162
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously we did |
||
|
|
||
| /** | ||
| * Peeks n tokens ahead of the current one. | ||
|
|
@@ -317,7 +326,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
|
|
||
| // \n while | ||
| // ^ | ||
| case _ => lookbehind(1).kind == Newline | ||
| case _ => sawNewlineLast | ||
| } | ||
| def semi(): Unit = peek.kind match { | ||
| // \n ; while | ||
|
|
@@ -329,7 +338,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
|
|
||
| // \n while | ||
| // ^ | ||
| case _ if lookbehind(1).kind == Newline => () | ||
| case _ if sawNewlineLast => () | ||
|
|
||
| case _ => fail("Expected terminator: `;` or a newline") | ||
| } | ||
|
|
@@ -1001,6 +1010,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
| nonterminal: | ||
| var left = nonTerminal() | ||
| while (ops.contains(peek.kind)) { | ||
| checkBinaryOpWhitespace() | ||
| val op = next() | ||
| val right = nonTerminal() | ||
| left = binaryOp(left, op, right) | ||
|
|
@@ -1053,6 +1063,17 @@ class Parser(tokens: Seq[Token], source: Source) { | |
| def TypeTuple(tps: Many[Type]): Type = | ||
| TypeRef(IdRef(List("effekt"), s"Tuple${tps.size}", tps.span.synthesized), tps, tps.span.synthesized) | ||
|
|
||
| // Check that the current token is surrounded by whitespace. If not, soft fail. | ||
| private def checkBinaryOpWhitespace(): Unit = { | ||
| // position points to the operator token in the raw token array | ||
| val wsBefore = position > 0 && isSpace(tokens(position - 1).kind) | ||
| val wsAfter = position + 1 < tokens.length && isSpace(tokens(position + 1).kind) | ||
|
|
||
| if (!wsBefore || !wsAfter) { | ||
| softFail(s"Missing whitespace around binary operator", position, position) | ||
| } | ||
| } | ||
|
Comment on lines
1071
to
1079
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now I'm very happy about the implementation :) |
||
|
|
||
| /** | ||
| * This is a compound production for | ||
| * - member selection <EXPR>.<NAME> | ||
|
|
@@ -1101,7 +1122,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
| // argument lists cannot follow a linebreak: | ||
| // foo == foo; | ||
| // () () | ||
| def isArguments: Boolean = lookbehind(1).kind != Newline && (peek(`(`) || peek(`[`) || peek(`{`)) | ||
| def isArguments: Boolean = !sawNewlineLast && (peek(`(`) || peek(`[`) || peek(`{`)) | ||
| def arguments(): (List[ValueType], List[ValueArg], List[Term]) = | ||
| if (!isArguments) fail("at least one argument section (types, values, or blocks)", peek.kind) | ||
| (maybeTypeArgs().unspan, maybeValueArgs(), maybeBlockArgs()) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,7 @@ def safeDiv(x: Int, y: Int): Int / Fail = { | |
| if (y == 0){ | ||
| do Fail(); 0 | ||
| } else { | ||
| x/y | ||
| x / y | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| [error] examples/neg/parsing/bin_op_spaces.effekt:10:13: Missing whitespace around binary operator | ||
| val _ = 1 +2 | ||
| ^ | ||
| [error] examples/neg/parsing/bin_op_spaces.effekt:11:12: Missing whitespace around binary operator | ||
| val _ = 1+ 2 | ||
| ^ | ||
| [error] examples/neg/parsing/bin_op_spaces.effekt:12:12: Missing whitespace around binary operator | ||
| val _ = 1+2 | ||
| ^ | ||
| [error] examples/neg/parsing/bin_op_spaces.effekt:16:1: Expected expression but got } | ||
| } | ||
| ^ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| def main() = { | ||
| // OK | ||
| val _ = 1 + 2 | ||
| val _ = 1 + 2 | ||
| val _ = 1 + 2 | ||
| val _ = 1 + | ||
| 2 | ||
|
|
||
| // FAIL | ||
| val _ = 1 +2 | ||
| val _ = 1+ 2 | ||
| val _ = 1+2 | ||
|
|
||
| // NOTE: this is here on purpose to showcase error recovery! | ||
| ( | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be called
skipNewlineinstead maybe?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed it outright, it's not needed anymore, I think.