-
Notifications
You must be signed in to change notification settings - Fork 40
Improve whitespace handling in Lexer & Parser, warn on bad whitespace around binops #1249
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
b16d3b2
Enforce spaces around binary operators
jiribenes 2ea923a
Add spaces to 'are_we_fast_yet/nbody'
jiribenes 92f2a69
Fix more spaces in stdlib and tests
jiribenes 3068244
Reformat failing LLVM tests
jiribenes 1ddbdcc
Drive-by: let parser actually handle whitespace
jiribenes 5614617
Add simple neg test
jiribenes deca3f4
Simplify whitespace handling in lexer
jiribenes 0cde33d
Allow warnings in parser
jiribenes dc0c30d
Enforce space around binary operations as warnings only
jiribenes cfad3bb
Recover from recoverable diagnostics that are not errors
jiribenes File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ import effekt.source.* | |
| import effekt.source.Origin.Synthesized | ||
| import effekt.util.VirtualSource | ||
| import kiama.parsing.{Input, ParseResult} | ||
| import kiama.util.Severities.Severity | ||
| import kiama.util.{Position, Range, Source} | ||
|
|
||
| import scala.annotation.{tailrec, targetName} | ||
|
|
@@ -52,23 +53,24 @@ object Fail { | |
| def expectedButGot(expected: String, got: String, position: Int): Fail = | ||
| Fail(s"Expected ${expected} but got ${got}", position) | ||
| } | ||
| case class SoftFail(message: String, positionStart: Int, positionEnd: Int) | ||
|
|
||
| case class RecoverableDiagnostic(kind: Severity, message: String, positionStart: Int, positionEnd: Int) | ||
|
|
||
| class Parser(tokens: Seq[Token], source: Source) { | ||
|
|
||
| var softFails: ListBuffer[SoftFail] = ListBuffer[SoftFail]() | ||
| var recoverableDiagnostics: ListBuffer[RecoverableDiagnostic] = ListBuffer[RecoverableDiagnostic]() | ||
|
|
||
| private def report(msg: String, fromPosition: Int, toPosition: Int, source: Source = source)(using C: Context) = { | ||
| private def report(msg: String, fromPosition: Int, toPosition: Int, severity: Severity = kiama.util.Severities.Error, source: Source = source)(using C: Context) = { | ||
| val from = source.offsetToPosition(tokens(fromPosition).start) | ||
| val to = source.offsetToPosition(tokens(toPosition).end + 1) | ||
| val range = Range(from, to) | ||
| C.report(effekt.util.messages.ParseError(msg, Some(range))) | ||
| C.report(effekt.util.messages.ParseError(msg, Some(range), severity)) | ||
| } | ||
|
|
||
| def parse(input: Input)(using C: Context): Option[ModuleDecl] = { | ||
| def reportSoftFails()(using C: Context): Unit = | ||
| softFails.foreach { | ||
| case SoftFail(msg, from, to) => report(msg, from, to, source = input.source) | ||
| def reportRecoverableDiagnostics()(using C: Context): Unit = | ||
| recoverableDiagnostics.foreach { d => | ||
| report(d.message, d.positionStart, d.positionEnd, d.kind) | ||
| } | ||
|
|
||
| try { | ||
|
|
@@ -78,13 +80,16 @@ class Parser(tokens: Seq[Token], source: Source) { | |
| //val after = System.currentTimeMillis() | ||
| //println(s"${input.source.name}: ${after - before}ms") | ||
|
|
||
| // Report soft fails | ||
| reportSoftFails() | ||
| if softFails.isEmpty then res else None | ||
| // Report recoverable diagnostics | ||
| reportRecoverableDiagnostics() | ||
| // Don't fret unless we had a recoverable error | ||
| if recoverableDiagnostics.forall { d => | ||
| d.kind != kiama.util.Severities.Error | ||
| } then res else None | ||
| } catch { | ||
| case Fail(msg, pos) => | ||
| // Don't forget soft fails! | ||
| reportSoftFails() | ||
| reportRecoverableDiagnostics() | ||
|
|
||
| report(msg, pos, pos, source = input.source) | ||
| None | ||
|
|
@@ -114,6 +119,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
|
|
||
| // always points to the latest non-space position | ||
| var position: Int = 0 | ||
| spaces() // HACK: eat spaces before we begin! | ||
|
|
||
| def recover(tokenKind: TokenKind, tokenPosition: Int): TokenKind = tokenKind match { | ||
| case TokenKind.Error(err) => | ||
|
|
@@ -141,11 +147,19 @@ class Parser(tokens: Seq[Token], source: Source) { | |
|
|
||
| def peek: Token = tokens(position).failOnErrorToken(position) | ||
|
|
||
| /** | ||
| * Negative lookahead | ||
| */ | ||
| def lookbehind(offset: Int): Token = | ||
| tokens(position - offset) | ||
| def sawNewlineLast: Boolean = { | ||
| @tailrec | ||
| def go(position: Int): Boolean = | ||
| if position < 0 then fail("Unexpected start of file") | ||
|
|
||
| tokens(position).failOnErrorToken(position) match { | ||
| case token if isSpace(token.kind) && token.kind != Newline => go(position - 1) | ||
| case token if token.kind == Newline => true | ||
| case _ => false | ||
| } | ||
|
|
||
| go(position - 1) | ||
| } | ||
|
|
||
| /** | ||
| * Peeks n tokens ahead of the current one. | ||
|
|
@@ -317,7 +331,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
|
|
||
| // \n while | ||
| // ^ | ||
| case _ => lookbehind(1).kind == Newline | ||
| case _ => sawNewlineLast | ||
| } | ||
| def semi(): Unit = peek.kind match { | ||
| // \n ; while | ||
|
|
@@ -329,7 +343,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
|
|
||
| // \n while | ||
| // ^ | ||
| case _ if lookbehind(1).kind == Newline => () | ||
| case _ if sawNewlineLast => () | ||
|
|
||
| case _ => fail("Expected terminator: `;` or a newline") | ||
| } | ||
|
|
@@ -582,7 +596,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
| // If we can't parse `effectDef` or `operationDef`, we should try parsing an interface with the wrong keyword | ||
| // and report an error to the user if the malformed interface would be valid. | ||
| def interfaceDefUsingEffect(): Maybe[InterfaceDef] = | ||
| backtrack(restoreSoftFails = false): | ||
| backtrack(restoreRecoverable = false): | ||
| softFailWith("Unexpected 'effect', did you mean to declare an interface of multiple operations using the 'interface' keyword?"): | ||
| interfaceDef(info, `effect`) | ||
|
|
||
|
|
@@ -1001,6 +1015,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
| nonterminal: | ||
| var left = nonTerminal() | ||
| while (ops.contains(peek.kind)) { | ||
| checkBinaryOpWhitespace() | ||
| val op = next() | ||
| val right = nonTerminal() | ||
| left = binaryOp(left, op, right) | ||
|
|
@@ -1053,6 +1068,16 @@ class Parser(tokens: Seq[Token], source: Source) { | |
| def TypeTuple(tps: Many[Type]): Type = | ||
| TypeRef(IdRef(List("effekt"), s"Tuple${tps.size}", tps.span.synthesized), tps, tps.span.synthesized) | ||
|
|
||
| // Check that the current token is surrounded by whitespace. If not, soft fail. | ||
| private def checkBinaryOpWhitespace(): Unit = { | ||
| val wsBefore = position > 0 && isSpace(tokens(position - 1).kind) | ||
| val wsAfter = position + 1 < tokens.length && isSpace(tokens(position + 1).kind) | ||
|
|
||
| if (!wsBefore || !wsAfter) { | ||
| warn(s"Missing whitespace around binary operator", position, position) | ||
| } | ||
| } | ||
|
Comment on lines
+1071
to
+1079
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could ever-so-slightly generalise this: // Require that the current token is surrounded by whitespace. If not, soft fail.
private def requireSpacesAround(): Unit = {
val wsBefore = position > 0 && isSpace(tokens(position - 1).kind)
val wsAfter = position + 1 < tokens.length && isSpace(tokens(position + 1).kind)
if (!wsBefore || !wsAfter) {
warn(s"Missing whitespace around '${TokenKind.explain(peek.kind)}'", position, position)
}
}and then use it for other places where we really want spaces on both sides (like the |
||
|
|
||
| /** | ||
| * This is a compound production for | ||
| * - member selection <EXPR>.<NAME> | ||
|
|
@@ -1101,7 +1126,7 @@ class Parser(tokens: Seq[Token], source: Source) { | |
| // argument lists cannot follow a linebreak: | ||
| // foo == foo; | ||
| // () () | ||
| def isArguments: Boolean = lookbehind(1).kind != Newline && (peek(`(`) || peek(`[`) || peek(`{`)) | ||
| def isArguments: Boolean = !sawNewlineLast && (peek(`(`) || peek(`[`) || peek(`{`)) | ||
| def arguments(): (List[ValueType], List[ValueArg], List[Term]) = | ||
| if (!isArguments) fail("at least one argument section (types, values, or blocks)", peek.kind) | ||
| (maybeTypeArgs().unspan, maybeValueArgs(), maybeBlockArgs()) | ||
|
|
@@ -1595,9 +1620,11 @@ class Parser(tokens: Seq[Token], source: Source) { | |
| def fail(msg: String): Nothing = | ||
| throw Fail(msg, position) | ||
|
|
||
| def softFail(message: String, start: Int, end: Int): Unit = { | ||
| softFails += SoftFail(message, start, end) | ||
| } | ||
| def softFail(message: String, start: Int, end: Int): Unit = | ||
| recoverableDiagnostics += RecoverableDiagnostic(kiama.util.Severities.Error, message, start, end) | ||
|
|
||
| def warn(message: String, start: Int, end: Int): Unit = | ||
| recoverableDiagnostics += RecoverableDiagnostic(kiama.util.Severities.Warning, message, start, end) | ||
|
|
||
| inline def softFailWith[T](inline message: String)(inline p: => T): T = { | ||
| val startPosition = position | ||
|
|
@@ -1618,21 +1645,21 @@ class Parser(tokens: Seq[Token], source: Source) { | |
| inline def when[T](t: TokenKind)(inline thn: => T)(inline els: => T): T = | ||
| if peek(t) then { consume(t); thn } else els | ||
|
|
||
| inline def backtrack[T](inline restoreSoftFails: Boolean = true)(inline p: => T): Maybe[T] = | ||
| inline def backtrack[T](inline restoreRecoverable: Boolean = true)(inline p: => T): Maybe[T] = | ||
| val before = position | ||
| val beforePrevious = previous | ||
| val labelBefore = currentLabel | ||
| val softFailsBefore = softFails.clone() | ||
| val recoverableBefore = recoverableDiagnostics.clone() | ||
| try { Maybe.Some(p, span(tokens(before).end)) } catch { | ||
| case Fail(_, _) => { | ||
| position = before | ||
| previous = beforePrevious | ||
| currentLabel = labelBefore | ||
| if restoreSoftFails then softFails = softFailsBefore | ||
| if restoreRecoverable then recoverableDiagnostics = recoverableBefore | ||
| Maybe.None(Span(source, pos(), pos(), Synthesized)) | ||
| } | ||
| } | ||
| inline def backtrack[T](inline p: => T): Maybe[T] = backtrack(restoreSoftFails = true)(p) | ||
| inline def backtrack[T](inline p: => T): Maybe[T] = backtrack(restoreRecoverable = true)(p) | ||
|
|
||
| def interleave[A](xs: List[A], ys: List[A]): List[A] = (xs, ys) match { | ||
| case (x :: xs, y :: ys) => x :: y :: interleave(xs, ys) | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,7 @@ def safeDiv(x: Int, y: Int): Int / Fail = { | |
| if (y == 0){ | ||
| do Fail(); 0 | ||
| } else { | ||
| x/y | ||
| x / y | ||
| } | ||
| } | ||
|
|
||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Previously we did
lookbehind(1) == Newline, but that no longer works, thelookbehindwould need to ignore all whitespace tokens except for a newline... I found that to be too weird, so I replaced it with a specialised function to just check if the last non-space token was a newline (feel free to suggest a better name)