Skip to content

Be more defensive in the parser translator lexer #3551

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions lib/prism/translation/parser/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def to_a
when :tCOMMENT
if token.type == :EMBDOC_BEGIN

while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
while !((next_token, _ = lexed[index]) && next_token.type == :EMBDOC_END) && (index < length - 1)
value += next_token.value
index += 1
end
Expand All @@ -287,8 +287,8 @@ def to_a
is_at_eol = value.chomp!.nil?
location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))

prev_token = lexed[index - 2][0] if index - 2 >= 0
next_token = lexed[index][0]
prev_token, _ = lexed[index - 2]
next_token, _ = lexed[index]

is_inline_comment = prev_token&.location&.start_line == token.location.start_line
if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
Expand All @@ -307,7 +307,7 @@ def to_a
end
end
when :tNL
next_token = next_token = lexed[index][0]
next_token, _ = lexed[index]
# Newlines after comments are emitted out of order.
if next_token&.type == :COMMENT
comment_newline_location = location
Expand Down Expand Up @@ -344,8 +344,8 @@ def to_a
location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
value = nil
when :tSTRING_BEG
next_token = lexed[index][0]
next_next_token = lexed[index + 1][0]
next_token, _ = lexed[index]
next_next_token, _ = lexed[index + 1]
basic_quotes = value == '"' || value == "'"

if basic_quotes && next_token&.type == :STRING_END
Expand Down Expand Up @@ -413,7 +413,7 @@ def to_a
while token.type == :STRING_CONTENT
current_length += token.value.bytesize
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2]&.first.location&.start_line
# The parser gem only removes indentation when the heredoc is not nested
not_nested = heredoc_stack.size == 1
if is_percent_array
Expand All @@ -427,7 +427,7 @@ def to_a
tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
break
end
token = lexed[index][0]
token, _ = lexed[index]
index += 1
end
else
Expand Down Expand Up @@ -482,7 +482,7 @@ def to_a
end

if percent_array?(quote_stack.pop)
prev_token = lexed[index - 2][0] if index - 2 >= 0
prev_token, _ = lexed[index - 2]
empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
ends_with_whitespace = prev_token&.type == :WORDS_SEP
# parser always emits a space token after content in a percent array, even if no actual whitespace is present.
Expand All @@ -491,7 +491,7 @@ def to_a
end
end
when :tSYMBEG
if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
if (next_token, _ = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
next_location = token.location.join(next_token.location)
type = :tSYMBOL
value = next_token.value
Expand All @@ -506,13 +506,13 @@ def to_a
type = :tIDENTIFIER
end
when :tXSTRING_BEG
if (next_token = lexed[index][0]) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
if (next_token, _ = lexed[index]) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
# self.`()
type = :tBACK_REF2
end
quote_stack.push(value)
when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
if (next_token = lexed[index][0]) && next_token.type == :WORDS_SEP
if (next_token, _ = lexed[index]) && next_token.type == :WORDS_SEP
index += 1
end

Expand Down Expand Up @@ -588,9 +588,9 @@ def calculate_heredoc_whitespace(heredoc_token_index)
previous_line = -1
result = Float::MAX

while (lexed[next_token_index] && next_token = lexed[next_token_index][0])
while (next_token, _ = lexed[next_token_index])
next_token_index += 1
next_next_token = lexed[next_token_index] && lexed[next_token_index][0]
next_next_token, _ = lexed[next_token_index]
first_token_on_line = next_token.location.start_column == 0

# String content inside nested heredocs and interpolation is ignored
Expand Down
16 changes: 16 additions & 0 deletions test/prism/ruby/parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,22 @@ def test_current_parser_for_current_ruby
end
end

# Tokenizing source that contains a `when` with no condition must raise
# Parser::SyntaxError once every diagnostic is configured to be fatal.
def test_invalid_syntax
  source_buffer = Parser::Source::Buffer.new("(string)")
  # Heredoc lines share one indent level so `<<~` yields the exact same text.
  source_buffer.source = <<~RUBY
    foo do
    case bar
    when
    end
    end
  RUBY

  translator = Prism::Translation::Parser33.new
  # Promote all reported errors to fatal so they surface as exceptions.
  translator.diagnostics.all_errors_are_fatal = true

  assert_raise(Parser::SyntaxError) do
    translator.tokenize(source_buffer)
  end
end

def test_it_block_parameter_syntax
it_fixture_path = Pathname(__dir__).join("../../../test/prism/fixtures/it.txt")

Expand Down
Loading