From 877f497bd17b11947ddc12af208d7a54a0721fdc Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 21 Dec 2024 15:36:03 -0500 Subject: [PATCH] Support leading logical operators --- src/prism.c | 94 +++++++++++++++++-- test/prism/fixtures/leading_logical.txt | 21 +++++ test/prism/fixtures_test.rb | 2 + test/prism/lex_test.rb | 4 + test/prism/ruby/ripper_test.rb | 3 + test/prism/ruby/ruby_parser_test.rb | 1 + test/prism/snapshots/leading_logical.txt | 109 +++++++++++++++++++++++ 7 files changed, 227 insertions(+), 7 deletions(-) create mode 100644 test/prism/fixtures/leading_logical.txt create mode 100644 test/prism/snapshots/leading_logical.txt diff --git a/src/prism.c b/src/prism.c index ab8de969757..1dae714c98c 100644 --- a/src/prism.c +++ b/src/prism.c @@ -10829,14 +10829,37 @@ parser_lex(pm_parser_t *parser) { following = next_newline(following, parser->end - following); } - // If the lex state was ignored, or we hit a '.' or a '&.', - // we will lex the ignored newline + // If the lex state was ignored, we will lex the + // ignored newline. + if (lex_state_ignored_p(parser)) { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lexed_comment = false; + goto lex_next_token; + } + + // If we hit a '.' or a '&.' we will lex the ignored + // newline. + if (following && ( + (peek_at(parser, following) == '.') || + (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.') + )) { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lexed_comment = false; + goto lex_next_token; + } + + + // If we are parsing as CRuby 3.5 or later and we + // hit a '&&' or a '||' then we will lex the ignored + // newline. if ( - lex_state_ignored_p(parser) || - (following && ( - (peek_at(parser, following) == '.') || - (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.') - )) + (parser->version == PM_OPTIONS_VERSION_LATEST) && + following && ( + (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') || + (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') || + (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3)) || + (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2)) + ) ) { if (!lexed_comment) parser_lex_ignored_newline(parser); lexed_comment = false; @@ -10876,6 +10899,63 @@ parser_lex(pm_parser_t *parser) { parser->next_start = NULL; LEX(PM_TOKEN_AMPERSAND_DOT); } + + if (parser->version == PM_OPTIONS_VERSION_LATEST) { + // If we hit an && then we are in a logical chain + // and we need to return the logical operator. + if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->current.start = next_content; + parser->current.end = next_content + 2; + parser->next_start = NULL; + LEX(PM_TOKEN_AMPERSAND_AMPERSAND); + } + + // If we hit a || then we are in a logical chain and + // we need to return the logical operator. + if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->current.start = next_content; + parser->current.end = next_content + 2; + parser->next_start = NULL; + LEX(PM_TOKEN_PIPE_PIPE); + } + + // If we hit an 'and' then we are in a logical chain + // and we need to return the logical operator. + if ( + peek_at(parser, next_content) == 'a' && + peek_at(parser, next_content + 1) == 'n' && + peek_at(parser, next_content + 2) == 'd' && + !char_is_identifier(parser, next_content + 3) + ) { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->current.start = next_content; + parser->current.end = next_content + 3; + parser->next_start = NULL; + parser->command_start = true; + LEX(PM_TOKEN_KEYWORD_AND); + } + + // If we hit a 'or' then we are in a logical chain + // and we need to return the logical operator. + if ( + peek_at(parser, next_content) == 'o' && + peek_at(parser, next_content + 1) == 'r' && + !char_is_identifier(parser, next_content + 2) + ) { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->current.start = next_content; + parser->current.end = next_content + 2; + parser->next_start = NULL; + parser->command_start = true; + LEX(PM_TOKEN_KEYWORD_OR); + } + } } // At this point we know this is a regular newline, and we can set the diff --git a/test/prism/fixtures/leading_logical.txt b/test/prism/fixtures/leading_logical.txt new file mode 100644 index 00000000000..feb5ee245c8 --- /dev/null +++ b/test/prism/fixtures/leading_logical.txt @@ -0,0 +1,21 @@ +1 +&& 2 +&& 3 + +1 +|| 2 +|| 3 + +1 +and 2 +and 3 + +1 +or 2 +or 3 + +1 +andfoo + +2 +orfoo diff --git a/test/prism/fixtures_test.rb b/test/prism/fixtures_test.rb index 7225b4ac66c..0d75b7282f4 100644 --- a/test/prism/fixtures_test.rb +++ b/test/prism/fixtures_test.rb @@ -14,6 +14,8 @@ class FixturesTest < TestCase # https://bugs.ruby-lang.org/issues/19539 except << "heredocs_leading_whitespace.txt" if RUBY_VERSION < "3.3.0" + except << "leading_logical.txt" if RUBY_VERSION < "3.5.0" + Fixture.each(except: except) do |fixture| define_method(fixture.test_name) { assert_valid_syntax(fixture.read) } end diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb index 7eac677ef70..03212ad94ca 100644 --- a/test/prism/lex_test.rb +++ b/test/prism/lex_test.rb @@ -30,6 +30,10 @@ class LexTest < TestCase except << "heredocs_leading_whitespace.txt" end + if RUBY_VERSION < "3.5.0" + except << "leading_logical.txt" + end + Fixture.each(except: except) do |fixture| define_method(fixture.test_name) { assert_lex(fixture) } end diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index 8db47da3d35..2c85f88ada6 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -8,6 +8,9 @@ module Prism class RipperTest < TestCase # Skip these tests that Ripper is reporting the wrong results for. incorrect = [ + # Not yet supported. + "leading_logical.txt", + # Ripper incorrectly attributes the block to the keyword. "seattlerb/block_break.txt", "seattlerb/block_next.txt", diff --git a/test/prism/ruby/ruby_parser_test.rb b/test/prism/ruby/ruby_parser_test.rb index a13daeeb849..5df4177c10c 100644 --- a/test/prism/ruby/ruby_parser_test.rb +++ b/test/prism/ruby/ruby_parser_test.rb @@ -48,6 +48,7 @@ class RubyParserTest < TestCase "alias.txt", "dos_endings.txt", "heredocs_with_ignored_newlines.txt", + "leading_logical.txt", "method_calls.txt", "methods.txt", "multi_write.txt", diff --git a/test/prism/snapshots/leading_logical.txt b/test/prism/snapshots/leading_logical.txt new file mode 100644 index 00000000000..98b00424914 --- /dev/null +++ b/test/prism/snapshots/leading_logical.txt @@ -0,0 +1,109 @@ +@ ProgramNode (location: (1,0)-(21,5)) +├── flags: ∅ +├── locals: [] +└── statements: + @ StatementsNode (location: (1,0)-(21,5)) + ├── flags: ∅ + └── body: (length: 8) + ├── @ AndNode (location: (1,0)-(3,4)) + │ ├── flags: newline + │ ├── left: + │ │ @ AndNode (location: (1,0)-(2,4)) + │ │ ├── flags: ∅ + │ │ ├── left: + │ │ │ @ IntegerNode (location: (1,0)-(1,1)) + │ │ │ ├── flags: static_literal, decimal + │ │ │ └── value: 1 + │ │ ├── right: + │ │ │ @ IntegerNode (location: (2,3)-(2,4)) + │ │ │ ├── flags: static_literal, decimal + │ │ │ └── value: 2 + │ │ └── operator_loc: (2,0)-(2,2) = "&&" + │ ├── right: + │ │ @ IntegerNode (location: (3,3)-(3,4)) + │ │ ├── flags: static_literal, decimal + │ │ └── value: 3 + │ └── operator_loc: (3,0)-(3,2) = "&&" + ├── @ OrNode (location: (5,0)-(7,4)) + │ ├── flags: newline + │ ├── left: + │ │ @ OrNode (location: (5,0)-(6,4)) + │ │ ├── flags: ∅ + │ │ ├── left: + │ │ │ @ IntegerNode (location: (5,0)-(5,1)) + │ │ │ ├── flags: static_literal, decimal + │ │ │ └── value: 1 + │ │ ├── right: + │ │ │ @ IntegerNode (location: (6,3)-(6,4)) + │ │ │ ├── flags: static_literal, decimal + │ │ │ └── value: 2 + │ │ └── operator_loc: (6,0)-(6,2) = "||" + │ ├── right: + │ │ @ IntegerNode (location: (7,3)-(7,4)) + │ │ ├── flags: static_literal, decimal + │ │ └── value: 3 + │ └── operator_loc: (7,0)-(7,2) = "||" + ├── @ AndNode (location: (9,0)-(11,5)) + │ ├── flags: newline + │ ├── left: + │ │ @ AndNode (location: (9,0)-(10,5)) + │ │ ├── flags: ∅ + │ │ ├── left: + │ │ │ @ IntegerNode (location: (9,0)-(9,1)) + │ │ │ ├── flags: static_literal, decimal + │ │ │ └── value: 1 + │ │ ├── right: + │ │ │ @ IntegerNode (location: (10,4)-(10,5)) + │ │ │ ├── flags: static_literal, decimal + │ │ │ └── value: 2 + │ │ └── operator_loc: (10,0)-(10,3) = "and" + │ ├── right: + │ │ @ IntegerNode (location: (11,4)-(11,5)) + │ │ ├── flags: static_literal, decimal + │ │ └── value: 3 + │ └── operator_loc: (11,0)-(11,3) = "and" + ├── @ OrNode (location: (13,0)-(15,4)) + │ ├── flags: newline + │ ├── left: + │ │ @ OrNode (location: (13,0)-(14,4)) + │ │ ├── flags: ∅ + │ │ ├── left: + │ │ │ @ IntegerNode (location: (13,0)-(13,1)) + │ │ │ ├── flags: static_literal, decimal + │ │ │ └── value: 1 + │ │ ├── right: + │ │ │ @ IntegerNode (location: (14,3)-(14,4)) + │ │ │ ├── flags: static_literal, decimal + │ │ │ └── value: 2 + │ │ └── operator_loc: (14,0)-(14,2) = "or" + │ ├── right: + │ │ @ IntegerNode (location: (15,3)-(15,4)) + │ │ ├── flags: static_literal, decimal + │ │ └── value: 3 + │ └── operator_loc: (15,0)-(15,2) = "or" + ├── @ IntegerNode (location: (17,0)-(17,1)) + │ ├── flags: newline, static_literal, decimal + │ └── value: 1 + ├── @ CallNode (location: (18,0)-(18,6)) + │ ├── flags: newline, variable_call, ignore_visibility + │ ├── receiver: ∅ + │ ├── call_operator_loc: ∅ + │ ├── name: :andfoo + │ ├── message_loc: (18,0)-(18,6) = "andfoo" + │ ├── opening_loc: ∅ + │ ├── arguments: ∅ + │ ├── closing_loc: ∅ + │ └── block: ∅ + ├── @ IntegerNode (location: (20,0)-(20,1)) + │ ├── flags: newline, static_literal, decimal + │ └── value: 2 + └── @ CallNode (location: (21,0)-(21,5)) + ├── flags: newline, variable_call, ignore_visibility + ├── receiver: ∅ + ├── call_operator_loc: ∅ + ├── name: :orfoo + ├── message_loc: (21,0)-(21,5) = "orfoo" + ├── opening_loc: ∅ + ├── arguments: ∅ + ├── closing_loc: ∅ + └── block: ∅