From 263df495be0b2c25ca0919082782362264e84d09 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Sat, 21 Dec 2024 15:36:03 -0500
Subject: [PATCH] Support leading logical operators

---
 src/prism.c                              |  92 +++++++++++++++++--
 test/prism/fixtures/leading_logical.txt  |  21 +++++
 test/prism/fixtures_test.rb              |   2 +
 test/prism/lex_test.rb                   |   4 +
 test/prism/ruby/ripper_test.rb           |   3 +
 test/prism/ruby/ruby_parser_test.rb      |   1 +
 test/prism/snapshots/leading_logical.txt | 109 +++++++++++++++++++++++
 7 files changed, 225 insertions(+), 7 deletions(-)
 create mode 100644 test/prism/fixtures/leading_logical.txt
 create mode 100644 test/prism/snapshots/leading_logical.txt

diff --git a/src/prism.c b/src/prism.c
index ab8de969757..e2472b8ceaf 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -10829,14 +10829,37 @@ parser_lex(pm_parser_t *parser) {
                                 following = next_newline(following, parser->end - following);
                             }
 
-                            // If the lex state was ignored, or we hit a '.' or a '&.',
-                            // we will lex the ignored newline
+                            // If the lex state was ignored, we will lex the
+                            // ignored newline.
+                            if (lex_state_ignored_p(parser)) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lexed_comment = false;
+                                goto lex_next_token;
+                            }
+
+                            // If we hit a '.' or a '&.' we will lex the ignored
+                            // newline.
+                            if (following && (
+                                (peek_at(parser, following) == '.') ||
+                                (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
+                            )) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lexed_comment = false;
+                                goto lex_next_token;
+                            }
+
+
+                            // If we are parsing as CRuby 3.5 or later and we
+                            // hit a '&&', '||', 'and', or 'or' then we will lex
+                            // the ignored newline.
                             if (
-                                lex_state_ignored_p(parser) ||
-                                (following && (
-                                    (peek_at(parser, following) == '.') ||
-                                    (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
-                                ))
+                                (parser->version == PM_OPTIONS_VERSION_LATEST) &&
+                                following && (
+                                    (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
+                                    (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
+                                    (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3)) ||
+                                    (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2))
+                                )
                             ) {
                                 if (!lexed_comment) parser_lex_ignored_newline(parser);
                                 lexed_comment = false;
@@ -10876,6 +10899,61 @@ parser_lex(pm_parser_t *parser) {
                             parser->next_start = NULL;
                             LEX(PM_TOKEN_AMPERSAND_DOT);
                         }
+
+                        if (parser->version == PM_OPTIONS_VERSION_LATEST) {
+                            // If we hit an && then we are in a logical chain
+                            // and we need to return the logical operator.
+                            if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 2;
+                                parser->next_start = NULL;
+                                LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
+                            }
+
+                            // If we hit a || then we are in a logical chain and
+                            // we need to return the logical operator.
+                            if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 2;
+                                parser->next_start = NULL;
+                                LEX(PM_TOKEN_PIPE_PIPE);
+                            }
+
+                            // If we hit an 'and' then we are in a logical chain
+                            // and we need to return the logical operator.
+                            if (
+                                peek_at(parser, next_content) == 'a' &&
+                                peek_at(parser, next_content + 1) == 'n' &&
+                                peek_at(parser, next_content + 2) == 'd' &&
+                                !char_is_identifier(parser, next_content + 3)
+                            ) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 3;
+                                parser->next_start = NULL;
+                                LEX(PM_TOKEN_KEYWORD_AND);
+                            }
+
+                            // If we hit an 'or' then we are in a logical chain
+                            // and we need to return the logical operator.
+                            if (
+                                peek_at(parser, next_content) == 'o' &&
+                                peek_at(parser, next_content + 1) == 'r' &&
+                                !char_is_identifier(parser, next_content + 2)
+                            ) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 2;
+                                parser->next_start = NULL;
+                                LEX(PM_TOKEN_KEYWORD_OR);
+                            }
+                        }
                     }
 
                     // At this point we know this is a regular newline, and we can set the
diff --git a/test/prism/fixtures/leading_logical.txt b/test/prism/fixtures/leading_logical.txt
new file mode 100644
index 00000000000..feb5ee245c8
--- /dev/null
+++ b/test/prism/fixtures/leading_logical.txt
@@ -0,0 +1,21 @@
+1
+&& 2
+&& 3
+
+1
+|| 2
+|| 3
+
+1
+and 2
+and 3
+
+1
+or 2
+or 3
+
+1
+andfoo
+
+2
+orfoo
diff --git a/test/prism/fixtures_test.rb b/test/prism/fixtures_test.rb
index 7225b4ac66c..0d75b7282f4 100644
--- a/test/prism/fixtures_test.rb
+++ b/test/prism/fixtures_test.rb
@@ -14,6 +14,8 @@ class FixturesTest < TestCase
     # https://bugs.ruby-lang.org/issues/19539
     except << "heredocs_leading_whitespace.txt" if RUBY_VERSION < "3.3.0"
 
+    except << "leading_logical.txt" if RUBY_VERSION < "3.5.0"
+
     Fixture.each(except: except) do |fixture|
       define_method(fixture.test_name) { assert_valid_syntax(fixture.read) }
     end
diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb
index 7eac677ef70..03212ad94ca 100644
--- a/test/prism/lex_test.rb
+++ b/test/prism/lex_test.rb
@@ -30,6 +30,10 @@ class LexTest < TestCase
       except << "heredocs_leading_whitespace.txt"
     end
 
+    if RUBY_VERSION < "3.5.0"
+      except << "leading_logical.txt"
+    end
+
     Fixture.each(except: except) do |fixture|
       define_method(fixture.test_name) { assert_lex(fixture) }
     end
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 8db47da3d35..2c85f88ada6 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -8,6 +8,9 @@ module Prism
   class RipperTest < TestCase
     # Skip these tests that Ripper is reporting the wrong results for.
     incorrect = [
+      # Not yet supported.
+      "leading_logical.txt",
+
       # Ripper incorrectly attributes the block to the keyword.
       "seattlerb/block_break.txt",
       "seattlerb/block_next.txt",
diff --git a/test/prism/ruby/ruby_parser_test.rb b/test/prism/ruby/ruby_parser_test.rb
index a13daeeb849..5df4177c10c 100644
--- a/test/prism/ruby/ruby_parser_test.rb
+++ b/test/prism/ruby/ruby_parser_test.rb
@@ -48,6 +48,7 @@ class RubyParserTest < TestCase
       "alias.txt",
       "dos_endings.txt",
       "heredocs_with_ignored_newlines.txt",
+      "leading_logical.txt",
       "method_calls.txt",
       "methods.txt",
       "multi_write.txt",
diff --git a/test/prism/snapshots/leading_logical.txt b/test/prism/snapshots/leading_logical.txt
new file mode 100644
index 00000000000..98b00424914
--- /dev/null
+++ b/test/prism/snapshots/leading_logical.txt
@@ -0,0 +1,109 @@
+@ ProgramNode (location: (1,0)-(21,5))
+├── flags: ∅
+├── locals: []
+└── statements:
+    @ StatementsNode (location: (1,0)-(21,5))
+    ├── flags: ∅
+    └── body: (length: 8)
+        ├── @ AndNode (location: (1,0)-(3,4))
+        │   ├── flags: newline
+        │   ├── left:
+        │   │   @ AndNode (location: (1,0)-(2,4))
+        │   │   ├── flags: ∅
+        │   │   ├── left:
+        │   │   │   @ IntegerNode (location: (1,0)-(1,1))
+        │   │   │   ├── flags: static_literal, decimal
+        │   │   │   └── value: 1
+        │   │   ├── right:
+        │   │   │   @ IntegerNode (location: (2,3)-(2,4))
+        │   │   │   ├── flags: static_literal, decimal
+        │   │   │   └── value: 2
+        │   │   └── operator_loc: (2,0)-(2,2) = "&&"
+        │   ├── right:
+        │   │   @ IntegerNode (location: (3,3)-(3,4))
+        │   │   ├── flags: static_literal, decimal
+        │   │   └── value: 3
+        │   └── operator_loc: (3,0)-(3,2) = "&&"
+        ├── @ OrNode (location: (5,0)-(7,4))
+        │   ├── flags: newline
+        │   ├── left:
+        │   │   @ OrNode (location: (5,0)-(6,4))
+        │   │   ├── flags: ∅
+        │   │   ├── left:
+        │   │   │   @ IntegerNode (location: (5,0)-(5,1))
+        │   │   │   ├── flags: static_literal, decimal
+        │   │   │   └── value: 1
+        │   │   ├── right:
+        │   │   │   @ IntegerNode (location: (6,3)-(6,4))
+        │   │   │   ├── flags: static_literal, decimal
+        │   │   │   └── value: 2
+        │   │   └── operator_loc: (6,0)-(6,2) = "||"
+        │   ├── right:
+        │   │   @ IntegerNode (location: (7,3)-(7,4))
+        │   │   ├── flags: static_literal, decimal
+        │   │   └── value: 3
+        │   └── operator_loc: (7,0)-(7,2) = "||"
+        ├── @ AndNode (location: (9,0)-(11,5))
+        │   ├── flags: newline
+        │   ├── left:
+        │   │   @ AndNode (location: (9,0)-(10,5))
+        │   │   ├── flags: ∅
+        │   │   ├── left:
+        │   │   │   @ IntegerNode (location: (9,0)-(9,1))
+        │   │   │   ├── flags: static_literal, decimal
+        │   │   │   └── value: 1
+        │   │   ├── right:
+        │   │   │   @ IntegerNode (location: (10,4)-(10,5))
+        │   │   │   ├── flags: static_literal, decimal
+        │   │   │   └── value: 2
+        │   │   └── operator_loc: (10,0)-(10,3) = "and"
+        │   ├── right:
+        │   │   @ IntegerNode (location: (11,4)-(11,5))
+        │   │   ├── flags: static_literal, decimal
+        │   │   └── value: 3
+        │   └── operator_loc: (11,0)-(11,3) = "and"
+        ├── @ OrNode (location: (13,0)-(15,4))
+        │   ├── flags: newline
+        │   ├── left:
+        │   │   @ OrNode (location: (13,0)-(14,4))
+        │   │   ├── flags: ∅
+        │   │   ├── left:
+        │   │   │   @ IntegerNode (location: (13,0)-(13,1))
+        │   │   │   ├── flags: static_literal, decimal
+        │   │   │   └── value: 1
+        │   │   ├── right:
+        │   │   │   @ IntegerNode (location: (14,3)-(14,4))
+        │   │   │   ├── flags: static_literal, decimal
+        │   │   │   └── value: 2
+        │   │   └── operator_loc: (14,0)-(14,2) = "or"
+        │   ├── right:
+        │   │   @ IntegerNode (location: (15,3)-(15,4))
+        │   │   ├── flags: static_literal, decimal
+        │   │   └── value: 3
+        │   └── operator_loc: (15,0)-(15,2) = "or"
+        ├── @ IntegerNode (location: (17,0)-(17,1))
+        │   ├── flags: newline, static_literal, decimal
+        │   └── value: 1
+        ├── @ CallNode (location: (18,0)-(18,6))
+        │   ├── flags: newline, variable_call, ignore_visibility
+        │   ├── receiver: ∅
+        │   ├── call_operator_loc: ∅
+        │   ├── name: :andfoo
+        │   ├── message_loc: (18,0)-(18,6) = "andfoo"
+        │   ├── opening_loc: ∅
+        │   ├── arguments: ∅
+        │   ├── closing_loc: ∅
+        │   └── block: ∅
+        ├── @ IntegerNode (location: (20,0)-(20,1))
+        │   ├── flags: newline, static_literal, decimal
+        │   └── value: 2
+        └── @ CallNode (location: (21,0)-(21,5))
+            ├── flags: newline, variable_call, ignore_visibility
+            ├── receiver: ∅
+            ├── call_operator_loc: ∅
+            ├── name: :orfoo
+            ├── message_loc: (21,0)-(21,5) = "orfoo"
+            ├── opening_loc: ∅
+            ├── arguments: ∅
+            ├── closing_loc: ∅
+            └── block: ∅