diff --git a/README.md b/README.md index 12695d4..33bc328 100644 --- a/README.md +++ b/README.md @@ -329,6 +329,7 @@ _Note that not all of these are available in all versions of Ruby_ |   _Lookbehind_ | `(?<=abc)` | ✓ | |   _Negative Lookbehind_ | `(?<!abc)` | ✓ | +|   _**Absence**_ | `(?~abc)` | ✓ | |   _**Back-references**_ | | ⋱ | |   _Named_ | `\k<name>` | ✓ | |   _Nest Level_ | `\k<n-1>` | ✓ | diff --git a/lib/regexp_parser/expression/classes/group.rb b/lib/regexp_parser/expression/classes/group.rb index 217b8b2..9e72a06 100644 --- a/lib/regexp_parser/expression/classes/group.rb +++ b/lib/regexp_parser/expression/classes/group.rb @@ -31,6 +31,7 @@ class Atomic < Group::Base; end class Capture < Group::Base; end class Passive < Group::Base; end class Options < Group::Base; end + class Absence < Group::Base; end class Named < Group::Capture attr_reader :name diff --git a/lib/regexp_parser/lexer.rb b/lib/regexp_parser/lexer.rb index c22882a..d601f54 100644 --- a/lib/regexp_parser/lexer.rb +++ b/lib/regexp_parser/lexer.rb @@ -4,7 +4,7 @@ # given syntax flavor. 
module Regexp::Lexer - OPENING_TOKENS = [:capture, :options, :passive, :atomic, :named, + OPENING_TOKENS = [:capture, :options, :passive, :atomic, :named, :absence, :lookahead, :nlookahead, :lookbehind, :nlookbehind ].freeze diff --git a/lib/regexp_parser/parser.rb b/lib/regexp_parser/parser.rb index dc92751..a8f823f 100644 --- a/lib/regexp_parser/parser.rb +++ b/lib/regexp_parser/parser.rb @@ -470,6 +470,8 @@ def self.open_group(token) exp = Group::Named.new(token) when :capture exp = Group::Capture.new(token) + when :absence + exp = Group::Absence.new(token) when :lookahead exp = Assertion::Lookahead.new(token) diff --git a/lib/regexp_parser/scanner/scanner.rl b/lib/regexp_parser/scanner/scanner.rl index e200cfc..8cd2861 100644 --- a/lib/regexp_parser/scanner/scanner.rl +++ b/lib/regexp_parser/scanner/scanner.rl @@ -82,6 +82,7 @@ group_atomic = '?>'; group_passive = '?:'; + group_absence = '?~'; assertion_lookahead = '?='; assertion_nlookahead = '?!'; @@ -107,7 +108,7 @@ group_number_ref = group_ref . (('<' . group_number . group_level? '>') | ("'" . group_number . group_level? "'")); - group_type = group_atomic | group_passive | group_named; + group_type = group_atomic | group_passive | group_absence | group_named; assertion_type = assertion_lookahead | assertion_nlookahead | @@ -573,6 +574,7 @@ # Groups # (?:subexp) passive (non-captured) group # (?>subexp) atomic group, don't backtrack in subexp. + # (?~subexp) absence group, matches any string that does not contain a match for subexp # (?<name>subexp) named group # (?'name'subexp) named group (single quoted version) # (subexp) captured group @@ -581,6 +583,7 @@ case text = text(data, ts, te).first when '(?:'; emit(:group, :passive, text, ts, te) when '(?>'; emit(:group, :atomic, text, ts, te) + when '(?~'; emit(:group, :absence, text, ts, te) when /^\(\?<(\w*)>/ empty_name_error(:group, 'named group (ab)') if $1.empty? 
diff --git a/lib/regexp_parser/syntax/ruby/2.4.1.rb b/lib/regexp_parser/syntax/ruby/2.4.1.rb index 3c459e6..013a321 100644 --- a/lib/regexp_parser/syntax/ruby/2.4.1.rb +++ b/lib/regexp_parser/syntax/ruby/2.4.1.rb @@ -6,6 +6,8 @@ module Ruby class V241 < Regexp::Syntax::Ruby::V240 def initialize super + + implements :group, Group::Absence end end diff --git a/lib/regexp_parser/syntax/tokens/group.rb b/lib/regexp_parser/syntax/tokens/group.rb index 52bfdf6..1c77f4a 100644 --- a/lib/regexp_parser/syntax/tokens/group.rb +++ b/lib/regexp_parser/syntax/tokens/group.rb @@ -13,6 +13,8 @@ module Group All = Group::Extended + Group::Named + Group::Atomic + Group::Passive + Group::Comment + Absence = [:absence] + Type = :group end diff --git a/test/parser/test_groups.rb b/test/parser/test_groups.rb index 0810a68..a9331dd 100644 --- a/test/parser/test_groups.rb +++ b/test/parser/test_groups.rb @@ -106,4 +106,17 @@ def test_parse_comment end end + if RUBY_VERSION >= '2.4.1' + def test_parse_absence_group + t = RP.parse('a(?~b)c(?~d)e') + + [1,3].each do |i| + assert t.expressions[i].is_a?(Group::Absence), + "Expected absence group, but got #{t.expressions[i].class.name}" + + assert_equal :group, t.expressions[i].type + assert_equal :absence, t.expressions[i].token + end + end + end end diff --git a/test/scanner/test_groups.rb b/test/scanner/test_groups.rb index 5dabb56..f27d9cf 100644 --- a/test/scanner/test_groups.rb +++ b/test/scanner/test_groups.rb @@ -55,6 +55,13 @@ class ScannerGroups < Test::Unit::TestCase }) end + if RUBY_VERSION >= '2.4.1' + tests.merge!({ + # New absence operator + '(?~abc)' => [0, :group, :absence, '(?~', 0, 3], + }) + end + tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count| define_method "test_scanner_#{type}_#{token}_#{count}" do tokens = RS.scan(pattern) diff --git a/test/warnings.yml b/test/warnings.yml index e957c37..6eebfff 100644 --- a/test/warnings.yml +++ b/test/warnings.yml @@ -1,6 +1,6 @@ --- # Unused variable 
emitted by ragel -- "lib/regexp_parser/scanner.rb:1674: warning: +- "lib/regexp_parser/scanner.rb:1675: warning: assigned but unused variable - testEof" # Unavoidable duplicated character range tests @@ -10,7 +10,7 @@ character class has duplicated range: /[a[b[^c]]]/" # Warnings generated only under 1.9.3 -- "lib/regexp_parser/scanner.rb:1675: +- "lib/regexp_parser/scanner.rb:1676: warning: assigned but unused variable - _acts" -- "lib/regexp_parser/scanner.rb:1675: +- "lib/regexp_parser/scanner.rb:1676: warning: assigned but unused variable - _nacts"