diff --git a/CHANGELOG.md b/CHANGELOG.md index de69eb3..9845168 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- `#referenced_expressions` + - like `#referenced_expression`, but for multiplexing backrefs + - returns the `Group` expressions that are being referenced + ### Fixed - fixed `#char` & `#codepoint` errors for single-digit hex escapes diff --git a/lib/regexp_parser/expression.rb b/lib/regexp_parser/expression.rb index f8a247a..9ca2f50 100644 --- a/lib/regexp_parser/expression.rb +++ b/lib/regexp_parser/expression.rb @@ -34,6 +34,7 @@ require_relative 'expression/methods/options' require_relative 'expression/methods/parts' require_relative 'expression/methods/printing' +require_relative 'expression/methods/referenced_expressions' require_relative 'expression/methods/strfregexp' require_relative 'expression/methods/tests' require_relative 'expression/methods/traverse' diff --git a/lib/regexp_parser/expression/classes/backreference.rb b/lib/regexp_parser/expression/classes/backreference.rb index c05fd9e..a45dde9 100644 --- a/lib/regexp_parser/expression/classes/backreference.rb +++ b/lib/regexp_parser/expression/classes/backreference.rb @@ -1,25 +1,6 @@ module Regexp::Expression module Backreference - class Base < Regexp::Expression::Base - attr_accessor :referenced_expression - - def initialize_copy(orig) - exp_id = [self.class, self.starts_at] - - # prevent infinite recursion for recursive subexp calls - copied = @@copied ||= {} - self.referenced_expression = - if copied[exp_id] - orig.referenced_expression - else - copied[exp_id] = true - orig.referenced_expression.dup - end - copied.clear - - super - end - end + class Base < Regexp::Expression::Base; end class Number < Backreference::Base attr_reader :number diff --git a/lib/regexp_parser/expression/classes/conditional.rb b/lib/regexp_parser/expression/classes/conditional.rb index 
1023e17..24117d9 100644 --- a/lib/regexp_parser/expression/classes/conditional.rb +++ b/lib/regexp_parser/expression/classes/conditional.rb @@ -7,26 +7,17 @@ def initialize end class Condition < Regexp::Expression::Base - attr_accessor :referenced_expression - # Name or number of the referenced capturing group that determines state. # Returns a String if reference is by name, Integer if by number. def reference ref = text.tr("'<>()", "") ref =~ /\D/ ? ref : Integer(ref) end - - def initialize_copy(orig) - self.referenced_expression = orig.referenced_expression.dup - super - end end class Branch < Regexp::Expression::Sequence; end class Expression < Regexp::Expression::Subexpression - attr_accessor :referenced_expression - def <<(exp) expressions.last << exp end @@ -54,11 +45,6 @@ def branches def reference condition.reference end - - def initialize_copy(orig) - self.referenced_expression = orig.referenced_expression.dup - super - end end end end diff --git a/lib/regexp_parser/expression/methods/referenced_expressions.rb b/lib/regexp_parser/expression/methods/referenced_expressions.rb new file mode 100644 index 0000000..2279708 --- /dev/null +++ b/lib/regexp_parser/expression/methods/referenced_expressions.rb @@ -0,0 +1,28 @@ +module Regexp::Expression + module ReferencedExpressions + attr_accessor :referenced_expressions + + def referenced_expression + referenced_expressions && referenced_expressions.first + end + + def initialize_copy(orig) + exp_id = [self.class, self.starts_at] + + # prevent infinite recursion for recursive subexp calls + copied = self.class.instance_eval { @copied_ref_exps ||= {} } + self.referenced_expressions = + if copied[exp_id] + orig.referenced_expressions + else + copied[exp_id] = true + orig.referenced_expressions && orig.referenced_expressions.map(&:dup) + end + copied.clear + + super + end + end + + Base.include ReferencedExpressions +end diff --git a/lib/regexp_parser/parser.rb b/lib/regexp_parser/parser.rb index 632739a..8bcbaf6 
100644 --- a/lib/regexp_parser/parser.rb +++ b/lib/regexp_parser/parser.rb @@ -580,16 +580,19 @@ def active_opts # the instance of Group::Capture that it refers to via its number. def assign_referenced_expressions # find all referenceable and referring expressions - targets = { 0 => root } + targets = { 0 => [root] } referrers = [] root.each_expression do |exp| - exp.is_a?(Group::Capture) && targets[exp.identifier] = exp - referrers << exp if exp.referential? + if exp.referential? + referrers << exp + elsif exp.is_a?(Group::Capture) + (targets[exp.identifier] ||= []) << exp + end end - # assign reference expression to referring expressions + # assign referenced expressions to referring expressions # (in a second iteration because there might be forward references) referrers.each do |exp| - exp.referenced_expression = targets[exp.reference] || + exp.referenced_expressions = targets[exp.reference] || raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}") end end diff --git a/spec/expression/methods/match_length_spec.rb b/spec/expression/methods/match_length_spec.rb index 65763b3..c22a746 100644 --- a/spec/expression/methods/match_length_spec.rb +++ b/spec/expression/methods/match_length_spec.rb @@ -32,7 +32,7 @@ specify('raises for missing references') do exp = RP.parse(/(a)\1/).last - exp.referenced_expression = nil + exp.referenced_expressions = nil expect { exp.match_length }.to raise_error(ArgumentError) end diff --git a/spec/parser/refcalls_spec.rb b/spec/parser/refcalls_spec.rb index 1df8468..2588f3b 100644 --- a/spec/parser/refcalls_spec.rb +++ b/spec/parser/refcalls_spec.rb @@ -59,6 +59,14 @@ expect(exp3.referenced_expression.to_s).to eq '(ghi)' end + specify('parse backref referenced_expressions (multiplex)') do + root = RP.parse('(?<a>A)(?<a>B)\\k<a>') + exp = root.last + + expect(exp.referenced_expressions).to eq [root[0], root[1]] + expect(exp.referenced_expressions.map(&:to_s)).to eq ['(?<a>A)', '(?<a>B)'] + end + specify('parse backref call 
referenced_expression') do root = RP.parse('\\g<+1>(abc)\\g<+2>(def)(ghi)\\g<-2>') exp1 = root[0]