Skip to content

Commit 9ff549b

Browse files
dop251 authored and dlclark committed
Improved the handling of named group references in ECMAScript mode:
- \k<name> is only parsed as a named group reference if there is at least one group name (?<anyname>) in the regexp.
- No support for the \<name>, \k'name' or \'name' notations.
- Fail on invalid group name references (empty or containing non-word characters). This affects all modes, not just ECMAScript.
1 parent 86a04cb commit 9ff549b

File tree

2 files changed

+75
-6
lines changed

2 files changed

+75
-6
lines changed

regexp_test.go

+54
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,60 @@ func TestECMAInvalidEscape(t *testing.T) {
756756
}
757757
}
758758

759+
// TestECMANamedGroup checks how \k escapes are compiled and matched under the
// ECMAScript option: as literal text when the pattern has no named groups, as
// a named back-reference when it has one, and as a compile error when the
// reference is not accepted.
func TestECMANamedGroup(t *testing.T) {
760+
// A bare \k with no named group in the pattern is expected to match a literal "k".
re := MustCompile(`\k`, ECMAScript)
761+
if m, err := re.MatchString("k"); err != nil {
762+
t.Fatal(err)
763+
} else if !m {
764+
t.Fatal("Expected match")
765+
}
766+
767+
// With no named group in the pattern, \k'test' is expected to match the literal text k'test'.
re = MustCompile(`\k'test'`, ECMAScript)
768+
if m, err := re.MatchString(`k'test'`); err != nil {
769+
t.Fatal(err)
770+
} else if !m {
771+
t.Fatal("Expected match")
772+
}
773+
774+
// Likewise \k<test> is expected to match the literal text k<test>, since no group is named.
re = MustCompile(`\k<test>`, ECMAScript)
775+
if m, err := re.MatchString(`k<test>`); err != nil {
776+
t.Fatal(err)
777+
} else if !m {
778+
t.Fatal("Expected match")
779+
}
780+
781+
// Once a named group exists, the quoted form \k'title' must fail to compile
// (the commit message states \k'name' is not supported in ECMAScript mode).
_, err := Compile(`(?<title>\w+), yes \k'title'`, ECMAScript)
782+
if err == nil {
783+
t.Fatal("Expected error")
784+
}
785+
786+
// The angle-bracket form \k<title> refers back to the named group, so the
// repeated word must match.
re = MustCompile(`(?<title>\w+), yes \k<title>`, ECMAScript)
787+
if m, err := re.MatchString("sir, yes sir"); err != nil {
788+
t.Fatal(err)
789+
} else if !m {
790+
t.Fatal("Expected match")
791+
}
792+
793+
// A reference that appears before the group it names must still compile and
// match this input.
re = MustCompile(`\k<title>, yes (?<title>\w+)`, ECMAScript)
794+
if m, err := re.MatchString(", yes sir"); err != nil {
795+
t.Fatal(err)
796+
} else if !m {
797+
t.Fatal("Expected match")
798+
}
799+
800+
// The pattern contains a named group, so \k< starts a name reference; the text
// that follows is expected to be rejected (presumably as a malformed name).
_, err = Compile(`\k<(?<name>)>`, ECMAScript)
801+
if err == nil {
802+
t.Fatal("Expected error")
803+
}
804+
805+
// No named group here ((<name>) is an ordinary group), so this must compile
// under ECMAScript; MustCompile panics if it does not.
MustCompile(`\k<(<name>)>`, ECMAScript)
806+
807+
// The same pattern compiled with no options (0) must be rejected.
_, err = Compile(`\k<(<name>)>`, 0)
808+
if err == nil {
809+
t.Fatal("Expected error")
810+
}
811+
}
812+
759813
func TestECMAInvalidEscapeCharClass(t *testing.T) {
760814
re := MustCompile(`[\x0]`, ECMAScript)
761815
if m, err := re.MatchString("x"); err != nil {

syntax/parser.go

+21-6
Original file line numberDiff line numberDiff line change
@@ -1191,14 +1191,18 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
11911191
backpos := p.textpos()
11921192
ch := p.rightChar(0)
11931193

1194-
// allow \k<foo> instead of \<foo>, which is now deprecated
1194+
// Allow \k<foo> instead of \<foo>, which is now deprecated.
11951195

1196-
if ch == 'k' {
1196+
// According to ECMAScript specification, \k<name> is only parsed as a named group reference if
1197+
// there is at least one group name in the regexp.
1198+
// See https://www.ecma-international.org/ecma-262/#sec-isvalidregularexpressionliteral, step 7.
1199+
// Note, during the first (scanOnly) run we may not have all group names scanned, but that's ok.
1200+
if ch == 'k' && (!p.useOptionE() || len(p.capnames) > 0) {
11971201
if p.charsRight() >= 2 {
11981202
p.moveRight(1)
11991203
ch = p.moveRightGetChar()
12001204

1201-
if ch == '<' || ch == '\'' {
1205+
if ch == '<' || (!p.useOptionE() && ch == '\'') { // No support for \k'name' in ECMAScript
12021206
angled = true
12031207
if ch == '\'' {
12041208
close = '\''
@@ -1214,7 +1218,7 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
12141218

12151219
ch = p.rightChar(0)
12161220

1217-
} else if (ch == '<' || ch == '\'') && p.charsRight() > 1 { // Note angle without \g
1221+
} else if !p.useOptionE() && (ch == '<' || ch == '\'') && p.charsRight() > 1 { // Note angle without \g
12181222
angled = true
12191223
if ch == '\'' {
12201224
close = '\''
@@ -1257,14 +1261,21 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
12571261
return nil, p.getErr(ErrUndefinedBackRef, capnum)
12581262
}
12591263

1260-
} else if angled && IsWordChar(ch) {
1264+
} else if angled {
12611265
capname := p.scanCapname()
12621266

1263-
if p.charsRight() > 0 && p.moveRightGetChar() == close {
1267+
if capname != "" && p.charsRight() > 0 && p.moveRightGetChar() == close {
1268+
1269+
if scanOnly {
1270+
return nil, nil
1271+
}
1272+
12641273
if p.isCaptureName(capname) {
12651274
return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
12661275
}
12671276
return nil, p.getErr(ErrUndefinedNameRef, capname)
1277+
} else {
1278+
return nil, p.getErr(ErrMalformedNameRef)
12681279
}
12691280
}
12701281

@@ -1276,6 +1287,10 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
12761287
return nil, err
12771288
}
12781289

1290+
if scanOnly {
1291+
return nil, nil
1292+
}
1293+
12791294
if p.useOptionI() {
12801295
ch = unicode.ToLower(ch)
12811296
}

0 commit comments

Comments
 (0)