Skip to content

Commit dfeabe0

Browse files
committed
Applied requested changes
- Document MaintainCaptureOrder option - Use return in `assignNameSlots` and remove else - Add test with MaintainCaptureOrder not provided - Change the MaintainCaptureOrder value to `0x0400` - Remove the `o` inline option - Add comment to explain why `autocap` is consumed
1 parent 896189e commit dfeabe0

File tree

4 files changed

+96
-69
lines changed

4 files changed

+96
-69
lines changed

README.md

+14
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,20 @@ if isMatch, _ := re.MatchString(`Something to match`); isMatch {
9191

9292
This feature is a work in progress and I'm open to ideas for more things to put here (maybe more relaxed character escaping rules?).
9393

94+
## MaintainCaptureOrder mode
95+
By default, `regexp2` matches the behavior of the .NET regexp engine, which, unlike PCRE, does not preserve the order in which capture groups appear in the pattern: named capture groups are numbered after all unnamed ones, so they come last in the list of captured groups. Compiling a regexp with the `MaintainCaptureOrder` option keeps named and unnamed capture groups in the order they appear in the pattern.
96+
97+
```go
98+
re := regexp2.MustCompile(`(?<first>This) (is) a (?<last>test)`, regexp2.MaintainCaptureOrder)
99+
if match, _ := re.FindStringMatch(`This is a test`); match != nil {
100+
// match.Groups()[1].String() == "This"
101+
// match.Groups()[1].Name == "first"
102+
// match.Groups()[2].String() == "is"
103+
// match.Groups()[2].Name == "2"
104+
// match.Groups()[3].String() == "test"
105+
// match.Groups()[3].Name == "last"
106+
}
107+
```
94108

95109
## Library features that I'm still working on
96110
- Regex split

regexp.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ const (
121121
Debug = 0x0080 // "d"
122122
ECMAScript = 0x0100 // "e"
123123
RE2 = 0x0200 // RE2 (regexp package) compatibility mode
124-
MaintainCaptureOrder = 0x1000 // Maintain named and unnamed capture order
124+
MaintainCaptureOrder = 0x0400 // Maintain named and unnamed capture order
125125
)
126126

127127
func (re *Regexp) RightToLeft() bool {

regexp_MaintainCaptureOrder_test.go

+23-11
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ func TestMaintainCaptureOrder_Basic(t *testing.T) {
3737
if want, got := `this`, string(m.GroupByName(`first`).Runes()); want != got {
3838
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
3939
}
40+
if want, got := `first`, m.regex.GroupNameFromNumber(1); want != got {
41+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
42+
}
4043
if want, got := `testing`, groups[2].String(); want != got {
4144
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
4245
}
@@ -54,13 +57,13 @@ func TestMaintainCaptureOrder_Basic(t *testing.T) {
5457
}
5558
}
5659

57-
func TestMaintainCaptureOrder_With_Other_Options(t *testing.T) {
58-
r, err := Compile("(?si)(?<first>this).+?\n(testing).+?(?<last>stuff)", MaintainCaptureOrder)
60+
func TestMaintainCaptureOrder_Mode_Not_Enabled(t *testing.T) {
61+
r, err := Compile("(?<first>this).+?(testing).+?(?<last>stuff)", 0)
5962
// t.Logf("code dump: %v", r.code.Dump())
6063
if err != nil {
6164
t.Errorf("unexpected compile err: %v", err)
6265
}
63-
text := "This is a \ntesting stuff"
66+
text := `this is a testing stuff`
6467
m, err := r.FindStringMatch(text)
6568
if err != nil {
6669
t.Errorf("unexpected match err: %v", err)
@@ -78,16 +81,22 @@ func TestMaintainCaptureOrder_With_Other_Options(t *testing.T) {
7881
if want, got := text, groups[0].String(); want != got {
7982
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
8083
}
81-
if want, got := `This`, groups[1].String(); want != got {
84+
if want, got := `testing`, groups[1].String(); want != got {
8285
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
8386
}
84-
if want, got := `first`, groups[1].Name; want != got {
87+
if want, got := `1`, groups[1].Name; want != got {
8588
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
8689
}
87-
if want, got := `testing`, groups[2].String(); want != got {
90+
if want, got := `this`, string(m.GroupByName(`first`).Runes()); want != got {
8891
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
8992
}
90-
if want, got := `2`, groups[2].Name; want != got {
93+
if want, got := `first`, m.regex.GroupNameFromNumber(2); want != got {
94+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
95+
}
96+
if want, got := `this`, groups[2].String(); want != got {
97+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
98+
}
99+
if want, got := `first`, groups[2].Name; want != got {
91100
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
92101
}
93102
if want, got := `stuff`, groups[3].String(); want != got {
@@ -96,10 +105,13 @@ func TestMaintainCaptureOrder_With_Other_Options(t *testing.T) {
96105
if want, got := `last`, groups[3].Name; want != got {
97106
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
98107
}
108+
if want, got := `stuff`, string(m.GroupByNumber(3).Runes()); want != got {
109+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
110+
}
99111
}
100112

101-
func TestMaintainCaptureOrder_Enable_Inline(t *testing.T) {
102-
r, err := Compile("(?sio)(?<first>this).+?\n(testing).+?(?<last>stuff)", 0)
113+
func TestMaintainCaptureOrder_With_Other_Options(t *testing.T) {
114+
r, err := Compile("(?si)(?<first>this).+?\n(testing).+?(?<last>stuff)", MaintainCaptureOrder)
103115
// t.Logf("code dump: %v", r.code.Dump())
104116
if err != nil {
105117
t.Errorf("unexpected compile err: %v", err)
@@ -142,8 +154,8 @@ func TestMaintainCaptureOrder_Enable_Inline(t *testing.T) {
142154
}
143155
}
144156

145-
func TestMaintainCaptureOrder_Inline_No_Capture_Groups(t *testing.T) {
146-
r, err := Compile("(?o)this.+?testing.+?stuff", 0)
157+
func TestMaintainCaptureOrder_No_Capture_Groups(t *testing.T) {
158+
r, err := Compile("this.+?testing.+?stuff", MaintainCaptureOrder)
147159
// t.Logf("code dump: %v", r.code.Dump())
148160
if err != nil {
149161
t.Errorf("unexpected compile err: %v", err)

syntax/parser.go

+58-57
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ const (
2222
Debug = 0x0080 // "d"
2323
ECMAScript = 0x0100 // "e"
2424
RE2 = 0x0200 // RE2 compat mode
25-
MaintainCaptureOrder = 0x1000 // "o" Maintain named and unnamed capture order
25+
MaintainCaptureOrder = 0x0400 // Maintain named and unnamed capture order
2626
)
2727

2828
func optionFromCode(ch rune) RegexOptions {
@@ -44,8 +44,6 @@ func optionFromCode(ch rune) RegexOptions {
4444
return Debug
4545
case 'e', 'E':
4646
return ECMAScript
47-
case 'o', 'O':
48-
return MaintainCaptureOrder
4947
default:
5048
return 0
5149
}
@@ -242,75 +240,76 @@ func (p *parser) assignNameSlots() {
242240
if len(p.capnamelist) == 0 || p.capnamelist[0] != `0` {
243241
p.capnamelist = append([]string{fmt.Sprint(0)}, p.capnamelist...)
244242
}
245-
} else {
246-
if p.capnames != nil {
247-
for _, name := range p.capnamelist {
248-
for p.isCaptureSlot(p.autocap) {
249-
p.autocap++
250-
}
251-
pos := p.capnames[name]
252-
p.capnames[name] = p.autocap
253-
p.noteCaptureSlot(p.autocap, pos)
254-
243+
return
244+
}
245+
246+
if p.capnames != nil {
247+
for _, name := range p.capnamelist {
248+
for p.isCaptureSlot(p.autocap) {
255249
p.autocap++
256250
}
257-
}
258-
259-
// if the caps array has at least one gap, construct the list of used slots
260-
if p.capcount < p.captop {
261-
p.capnumlist = make([]int, p.capcount)
262-
i := 0
251+
pos := p.capnames[name]
252+
p.capnames[name] = p.autocap
253+
p.noteCaptureSlot(p.autocap, pos)
263254

264-
for k := range p.caps {
265-
p.capnumlist[i] = k
266-
i++
267-
}
255+
p.autocap++
256+
}
257+
}
258+
259+
// if the caps array has at least one gap, construct the list of used slots
260+
if p.capcount < p.captop {
261+
p.capnumlist = make([]int, p.capcount)
262+
i := 0
268263

269-
sort.Ints(p.capnumlist)
264+
for k := range p.caps {
265+
p.capnumlist[i] = k
266+
i++
270267
}
271268

272-
// merge capsnumlist into capnamelist
273-
if p.capnames != nil || p.capnumlist != nil {
274-
var oldcapnamelist []string
275-
var next int
276-
var k int
269+
sort.Ints(p.capnumlist)
270+
}
277271

278-
if p.capnames == nil {
279-
oldcapnamelist = nil
280-
p.capnames = make(map[string]int)
281-
p.capnamelist = []string{}
282-
next = -1
283-
} else {
284-
oldcapnamelist = p.capnamelist
285-
p.capnamelist = []string{}
286-
next = p.capnames[oldcapnamelist[0]]
287-
}
272+
// merge capsnumlist into capnamelist
273+
if p.capnames != nil || p.capnumlist != nil {
274+
var oldcapnamelist []string
275+
var next int
276+
var k int
288277

289-
for i := 0; i < p.capcount; i++ {
290-
j := i
291-
if p.capnumlist != nil {
292-
j = p.capnumlist[i]
293-
}
278+
if p.capnames == nil {
279+
oldcapnamelist = nil
280+
p.capnames = make(map[string]int)
281+
p.capnamelist = []string{}
282+
next = -1
283+
} else {
284+
oldcapnamelist = p.capnamelist
285+
p.capnamelist = []string{}
286+
next = p.capnames[oldcapnamelist[0]]
287+
}
294288

295-
if next == j {
296-
p.capnamelist = append(p.capnamelist, oldcapnamelist[k])
297-
k++
289+
for i := 0; i < p.capcount; i++ {
290+
j := i
291+
if p.capnumlist != nil {
292+
j = p.capnumlist[i]
293+
}
298294

299-
if k == len(oldcapnamelist) {
300-
next = -1
301-
} else {
302-
next = p.capnames[oldcapnamelist[k]]
303-
}
295+
if next == j {
296+
p.capnamelist = append(p.capnamelist, oldcapnamelist[k])
297+
k++
304298

299+
if k == len(oldcapnamelist) {
300+
next = -1
305301
} else {
306-
//feature: culture?
307-
str := strconv.Itoa(j)
308-
p.capnamelist = append(p.capnamelist, str)
309-
p.capnames[str] = j
310-
}
302+
next = p.capnames[oldcapnamelist[k]]
311303
}
304+
305+
} else {
306+
//feature: culture?
307+
str := strconv.Itoa(j)
308+
p.capnamelist = append(p.capnamelist, str)
309+
p.capnames[str] = j
312310
}
313311
}
312+
}
314313
}
315314

316315
func (p *parser) consumeAutocap() int {
@@ -958,6 +957,8 @@ func (p *parser) scanGroupOpen() (*regexNode, error) {
958957
}
959958

960959
if capnum != -1 && p.useMaintainCaptureOrder() {
960+
// Successfully scanned a named capture group so we need to increment
961+
// our cap number to maintain the order
961962
p.consumeAutocap()
962963
}
963964
} else if ch == '-' {

0 commit comments

Comments
 (0)