Skip to content

Commit b519ae9

Browse files
committed
Add support for Perl (PCRE) ordering of named and unnamed capture groups.
In other words, maintain the order of capture groups.
1 parent a2a8dda commit b519ae9

File tree

4 files changed

+475
-56
lines changed

4 files changed

+475
-56
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ The internals of `regexp2` always operate on `[]rune` so `Index` and `Length` da
7474
| named back reference `\k'name'` | no | yes |
7575
| named ascii character class `[[:foo:]]`| yes | no (yes in RE2 compat mode) |
7676
| conditionals `(?(expr)yes\|no)` | no | yes |
77+
| PCRE capture group order | no | no (yes in MaintainCaptureOrder mode) |
7778

7879
## RE2 compatibility mode
7980
The default behavior of `regexp2` is to match the .NET regexp engine, however the `RE2` option is provided to change the parsing to increase compatibility with RE2. Using the `RE2` option when compiling a regexp will not take away any features, but will change the following behaviors:

regexp.go

+1
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ const (
121121
Debug = 0x0080 // "d"
122122
ECMAScript = 0x0100 // "e"
123123
RE2 = 0x0200 // RE2 (regexp package) compatibility mode
124+
MaintainCaptureOrder = 0x1000 // Maintain named and unnamed capture order
124125
)
125126

126127
func (re *Regexp) RightToLeft() bool {

regexp_MaintainCaptureOrder_test.go

+366
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,366 @@
1+
package regexp2
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestMaintainCaptureOrder_Basic(t *testing.T) {
8+
r, err := Compile("(?<first>this).+?(testing).+?(?<last>stuff)", MaintainCaptureOrder)
9+
// t.Logf("code dump: %v", r.code.Dump())
10+
if err != nil {
11+
t.Errorf("unexpected compile err: %v", err)
12+
}
13+
text := `this is a testing stuff`
14+
m, err := r.FindStringMatch(text)
15+
if err != nil {
16+
t.Errorf("unexpected match err: %v", err)
17+
}
18+
if m == nil {
19+
t.Error("Nil match, expected success")
20+
} else {
21+
//t.Logf("Match: %v", m.dump())
22+
}
23+
24+
groups := m.Groups()
25+
if want, got := text, m.String(); want != got {
26+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
27+
}
28+
if want, got := text, groups[0].String(); want != got {
29+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
30+
}
31+
if want, got := `this`, groups[1].String(); want != got {
32+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
33+
}
34+
if want, got := `first`, groups[1].Name; want != got {
35+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
36+
}
37+
if want, got := `this`, string(m.GroupByName(`first`).Runes()); want != got {
38+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
39+
}
40+
if want, got := `testing`, groups[2].String(); want != got {
41+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
42+
}
43+
if want, got := `2`, groups[2].Name; want != got {
44+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
45+
}
46+
if want, got := `stuff`, groups[3].String(); want != got {
47+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
48+
}
49+
if want, got := `last`, groups[3].Name; want != got {
50+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
51+
}
52+
if want, got := `stuff`, string(m.GroupByNumber(3).Runes()); want != got {
53+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
54+
}
55+
}
56+
57+
func TestMaintainCaptureOrder_With_Other_Options(t *testing.T) {
58+
r, err := Compile("(?si)(?<first>this).+?\n(testing).+?(?<last>stuff)", MaintainCaptureOrder)
59+
// t.Logf("code dump: %v", r.code.Dump())
60+
if err != nil {
61+
t.Errorf("unexpected compile err: %v", err)
62+
}
63+
text := "This is a \ntesting stuff"
64+
m, err := r.FindStringMatch(text)
65+
if err != nil {
66+
t.Errorf("unexpected match err: %v", err)
67+
}
68+
if m == nil {
69+
t.Error("Nil match, expected success")
70+
} else {
71+
//t.Logf("Match: %v", m.dump())
72+
}
73+
74+
groups := m.Groups()
75+
if want, got := text, m.String(); want != got {
76+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
77+
}
78+
if want, got := text, groups[0].String(); want != got {
79+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
80+
}
81+
if want, got := `This`, groups[1].String(); want != got {
82+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
83+
}
84+
if want, got := `first`, groups[1].Name; want != got {
85+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
86+
}
87+
if want, got := `testing`, groups[2].String(); want != got {
88+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
89+
}
90+
if want, got := `2`, groups[2].Name; want != got {
91+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
92+
}
93+
if want, got := `stuff`, groups[3].String(); want != got {
94+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
95+
}
96+
if want, got := `last`, groups[3].Name; want != got {
97+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
98+
}
99+
}
100+
101+
func TestMaintainCaptureOrder_Enable_Inline(t *testing.T) {
102+
r, err := Compile("(?sio)(?<first>this).+?\n(testing).+?(?<last>stuff)", 0)
103+
// t.Logf("code dump: %v", r.code.Dump())
104+
if err != nil {
105+
t.Errorf("unexpected compile err: %v", err)
106+
}
107+
text := "This is a \ntesting stuff"
108+
m, err := r.FindStringMatch(text)
109+
// t.Errorf(" groups: %#v\n", m)
110+
if err != nil {
111+
t.Errorf("unexpected match err: %v", err)
112+
}
113+
if m == nil {
114+
t.Error("Nil match, expected success")
115+
} else {
116+
//t.Logf("Match: %v", m.dump())
117+
}
118+
119+
groups := m.Groups()
120+
if want, got := text, m.String(); want != got {
121+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
122+
}
123+
if want, got := text, groups[0].String(); want != got {
124+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
125+
}
126+
if want, got := `This`, groups[1].String(); want != got {
127+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
128+
}
129+
if want, got := `first`, groups[1].Name; want != got {
130+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
131+
}
132+
if want, got := `testing`, groups[2].String(); want != got {
133+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
134+
}
135+
if want, got := `2`, groups[2].Name; want != got {
136+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
137+
}
138+
if want, got := `stuff`, groups[3].String(); want != got {
139+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
140+
}
141+
if want, got := `last`, groups[3].Name; want != got {
142+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
143+
}
144+
}
145+
146+
func TestMaintainCaptureOrder_NestedCaptures(t *testing.T) {
147+
r, err := Compile(
148+
`(?<first>This)(?<second>(.)+?(?<test>testing)).+?(some.+?(other).+?(?<last>stuff)) (?<test>\k<test>)`, MaintainCaptureOrder)
149+
// t.Logf("code dump: %v", r.code.Dump())
150+
if err != nil {
151+
t.Errorf("unexpected compile err: %v", err)
152+
}
153+
text := "This is a testing some other stuff testing"
154+
m, err := r.FindStringMatch(text)
155+
156+
if err != nil {
157+
t.Errorf("unexpected match err: %v", err)
158+
}
159+
if m == nil {
160+
t.Error("Nil match, expected success")
161+
} else {
162+
//t.Logf("Match: %v", m.dump())
163+
}
164+
165+
groups := m.Groups()
166+
if want, got := text, m.String(); want != got {
167+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
168+
}
169+
if want, got := text, groups[0].String(); want != got {
170+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
171+
}
172+
if want, got := `This`, groups[1].String(); want != got {
173+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
174+
}
175+
if want, got := `first`, groups[1].Name; want != got {
176+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
177+
}
178+
if want, got := ` is a testing`, groups[2].String(); want != got {
179+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
180+
}
181+
if want, got := `second`, groups[2].Name; want != got {
182+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
183+
}
184+
if want, got := groups[2].String(), groups[2].Captures[0].String(); want != got {
185+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
186+
}
187+
if want, got := ` `, groups[3].String(); want != got {
188+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
189+
}
190+
if want, got := `a`, groups[3].Captures[4].String(); want != got {
191+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
192+
}
193+
if want, got := `3`, groups[3].Name; want != got {
194+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
195+
}
196+
if want, got := `testing`, groups[4].String(); want != got {
197+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
198+
}
199+
if want, got := `test`, groups[4].Name; want != got {
200+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
201+
}
202+
if want, got := `some other stuff`, groups[5].String(); want != got {
203+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
204+
}
205+
if want, got := `5`, groups[5].Name; want != got {
206+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
207+
}
208+
if want, got := `other`, groups[6].String(); want != got {
209+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
210+
}
211+
if want, got := `6`, groups[6].Name; want != got {
212+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
213+
}
214+
if want, got := `stuff`, groups[7].String(); want != got {
215+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
216+
}
217+
if want, got := `last`, groups[7].Name; want != got {
218+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
219+
}
220+
if want, got := 8, len(groups); want != got {
221+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
222+
}
223+
}
224+
225+
func TestMaintainCaptureOrder_RE2_And_NumBackref(t *testing.T) {
226+
r, err := Compile(
227+
`(?'first'This).+?(?P<test>testing) (some).+?(?<4>stuff) \2`, MaintainCaptureOrder | RE2)
228+
// t.Logf("code dump: %v", r.code.Dump())
229+
if err != nil {
230+
t.Errorf("unexpected compile err: %v", err)
231+
}
232+
text := "This is a testing some other stuff testing"
233+
m, err := r.FindStringMatch(text)
234+
235+
if err != nil {
236+
t.Errorf("unexpected match err: %v", err)
237+
}
238+
if m == nil {
239+
t.Error("Nil match, expected success")
240+
} else {
241+
//t.Logf("Match: %v", m.dump())
242+
}
243+
244+
groups := m.Groups()
245+
if want, got := text, m.String(); want != got {
246+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
247+
}
248+
if want, got := text, groups[0].String(); want != got {
249+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
250+
}
251+
if want, got := `This`, groups[1].String(); want != got {
252+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
253+
}
254+
if want, got := `first`, groups[1].Name; want != got {
255+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
256+
}
257+
if want, got := `testing`, groups[2].String(); want != got {
258+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
259+
}
260+
if want, got := `test`, groups[2].Name; want != got {
261+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
262+
}
263+
if want, got := `some`, groups[3].String(); want != got {
264+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
265+
}
266+
if want, got := `3`, groups[3].Name; want != got {
267+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
268+
}
269+
if want, got := `stuff`, groups[4].String(); want != got {
270+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
271+
}
272+
if want, got := `4`, groups[4].Name; want != got {
273+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
274+
}
275+
}
276+
277+
func TestMaintainCaptureOrder_Balancing_Conditional_Alternation(t *testing.T) {
278+
r, err := Compile(
279+
`^[^<>]*(((?'Open'<)[^<>]*)+((?'Close-Open'>)[^<>]*)+)*(?(Open)(?!))$`, MaintainCaptureOrder)
280+
// t.Logf("code dump: %v", r.code.Dump())
281+
if err != nil {
282+
t.Errorf("unexpected compile err: %v", err)
283+
}
284+
text := "<abc><mno<xyz>>"
285+
m, err := r.FindStringMatch(text)
286+
287+
if err != nil {
288+
t.Errorf("unexpected match err: %v", err)
289+
}
290+
if m == nil {
291+
t.Error("Nil match, expected success")
292+
} else {
293+
//t.Logf("Match: %v", m.dump())
294+
}
295+
296+
groups := m.Groups()
297+
if want, got := text, m.String(); want != got {
298+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
299+
}
300+
if want, got := text, groups[0].String(); want != got {
301+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
302+
}
303+
if want, got := `<mno<xyz>>`, groups[1].String(); want != got {
304+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
305+
}
306+
if want, got := `<abc>`, groups[1].Captures[0].String(); want != got {
307+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
308+
}
309+
if want, got := `<mno<xyz>>`, groups[1].Captures[1].String(); want != got {
310+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
311+
}
312+
if want, got := `1`, groups[1].Name; want != got {
313+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
314+
}
315+
if want, got := `<xyz`, groups[2].String(); want != got {
316+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
317+
}
318+
if want, got := `<abc`, groups[2].Captures[0].String(); want != got {
319+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
320+
}
321+
if want, got := `<mno`, groups[2].Captures[1].String(); want != got {
322+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
323+
}
324+
if want, got := `<xyz`, groups[2].Captures[2].String(); want != got {
325+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
326+
}
327+
if want, got := `2`, groups[2].Name; want != got {
328+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
329+
}
330+
if want, got := ``, groups[3].String(); want != got {
331+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
332+
}
333+
if want, got := `Open`, groups[3].Name; want != got {
334+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
335+
}
336+
if want, got := `>`, groups[4].String(); want != got {
337+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
338+
}
339+
if want, got := `>`, groups[4].Captures[0].String(); want != got {
340+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
341+
}
342+
if want, got := `>`, groups[4].Captures[1].String(); want != got {
343+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
344+
}
345+
if want, got := `>`, groups[4].Captures[2].String(); want != got {
346+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
347+
}
348+
if want, got := `4`, groups[4].Name; want != got {
349+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
350+
}
351+
if want, got := `mno<xyz>`, groups[5].String(); want != got {
352+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
353+
}
354+
if want, got := `abc`, groups[5].Captures[0].String(); want != got {
355+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
356+
}
357+
if want, got := `xyz`, groups[5].Captures[1].String(); want != got {
358+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
359+
}
360+
if want, got := `mno<xyz>`, groups[5].Captures[2].String(); want != got {
361+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
362+
}
363+
if want, got := `Close`, groups[5].Name; want != got {
364+
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
365+
}
366+
}

0 commit comments

Comments
 (0)