Skip to content

Commit 23c0565

Browse files
committed
Add compile-time validation of regular expressions.
So far we would catch pattern errors only at runtime. Now we report the following at compile time if it can be statically determined: - Patterns that don't compile (e.g., due to syntax errors) - Use of `$N` referring to a capture group that exceeds the number a pattern defines. - Use of `$N' with a regular expression specifying multiple patterns for parallel matching (in which case group numbers are ill-defined). - Use of `$N' with a `&nosub` field. Closes #2131.
1 parent 2944600 commit 23c0565

11 files changed

Lines changed: 107 additions & 72 deletions

File tree

hilti/toolchain/src/compiler/validator.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,11 @@ struct VisitorPost : visitor::PreOrder, public validator::VisitorMixIn {
479479
void operator()(ctor::Null* n) final {}
480480

481481
void operator()(ctor::RegExp* n) final {
482+
for ( const auto& pattern : n->patterns() ) {
483+
if ( auto rc = pattern.validate(); ! rc )
484+
error(rc.error(), n);
485+
}
486+
482487
if ( n->attributes()->find(hilti::attribute::kind::Anchor) )
483488
// This can end up reporting the same location multiple times,
484489
// which seems fine. Otherwise we'd need to explicitly track what's

spicy/toolchain/src/compiler/validator.cc

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,21 +1237,59 @@ struct VisitorPost : visitor::PreOrder, hilti::validator::VisitorMixIn {
12371237
// We check the original type since regexps get parsed as bytes.
12381238
if ( n->kind() == hilti::expression::keyword::Kind::Captures ) {
12391239
UnqualifiedType* original_type = nullptr;
1240+
const type::unit::item::Field* field = nullptr;
12401241

12411242
// Check type in hook bodies.
12421243
if ( auto* hook = n->parent<declaration::Hook>() ) {
12431244
auto idx = hook->unitFieldIndex();
1244-
auto* field = context()->lookup(idx)->as<type::unit::item::Field>();
1245+
field = context()->lookup(idx)->as<type::unit::item::Field>();
12451246
original_type = field->originalType()->type();
12461247
}
12471248

1248-
// Captures can also appear in field attributes.
1249-
else if ( auto* field = n->parent<type::unit::item::Field>() )
1250-
original_type = field->originalType()->type();
1249+
else {
1250+
// Captures can also appear in field attributes.
1251+
field = n->parent<type::unit::item::Field>();
1252+
1253+
if ( field )
1254+
original_type = field->originalType()->type();
1255+
}
12511256

12521257
// In all other cases, or when we are not parsing a regexp raise an error.
1253-
if ( ! original_type || ! original_type->isA<hilti::type::RegExp>() )
1258+
if ( ! original_type || ! original_type->isA<hilti::type::RegExp>() ) {
12541259
error("capture groups can only be used in hooks for fields parsing regexp", n);
1260+
return;
1261+
}
1262+
1263+
if ( field ) {
1264+
if ( field->attributes()->find(attribute::kind::Nosub) ) {
1265+
error("cannot use capture group with &nosub field", n);
1266+
return;
1267+
}
1268+
1269+
if ( const auto* regexp = field->ctor()->tryAs<hilti::ctor::RegExp>() ) {
1270+
if ( regexp->patterns().size() > 1 ) {
1271+
error("cannot use capture groups with regular expression consisting of multiple patterns", n);
1272+
return;
1273+
}
1274+
1275+
const auto* index_operator = n->parent()->as<hilti::operator_::vector::IndexNonConst>();
1276+
auto index = index_operator->op1()
1277+
->as<hilti::expression::Ctor>()
1278+
->ctor()
1279+
->as<hilti::ctor::UnsignedInteger>()
1280+
->value();
1281+
1282+
assert(! regexp->patterns().empty());
1283+
const auto& pattern = regexp->patterns().front();
1284+
1285+
if ( auto captures = pattern.numberOfCaptures();
1286+
captures && index > *captures ) // ignore errors here, will be reported elsewhere
1287+
error(fmt("capture group index %" PRIu64
1288+
" is too large, the regular expression defines only %" PRIu64 " group%s",
1289+
index, *captures, (*captures > 1 ? "s" : "")),
1290+
n);
1291+
}
1292+
}
12551293
}
12561294
}
12571295
};
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
2+
[error] <...>/ctor-fail.hlt:8:21-8:26: error compiling pattern 'gh(i': bad pattern: syntax error
3+
[error] <...>/ctor-fail.hlt:11:17-11:22: error compiling pattern 'gh(i': bad pattern: syntax error
4+
[error] hiltic: aborting after errors
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
2+
[error] <...>/capture-groups-fail.spicy:32:7-32:8: capture groups can only be used in hooks for fields parsing regexp
3+
[error] <...>/capture-groups-fail.spicy:10:15-10:16: capture groups can only be used in hooks for fields parsing regexp
4+
[error] <...>/capture-groups-fail.spicy:14:15-14:16: capture group index 2 is too large, the regular expression defines only 1 group
5+
[error] <...>/capture-groups-fail.spicy:18:15-18:16: cannot use capture groups with regular expression consisting of multiple patterns
6+
[error] <...>/capture-groups-fail.spicy:22:15-22:16: cannot use capture group with &nosub field
7+
[error] <...>/capture-groups-fail.spicy:25:10-25:17: error compiling pattern '<...>/': bad pattern: syntax error
8+
[error] <...>/capture-groups-fail.spicy:29:11-29:12: capture groups can only be used in hooks for fields parsing regexp
9+
[error] spicyc: aborting after errors

tests/Baseline/spicy.types.regexp.capture-without-regexp/output

Lines changed: 0 additions & 5 deletions
This file was deleted.

tests/Baseline/spicy.types.regexp.parse-ctor-captures-no-sub/output

Lines changed: 0 additions & 4 deletions
This file was deleted.
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# @TEST-EXEC-FAIL: hiltic -p %INPUT >output 2>&1
2+
# @TEST-EXEC: btest-diff output
3+
#
4+
# @TEST-DOC: Check that we capture invalid regular expression constructors at compile time.
5+
6+
module Foo {
7+
8+
global regexp re1 = /gh(i/;
9+
10+
function void f() {
11+
local re2 = /gh(i/;
12+
}
13+
14+
}

tests/hilti/types/regexp/match-repeats.hlt

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,31 +16,19 @@ assert |*data.match(/\x00{0,3}/)| == 3;
1616
assert |*data.match(/\x00{0,4}/)| == 4;
1717
assert |*data.match(/\x00{0,5}/)| == 5;
1818

19-
assert-exception data.match(/\x00{1,0}/);
2019
assert |*data.match(/\x00{1,1}/)| == 1;
2120
assert |*data.match(/\x00{1,2}/)| == 2;
2221
assert |*data.match(/\x00{1,3}/)| == 3;
2322
assert |*data.match(/\x00{1,4}/)| == 4;
2423
assert |*data.match(/\x00{1,5}/)| == 5;
2524

26-
assert-exception data.match(/\x00{2,0}/);
27-
assert-exception data.match(/\x00{2,1}/);
2825
assert |*data.match(/\x00{2,2}/)| == 2;
2926
assert |*data.match(/\x00{2,3}/)| == 3;
3027
assert |*data.match(/\x00{2,4}/)| == 4;
3128
assert |*data.match(/\x00{2,5}/)| == 5;
3229

33-
assert-exception data.match(/\x00{4,0}/);
34-
assert-exception data.match(/\x00{4,1}/);
35-
assert-exception data.match(/\x00{4,2}/);
36-
assert-exception data.match(/\x00{4,3}/);
3730
assert |*data.match(/\x00{4,4}/)| == 4;
3831
assert |*data.match(/\x00{4,5}/)| == 5;
3932

40-
assert-exception data.match(/\x00{5,0}/);
41-
assert-exception data.match(/\x00{5,1}/);
42-
assert-exception data.match(/\x00{5,2}/);
43-
assert-exception data.match(/\x00{5,3}/);
44-
assert-exception data.match(/\x00{5,4}/);
4533
assert |*data.match(/\x00{5,5}/)| == 5;
4634
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# @TEST-DOC: Validate that we reject invaliud usage of captures.
2+
#
3+
# @TEST-EXEC-FAIL: spicyc -dj %INPUT >output 2>&1
4+
# @TEST-EXEC: btest-diff output
5+
6+
module test;
7+
8+
public type X = unit {
9+
x: uint8 {
10+
print $1; # not a regexp field
11+
}
12+
13+
y: /a(b)c/ {
14+
print $2; # group does not exist
15+
}
16+
17+
z: /a(b)c/ | /d(e)f/ {
18+
print $2; # capture groups not supported for parallel matching (because it's ill-defined)
19+
}
20+
21+
nosub: /a(b)c/ &nosub {
22+
print $1; # capture groups not available
23+
}
24+
25+
err: /a(b))c/ { } # invalid regexp
26+
};
27+
28+
function fn() {
29+
print $1; # not inside a unit field
30+
}
31+
32+
print $1; # not inside a unit field

tests/spicy/types/regexp/capture-without-regexp.spicy

Lines changed: 0 additions & 18 deletions
This file was deleted.

0 commit comments

Comments
 (0)