Skip to content

Commit

Permalink
<regex>: Repair character class escapes outside character class definitions (#5160)
Browse files Browse the repository at this point in the history

Co-authored-by: Stephan T. Lavavej <[email protected]>
  • Loading branch information
muellerj2 and StephanTLavavej authored Dec 5, 2024
1 parent e3e65be commit b932cf8
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 5 deletions.
17 changes: 12 additions & 5 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1710,7 +1710,7 @@ private:
bool _OctalDigits();
void _Do_ex_class(_Meta_type);
bool _CharacterClassEscape(bool);
_Prs_ret _ClassEscape(bool);
_Prs_ret _ClassEscape2();
_Prs_ret _ClassAtom();
void _ClassRanges();
void _CharacterClass();
Expand Down Expand Up @@ -4017,22 +4017,29 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterClassEscape(bool _Addit) { //
return false;
}

const bool _Negated = _Traits.isctype(_Char, _RxTraits::_Ch_upper);
if (_Addit) {
_Nfa._Add_class();
// GH-992: Outside character class definitions, _Cls completely defines the character class
// so negating _Cls and negating the entire character class are equivalent.
// Since the former negation is defective, do the latter instead.
if (_Negated) {
_Nfa._Negate();
}
}

_Nfa._Add_named_class(_Cls, _Traits.isctype(_Char, _RxTraits::_Ch_upper));
_Nfa._Add_named_class(_Cls, _Negated && !_Addit);
_Next();
return true;
}

template <class _FwdIt, class _Elem, class _RxTraits>
_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape(bool _Addit) { // check for class escape
_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape2() { // check for class escape
if ((_L_flags & _L_esc_bsl) && _Char == _Esc_bsl) { // handle escape backslash if allowed
_Val = _Esc_bsl;
_Next();
return _Prs_chr;
} else if ((_L_flags & _L_esc_wsd) && _CharacterClassEscape(_Addit)) {
} else if ((_L_flags & _L_esc_wsd) && _CharacterClassEscape(false)) {
return _Prs_set;
} else if (_DecimalDigits(regex_constants::error_escape)) { // check for invalid value
if (_Val != 0) {
Expand All @@ -4049,7 +4056,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom() { // check for class at
if (_Mchar == _Meta_esc) { // check for valid escape sequence
_Next();
if (_L_flags & _L_grp_esc) {
return _ClassEscape(false);
return _ClassEscape2();
} else if ((_L_flags & _L_esc_ffn && _Do_ffn(_Char))
|| (_L_flags & _L_esc_ffnx && _Do_ffnx(_Char))) { // advance to next character
_Next();
Expand Down
11 changes: 11 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,16 @@ void test_gh_5058() {
}
}

void test_gh_5160() {
// GH-5160 fixed mishandled negated character class escapes
// outside character class definitions
const test_wregex neg_regex(&g_regexTester, LR"(Y\S*Z)");
neg_regex.should_search_match(L"xxxYxx\x0078xxxZxxx", L"Yxx\x0078xxxZ"); // U+0078 LATIN SMALL LETTER X
neg_regex.should_search_match(L"xxxYxx\x03C7xxxZxxx", L"Yxx\x03C7xxxZ"); // U+03C7 GREEK SMALL LETTER CHI
neg_regex.should_search_fail(L"xxxYxx xxxZxxx");
neg_regex.should_search_fail(L"xxxYxx\x2009xxxZxxx"); // U+2009 THIN SPACE
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -688,6 +698,7 @@ int main() {
test_gh_993();
test_gh_4995();
test_gh_5058();
test_gh_5160();

return g_regexTester.result();
}

0 comments on commit b932cf8

Please sign in to comment.