From 3c0ff963f4b0e29e12e37475ec30d019b095760e Mon Sep 17 00:00:00 2001 From: zufuliu Date: Sun, 10 Dec 2023 14:58:39 +0800 Subject: [PATCH] Change `MatchOnLines()` to use `std::regex_iterator`. see https://sourceforge.net/p/scintilla/bugs/2157/. --- scintilla/src/Document.cxx | 59 +++++++++++++++++--------------------- scintilla/src/RESearch.cxx | 4 +-- 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/scintilla/src/Document.cxx b/scintilla/src/Document.cxx index bae32d07e8..33b7abf1fd 100644 --- a/scintilla/src/Document.cxx +++ b/scintilla/src/Document.cxx @@ -3003,8 +3003,8 @@ class RESearchRange { lineRangeEnd = doc->SciLineFromPosition(endPos); lineRangeBreak = lineRangeEnd + increment; } - Range LineRange(Sci::Line line) const noexcept { - Range range(doc->LineStart(line), doc->LineEnd(line)); + Range LineRange(Sci::Line line, Sci::Position lineStartPos, Sci::Position lineEndPos) const noexcept { + Range range(lineStartPos, lineEndPos); if (increment > 0) { if (line == lineRangeStart) range.start = startPos; @@ -3215,13 +3215,16 @@ class UTF8Iterator { #define REGEX_MULTILINE #endif -std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) noexcept { +std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos, Sci::Position lineStartPos, Sci::Position lineEndPos) noexcept { std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default; - if (!doc->IsLineStartPosition(startPos)) { - flagsMatch |= std::regex_constants::match_not_bol; + if (startPos != lineStartPos) { + flagsMatch |= std::regex_constants::match_prev_avail; } - if (!doc->IsLineEndPosition(endPos)) { + if (endPos != lineEndPos) { flagsMatch |= std::regex_constants::match_not_eol; + if (!doc->IsWordEndAt(endPos)) { + flagsMatch |= std::regex_constants::match_not_eow; + } } return flagsMatch; } @@ -3236,13 +3239,15 @@ bool MatchOnLines(const 
Document *doc, const Regex &regexp, const RESearchRange // has not been implemented by compiler runtimes with MSVC always in multiline // mode and libc++ and libstdc++ always in single-line mode. // If multiline regex worked well then the line by line iteration could be removed - // for the forwards case and replaced with the following 4 lines: + // for the forwards case and replaced with the following: bool matched = false; #ifdef REGEX_MULTILINE if (resr.increment > 0) { + const Sci::Position lineStartPos = doc->LineStart(resr.lineRangeStart); + const Sci::Position lineEndPos = doc->LineEnd(resr.lineRangeEnd); Iterator itStart(doc, resr.startPos); Iterator itEnd(doc, resr.endPos); - const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos); + const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos, lineStartPos, lineEndPos); matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch); goto labelMatched; } @@ -3250,36 +3255,24 @@ bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange { // Line by line. for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { - const Range lineRange = resr.LineRange(line); + const Sci::Position lineStartPos = doc->LineStart(line); + const Sci::Position lineEndPos = doc->LineEnd(line); + const Range lineRange = resr.LineRange(line, lineStartPos, lineEndPos); Iterator itStart(doc, lineRange.start); Iterator itEnd(doc, lineRange.end); - std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end); - matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch); - // Check for the last match on this line. 
- if (matched) { + const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end, lineStartPos, lineEndPos); + std::regex_iterator<Iterator> it(itStart, itEnd, regexp, flagsMatch); + for (const std::regex_iterator<Iterator> last; it != last; ++it) { + match = *it; + matched = true; #ifndef REGEX_MULTILINE - if (resr.increment < 0) { -#endif - flagsMatch |= std::regex_constants::match_not_bol; - Sci::Position endPos = match[0].second.PosRoundUp(); - while (endPos < lineRange.end) { - if (match[0].first == match[0].second) { - // empty match - endPos = doc->NextPosition(endPos, 1); - } - Iterator itNext(doc, endPos); - std::match_results<Iterator> matchNext; - if (std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch)) { - match = matchNext; - endPos = match[0].second.PosRoundUp(); - } else { - break; - } + if (resr.increment > 0) { + break; } -#ifndef REGEX_MULTILINE - } #endif - break; + } + if (matched) { + break; } } } diff --git a/scintilla/src/RESearch.cxx b/scintilla/src/RESearch.cxx index 78897ade95..c0d837ed52 100644 --- a/scintilla/src/RESearch.cxx +++ b/scintilla/src/RESearch.cxx @@ -264,7 +264,7 @@ RESearch::RESearch(const CharClassify *charClassTable) { lineEndPos = 0; sta = NOP; /* status of lastpat */ previousFlags = FindOption::None; - memset(nfa, 0, 4); + memset(nfa, END, 4); memset(bittab, 0, BITBLK); Clear(); } @@ -434,7 +434,7 @@ const char *RESearch::Compile(const char *pattern, size_t length, FindOption fla } const char *RESearch::DoCompile(const char *pattern, size_t length, FindOption flags) noexcept { - memset(nfa, 0, 4); + memset(nfa, END, 4); memset(bittab, 0, BITBLK); const bool caseSensitive = FlagSet(flags, FindOption::MatchCase); const bool posix = FlagSet(flags, FindOption::Posix);