From 5218c90b1b6f041fabc5b9e784f92265c61f4005 Mon Sep 17 00:00:00 2001 From: zufuliu Date: Fri, 24 Nov 2023 18:36:47 +0800 Subject: [PATCH] Remove `minTrailByte` from DBCSCharClassify. --- scintilla/src/CharClassify.cxx | 6 +---- scintilla/src/CharClassify.h | 9 ------- scintilla/src/Document.cxx | 47 ++++++++++++++++++++++------------ scintilla/src/Document.h | 2 ++ 4 files changed, 33 insertions(+), 31 deletions(-) diff --git a/scintilla/src/CharClassify.cxx b/scintilla/src/CharClassify.cxx index 20b29f8554..b9bce0b322 100644 --- a/scintilla/src/CharClassify.cxx +++ b/scintilla/src/CharClassify.cxx @@ -885,9 +885,7 @@ const uint8_t CharClassify_CP1361Data[] = { //dbcs--Autogenerated -- end of section automatically generated } -DBCSCharClassify::DBCSCharClassify(int codePage_) noexcept: - codePage(codePage_), - minTrailByte(0x40) { +DBCSCharClassify::DBCSCharClassify(int codePage_) noexcept { switch (codePage_) { case 932: { // Shift-JIS @@ -907,7 +905,6 @@ DBCSCharClassify::DBCSCharClassify(int codePage_) noexcept: case 949: { // Korean Unified Hangul Code, Wansung KS C-5601-1987 - minTrailByte = 0x41; constexpr uint8_t BytesRLE_CP949[] = {252, 8, 106, 24, 106, 24, 255, 255, 4,}; ExpandRLE(BytesRLE_CP949, leadByte); @@ -924,7 +921,6 @@ DBCSCharClassify::DBCSCharClassify(int codePage_) noexcept: default: { // Korean Johab, KS C-5601-1992 - minTrailByte = 0x31; constexpr uint8_t BytesRLE_CP1361[] = {196, 254, 62, 8, 14, 255, 71, 18, 31, 6, 107, 22, 4,}; ExpandRLE(BytesRLE_CP1361, leadByte); diff --git a/scintilla/src/CharClassify.h b/scintilla/src/CharClassify.h index 0d6a734162..5b54063f28 100644 --- a/scintilla/src/CharClassify.h +++ b/scintilla/src/CharClassify.h @@ -154,16 +154,7 @@ class DBCSCharClassify { return CharacterClass::space; } - constexpr int CodePage() const noexcept { - return codePage; - } - constexpr int MinTrailByte() const noexcept { - return minTrailByte; - } - private: - const int codePage; - int minTrailByte; uint8_t leadByte[256]; unsigned char classifyMap[0xffff + 1]; }; diff --git a/scintilla/src/Document.cxx b/scintilla/src/Document.cxx index f08a437a65..bae32d07e8 100644 --- a/scintilla/src/Document.cxx +++ b/scintilla/src/Document.cxx @@ -163,6 +163,8 @@ Document::Document(DocumentOption options) : actualIndentInChars = 8; useTabs = true; tabIndents = true; + forwardSafeChar = 0x80; + backwardSafeChar = 0x80; backspaceUnindents = false; perLineData[ldMarkers] = std::make_unique(); @@ -264,20 +266,35 @@ LineEndType Document::LineEndTypesSupported() const noexcept { return LineEndType::Default; } -static inline std::unique_ptr GetDBCSCharClassify(int codePage) { - if (codePage != 0 && codePage != CpUtf8) { - return std::make_unique(codePage); - } - return {}; -} - bool Document::SetDBCSCodePage(int dbcsCodePage_) { if (dbcsCodePage != dbcsCodePage_) { dbcsCodePage = dbcsCodePage_; pcf.reset(); cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported()); cb.SetUTF8Substance(CpUtf8 == dbcsCodePage); - dbcsCharClass = GetDBCSCharClassify(dbcsCodePage); + DBCSCharClassify *classify = nullptr; + forwardSafeChar = 0xff; + backwardSafeChar = 0xff; + if (dbcsCodePage) { + forwardSafeChar = 0x80; + backwardSafeChar = 0x80; + if (CpUtf8 != dbcsCodePage) { + // minimum trail byte + switch (dbcsCodePage) { + default: + backwardSafeChar = 0x40 - 1; + break; + case 949: + backwardSafeChar = 0x41 - 1; + break; + case 1361: + backwardSafeChar = 0x31 - 1; + break; + } + classify = new DBCSCharClassify(dbcsCodePage); + } + } + dbcsCharClass.reset(classify); regex.reset(); ModifiedAt(0); // Need to restyle whole document return true; @@ -2148,10 +2165,6 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con // Compute actual search ranges needed const Sci::Position lengthFind = *length; - // character less than safeChar is encoded in single byte in the encoding. - constexpr int safeCharASCII = 0x80; // UTF-8 forward & backward search, DBCS forward search - constexpr int safeCharSBCS = 256; // all - //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, search, lengthFind); const Sci::Position limitPos = std::max(startPos, endPos); Sci::Position pos = startPos; @@ -2162,10 +2175,7 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con const SplitView cbView = cb.AllView(); SearchThing searchThing; if (caseSensitive) { - const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos; const unsigned char * const searchData = reinterpret_cast(search); - const unsigned char charStartSearch = searchData[0]; - const int safeChar = (0 == dbcsCodePage) ? safeCharSBCS : ((direction >= 0 || CpUtf8 == dbcsCodePage) ? safeCharASCII : dbcsCharClass->MinTrailByte()); // Boyer-Moore-Horspool-Sunday Algorithm / Quick Search Algorithm // https://www-igm.univ-mlv.fr/~lecroq/string/index.html // https://www-igm.univ-mlv.fr/~lecroq/string/node19.html @@ -2194,6 +2204,9 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con } } + const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos; + const unsigned char charStartSearch = searchData[0]; + const unsigned char safeChar = (direction >= 0) ? forwardSafeChar : backwardSafeChar; const Sci::Position skip = (direction >= 0) ? lengthFind : -1; if (direction < 0) { pos = MovePositionOutsideChar(pos - lengthFind, -1, false); @@ -2213,7 +2226,7 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con } if (lengthFind == 1) { - if (leadByte < safeChar) { + if (leadByte <= safeChar) { pos += increment; } else { if (!NextCharacter(pos, increment)) { @@ -2223,7 +2236,7 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con } else { const unsigned char nextByte = cbView.CharAt(pos + skip); pos += shiftTable[nextByte]; - if (nextByte >= safeChar) { + if (nextByte > safeChar) { pos = MovePositionOutsideChar(pos, increment, false); } } diff --git a/scintilla/src/Document.h b/scintilla/src/Document.h index 1e8d96960e..a0916b4cd1 100644 --- a/scintilla/src/Document.h +++ b/scintilla/src/Document.h @@ -317,6 +317,8 @@ class Document : PerLine, public Scintilla::IDocument, public Scintilla::ILoader int actualIndentInChars; bool useTabs; bool tabIndents; + uint8_t forwardSafeChar; + uint8_t backwardSafeChar; uint8_t backspaceUnindents; ActionDuration durationStyleOneUnit;