Skip to content

Commit

Permalink
Remove minTrailByte from DBCSCharClassify.
Browse files Browse the repository at this point in the history
  • Loading branch information
zufuliu committed Nov 24, 2023
1 parent a012c39 commit 5218c90
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 31 deletions.
6 changes: 1 addition & 5 deletions scintilla/src/CharClassify.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -885,9 +885,7 @@ const uint8_t CharClassify_CP1361Data[] = {
//dbcs--Autogenerated -- end of section automatically generated
}

DBCSCharClassify::DBCSCharClassify(int codePage_) noexcept:
codePage(codePage_),
minTrailByte(0x40) {
DBCSCharClassify::DBCSCharClassify(int codePage_) noexcept {
switch (codePage_) {
case 932: {
// Shift-JIS
Expand All @@ -907,7 +905,6 @@ DBCSCharClassify::DBCSCharClassify(int codePage_) noexcept:

case 949: {
// Korean Unified Hangul Code, Wansung KS C-5601-1987
minTrailByte = 0x41;
constexpr uint8_t BytesRLE_CP949[] = {252, 8, 106, 24, 106, 24, 255, 255, 4,};
ExpandRLE(BytesRLE_CP949, leadByte);

Expand All @@ -924,7 +921,6 @@ DBCSCharClassify::DBCSCharClassify(int codePage_) noexcept:

default: {
// Korean Johab, KS C-5601-1992
minTrailByte = 0x31;
constexpr uint8_t BytesRLE_CP1361[] = {196, 254, 62, 8, 14, 255, 71, 18, 31, 6, 107, 22, 4,};
ExpandRLE(BytesRLE_CP1361, leadByte);

Expand Down
9 changes: 0 additions & 9 deletions scintilla/src/CharClassify.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,16 +154,7 @@ class DBCSCharClassify {
return CharacterClass::space;
}

constexpr int CodePage() const noexcept {
return codePage;
}
constexpr int MinTrailByte() const noexcept {
return minTrailByte;
}

private:
const int codePage;
int minTrailByte;
uint8_t leadByte[256];
unsigned char classifyMap[0xffff + 1];
};
Expand Down
47 changes: 30 additions & 17 deletions scintilla/src/Document.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ Document::Document(DocumentOption options) :
actualIndentInChars = 8;
useTabs = true;
tabIndents = true;
forwardSafeChar = 0x80;
backwardSafeChar = 0x80;
backspaceUnindents = false;

perLineData[ldMarkers] = std::make_unique<LineMarkers>();
Expand Down Expand Up @@ -264,20 +266,35 @@ LineEndType Document::LineEndTypesSupported() const noexcept {
return LineEndType::Default;
}

static inline std::unique_ptr<DBCSCharClassify> GetDBCSCharClassify(int codePage) {
if (codePage != 0 && codePage != CpUtf8) {
return std::make_unique<DBCSCharClassify>(codePage);
}
return {};
}

bool Document::SetDBCSCodePage(int dbcsCodePage_) {
if (dbcsCodePage != dbcsCodePage_) {
dbcsCodePage = dbcsCodePage_;
pcf.reset();
cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
dbcsCharClass = GetDBCSCharClassify(dbcsCodePage);
DBCSCharClassify *classify = nullptr;
forwardSafeChar = 0xff;
backwardSafeChar = 0xff;
if (dbcsCodePage) {
forwardSafeChar = 0x80;
backwardSafeChar = 0x80;
if (CpUtf8 != dbcsCodePage) {
// minimum trail byte
switch (dbcsCodePage) {
default:
backwardSafeChar = 0x40 - 1;
break;
case 949:
backwardSafeChar = 0x41 - 1;
break;
case 1361:
backwardSafeChar = 0x31 - 1;
break;
}
classify = new DBCSCharClassify(dbcsCodePage);
}
}
dbcsCharClass.reset(classify);
regex.reset();
ModifiedAt(0); // Need to restyle whole document
return true;
Expand Down Expand Up @@ -2148,10 +2165,6 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con
// Compute actual search ranges needed
const Sci::Position lengthFind = *length;

// character less than safeChar is encoded in single byte in the encoding.
constexpr int safeCharASCII = 0x80; // UTF-8 forward & backward search, DBCS forward search
constexpr int safeCharSBCS = 256; // all

//Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, search, lengthFind);
const Sci::Position limitPos = std::max(startPos, endPos);
Sci::Position pos = startPos;
Expand All @@ -2162,10 +2175,7 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con
const SplitView cbView = cb.AllView();
SearchThing searchThing;
if (caseSensitive) {
const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
const unsigned char * const searchData = reinterpret_cast<const unsigned char *>(search);
const unsigned char charStartSearch = searchData[0];
const int safeChar = (0 == dbcsCodePage) ? safeCharSBCS : ((direction >= 0 || CpUtf8 == dbcsCodePage) ? safeCharASCII : dbcsCharClass->MinTrailByte());
// Boyer-Moore-Horspool-Sunday Algorithm / Quick Search Algorithm
// https://www-igm.univ-mlv.fr/~lecroq/string/index.html
// https://www-igm.univ-mlv.fr/~lecroq/string/node19.html
Expand Down Expand Up @@ -2194,6 +2204,9 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con
}
}

const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
const unsigned char charStartSearch = searchData[0];
const unsigned char safeChar = (direction >= 0) ? forwardSafeChar : backwardSafeChar;
const Sci::Position skip = (direction >= 0) ? lengthFind : -1;
if (direction < 0) {
pos = MovePositionOutsideChar(pos - lengthFind, -1, false);
Expand All @@ -2213,7 +2226,7 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con
}

if (lengthFind == 1) {
if (leadByte < safeChar) {
if (leadByte <= safeChar) {
pos += increment;
} else {
if (!NextCharacter(pos, increment)) {
Expand All @@ -2223,7 +2236,7 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con
} else {
const unsigned char nextByte = cbView.CharAt(pos + skip);
pos += shiftTable[nextByte];
if (nextByte >= safeChar) {
if (nextByte > safeChar) {
pos = MovePositionOutsideChar(pos, increment, false);
}
}
Expand Down
2 changes: 2 additions & 0 deletions scintilla/src/Document.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,8 @@ class Document : PerLine, public Scintilla::IDocument, public Scintilla::ILoader
int actualIndentInChars;
bool useTabs;
bool tabIndents;
uint8_t forwardSafeChar;
uint8_t backwardSafeChar;
uint8_t backspaceUnindents;
ActionDuration durationStyleOneUnit;

Expand Down

0 comments on commit 5218c90

Please sign in to comment.