Skip to content

Commit

Permalink
Fuzz out syntax highlighting bugs
Browse files: browse the repository at this point in the history
  • Loading branch information
jart committed Oct 19, 2024
1 parent 232a9f6 commit 213d7d8
Show file tree
Hide file tree
Showing 15 changed files with 455 additions and 206 deletions.
5 changes: 5 additions & 0 deletions llamafile/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ o/$(MODE)/llamafile: \
o/$(MODE)/llamafile/tokenize \
o/$(MODE)/llamafile/addnl \
o/$(MODE)/llamafile/high \
o/$(MODE)/llamafile/highlight_test.runs \
o/$(MODE)/llamafile/highlight_c_test.runs \
o/$(MODE)/llamafile/highlight_python_test.runs \
o/$(MODE)/llamafile/pool_test.runs \
Expand Down Expand Up @@ -165,6 +166,10 @@ o/$(MODE)/llamafile/pool_test: \
o/$(MODE)/llamafile/crash.o \
o/$(MODE)/llamafile/pool.o \

o/$(MODE)/llamafile/highlight_test: \
o/$(MODE)/llamafile/highlight_test.o \
o/$(MODE)/llama.cpp/llama.cpp.a \

o/$(MODE)/llamafile/highlight_c_test: \
o/$(MODE)/llamafile/highlight_c_test.o \
o/$(MODE)/llamafile/highlight_c.o \
Expand Down
52 changes: 33 additions & 19 deletions llamafile/highlight_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@
#define NORMAL 0
#define WORD 1
#define QUOTE 2
#define DQUOTE 3
#define SLASH 4
#define SLASH_SLASH 5
#define SLASH_STAR 6
#define SLASH_STAR_STAR 7
#define TICK 8
#define BACKSLASH 64
#define QUOTE_BACKSLASH 3
#define DQUOTE 4
#define DQUOTE_BACKSLASH 5
#define SLASH 6
#define SLASH_SLASH 7
#define SLASH_STAR 8
#define SLASH_STAR_STAR 9
#define TICK 10
#define TICK_BACKSLASH 11

HighlightC::HighlightC(is_keyword_f *is_keyword, is_keyword_f *is_type)
: is_keyword_(is_keyword), is_type_(is_type) {
Expand All @@ -41,17 +43,6 @@ void HighlightC::feed(std::string *r, std::string_view input) {
int c;
for (size_t i = 0; i < input.size(); ++i) {
c = input[i] & 255;

if (t_ & BACKSLASH) {
t_ &= ~BACKSLASH;
*r += c;
continue;
} else if (c == '\\') {
*r += c;
t_ |= BACKSLASH;
continue;
}

switch (t_) {

Normal:
Expand Down Expand Up @@ -147,33 +138,53 @@ void HighlightC::feed(std::string *r, std::string_view input) {
if (c == '\'') {
*r += HI_RESET;
t_ = NORMAL;
} else if (c == '\\') {
t_ = QUOTE_BACKSLASH;
}
break;

case QUOTE_BACKSLASH:
*r += c;
t_ = QUOTE;
break;

case DQUOTE:
*r += c;
if (c == '"') {
*r += HI_RESET;
t_ = NORMAL;
} else if (c == '\\') {
t_ = DQUOTE_BACKSLASH;
}
break;

case DQUOTE_BACKSLASH:
*r += c;
t_ = DQUOTE;
break;

case TICK:
*r += c;
if (c == '`') {
*r += HI_RESET;
t_ = NORMAL;
} else if (c == '\\') {
t_ = TICK_BACKSLASH;
}
break;

case TICK_BACKSLASH:
*r += c;
t_ = TICK;
break;

default:
__builtin_unreachable();
}
}
}

void HighlightC::flush(std::string *r) {
t_ &= ~BACKSLASH;
switch (t_) {
case WORD:
if (is_keyword_(word_.data(), word_.size())) {
Expand All @@ -193,8 +204,11 @@ void HighlightC::flush(std::string *r) {
*r += '/';
break;
case TICK:
case TICK_BACKSLASH:
case QUOTE:
case QUOTE_BACKSLASH:
case DQUOTE:
case DQUOTE_BACKSLASH:
case SLASH_SLASH:
case SLASH_STAR:
case SLASH_STAR_STAR:
Expand Down
33 changes: 20 additions & 13 deletions llamafile/highlight_cobol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@
#define NORMAL 0
#define WORD 1
#define QUOTE 2
#define DQUOTE 3
#define COMMENT 4
#define BACKSLASH 64
#define QUOTE_BACKSLASH 3
#define DQUOTE 4
#define DQUOTE_BACKSLASH 5
#define COMMENT 6

HighlightCobol::HighlightCobol() {
}
Expand Down Expand Up @@ -58,16 +59,6 @@ void HighlightCobol::feed(std::string *r, std::string_view input) {
}
}

if (t_ & BACKSLASH) {
t_ &= ~BACKSLASH;
*r += c;
continue;
} else if (c == '\\') {
*r += c;
t_ |= BACKSLASH;
continue;
}

switch (t_) {

Normal:
Expand Down Expand Up @@ -136,17 +127,31 @@ void HighlightCobol::feed(std::string *r, std::string_view input) {
if (c == '\'') {
*r += HI_RESET;
t_ = NORMAL;
} else if (c == '\\') {
t_ = QUOTE_BACKSLASH;
}
break;

case QUOTE_BACKSLASH:
*r += c;
t_ = QUOTE;
break;

case DQUOTE:
*r += c;
if (c == '"') {
*r += HI_RESET;
t_ = NORMAL;
} else if (c == '\\') {
t_ = DQUOTE_BACKSLASH;
}
break;

case DQUOTE_BACKSLASH:
*r += c;
t_ = DQUOTE;
break;

default:
__builtin_unreachable();
}
Expand All @@ -166,7 +171,9 @@ void HighlightCobol::flush(std::string *r) {
word_.clear();
break;
case QUOTE:
case QUOTE_BACKSLASH:
case DQUOTE:
case DQUOTE_BACKSLASH:
case COMMENT:
*r += HI_RESET;
break;
Expand Down
45 changes: 30 additions & 15 deletions llamafile/highlight_css.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@
#define PROPERTY 2
#define VALUE 3
#define QUOTE 4
#define DQUOTE 5
#define SLASH 6
#define SLASH_STAR 7
#define SLASH_STAR_STAR 8
#define BACKSLASH 0x10000
#define QUOTE_BACKSLASH 5
#define DQUOTE 6
#define DQUOTE_BACKSLASH 7
#define SLASH 8
#define SLASH_STAR 9
#define SLASH_STAR_STAR 10

HighlightCss::HighlightCss() {
}
Expand All @@ -41,16 +42,6 @@ void HighlightCss::feed(std::string *r, std::string_view input) {
for (size_t i = 0; i < input.size(); ++i) {
c = input[i] & 255;

if (t_ & BACKSLASH) {
t_ &= ~BACKSLASH;
*r += c;
continue;
} else if (c == '\\') {
*r += c;
t_ |= BACKSLASH;
continue;
}

TryAgain:
switch (t_ & 255) {

Expand Down Expand Up @@ -170,17 +161,35 @@ void HighlightCss::feed(std::string *r, std::string_view input) {
if (c == '\'') {
*r += HI_RESET;
goto Pop;
} else if (c == '\\') {
t_ &= -256;
t_ |= QUOTE_BACKSLASH;
}
break;

case QUOTE_BACKSLASH:
*r += c;
t_ &= -256;
t_ |= QUOTE;
break;

case DQUOTE:
*r += c;
if (c == '"') {
*r += HI_RESET;
goto Pop;
} else if (c == '\\') {
t_ &= -256;
t_ |= DQUOTE_BACKSLASH;
}
break;

case DQUOTE_BACKSLASH:
*r += c;
t_ &= -256;
t_ |= DQUOTE;
break;

Pop:
t_ >>= 8;
if (t_ == SELECTOR)
Expand All @@ -200,6 +209,12 @@ void HighlightCss::flush(std::string *r) {
case SLASH:
*r += '/';
break;
case SELECTOR:
case PROPERTY:
case DQUOTE:
case DQUOTE_BACKSLASH:
*r += HI_RESET;
break;
default:
break;
}
Expand Down
33 changes: 20 additions & 13 deletions llamafile/highlight_fortran.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@
#define NORMAL 0
#define WORD 1
#define QUOTE 2
#define DQUOTE 3
#define COMMENT 4
#define BACKSLASH 64
#define QUOTE_BACKSLASH 3
#define DQUOTE 4
#define DQUOTE_BACKSLASH 5
#define COMMENT 6

HighlightFortran::HighlightFortran() {
}
Expand Down Expand Up @@ -58,16 +59,6 @@ void HighlightFortran::feed(std::string *r, std::string_view input) {
}
}

if (t_ & BACKSLASH) {
t_ &= ~BACKSLASH;
*r += c;
continue;
} else if (c == '\\') {
*r += c;
t_ |= BACKSLASH;
continue;
}

switch (t_) {

Normal:
Expand Down Expand Up @@ -140,17 +131,31 @@ void HighlightFortran::feed(std::string *r, std::string_view input) {
if (c == '\'') {
*r += HI_RESET;
t_ = NORMAL;
} else if (c == '\\') {
t_ = QUOTE_BACKSLASH;
}
break;

case QUOTE_BACKSLASH:
*r += c;
t_ = QUOTE;
break;

case DQUOTE:
*r += c;
if (c == '"') {
*r += HI_RESET;
t_ = NORMAL;
} else if (c == '\\') {
t_ = DQUOTE_BACKSLASH;
}
break;

case DQUOTE_BACKSLASH:
*r += c;
t_ = DQUOTE;
break;

default:
__builtin_unreachable();
}
Expand All @@ -170,7 +175,9 @@ void HighlightFortran::flush(std::string *r) {
word_.clear();
break;
case QUOTE:
case QUOTE_BACKSLASH:
case DQUOTE:
case DQUOTE_BACKSLASH:
case COMMENT:
*r += HI_RESET;
break;
Expand Down
Loading

0 comments on commit 213d7d8

Please sign in to comment.