Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Recognize non-BMP punctuation & symbols #297

Merged
merged 1 commit into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions lib/inlines.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,10 @@ var match = function(re) {

// Returns the code for the character at the current subject position, or -1
// if there are no more characters.
// This function must be non-BMP aware because the Unicode category of its result is used.
var peek = function() {
if (this.pos < this.subject.length) {
return this.subject.charCodeAt(this.pos);
return this.subject.codePointAt(this.pos);
} else {
return -1;
}
Expand Down Expand Up @@ -270,7 +271,7 @@ var scanDelims = function(cc) {
return null;
}

char_before = startpos === 0 ? "\n" : this.subject.charAt(startpos - 1);
char_before = previousChar(this.subject, startpos);

cc_after = this.peek();
if (cc_after === -1) {
Expand Down Expand Up @@ -304,6 +305,25 @@ var scanDelims = function(cc) {
}
this.pos = startpos;
return { numdelims: numdelims, can_open: can_open, can_close: can_close };

// Returns the character that ends immediately before index `pos` in `str`.
//
// - At the start of the string (pos === 0) a newline is returned.
// - If the preceding UTF-16 code unit is not a low surrogate, that single
//   code unit is returned.
// - If it is a low surrogate preceded by a high surrogate, the whole
//   two-unit surrogate pair is returned so that a non-BMP character is not
//   split in half.
// - If it is a lone low surrogate (malformed UTF-16), that single code unit
//   is returned as-is.
function previousChar(str, pos) {
    if (pos === 0) {
        return "\n";
    }
    var lastUnit = str.charCodeAt(pos - 1);
    // Masking with 0xfc00 keeps the top six bits; 0xdc00 marks a low
    // surrogate. Anything else is a complete BMP code unit on its own.
    if ((lastUnit & 0xfc00) !== 0xdc00) {
        return str.charAt(pos - 1);
    }
    // charCodeAt returns NaN when pos - 2 is out of range, and
    // NaN & 0xfc00 === 0, so the high-surrogate check below fails safely.
    var leadUnit = str.charCodeAt(pos - 2);
    if ((leadUnit & 0xfc00) !== 0xd800) {
        // Lone low surrogate with no high surrogate in front of it: return
        // just that code unit rather than referencing an undefined name.
        return str.charAt(pos - 1);
    }
    // Well-formed surrogate pair: return both code units together.
    return str.slice(pos - 2, pos);
}
};

// Handle a delimiter marker for emphasis or a quote.
Expand Down
16 changes: 16 additions & 0 deletions test/regression.txt
Original file line number Diff line number Diff line change
Expand Up @@ -546,3 +546,19 @@ foo <!-- test --> more -->
<p> 全角スペース (U+3000) 全形空白 </p>
<p>ZWNBSP (U+FEFF) ZWNBSP</p>
````````````````````````````````

#296
```````````````````````````````` example
a**a∇**a

a**∇a**a

a**a𝜵**a

a**𝜵a**a
.
<p>a**a∇**a</p>
<p>a**∇a**a</p>
<p>a**a𝜵**a</p>
<p>a**𝜵a**a</p>
````````````````````````````````
Loading