diff --git a/lib/inlines.js b/lib/inlines.js
index a37d1665..a8d9d6ed 100644
--- a/lib/inlines.js
+++ b/lib/inlines.js
@@ -127,9 +127,10 @@ var match = function(re) {
 
 // Returns the code for the character at the current subject position, or -1
 // there are no more characters.
+// This function must be non-BMP aware because the Unicode category of its result is used.
 var peek = function() {
     if (this.pos < this.subject.length) {
-        return this.subject.charCodeAt(this.pos);
+        return this.subject.codePointAt(this.pos);
     } else {
         return -1;
     }
@@ -270,7 +271,7 @@ var scanDelims = function(cc) {
         return null;
     }
 
-    char_before = startpos === 0 ? "\n" : this.subject.charAt(startpos - 1);
+    char_before = previousChar(this.subject, startpos);
 
     cc_after = this.peek();
     if (cc_after === -1) {
@@ -304,6 +305,27 @@ var scanDelims = function(cc) {
     }
     this.pos = startpos;
     return { numdelims: numdelims, can_open: can_open, can_close: can_close };
+
+    // Returns the character ending just before pos: one UTF-16 code unit, or a
+    // full surrogate pair (two code units); returns "\n" when pos is 0.
+    function previousChar(str, pos) {
+        if (pos === 0) {
+            return "\n";
+        }
+        var previous_cc = str.charCodeAt(pos - 1);
+        // not a low surrogate: the previous code unit stands on its own
+        if ((previous_cc & 0xfc00) !== 0xdc00) {
+            return str.charAt(pos - 1);
+        }
+        // charCodeAt returns NaN if pos - 2 is out of range
+        var two_previous_cc = str.charCodeAt(pos - 2);
+        // NaN & 0xfc00 = 0, so an out-of-range index also takes this branch;
+        // unpaired low surrogate: return that single code unit by itself
+        if ((two_previous_cc & 0xfc00) !== 0xd800) {
+            return str.charAt(pos - 1);
+        }
+        return str.slice(pos - 2, pos);
+    }
 };
 
 // Handle a delimiter marker for emphasis or a quote.
diff --git a/test/regression.txt b/test/regression.txt
index 6de5a111..624703bd 100644
--- a/test/regression.txt
+++ b/test/regression.txt
@@ -546,3 +546,19 @@ foo more -->
全角スペース (U+3000) 全形空白
ZWNBSP (U+FEFF) ZWNBSP
 ````````````````````````````````
+
+#296
+```````````````````````````````` example
+a**a∇**a
+
+a**∇a**a
+
+a**a𝜵**a
+
+a**𝜵a**a
+.
+<p>a**a∇**a</p>
+<p>a**∇a**a</p>
+<p>a**a𝜵**a</p>
+<p>a**𝜵a**a</p>
+````````````````````````````````