Skip to content

Commit f26bbf7

Browse files
authored
Merge pull request #1302 from mathjax/issue1297
Changes the TAG and ATTR patterns to be more in line with HTML5 (#1297)
2 parents 5ebb07d + a6751d5 commit f26bbf7

File tree

1 file changed

+10
-9
lines changed

1 file changed

+10
-9
lines changed

ts/adaptors/lite/Parser.ts

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,22 +32,23 @@ import { LiteAdaptor } from '../liteAdaptor.js';
3232
/**
3333
* Patterns used in parsing serialized HTML
3434
*/
35-
const TAGNAME = '[a-z][^\\s\\n>]*';
36-
const ATTNAME = '[a-z][^\\s\\n>=]*';
37-
const VALUE = `(?:'[^']*'|"[^"]*"|[^\\s\\n]+)`;
38-
const VALUESPLIT = `(?:'([^']*)'|"([^"]*)"|([^\\s\\n]+))`;
39-
const SPACE = '(?:\\s|\\n)+';
40-
const OPTIONALSPACE = '(?:\\s|\\n)*';
35+
36+
/**
 * Runs of HTML ASCII whitespace: space, tab, line feed, form feed and
 * carriage return.  SPACE requires at least one character, OPTIONALSPACE
 * allows none.  Tabs, form feeds and carriage returns are excluded from tag
 * and attribute names, so they have to be accepted as separators here or a
 * tag whose attributes are separated by a tab could never be matched.
 */
const SPACE = '[ \\t\\n\\f\\r]+';
const OPTIONALSPACE = '[ \\t\\n\\f\\r]*';
38+
/** Tag name: an ASCII letter, then any characters other than U+0000-U+001F, space, quotes, `>`, `/`, `=` and U+007F-U+009F. */
const TAGNAME = `[A-Za-z][^\u0000-\u001F "'>/=\u007F-\u009F]*`;
39+
/** Attribute name: one or more characters other than U+0000-U+001F, space, quotes, `>`, `/`, `=` and U+007F-U+009F. */
const ATTNAME = `[^\u0000-\u001F "'>/=\u007F-\u009F]+`;
40+
/**
 * Unquoted attribute value: one or more characters that are not ASCII
 * whitespace, a quote, `=`, `<`, `>` or a backtick.  (Using the whitespace
 * pattern here would make `name=value` unmatchable, since the "value" could
 * then only consist of blanks.)
 */
const UNQUOTED = `[^ \\t\\n\\f\\r"'=<>\`]+`;
/** Attribute value: single-quoted, double-quoted, or unquoted. */
const VALUE = `(?:'[^']*'|"[^"]*"|${UNQUOTED})`;
/**
 * Same as VALUE, but capturing the value without its quotes: group 1 for a
 * single-quoted value, group 2 for a double-quoted one, group 3 for an
 * unquoted one.
 */
const VALUESPLIT = `(?:'([^']*)'|"([^"]*)"|(${UNQUOTED}))`;
4142
/** A single attribute: a name, optionally followed by `=` and a value, with optional whitespace on either side of the `=`. */
const ATTRIBUTE = `${ATTNAME}(?:${OPTIONALSPACE}=${OPTIONALSPACE}${VALUE})?`;
4243
/** Same shape as ATTRIBUTE, but capturing: group 1 is the attribute name, followed by the capture groups of VALUESPLIT. */
const ATTRIBUTESPLIT = `(${ATTNAME})(?:${OPTIONALSPACE}=${OPTIONALSPACE}${VALUESPLIT})?`;
4344
/**
 * A complete tag, captured as group 1.  After the opening `<` it accepts one of:
 *   - a tag name, any number of whitespace-separated attributes, optional
 *     whitespace and an optional `/`;
 *   - `/` followed by a tag name (closing tag);
 *   - `!--` ... `--` (comment, matched lazily);
 *   - `!` followed by anything (matched lazily).
 * The tag must end with `>` or at the end of the input.
 */
const TAG =
4445
`(<(?:${TAGNAME}(?:${SPACE}${ATTRIBUTE})*` +
4546
`${OPTIONALSPACE}/?|/${TAGNAME}|!--[^]*?--|![^]*?)(?:>|$))`;
4647

4748
/**
 * Exported table of RegExp objects built from the TAG, ATTRIBUTE and
 * ATTRIBUTESPLIT pattern strings (keys `tag`, `attr` and `attrsplit`).
 * In this diff the removed (-) entries were compiled with the `i` flag and
 * the added (+) entries are compiled with the `u` flag.
 */
export const PATTERNS = {
48-
tag: new RegExp(TAG, 'i'),
49-
attr: new RegExp(ATTRIBUTE, 'i'),
50-
attrsplit: new RegExp(ATTRIBUTESPLIT, 'i'),
49+
tag: new RegExp(TAG, 'u'),
50+
attr: new RegExp(ATTRIBUTE, 'u'),
51+
attrsplit: new RegExp(ATTRIBUTESPLIT, 'u'),
5152
};
5253

5354
/************************************************************/

0 commit comments

Comments
 (0)