diff --git a/src/url-state-machine.js b/src/url-state-machine.js
index ee6dcd7df..05f9746a5 100644
--- a/src/url-state-machine.js
+++ b/src/url-state-machine.js
@@ -43,6 +43,43 @@ function isASCIIHex(c) {
   return isASCIIDigit(c) || (c >= 0x41 && c <= 0x46) || (c >= 0x61 && c <= 0x66);
 }
 
+// ASCII punctuation that the URL Standard lists as URL code points. Each entry
+// goes through p() so it can be compared directly with the parser's current c.
+const urlCodePoints = [
+  p("!"), p("$"), p("&"), p("'"), p("("), p(")"), p("*"),
+  p("+"), p(","), p("-"), p("."), p("/"), p(":"),
+  p(";"), p("="), p("?"), p("@"), p("_"), p("~")
+];
+
+// Returns true when c is a URL code point: an ASCII alphanumeric, one of the
+// punctuation characters listed above, or a code point in U+00A0..U+10FFFD
+// that is neither a surrogate nor a noncharacter.
+function isURLCodePoint(c) {
+  return (
+    isASCIIAlphanumeric(c) ||
+    urlCodePoints.indexOf(c) !== -1 ||
+    (c >= 0xA0 && c <= 0xD7FF) || // stops before the surrogates (U+D800..U+DFFF)
+    (c >= 0xE000 && c <= 0xFDCF) || // skips the noncharacters U+FDD0..U+FDEF
+    (c >= 0xFDF0 && c <= 0xFFFD) || // each plane ends before its U+xFFFE/U+xFFFF
+    (c >= 0x10000 && c <= 0x1FFFD) ||
+    (c >= 0x20000 && c <= 0x2FFFD) ||
+    (c >= 0x30000 && c <= 0x3FFFD) ||
+    (c >= 0x40000 && c <= 0x4FFFD) ||
+    (c >= 0x50000 && c <= 0x5FFFD) ||
+    (c >= 0x60000 && c <= 0x6FFFD) ||
+    (c >= 0x70000 && c <= 0x7FFFD) ||
+    (c >= 0x80000 && c <= 0x8FFFD) ||
+    (c >= 0x90000 && c <= 0x9FFFD) ||
+    (c >= 0xA0000 && c <= 0xAFFFD) ||
+    (c >= 0xB0000 && c <= 0xBFFFD) ||
+    (c >= 0xC0000 && c <= 0xCFFFD) ||
+    (c >= 0xD0000 && c <= 0xDFFFD) ||
+    (c >= 0xE0000 && c <= 0xEFFFD) ||
+    (c >= 0xF0000 && c <= 0xFFFFD) ||
+    (c >= 0x100000 && c <= 0x10FFFD)
+  );
+}
+
 function isSingleDot(buffer) {
   return buffer === "." || buffer.toLowerCase() === "%2e";
 }
@@ -946,7 +983,9 @@ URLStateMachine.prototype["parse path"] = function parsePath(c) {
       this.state = "fragment";
     }
   } else {
-    // TODO: If c is not a URL code point and not "%", parse error.
+    if (!isURLCodePoint(c) && c !== p("%")) {
+      this.parseError = true;
+    }
 
     if (c === p("%") &&
       (!isASCIIHex(this.input[this.pointer + 1]) ||
@@ -975,8 +1014,7 @@ URLStateMachine.prototype["parse cannot-be-a-base-URL path"] = function parseCan
     this.url.fragment = "";
     this.state = "fragment";
   } else {
-    // TODO: Add: not a URL code point
-    if (!isNaN(c) && c !== p("%")) {
+    if (!isNaN(c) && !isURLCodePoint(c) && c !== p("%")) {
       this.parseError = true;
     }
 
@@ -1016,7 +1054,10 @@ URLStateMachine.prototype["parse query"] = function parseQuery(c, cStr) {
       this.state = "fragment";
     }
   } else {
-    // TODO: If c is not a URL code point and not "%", parse error.
+    if (!isURLCodePoint(c) && c !== p("%")) {
+      this.parseError = true;
+    }
+
     if (c === p("%") &&
       (!isASCIIHex(this.input[this.pointer + 1]) ||
         !isASCIIHex(this.input[this.pointer + 2]))) {
@@ -1034,7 +1075,10 @@ URLStateMachine.prototype["parse fragment"] = function parseFragment(c, cStr) {
   } else if (c === 0x0) {
     this.parseError = true;
   } else {
-    // TODO: If c is not a URL code point and not "%", parse error.
+    if (!isURLCodePoint(c) && c !== p("%")) {
+      this.parseError = true;
+    }
+
     if (c === p("%") &&
       (!isASCIIHex(this.input[this.pointer + 1]) ||
         !isASCIIHex(this.input[this.pointer + 2]))) {