diff --git a/tornado/httputil.py b/tornado/httputil.py index 4ec7b68fe..899d76aeb 100644 --- a/tornado/httputil.py +++ b/tornado/httputil.py @@ -1065,15 +1065,20 @@ def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]: yield (k, v) -_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") -_QuotePatt = re.compile(r"[\\].") -_nulljoin = "".join +_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub + + +def _unquote_replace(m: re.Match) -> str: + if m[1]: + return chr(int(m[1], 8)) + else: + return m[2] def _unquote_cookie(s: str) -> str: """Handle double quotes and escaping in cookie values. - This method is copied verbatim from the Python 3.5 standard + This method is copied verbatim from the Python 3.13 standard library (http.cookies._unquote) so we don't have to depend on non-public interfaces. """ @@ -1094,30 +1099,7 @@ def _unquote_cookie(s: str) -> str: # \012 --> \n # \" --> " # - i = 0 - n = len(s) - res = [] - while 0 <= i < n: - o_match = _OctalPatt.search(s, i) - q_match = _QuotePatt.search(s, i) - if not o_match and not q_match: # Neither matched - res.append(s[i:]) - break - # else: - j = k = -1 - if o_match: - j = o_match.start(0) - if q_match: - k = q_match.start(0) - if q_match and (not o_match or k < j): # QuotePatt matched - res.append(s[i:k]) - res.append(s[k + 1]) - i = k + 2 - else: # OctalPatt matched - res.append(s[i:j]) - res.append(chr(int(s[j + 1 : j + 4], 8))) - i = j + 4 - return _nulljoin(res) + return _unquote_sub(_unquote_replace, s) def parse_cookie(cookie: str) -> Dict[str, str]: diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py index 7d6759e48..46a28bc69 100644 --- a/tornado/test/httputil_test.py +++ b/tornado/test/httputil_test.py @@ -560,3 +560,49 @@ def test_invalid_cookies(self): self.assertEqual( parse_cookie(" = b ; ; = ; c = ; "), {"": "b", "c": ""} ) + + def test_unquote(self): + # Copied from + # https://github.com/python/cpython/blob/dc7a2b6522ec7af41282bc34f405bee9b306d611/Lib/test/test_http_cookies.py#L62 + cases = [ + (r'a="b=\""', 'b="'), + (r'a="b=\\"', "b=\\"), + (r'a="b=\="', "b=="), + (r'a="b=\n"', "b=n"), + (r'a="b=\042"', 'b="'), + (r'a="b=\134"', "b=\\"), + (r'a="b=\377"', "b=\xff"), + (r'a="b=\400"', "b=400"), + (r'a="b=\42"', "b=42"), + (r'a="b=\\042"', "b=\\042"), + (r'a="b=\\134"', "b=\\134"), + (r'a="b=\\\""', 'b=\\"'), + (r'a="b=\\\042"', 'b=\\"'), + (r'a="b=\134\""', 'b=\\"'), + (r'a="b=\134\042"', 'b=\\"'), + ] + for encoded, decoded in cases: + with self.subTest(encoded): + c = parse_cookie(encoded) + self.assertEqual(c["a"], decoded) + + def test_unquote_large(self): + # Adapted from + # https://github.com/python/cpython/blob/dc7a2b6522ec7af41282bc34f405bee9b306d611/Lib/test/test_http_cookies.py#L87 + # Modified from that test because we handle semicolons differently from the stdlib. + # + # This is a performance regression test: prior to improvements in Tornado 6.4.2, this test + # would take over a minute with n= 100k. Now it runs in tens of milliseconds. + n = 100000 + for encoded in r"\\", r"\134": + with self.subTest(encoded): + start = time.time() + data = 'a="b=' + encoded * n + '"' + value = parse_cookie(data)["a"] + end = time.time() + self.assertEqual(value[:3], "b=\\") + self.assertEqual(value[-3:], "\\\\\\") + self.assertEqual(len(value), n + 2) + + # Very loose performance check to avoid false positives + self.assertLess(end - start, 1, "Test took too long")