From 9df00a132a6ce64e97c455dc26076345e79f68c7 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Sun, 1 Dec 2024 21:55:51 +0000 Subject: [PATCH] [PR #10073/349b7565 backport][3.11] Improve performance of parsing headers (#10083) Co-authored-by: J. Nick Koston --- CHANGES/10073.misc.rst | 1 + aiohttp/_http_parser.pyx | 56 ++++++++++++++++++---------------------- 2 files changed, 26 insertions(+), 31 deletions(-) create mode 100644 CHANGES/10073.misc.rst diff --git a/CHANGES/10073.misc.rst b/CHANGES/10073.misc.rst new file mode 100644 index 00000000000..2a7e3514ea3 --- /dev/null +++ b/CHANGES/10073.misc.rst @@ -0,0 +1 @@ +Improved performance of parsing headers when using the C parser -- by :user:`bdraco`. diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index 988e4247f93..19dc3e63b74 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -71,7 +71,7 @@ cdef object CONTENT_ENCODING = hdrs.CONTENT_ENCODING cdef object EMPTY_PAYLOAD = _EMPTY_PAYLOAD cdef object StreamReader = _StreamReader cdef object DeflateBuffer = _DeflateBuffer - +cdef bytes EMPTY_BYTES = b"" cdef inline object extend(object buf, const char* at, size_t length): cdef Py_ssize_t s @@ -277,8 +277,9 @@ cdef class HttpParser: cparser.llhttp_t* _cparser cparser.llhttp_settings_t* _csettings - bytearray _raw_name - bytearray _raw_value + bytes _raw_name + object _name + bytes _raw_value bint _has_value object _protocol @@ -296,7 +297,7 @@ cdef class HttpParser: bytearray _buf str _path str _reason - object _headers + list _headers list _raw_headers bint _upgraded list _messages @@ -350,8 +351,8 @@ cdef class HttpParser: self._payload_exception = payload_exception self._messages = [] - self._raw_name = bytearray() - self._raw_value = bytearray() + self._raw_name = EMPTY_BYTES + self._raw_value = EMPTY_BYTES self._has_value = False self._max_line_size = max_line_size @@ -378,42 +379,35 @@ cdef class HttpParser: self._limit = limit cdef _process_header(self): - if self._raw_name: - raw_name = bytes(self._raw_name) - raw_value = bytes(self._raw_value) - - name = find_header(raw_name) - value = raw_value.decode('utf-8', 'surrogateescape') + cdef str value + if self._raw_name is not EMPTY_BYTES: + name = find_header(self._raw_name) + value = self._raw_value.decode('utf-8', 'surrogateescape') - self._headers.add(name, value) + self._headers.append((name, value)) if name is CONTENT_ENCODING: self._content_encoding = value - PyByteArray_Resize(self._raw_name, 0) - PyByteArray_Resize(self._raw_value, 0) self._has_value = False - self._raw_headers.append((raw_name, raw_value)) + self._raw_headers.append((self._raw_name, self._raw_value)) + self._raw_name = EMPTY_BYTES + self._raw_value = EMPTY_BYTES cdef _on_header_field(self, char* at, size_t length): - cdef Py_ssize_t size - cdef char *buf if self._has_value: self._process_header() - size = PyByteArray_Size(self._raw_name) - PyByteArray_Resize(self._raw_name, size + length) - buf = PyByteArray_AsString(self._raw_name) - memcpy(buf + size, at, length) + if self._raw_name is EMPTY_BYTES: + self._raw_name = at[:length] + else: + self._raw_name += at[:length] cdef _on_header_value(self, char* at, size_t length): - cdef Py_ssize_t size - cdef char *buf - - size = PyByteArray_Size(self._raw_value) - PyByteArray_Resize(self._raw_value, size + length) - buf = PyByteArray_AsString(self._raw_value) - memcpy(buf + size, at, length) + if self._raw_value is EMPTY_BYTES: + self._raw_value = at[:length] + else: + self._raw_value += at[:length] self._has_value = True cdef _on_headers_complete(self): @@ -424,7 +418,7 @@ cdef class HttpParser: chunked = self._cparser.flags & cparser.F_CHUNKED raw_headers = tuple(self._raw_headers) - headers = CIMultiDictProxy(self._headers) + headers = CIMultiDictProxy(CIMultiDict(self._headers)) if self._cparser.type == cparser.HTTP_REQUEST: allowed = upgrade and headers.get("upgrade", "").lower() in ALLOWED_UPGRADES @@ -672,7 +666,7 @@ cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data pyparser._started = True - pyparser._headers = CIMultiDict() + pyparser._headers = [] pyparser._raw_headers = [] PyByteArray_Resize(pyparser._buf, 0) pyparser._path = None