Skip to content

Commit baadc33

Browse files
committed
Profile and optimize the permessage-deflate extension.
dataclasses.replace is surprisingly expensive. zlib functions make up the bulk of the cost now.
1 parent a0b20f0 commit baadc33

File tree

3 files changed

+68
-8
lines changed

3 files changed

+68
-8
lines changed

experiments/compression/corpus.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,6 @@ def main(corpus):
4747

4848
if __name__ == "__main__":
4949
if len(sys.argv) < 2:
50-
print(f"Usage: {sys.argv[0]} [directory]")
50+
print(f"Usage: {sys.argv[0]} <directory>")
5151
sys.exit(2)
5252
main(pathlib.Path(sys.argv[1]))

experiments/profiling/compression.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/usr/bin/env python
2+
3+
"""
4+
Profile the permessage-deflate extension.
5+
6+
Usage::
7+
$ pip install line_profiler
8+
$ python experiments/compression/corpus.py experiments/compression/corpus
9+
$ PYTHONPATH=src python -m kernprof \
10+
--line-by-line \
11+
--prof-mod src/websockets/extensions/permessage_deflate.py \
12+
--view \
13+
experiments/profiling/compression.py experiments/compression/corpus 12 5 6
14+
15+
"""
16+
17+
import pathlib
18+
import sys
19+
20+
from websockets.extensions.permessage_deflate import PerMessageDeflate
21+
from websockets.frames import OP_TEXT, Frame
22+
23+
24+
def compress_and_decompress(corpus, max_window_bits, memory_level, level):
    """
    Round-trip every payload of ``corpus`` through permessage-deflate.

    A single :class:`PerMessageDeflate` extension is built with both
    ``no_context_takeover`` flags set to ``False`` and the same
    ``max_window_bits`` for the local and remote sides. Each payload is
    wrapped in a text frame, encoded, then decoded with that extension.
    The decoded frames are discarded; the function returns ``None``.

    """
    zlib_settings = {"memLevel": memory_level, "level": level}
    codec = PerMessageDeflate(
        remote_no_context_takeover=False,
        local_no_context_takeover=False,
        remote_max_window_bits=max_window_bits,
        local_max_window_bits=max_window_bits,
        compress_settings=zlib_settings,
    )
    for payload in corpus:
        # Encode first, then feed the compressed frame straight back to decode.
        codec.decode(codec.encode(Frame(OP_TEXT, payload)))
36+
37+
38+
if __name__ == "__main__":
    # Validate the corpus directory before doing anything else. Without the
    # explicit exit, the script would print the usage message and then crash
    # while trying to read the missing or invalid directory.
    if len(sys.argv) < 2 or not pathlib.Path(sys.argv[1]).is_dir():
        print(
            f"Usage: {sys.argv[0]} <directory> "
            "[<max_window_bits>] [<mem_level>] [<level>]"
        )
        sys.exit(2)

    # Every entry of the directory is read as one message payload.
    corpus = [file.read_bytes() for file in pathlib.Path(sys.argv[1]).iterdir()]

    # Optional positional arguments, each falling back to its default.
    max_window_bits = int(sys.argv[2]) if len(sys.argv) > 2 else 12
    memory_level = int(sys.argv[3]) if len(sys.argv) > 3 else 5
    level = int(sys.argv[4]) if len(sys.argv) > 4 else 6

    compress_and_decompress(corpus, max_window_bits, memory_level, level)

src/websockets/extensions/permessage_deflate.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import dataclasses
43
import zlib
54
from collections.abc import Sequence
65
from typing import Any
@@ -120,7 +119,6 @@ def decode(
120119
else:
121120
if not frame.rsv1:
122121
return frame
123-
frame = dataclasses.replace(frame, rsv1=False)
124122
if not frame.fin:
125123
self.decode_cont_data = True
126124

@@ -146,7 +144,15 @@ def decode(
146144
if frame.fin and self.remote_no_context_takeover:
147145
del self.decoder
148146

149-
return dataclasses.replace(frame, data=data)
147+
return frames.Frame(
148+
frame.opcode,
149+
data,
150+
frame.fin,
151+
# Unset the rsv1 flag on the first frame of a compressed message.
152+
False,
153+
frame.rsv2,
154+
frame.rsv3,
155+
)
150156

151157
def encode(self, frame: frames.Frame) -> frames.Frame:
152158
"""
@@ -161,8 +167,6 @@ def encode(self, frame: frames.Frame) -> frames.Frame:
161167
# data" flag similar to "decode continuation data" at this time.
162168

163169
if frame.opcode is not frames.OP_CONT:
164-
# Set the rsv1 flag on the first frame of a compressed message.
165-
frame = dataclasses.replace(frame, rsv1=True)
166170
# Re-initialize per-message encoder.
167171
if self.local_no_context_takeover:
168172
self.encoder = zlib.compressobj(
@@ -172,14 +176,25 @@ def encode(self, frame: frames.Frame) -> frames.Frame:
172176

173177
# Compress data.
174178
data = self.encoder.compress(frame.data) + self.encoder.flush(zlib.Z_SYNC_FLUSH)
175-
if frame.fin and data.endswith(_EMPTY_UNCOMPRESSED_BLOCK):
179+
if frame.fin and data[-4:] == _EMPTY_UNCOMPRESSED_BLOCK:
180+
# Making a copy is faster than memoryview(a)[:-4] until about 2kB.
181+
# On larger messages, it's slower but profiling shows that it's
182+
# marginal compared to compress() and flush(). Keep it simple.
176183
data = data[:-4]
177184

178185
# Allow garbage collection of the encoder if it won't be reused.
179186
if frame.fin and self.local_no_context_takeover:
180187
del self.encoder
181188

182-
return dataclasses.replace(frame, data=data)
189+
return frames.Frame(
190+
frame.opcode,
191+
data,
192+
frame.fin,
193+
# Set the rsv1 flag on the first frame of a compressed message.
194+
frame.opcode is not frames.OP_CONT,
195+
frame.rsv2,
196+
frame.rsv3,
197+
)
183198

184199

185200
def _build_parameters(

0 commit comments

Comments
 (0)