|
| 1 | +import argparse |
| 2 | +import gzip |
| 3 | +import timeit |
| 4 | +import zlib |
| 5 | +from pathlib import Path |
| 6 | +from typing import Dict |
| 7 | + |
| 8 | +from isal import igzip, isal_zlib # noqa: F401 used in timeit strings |
| 9 | + |
# Benchmark input: the decompressed contents of the gzipped FASTQ fixture
# stored under tests/data, relative to the directory containing this script.
DATA_DIR = Path(__file__).parent.joinpath("tests", "data")
COMPRESSED_FILE = DATA_DIR.joinpath("test.fastq.gz")
with gzip.open(str(COMPRESSED_FILE), "rb") as file_h:
    data = file_h.read()
| 14 | + |
# Payloads of increasing size; each one is a prefix of the decompressed test
# data, keyed by a human-readable label. Insertion order is the report order.
sizes: Dict[str, bytes] = {
    label: data[:length]
    for label, length in (
        ("0b", 0),
        ("8b", 8),
        ("128b", 128),
        ("1kb", 1024),
        ("8kb", 8 * 1024),
        ("16kb", 16 * 1024),
        ("32kb", 32 * 1024),
        ("64kb", 64 * 1024),
        # ("128kb", 128 * 1024),
        # ("512kb", 512 * 1024),
    )
}

# The same payloads, pre-compressed with zlib, as input for the
# decompression benchmarks.
compressed_sizes = {
    label: zlib.compress(payload) for label, payload in sizes.items()
}

# The same payloads, pre-compressed with gzip, as input for the gzip
# decompression benchmarks.
compressed_sizes_gzip = {
    label: gzip.compress(payload) for label, payload in sizes.items()
}
| 32 | + |
| 33 | + |
def show_sizes():
    """Print compressed/original size ratios for every payload in ``sizes``.

    Two tab-separated tables are written to stdout: one for ``zlib`` at
    levels -1 through 9 and one for ``isal_zlib`` at levels 0 through 3.
    Each row starts with the payload label, followed by the ratio for each
    level rounded to three decimals.
    """

    def print_table(title, compress, levels):
        # One header row listing the levels, then one row per payload.
        print(title)
        print("name\t" + "\t".join(map(str, levels)))
        for label, payload in sizes.items():
            # Divide by at least 1 so the empty payload does not raise.
            denominator = max(len(payload), 1)
            ratios = [
                str(round(len(compress(payload, lvl)) / denominator, 3))
                for lvl in levels
            ]
            print(label + "\t" + "\t".join(ratios))

    print_table("zlib sizes", zlib.compress, range(-1, 10))
    print_table("isal sizes", isal_zlib.compress, range(0, 4))
| 53 | + |
| 54 | + |
def benchmark(name: str,
              names_and_data: Dict[str, bytes],
              isal_string: str,
              zlib_string: str,
              number: int = 10_000,
              **kwargs):
    """Time an isal statement against a reference statement and print a table.

    For every entry in *names_and_data* both statements are executed
    *number* times with ``timeit.timeit``. The statements may refer to the
    current payload as ``data_block`` and to any module-level name.

    One tab-separated row is printed per entry: the entry label, the average
    time per call in microseconds for the isal statement and for the
    reference statement, and the isal/reference time ratio (a value below 1
    means the isal statement took less time). The ratio is ``nan`` when the
    reference measurement is zero.

    :param name: Title printed above the table.
    :param names_and_data: Mapping of row label to the payload to process.
    :param isal_string: Statement to time for the isal implementation.
    :param zlib_string: Statement to time for the reference implementation.
    :param number: Number of executions per measurement.
    :param kwargs: Extra keyword arguments passed on to ``timeit.timeit``
        (for example ``setup`` or ``timer``).
    """
    print(name)
    print("name\tisal\tzlib\tratio")
    for block_name, data_block in names_and_data.items():
        # Statements are evaluated with the module globals plus this
        # function's locals. A dict display is used instead of
        # dict(**a, **b) because the latter raises TypeError as soon as a
        # module-level name equals a local one; here the local simply wins.
        namespace = {**globals(), **locals()}
        timeit_kwargs = dict(globals=namespace, number=number, **kwargs)
        isal_time = timeit.timeit(isal_string, **timeit_kwargs)
        zlib_time = timeit.timeit(zlib_string, **timeit_kwargs)
        # timeit reports seconds for all `number` runs together; convert to
        # microseconds for a single run.
        isal_microsecs = round(isal_time * (1_000_000 / number), 2)
        zlib_microsecs = round(zlib_time * (1_000_000 / number), 2)
        # A coarse or custom timer can report zero elapsed time; avoid
        # crashing the whole run on the division.
        if zlib_time:
            ratio = round(isal_time / zlib_time, 2)
        else:
            ratio = float("nan")
        print(f"{block_name}\t{isal_microsecs}\t{zlib_microsecs}\t{ratio}")
| 75 | + |
| 76 | + |
| 77 | +# show_sizes() |
| 78 | + |
def argument_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the benchmark script.

    Every option is a boolean switch that defaults to ``False``:
    ``--all``, ``--checksums``, ``--functions`` and ``--gzip``.
    """
    cli_parser = argparse.ArgumentParser()
    for switch in ("--all", "--checksums", "--functions", "--gzip"):
        cli_parser.add_argument(switch, action="store_true")
    return cli_parser
| 86 | + |
| 87 | + |
if __name__ == "__main__":
    cli = argument_parser().parse_args()
    # One entry per benchmark, in execution order:
    # (group switch, title, payloads, isal statement, reference statement).
    # A benchmark runs when its group switch or --all was given.
    suite = (
        (cli.checksums, "CRC32", sizes,
         "isal_zlib.crc32(data_block)",
         "zlib.crc32(data_block)"),
        (cli.checksums, "Adler32", sizes,
         "isal_zlib.adler32(data_block)",
         "zlib.adler32(data_block)"),
        (cli.functions, "Compression", sizes,
         "isal_zlib.compress(data_block, 1)",
         "zlib.compress(data_block, 1)"),
        (cli.functions, "Decompression", compressed_sizes,
         "isal_zlib.decompress(data_block)",
         "zlib.decompress(data_block)"),
        (cli.gzip, "Compression", sizes,
         "igzip.compress(data_block, 1)",
         "gzip.compress(data_block, 1)"),
        (cli.gzip, "Decompression", compressed_sizes_gzip,
         "igzip.decompress(data_block)",
         "gzip.decompress(data_block)"),
    )
    for enabled, title, payloads, isal_stmt, reference_stmt in suite:
        if enabled or cli.all:
            benchmark(title, payloads, isal_stmt, reference_stmt)
0 commit comments