2525import gzip
2626import io
2727import os
28+ import struct
2829import sys
30+ import time
31+ from typing import List , Optional , SupportsInt
2932
3033from . import isal_zlib
3134
# Compression levels taken from isal_zlib. They are used as default values
# for the compresslevel arguments in this module.
_COMPRESS_LEVEL_TRADEOFF = isal_zlib.ISAL_DEFAULT_COMPRESSION
_COMPRESS_LEVEL_BEST = isal_zlib.ISAL_BEST_COMPRESSION

# Bit masks for the flags byte of a gzip header. They are tested with
# `flags & F...` when a header is parsed.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
42+
3843try :
3944 BadGzipFile = gzip .BadGzipFile # type: ignore
4045except AttributeError : # Versions lower than 3.8 do not have BadGzipFile
@@ -52,7 +57,7 @@ def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_TRADEOFF,
5257
5358 The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for
5459 binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is
55- "rb", and the default compresslevel is 9 .
60+ "rb", and the default compresslevel is 2 .
5661
5762 For binary mode, this function is equivalent to the GzipFile constructor:
5863 GzipFile(filename, mode, compresslevel). In this case, the encoding, errors
@@ -217,24 +222,105 @@ def _add_read_data(self, data):
217222_GzipReader = _IGzipReader
218223
219224
220- # Plagiarized from gzip.py from python's stdlib.
def _create_simple_gzip_header(compresslevel: int,
                               mtime: Optional[SupportsInt] = None) -> bytes:
    """
    Build the fixed 10-byte gzip header, without any optional fields.

    :param compresslevel: Compression level; it only decides the xfl byte.
    :param mtime: Modification time, must be convertible with int(). The
                  current time is used when this is None.
    :return: The packed header as a bytes object.
    """
    timestamp = time.time() if mtime is None else mtime
    # ISA-L has no "best compression" algorithm, only fast and medium ones,
    # so xfl is either 4 (fastest algorithm) or 0.
    if compresslevel == _COMPRESS_LEVEL_FAST:
        extra_flags = 4
    else:
        extra_flags = 0
    # Little-endian layout: ID1 and ID2 magic bytes, method (8=deflate),
    # header flags (0: no optional fields), 32-bit mtime, xfl and
    # os (255 for unknown OS).
    header_fields = (0x1f, 0x8b, 8, 0, int(timestamp), extra_flags, 255)
    return struct.pack("<BBBBLBB", *header_fields)
241+
242+
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed string.

    The result is a single gzip member: a simple header without optional
    fields, a raw deflate stream and the gzip trailer.

    :param data: The bytes to compress.
    :param compresslevel: The compression level, in range of 0-3.
    :param mtime: Optional timestamp for the header; the current time is
                  used when this is None.
    :return: The gzip-compressed data.
    """
    gzip_header = _create_simple_gzip_header(compresslevel, mtime)
    # Negative wbits requests a raw deflate stream, so no header or trailer
    # is emitted by the compressor itself.
    deflate_stream = isal_zlib.compress(data, compresslevel, wbits=-15)
    # The trailer stores the CRC-32 of the input followed by the input
    # length truncated to 32 bits.
    gzip_trailer = struct.pack(
        "<LL", isal_zlib.crc32(data), len(data) & 0xFFFFFFFF)
    return gzip_header + deflate_stream + gzip_trailer
254+
255+
def _gzip_header_end(data: bytes) -> int:
    """
    Find the start of the raw deflate block in a gzip file.

    The fixed 10-byte header is validated, after which the optional fields
    announced by the flags byte (extra field, file name, comment and header
    checksum) are skipped in the order in which they are stored.

    :param data: Compressed data that starts with a gzip header.
    :return: The end of the header / start of the raw deflate block. The
             returned offset never exceeds len(data).
    :raises EOFError: When data ends before the header is complete.
    :raises BadGzipFile: When the magic bytes or the compression method are
                         wrong, or when the header checksum does not match.
    """
    eof_error = EOFError("Compressed file ended before the end-of-stream "
                         "marker was reached")
    if len(data) < 10:
        raise eof_error
    # We are not interested in mtime, xfl and os flags.
    magic, method, flags = struct.unpack("<HBB", data[:4])
    if magic != 0x8b1f:
        raise BadGzipFile(f"Not a gzipped file ({repr(data[:2])})")
    if method != 8:
        raise BadGzipFile("Unknown compression method")
    pos = 10
    if flags & FEXTRA:
        if len(data) < pos + 2:
            raise eof_error
        xlen = int.from_bytes(data[pos: pos + 2], "little", signed=False)
        pos += 2 + xlen
        # A truncated extra field must be reported here; otherwise an
        # offset beyond the end of the data could be returned.
        if len(data) < pos:
            raise eof_error
    if flags & FNAME:
        # The file name is terminated by a null byte.
        pos = data.find(b"\x00", pos) + 1
        # pos will be -1 + 1 when null byte not found.
        if not pos:
            raise eof_error
    if flags & FCOMMENT:
        # The comment is terminated by a null byte as well.
        pos = data.find(b"\x00", pos) + 1
        if not pos:
            raise eof_error
    if flags & FHCRC:
        if len(data) < pos + 2:
            raise eof_error
        header_crc = int.from_bytes(data[pos: pos + 2], "little",
                                    signed=False)
        # CRC is stored as a 16-bit integer by taking last bits of crc32.
        crc = isal_zlib.crc32(data[:pos]) & 0xFFFF
        if header_crc != crc:
            raise BadGzipFile(f"Corrupted header. Checksums do not "
                              f"match: {crc} != {header_crc}")
        pos += 2
    return pos
230298
231299
def decompress(data):
    """Decompress a gzip compressed string in one shot.
    Return the decompressed string.

    Data consisting of several concatenated gzip members is supported; the
    decompressed members are joined in order. Null bytes used as padding
    after a member are ignored.

    :param data: The gzip-compressed bytes.
    :return: The decompressed bytes; b"" for empty input.
    :raises EOFError: When a member is truncated.
    :raises BadGzipFile: When a header is invalid, or when the CRC or the
                         length stored in a trailer does not match the
                         decompressed data.
    """
    all_blocks: List[bytes] = []
    while data != b"":
        header_end = _gzip_header_end(data)
        # Negative wbits: the member body is a raw deflate stream.
        do = isal_zlib.decompressobj(-15)
        block = do.decompress(data[header_end:]) + do.flush()
        # Everything after the deflate stream ends up in unused_data; it
        # must at least hold the 8-byte trailer.
        if not do.eof or len(do.unused_data) < 8:
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")
        checksum, length = struct.unpack("<II", do.unused_data[:8])
        crc = isal_zlib.crc32(block)
        if crc != checksum:
            raise BadGzipFile("CRC check failed")
        # The trailer only stores the length modulo 2**32, so the actual
        # length has to be truncated the same way before comparing.
        if length != (len(block) & 0xFFFFFFFF):
            raise BadGzipFile("Incorrect length of data produced")
        all_blocks.append(block)
        # Remove all padding null bytes and start next block.
        data = do.unused_data[8:].lstrip(b"\x00")
    return b"".join(all_blocks)
238324
239325
240326def main ():
0 commit comments