11import io
22import os
3+ from contextlib import contextmanager
34from dataclasses import dataclass
4- from typing import IO , Any , Callable
5+ from typing import IO , Any , Callable , Iterator
56
67from blake3 import blake3
78
@@ -20,6 +21,29 @@ class HashCheckpoint:
2021 file_size : int = 0
2122
2223
24+ @contextmanager
25+ def _open_for_hashing (fp : str | IO [bytes ]) -> Iterator [tuple [IO [bytes ], bool ]]:
26+ """Yield (file_object, is_path) with appropriate setup/teardown."""
27+ if hasattr (fp , "read" ):
28+ seekable = getattr (fp , "seekable" , lambda : False )()
29+ orig_pos = None
30+ if seekable :
31+ try :
32+ orig_pos = fp .tell ()
33+ if orig_pos != 0 :
34+ fp .seek (0 )
35+ except io .UnsupportedOperation :
36+ orig_pos = None
37+ try :
38+ yield fp , False
39+ finally :
40+ if orig_pos is not None :
41+ fp .seek (orig_pos )
42+ else :
43+ with open (os .fspath (fp ), "rb" ) as f :
44+ yield f , True
45+
46+
2347def compute_blake3_hash (
2448 fp : str | IO [bytes ],
2549 chunk_size : int = DEFAULT_CHUNK ,
@@ -42,67 +66,30 @@ def compute_blake3_hash(
4266 (None, checkpoint) on interruption (file paths only), or
4367 (None, None) on interruption of a file object
4468 """
45- if hasattr (fp , "read" ):
46- digest = _hash_file_obj (fp , chunk_size , interrupt_check )
47- return digest , None
69+ if chunk_size <= 0 :
70+ chunk_size = DEFAULT_CHUNK
4871
49- with open ( os . fspath ( fp ), "rb" ) as f :
50- if checkpoint is not None :
72+ with _open_for_hashing ( fp ) as ( f , is_path ) :
73+ if checkpoint is not None and is_path :
5174 f .seek (checkpoint .bytes_processed )
5275 h = checkpoint .hasher
5376 bytes_processed = checkpoint .bytes_processed
5477 else :
5578 h = blake3 ()
5679 bytes_processed = 0
5780
58- if chunk_size <= 0 :
59- chunk_size = DEFAULT_CHUNK
60-
6181 while True :
6282 if interrupt_check is not None and interrupt_check ():
63- return None , HashCheckpoint (
64- bytes_processed = bytes_processed ,
65- hasher = h ,
66- )
83+ if is_path :
84+ return None , HashCheckpoint (
85+ bytes_processed = bytes_processed ,
86+ hasher = h ,
87+ )
88+ return None , None
6789 chunk = f .read (chunk_size )
6890 if not chunk :
6991 break
7092 h .update (chunk )
7193 bytes_processed += len (chunk )
7294
7395 return h .hexdigest (), None
74-
75-
def _hash_file_obj(
    file_obj: IO,
    chunk_size: int = DEFAULT_CHUNK,
    interrupt_check: InterruptCheck | None = None,
) -> str | None:
    """Hash the contents of an already-open file object with BLAKE3.

    A seekable stream is rewound to offset 0 before reading and moved back to
    its starting offset afterwards, whether hashing finishes, is interrupted,
    or raises.

    Args:
        file_obj: Object exposing ``read``; ``seekable``/``tell``/``seek`` are
            used when available.
        chunk_size: Bytes requested per ``read`` call; values <= 0 fall back
            to ``DEFAULT_CHUNK``.
        interrupt_check: Optional callable polled before each read; a truthy
            result stops hashing.

    Returns:
        The hex digest, or ``None`` if ``interrupt_check`` asked to stop.
    """
    read_size = chunk_size if chunk_size > 0 else DEFAULT_CHUNK

    # Offset to return to once done; None means "do not touch the position".
    restore_to = None
    if getattr(file_obj, "seekable", lambda: False)():
        try:
            restore_to = file_obj.tell()
            if restore_to != 0:
                file_obj.seek(0)
        except io.UnsupportedOperation:
            # Stream advertised seekability it cannot honour; hash from the
            # current position and skip the restore.
            restore_to = None

    try:
        hasher = blake3()
        while True:
            if interrupt_check is not None and interrupt_check():
                return None
            data = file_obj.read(read_size)
            if not data:
                return hasher.hexdigest()
            hasher.update(data)
    finally:
        if restore_to is not None:
            file_obj.seek(restore_to)
0 commit comments