Skip to content

Commit 57780da

Browse files
Consolidate hash functions into single implementation
Extract the file open/seek/restore logic into an _open_for_hashing context manager and use a single hash loop in compute_blake3_hash for both file paths and file objects.

Amp-Thread-ID: https://ampcode.com/threads/T-019ccb05-0db1-7206-8bd9-1c2efb898fef
Co-authored-by: Amp <amp@ampcode.com>
1 parent 4c1d87e commit 57780da

File tree

1 file changed

+35
-48
lines changed

1 file changed

+35
-48
lines changed

app/assets/services/hashing.py

Lines changed: 35 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
import io
22
import os
3+
from contextlib import contextmanager
34
from dataclasses import dataclass
4-
from typing import IO, Any, Callable
5+
from typing import IO, Any, Callable, Iterator
56

67
from blake3 import blake3
78

@@ -20,6 +21,29 @@ class HashCheckpoint:
2021
file_size: int = 0
2122

2223

24+
@contextmanager
25+
def _open_for_hashing(fp: str | IO[bytes]) -> Iterator[tuple[IO[bytes], bool]]:
26+
"""Yield (file_object, is_path) with appropriate setup/teardown."""
27+
if hasattr(fp, "read"):
28+
seekable = getattr(fp, "seekable", lambda: False)()
29+
orig_pos = None
30+
if seekable:
31+
try:
32+
orig_pos = fp.tell()
33+
if orig_pos != 0:
34+
fp.seek(0)
35+
except io.UnsupportedOperation:
36+
orig_pos = None
37+
try:
38+
yield fp, False
39+
finally:
40+
if orig_pos is not None:
41+
fp.seek(orig_pos)
42+
else:
43+
with open(os.fspath(fp), "rb") as f:
44+
yield f, True
45+
46+
2347
def compute_blake3_hash(
2448
fp: str | IO[bytes],
2549
chunk_size: int = DEFAULT_CHUNK,
@@ -42,67 +66,30 @@ def compute_blake3_hash(
4266
(None, checkpoint) on interruption (file paths only), or
4367
(None, None) on interruption of a file object
4468
"""
45-
if hasattr(fp, "read"):
46-
digest = _hash_file_obj(fp, chunk_size, interrupt_check)
47-
return digest, None
69+
if chunk_size <= 0:
70+
chunk_size = DEFAULT_CHUNK
4871

49-
with open(os.fspath(fp), "rb") as f:
50-
if checkpoint is not None:
72+
with _open_for_hashing(fp) as (f, is_path):
73+
if checkpoint is not None and is_path:
5174
f.seek(checkpoint.bytes_processed)
5275
h = checkpoint.hasher
5376
bytes_processed = checkpoint.bytes_processed
5477
else:
5578
h = blake3()
5679
bytes_processed = 0
5780

58-
if chunk_size <= 0:
59-
chunk_size = DEFAULT_CHUNK
60-
6181
while True:
6282
if interrupt_check is not None and interrupt_check():
63-
return None, HashCheckpoint(
64-
bytes_processed=bytes_processed,
65-
hasher=h,
66-
)
83+
if is_path:
84+
return None, HashCheckpoint(
85+
bytes_processed=bytes_processed,
86+
hasher=h,
87+
)
88+
return None, None
6789
chunk = f.read(chunk_size)
6890
if not chunk:
6991
break
7092
h.update(chunk)
7193
bytes_processed += len(chunk)
7294

7395
return h.hexdigest(), None
74-
75-
76-
def _hash_file_obj(
77-
file_obj: IO,
78-
chunk_size: int = DEFAULT_CHUNK,
79-
interrupt_check: InterruptCheck | None = None,
80-
) -> str | None:
81-
if chunk_size <= 0:
82-
chunk_size = DEFAULT_CHUNK
83-
84-
seekable = getattr(file_obj, "seekable", lambda: False)()
85-
orig_pos = None
86-
87-
if seekable:
88-
try:
89-
orig_pos = file_obj.tell()
90-
if orig_pos != 0:
91-
file_obj.seek(0)
92-
except io.UnsupportedOperation:
93-
seekable = False
94-
orig_pos = None
95-
96-
try:
97-
h = blake3()
98-
while True:
99-
if interrupt_check is not None and interrupt_check():
100-
return None
101-
chunk = file_obj.read(chunk_size)
102-
if not chunk:
103-
break
104-
h.update(chunk)
105-
return h.hexdigest()
106-
finally:
107-
if seekable and orig_pos is not None:
108-
file_obj.seek(orig_pos)

0 commit comments

Comments (0)