Skip to content

Commit 4c1d87e

Browse files
refactor(assets): defer hashing to background seeder enrich phase
ingest_existing_file() now only inserts a stub record (hash=NULL) for instant UX visibility. After registering outputs, the prompt worker triggers asset_seeder.start_enrich() to compute hashes in the background. This avoids blocking the prompt worker thread on hash computation. Amp-Thread-ID: https://ampcode.com/threads/T-019cc013-1444-73c8-81d6-07cae6e5e38d Co-authored-by: Amp <amp@ampcode.com>
1 parent 5ac207e commit 4c1d87e

File tree

2 files changed

+7
-25
lines changed

2 files changed

+7
-25
lines changed

app/assets/services/ingest.py

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -134,21 +134,19 @@ def ingest_existing_file(
134134
abs_path: str,
135135
user_metadata: UserMetadata = None,
136136
extra_tags: Sequence[str] = (),
137-
tag_origin: str = "automatic",
138137
owner_id: str = "",
139-
) -> IngestResult:
140-
"""Register an existing on-disk file as an asset.
138+
) -> None:
139+
"""Register an existing on-disk file as an asset stub.
141140
142-
Uses a two-phase approach: first inserts a stub record (hash=NULL) to
143-
unblock UX immediately, then computes the BLAKE3 hash and updates the
144-
asset with the full ingest pipeline (dedup, metadata, tags).
141+
Inserts a stub record (hash=NULL) for immediate UX visibility.
142+
The caller is responsible for triggering background enrichment
143+
(hash computation, metadata extraction) via the asset seeder.
145144
"""
146145
size_bytes, mtime_ns = get_size_and_mtime_ns(abs_path)
147146
mime_type = mimetypes.guess_type(abs_path, strict=False)[0]
148147
name, path_tags = get_name_and_tags_from_asset_path(abs_path)
149148
tags = list(dict.fromkeys(path_tags + list(extra_tags)))
150149

151-
# Phase 1: fast stub insert (hash=NULL) to make the asset visible immediately
152150
spec = {
153151
"abs_path": abs_path,
154152
"size_bytes": size_bytes,
@@ -164,23 +162,6 @@ def ingest_existing_file(
164162
batch_insert_seed_assets(session, [spec], owner_id=owner_id)
165163
session.commit()
166164

167-
# Phase 2: compute hash and run full ingest (dedup, metadata, tags)
168-
digest, _ = hashing.compute_blake3_hash(abs_path)
169-
asset_hash = "blake3:" + digest
170-
171-
return _ingest_file_from_path(
172-
abs_path=abs_path,
173-
asset_hash=asset_hash,
174-
size_bytes=size_bytes,
175-
mtime_ns=mtime_ns,
176-
mime_type=mime_type,
177-
info_name=name,
178-
user_metadata=user_metadata,
179-
tags=tags,
180-
tag_origin=tag_origin,
181-
owner_id=owner_id,
182-
)
183-
184165

185166
def _register_existing_asset(
186167
asset_hash: str,

main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,8 @@ def prompt_worker(q, server_instance):
304304
try:
305305
e.execute(item[2], prompt_id, extra_data, item[4])
306306
if not asset_seeder.is_disabled():
307-
_register_execution_outputs(e.history_result, prompt_id)
307+
if _register_execution_outputs(e.history_result, prompt_id) > 0:
308+
asset_seeder.start_enrich(roots=("output",), compute_hashes=True)
308309
finally:
309310
if was_paused:
310311
asset_seeder.resume()

0 commit comments

Comments
 (0)