Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions apps/worker/app/services/document_agent/visual.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,8 @@ def purge_debug_visual_dirs(output_dir: str | None) -> None:
candidates.extend(root / name for name in _DEBUG_VISUAL_DIRS)

for path in candidates:
try:
if path.exists():
shutil.rmtree(path)
except Exception:
pass
if path.exists():
shutil.rmtree(path, ignore_errors=True)


@worker
Expand Down
5 changes: 1 addition & 4 deletions apps/worker/app/services/page_memory/_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,4 @@ def write_top_level_artifacts(
if assets_by_page:
write_json(root / "assets.json", serialize_assets(assets_by_page))
else:
try:
(root / "assets.json").unlink()
except FileNotFoundError:
pass
(root / "assets.json").unlink(missing_ok=True)
4 changes: 1 addition & 3 deletions apps/worker/app/services/page_memory/memory_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,9 +991,7 @@ def _cleanup_page_memory_artifacts(output_dir: str) -> None:

def _remove_nested_doc_agent_trace(output_dir: str) -> None:
try:
(Path(output_dir) / "_doc_agent" / "trace.json").unlink()
except FileNotFoundError:
pass
(Path(output_dir) / "_doc_agent" / "trace.json").unlink(missing_ok=True)
except Exception as exc:
logger.debug("[page_memory] failed to remove nested doc-agent trace: {}", exc)

Expand Down
16 changes: 12 additions & 4 deletions apps/worker/app/services/page_memory/page_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -903,13 +903,21 @@ def _merge_table_html_files(

try:
Path(head_asset.html_path).unlink(missing_ok=True)
except Exception:
pass
except Exception as exc:
logger.debug(
"[page_assets] failed to remove merged table head html {}: {}",
head_asset.html_path,
exc,
)
if head_asset.image_path:
try:
Path(head_asset.image_path).unlink(missing_ok=True)
except Exception:
pass
except Exception as exc:
logger.debug(
"[page_assets] failed to remove merged table head image {}: {}",
head_asset.image_path,
exc,
)


__all__ = [
Expand Down
8 changes: 6 additions & 2 deletions apps/worker/app/services/page_memory/page_tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,12 @@ def _tag_vlm_titles(
prominence = None
try:
prominence = float(item.get("prominence", 0.5))
except (TypeError, ValueError):
pass
except (TypeError, ValueError) as exc:
logger.debug(
"[page_tagger] ignored non-numeric title prominence {}: {}",
item.get("prominence"),
exc,
)
observed.append({
"text": text,
"prominence": prominence,
Expand Down
2 changes: 1 addition & 1 deletion apps/worker/experiments/chart_asset_probe.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ def _write_report(out_dir: Path, pdf_path: Path, args: Any,
"## How to read",
"- `asset_annotate/page_N.png`: red = new VLM boxes, green = reference boxes from existing chunks.",
"- Judge VLM by: does the red box tightly enclose the table/chart "
"(incl. caption, excl. body text)? Compare against the green reference.",
+ "(incl. caption, excl. body text)? Compare against the green reference.",
"- `crops/`: the actual extracted assets to feed a table model next.",
]
(out_dir / "report.md").write_text("\n".join(lines), encoding="utf-8")
Expand Down
7 changes: 4 additions & 3 deletions packages/shared-python/shared/services/ai/summary/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ def _call_llm(
logger.warning("[summary] LLM call failed for {}: {}", usage_task, exc)
if budget is not None:
budget.refund(budget_pool, est=est, stage=budget_stage)
budget = None
return None
return None

Expand All @@ -202,7 +201,8 @@ def summarize(
asset_title_hint: str = ...,
prompt_task: str | None = ...,
prompt_paras: dict[str, Any] | None = ...,
) -> AssetSummary: ...
) -> AssetSummary:
pass


@overload
Expand All @@ -221,7 +221,8 @@ def summarize(
asset_title_hint: str = ...,
prompt_task: str | None = ...,
prompt_paras: dict[str, Any] | None = ...,
) -> BodySummary: ...
) -> BodySummary:
pass


def summarize(
Expand Down
Loading