diff --git a/agents/figure_agent.py b/agents/figure_agent.py
index 49b9ce5..7f6db5c 100644
--- a/agents/figure_agent.py
+++ b/agents/figure_agent.py
@@ -129,7 +129,14 @@ def render_metric_figure_artifacts(
)
used_fallback = False
try:
- subprocess.run([RUNTIME_PYTHON, str(script)], check=True, cwd=str(out_svg.parent), timeout=120)
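+ # Capture renderer output so a failing script's stderr is attached to the
+ # CalledProcessError rather than spilling into the validation-loop console.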
+ subprocess.run(
+ [RUNTIME_PYTHON, str(script)],
+ check=True,
+ cwd=str(out_svg.parent),
+ timeout=120,
+ capture_output=True,
+ text=True,
+ )
except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
used_fallback = True
out_svg.write_text(
diff --git a/agents/validation_loop.py b/agents/validation_loop.py
index f919568..bfb9c2e 100644
--- a/agents/validation_loop.py
+++ b/agents/validation_loop.py
@@ -22,6 +22,7 @@
from agents import codex_executor
from agents import experiment_supervisor
+from agents import visualization_agent
from agents.workspace_layout import ensure_run_workspace, plan_file_path, promote_canonical_run, write_latest_status
from contracts import DeepInsightSpec, ExperimentIterationPacket, ExperimentSpec
from config import (
@@ -511,6 +512,77 @@ def _record_artifact(
)
+def _generate_validation_figures(
+ run_id: int,
+ workdir: Path,
+ *,
+ insight: dict,
+ metric_name: str,
+ baseline_metric_value: float | None,
+ best_metric_value: float | None = None,
+ verdict: str | None = None,
+ summary_path: Path | None = None,
+) -> list[dict]:
+ """Generate validation-loop figure artifacts for a completed run."""
+ try:
+ bundle = visualization_agent.generate_visualization_bundle(
+ run_id=run_id,
+ workdir=workdir,
+ insight=insight,
+ metric_name=metric_name,
+ baseline_metric_value=baseline_metric_value,
+ best_metric_value=best_metric_value,
+ verdict=verdict,
+ summary_path=summary_path,
+ )
+ except Exception as exc:
+ print(f"[LOOP] Figure generation skipped for run {run_id}: {exc}", flush=True)
+ return []
+
+ assets = [dict(asset) for asset in bundle.get("assets") or [] if isinstance(asset, dict)]
+ for asset in assets:
+ path = Path(str(asset.get("path") or ""))
+ if not path.exists():
+ continue
+ asset_kind = str(asset.get("asset_kind") or "")
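+ # Rendered images register as "plot"; DOT/script/JSON sidecars register as
+ # "source_data" so downstream agents can re-render or audit them.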
+ artifact_type = "plot" if asset_kind in {"svg", "pdf", "png", "jpg", "jpeg"} else "source_data"
+ try:
+ _record_artifact(
+ run_id,
+ artifact_type,
+ path,
+ metric_key=asset.get("metric_name") or metric_name,
+ metadata={
+ "figure_id": asset.get("figure_id"),
+ "figure_kind": asset.get("figure_kind"),
+ "asset_kind": asset_kind,
+ "caption": asset.get("caption"),
+ "source": asset.get("source"),
+ **(asset.get("metadata") if isinstance(asset.get("metadata"), dict) else {}),
+ },
+ )
+ except Exception as exc:
+ print(f"[LOOP] Figure artifact registration skipped for {path}: {exc}", flush=True)
+
+ for key, contract_type in (
+ ("manifest_path", "ValidationFigureManifest"),
+ ("references_path", "ValidationFigureReferences"),
+ ):
+ raw_path = str(bundle.get(key) or "").strip()
+ if raw_path and Path(raw_path).exists():
+ try:
+ _record_artifact(
+ run_id,
+ "source_data",
+ Path(raw_path),
+ metric_key=metric_name,
+ metadata={"contract_type": contract_type},
+ )
+ except Exception as exc:
+ print(f"[LOOP] Figure manifest registration skipped for {raw_path}: {exc}", flush=True)
+ return assets
+
+
def _read_experiment_spec(
run: dict,
insight: dict,
@@ -1234,24 +1306,32 @@ def run_validation_loop(run_id: int, execution_context: dict | None = None) -> d
promote_canonical_run(insight_id, run_id, insight=insight)
summary_path = workdir / "results" / "validation_summary.json"
- summary_path.write_text(
- json.dumps(
- {
- "run_id": run_id,
- "verdict": verdict,
- "baseline": baseline,
- "best_value": best_value,
- "effect_size": effect,
- "effect_pct": effect_pct,
- "iterations_total": iter_num,
- "iterations_kept": total_kept,
- "environment_report": environment_report,
- "stop_reason": stop_reason,
- },
- indent=2,
- ),
- encoding="utf-8",
+ summary_payload = {
+ "run_id": run_id,
+ "verdict": verdict,
+ "baseline": baseline,
+ "best_value": best_value,
+ "effect_size": effect,
+ "effect_pct": effect_pct,
+ "iterations_total": iter_num,
+ "iterations_kept": total_kept,
+ "environment_report": environment_report,
+ "stop_reason": stop_reason,
+ }
+ summary_path.write_text(json.dumps(summary_payload, indent=2), encoding="utf-8")
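+ # Written before figure generation so the bundle can reference the summary;
+ # refreshed below once the figure inventory is known.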
+ figure_assets = _generate_validation_figures(
+ run_id,
+ workdir,
+ insight=insight,
+ metric_name=metric_name,
+ baseline_metric_value=baseline,
+ best_metric_value=best_value,
+ verdict=verdict,
+ summary_path=summary_path,
)
+ if figure_assets:
+ summary_payload["figure_artifacts"] = figure_assets
+ summary_path.write_text(json.dumps(summary_payload, indent=2), encoding="utf-8")
_record_artifact(
run_id,
"source_data",
@@ -1274,6 +1354,7 @@ def run_validation_loop(run_id: int, execution_context: dict | None = None) -> d
"iterations_total": iter_num,
"iterations_kept": total_kept,
"summary_path": str(summary_path),
+ "figure_artifacts": figure_assets,
},
run_id=run_id,
insight=insight,
@@ -1294,4 +1375,5 @@ def run_validation_loop(run_id: int, execution_context: dict | None = None) -> d
"total_seconds": total_time,
"environment_report": environment_report,
"stop_reason": stop_reason,
+ "figure_artifacts": figure_assets,
}
diff --git a/agents/visualization_agent.py b/agents/visualization_agent.py
new file mode 100644
index 0000000..16ddc5a
--- /dev/null
+++ b/agents/visualization_agent.py
@@ -0,0 +1,846 @@
+"""Automated research visualization pipeline for SciForge runs.
+
+This module turns a completed validation run into manuscript-ready figure
+artifacts. It deliberately uses deterministic renderers first: matplotlib for
+numeric plots and simple diagrams-as-code for conceptual figures. That keeps
+the validation loop stable in offline and CI environments while still leaving
+DOT sidecars that can be refined by downstream manuscript agents.
+"""
+
+from __future__ import annotations
+
+import html
+import json
+import math
+import re
+import subprocess
+import textwrap
+from pathlib import Path
+from typing import Any
+
+from agents import figure_agent
+from config import RUNTIME_PYTHON
+from db import database as db
+
+
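+# Conservative caps keep generated diagrams legible and bound the DB queries below.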
+MAX_LABEL_CHARS = 52
+MAX_KG_EDGES = 14
+MAX_RESULT_ROWS = 8
+
+
+def _json_load(value: Any, default: Any) -> Any:
+ if value is None:
+ return default
+ if isinstance(value, (dict, list)):
+ return value
+ try:
+ return json.loads(value)
+ except (json.JSONDecodeError, TypeError):
+ return default
+
+
+def _as_list(value: Any) -> list:
+ loaded = _json_load(value, [])
+ if isinstance(loaded, list):
+ return loaded
+ if loaded in (None, ""):
+ return []
+ return [loaded]
+
+
+def _as_dict(value: Any) -> dict[str, Any]:
+ loaded = _json_load(value, {})
+ return loaded if isinstance(loaded, dict) else {}
+
+
+def _shorten(value: Any, limit: int = MAX_LABEL_CHARS) -> str:
+ text = re.sub(r"\s+", " ", str(value or "")).strip()
+ if len(text) <= limit:
+ return text
+ return text[: max(0, limit - 3)].rstrip() + "..."
+
+
+def _escape(value: Any) -> str:
+ return html.escape(str(value or ""), quote=True)
+
+
+def _wrap_svg_lines(text: Any, width: int = 34, max_lines: int = 3) -> list[str]:
+ raw = _shorten(text, width * max_lines)
+ lines = textwrap.wrap(raw, width=width) or [""]
+ return lines[:max_lines]
+
+
+def _asset(
+ *,
+ figure_id: str,
+ path: Path,
+ asset_kind: str,
+ figure_kind: str,
+ caption: str,
+ source: str,
+ metric_name: str | None = None,
+ metadata: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+ return {
+ "figure_id": figure_id,
+ "figure_kind": figure_kind,
+ "asset_kind": asset_kind,
+ "path": str(path),
+ "caption": caption,
+ "source": source,
+ "metric_name": metric_name or "",
+ "metadata": metadata or {},
+ }
+
+
+def _write_text_block_svg(
+ path: Path,
+ *,
+ title: str,
+ boxes: list[dict[str, Any]],
+ arrows: list[tuple[int, int]] | None = None,
+ width: int = 980,
+ height: int = 520,
+) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ arrows = arrows or []
+ box_svg: list[str] = []
+ for idx, box in enumerate(boxes):
+ x = int(box.get("x", 40))
+ y = int(box.get("y", 90))
+ w = int(box.get("w", 250))
+ h = int(box.get("h", 110))
+ fill = str(box.get("fill") or "#f8fafc")
+ stroke = str(box.get("stroke") or "#334155")
+ label = str(box.get("label") or f"Box {idx + 1}")
+ body = str(box.get("body") or "")
+ lines = _wrap_svg_lines(body, width=max(18, int(w / 9)), max_lines=4)
+ text_parts = [
+ f'<text x="{x + w // 2}" y="{y + 26}" text-anchor="middle" font-size="14" font-weight="bold" fill="#0f172a">{_escape(label)}</text>'
+ ]
+ for line_idx, line in enumerate(lines):
+ text_parts.append(
+ f'<text x="{x + w // 2}" y="{y + 48 + line_idx * 18}" text-anchor="middle" font-size="12" fill="#1e293b">{_escape(line)}</text>'
+ )
+ box_svg.append(
+ "\n".join(
+ [
+ f'<rect x="{x}" y="{y}" width="{w}" height="{h}" rx="10" fill="{fill}" stroke="{stroke}" stroke-width="2"/>',
+ *text_parts,
+ ]
+ )
+ )
+ arrow_svg: list[str] = []
+ for src, dst in arrows:
+ if src >= len(boxes) or dst >= len(boxes):
+ continue
+ a = boxes[src]
+ b = boxes[dst]
+ x1 = int(a.get("x", 0)) + int(a.get("w", 0))
+ y1 = int(a.get("y", 0)) + int(a.get("h", 0)) // 2
+ x2 = int(b.get("x", 0))
+ y2 = int(b.get("y", 0)) + int(b.get("h", 0)) // 2
+ arrow_svg.append(
+ f'<line x1="{x1}" y1="{y1}" x2="{x2}" y2="{y2}" stroke="#475569" stroke-width="2" marker-end="url(#arrow)"/>'
+ )
+ svg = f"""
+"""
+ path.write_text(svg, encoding="utf-8")
+
+
+def _write_dot(path: Path, *, title: str, nodes: list[tuple[str, str]], edges: list[tuple[str, str, str]]) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
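+ # json.dumps doubles as a DOT-safe quoter for titles and labels.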
+ lines = [
+ "digraph G {",
+ " graph [rankdir=LR, bgcolor=white, labelloc=t, fontsize=20, fontname=Helvetica];",
+ f" label={json.dumps(title)};",
+ " node [shape=box, style=\"rounded,filled\", fillcolor=\"#f8fafc\", color=\"#334155\", fontname=Helvetica];",
+ " edge [color=\"#475569\", fontname=Helvetica];",
+ ]
+ for node_id, label in nodes:
+ lines.append(f" {node_id} [label={json.dumps(_shorten(label, 80))}];")
+ for src, dst, label in edges:
+ edge_label = f" [label={json.dumps(_shorten(label, 40))}]" if label else ""
+ lines.append(f" {src} -> {dst}{edge_label};")
+ lines.append("}")
+ path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+
+
+def _write_bar_script(
+ script_path: Path,
+ *,
+ labels: list[str],
+ values: list[float],
+ title: str,
+ ylabel: str,
+ out_svg: Path,
+ out_pdf: Path,
+) -> None:
+ script_path.write_text(
+ f"""
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
+labels = {labels!r}
+values = {values!r}
+colors = ["#64748b", "#2563eb", "#0f766e", "#b45309", "#7c3aed", "#be123c", "#0369a1", "#4d7c0f"]
+fig, ax = plt.subplots(figsize=(7.6, 4.4))
+bars = ax.bar(range(len(values)), values, color=colors[:len(values)], width=0.62)
+ax.set_title({title!r}, fontsize=14, pad=10)
+ax.set_ylabel({ylabel!r}, fontsize=11)
+ax.set_xticks(range(len(labels)))
+ax.set_xticklabels(labels, rotation=18, ha="right", fontsize=9)
+ax.grid(axis="y", alpha=0.22)
+ax.spines["top"].set_visible(False)
+ax.spines["right"].set_visible(False)
+for bar, value in zip(bars, values):
+ height = bar.get_height()
+ ax.annotate(f"{{value:.4g}}", xy=(bar.get_x() + bar.get_width() / 2, height),
+ xytext=(0, 4), textcoords="offset points", ha="center", va="bottom", fontsize=9)
+fig.tight_layout()
+fig.savefig({str(out_svg)!r}, format="svg")
+fig.savefig({str(out_pdf)!r}, format="pdf")
+plt.close(fig)
+""".strip()
+ + "\n",
+ encoding="utf-8",
+ )
+
+
+def _write_fallback_bar_svg(path: Path, *, labels: list[str], values: list[float], title: str, ylabel: str) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ width = 820
+ height = 460
+ plot_x = 90
+ plot_y = 90
+ plot_w = 650
+ plot_h = 260
+ max_value = max([abs(v) for v in values] or [1.0]) or 1.0
+ bar_w = max(28, int(plot_w / max(1, len(values)) * 0.55))
+ chunks = [
+ f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
+ '<rect width="100%" height="100%" fill="white"/>',
+ f'<text x="{width // 2}" y="44" text-anchor="middle" font-size="18" font-weight="bold" fill="#0f172a">{_escape(title)}</text>',
+ f'<text x="26" y="{plot_y + plot_h // 2}" text-anchor="middle" font-size="12" fill="#334155" transform="rotate(-90 26 {plot_y + plot_h // 2})">{_escape(ylabel)}</text>',
+ f'<line x1="{plot_x}" y1="{plot_y + plot_h}" x2="{plot_x + plot_w}" y2="{plot_y + plot_h}" stroke="#334155"/>',
+ ]
+ step = plot_w / max(1, len(values))
+ for idx, (label, value) in enumerate(zip(labels, values)):
+ bar_h = int(plot_h * abs(value) / max_value)
+ x = int(plot_x + idx * step + (step - bar_w) / 2)
+ y = plot_y + plot_h - bar_h
+ chunks.append(f'<rect x="{x}" y="{y}" width="{bar_w}" height="{bar_h}" fill="#2563eb"/>')
+ chunks.append(f'<text x="{x + bar_w // 2}" y="{plot_y + plot_h + 18}" text-anchor="middle" font-size="10" fill="#334155">{_escape(_shorten(label, 16))}</text>')
+ chunks.append("</svg>")
+ path.write_text("\n".join(chunks) + "\n", encoding="utf-8")
+
+
+def _render_bar_chart(
+ *,
+ labels: list[str],
+ values: list[float],
+ title: str,
+ ylabel: str,
+ out_svg: Path,
+) -> dict[str, str | bool]:
+ out_svg.parent.mkdir(parents=True, exist_ok=True)
+ out_pdf = out_svg.with_suffix(".pdf")
+ script_path = out_svg.with_suffix(".py")
+ _write_bar_script(
+ script_path,
+ labels=labels,
+ values=values,
+ title=title,
+ ylabel=ylabel,
+ out_svg=out_svg,
+ out_pdf=out_pdf,
+ )
+ used_fallback = False
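+ # Try the matplotlib script first; on any failure (missing interpreter,
+ # import error, timeout) emit a dependency-free SVG instead.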
+ try:
+ subprocess.run(
+ [RUNTIME_PYTHON, str(script_path)],
+ cwd=str(out_svg.parent),
+ timeout=120,
+ check=True,
+ capture_output=True,
+ text=True,
+ )
+ except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
+ used_fallback = True
+ _write_fallback_bar_svg(out_svg, labels=labels, values=values, title=title, ylabel=ylabel)
+ if out_pdf.exists():
+ out_pdf.unlink()
+ return {
+ "svg_path": str(out_svg),
+ "pdf_path": str(out_pdf) if out_pdf.exists() else "",
+ "code_path": str(script_path),
+ "used_fallback": used_fallback,
+ }
+
+
+def _fetch_iterations(run_id: int) -> list[dict[str, Any]]:
+ rows = db.fetchall(
+ """
+ SELECT iteration_number, phase, metric_value, metric_name, status, description
+ FROM experiment_iterations
+ WHERE run_id=?
+ ORDER BY iteration_number
+ """,
+ (run_id,),
+ )
+ return [dict(row) for row in rows]
+
+
+def _fetch_kg_relations(insight: dict[str, Any]) -> list[dict[str, Any]]:
+ source_node_ids = [str(x) for x in _as_list(insight.get("source_node_ids")) if str(x).strip()]
+ source_paper_ids = [str(x) for x in _as_list(insight.get("source_paper_ids")) if str(x).strip()]
+ filters: list[str] = []
+ params: list[Any] = []
+ if source_node_ids:
+ filters.append("gr.node_id IN ({})".format(",".join("?" for _ in source_node_ids)))
+ params.extend(source_node_ids)
+ if source_paper_ids:
+ filters.append("gr.paper_id IN ({})".format(",".join("?" for _ in source_paper_ids)))
+ params.extend(source_paper_ids)
+ where = "WHERE " + " OR ".join(filters) if filters else ""
+ sql = f"""
+ SELECT
+ gr.node_id,
+ gr.predicate,
+ gr.confidence,
+ subj.canonical_name AS subject_name,
+ obj.canonical_name AS object_name,
+ subj.entity_type AS subject_type,
+ obj.entity_type AS object_type
+ FROM graph_relations gr
+ JOIN graph_entities subj ON subj.id = gr.subject_entity_id
+ JOIN graph_entities obj ON obj.id = gr.object_entity_id
+ {where}
+ ORDER BY gr.confidence DESC, gr.id DESC
+ LIMIT {MAX_KG_EDGES}
+ """
+ try:
+ return [dict(row) for row in db.fetchall(sql, tuple(params))]
+ except Exception:
+ return []
+
+
+def _fetch_literature_results(insight: dict[str, Any]) -> list[dict[str, Any]]:
+ source_node_ids = [str(x) for x in _as_list(insight.get("source_node_ids")) if str(x).strip()]
+ source_paper_ids = [str(x) for x in _as_list(insight.get("source_paper_ids")) if str(x).strip()]
+ filters: list[str] = []
+ params: list[Any] = []
+ if source_node_ids:
+ filters.append("node_id IN ({})".format(",".join("?" for _ in source_node_ids)))
+ params.extend(source_node_ids)
+ if source_paper_ids:
+ filters.append("paper_id IN ({})".format(",".join("?" for _ in source_paper_ids)))
+ params.extend(source_paper_ids)
+ where = "WHERE metric_value IS NOT NULL"
+ if filters:
+ where += " AND (" + " OR ".join(filters) + ")"
+ sql = f"""
+ SELECT method_name, dataset_name, metric_name, metric_value
+ FROM results
+ {where}
+ ORDER BY metric_value DESC
+ LIMIT {MAX_RESULT_ROWS}
+ """
+ try:
+ return [dict(row) for row in db.fetchall(sql, tuple(params))]
+ except Exception:
+ return []
+
+
+def _method_name(insight: dict[str, Any]) -> str:
+ method = _as_dict(insight.get("proposed_method"))
+ return str(method.get("name") or insight.get("title") or "Proposed method")
+
+
+def _experimental_metric_name(insight: dict[str, Any], fallback: str) -> str:
+ plan = _as_dict(insight.get("experimental_plan"))
+ metrics = plan.get("metrics")
+ if isinstance(metrics, dict):
+ return str(metrics.get("primary") or metrics.get("name") or fallback)
+ if isinstance(metrics, list) and metrics:
+ first = metrics[0]
+ if isinstance(first, dict):
+ return str(first.get("name") or fallback)
+ return str(first or fallback)
+ return fallback
+
+
+def _generate_metric_trajectory(
+ *,
+ iterations: list[dict[str, Any]],
+ baseline_metric_value: float | None,
+ metric_name: str,
+ figures_dir: Path,
+) -> list[dict[str, Any]]:
+ out_svg = figures_dir / "fig_metric_trajectory.svg"
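+ # figure_agent renders with critic review and retries, reporting score,
+ # attempts, and fallback usage in its result metadata.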
+ meta = figure_agent.generate_metric_figure_with_retry(
+ iterations,
+ baseline_metric_value,
+ metric_name,
+ out_svg,
+ title=f"{metric_name} trajectory",
+ objective="Show validation-loop metric trajectory against the reproduced baseline.",
+ )
+ caption = f"Validation trajectory for {metric_name}, with the reproduced baseline shown as reference."
+ assets: list[dict[str, Any]] = []
+ for key, asset_kind in (("svg_path", "svg"), ("pdf_path", "pdf"), ("code_path", "source")):
+ raw = str(meta.get(key) or "").strip()
+ if raw and Path(raw).exists():
+ assets.append(
+ _asset(
+ figure_id="fig_metric_trajectory",
+ figure_kind="metric_trajectory",
+ asset_kind=asset_kind,
+ path=Path(raw),
+ caption=caption,
+ source="experiment_iterations",
+ metric_name=metric_name,
+ metadata={
+ "score": meta.get("score"),
+ "notes": meta.get("notes"),
+ "attempts": meta.get("attempts"),
+ "used_fallback": meta.get("used_fallback"),
+ },
+ )
+ )
+ return assets
+
+
+def _generate_baseline_comparison(
+ *,
+ baseline_metric_value: float | None,
+ best_metric_value: float | None,
+ metric_name: str,
+ figures_dir: Path,
+) -> list[dict[str, Any]]:
+ if baseline_metric_value is None or best_metric_value is None:
+ return []
+ labels = ["baseline", "best proposed"]
+ values = [float(baseline_metric_value), float(best_metric_value)]
+ meta = _render_bar_chart(
+ labels=labels,
+ values=values,
+ title=f"Baseline vs proposed ({metric_name})",
+ ylabel=metric_name,
+ out_svg=figures_dir / "fig_baseline_vs_proposed.svg",
+ )
+ caption = f"Baseline versus best proposed validation result for {metric_name}."
+ assets: list[dict[str, Any]] = []
+ for key, asset_kind in (("svg_path", "svg"), ("pdf_path", "pdf"), ("code_path", "source")):
+ raw = str(meta.get(key) or "").strip()
+ if raw and Path(raw).exists():
+ assets.append(
+ _asset(
+ figure_id="fig_baseline_vs_proposed",
+ figure_kind="experiment_comparison",
+ asset_kind=asset_kind,
+ path=Path(raw),
+ caption=caption,
+ source="experiment_runs",
+ metric_name=metric_name,
+ metadata={"used_fallback": meta.get("used_fallback")},
+ )
+ )
+ return assets
+
+
+def _generate_literature_results_chart(
+ *,
+ rows: list[dict[str, Any]],
+ figures_dir: Path,
+) -> list[dict[str, Any]]:
+ if not rows:
+ return []
+ metric_name = str(rows[0].get("metric_name") or "metric")
+ labels = [
+ _shorten(f"{row.get('method_name') or 'method'} / {row.get('dataset_name') or 'dataset'}", 32)
+ for row in rows
+ ]
+ values = [float(row.get("metric_value") or 0.0) for row in rows]
+ meta = _render_bar_chart(
+ labels=labels,
+ values=values,
+ title=f"Knowledge graph result snapshot ({metric_name})",
+ ylabel=metric_name,
+ out_svg=figures_dir / "fig_literature_results.svg",
+ )
+ caption = "Top extracted literature results from the DeepGraph results table for this insight context."
+ assets: list[dict[str, Any]] = []
+ for key, asset_kind in (("svg_path", "svg"), ("pdf_path", "pdf"), ("code_path", "source")):
+ raw = str(meta.get(key) or "").strip()
+ if raw and Path(raw).exists():
+ assets.append(
+ _asset(
+ figure_id="fig_literature_results",
+ figure_kind="literature_result_chart",
+ asset_kind=asset_kind,
+ path=Path(raw),
+ caption=caption,
+ source="results",
+ metric_name=metric_name,
+ metadata={"row_count": len(rows), "used_fallback": meta.get("used_fallback")},
+ )
+ )
+ return assets
+
+
+def _generate_overview_diagram(
+ *,
+ insight: dict[str, Any],
+ verdict: str | None,
+ figures_dir: Path,
+) -> list[dict[str, Any]]:
+ method = _as_dict(insight.get("proposed_method"))
+ plan = _as_dict(insight.get("experimental_plan"))
+ metrics = plan.get("metrics") if isinstance(plan.get("metrics"), dict) else {}
+ out_svg = figures_dir / "fig_approach_overview.svg"
+ boxes = [
+ {
+ "x": 44,
+ "y": 112,
+ "w": 245,
+ "h": 145,
+ "label": "Current limitation",
+ "body": insight.get("existing_weakness") or insight.get("problem_statement") or "Existing method limitations",
+ "fill": "#fef2f2",
+ "stroke": "#b91c1c",
+ },
+ {
+ "x": 365,
+ "y": 112,
+ "w": 245,
+ "h": 145,
+ "label": "Proposed improvement",
+ "body": method.get("one_line") or method.get("definition") or _method_name(insight),
+ "fill": "#eff6ff",
+ "stroke": "#1d4ed8",
+ },
+ {
+ "x": 686,
+ "y": 112,
+ "w": 245,
+ "h": 145,
+ "label": "Validation evidence",
+ "body": f"Metric: {metrics.get('primary') or 'primary metric'}; verdict: {verdict or 'pending'}",
+ "fill": "#ecfdf5",
+ "stroke": "#047857",
+ },
+ ]
+ _write_text_block_svg(out_svg, title=_shorten(insight.get("title") or "Approach overview", 80), boxes=boxes, arrows=[(0, 1), (1, 2)])
+ out_dot = out_svg.with_suffix(".dot")
+ _write_dot(
+ out_dot,
+ title="Approach overview",
+ nodes=[("limitation", "Current limitation"), ("method", _method_name(insight)), ("evidence", "Validation evidence")],
+ edges=[("limitation", "method", "addresses"), ("method", "evidence", "validated by")],
+ )
+ caption = "Overview of the limitation, proposed method, and validation evidence produced by SciForge."
+ return [
+ _asset(
+ figure_id="fig_approach_overview",
+ figure_kind="overview_diagram",
+ asset_kind="svg",
+ path=out_svg,
+ caption=caption,
+ source="deep_insights",
+ ),
+ _asset(
+ figure_id="fig_approach_overview",
+ figure_kind="overview_diagram",
+ asset_kind="dot",
+ path=out_dot,
+ caption=caption,
+ source="deep_insights",
+ ),
+ ]
+
+
+def _generate_method_architecture_diagram(
+ *,
+ insight: dict[str, Any],
+ figures_dir: Path,
+) -> list[dict[str, Any]]:
+ method = _as_dict(insight.get("proposed_method"))
+ plan = _as_dict(insight.get("experimental_plan"))
+ baselines = _as_list(plan.get("baselines"))
+ datasets = _as_list(plan.get("datasets"))
+ method_label = method.get("name") or insight.get("title") or "Proposed method"
+ out_svg = figures_dir / "fig_method_architecture.svg"
+ boxes = [
+ {
+ "x": 44,
+ "y": 112,
+ "w": 220,
+ "h": 130,
+ "label": "Inputs",
+ "body": ", ".join(_shorten(item.get("name") if isinstance(item, dict) else item, 24) for item in datasets[:3]) or "Experiment data",
+ "fill": "#f8fafc",
+ "stroke": "#334155",
+ },
+ {
+ "x": 332,
+ "y": 92,
+ "w": 300,
+ "h": 170,
+ "label": _shorten(method_label, 28),
+ "body": method.get("definition") or method.get("one_line") or "Structured proposed method",
+ "fill": "#eff6ff",
+ "stroke": "#2563eb",
+ },
+ {
+ "x": 704,
+ "y": 112,
+ "w": 220,
+ "h": 130,
+ "label": "Evaluation",
+ "body": f"Baselines: {len(baselines)}; metric: {_experimental_metric_name(insight, 'metric')}",
+ "fill": "#f0fdf4",
+ "stroke": "#15803d",
+ },
+ ]
+ _write_text_block_svg(out_svg, title="Method architecture", boxes=boxes, arrows=[(0, 1), (1, 2)])
+ out_dot = out_svg.with_suffix(".dot")
+ _write_dot(
+ out_dot,
+ title="Method architecture",
+ nodes=[("inputs", "Inputs"), ("method", method_label), ("eval", "Evaluation")],
+ edges=[("inputs", "method", "feeds"), ("method", "eval", "measured by")],
+ )
+ caption = "Architecture-style diagram derived from the structured proposed method and experimental plan."
+ return [
+ _asset(
+ figure_id="fig_method_architecture",
+ figure_kind="method_architecture",
+ asset_kind="svg",
+ path=out_svg,
+ caption=caption,
+ source="deep_insights.proposed_method",
+ ),
+ _asset(
+ figure_id="fig_method_architecture",
+ figure_kind="method_architecture",
+ asset_kind="dot",
+ path=out_dot,
+ caption=caption,
+ source="deep_insights.proposed_method",
+ ),
+ ]
+
+
+def _generate_kg_subgraph_diagram(
+ *,
+ relations: list[dict[str, Any]],
+ figures_dir: Path,
+) -> list[dict[str, Any]]:
+ if not relations:
+ return []
+ nodes: dict[str, tuple[float, float]] = {}
+ labels: dict[str, str] = {}
+ edges: list[tuple[str, str, str]] = []
+ for row in relations[:MAX_KG_EDGES]:
+ subject = _shorten(row.get("subject_name") or "subject", 30)
+ obj = _shorten(row.get("object_name") or "object", 30)
+ predicate = _shorten(row.get("predicate") or "relates_to", 28)
+ labels.setdefault(subject, subject)
+ labels.setdefault(obj, obj)
+ edges.append((subject, obj, predicate))
+ node_names = sorted(labels)
+ width = 980
+ height = 620
+ cx, cy = width / 2, height / 2 + 24
+ radius = min(260, max(150, 34 * len(node_names)))
+ for idx, name in enumerate(node_names):
+ angle = 2 * math.pi * idx / max(1, len(node_names)) - math.pi / 2
+ nodes[name] = (cx + radius * math.cos(angle), cy + radius * math.sin(angle))
+ out_svg = figures_dir / "fig_knowledge_subgraph.svg"
+ out_svg.parent.mkdir(parents=True, exist_ok=True)
+ chunks = [
+ f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
+ '<rect width="100%" height="100%" fill="white"/>',
+ f'<text x="{width // 2}" y="46" text-anchor="middle" font-size="20" font-weight="bold" fill="#0f172a">Knowledge graph subgraph</text>',
+ ]
+ for src, dst, predicate in edges:
+ x1, y1 = nodes[src]
+ x2, y2 = nodes[dst]
+ chunks.append(f'<line x1="{x1:.0f}" y1="{y1:.0f}" x2="{x2:.0f}" y2="{y2:.0f}" stroke="#94a3b8" stroke-width="1.5"/>')
+ chunks.append(f'<text x="{(x1 + x2) / 2:.0f}" y="{(y1 + y2) / 2 - 4:.0f}" text-anchor="middle" font-size="10" fill="#475569">{_escape(predicate)}</text>')
+ for name, (x, y) in nodes.items():
+ chunks.append(f'<circle cx="{x:.0f}" cy="{y:.0f}" r="7" fill="#2563eb"/>')
+ chunks.append(f'<text x="{x:.0f}" y="{y - 12:.0f}" text-anchor="middle" font-size="11" fill="#0f172a">{_escape(name)}</text>')
+ chunks.append("</svg>")
+ out_svg.write_text("\n".join(chunks) + "\n", encoding="utf-8")
+ out_dot = out_svg.with_suffix(".dot")
+ dot_ids = {name: f"n{idx}" for idx, name in enumerate(node_names)}
+ _write_dot(
+ out_dot,
+ title="Knowledge graph subgraph",
+ nodes=[(dot_ids[name], name) for name in node_names],
+ edges=[(dot_ids[src], dot_ids[dst], pred) for src, dst, pred in edges],
+ )
+ caption = "Entity-relation subgraph extracted from DeepGraph knowledge graph context for this insight."
+ return [
+ _asset(
+ figure_id="fig_knowledge_subgraph",
+ figure_kind="knowledge_graph_subgraph",
+ asset_kind="svg",
+ path=out_svg,
+ caption=caption,
+ source="graph_relations",
+ metadata={"edge_count": len(edges), "node_count": len(node_names)},
+ ),
+ _asset(
+ figure_id="fig_knowledge_subgraph",
+ figure_kind="knowledge_graph_subgraph",
+ asset_kind="dot",
+ path=out_dot,
+ caption=caption,
+ source="graph_relations",
+ metadata={"edge_count": len(edges), "node_count": len(node_names)},
+ ),
+ ]
+
+
+def write_figure_references(workdir: Path, assets: list[dict[str, Any]]) -> str:
+ """Reference generated figures in final_report.md when it exists.
+
+ If no final report exists in the experiment workspace, write a standalone
+ figure reference file so downstream manuscript agents can still consume the
+ visual inventory.
+ """
+ logical: dict[str, dict[str, Any]] = {}
+ for asset in assets:
+ if asset.get("asset_kind") != "svg":
+ continue
+ logical.setdefault(str(asset["figure_id"]), asset)
+ lines = ["## Generated Figures", ""]
+ for asset in logical.values():
+ rel_path = Path(str(asset["path"]))
+ try:
+ rel = rel_path.relative_to(workdir)
+ except ValueError:
+ rel = rel_path
+ lines.append(f"- `{asset['figure_id']}`: {asset.get('caption') or ''} (`{rel}`)")
+ block = "\n".join(lines).strip() + "\n"
+ marker_start = ""
+ marker_end = ""
+ wrapped = f"\n{marker_start}\n{block}{marker_end}\n"
+ for candidate in (workdir / "final_report.md", workdir / "results" / "final_report.md"):
+ if not candidate.exists():
+ continue
+ text = candidate.read_text(encoding="utf-8", errors="replace")
+ if marker_start in text and marker_end in text:
+ before, rest = text.split(marker_start, 1)
+ _, after = rest.split(marker_end, 1)
+ candidate.write_text(before.rstrip() + wrapped + after.lstrip(), encoding="utf-8")
+ else:
+ candidate.write_text(text.rstrip() + "\n" + wrapped, encoding="utf-8")
+ return str(candidate)
+ ref_path = workdir / "figures" / "figure_references.md"
+ ref_path.parent.mkdir(parents=True, exist_ok=True)
+ ref_path.write_text(block, encoding="utf-8")
+ return str(ref_path)
+
+
+def generate_visualization_bundle(
+ *,
+ run_id: int,
+ workdir: Path,
+ insight: dict[str, Any],
+ metric_name: str,
+ baseline_metric_value: float | None,
+ best_metric_value: float | None = None,
+ verdict: str | None = None,
+ summary_path: Path | None = None,
+) -> dict[str, Any]:
+ """Generate all SciForge visualization artifacts for a completed run."""
+ workdir = Path(workdir)
+ figures_dir = workdir / "figures"
+ figures_dir.mkdir(parents=True, exist_ok=True)
+ iterations = _fetch_iterations(run_id)
+ assets: list[dict[str, Any]] = []
+ skipped: list[dict[str, str]] = []
+
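+ # Generators are independent: an empty result records a "no_source_data"
+ # skip, an exception records the truncated error, and the rest continue.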
+ generators = (
+ ("overview_diagram", lambda: _generate_overview_diagram(insight=insight, verdict=verdict, figures_dir=figures_dir)),
+ ("method_architecture", lambda: _generate_method_architecture_diagram(insight=insight, figures_dir=figures_dir)),
+ (
+ "metric_trajectory",
+ lambda: _generate_metric_trajectory(
+ iterations=iterations,
+ baseline_metric_value=baseline_metric_value,
+ metric_name=metric_name,
+ figures_dir=figures_dir,
+ ),
+ ),
+ (
+ "baseline_comparison",
+ lambda: _generate_baseline_comparison(
+ baseline_metric_value=baseline_metric_value,
+ best_metric_value=best_metric_value,
+ metric_name=metric_name,
+ figures_dir=figures_dir,
+ ),
+ ),
+ ("literature_results", lambda: _generate_literature_results_chart(rows=_fetch_literature_results(insight), figures_dir=figures_dir)),
+ ("knowledge_graph_subgraph", lambda: _generate_kg_subgraph_diagram(relations=_fetch_kg_relations(insight), figures_dir=figures_dir)),
+ )
+
+ for name, build in generators:
+ try:
+ produced = build()
+ if produced:
+ assets.extend(produced)
+ else:
+ skipped.append({"figure_kind": name, "reason": "no_source_data"})
+ except Exception as exc:
+ skipped.append({"figure_kind": name, "reason": str(exc)[:240]})
+
+ references_path = write_figure_references(workdir, assets) if assets else ""
+ manifest = {
+ "run_id": run_id,
+ "deep_insight_id": insight.get("id"),
+ "metric_name": metric_name,
+ "baseline_metric_value": baseline_metric_value,
+ "best_metric_value": best_metric_value,
+ "verdict": verdict,
+ "validation_summary": str(summary_path) if summary_path else "",
+ "assets": assets,
+ "skipped": skipped,
+ "references_path": references_path,
+ }
+ manifest_path = figures_dir / "figure_manifest.json"
+ manifest_path.write_text(json.dumps(manifest, indent=2, default=str), encoding="utf-8")
+ return {
+ **manifest,
+ "manifest_path": str(manifest_path),
+ "figures_dir": str(figures_dir),
+ }
diff --git a/tests/test_validation_loop.py b/tests/test_validation_loop.py
index f55dcb9..a809e68 100644
--- a/tests/test_validation_loop.py
+++ b/tests/test_validation_loop.py
@@ -33,6 +33,149 @@ def test_find_train_file_prefers_nested_proxy_path(self):
self.assertEqual(resolved, target)
+ def test_generate_validation_figures_records_plot_artifacts(self):
+ with tempfile.TemporaryDirectory() as tmpdir:
+ workdir = Path(tmpdir)
+ summary_path = workdir / "results" / "validation_summary.json"
+ summary_path.parent.mkdir(parents=True, exist_ok=True)
+ summary_path.write_text("{}", encoding="utf-8")
+
+ def fake_generate(**kwargs):
+ figures_dir = kwargs["workdir"] / "figures"
+ figures_dir.mkdir(parents=True, exist_ok=True)
+ out_svg = figures_dir / "fig_metric_trajectory.svg"
+ out_svg.write_text("", encoding="utf-8")
+ out_pdf = figures_dir / "fig_metric_trajectory.pdf"
+ out_pdf.write_text("%PDF", encoding="utf-8")
+ code_path = figures_dir / "fig_metric_trajectory.py"
+ code_path.write_text("print('plot')\n", encoding="utf-8")
+ manifest_path = figures_dir / "figure_manifest.json"
+ manifest_path.write_text("{}", encoding="utf-8")
+ return {
+ "assets": [
+ {
+ "figure_id": "fig_metric_trajectory",
+ "figure_kind": "metric_trajectory",
+ "asset_kind": "svg",
+ "path": str(out_svg),
+ "caption": "Metric trajectory.",
+ "source": "experiment_iterations",
+ "metric_name": "accuracy",
+ },
+ {
+ "figure_id": "fig_metric_trajectory",
+ "figure_kind": "metric_trajectory",
+ "asset_kind": "pdf",
+ "path": str(out_pdf),
+ "caption": "Metric trajectory.",
+ "source": "experiment_iterations",
+ "metric_name": "accuracy",
+ },
+ {
+ "figure_id": "fig_metric_trajectory",
+ "figure_kind": "metric_trajectory",
+ "asset_kind": "source",
+ "path": str(code_path),
+ "caption": "Metric trajectory.",
+ "source": "experiment_iterations",
+ "metric_name": "accuracy",
+ },
+ ],
+ "manifest_path": str(manifest_path),
+ "references_path": "",
+ }
+
+ with (
+ mock.patch.object(validation_loop.db, "execute") as execute,
+ mock.patch.object(
+ validation_loop.visualization_agent,
+ "generate_visualization_bundle",
+ side_effect=fake_generate,
+ ) as generate,
+ ):
+ assets = validation_loop._generate_validation_figures(
+ 7,
+ workdir,
+ insight={"id": 3, "title": "Insight"},
+ metric_name="accuracy",
+ baseline_metric_value=0.5,
+ best_metric_value=0.62,
+ verdict="confirmed",
+ summary_path=summary_path,
+ )
+
+ self.assertEqual(len(assets), 3)
+ self.assertTrue((workdir / "figures" / "figure_manifest.json").exists())
+ generate.assert_called_once()
+ sql_statements = [call.args[0].strip() for call in execute.call_args_list]
+ self.assertTrue(all("INSERT INTO experiment_artifacts" in sql for sql in sql_statements))
+ params = [call.args[1] for call in execute.call_args_list]
+ self.assertEqual([row[1] for row in params], ["plot", "plot", "source_data", "source_data"])
+
+ def test_generate_validation_figures_is_non_blocking_on_render_error(self):
+ with tempfile.TemporaryDirectory() as tmpdir:
+ workdir = Path(tmpdir)
+ with (
+ mock.patch.object(validation_loop.db, "execute") as execute,
+ mock.patch.object(
+ validation_loop.visualization_agent,
+ "generate_visualization_bundle",
+ side_effect=RuntimeError("renderer unavailable"),
+ ),
+ ):
+ assets = validation_loop._generate_validation_figures(
+ 8,
+ workdir,
+ insight={"id": 4, "title": "Insight"},
+ metric_name="accuracy",
+ baseline_metric_value=0.5,
+ )
+
+ self.assertEqual(assets, [])
+ execute.assert_not_called()
+
+ def test_generate_validation_figures_is_non_blocking_on_artifact_registration_error(self):
+ with tempfile.TemporaryDirectory() as tmpdir:
+ workdir = Path(tmpdir)
+ figures_dir = workdir / "figures"
+ figures_dir.mkdir(parents=True, exist_ok=True)
+ out_svg = figures_dir / "fig_metric_trajectory.svg"
+ out_svg.write_text("", encoding="utf-8")
+ manifest_path = figures_dir / "figure_manifest.json"
+ manifest_path.write_text("{}", encoding="utf-8")
+
+ with (
+ mock.patch.object(
+ validation_loop.visualization_agent,
+ "generate_visualization_bundle",
+ return_value={
+ "assets": [
+ {
+ "figure_id": "fig_metric_trajectory",
+ "figure_kind": "metric_trajectory",
+ "asset_kind": "svg",
+ "path": str(out_svg),
+ "caption": "Metric trajectory.",
+ "source": "experiment_iterations",
+ "metric_name": "accuracy",
+ }
+ ],
+ "manifest_path": str(manifest_path),
+ "references_path": "",
+ },
+ ),
+ mock.patch.object(validation_loop.db, "execute", side_effect=RuntimeError("db unavailable")),
+ ):
+ assets = validation_loop._generate_validation_figures(
+ 9,
+ workdir,
+ insight={"id": 5, "title": "Insight"},
+ metric_name="accuracy",
+ baseline_metric_value=0.5,
+ )
+
+ self.assertEqual(len(assets), 1)
+
def test_run_validation_loop_blocks_non_formal_experiment(self):
with tempfile.TemporaryDirectory() as tmpdir:
workdir = Path(tmpdir)
diff --git a/tests/test_visualization_agent.py b/tests/test_visualization_agent.py
new file mode 100644
index 0000000..13658e1
--- /dev/null
+++ b/tests/test_visualization_agent.py
@@ -0,0 +1,140 @@
+import json
+import tempfile
+import unittest
+from pathlib import Path
+from unittest import mock
+
+from agents import visualization_agent
+
+
+class VisualizationAgentTests(unittest.TestCase):
+ def test_generate_visualization_bundle_emits_result_diagrams_and_report_references(self):
+ with tempfile.TemporaryDirectory() as tmpdir:
+ workdir = Path(tmpdir)
+ (workdir / "results").mkdir(parents=True, exist_ok=True)
+ final_report = workdir / "final_report.md"
+ final_report.write_text("# Existing Report\n\nBody.\n", encoding="utf-8")
+ summary_path = workdir / "results" / "validation_summary.json"
+ summary_path.write_text("{}", encoding="utf-8")
+
+ def fake_metric_figure(iterations, baseline, metric_name, out_svg, **kwargs):
+ out_svg.parent.mkdir(parents=True, exist_ok=True)
+ out_svg.write_text("", encoding="utf-8")
+ out_pdf = out_svg.with_suffix(".pdf")
+ out_pdf.write_text("%PDF", encoding="utf-8")
+ code_path = out_svg.with_suffix(".py")
+ code_path.write_text("print('plot')\n", encoding="utf-8")
+ return {
+ "ok": True,
+ "score": 0.91,
+ "notes": "critic_pass",
+ "attempts": 1,
+ "svg_path": str(out_svg),
+ "pdf_path": str(out_pdf),
+ "code_path": str(code_path),
+ "used_fallback": False,
+ }
+
+ insight = {
+ "id": 3,
+ "title": "Adaptive Routing for Robust Validation",
+ "problem_statement": "Existing methods fail under distribution shift.",
+ "existing_weakness": "Baselines overfit a narrow evidence slice.",
+ "proposed_method": json.dumps(
+ {
+ "name": "Confidence Gated Routing",
+ "one_line": "Route examples by uncertainty before applying the solver.",
+ "definition": "A lightweight controller sends high-uncertainty cases to specialist modules.",
+ }
+ ),
+ "experimental_plan": json.dumps(
+ {
+ "datasets": [{"name": "SyntheticShift"}],
+ "baselines": [{"name": "DirectSolver"}],
+ "metrics": {"primary": "accuracy"},
+ }
+ ),
+ "source_node_ids": json.dumps(["ml.robustness"]),
+ "source_paper_ids": json.dumps(["2401.00001"]),
+ }
+ iterations = [
+ {"iteration_number": 1, "metric_value": 0.5, "status": "ok"},
+ {"iteration_number": 2, "metric_value": 0.64, "status": "keep"},
+ ]
+ result_rows = [
+ {
+ "method_name": "DirectSolver",
+ "dataset_name": "SyntheticShift",
+ "metric_name": "accuracy",
+ "metric_value": 0.58,
+ }
+ ]
+ relation_rows = [
+ {
+ "subject_name": "Confidence routing",
+ "object_name": "Robust validation",
+ "predicate": "improves",
+ "confidence": 0.93,
+ }
+ ]
+
+ with (
+ mock.patch.object(
+ visualization_agent.db,
+ "fetchall",
+ side_effect=[iterations, result_rows, relation_rows],
+ ),
+ mock.patch.object(
+ visualization_agent.figure_agent,
+ "generate_metric_figure_with_retry",
+ side_effect=fake_metric_figure,
+ ),
+ ):
+ bundle = visualization_agent.generate_visualization_bundle(
+ run_id=9,
+ workdir=workdir,
+ insight=insight,
+ metric_name="accuracy",
+ baseline_metric_value=0.5,
+ best_metric_value=0.64,
+ verdict="confirmed",
+ summary_path=summary_path,
+ )
+
+ figure_ids = {asset["figure_id"] for asset in bundle["assets"]}
+ self.assertIn("fig_approach_overview", figure_ids)
+ self.assertIn("fig_method_architecture", figure_ids)
+ self.assertIn("fig_metric_trajectory", figure_ids)
+ self.assertIn("fig_baseline_vs_proposed", figure_ids)
+ self.assertIn("fig_literature_results", figure_ids)
+ self.assertIn("fig_knowledge_subgraph", figure_ids)
+ self.assertTrue(Path(bundle["manifest_path"]).exists())
+ self.assertTrue((workdir / "figures" / "fig_knowledge_subgraph.dot").exists())
+ self.assertIn("Generated Figures", final_report.read_text(encoding="utf-8"))
+ self.assertIn("fig_method_architecture", final_report.read_text(encoding="utf-8"))
+
+ def test_write_figure_references_creates_sidecar_when_final_report_missing(self):
+ with tempfile.TemporaryDirectory() as tmpdir:
+ workdir = Path(tmpdir)
+ asset_path = workdir / "figures" / "fig.svg"
+ asset_path.parent.mkdir(parents=True, exist_ok=True)
+ asset_path.write_text("", encoding="utf-8")
+
+ ref = visualization_agent.write_figure_references(
+ workdir,
+ [
+ {
+ "figure_id": "fig_test",
+ "asset_kind": "svg",
+ "path": str(asset_path),
+ "caption": "A generated figure.",
+ }
+ ],
+ )
+
+ self.assertTrue(Path(ref).exists())
+ self.assertIn("fig_test", Path(ref).read_text(encoding="utf-8"))
+
+
+if __name__ == "__main__":
+ unittest.main()