diff --git a/agents/figure_agent.py b/agents/figure_agent.py index 49b9ce5..7f6db5c 100644 --- a/agents/figure_agent.py +++ b/agents/figure_agent.py @@ -129,7 +129,14 @@ def render_metric_figure_artifacts( ) used_fallback = False try: - subprocess.run([RUNTIME_PYTHON, str(script)], check=True, cwd=str(out_svg.parent), timeout=120) + subprocess.run( + [RUNTIME_PYTHON, str(script)], + check=True, + cwd=str(out_svg.parent), + timeout=120, + capture_output=True, + text=True, + ) except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired): used_fallback = True out_svg.write_text( diff --git a/agents/validation_loop.py b/agents/validation_loop.py index f919568..bfb9c2e 100644 --- a/agents/validation_loop.py +++ b/agents/validation_loop.py @@ -22,6 +22,7 @@ from agents import codex_executor from agents import experiment_supervisor +from agents import visualization_agent from agents.workspace_layout import ensure_run_workspace, plan_file_path, promote_canonical_run, write_latest_status from contracts import DeepInsightSpec, ExperimentIterationPacket, ExperimentSpec from config import ( @@ -511,6 +512,77 @@ def _record_artifact( ) +def _generate_validation_figures( + run_id: int, + workdir: Path, + *, + insight: dict, + metric_name: str, + baseline_metric_value: float | None, + best_metric_value: float | None = None, + verdict: str | None = None, + summary_path: Path | None = None, +) -> list[dict]: + """Generate validation-loop figure artifacts for a completed run.""" + try: + bundle = visualization_agent.generate_visualization_bundle( + run_id=run_id, + workdir=workdir, + insight=insight, + metric_name=metric_name, + baseline_metric_value=baseline_metric_value, + best_metric_value=best_metric_value, + verdict=verdict, + summary_path=summary_path, + ) + except Exception as exc: + print(f"[LOOP] Figure generation skipped for run {run_id}: {exc}", flush=True) + return [] + + assets = [dict(asset) for asset in bundle.get("assets") or [] if isinstance(asset, dict)] + for asset in assets: + path = Path(str(asset.get("path") or "")) + if not path.exists(): + continue + asset_kind = str(asset.get("asset_kind") or "") + artifact_type = "plot" if asset_kind in {"svg", "pdf", "png", "jpg", "jpeg"} else "source_data" + try: + _record_artifact( + run_id, + artifact_type, + path, + metric_key=asset.get("metric_name") or metric_name, + metadata={ + "figure_id": asset.get("figure_id"), + "figure_kind": asset.get("figure_kind"), + "asset_kind": asset_kind, + "caption": asset.get("caption"), + "source": asset.get("source"), + **(asset.get("metadata") if isinstance(asset.get("metadata"), dict) else {}), + }, + ) + except Exception as exc: + print(f"[LOOP] Figure artifact registration skipped for {path}: {exc}", flush=True) + + for key, contract_type in ( + ("manifest_path", "ValidationFigureManifest"), + ("references_path", "ValidationFigureReferences"), + ): + raw_path = str(bundle.get(key) or "").strip() + if raw_path and Path(raw_path).exists(): + try: + _record_artifact( + run_id, + "source_data", + Path(raw_path), + metric_key=metric_name, + metadata={"contract_type": contract_type}, + ) + except Exception as exc: + print(f"[LOOP] Figure manifest registration skipped for {raw_path}: {exc}", flush=True) + return assets + + def _read_experiment_spec( run: dict, insight: dict, @@ -1234,24 +1306,32 @@ def run_validation_loop(run_id: int, execution_context: dict | None = None) -> d promote_canonical_run(insight_id, run_id, insight=insight) summary_path = workdir / "results" / 
"validation_summary.json" - summary_path.write_text( - json.dumps( - { - "run_id": run_id, - "verdict": verdict, - "baseline": baseline, - "best_value": best_value, - "effect_size": effect, - "effect_pct": effect_pct, - "iterations_total": iter_num, - "iterations_kept": total_kept, - "environment_report": environment_report, - "stop_reason": stop_reason, - }, - indent=2, - ), - encoding="utf-8", + summary_payload = { + "run_id": run_id, + "verdict": verdict, + "baseline": baseline, + "best_value": best_value, + "effect_size": effect, + "effect_pct": effect_pct, + "iterations_total": iter_num, + "iterations_kept": total_kept, + "environment_report": environment_report, + "stop_reason": stop_reason, + } + summary_path.write_text(json.dumps(summary_payload, indent=2), encoding="utf-8") + figure_assets = _generate_validation_figures( + run_id, + workdir, + insight=insight, + metric_name=metric_name, + baseline_metric_value=baseline, + best_metric_value=best_value, + verdict=verdict, + summary_path=summary_path, ) + if figure_assets: + summary_payload["figure_artifacts"] = figure_assets + summary_path.write_text(json.dumps(summary_payload, indent=2), encoding="utf-8") _record_artifact( run_id, "source_data", @@ -1274,6 +1354,7 @@ def run_validation_loop(run_id: int, execution_context: dict | None = None) -> d "iterations_total": iter_num, "iterations_kept": total_kept, "summary_path": str(summary_path), + "figure_artifacts": figure_assets, }, run_id=run_id, insight=insight, @@ -1294,4 +1375,5 @@ def run_validation_loop(run_id: int, execution_context: dict | None = None) -> d "total_seconds": total_time, "environment_report": environment_report, "stop_reason": stop_reason, + "figure_artifacts": figure_assets, } diff --git a/agents/visualization_agent.py b/agents/visualization_agent.py new file mode 100644 index 0000000..16ddc5a --- /dev/null +++ b/agents/visualization_agent.py @@ -0,0 +1,846 @@ +"""Automated research visualization pipeline for SciForge runs. + +This module turns a completed validation run into manuscript-ready figure +artifacts. It deliberately uses deterministic renderers first: matplotlib for +numeric plots and simple diagrams-as-code for conceptual figures. That keeps +the validation loop stable in offline and CI environments while still leaving +DOT sidecars that can be refined by downstream manuscript agents. +""" + +from __future__ import annotations + +import html +import json +import math +import re +import subprocess +import textwrap +from pathlib import Path +from typing import Any + +from agents import figure_agent +from config import RUNTIME_PYTHON +from db import database as db + + +MAX_LABEL_CHARS = 52 +MAX_KG_EDGES = 14 +MAX_RESULT_ROWS = 8 + + +def _json_load(value: Any, default: Any) -> Any: + if value is None: + return default + if isinstance(value, (dict, list)): + return value + try: + return json.loads(value) + except (json.JSONDecodeError, TypeError): + return default + + +def _as_list(value: Any) -> list: + loaded = _json_load(value, []) + if isinstance(loaded, list): + return loaded + if loaded in (None, ""): + return [] + return [loaded] + + +def _as_dict(value: Any) -> dict[str, Any]: + loaded = _json_load(value, {}) + return loaded if isinstance(loaded, dict) else {} + + +def _shorten(value: Any, limit: int = MAX_LABEL_CHARS) -> str: + text = re.sub(r"\s+", " ", str(value or "")).strip() + if len(text) <= limit: + return text + return text[: max(0, limit - 1)].rstrip() + "..." 
+ + +def _escape(value: Any) -> str: + return html.escape(str(value or ""), quote=True) + + +def _wrap_svg_lines(text: Any, width: int = 34, max_lines: int = 3) -> list[str]: + raw = _shorten(text, width * max_lines) + lines = textwrap.wrap(raw, width=width) or [""] + return lines[:max_lines] + + +def _asset( + *, + figure_id: str, + path: Path, + asset_kind: str, + figure_kind: str, + caption: str, + source: str, + metric_name: str | None = None, + metadata: dict[str, Any] | None = None, +) -> dict[str, Any]: + return { + "figure_id": figure_id, + "figure_kind": figure_kind, + "asset_kind": asset_kind, + "path": str(path), + "caption": caption, + "source": source, + "metric_name": metric_name or "", + "metadata": metadata or {}, + } + + +def _write_text_block_svg( + path: Path, + *, + title: str, + boxes: list[dict[str, Any]], + arrows: list[tuple[int, int]] | None = None, + width: int = 980, + height: int = 520, +) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + arrows = arrows or [] + box_svg: list[str] = [] + for idx, box in enumerate(boxes): + x = int(box.get("x", 40)) + y = int(box.get("y", 90)) + w = int(box.get("w", 250)) + h = int(box.get("h", 110)) + fill = str(box.get("fill") or "#f8fafc") + stroke = str(box.get("stroke") or "#334155") + label = str(box.get("label") or f"Box {idx + 1}") + body = str(box.get("body") or "") + lines = _wrap_svg_lines(body, width=max(18, int(w / 9)), max_lines=4) + text_parts = [ + f'{_escape(label)}' + ] + for line_idx, line in enumerate(lines): + text_parts.append( + f'{_escape(line)}' + ) + box_svg.append( + "\n".join( + [ + f'', + *text_parts, + ] + ) + ) + arrow_svg: list[str] = [] + for src, dst in arrows: + if src >= len(boxes) or dst >= len(boxes): + continue + a = boxes[src] + b = boxes[dst] + x1 = int(a.get("x", 0)) + int(a.get("w", 0)) + y1 = int(a.get("y", 0)) + int(a.get("h", 0)) // 2 + x2 = int(b.get("x", 0)) + y2 = int(b.get("y", 0)) + int(b.get("h", 0)) // 2 + arrow_svg.append( + f'' + ) + svg = f""" + + + + + + +{_escape(title)} +{chr(10).join(arrow_svg)} +{chr(10).join(box_svg)} + +""" + path.write_text(svg, encoding="utf-8") + + +def _write_dot(path: Path, *, title: str, nodes: list[tuple[str, str]], edges: list[tuple[str, str, str]]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + lines = [ + "digraph G {", + " graph [rankdir=LR, bgcolor=white, labelloc=t, fontsize=20, fontname=Helvetica];", + f" label={json.dumps(title)};", + " node [shape=box, style=\"rounded,filled\", fillcolor=\"#f8fafc\", color=\"#334155\", fontname=Helvetica];", + " edge [color=\"#475569\", fontname=Helvetica];", + ] + for node_id, label in nodes: + lines.append(f" {node_id} [label={json.dumps(_shorten(label, 80))}];") + for src, dst, label in edges: + edge_label = f" [label={json.dumps(_shorten(label, 40))}]" if label else "" + lines.append(f" {src} -> {dst}{edge_label};") + lines.append("}") + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + + +def _write_bar_script( + script_path: Path, + *, + labels: list[str], + values: list[float], + title: str, + ylabel: str, + out_svg: Path, + out_pdf: Path, +) -> None: + script_path.write_text( + f""" +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt + +labels = {labels!r} +values = {values!r} +colors = ["#64748b", "#2563eb", "#0f766e", "#b45309", "#7c3aed", "#be123c", "#0369a1", "#4d7c0f"] +fig, ax = plt.subplots(figsize=(7.6, 4.4)) +bars = ax.bar(range(len(values)), values, color=colors[:len(values)], width=0.62) +ax.set_title({title!r}, fontsize=14, 
pad=10) +ax.set_ylabel({ylabel!r}, fontsize=11) +ax.set_xticks(range(len(labels))) +ax.set_xticklabels(labels, rotation=18, ha="right", fontsize=9) +ax.grid(axis="y", alpha=0.22) +ax.spines["top"].set_visible(False) +ax.spines["right"].set_visible(False) +for bar, value in zip(bars, values): + height = bar.get_height() + ax.annotate(f"{{value:.4g}}", xy=(bar.get_x() + bar.get_width() / 2, height), + xytext=(0, 4), textcoords="offset points", ha="center", va="bottom", fontsize=9) +fig.tight_layout() +fig.savefig({str(out_svg)!r}, format="svg") +fig.savefig({str(out_pdf)!r}, format="pdf") +plt.close(fig) +""".strip() + + "\n", + encoding="utf-8", + ) + + +def _write_fallback_bar_svg(path: Path, *, labels: list[str], values: list[float], title: str, ylabel: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + width = 820 + height = 460 + plot_x = 90 + plot_y = 90 + plot_w = 650 + plot_h = 260 + max_value = max([abs(v) for v in values] or [1.0]) or 1.0 + bar_w = max(28, int(plot_w / max(1, len(values)) * 0.55)) + chunks = [ + f'', + '', + f'{_escape(title)}', + f'{_escape(ylabel)}', + f'', + ] + for idx, (label, value) in enumerate(zip(labels, values)): + cx = plot_x + int((idx + 0.5) * plot_w / max(1, len(values))) + h = int(abs(value) / max_value * plot_h) + x = cx - bar_w // 2 + y = plot_y + plot_h - h + color = "#2563eb" if idx else "#64748b" + chunks.extend( + [ + f'', + f'{value:.4g}', + f'{_escape(_shorten(label, 18))}', + ] + ) + chunks.append("") + path.write_text("\n".join(chunks) + "\n", encoding="utf-8") + + +def _render_bar_chart( + *, + labels: list[str], + values: list[float], + title: str, + ylabel: str, + out_svg: Path, +) -> dict[str, str | bool]: + out_svg.parent.mkdir(parents=True, exist_ok=True) + out_pdf = out_svg.with_suffix(".pdf") + script_path = out_svg.with_suffix(".py") + _write_bar_script( + script_path, + labels=labels, + values=values, + title=title, + ylabel=ylabel, + out_svg=out_svg, + out_pdf=out_pdf, + ) + used_fallback = False + try: + subprocess.run( + [RUNTIME_PYTHON, str(script_path)], + cwd=str(out_svg.parent), + timeout=120, + check=True, + capture_output=True, + text=True, + ) + except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired): + used_fallback = True + _write_fallback_bar_svg(out_svg, labels=labels, values=values, title=title, ylabel=ylabel) + if out_pdf.exists(): + out_pdf.unlink() + return { + "svg_path": str(out_svg), + "pdf_path": str(out_pdf) if out_pdf.exists() else "", + "code_path": str(script_path), + "used_fallback": used_fallback, + } + + +def _fetch_iterations(run_id: int) -> list[dict[str, Any]]: + rows = db.fetchall( + """ + SELECT iteration_number, phase, metric_value, metric_name, status, description + FROM experiment_iterations + WHERE run_id=? + ORDER BY iteration_number + """, + (run_id,), + ) + return [dict(row) for row in rows] + + +def _fetch_kg_relations(insight: dict[str, Any]) -> list[dict[str, Any]]: + source_node_ids = [str(x) for x in _as_list(insight.get("source_node_ids")) if str(x).strip()] + source_paper_ids = [str(x) for x in _as_list(insight.get("source_paper_ids")) if str(x).strip()] + filters: list[str] = [] + params: list[Any] = [] + if source_node_ids: + filters.append("gr.node_id IN ({})".format(",".join("?" for _ in source_node_ids))) + params.extend(source_node_ids) + if source_paper_ids: + filters.append("gr.paper_id IN ({})".format(",".join("?" 
for _ in source_paper_ids))) + params.extend(source_paper_ids) + where = "WHERE " + " OR ".join(filters) if filters else "" + sql = f""" + SELECT + gr.node_id, + gr.predicate, + gr.confidence, + subj.canonical_name AS subject_name, + obj.canonical_name AS object_name, + subj.entity_type AS subject_type, + obj.entity_type AS object_type + FROM graph_relations gr + JOIN graph_entities subj ON subj.id = gr.subject_entity_id + JOIN graph_entities obj ON obj.id = gr.object_entity_id + {where} + ORDER BY gr.confidence DESC, gr.id DESC + LIMIT {MAX_KG_EDGES} + """ + try: + return [dict(row) for row in db.fetchall(sql, tuple(params))] + except Exception: + return [] + + +def _fetch_literature_results(insight: dict[str, Any]) -> list[dict[str, Any]]: + source_node_ids = [str(x) for x in _as_list(insight.get("source_node_ids")) if str(x).strip()] + source_paper_ids = [str(x) for x in _as_list(insight.get("source_paper_ids")) if str(x).strip()] + filters: list[str] = [] + params: list[Any] = [] + if source_node_ids: + filters.append("node_id IN ({})".format(",".join("?" for _ in source_node_ids))) + params.extend(source_node_ids) + if source_paper_ids: + filters.append("paper_id IN ({})".format(",".join("?" for _ in source_paper_ids))) + params.extend(source_paper_ids) + where = "WHERE metric_value IS NOT NULL" + if filters: + where += " AND (" + " OR ".join(filters) + ")" + sql = f""" + SELECT method_name, dataset_name, metric_name, metric_value + FROM results + {where} + ORDER BY metric_value DESC + LIMIT {MAX_RESULT_ROWS} + """ + try: + return [dict(row) for row in db.fetchall(sql, tuple(params))] + except Exception: + return [] + + +def _method_name(insight: dict[str, Any]) -> str: + method = _as_dict(insight.get("proposed_method")) + return str(method.get("name") or insight.get("title") or "Proposed method") + + +def _experimental_metric_name(insight: dict[str, Any], fallback: str) -> str: + plan = _as_dict(insight.get("experimental_plan")) + metrics = plan.get("metrics") + if isinstance(metrics, dict): + return str(metrics.get("primary") or metrics.get("name") or fallback) + if isinstance(metrics, list) and metrics: + first = metrics[0] + if isinstance(first, dict): + return str(first.get("name") or fallback) + return str(first or fallback) + return fallback + + +def _generate_metric_trajectory( + *, + iterations: list[dict[str, Any]], + baseline_metric_value: float | None, + metric_name: str, + figures_dir: Path, +) -> list[dict[str, Any]]: + out_svg = figures_dir / "fig_metric_trajectory.svg" + meta = figure_agent.generate_metric_figure_with_retry( + iterations, + baseline_metric_value, + metric_name, + out_svg, + title=f"{metric_name} trajectory", + objective="Show validation-loop metric trajectory against the reproduced baseline.", + ) + caption = f"Validation trajectory for {metric_name}, with the reproduced baseline shown as reference." 
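Both `_fetch_*` helpers build their `IN (...)` clauses by emitting one `?` placeholder per id, so the queries stay fully parameterized even with variable-length id lists. A runnable sketch of the same pattern against an in-memory SQLite database; `fetch_by_ids` and the seeded rows are hypothetical, and only the `graph_relations` table name comes from the diff:

```python
# Sketch of the placeholder-expansion pattern used by _fetch_kg_relations and
# _fetch_literature_results: one "?" per id, values bound separately.
import sqlite3

def fetch_by_ids(conn: sqlite3.Connection, node_ids: list[str]) -> list[tuple]:
    if not node_ids:
        return []
    placeholders = ",".join("?" for _ in node_ids)
    sql = f"SELECT node_id, predicate FROM graph_relations WHERE node_id IN ({placeholders})"
    return conn.execute(sql, tuple(node_ids)).fetchall()

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE graph_relations (node_id TEXT, predicate TEXT)")
conn.executemany(
    "INSERT INTO graph_relations VALUES (?, ?)",
    [("ml.robustness", "improves"), ("ml.other", "relates_to")],
)
print(fetch_by_ids(conn, ["ml.robustness"]))  # [('ml.robustness', 'improves')]
```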
+ assets: list[dict[str, Any]] = [] + for key, asset_kind in (("svg_path", "svg"), ("pdf_path", "pdf"), ("code_path", "source")): + raw = str(meta.get(key) or "").strip() + if raw and Path(raw).exists(): + assets.append( + _asset( + figure_id="fig_metric_trajectory", + figure_kind="metric_trajectory", + asset_kind=asset_kind, + path=Path(raw), + caption=caption, + source="experiment_iterations", + metric_name=metric_name, + metadata={ + "score": meta.get("score"), + "notes": meta.get("notes"), + "attempts": meta.get("attempts"), + "used_fallback": meta.get("used_fallback"), + }, + ) + ) + return assets + + +def _generate_baseline_comparison( + *, + baseline_metric_value: float | None, + best_metric_value: float | None, + metric_name: str, + figures_dir: Path, +) -> list[dict[str, Any]]: + if baseline_metric_value is None or best_metric_value is None: + return [] + labels = ["baseline", "best proposed"] + values = [float(baseline_metric_value), float(best_metric_value)] + meta = _render_bar_chart( + labels=labels, + values=values, + title=f"Baseline vs proposed ({metric_name})", + ylabel=metric_name, + out_svg=figures_dir / "fig_baseline_vs_proposed.svg", + ) + caption = f"Baseline versus best proposed validation result for {metric_name}." + assets: list[dict[str, Any]] = [] + for key, asset_kind in (("svg_path", "svg"), ("pdf_path", "pdf"), ("code_path", "source")): + raw = str(meta.get(key) or "").strip() + if raw and Path(raw).exists(): + assets.append( + _asset( + figure_id="fig_baseline_vs_proposed", + figure_kind="experiment_comparison", + asset_kind=asset_kind, + path=Path(raw), + caption=caption, + source="experiment_runs", + metric_name=metric_name, + metadata={"used_fallback": meta.get("used_fallback")}, + ) + ) + return assets + + +def _generate_literature_results_chart( + *, + rows: list[dict[str, Any]], + figures_dir: Path, +) -> list[dict[str, Any]]: + if not rows: + return [] + metric_name = str(rows[0].get("metric_name") or "metric") + labels = [ + _shorten(f"{row.get('method_name') or 'method'} / {row.get('dataset_name') or 'dataset'}", 32) + for row in rows + ] + values = [float(row.get("metric_value") or 0.0) for row in rows] + meta = _render_bar_chart( + labels=labels, + values=values, + title=f"Knowledge graph result snapshot ({metric_name})", + ylabel=metric_name, + out_svg=figures_dir / "fig_literature_results.svg", + ) + caption = "Top extracted literature results from the DeepGraph results table for this insight context." 
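Each chart generator ends with the same existence-guarded loop that turns `svg_path`/`pdf_path`/`code_path` entries into asset records, silently dropping anything the renderer failed to produce. A compact sketch of that loop in isolation; `collect_assets` is a hypothetical stand-in:

```python
# Sketch of the asset-collection loop repeated by the generators above:
# only paths that actually exist on disk become asset records.
from pathlib import Path
import tempfile

def collect_assets(meta: dict, figure_id: str) -> list[dict]:
    assets = []
    for key, asset_kind in (("svg_path", "svg"), ("pdf_path", "pdf"), ("code_path", "source")):
        raw = str(meta.get(key) or "").strip()
        if raw and Path(raw).exists():
            assets.append({"figure_id": figure_id, "asset_kind": asset_kind, "path": raw})
    return assets

with tempfile.TemporaryDirectory() as tmp:
    svg = Path(tmp) / "fig.svg"
    svg.write_text("<svg/>", encoding="utf-8")
    meta = {"svg_path": str(svg), "pdf_path": "", "code_path": str(Path(tmp) / "missing.py")}
    assert [a["asset_kind"] for a in collect_assets(meta, "fig_demo")] == ["svg"]
```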
+ assets: list[dict[str, Any]] = [] + for key, asset_kind in (("svg_path", "svg"), ("pdf_path", "pdf"), ("code_path", "source")): + raw = str(meta.get(key) or "").strip() + if raw and Path(raw).exists(): + assets.append( + _asset( + figure_id="fig_literature_results", + figure_kind="literature_result_chart", + asset_kind=asset_kind, + path=Path(raw), + caption=caption, + source="results", + metric_name=metric_name, + metadata={"row_count": len(rows), "used_fallback": meta.get("used_fallback")}, + ) + ) + return assets + + +def _generate_overview_diagram( + *, + insight: dict[str, Any], + verdict: str | None, + figures_dir: Path, +) -> list[dict[str, Any]]: + method = _as_dict(insight.get("proposed_method")) + plan = _as_dict(insight.get("experimental_plan")) + metrics = plan.get("metrics") if isinstance(plan.get("metrics"), dict) else {} + out_svg = figures_dir / "fig_approach_overview.svg" + boxes = [ + { + "x": 44, + "y": 112, + "w": 245, + "h": 145, + "label": "Current limitation", + "body": insight.get("existing_weakness") or insight.get("problem_statement") or "Existing method limitations", + "fill": "#fef2f2", + "stroke": "#b91c1c", + }, + { + "x": 365, + "y": 112, + "w": 245, + "h": 145, + "label": "Proposed improvement", + "body": method.get("one_line") or method.get("definition") or _method_name(insight), + "fill": "#eff6ff", + "stroke": "#1d4ed8", + }, + { + "x": 686, + "y": 112, + "w": 245, + "h": 145, + "label": "Validation evidence", + "body": f"Metric: {metrics.get('primary') or 'primary metric'}; verdict: {verdict or 'pending'}", + "fill": "#ecfdf5", + "stroke": "#047857", + }, + ] + _write_text_block_svg(out_svg, title=_shorten(insight.get("title") or "Approach overview", 80), boxes=boxes, arrows=[(0, 1), (1, 2)]) + out_dot = out_svg.with_suffix(".dot") + _write_dot( + out_dot, + title="Approach overview", + nodes=[("limitation", "Current limitation"), ("method", _method_name(insight)), ("evidence", "Validation evidence")], + edges=[("limitation", "method", "addresses"), ("method", "evidence", "validated by")], + ) + caption = "Overview of the limitation, proposed method, and validation evidence produced by SciForge." 
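Every conceptual diagram also gets a DOT sidecar via `_write_dot`, which leans on `json.dumps` to produce safely quoted Graphviz string literals for labels. A trimmed, runnable sketch of that convention; `write_dot` here is a simplified stand-in, not the module function:

```python
# Minimal sketch of the DOT-sidecar convention: json.dumps doubles as a
# Graphviz-safe string escaper for titles, node labels, and edge labels.
import json
from pathlib import Path
import tempfile

def write_dot(path: Path, title: str, nodes: list[tuple[str, str]], edges: list[tuple[str, str, str]]) -> None:
    lines = ["digraph G {", f"  label={json.dumps(title)};"]
    for node_id, label in nodes:
        lines.append(f"  {node_id} [label={json.dumps(label)}];")
    for src, dst, label in edges:
        suffix = f" [label={json.dumps(label)}]" if label else ""
        lines.append(f"  {src} -> {dst}{suffix};")
    lines.append("}")
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")

with tempfile.TemporaryDirectory() as tmp:
    out = Path(tmp) / "overview.dot"
    write_dot(out, "Approach overview",
              [("limitation", "Current limitation"), ("method", "Proposed method")],
              [("limitation", "method", "addresses")])
    print(out.read_text(encoding="utf-8"))
```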
+ return [ + _asset( + figure_id="fig_approach_overview", + figure_kind="overview_diagram", + asset_kind="svg", + path=out_svg, + caption=caption, + source="deep_insights", + ), + _asset( + figure_id="fig_approach_overview", + figure_kind="overview_diagram", + asset_kind="dot", + path=out_dot, + caption=caption, + source="deep_insights", + ), + ] + + +def _generate_method_architecture_diagram( + *, + insight: dict[str, Any], + figures_dir: Path, +) -> list[dict[str, Any]]: + method = _as_dict(insight.get("proposed_method")) + plan = _as_dict(insight.get("experimental_plan")) + baselines = _as_list(plan.get("baselines")) + datasets = _as_list(plan.get("datasets")) + method_label = method.get("name") or insight.get("title") or "Proposed method" + out_svg = figures_dir / "fig_method_architecture.svg" + boxes = [ + { + "x": 44, + "y": 112, + "w": 220, + "h": 130, + "label": "Inputs", + "body": ", ".join(_shorten(item.get("name") if isinstance(item, dict) else item, 24) for item in datasets[:3]) or "Experiment data", + "fill": "#f8fafc", + "stroke": "#334155", + }, + { + "x": 332, + "y": 92, + "w": 300, + "h": 170, + "label": _shorten(method_label, 28), + "body": method.get("definition") or method.get("one_line") or "Structured proposed method", + "fill": "#eff6ff", + "stroke": "#2563eb", + }, + { + "x": 704, + "y": 112, + "w": 220, + "h": 130, + "label": "Evaluation", + "body": f"Baselines: {len(baselines)}; metric: {_experimental_metric_name(insight, 'metric')}", + "fill": "#f0fdf4", + "stroke": "#15803d", + }, + ] + _write_text_block_svg(out_svg, title="Method architecture", boxes=boxes, arrows=[(0, 1), (1, 2)]) + out_dot = out_svg.with_suffix(".dot") + _write_dot( + out_dot, + title="Method architecture", + nodes=[("inputs", "Inputs"), ("method", method_label), ("eval", "Evaluation")], + edges=[("inputs", "method", "feeds"), ("method", "eval", "measured by")], + ) + caption = "Architecture-style diagram derived from the structured proposed method and experimental plan." 
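The `<svg>`/`<rect>`/`<text>` markup emitted by `_write_text_block_svg` did not survive in this rendering of the diff (the tags were stripped), so the following is a hedged reconstruction of the idea: titled boxes with wrapped label lines written as raw SVG. The exact element names and attributes are assumptions; only the overall shape follows the module.

```python
# Hedged reconstruction of the labeled-box SVG writer; tag names and
# attributes are assumed, since the markup was stripped from this diff.
import html
from pathlib import Path
import tempfile
import textwrap

def write_box_svg(path: Path, title: str, boxes: list[dict]) -> None:
    parts = [
        '<svg xmlns="http://www.w3.org/2000/svg" width="980" height="520">',
        f'<text x="490" y="48" text-anchor="middle" font-size="20">{html.escape(title)}</text>',
    ]
    for box in boxes:
        x, y, w, h = box["x"], box["y"], box["w"], box["h"]
        parts.append(f'<rect x="{x}" y="{y}" width="{w}" height="{h}" fill="#f8fafc" stroke="#334155" rx="10"/>')
        parts.append(f'<text x="{x + 12}" y="{y + 24}" font-size="13" font-weight="bold">{html.escape(box["label"])}</text>')
        # Wrap the body text into at most four lines, scaled to the box width.
        for i, line in enumerate(textwrap.wrap(box.get("body", ""), width=max(18, w // 9))[:4]):
            parts.append(f'<text x="{x + 12}" y="{y + 46 + 16 * i}" font-size="11">{html.escape(line)}</text>')
    parts.append("</svg>")
    path.write_text("\n".join(parts), encoding="utf-8")

with tempfile.TemporaryDirectory() as tmp:
    out = Path(tmp) / "fig.svg"
    write_box_svg(out, "Method architecture",
                  [{"x": 44, "y": 112, "w": 220, "h": 130,
                    "label": "Inputs", "body": "Experiment data feeding the proposed method."}])
    assert out.read_text(encoding="utf-8").startswith("<svg")
```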
+ return [ + _asset( + figure_id="fig_method_architecture", + figure_kind="method_architecture", + asset_kind="svg", + path=out_svg, + caption=caption, + source="deep_insights.proposed_method", + ), + _asset( + figure_id="fig_method_architecture", + figure_kind="method_architecture", + asset_kind="dot", + path=out_dot, + caption=caption, + source="deep_insights.proposed_method", + ), + ] + + +def _generate_kg_subgraph_diagram( + *, + relations: list[dict[str, Any]], + figures_dir: Path, +) -> list[dict[str, Any]]: + if not relations: + return [] + nodes: dict[str, tuple[float, float]] = {} + labels: dict[str, str] = {} + edges: list[tuple[str, str, str]] = [] + for row in relations[:MAX_KG_EDGES]: + subject = _shorten(row.get("subject_name") or "subject", 30) + obj = _shorten(row.get("object_name") or "object", 30) + predicate = _shorten(row.get("predicate") or "relates_to", 28) + labels.setdefault(subject, subject) + labels.setdefault(obj, obj) + edges.append((subject, obj, predicate)) + node_names = sorted(labels) + width = 980 + height = 620 + cx, cy = width / 2, height / 2 + 24 + radius = min(260, max(150, 34 * len(node_names))) + for idx, name in enumerate(node_names): + angle = 2 * math.pi * idx / max(1, len(node_names)) - math.pi / 2 + nodes[name] = (cx + radius * math.cos(angle), cy + radius * math.sin(angle)) + out_svg = figures_dir / "fig_knowledge_subgraph.svg" + out_svg.parent.mkdir(parents=True, exist_ok=True) + chunks = [ + f'', + "", + ' ', + ' ', + " ", + "", + '', + 'Knowledge graph subgraph', + ] + for src, dst, pred in edges: + x1, y1 = nodes[src] + x2, y2 = nodes[dst] + mx, my = (x1 + x2) / 2, (y1 + y2) / 2 + chunks.append(f'') + chunks.append(f'{_escape(pred)}') + for name, (x, y) in nodes.items(): + chunks.append(f'') + for idx, line in enumerate(_wrap_svg_lines(name, width=13, max_lines=3)): + chunks.append(f'{_escape(line)}') + chunks.append("") + out_svg.write_text("\n".join(chunks) + "\n", encoding="utf-8") + out_dot = out_svg.with_suffix(".dot") + dot_ids = {name: f"n{idx}" for idx, name in enumerate(node_names)} + _write_dot( + out_dot, + title="Knowledge graph subgraph", + nodes=[(dot_ids[name], name) for name in node_names], + edges=[(dot_ids[src], dot_ids[dst], pred) for src, dst, pred in edges], + ) + caption = "Entity-relation subgraph extracted from DeepGraph knowledge graph context for this insight." + return [ + _asset( + figure_id="fig_knowledge_subgraph", + figure_kind="knowledge_graph_subgraph", + asset_kind="svg", + path=out_svg, + caption=caption, + source="graph_relations", + metadata={"edge_count": len(edges), "node_count": len(node_names)}, + ), + _asset( + figure_id="fig_knowledge_subgraph", + figure_kind="knowledge_graph_subgraph", + asset_kind="dot", + path=out_dot, + caption=caption, + source="graph_relations", + metadata={"edge_count": len(edges), "node_count": len(node_names)}, + ), + ] + + +def write_figure_references(workdir: Path, assets: list[dict[str, Any]]) -> str: + """Reference generated figures in final_report.md when it exists. + + If no final report exists in the experiment workspace, write a standalone + figure reference file so downstream manuscript agents can still consume the + visual inventory. 
+ """ + logical: dict[str, dict[str, Any]] = {} + for asset in assets: + if asset.get("asset_kind") != "svg": + continue + logical.setdefault(str(asset["figure_id"]), asset) + lines = ["## Generated Figures", ""] + for asset in logical.values(): + rel_path = Path(str(asset["path"])) + try: + rel = rel_path.relative_to(workdir) + except ValueError: + rel = rel_path + lines.append(f"- `{asset['figure_id']}`: {asset.get('caption') or ''} (`{rel}`)") + block = "\n".join(lines).strip() + "\n" + marker_start = "" + marker_end = "" + wrapped = f"\n{marker_start}\n{block}{marker_end}\n" + for candidate in (workdir / "final_report.md", workdir / "results" / "final_report.md"): + if not candidate.exists(): + continue + text = candidate.read_text(encoding="utf-8", errors="replace") + if marker_start in text and marker_end in text: + before, rest = text.split(marker_start, 1) + _, after = rest.split(marker_end, 1) + candidate.write_text(before.rstrip() + wrapped + after.lstrip(), encoding="utf-8") + else: + candidate.write_text(text.rstrip() + "\n" + wrapped, encoding="utf-8") + return str(candidate) + ref_path = workdir / "figures" / "figure_references.md" + ref_path.parent.mkdir(parents=True, exist_ok=True) + ref_path.write_text(block, encoding="utf-8") + return str(ref_path) + + +def generate_visualization_bundle( + *, + run_id: int, + workdir: Path, + insight: dict[str, Any], + metric_name: str, + baseline_metric_value: float | None, + best_metric_value: float | None = None, + verdict: str | None = None, + summary_path: Path | None = None, +) -> dict[str, Any]: + """Generate all SciForge visualization artifacts for a completed run.""" + workdir = Path(workdir) + figures_dir = workdir / "figures" + figures_dir.mkdir(parents=True, exist_ok=True) + iterations = _fetch_iterations(run_id) + assets: list[dict[str, Any]] = [] + skipped: list[dict[str, str]] = [] + + generators = ( + ("overview_diagram", lambda: _generate_overview_diagram(insight=insight, verdict=verdict, figures_dir=figures_dir)), + ("method_architecture", lambda: _generate_method_architecture_diagram(insight=insight, figures_dir=figures_dir)), + ( + "metric_trajectory", + lambda: _generate_metric_trajectory( + iterations=iterations, + baseline_metric_value=baseline_metric_value, + metric_name=metric_name, + figures_dir=figures_dir, + ), + ), + ( + "baseline_comparison", + lambda: _generate_baseline_comparison( + baseline_metric_value=baseline_metric_value, + best_metric_value=best_metric_value, + metric_name=metric_name, + figures_dir=figures_dir, + ), + ), + ("literature_results", lambda: _generate_literature_results_chart(rows=_fetch_literature_results(insight), figures_dir=figures_dir)), + ("knowledge_graph_subgraph", lambda: _generate_kg_subgraph_diagram(relations=_fetch_kg_relations(insight), figures_dir=figures_dir)), + ) + + for name, build in generators: + try: + produced = build() + if produced: + assets.extend(produced) + else: + skipped.append({"figure_kind": name, "reason": "no_source_data"}) + except Exception as exc: + skipped.append({"figure_kind": name, "reason": str(exc)[:240]}) + + references_path = write_figure_references(workdir, assets) if assets else "" + manifest = { + "run_id": run_id, + "deep_insight_id": insight.get("id"), + "metric_name": metric_name, + "baseline_metric_value": baseline_metric_value, + "best_metric_value": best_metric_value, + "verdict": verdict, + "validation_summary": str(summary_path) if summary_path else "", + "assets": assets, + "skipped": skipped, + "references_path": references_path, + 
} + manifest_path = figures_dir / "figure_manifest.json" + manifest_path.write_text(json.dumps(manifest, indent=2, default=str), encoding="utf-8") + return { + **manifest, + "manifest_path": str(manifest_path), + "figures_dir": str(figures_dir), + } diff --git a/tests/test_validation_loop.py b/tests/test_validation_loop.py index f55dcb9..a809e68 100644 --- a/tests/test_validation_loop.py +++ b/tests/test_validation_loop.py @@ -33,6 +33,149 @@ def test_find_train_file_prefers_nested_proxy_path(self): self.assertEqual(resolved, target) + def test_generate_validation_figures_records_plot_artifacts(self): + with tempfile.TemporaryDirectory() as tmpdir: + workdir = Path(tmpdir) + summary_path = workdir / "results" / "validation_summary.json" + summary_path.parent.mkdir(parents=True, exist_ok=True) + summary_path.write_text("{}", encoding="utf-8") + + def fake_generate(**kwargs): + figures_dir = kwargs["workdir"] / "figures" + figures_dir.mkdir(parents=True, exist_ok=True) + out_svg = figures_dir / "fig_metric_trajectory.svg" + out_svg.write_text("accuracy trajectory", encoding="utf-8") + out_pdf = figures_dir / "fig_metric_trajectory.pdf" + out_pdf.write_text("%PDF", encoding="utf-8") + code_path = figures_dir / "fig_metric_trajectory.py" + code_path.write_text("print('plot')\n", encoding="utf-8") + manifest_path = figures_dir / "figure_manifest.json" + manifest_path.write_text("{}", encoding="utf-8") + return { + "assets": [ + { + "figure_id": "fig_metric_trajectory", + "figure_kind": "metric_trajectory", + "asset_kind": "svg", + "path": str(out_svg), + "caption": "Metric trajectory.", + "source": "experiment_iterations", + "metric_name": "accuracy", + }, + { + "figure_id": "fig_metric_trajectory", + "figure_kind": "metric_trajectory", + "asset_kind": "pdf", + "path": str(out_pdf), + "caption": "Metric trajectory.", + "source": "experiment_iterations", + "metric_name": "accuracy", + }, + { + "figure_id": "fig_metric_trajectory", + "figure_kind": "metric_trajectory", + "asset_kind": "source", + "path": str(code_path), + "caption": "Metric trajectory.", + "source": "experiment_iterations", + "metric_name": "accuracy", + }, + ], + "manifest_path": str(manifest_path), + "references_path": "", + } + + with ( + mock.patch.object(validation_loop.db, "execute") as execute, + mock.patch.object( + validation_loop.visualization_agent, + "generate_visualization_bundle", + side_effect=fake_generate, + ) as generate, + ): + assets = validation_loop._generate_validation_figures( + 7, + workdir, + insight={"id": 3, "title": "Insight"}, + metric_name="accuracy", + baseline_metric_value=0.5, + best_metric_value=0.62, + verdict="confirmed", + summary_path=summary_path, + ) + + self.assertEqual(len(assets), 3) + self.assertTrue((workdir / "figures" / "figure_manifest.json").exists()) + generate.assert_called_once() + artifact_types = [call.args[0].strip() for call in execute.call_args_list] + self.assertTrue(all("INSERT INTO experiment_artifacts" in sql for sql in artifact_types)) + params = [call.args[1] for call in execute.call_args_list] + self.assertEqual([row[1] for row in params], ["plot", "plot", "source_data", "source_data"]) + + def test_generate_validation_figures_is_non_blocking_on_render_error(self): + with tempfile.TemporaryDirectory() as tmpdir: + workdir = Path(tmpdir) + with ( + mock.patch.object(validation_loop.db, "execute") as execute, + mock.patch.object( + validation_loop.visualization_agent, + "generate_visualization_bundle", + side_effect=RuntimeError("renderer unavailable"), + ), + ): + 
assets = validation_loop._generate_validation_figures( + 8, + workdir, + insight={"id": 4, "title": "Insight"}, + metric_name="accuracy", + baseline_metric_value=0.5, + ) + + self.assertEqual(assets, []) + execute.assert_not_called() + + def test_generate_validation_figures_is_non_blocking_on_artifact_registration_error(self): + with tempfile.TemporaryDirectory() as tmpdir: + workdir = Path(tmpdir) + figures_dir = workdir / "figures" + figures_dir.mkdir(parents=True, exist_ok=True) + out_svg = figures_dir / "fig_metric_trajectory.svg" + out_svg.write_text("", encoding="utf-8") + manifest_path = figures_dir / "figure_manifest.json" + manifest_path.write_text("{}", encoding="utf-8") + + with ( + mock.patch.object( + validation_loop.visualization_agent, + "generate_visualization_bundle", + return_value={ + "assets": [ + { + "figure_id": "fig_metric_trajectory", + "figure_kind": "metric_trajectory", + "asset_kind": "svg", + "path": str(out_svg), + "caption": "Metric trajectory.", + "source": "experiment_iterations", + "metric_name": "accuracy", + } + ], + "manifest_path": str(manifest_path), + "references_path": "", + }, + ), + mock.patch.object(validation_loop.db, "execute", side_effect=RuntimeError("db unavailable")), + ): + assets = validation_loop._generate_validation_figures( + 9, + workdir, + insight={"id": 5, "title": "Insight"}, + metric_name="accuracy", + baseline_metric_value=0.5, + ) + + self.assertEqual(len(assets), 1) + def test_run_validation_loop_blocks_non_formal_experiment(self): with tempfile.TemporaryDirectory() as tmpdir: workdir = Path(tmpdir) diff --git a/tests/test_visualization_agent.py b/tests/test_visualization_agent.py new file mode 100644 index 0000000..13658e1 --- /dev/null +++ b/tests/test_visualization_agent.py @@ -0,0 +1,140 @@ +import json +import tempfile +import unittest +from pathlib import Path +from unittest import mock + +from agents import visualization_agent + + +class VisualizationAgentTests(unittest.TestCase): + def test_generate_visualization_bundle_emits_result_diagrams_and_report_references(self): + with tempfile.TemporaryDirectory() as tmpdir: + workdir = Path(tmpdir) + (workdir / "results").mkdir(parents=True, exist_ok=True) + final_report = workdir / "final_report.md" + final_report.write_text("# Existing Report\n\nBody.\n", encoding="utf-8") + summary_path = workdir / "results" / "validation_summary.json" + summary_path.write_text("{}", encoding="utf-8") + + def fake_metric_figure(iterations, baseline, metric_name, out_svg, **kwargs): + out_svg.parent.mkdir(parents=True, exist_ok=True) + out_svg.write_text("trajectory", encoding="utf-8") + out_pdf = out_svg.with_suffix(".pdf") + out_pdf.write_text("%PDF", encoding="utf-8") + code_path = out_svg.with_suffix(".py") + code_path.write_text("print('plot')\n", encoding="utf-8") + return { + "ok": True, + "score": 0.91, + "notes": "critic_pass", + "attempts": 1, + "svg_path": str(out_svg), + "pdf_path": str(out_pdf), + "code_path": str(code_path), + "used_fallback": False, + } + + insight = { + "id": 3, + "title": "Adaptive Routing for Robust Validation", + "problem_statement": "Existing methods fail under distribution shift.", + "existing_weakness": "Baselines overfit a narrow evidence slice.", + "proposed_method": json.dumps( + { + "name": "Confidence Gated Routing", + "one_line": "Route examples by uncertainty before applying the solver.", + "definition": "A lightweight controller sends high-uncertainty cases to specialist modules.", + } + ), + "experimental_plan": json.dumps( + { + "datasets": 
[{"name": "SyntheticShift"}], + "baselines": [{"name": "DirectSolver"}], + "metrics": {"primary": "accuracy"}, + } + ), + "source_node_ids": json.dumps(["ml.robustness"]), + "source_paper_ids": json.dumps(["2401.00001"]), + } + iterations = [ + {"iteration_number": 1, "metric_value": 0.5, "status": "ok"}, + {"iteration_number": 2, "metric_value": 0.64, "status": "keep"}, + ] + result_rows = [ + { + "method_name": "DirectSolver", + "dataset_name": "SyntheticShift", + "metric_name": "accuracy", + "metric_value": 0.58, + } + ] + relation_rows = [ + { + "subject_name": "Confidence routing", + "object_name": "Robust validation", + "predicate": "improves", + "confidence": 0.93, + } + ] + + with ( + mock.patch.object( + visualization_agent.db, + "fetchall", + side_effect=[iterations, result_rows, relation_rows], + ), + mock.patch.object( + visualization_agent.figure_agent, + "generate_metric_figure_with_retry", + side_effect=fake_metric_figure, + ), + ): + bundle = visualization_agent.generate_visualization_bundle( + run_id=9, + workdir=workdir, + insight=insight, + metric_name="accuracy", + baseline_metric_value=0.5, + best_metric_value=0.64, + verdict="confirmed", + summary_path=summary_path, + ) + + figure_ids = {asset["figure_id"] for asset in bundle["assets"]} + self.assertIn("fig_approach_overview", figure_ids) + self.assertIn("fig_method_architecture", figure_ids) + self.assertIn("fig_metric_trajectory", figure_ids) + self.assertIn("fig_baseline_vs_proposed", figure_ids) + self.assertIn("fig_literature_results", figure_ids) + self.assertIn("fig_knowledge_subgraph", figure_ids) + self.assertTrue(Path(bundle["manifest_path"]).exists()) + self.assertTrue((workdir / "figures" / "fig_knowledge_subgraph.dot").exists()) + self.assertIn("Generated Figures", final_report.read_text(encoding="utf-8")) + self.assertIn("fig_method_architecture", final_report.read_text(encoding="utf-8")) + + def test_write_figure_references_creates_sidecar_when_final_report_missing(self): + with tempfile.TemporaryDirectory() as tmpdir: + workdir = Path(tmpdir) + asset_path = workdir / "figures" / "fig.svg" + asset_path.parent.mkdir(parents=True, exist_ok=True) + asset_path.write_text("", encoding="utf-8") + + ref = visualization_agent.write_figure_references( + workdir, + [ + { + "figure_id": "fig_test", + "asset_kind": "svg", + "path": str(asset_path), + "caption": "A generated figure.", + } + ], + ) + + self.assertTrue(Path(ref).exists()) + self.assertIn("fig_test", Path(ref).read_text(encoding="utf-8")) + + +if __name__ == "__main__": + unittest.main()