9 changes: 8 additions & 1 deletion agents/figure_agent.py
@@ -129,7 +129,14 @@ def render_metric_figure_artifacts(
     )
     used_fallback = False
     try:
-        subprocess.run([RUNTIME_PYTHON, str(script)], check=True, cwd=str(out_svg.parent), timeout=120)
+        subprocess.run(
+            [RUNTIME_PYTHON, str(script)],
+            check=True,
+            cwd=str(out_svg.parent),
+            timeout=120,
+            capture_output=True,
+            text=True,
+        )
     except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
         used_fallback = True
         out_svg.write_text(
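Note (not part of the diff): because the new call passes capture_output=True and text=True, the child's stderr is attached as a string to subprocess.CalledProcessError, so the fallback path in figure_agent.py could also log why the render script failed before writing the placeholder SVG. A minimal standalone sketch under that assumption; the helper name and arguments are illustrative, not taken from the repository:

import subprocess
from pathlib import Path

def run_render_script(python_bin: str, script: Path, cwd: Path) -> bool:
    """Return True if the figure script succeeded; otherwise log the failure and signal fallback."""
    try:
        subprocess.run(
            [python_bin, str(script)],
            check=True,
            cwd=str(cwd),
            timeout=120,
            capture_output=True,  # makes stdout/stderr available on the raised exception
            text=True,            # exc.stderr is decoded to str
        )
        return True
    except subprocess.CalledProcessError as exc:
        print(f"[FIGURE] render script failed: {(exc.stderr or '').strip()}", flush=True)
    except (FileNotFoundError, subprocess.TimeoutExpired) as exc:
        print(f"[FIGURE] render script unavailable or timed out: {exc}", flush=True)
    return False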
116 changes: 99 additions & 17 deletions agents/validation_loop.py
@@ -22,6 +22,7 @@
 
 from agents import codex_executor
 from agents import experiment_supervisor
+from agents import visualization_agent
 from agents.workspace_layout import ensure_run_workspace, plan_file_path, promote_canonical_run, write_latest_status
 from contracts import DeepInsightSpec, ExperimentIterationPacket, ExperimentSpec
 from config import (
@@ -511,6 +512,77 @@ def _record_artifact(
     )
 
 
+def _generate_validation_figures(
+    run_id: int,
+    workdir: Path,
+    *,
+    insight: dict,
+    metric_name: str,
+    baseline_metric_value: float | None,
+    best_metric_value: float | None = None,
+    verdict: str | None = None,
+    summary_path: Path | None = None,
+) -> list[dict]:
+    """Generate validation-loop figure artifacts for a completed run."""
+    try:
+        bundle = visualization_agent.generate_visualization_bundle(
+            run_id=run_id,
+            workdir=workdir,
+            insight=insight,
+            metric_name=metric_name,
+            baseline_metric_value=baseline_metric_value,
+            best_metric_value=best_metric_value,
+            verdict=verdict,
+            summary_path=summary_path,
+        )
+    except Exception as exc:
+        print(f"[LOOP] Figure generation skipped for run {run_id}: {exc}", flush=True)
+        return []
+
+    assets = [dict(asset) for asset in bundle.get("assets") or [] if isinstance(asset, dict)]
+    for asset in assets:
+        path = Path(str(asset.get("path") or ""))
+        if not path.exists():
+            continue
+        asset_kind = str(asset.get("asset_kind") or "")
+        artifact_type = "plot" if asset_kind in {"svg", "pdf", "png", "jpg", "jpeg"} else "source_data"
+        try:
+            _record_artifact(
+                run_id,
+                artifact_type,
+                path,
+                metric_key=asset.get("metric_name") or metric_name,
+                metadata={
+                    "figure_id": asset.get("figure_id"),
+                    "figure_kind": asset.get("figure_kind"),
+                    "asset_kind": asset_kind,
+                    "caption": asset.get("caption"),
+                    "source": asset.get("source"),
+                    **(asset.get("metadata") if isinstance(asset.get("metadata"), dict) else {}),
+                },
+            )
+        except Exception as exc:
+            print(f"[LOOP] Figure artifact registration skipped for {path}: {exc}", flush=True)
+
+    for key, contract_type in (
+        ("manifest_path", "ValidationFigureManifest"),
+        ("references_path", "ValidationFigureReferences"),
+    ):
+        raw_path = str(bundle.get(key) or "").strip()
+        if raw_path and Path(raw_path).exists():
+            try:
+                _record_artifact(
+                    run_id,
+                    "source_data",
+                    Path(raw_path),
+                    metric_key=metric_name,
+                    metadata={"contract_type": contract_type},
+                )
+            except Exception as exc:
+                print(f"[LOOP] Figure manifest registration skipped for {raw_path}: {exc}", flush=True)
+    return assets
+
+
 def _read_experiment_spec(
     run: dict,
     insight: dict,
@@ -1234,24 +1306,32 @@ def run_validation_loop(run_id: int, execution_context: dict | None = None) -> d
     promote_canonical_run(insight_id, run_id, insight=insight)
 
     summary_path = workdir / "results" / "validation_summary.json"
-    summary_path.write_text(
-        json.dumps(
-            {
-                "run_id": run_id,
-                "verdict": verdict,
-                "baseline": baseline,
-                "best_value": best_value,
-                "effect_size": effect,
-                "effect_pct": effect_pct,
-                "iterations_total": iter_num,
-                "iterations_kept": total_kept,
-                "environment_report": environment_report,
-                "stop_reason": stop_reason,
-            },
-            indent=2,
-        ),
-        encoding="utf-8",
+    summary_payload = {
+        "run_id": run_id,
+        "verdict": verdict,
+        "baseline": baseline,
+        "best_value": best_value,
+        "effect_size": effect,
+        "effect_pct": effect_pct,
+        "iterations_total": iter_num,
+        "iterations_kept": total_kept,
+        "environment_report": environment_report,
+        "stop_reason": stop_reason,
+    }
+    summary_path.write_text(json.dumps(summary_payload, indent=2), encoding="utf-8")
+    figure_assets = _generate_validation_figures(
+        run_id,
+        workdir,
+        insight=insight,
+        metric_name=metric_name,
+        baseline_metric_value=baseline,
+        best_metric_value=best_value,
+        verdict=verdict,
+        summary_path=summary_path,
     )
+    if figure_assets:
+        summary_payload["figure_artifacts"] = figure_assets
+        summary_path.write_text(json.dumps(summary_payload, indent=2), encoding="utf-8")
     _record_artifact(
         run_id,
         "source_data",
@@ -1274,6 +1354,7 @@
             "iterations_total": iter_num,
             "iterations_kept": total_kept,
             "summary_path": str(summary_path),
+            "figure_artifacts": figure_assets,
         },
         run_id=run_id,
         insight=insight,
@@ -1294,4 +1375,5 @@ def run_validation_loop(run_id: int, execution_context: dict | None = None) -> d
         "total_seconds": total_time,
         "environment_report": environment_report,
         "stop_reason": stop_reason,
+        "figure_artifacts": figure_assets,
     }
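Not part of the diff: since the summary JSON and the loop's return value now carry a figure_artifacts list (each entry is an asset dict whose keys mirror what _generate_validation_figures records, e.g. path, figure_id, asset_kind, caption), a downstream consumer could enumerate the rendered figures straight from validation_summary.json. A minimal sketch assuming only the fields visible in this diff:

import json
from pathlib import Path

def list_validation_figures(workdir: Path) -> list[dict]:
    """Read results/validation_summary.json and return its figure_artifacts entries, if any."""
    summary_path = workdir / "results" / "validation_summary.json"
    if not summary_path.exists():
        return []
    summary = json.loads(summary_path.read_text(encoding="utf-8"))
    assets = summary.get("figure_artifacts") or []
    for asset in assets:
        # Keys below are the ones _generate_validation_figures copies into each asset dict.
        print(f"{asset.get('figure_id')}: {asset.get('path')} ({asset.get('asset_kind')})")
    return assets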