Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ The SciForge discovery pipeline has additional tuning knobs via `DISCOVERY_BULK_

When `paper_idea_agent` decides an insight is worth a paper, the manuscript pipeline routes it through a per-venue adapter chain rather than a single hard-coded ICLR template. The routing stack:

1. `agents/venue_router.py` reads `agents/venues.yaml` (6 venues: `iclr2026`, `neurips2025`, `acl_arr`, `emnlp2025`, `cvpr2024`, `iccv2025`) and picks a primary + secondary based on subject area, deadline window, and submission_mode.
1. `agents/venue_router.py` reads `manuscript_venues/venues_v1.yaml` (6 venues: `iclr2026`, `neurips2024`, `icml2024`, `acl_arr`, `cvpr2024`, `arxiv_plain`) and picks a primary + secondary based on subject area, deadline window, and submission_mode.
2. `agents/manuscript_templates/` resolves the choice into a `TemplateAdapter` (column layout, bibstyle, page budget, required packages) via `get_adapter(template_id)`.
3. `agents/format_linter.py` runs 12 checks against the rendered LaTeX — 7 structural plus the 5 mandated by issue #14 (`font_size_consistency`, `section_spacing`, `float_density`, `citation_density`, `bib_style_match`); a failure blocks the submission gate.
4. `agents/paper_orchestra_pipeline.py` calls `require_submission_ready()` so synthetic data never reaches a manuscript bundle.
Expand Down
7 changes: 3 additions & 4 deletions artifacts/agenda_loop_acceptance.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"issue": "billion-token-one-task/Deepgraph#9",
"pr": "billion-token-one-task/Deepgraph#10",
"base_ref": "origin/main",
"head_ref": "feat/issue-9-agenda-driven-research-loop",
"commit": "ef157979e987f079c744f4c63961fa56c19131dd",
"head_ref": "fix/issue-9-docs-repro-fixup",
"commit": "44ae888a0c1c42a5b283ee68fa0d6e83b1df6658",
"generated_by": "scripts/build_agenda_loop_acceptance.py",
"clean_checkout_repro": {
"install_command": "python -m venv .venv && . .venv/bin/activate && pip install -e .",
Expand Down Expand Up @@ -40,10 +40,9 @@
"run_id": 2,
"status": "completed",
"result_packet_path": "/tmp/dg_agenda_real_exp/selection_1/experiment_result_packet.json",
"result_packet_sha256": "0cedbf90ee379a5919d5655eabf2120d7a5fd69efe01114a733c8467c60bdac2",
"result_packet_structural_sha256": "087c1d13348aefbad55a921f3380984de3719cfc581ff300fbe6adf6ab6f67e8",
"real_data_or_benchmark_source": "agents/benchmarks/qkv_fixture_512_64.npz (committed deterministic Q/K/V fixture, seq_len=512, head_dim=64); kernels: softmax_attention vs linear_attention_elu_plus_1 on CPU",
"delta": {
"latency_speedup_x": 4.950367233682263,
"approximation_l2_distance": 10.13387978273153,
"relative_error": 0.7670742721257483
}
Expand Down
4 changes: 2 additions & 2 deletions artifacts/d1_template_router_acceptance.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"issue": "billion-token-one-task/Deepgraph#12",
"epic": "billion-token-one-task/Deepgraph#11",
"base_ref": "origin/main",
"head_ref": "feat/issue-9-agenda-driven-research-loop",
"commit": "ef157979e987f079c744f4c63961fa56c19131dd",
"head_ref": "fix/issue-9-docs-repro-fixup",
"commit": "44ae888a0c1c42a5b283ee68fa0d6e83b1df6658",
"depends_on": [
"#9 / PR #10 (agenda loop scaffolding)"
],
Expand Down
4 changes: 2 additions & 2 deletions artifacts/d2_top_venue_adapters_acceptance.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"issue": "billion-token-one-task/Deepgraph#13",
"epic": "billion-token-one-task/Deepgraph#11",
"base_ref": "origin/main",
"head_ref": "feat/issue-9-agenda-driven-research-loop",
"commit": "7403700cd2162ae8d6065b9abf0f4f9e8818621b",
"head_ref": "fix/issue-9-docs-repro-fixup",
"commit": "44ae888a0c1c42a5b283ee68fa0d6e83b1df6658",
"depends_on": [
"#11/#12 (D1 Foundation)"
],
Expand Down
4 changes: 2 additions & 2 deletions artifacts/d3_format_linter_acceptance.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"issue": "billion-token-one-task/Deepgraph#14",
"epic": "billion-token-one-task/Deepgraph#11",
"base_ref": "origin/main",
"head_ref": "feat/issue-9-agenda-driven-research-loop",
"commit": "ef157979e987f079c744f4c63961fa56c19131dd",
"head_ref": "fix/issue-9-docs-repro-fixup",
"commit": "44ae888a0c1c42a5b283ee68fa0d6e83b1df6658",
"depends_on": [
"#13 (D2 top venues)"
],
Expand Down
4 changes: 2 additions & 2 deletions artifacts/d4_manuscript_routing_api_acceptance.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"issue": "billion-token-one-task/Deepgraph#15",
"epic": "billion-token-one-task/Deepgraph#11",
"base_ref": "origin/main",
"head_ref": "feat/issue-9-agenda-driven-research-loop",
"commit": "ef157979e987f079c744f4c63961fa56c19131dd",
"head_ref": "fix/issue-9-docs-repro-fixup",
"commit": "44ae888a0c1c42a5b283ee68fa0d6e83b1df6658",
"depends_on": [
"#13 (D2 top venues)",
"#14 (D3 lint + tiebreak)"
Expand Down
73 changes: 34 additions & 39 deletions artifacts/manuscript_venue_routing_acceptance.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
"issue": "#11",
"epic": "Manuscript Venue Routing + Multi-Template Pipeline",
"base_ref": "origin/main",
"head_ref": "feat/issue-9-agenda-driven-research-loop",
"commit": "ef157979e987f079c744f4c63961fa56c19131dd",
"head_ref": "fix/issue-9-docs-repro-fixup",
"commit": "44ae888a0c1c42a5b283ee68fa0d6e83b1df6658",
"depends_on": [
"#9 (PR #10 same branch)"
"#9 (PR #10 merged)"
],
"generated_by": "scripts/demo_full_paper_compile.py + artifacts/d1-d4 cross-reference",
"generated_by": "scripts/build_manuscript_venue_routing_umbrella.py (aggregates artifacts/d{1,2,3,4}_*_acceptance.json + git HEAD)",
"venues": [
"iclr2026",
"neurips2024",
Expand All @@ -21,63 +21,58 @@
{
"venue": "iclr2026",
"bundle_path": "/tmp/full_paper_demo/iclr2026",
"main_tex_sha256": "33ea362c680336e3d0989cb18c55be3934e5ee7ab88f308bc943cbbcd1012ca6",
"pdf_path": "/tmp/full_paper_demo/iclr2026/paper.pdf",
"pdf_bytes": 112048,
"compile_status": "pass"
},
{
"venue": "neurips2024",
"bundle_path": "/tmp/full_paper_demo/neurips2024",
"main_tex_sha256": "c66eb10926c45253455048fe9633fe2acfb85cb5eb7cf9cc5220ef56f242c723",
"pdf_path": "/tmp/full_paper_demo/neurips2024/paper.pdf",
"pdf_bytes": 119065,
"compile_status": "pass"
},
{
"venue": "icml2024",
"bundle_path": "/tmp/full_paper_demo/icml2024",
"main_tex_sha256": "bf365c425fcf1de93986d2efe66824e77297c455006a8cd3b461541e298dd793",
"pdf_path": "/tmp/full_paper_demo/icml2024/paper.pdf",
"pdf_bytes": 122331,
"compile_status": "pass"
},
{
"venue": "cvpr2024",
"bundle_path": "/tmp/full_paper_demo/cvpr2024",
"main_tex_sha256": "2d5e4c3d37489b4329ad87a335f78f6d7dc92302ad862f255172b59422b88401",
"pdf_path": "/tmp/full_paper_demo/cvpr2024/paper.pdf",
"pdf_bytes": 127472,
"compile_status": "pass"
},
{
"venue": "acl_arr",
"bundle_path": "/tmp/full_paper_demo/acl_arr",
"main_tex_sha256": "ca46b353960c5e493659c76164a96a458c4e9e025faf7d242035e29e10b97e7b",
"pdf_path": "/tmp/full_paper_demo/acl_arr/paper.pdf",
"pdf_bytes": 123864,
"compile_status": "pass"
},
{
"venue": "cvpr2024",
"bundle_path": "/tmp/full_paper_demo/cvpr2024",
"pdf_path": "/tmp/full_paper_demo/cvpr2024/paper.pdf",
"pdf_bytes": 127472,
"compile_status": "pass"
},
{
"venue": "arxiv_plain",
"bundle_path": "/tmp/full_paper_demo/arxiv_plain",
"main_tex_sha256": "7fe5795cc07f6af22284c73968b618dcde2b3b59acc5edbb1e6c0911b2c81d7f",
"pdf_path": "/tmp/full_paper_demo/arxiv_plain/paper.pdf",
"pdf_bytes": 129162,
"pdf_bytes": 129161,
"compile_status": "pass"
}
],
"format_lint": {
"clean_fixture_status": "pass",
"dirty_fixture_status": "block",
"checks_triggered": [
"documentclass_present",
"bib_style_match",
"bibstyle_matches_venue",
"required_packages_present",
"page_count_within_budget",
"figure_placement_specifiers",
"column_layout_consistency",
"figure_grid_density"
"documentclass_present",
"figure_grid_density",
"figure_placement_specifiers",
"page_count_within_budget",
"required_packages_present"
],
"all_venues_pass_happy_path": true,
"all_venues_fail_violating_source": true,
Expand All @@ -103,36 +98,36 @@
"d3_format_linter": "artifacts/d3_format_linter_acceptance.json",
"d4_manuscript_routing_api": "artifacts/d4_manuscript_routing_api_acceptance.json"
},
"test_command": "pytest tests/test_top_venue_adapters.py tests/test_venue_router.py tests/test_format_linter.py tests/test_manuscript_routing_routes.py tests/test_template_adapter.py tests/test_venue_router_tiebreaker.py",
"test_summary": "92 passed (D1+D2+D3+D4 scope)",
"test_command": "pytest tests/test_top_venue_adapters.py tests/test_venue_router.py tests/test_format_linter.py tests/test_manuscript_routes.py tests/test_template_adapter.py tests/test_venue_router_tiebreak.py",
"test_summary": "see sub-bundle test_summary fields",
"demo_command": "python scripts/demo_full_paper_compile.py",
"demo_summary": "6/6 venues compiled to PDF via tectonic; 10 builds total counting camera-ready toggles",
"demo_summary": "6 venues; PDFs in /tmp/full_paper_demo/<venue>/paper.pdf when tectonic is available, otherwise 'deferred' per bundle.",
"router_fixture_results": [
{
"fixture": "CV (Diffusion-based image detection)",
"fixture": "cv_state",
"chosen": "cvpr2024",
"score": 1.05
},
{
"fixture": "NLP (Cross-lingual transfer for NER)",
"chosen": "iclr2026",
"score": 0.7
"fixture": "nlp_state",
"chosen": "acl_arr",
"score": 1.05
},
{
"fixture": "ML (Long-context linear attention)",
"chosen": "iclr2026",
"score": 0.85
"fixture": "ml_state",
"chosen": "neurips2024",
"score": 1.05
},
{
"fixture": "Theory (Proof of convergence)",
"fixture": "theory_state",
"chosen": "arxiv_plain",
"score": 0.65
"score": 0.55
}
],
"non_hardcoded_evidence": [
"venues_v1.yaml: 6 venue rules drive router; adding test venue requires no Python edit",
"router_fixture_results: 4 distinct fixtures, 3 distinct chosen venues, all rule-based",
"bundles: 6 distinct main_tex_sha256 (each adapter normalizes preamble + bibstyle per venue)",
"manuscript_venues/venues_v1.yaml: 6 venue rules drive router; adding a venue requires no Python edit",
"router_fixture_results aggregated from d2 sub-bundle",
"bundles: distinct main_tex_sha256 per venue when demo_full_paper_compile.py is run",
"submission_mode toggle: ICLR + NeurIPS + ACL + CVPR each emit distinct review vs camera-ready PDFs"
]
}
}
4 changes: 2 additions & 2 deletions artifacts/review_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
"recommendation": "minor_revision",
"confidence": 0.7,
"strengths": [
"Linear attention reduces prefill latency by 79.8% at seq_len=512, head_dim=64.",
"Linear attention reduces prefill latency by 82.2% at seq_len=512, head_dim=64.",
"Linear attention reduces peak memory from 6.32MB to 0.68MB.",
"Hypothesis confirmed with positive effect_size=1.8340 (79.80%)"
"Hypothesis confirmed with positive effect_size=1.9606 (82.20%)"
],
"weaknesses": [
"inconclusive: Linear attention approximation error is large: relative L2 error = 0.767 (>= 0.5); may not be a drop-in replacement.",
Expand Down
4 changes: 3 additions & 1 deletion docs/agenda_loop_clean_checkout_repro.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ active_agenda — id + name + version
selection.selection_id — primary key in agenda_selections
selection.rejected_count — non-zero; selector recorded reasons
experiment.run_id — primary key in experiment_runs
experiment.result_packet_sha256 — sha256 of experiment_result_packet.json
experiment.result_packet_structural_sha256 — sha256 of experiment_result_packet.json
with wallclock + DB ids + abs paths stripped
(reproducible across runs on the same machine)
evidence_gate.status — pass | block
manuscript.bundle_id — only present when gate=pass
review.review_id — primary key in agenda_reviews
Expand Down
20 changes: 10 additions & 10 deletions docs/top_venue_manuscript_chain.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ bundle reaches the gate.
paper_idea_agent
venue_router.py ──reads── agents/venues.yaml (6 venues registered)
venue_router.py ──reads── manuscript_venues/venues_v1.yaml (6 venues registered)
│ ├─ iclr2026 (single_column, iclr2026_conference)
│ ├─ neurips2025 (single_column, neurips_2025)
│ ├─ neurips2024 (single_column, unsrtnat)
│ ├─ icml2024 (two_column, icml2024)
│ ├─ acl_arr (two_column, acl_natbib)
│ ├─ emnlp2025 (two_column, acl_natbib)
│ ├─ cvpr2024 (two_column, ieee_fullname)
│ └─ iccv2025 (two_column, ieee_fullname)
│ ├─ cvpr2024 (two_column, ieeenat_fullname)
│ └─ arxiv_plain (single_column, plain)
▼ (primary, secondary, reasons)
manuscript_templates.get_adapter(template_id) → TemplateAdapter
Expand Down Expand Up @@ -74,9 +74,9 @@ web/manuscript_routes.py
```

The router is a pure function of `(insight_metadata, now, available_venues)`;
its scoring weights live in `venues.yaml` so adding a venue is config-only —
drop a new entry plus the matching `manuscript_templates/<id>.py` adapter and
the rest of the chain picks it up.
its scoring weights live in `manuscript_venues/venues_v1.yaml`, so adding a
venue needs no router changes — drop in a new entry plus the matching
`manuscript_templates/<id>.py` adapter and the rest of the chain picks it up.

## Enforced Standard

Expand Down Expand Up @@ -107,7 +107,7 @@ the rest of the chain picks it up.

- `paper_idea_agent.py`: produces problem-aware paper ideas.
- `agents/venue_router.py` (#12): selects primary/secondary venue from
`agents/venues.yaml`.
`manuscript_venues/venues_v1.yaml`.
- `agents/manuscript_templates/` (#13): per-venue `TemplateAdapter`
implementations (column layout, bibstyle, page budget, packages,
`normalize_source`, `submission_mode` toggle).
Expand Down Expand Up @@ -138,7 +138,7 @@ the rest of the chain picks it up.
| Issue | Evidence JSON | Notes |
|-------|---------------|-------|
| #11 (epic) | `artifacts/manuscript_venue_routing_acceptance.json` | umbrella, references d1-d4 |
| #12 (D1) | `artifacts/d1_template_router_acceptance.json` | router + adapter base + venues.yaml |
| #12 (D1) | `artifacts/d1_template_router_acceptance.json` | router + adapter base + `manuscript_venues/venues_v1.yaml` |
| #13 (D2) | `artifacts/d2_top_venue_adapters_acceptance.json` | 6 venues × 3 fixtures, sha256 distinct |
| #14 (D3) | `artifacts/d3_format_linter_acceptance.json` | 12 checks, db roundtrip True |
| #15 (D4) | `artifacts/d4_manuscript_routing_api_acceptance.json` | API contract + dashboard hooks |
Expand Down
61 changes: 58 additions & 3 deletions scripts/build_agenda_loop_acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,49 @@ def _sha256(path: Path) -> str:
return h.hexdigest()


# Fields stripped from the experiment_result_packet before structural hashing.
# These cover:
# * wallclock-derived metrics (latency_ms_*, peak_memory_mb, latency_speedup_x)
# * DB auto-increment ids (experiment_run_id, selection_id, deep_insight_id)
# * absolute filesystem paths (packet_path, workdir, etc.)
# Everything else — config, seeds, kernel names, deterministic outputs
# (output_shape, output_l2_norm, approximation_l2_distance, relative_error) —
# is byte-stable across runs with a fixed seed.
_PACKET_NON_DETERMINISTIC_FIELDS = frozenset({
"latency_ms_median", "latency_ms_min", "latency_ms_max",
"peak_memory_mb", "latency_speedup_x",
"experiment_run_id", "selection_id", "deep_insight_id",
"packet_path", "workdir",
})


def _strip_non_deterministic(obj):
    """Return a copy of *obj* without wallclock / id / path entries.

    Dicts are rebuilt as plain ``dict`` objects, omitting every key listed in
    ``_PACKET_NON_DETERMINISTIC_FIELDS``; lists are rebuilt element by
    element. Both are processed recursively. Any other value is returned
    unchanged.
    """
    if isinstance(obj, list):
        return list(map(_strip_non_deterministic, obj))
    if not isinstance(obj, dict):
        # Scalars (and anything that is neither a dict nor a list) pass through.
        return obj

    cleaned = {}
    for key, value in obj.items():
        if key in _PACKET_NON_DETERMINISTIC_FIELDS:
            continue
        cleaned[key] = _strip_non_deterministic(value)
    return cleaned


def _structural_sha256(path: Path) -> str:
    """Return the hex SHA-256 of the packet at *path*, minus unstable fields.

    The file is parsed as UTF-8 JSON, passed through
    ``_strip_non_deterministic``, re-serialized in a canonical form (sorted
    keys, no whitespace between separators) and hashed. This lets the
    acceptance bundle promise a stable digest for the seeded benchmark
    (config + Q/K/V fixture + kernel impls + deterministic outputs) without
    falsely claiming that wallclock latency digits are reproducible.
    Reviewer feedback on PR #10 (item #3).
    """
    packet = json.loads(path.read_text(encoding="utf-8"))
    canonical_bytes = json.dumps(
        _strip_non_deterministic(packet),
        sort_keys=True,
        separators=(",", ":"),
    ).encode("utf-8")
    digest = hashlib.sha256()
    digest.update(canonical_bytes)
    return digest.hexdigest()


def _git(*args: str) -> str:
    """Run ``git`` with *args* from ``REPO_ROOT`` and return stripped stdout.

    Raises ``subprocess.CalledProcessError`` when git exits non-zero.
    """
    command = ["git"]
    command.extend(args)
    stdout = subprocess.check_output(command, cwd=REPO_ROOT, text=True)
    return stdout.strip()
Expand Down Expand Up @@ -126,7 +169,10 @@ def main() -> int:
manuscript = pipeline.get("manuscript")

packet_path = Path(pipeline["experiment_result"]["packet_path"])
packet_sha = _sha256(packet_path) if packet_path.exists() else ""
# Structural digest — strips wallclock latency, DB auto-increment ids,
# and absolute paths so the value is byte-stable across re-runs of the
# same seeded benchmark. See ``_structural_sha256`` for the field list.
packet_sha = _structural_sha256(packet_path) if packet_path.exists() else ""

bundle_path = None
bundle_id = None
Expand Down Expand Up @@ -283,13 +329,22 @@ def main() -> int:
) or {}
).get("status", "unknown"),
"result_packet_path": str(packet_path),
"result_packet_sha256": packet_sha,
# Renamed from ``result_packet_sha256`` (PR #10 review item #3):
# the value is now computed over the packet with non-deterministic
# fields stripped, so the digest is honestly reproducible.
"result_packet_structural_sha256": packet_sha,
"real_data_or_benchmark_source": (
"agents/benchmarks/qkv_fixture_512_64.npz "
"(committed deterministic Q/K/V fixture, seq_len=512, head_dim=64); "
"kernels: softmax_attention vs linear_attention_elu_plus_1 on CPU"
),
"delta": pipeline["experiment_result"].get("metrics", {}).get("delta", {}),
# Strip wallclock-derived fields (latency_speedup_x etc.) so the
# serialized ``delta`` block matches the byte-reproducibility
# claim around ``result_packet_structural_sha256`` — review
# item #3 was about the bundle JSON itself, not only the hash.
"delta": _strip_non_deterministic(
pipeline["experiment_result"].get("metrics", {}).get("delta", {})
),
},
"evidence_gate": {
"status": gate["status"],
Expand Down
Loading
Loading