From a3f7fb6224f441db0a14c9741fb11a55ad28ded3 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 10:54:08 -0700
Subject: [PATCH 1/6] feat(examples): add 05-fan-out-with-retry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Realistic fan-out shape: summarize and classify a batch of news
headlines in parallel.

- ``GraphBuilder.add_fan_out_node`` with ``items_field`` mode and
  ``extra_outputs`` collecting two parallel per-instance lists
  (summary + topic).
- ``instance_middleware=(RetryMiddleware(3 attempts, deterministic
  backoff), TimingMiddleware(on_complete=...))`` wraps each instance's
  whole subgraph invocation — retries are per-instance, timings are
  captured per-instance.
- ``concurrency=3`` caps how many instances run in flight at once.
- Final printout shows per-instance durations in completion order
  alongside a wall-clock total vs sum-of-durations comparison, so
  the speedup from concurrency is visible.

Per-instance subgraph is ``summarize → classify``; both nodes hit the
LLM via the shared ``OpenAIProvider`` pattern the rest of the demos
use. Smoke test list grows to six demos.
---
 examples/05-fan-out-with-retry/main.py | 297 +++++++++++++++++++++++++
 examples/README.md                     |  10 +
 tests/test_examples_smoke.py           |   1 +
 3 files changed, 308 insertions(+)
 create mode 100644 examples/05-fan-out-with-retry/main.py

diff --git a/examples/05-fan-out-with-retry/main.py b/examples/05-fan-out-with-retry/main.py
new file mode 100644
index 0000000..bbf0f94
--- /dev/null
+++ b/examples/05-fan-out-with-retry/main.py
@@ -0,0 +1,297 @@
+"""openarmature demo: summarize a batch of news headlines in parallel, with
+per-headline retries and timing.
+
+**Use case:** Given a list of news headlines, produce a one-sentence
+summary and a topic tag for each one. The headlines are independent, so
+fan them out and let them run concurrently. Each per-headline run hits
+the LLM, which can transiently fail (rate-limit, timeout, transient 5xx);
+wrap each instance in retry middleware so a flaky call doesn't tank the
+whole batch. A timing middleware records how long each instance took.
+
+This is the canonical fan-out shape: N similar tasks, N runtime-determined
+from state, the work independent enough to run concurrently. The
+per-instance subgraph (summarize → classify) is a complete pipeline in
+its own right — it would also work standalone against a single headline.
+
+**What's interesting in the implementation:**
+
+- ``GraphBuilder.add_fan_out_node`` with ``items_field`` mode: one
+  instance per element of ``state.headlines``, ``item_field`` carries the
+  per-instance input into the subgraph.
+- ``extra_outputs`` collects a second per-instance field (``topic``) in
+  parallel with the primary ``collect_field`` (``summary``). The two
+  parent lists are index-aligned.
+- ``instance_middleware=(RetryMiddleware(...), TimingMiddleware(...))``
+  wraps EACH instance's whole subgraph invocation. Retries are
+  per-instance: a failure on headline 3 doesn't restart headlines 0-2.
+- ``concurrency=3`` caps how many instances run in flight at once. Use
+  this to be polite to the upstream API.
+- A ``TimingRecord`` is captured per instance via an ``on_complete``
+  callback. ``TimingRecord`` carries the per-call duration but not the
+  ``fan_out_index`` — that index lives on observer NodeEvents instead.
+  The demo prints captured durations in completion order plus a
+  wall-clock vs sum-of-durations comparison that shows concurrency
+  actually parallelized the work.
+
+**Configuration** (env vars; OpenAI defaults shown):
+
+- ``LLM_BASE_URL`` defaults to ``https://api.openai.com``. **Host root only.**
+- ``LLM_MODEL`` defaults to ``gpt-4o-mini``.
+- ``LLM_API_KEY`` required (empty for local servers that don't authenticate).
+
+Run with:
+
+    uv sync --group examples
+    cd examples/05-fan-out-with-retry
+    LLM_API_KEY=sk-... uv run python main.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import time
+from collections.abc import Mapping
+from typing import Annotated, Any
+
+from pydantic import Field
+
+from openarmature.graph import (
+    END,
+    CompiledGraph,
+    GraphBuilder,
+    State,
+    append,
+)
+from openarmature.graph.middleware import (
+    RetryMiddleware,
+    TimingMiddleware,
+    TimingRecord,
+    deterministic_backoff,
+)
+from openarmature.llm import OpenAIProvider, SystemMessage, UserMessage
+
+_provider_instance: OpenAIProvider | None = None
+
+
+def _get_provider() -> OpenAIProvider:
+    global _provider_instance
+    if _provider_instance is None:
+        _provider_instance = OpenAIProvider(
+            base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"),
+            model=os.environ.get("LLM_MODEL", "gpt-4o-mini"),
+            api_key=os.environ.get("LLM_API_KEY") or None,
+        )
+    return _provider_instance
+
+
+async def _chat(system: str, user: str) -> str:
+    response = await _get_provider().complete(
+        [SystemMessage(content=system), UserMessage(content=user)],
+    )
+    return (response.message.content or "").strip()
+
+
+# ---------------------------------------------------------------------------
+# A small batch of headlines. In a real app this would come from an RSS
+# feed, a database query, or wherever your batch lives.
+# ---------------------------------------------------------------------------
+
+HEADLINES: list[str] = [
+    "City council approves new bike-lane network spanning downtown",
+    "Researchers report unexpected results from fusion-reactor test run",
+    "Local bakery wins national award for sourdough loaf",
+    "Stock market dips after central bank signals slower rate cuts",
+    "Marathon runner sets new course record under heavy rainfall",
+]
+
+
+# ---------------------------------------------------------------------------
+# State schemas
+# ---------------------------------------------------------------------------
+
+
+class BatchState(State):
+    """Outer graph: list of headlines goes in, parallel lists of summaries
+    and topic tags come out."""
+
+    headlines: list[str] = Field(default_factory=list)
+    summaries: Annotated[list[str], append] = Field(default_factory=list)
+    topics: Annotated[list[str], append] = Field(default_factory=list)
+    trace: Annotated[list[str], append] = Field(default_factory=list)
+
+
+class HeadlineState(State):
+    """Per-instance subgraph state — one headline, its summary, its topic."""
+
+    headline: str = ""
+    summary: str = ""
+    topic: str = ""
+    trace: Annotated[list[str], append] = Field(default_factory=list)
+
+
+# ---------------------------------------------------------------------------
+# Per-instance subgraph: summarize → classify
+# ---------------------------------------------------------------------------
+
+
+async def summarize(s: HeadlineState) -> Mapping[str, Any]:
+    content = await _chat(
+        system=(
+            "Rewrite the headline as one short sentence (~15 words) that would work as a lead. No preamble."
+        ),
+        user=s.headline,
+    )
+    return {"summary": content, "trace": ["summarize"]}
+
+
+async def classify(s: HeadlineState) -> Mapping[str, Any]:
+    content = await _chat(
+        system=(
+            "Tag the topic of the headline below with ONE word from this set: "
+            "politics, science, business, sports, food, technology, other. "
+            "Reply with just the word, lowercase, no punctuation."
+        ),
+        user=s.headline,
+    )
+    tag = content.strip().lower().strip(".")
+    return {"topic": tag, "trace": ["classify"]}
+
+
+def build_headline_subgraph() -> CompiledGraph[HeadlineState]:
+    return (
+        GraphBuilder(HeadlineState)
+        .add_node("summarize", summarize)
+        .add_node("classify", classify)
+        .add_edge("summarize", "classify")
+        .add_edge("classify", END)
+        .set_entry("summarize")
+        .compile()
+    )
+
+
+# ---------------------------------------------------------------------------
+# Instance middleware: retry + timing
+# ---------------------------------------------------------------------------
+# Both middlewares wrap each instance's whole subgraph invocation. Retry's
+# loop is per-instance: if headline 3's first attempt raises a transient
+# error, the retry middleware re-invokes the subgraph for headline 3 only.
+# Headlines 0-2 (already complete) and 4 (still running) are unaffected.
+#
+# Timing's on_complete callback fires once per successful (or final-failure)
+# instance. ``TimingRecord`` carries duration + outcome but not
+# ``fan_out_index`` — the index lives on observer NodeEvents, not in the
+# middleware's record. The demo prints the captured timings in completion
+# order to show "this is what middleware-level timing gives you out of the
+# box." For per-instance correlation against the input list, use an
+# observer instead (see example 03).
+
+
+# Captured timings, populated by the on_complete callback below.
+_timings: list[TimingRecord] = []
+
+
+async def _record_timing(record: TimingRecord) -> None:
+    _timings.append(record)
+
+
+# ---------------------------------------------------------------------------
+# Outer graph
+# ---------------------------------------------------------------------------
+
+
+async def announce(s: BatchState) -> Mapping[str, Any]:
+    del s
+    return {"trace": ["announce"]}
+
+
+async def present(s: BatchState) -> Mapping[str, Any]:
+    """Marker node so the trace shows that the outer graph reached its present step.
+
+    The summaries and topics are already on parent state from the fan-out's
+    projection; this node just appends to the trace.
+    """
+    del s
+    return {"trace": ["present"]}
+
+
+def build_graph() -> CompiledGraph[BatchState]:
+    headline_subgraph = build_headline_subgraph()
+
+    retry = RetryMiddleware(
+        max_attempts=3,
+        # Short fixed delay so the demo isn't slow. A production app would
+        # use exponential_jitter_backoff (the default).
+        backoff=deterministic_backoff(0.2),
+    )
+    timing = TimingMiddleware(
+        node_name="headline_run",
+        on_complete=_record_timing,
+        clock=time.monotonic,
+    )
+
+    return (
+        GraphBuilder(BatchState)
+        .add_node("announce", announce)
+        .add_fan_out_node(
+            "headline_runs",
+            subgraph=headline_subgraph,
+            items_field="headlines",
+            item_field="headline",
+            collect_field="summary",
+            target_field="summaries",
+            extra_outputs={"topics": "topic"},
+            concurrency=3,
+            instance_middleware=(retry, timing),
+        )
+        .add_node("present", present)
+        .add_edge("announce", "headline_runs")
+        .add_edge("headline_runs", "present")
+        .add_edge("present", END)
+        .set_entry("announce")
+        .compile()
+    )
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+async def main() -> None:
+    graph = build_graph()
+
+    initial = BatchState(headlines=HEADLINES)
+
+    print("=" * 72)
+    print(f"Summarizing {len(HEADLINES)} headlines in parallel (concurrency=3)")
+    print("=" * 72)
+    print()
+
+    wall_start = time.monotonic()
+    try:
+        final = await graph.invoke(initial)
+        wall_ms = (time.monotonic() - wall_start) * 1000.0
+        print("Results (in input order):")
+        print()
+        for i, (h, s, t) in enumerate(zip(final.headlines, final.summaries, final.topics, strict=True)):
+            print(f"  [{i}] {h}")
+            print(f"       summary: {s}")
+            print(f"       topic:   {t}")
+            print()
+        print("Per-instance timings (in completion order):")
+        for nth, record in enumerate(_timings):
+            print(f"  #{nth}  {record.duration_ms:7.1f} ms  outcome={record.outcome}")
+        sum_ms = sum(record.duration_ms for record in _timings)
+        print()
+        print(f"  wall-clock total:        {wall_ms:7.1f} ms")
+        print(f"  sum of per-instance:     {sum_ms:7.1f} ms")
+        print(f"  → concurrency speedup:   {sum_ms / wall_ms:5.2f}x")
+    finally:
+        await graph.drain()
+        if _provider_instance is not None:
+            await _provider_instance.aclose()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/README.md b/examples/README.md
index e2d0db4..8da6474 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -42,6 +42,16 @@ levels of subgraph nesting: outer coordinator → doc-QA subgraph →
 section-extract subgraph. A depth-aware observer prints the descent
 and return.
 
+### [`05-fan-out-with-retry/`](./05-fan-out-with-retry/main.py)
+
+Summarize a batch of news headlines in parallel. Each per-headline
+run goes through a `summarize → classify` subgraph wrapped in retry
+middleware (transient failures don't tank the batch) and timing
+middleware (per-instance duration captured alongside the fan-out
+index). Demonstrates: `add_fan_out_node` with `items_field` mode,
+`extra_outputs` collecting a parallel list, `instance_middleware`,
+concurrency cap.
+
 ## Configuration
 
 All demos configure their LLM client via env vars; OpenAI public-API
diff --git a/tests/test_examples_smoke.py b/tests/test_examples_smoke.py
index fc95d5d..e4bf4d2 100644
--- a/tests/test_examples_smoke.py
+++ b/tests/test_examples_smoke.py
@@ -35,6 +35,7 @@
     "02-explicit-subgraph-mapping",
     "03-observer-hooks",
     "04-nested-subgraphs",
+    "05-fan-out-with-retry",
 ]
 
 

From f4a8af39b7a883447bbfe5d6b0e322d0bf32f6ad Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 11:18:51 -0700
Subject: [PATCH 2/6] feat(examples): add 06-parallel-branches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enrich a lunar-mission news article with three independent analyses
(summary, sentiment, topic tags) running concurrently.

- ``GraphBuilder.add_parallel_branches_node`` registers M
  ``BranchSpec``s under named keys (``summary`` / ``sentiment`` /
  ``topics``). Each spec carries its own compiled subgraph, its own
  input/output projection, and optionally its own middleware.
- The three branches have DIFFERENT state schemas — each is scoped
  to its analysis's inputs and outputs. The projection mapping
  translates the parent's ``article`` field into each branch's input
  field name.
- The sentiment branch wraps its subgraph in ``RetryMiddleware`` to
  show per-branch middleware composition. The other two run bare.
- Wall-clock total prints alongside the results so the parallelism
  benefit is visible.

Sample article is a narrative of Artemis II's splashdown on April 10,
2026 — the first crewed flight beyond low Earth orbit since Apollo
17.

Also tightens the 05 entry in examples/README.md to drop a stale
mention of fan-out-index correlation (the demo doesn't claim that
anymore; the timing record carries no index field). Smoke test list
grows to seven demos.
---
 examples/06-parallel-branches/main.py | 323 ++++++++++++++++++++++++++
 examples/README.md                    |  17 +-
 tests/test_examples_smoke.py          |   1 +
 3 files changed, 337 insertions(+), 4 deletions(-)
 create mode 100644 examples/06-parallel-branches/main.py

diff --git a/examples/06-parallel-branches/main.py b/examples/06-parallel-branches/main.py
new file mode 100644
index 0000000..b53c80b
--- /dev/null
+++ b/examples/06-parallel-branches/main.py
@@ -0,0 +1,323 @@
+"""openarmature demo: enrich a lunar-mission news article with three
+independent analyses running concurrently.
+
+**Use case:** Given a news article about a lunar mission, produce three
+side-by-side outputs: a one-sentence summary, an overall sentiment label,
+and a short list of topic tags. The three analyses don't depend on each
+other, so dispatch them in parallel. Each analysis is its own subgraph
+with its own state schema (the summary subgraph doesn't care about
+sentiment, the topic extractor doesn't care about either) — which is
+exactly the shape parallel-branches is for.
+
+Where fan-out (example 05) runs N copies of ONE subgraph against
+different inputs, parallel-branches runs M heterogeneous subgraphs
+against the same input. Different schemas, different middleware,
+different topologies per branch; one dispatch.
+
+**What's interesting in the implementation:**
+
+- ``GraphBuilder.add_parallel_branches_node`` registers M
+  ``BranchSpec``s under named keys (``summary``, ``sentiment``,
+  ``topics`` here). Each spec carries its own compiled subgraph,
+  its own input/output projection, and optionally its own middleware.
+- The branches have DIFFERENT state schemas. The summary subgraph's
+  state has a ``summary`` field; the sentiment subgraph's has a
+  ``label`` field; the topics subgraph's has a ``tags`` list. Each is
+  scoped to its job. The projection mapping translates the parent's
+  ``article`` into each branch's input field name.
+- The sentiment branch wraps its subgraph in ``RetryMiddleware`` to
+  show per-branch middleware composition. The other two branches run
+  bare. Per-branch middleware is heterogeneous — branch A may have
+  retry + timing, branch B nothing, branch C something custom.
+- Branch insertion order determines fan-in order: when two branches
+  contribute to the same parent field, the parent's reducer applies
+  them in the order the branches were declared in the ``branches``
+  mapping (not in completion order). The three branches here write
+  disjoint parent fields, so the order doesn't affect the result —
+  but the property holds and would matter if they overlapped.
+
+**Configuration** (env vars; OpenAI defaults shown):
+
+- ``LLM_BASE_URL`` defaults to ``https://api.openai.com``. **Host root only.**
+- ``LLM_MODEL`` defaults to ``gpt-4o-mini``.
+- ``LLM_API_KEY`` required (empty for local servers that don't authenticate).
+
+Run with:
+
+    uv sync --group examples
+    cd examples/06-parallel-branches
+    LLM_API_KEY=sk-... uv run python main.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import time
+from collections.abc import Mapping
+from typing import Annotated, Any
+
+from pydantic import Field
+
+from openarmature.graph import (
+    END,
+    BranchSpec,
+    CompiledGraph,
+    GraphBuilder,
+    State,
+    append,
+)
+from openarmature.graph.middleware import (
+    RetryMiddleware,
+    deterministic_backoff,
+)
+from openarmature.llm import OpenAIProvider, SystemMessage, UserMessage
+
+_provider_instance: OpenAIProvider | None = None
+
+
+def _get_provider() -> OpenAIProvider:
+    global _provider_instance
+    if _provider_instance is None:
+        _provider_instance = OpenAIProvider(
+            base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"),
+            model=os.environ.get("LLM_MODEL", "gpt-4o-mini"),
+            api_key=os.environ.get("LLM_API_KEY") or None,
+        )
+    return _provider_instance
+
+
+async def _chat(system: str, user: str) -> str:
+    response = await _get_provider().complete(
+        [SystemMessage(content=system), UserMessage(content=user)],
+    )
+    return (response.message.content or "").strip()
+
+
+# ---------------------------------------------------------------------------
+# Sample article. A real app would pull this from a feed, a queue, an API.
+# ---------------------------------------------------------------------------
+
+ARTICLE = (
+    "NASA's Artemis II crew capsule Integrity splashed down in the Pacific "
+    "Ocean this evening, ending a ten-day flight that carried four "
+    "astronauts on a free-return trajectory around the Moon and back. The "
+    "flight was the first crewed mission beyond low Earth orbit since "
+    "Apollo 17 in 1972. Agency officials described the result as a "
+    "successful test of the Orion spacecraft's deep-space systems and "
+    "cautioned that the Artemis III surface-landing timeline remains "
+    "dependent on the on-ground refurbishment cadence and lander-system "
+    "milestones. Even so, the splashdown was greeted with relief by "
+    "partner space agencies and renewed calls in policy circles for "
+    "sustained federal funding of the lunar return program."
+)
+
+
+# ---------------------------------------------------------------------------
+# State schemas
+# ---------------------------------------------------------------------------
+
+
+class ArticleState(State):
+    """Outer: an article goes in, three enrichment fields come out."""
+
+    article: str = ""
+    summary: str = ""
+    sentiment: str = ""
+    topics: list[str] = Field(default_factory=list)
+    trace: Annotated[list[str], append] = Field(default_factory=list)
+
+
+class SummaryState(State):
+    """Summary branch: one-sentence rewrite of the article."""
+
+    text: str = ""
+    summary: str = ""
+
+
+class SentimentState(State):
+    """Sentiment branch: overall tone of the article."""
+
+    text: str = ""
+    label: str = ""
+
+
+class TopicsState(State):
+    """Topics branch: a short list of topic tags."""
+
+    text: str = ""
+    tags: list[str] = Field(default_factory=list)
+
+
+# ---------------------------------------------------------------------------
+# Branch subgraphs — each is one node, but each has its own scope.
+# ---------------------------------------------------------------------------
+
+
+async def write_summary(s: SummaryState) -> Mapping[str, Any]:
+    content = await _chat(
+        system=("Summarize the article in one tight sentence (~20 words). No preamble, no quoting."),
+        user=s.text,
+    )
+    return {"summary": content}
+
+
+async def classify_sentiment(s: SentimentState) -> Mapping[str, Any]:
+    content = await _chat(
+        system=(
+            "Classify the overall sentiment of the article. Reply with ONE "
+            "word from this set: positive, negative, neutral, mixed. "
+            "Lowercase, no punctuation."
+        ),
+        user=s.text,
+    )
+    label = content.strip().lower().strip(".")
+    return {"label": label}
+
+
+async def extract_topics(s: TopicsState) -> Mapping[str, Any]:
+    """Ask the LLM for three topic tags and return them as ``{"tags": [...]}``."""
+    content = await _chat(
+        system="Extract three short topic tags for the article. Reply with "
+        "exactly three lines, one tag per line, no numbering or bullets. "
+        "Tags should be 1-3 words each.",
+        user=s.text,
+    )
+    # Filter on the cleaned tag: a bullet-only line ("-", "•") must not become "".
+    tags = [tag for line in content.splitlines() if (tag := line.strip(" -*•\t").strip())][:3]
+    return {"tags": tags}
+
+
+def build_summary_subgraph() -> CompiledGraph[SummaryState]:
+    return (
+        GraphBuilder(SummaryState)
+        .add_node("write_summary", write_summary)
+        .add_edge("write_summary", END)
+        .set_entry("write_summary")
+        .compile()
+    )
+
+
+def build_sentiment_subgraph() -> CompiledGraph[SentimentState]:
+    return (
+        GraphBuilder(SentimentState)
+        .add_node("classify_sentiment", classify_sentiment)
+        .add_edge("classify_sentiment", END)
+        .set_entry("classify_sentiment")
+        .compile()
+    )
+
+
+def build_topics_subgraph() -> CompiledGraph[TopicsState]:
+    return (
+        GraphBuilder(TopicsState)
+        .add_node("extract_topics", extract_topics)
+        .add_edge("extract_topics", END)
+        .set_entry("extract_topics")
+        .compile()
+    )
+
+
+# ---------------------------------------------------------------------------
+# Outer graph
+# ---------------------------------------------------------------------------
+
+
+async def receive(s: ArticleState) -> Mapping[str, Any]:
+    del s
+    return {"trace": ["receive"]}
+
+
+async def present(s: ArticleState) -> Mapping[str, Any]:
+    del s
+    return {"trace": ["present"]}
+
+
+def build_graph() -> CompiledGraph[ArticleState]:
+    summary = build_summary_subgraph()
+    sentiment = build_sentiment_subgraph()
+    topics = build_topics_subgraph()
+
+    # Only the sentiment branch retries. Realistic in production: the
+    # classification call is short and cheap to retry, but you may not want
+    # the same policy on a longer summarize call (where a retry doubles
+    # cost) or on a topic-extract call with a different transient-failure profile.
+    sentiment_retry = RetryMiddleware(
+        max_attempts=3,
+        backoff=deterministic_backoff(0.2),
+    )
+
+    return (
+        GraphBuilder(ArticleState)
+        .add_node("receive", receive)
+        .add_parallel_branches_node(
+            "enrich",
+            branches={
+                "summary": BranchSpec(
+                    subgraph=summary,
+                    inputs={"text": "article"},
+                    outputs={"summary": "summary"},
+                ),
+                "sentiment": BranchSpec(
+                    subgraph=sentiment,
+                    inputs={"text": "article"},
+                    outputs={"sentiment": "label"},
+                    middleware=(sentiment_retry,),
+                ),
+                "topics": BranchSpec(
+                    subgraph=topics,
+                    inputs={"text": "article"},
+                    outputs={"topics": "tags"},
+                ),
+            },
+        )
+        .add_node("present", present)
+        .add_edge("receive", "enrich")
+        .add_edge("enrich", "present")
+        .add_edge("present", END)
+        .set_entry("receive")
+        .compile()
+    )
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+async def main() -> None:
+    graph = build_graph()
+
+    print("=" * 72)
+    print("Lunar-mission article enrichment — three independent analyses in parallel")
+    print("=" * 72)
+    print()
+    print(f"Article ({len(ARTICLE)} chars):")
+    print()
+    print(ARTICLE)
+    print()
+
+    wall_start = time.monotonic()
+    try:
+        final = await graph.invoke(ArticleState(article=ARTICLE))
+        wall_ms = (time.monotonic() - wall_start) * 1000.0
+        print("=" * 72)
+        print("Enrichment results")
+        print("=" * 72)
+        print()
+        print(f"  summary:   {final.summary}")
+        print(f"  sentiment: {final.sentiment}")
+        print(f"  topics:    {final.topics}")
+        print()
+        print(f"  wall-clock: {wall_ms:7.1f} ms")
+        print()
+        print("The three branches ran in parallel — wall-clock is closer to the")
+        print("slowest single branch than to the sum of all three.")
+    finally:
+        await graph.drain()
+        if _provider_instance is not None:
+            await _provider_instance.aclose()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/README.md b/examples/README.md
index 8da6474..1f38992 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -47,10 +47,19 @@ and return.
 Summarize a batch of news headlines in parallel. Each per-headline
 run goes through a `summarize → classify` subgraph wrapped in retry
 middleware (transient failures don't tank the batch) and timing
-middleware (per-instance duration captured alongside the fan-out
-index). Demonstrates: `add_fan_out_node` with `items_field` mode,
-`extra_outputs` collecting a parallel list, `instance_middleware`,
-concurrency cap.
+middleware (per-instance duration captured). Demonstrates:
+`add_fan_out_node` with `items_field` mode, `extra_outputs`
+collecting a parallel list, `instance_middleware`, concurrency cap.
+
+### [`06-parallel-branches/`](./06-parallel-branches/main.py)
+
+Enrich an article with three independent analyses (summary,
+sentiment, topic tags) running concurrently. Each analysis is a
+separate subgraph with its own state schema. The sentiment branch
+wraps its subgraph in retry middleware; the other two run bare.
+Demonstrates: `add_parallel_branches_node`, `BranchSpec` per branch
+with input/output projection, heterogeneous branch state schemas,
+per-branch middleware.
 
 ## Configuration
 
diff --git a/tests/test_examples_smoke.py b/tests/test_examples_smoke.py
index e4bf4d2..428c468 100644
--- a/tests/test_examples_smoke.py
+++ b/tests/test_examples_smoke.py
@@ -36,6 +36,7 @@
     "03-observer-hooks",
     "04-nested-subgraphs",
     "05-fan-out-with-retry",
+    "06-parallel-branches",
 ]
 
 

From 1109514632f893c71e916041ee770a556266416d Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 11:19:20 -0700
Subject: [PATCH 3/6] chore(examples): moon-themed subject matter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sweep across 00-05 so every example's queries, articles, baked-in
corpora, and headlines are moon-related. The company is Lunar
Command; consistent lunar framing makes the demo set feel like a
coherent surface rather than a grab bag.

- 00 hello-world: query → "why did Apollo 13 abort its lunar
  landing?"
- 01 routing-and-subgraphs: default question → "why is the lunar
  south pole strategically important?" (the moon-landing-year
  question stays)
- 02 explicit-subgraph-mapping: default topic pair → Apollo 11 vs
  Apollo 17; docstring and Run-with section updated
- 03 observer-hooks: second default question → "explain why NASA is
  returning to the moon with Artemis" (the moon-landing-year
  question stays)
- 04 nested-subgraphs: corpus → Apollo 11 (kept), Apollo 13 (new),
  Artemis II (new — narrative of the April 2026 splashdown);
  default questions updated to match
- 05 fan-out-with-retry: five headlines all swapped for accurate
  lunar-mission news (Artemis II splashdown, Lunar Gateway pause,
  IM-3 prep, LRO crater find, south-pole water-ice confirmation);
  classify tag set tightened to lunar-relevant categories
  (crew / lander / orbiter / science / hardware / policy / other)

Factual accuracy checked as of 2026-05-17: Artemis II splashed
down 2026-04-10; the Lunar Gateway program was paused 2026-03-24
in favor of a lunar surface base; Intuitive Machines IM-2 ended on
its side in March 2025 and IM-3 is scheduled for second half of
2026.
---
 examples/00-hello-world/main.py               |  2 +-
 examples/01-routing-and-subgraphs/main.py     |  4 +--
 examples/02-explicit-subgraph-mapping/main.py | 11 ++++---
 examples/03-observer-hooks/main.py            |  2 +-
 examples/04-nested-subgraphs/main.py          | 30 ++++++++++--------
 examples/05-fan-out-with-retry/main.py        | 31 ++++++++++---------
 6 files changed, 43 insertions(+), 37 deletions(-)

diff --git a/examples/00-hello-world/main.py b/examples/00-hello-world/main.py
index 82b0cc6..53eb2de 100644
--- a/examples/00-hello-world/main.py
+++ b/examples/00-hello-world/main.py
@@ -205,7 +205,7 @@ async def main() -> None:
     graph = build_graph()
     graph.attach_observer(trace)
     try:
-        final = await graph.invoke(PipelineState(query="what is RAG?"))
+        final = await graph.invoke(PipelineState(query="why did Apollo 13 abort its lunar landing?"))
         print(f"\nclassification: {final.classification}")
         if final.research_plan is not None:
             print(f"research_plan: {final.research_plan}")
diff --git a/examples/01-routing-and-subgraphs/main.py b/examples/01-routing-and-subgraphs/main.py
index bfe0821..5afdbae 100644
--- a/examples/01-routing-and-subgraphs/main.py
+++ b/examples/01-routing-and-subgraphs/main.py
@@ -37,7 +37,7 @@
     uv sync --group examples
     cd examples/01-routing-and-subgraphs
     LLM_API_KEY=sk-... uv run python main.py "what year did the moon landing happen"
-    LLM_API_KEY=sk-... uv run python main.py "is espresso actually more caffeinated than drip?"
+    LLM_API_KEY=sk-... uv run python main.py "why is the lunar south pole strategically important?"
 """
 
 from __future__ import annotations
@@ -454,7 +454,7 @@ def build_graph() -> CompiledGraph[AssistantState]:
 
 
 async def main() -> None:
-    question = " ".join(sys.argv[1:]) or "is espresso actually more caffeinated than drip coffee?"
+    question = " ".join(sys.argv[1:]) or "why is the lunar south pole strategically important?"
     graph = build_graph()
     try:
         final = await graph.invoke(AssistantState(question=question))
diff --git a/examples/02-explicit-subgraph-mapping/main.py b/examples/02-explicit-subgraph-mapping/main.py
index 717bf06..6e8b3a4 100644
--- a/examples/02-explicit-subgraph-mapping/main.py
+++ b/examples/02-explicit-subgraph-mapping/main.py
@@ -1,8 +1,9 @@
 """openarmature demo: same compiled subgraph reused at two sites in one parent
 graph, each site with its own ExplicitMapping.
 
-**Use case:** Compare two topics ("rust vs go", "espresso vs drip coffee")
-by running the same analysis subgraph on each, then synthesizing a verdict.
+**Use case:** Compare two topics ("Apollo program vs Artemis program",
+"Apollo 11 vs Apollo 17") by running the same analysis subgraph on each,
+then synthesizing a verdict.
 
 **Demonstrates:** One compiled subgraph reused at two parent sites with
 per-site `ExplicitMapping` — the canonical way to express "run the same
@@ -27,8 +28,8 @@
 
     uv sync --group examples
     cd examples/02-explicit-subgraph-mapping
-    LLM_API_KEY=sk-... uv run python main.py "rust" "go"
-    LLM_API_KEY=sk-... uv run python main.py "espresso vs drip coffee"
+    LLM_API_KEY=sk-... uv run python main.py "Apollo 11" "Apollo 17"
+    LLM_API_KEY=sk-... uv run python main.py "Apollo program vs Artemis program"
 """
 
 from __future__ import annotations
@@ -262,7 +263,7 @@ async def main() -> None:
     elif len(args) == 1 and " vs " in args[0].lower():
         topic_a, topic_b = re.split(r" vs ", args[0], maxsplit=1, flags=re.IGNORECASE)
     else:
-        topic_a, topic_b = "rust", "go"
+        topic_a, topic_b = "Apollo 11", "Apollo 17"
 
     graph = build_graph()
     try:
diff --git a/examples/03-observer-hooks/main.py b/examples/03-observer-hooks/main.py
index abf0a47..146e387 100644
--- a/examples/03-observer-hooks/main.py
+++ b/examples/03-observer-hooks/main.py
@@ -32,7 +32,7 @@
     uv sync --group examples --all-extras
     cd examples/03-observer-hooks
     LLM_API_KEY=sk-... uv run python main.py "what year did the moon landing happen"
-    LLM_API_KEY=sk-... uv run python main.py "explain the rise of espresso culture"
+    LLM_API_KEY=sk-... uv run python main.py "explain why NASA is returning to the moon with Artemis"
 
 (``--all-extras`` pulls in ``opentelemetry-sdk`` for the OTel observer.)
 """
diff --git a/examples/04-nested-subgraphs/main.py b/examples/04-nested-subgraphs/main.py
index 6441199..f607029 100644
--- a/examples/04-nested-subgraphs/main.py
+++ b/examples/04-nested-subgraphs/main.py
@@ -30,7 +30,8 @@
     uv sync --group examples
     cd examples/04-nested-subgraphs
     LLM_API_KEY=sk-... uv run python main.py "what year did humans first land on the moon?"
-    LLM_API_KEY=sk-... uv run python main.py "how is espresso different from drip coffee?"
+    LLM_API_KEY=sk-... uv run python main.py "what happened on Apollo 13?"
+    LLM_API_KEY=sk-... uv run python main.py "who was on the Artemis II crew?"
 """
 
 from __future__ import annotations
@@ -93,23 +94,26 @@ async def _chat(system: str, user: str) -> str:
         ),
     },
     {
-        "title": "Espresso",
+        "title": "Apollo 13",
         "body": (
-            "Espresso is a coffee brewing method of Italian origin. It is made by forcing pressurized "
-            "hot water through finely ground coffee. The resulting shot is more concentrated than coffee "
-            "brewed by other methods, with a layer of crema on top. Espresso has more caffeine per "
-            "unit volume than most coffee beverages but a typical serving is one-tenth the volume of a "
-            "drip coffee, so a single espresso usually contains less total caffeine than a drip cup."
+            "Apollo 13 was the seventh crewed mission in the Apollo program and the third intended "
+            "to land on the Moon. The lunar landing was aborted after an oxygen tank in the service "
+            "module ruptured two days after launch in April 1970, crippling power and life support. "
+            "The crew of Jim Lovell, Jack Swigert, and Fred Haise used the lunar module Aquarius as "
+            "a lifeboat and looped around the Moon on a free-return trajectory before splashing down "
+            "safely in the Pacific. The mission is remembered as a successful failure."
         ),
     },
     {
-        "title": "Walking",
+        "title": "Artemis II",
         "body": (
-            "Walking is the most common form of human locomotion and is associated with a range of "
-            "health benefits including reduced risk of cardiovascular disease, improved mood, and "
-            "lower mortality. A moderate pace of around 100 steps per minute is often cited as a "
-            "useful threshold. Walking as a deliberate practice has long been associated with "
-            "thinking and writing — many writers credit long walks as part of their creative process."
+            "Artemis II was the first crewed mission of NASA's Artemis program, launching from "
+            "Kennedy Space Center on April 1, 2026 atop the Space Launch System rocket. The "
+            "ten-day flight carried astronauts Reid Wiseman, Victor Glover, Christina Koch, and "
+            "Jeremy Hansen aboard the Orion spacecraft Integrity on a free-return trajectory around "
+            "the Moon and back. It was the first crewed flight beyond low Earth orbit since Apollo "
+            "17 in 1972. The capsule splashed down in the Pacific Ocean on April 10, 2026, marking "
+            "a successful test flight ahead of the Artemis III lunar landing mission."
         ),
     },
 ]
diff --git a/examples/05-fan-out-with-retry/main.py b/examples/05-fan-out-with-retry/main.py
index bbf0f94..3aa7234 100644
--- a/examples/05-fan-out-with-retry/main.py
+++ b/examples/05-fan-out-with-retry/main.py
@@ -1,12 +1,13 @@
-"""openarmature demo: summarize a batch of news headlines in parallel, with
-per-headline retries and timing.
+"""openarmature demo: summarize a batch of lunar-mission headlines in
+parallel, with per-headline retries and timing.
 
-**Use case:** Given a list of news headlines, produce a one-sentence
-summary and a topic tag for each one. The headlines are independent, so
-fan them out and let them run concurrently. Each per-headline run hits
-the LLM, which can transiently fail (rate-limit, timeout, transient 5xx);
-wrap each instance in retry middleware so a flaky call doesn't tank the
-whole batch. A timing middleware records how long each instance took.
+**Use case:** Given a list of lunar-mission news headlines, produce a
+one-sentence summary and a topic tag for each one. The headlines are
+independent, so fan them out and let them run concurrently. Each
+per-headline run hits the LLM, which can transiently fail (rate-limit,
+timeout, transient 5xx); wrap each instance in retry middleware so a
+flaky call doesn't tank the whole batch. A timing middleware records how
+long each instance took.
 
 This is the canonical fan-out shape: N similar tasks, N runtime-determined
 from state, the work independent enough to run concurrently. The
@@ -98,11 +99,11 @@ async def _chat(system: str, user: str) -> str:
 # ---------------------------------------------------------------------------
 
 HEADLINES: list[str] = [
-    "City council approves new bike-lane network spanning downtown",
-    "Researchers report unexpected results from fusion-reactor test run",
-    "Local bakery wins national award for sourdough loaf",
-    "Stock market dips after central bank signals slower rate cuts",
-    "Marathon runner sets new course record under heavy rainfall",
+    "Artemis II splashes down in Pacific after ten-day lunar flyby",
+    "NASA pauses Lunar Gateway program in favor of crewed surface base",
+    "Intuitive Machines prepares IM-3 lander for Reiner Gamma touchdown",
+    "Lunar Reconnaissance Orbiter spots fresh impact crater on far side",
+    "Researchers confirm abundant water ice in permanently shadowed south-pole craters",
 ]
 
 
@@ -148,8 +149,8 @@ async def summarize(s: HeadlineState) -> Mapping[str, Any]:
 async def classify(s: HeadlineState) -> Mapping[str, Any]:
     content = await _chat(
         system=(
-            "Tag the topic of the headline below with ONE word from this set: "
-            "politics, science, business, sports, food, technology, other. "
+            "Tag the topic of the lunar-mission headline below with ONE word "
+            "from this set: crew, lander, orbiter, science, hardware, policy, other. "
             "Reply with just the word, lowercase, no punctuation."
         ),
         user=s.headline,

From 8c358de09414792dd978a771936c4fce6ccb36f6 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 11:32:46 -0700
Subject: [PATCH 4/6] feat(examples): add 07-multimodal-prompt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Caption a historical lunar photograph using a versioned prompt
template plus a multimodal user message.

- ``FilesystemPromptBackend`` loads
  ``prompts/production/caption-lunar-image.j2`` from disk. The
  layout is ``<root>/<label>/<name>.j2`` so prompts can live next to
  the code, be diffed in PRs, and version off their template hash.
- ``PromptManager(backend).get(name, variables={...})`` fetches and
  renders in one call. The returned ``PromptResult`` carries the
  rendered text in ``messages[0]`` plus identifiers
  (``template_hash``, ``rendered_hash``, ``version``) for downstream
  attribution.
- The node pulls the rendered text out of the ``PromptResult``,
  composes a multimodal ``UserMessage(content=[TextBlock(text=...),
  ImageBlock(source=ImageSourceURL(url=...))])``, and passes it to
  ``OpenAIProvider.complete`` — one call carries both the instructions
  and the image.
- ``with_active_prompt(rendered)`` wraps the LLM call so OTel
  observers (none attached in the demo, but the pattern is the same
  in production) stamp ``openarmature.prompt.*`` attributes onto the
  LLM-call span.

Sample: the iconic Apollo 11 photograph of Buzz Aldrin on the lunar
surface, hosted on Wikimedia Commons. The ``IMAGE_URL`` and ``MISSION``
env vars override the defaults for users who want to point at their own
image.

Smoke test list grows to eight demos.
---
 examples/07-multimodal-prompt/main.py         | 211 ++++++++++++++++++
 .../prompts/production/caption-lunar-image.j2 |  10 +
 examples/README.md                            |  11 +
 tests/test_examples_smoke.py                  |   1 +
 4 files changed, 233 insertions(+)
 create mode 100644 examples/07-multimodal-prompt/main.py
 create mode 100644 examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2

diff --git a/examples/07-multimodal-prompt/main.py b/examples/07-multimodal-prompt/main.py
new file mode 100644
index 0000000..aa2d617
--- /dev/null
+++ b/examples/07-multimodal-prompt/main.py
@@ -0,0 +1,211 @@
+"""openarmature demo: caption a historical lunar photograph using a
+versioned prompt template plus a multimodal user message.
+
+**Use case:** Given a photograph from a lunar mission and the mission's
+name, describe what's visible in the image. The text instructions are
+loaded from a versioned prompt template on disk so they can be edited,
+diffed, and rolled out independently of the code. The image is passed
+to the model alongside the rendered text as a multimodal user message.
+
+This is the "prompt management + image input" shape — two openarmature
+surfaces that compose cleanly. The prompt manager gives you traceable,
+hashable, version-tagged instruction text; content blocks give you the
+multimodal payload alongside it.
+
+**What's interesting in the implementation:**
+
+- ``FilesystemPromptBackend`` loads ``caption-lunar-image.j2`` from
+  ``prompts/production/``. The layout is ``<root>/<label>/<name>.j2``;
+  the ``label`` ("production" here) is the rollout channel.
+- ``PromptManager(backend)`` wraps the backend. ``manager.get(name,
+  variables={...})`` fetches and renders in one call, returning a
+  ``PromptResult`` whose ``messages`` carries the rendered text and
+  whose ``template_hash`` / ``rendered_hash`` identify exactly which
+  template+variables produced this output.
+- ``with_active_prompt(result)`` is a context manager. While it's
+  active, OTel observers see ``openarmature.prompt.*`` attributes
+  stamped onto any LLM-call span fired inside the block. No OTel
+  observer is attached in this demo (keeps the output focused on the
+  caption), but the wrapping is the canonical pattern for production.
+- The rendered text becomes a ``TextBlock`` inside a multimodal
+  ``UserMessage``; the image is a sibling ``ImageBlock`` carrying an
+  ``ImageSourceURL``. The provider passes both to the model in one
+  call.
+
+**Configuration** (env vars; OpenAI defaults shown):
+
+- ``LLM_BASE_URL`` defaults to ``https://api.openai.com``. **Host root only.**
+- ``LLM_MODEL`` defaults to ``gpt-4o-mini`` (a vision-capable model).
+- ``LLM_API_KEY`` required (empty for local servers that don't authenticate).
+- ``IMAGE_URL`` / ``MISSION`` override the default image and mission name
+  (defaults: a public-domain NASA photo of Buzz Aldrin; "Apollo 11").
+
+Run with:
+
+    uv sync --group examples
+    cd examples/07-multimodal-prompt
+    LLM_API_KEY=sk-... uv run python main.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Annotated, Any
+
+from pydantic import Field
+
+from openarmature.graph import (
+    END,
+    CompiledGraph,
+    GraphBuilder,
+    State,
+    append,
+)
+from openarmature.llm import (
+    ImageBlock,
+    ImageSourceURL,
+    OpenAIProvider,
+    TextBlock,
+    UserMessage,
+)
+from openarmature.prompts import (
+    FilesystemPromptBackend,
+    PromptManager,
+    with_active_prompt,
+)
+
+# ---------------------------------------------------------------------------
+# Defaults
+# ---------------------------------------------------------------------------
+
+# Default image: the iconic Apollo 11 photograph of Buzz Aldrin standing
+# on the lunar surface. Hosted on Wikimedia Commons (public-domain NASA
+# imagery).
+DEFAULT_IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/9/98/Aldrin_Apollo_11_original.jpg"
+DEFAULT_MISSION = "Apollo 11"
+
+_provider_instance: OpenAIProvider | None = None
+
+
+def _get_provider() -> OpenAIProvider:
+    global _provider_instance
+    if _provider_instance is None:
+        _provider_instance = OpenAIProvider(
+            base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"),
+            model=os.environ.get("LLM_MODEL", "gpt-4o-mini"),
+            api_key=os.environ.get("LLM_API_KEY") or None,
+        )
+    return _provider_instance
+
+
+# Build the prompt manager once at import time. The manager is cheap to
+# construct, holds no per-call state, and is safe to share across nodes.
+_PROMPT_ROOT = Path(__file__).parent / "prompts"
+_PROMPT_MANAGER = PromptManager(FilesystemPromptBackend(_PROMPT_ROOT))
+
+
+# ---------------------------------------------------------------------------
+# State
+# ---------------------------------------------------------------------------
+
+
+class CaptionState(State):
+    image_url: str
+    mission: str
+    caption: str = ""
+    prompt_version: str = ""
+    template_hash: str = ""
+    trace: Annotated[list[str], append] = Field(default_factory=list)
+
+
+# ---------------------------------------------------------------------------
+# Node
+# ---------------------------------------------------------------------------
+
+
+async def caption(s: CaptionState) -> Mapping[str, Any]:
+    # Load + render the template in one call. ``variables`` are strict:
+    # an undefined name in the template raises PromptRenderError.
+    rendered = await _PROMPT_MANAGER.get(
+        "caption-lunar-image",
+        variables={"mission": s.mission},
+    )
+
+    # The PromptResult's messages list carries the rendered text as a
+    # UserMessage. Pull out the text and compose a multimodal user
+    # message that also carries the image.
+    rendered_msg = rendered.messages[0]
+    assert isinstance(rendered_msg, UserMessage)
+    rendered_text = rendered_msg.content
+    assert isinstance(rendered_text, str)
+
+    multimodal_message = UserMessage(
+        content=[
+            TextBlock(text=rendered_text),
+            ImageBlock(source=ImageSourceURL(url=s.image_url)),
+        ],
+    )
+
+    # ``with_active_prompt`` propagates the prompt identifiers via
+    # ContextVar to any observer that cares. An OTel observer would
+    # stamp openarmature.prompt.{name,version,label,template_hash,
+    # rendered_hash} on the LLM-call span fired inside this block. No
+    # observer is attached in this demo, but the wrapping is the
+    # canonical pattern; leaving it out drops the audit trail.
+    with with_active_prompt(rendered):
+        response = await _get_provider().complete([multimodal_message])
+
+    return {
+        "caption": (response.message.content or "").strip(),
+        "prompt_version": rendered.version,
+        "template_hash": rendered.template_hash,
+        "trace": ["caption"],
+    }
+
+
+def build_graph() -> CompiledGraph[CaptionState]:
+    return (
+        GraphBuilder(CaptionState)
+        .add_node("caption", caption)
+        .add_edge("caption", END)
+        .set_entry("caption")
+        .compile()
+    )
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+async def main() -> None:
+    image_url = os.environ.get("IMAGE_URL", DEFAULT_IMAGE_URL)
+    mission = os.environ.get("MISSION", DEFAULT_MISSION)
+
+    print("=" * 72)
+    print("Caption a lunar photograph using a versioned prompt template")
+    print("=" * 72)
+    print()
+    print(f"  mission:   {mission}")
+    print(f"  image_url: {image_url}")
+    print()
+
+    graph = build_graph()
+    try:
+        final = await graph.invoke(CaptionState(image_url=image_url, mission=mission))
+        print(f"  prompt:    caption-lunar-image @ {final.prompt_version}")
+        print(f"  template:  {final.template_hash}")
+        print()
+        print("  caption:")
+        print(f"    {final.caption}")
+    finally:
+        await graph.drain()
+        if _provider_instance is not None:
+            await _provider_instance.aclose()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2 b/examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2
new file mode 100644
index 0000000..931dac0
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2
@@ -0,0 +1,10 @@
+You are looking at a historical photograph from {{ mission }}.
+
+Describe what's visible in the image in one tight paragraph (~3 sentences).
+Cover three things in order:
+
+1. The subject of the photo — who or what is in focus.
+2. The environment — lunar surface details, equipment, lighting.
+3. Anything distinctive that identifies the era or this specific mission.
+
+No preamble; no markdown; no headers.
diff --git a/examples/README.md b/examples/README.md
index 1f38992..f797367 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -61,6 +61,17 @@ Demonstrates: `add_parallel_branches_node`, `BranchSpec` per branch
 with input/output projection, heterogeneous branch state schemas,
 per-branch middleware.
 
+### [`07-multimodal-prompt/`](./07-multimodal-prompt/main.py)
+
+Caption a historical lunar photograph using a versioned prompt
+template plus a multimodal user message. The prompt text is loaded
+from a Jinja2 template on disk via `FilesystemPromptBackend`; the
+image is passed alongside the rendered text as an `ImageBlock` in a
+multimodal `UserMessage`. Demonstrates: `PromptManager` + filesystem
+backend, prompt fetch + render with template variables,
+`with_active_prompt` context-var propagation for observability,
+multimodal `UserMessage` carrying both text and image content blocks.
+
 ## Configuration
 
 All demos configure their LLM client via env vars; OpenAI public-API
diff --git a/tests/test_examples_smoke.py b/tests/test_examples_smoke.py
index 428c468..527871f 100644
--- a/tests/test_examples_smoke.py
+++ b/tests/test_examples_smoke.py
@@ -37,6 +37,7 @@
     "04-nested-subgraphs",
     "05-fan-out-with-retry",
     "06-parallel-branches",
+    "07-multimodal-prompt",
 ]
 
 

From 398c648731650d50a84913b5cb0fc26e789fc59b Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 11:53:15 -0700
Subject: [PATCH 5/6] feat(examples): add 08-checkpointing-and-migration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A lunar-mission planning pipeline that checkpoints after every step,
then resumes the saved record under an upgraded state schema with a
v1→v2 migration.

Phase 1 — v1 invoke:

- ``MissionPlanStateV1`` declares ``schema_version: ClassVar[str] =
  "v1"`` and has four user-facing fields (destination, objective,
  crew_size, timeline) plus trace.
- ``SQLiteCheckpointer(path, serialization="json")`` writes records
  to a temp DB. JSON mode is the migration-eligible serialization —
  pickle mode can't bridge schemas.
- v1 graph: define_objective → size_crew → draft_timeline → END.
  Each completed node fires a save stamped with schema_version="v1".
- Invoked with a deterministic correlation_id so phase 2 can look
  up the invocation by ``CheckpointFilter(correlation_id=...)``.

Phase 2 — v2 resume:

- ``MissionPlanStateV2`` adds a ``risk_assessment`` field and
  ``schema_version = "v2"``.
- ``GraphBuilder.with_state_migration("v1", "v2",
  migrate_v1_to_v2)`` registers the migration. ``migrate_v1_to_v2``
  is a pure function that backfills ``risk_assessment=""`` for v1
  records.
- v2 graph adds an ``assess_risks`` node at the end of the v1
  topology.
- ``invoke(state, resume_invocation=<v1 id>)`` loads the v1 record,
  applies the migration, re-deserializes as ``MissionPlanStateV2``,
  and continues at ``assess_risks`` (the first node not in
  completed_positions).

Final state has the v1 work (objective, crew_size, timeline) AND
the v2 risk_assessment, all under the upgraded schema. Smoke test
list grows to nine demos.
---
 .../08-checkpointing-and-migration/main.py    | 339 ++++++++++++++++++
 examples/README.md                            |  12 +
 tests/test_examples_smoke.py                  |   1 +
 3 files changed, 352 insertions(+)
 create mode 100644 examples/08-checkpointing-and-migration/main.py

diff --git a/examples/08-checkpointing-and-migration/main.py b/examples/08-checkpointing-and-migration/main.py
new file mode 100644
index 0000000..c97c150
--- /dev/null
+++ b/examples/08-checkpointing-and-migration/main.py
@@ -0,0 +1,339 @@
+"""openarmature demo: a lunar-mission planning pipeline that checkpoints
+its progress, then resumes under an upgraded state schema.
+
+**Use case:** A multi-step planning pipeline drafts a lunar mission plan
+(objective, crew size, timeline). It writes a checkpoint after every
+step so a crash or restart can pick up where it left off. Some time
+later, you add a new analysis step (risk assessment) and a new state
+field (``risk_assessment``) to support it. Resuming an old checkpoint
+shouldn't require re-running the work that already finished — and
+shouldn't fail because the saved state has the old shape.
+
+That's exactly what state migration is for. The pipeline runs once
+against the v1 schema, the checkpoint persists, and the v2 schema
+declares a migration from v1 that backfills the new field. The v2
+graph resumes from the v1 checkpoint, the migration runs once on the
+loaded state, and execution picks up at the new node.
+
+**What's interesting in the implementation:**
+
+- ``SQLiteCheckpointer(path, serialization="json")`` writes records to
+  a SQLite file in JSON mode. JSON is the migration-eligible
+  serialization — it lets the engine load the saved state as a plain
+  dict, apply migrations, and re-deserialize against the current
+  state class. ``pickle`` mode is faster but can't bridge schemas.
+- ``GraphBuilder.with_checkpointer(...)`` wires the checkpointer to
+  the graph. The engine then fires a save at every ``completed``
+  event for outermost and subgraph-internal nodes.
+- ``State.schema_version`` is a ``ClassVar[str]`` declared on the
+  state class. Empty string is the "no migration support" sentinel;
+  any non-empty value opts the class into the migration registry.
+- ``GraphBuilder.with_state_migration(from_version, to_version,
+  migrate)`` registers one edge of the migration chain. The
+  ``migrate`` callable receives the saved state as a dict and returns
+  the dict at the new schema. Pure function; no I/O, no side effects.
+- ``compiled.invoke(state, resume_invocation=<id>)`` resumes from a
+  saved record. The engine reads the record, applies any registered
+  migration chain that bridges the saved ``schema_version`` to the
+  current state class's, and continues execution from the first node
+  whose ``completed`` event isn't in the record.
+
+**Configuration** (env vars; OpenAI defaults shown):
+
+- ``LLM_BASE_URL`` defaults to ``https://api.openai.com``. **Host root only.**
+- ``LLM_MODEL`` defaults to ``gpt-4o-mini``.
+- ``LLM_API_KEY`` required (empty for local servers that don't authenticate).
+
+Run with:
+
+    uv sync --group examples
+    cd examples/08-checkpointing-and-migration
+    LLM_API_KEY=sk-... uv run python main.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import tempfile
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Annotated, Any, ClassVar
+
+from pydantic import Field
+
+from openarmature.checkpoint import CheckpointFilter, SQLiteCheckpointer
+from openarmature.graph import (
+    END,
+    CompiledGraph,
+    GraphBuilder,
+    State,
+    append,
+)
+from openarmature.llm import OpenAIProvider, SystemMessage, UserMessage
+
+_provider_instance: OpenAIProvider | None = None
+
+
+def _get_provider() -> OpenAIProvider:
+    global _provider_instance
+    if _provider_instance is None:
+        _provider_instance = OpenAIProvider(
+            base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"),
+            model=os.environ.get("LLM_MODEL", "gpt-4o-mini"),
+            api_key=os.environ.get("LLM_API_KEY") or None,
+        )
+    return _provider_instance
+
+
+async def _chat(system: str, user: str) -> str:
+    response = await _get_provider().complete(
+        [SystemMessage(content=system), UserMessage(content=user)],
+    )
+    return (response.message.content or "").strip()
+
+
+# ---------------------------------------------------------------------------
+# Phase 1: v1 schema + v1 graph
+# ---------------------------------------------------------------------------
+# The v1 schema doesn't have ``risk_assessment``. The v1 graph is
+# define_objective → size_crew → draft_timeline → END. A real codebase
+# would have this as ``state.py`` and ``main.py`` until v2 came along;
+# here both generations live in the same file so the demo can replay both.
+
+
+class MissionPlanStateV1(State):
+    schema_version: ClassVar[str] = "v1"
+
+    destination: str = ""
+    objective: str = ""
+    crew_size: int = 0
+    timeline: str = ""
+    trace: Annotated[list[str], append] = Field(default_factory=list)
+
+
+async def define_objective_v1(s: MissionPlanStateV1) -> Mapping[str, Any]:
+    content = await _chat(
+        system=(
+            "You are a mission planner. Given a lunar destination, state the "
+            "single primary objective of a notional crewed mission there in one "
+            "tight sentence. No preamble."
+        ),
+        user=s.destination,
+    )
+    return {"objective": content, "trace": ["define_objective"]}
+
+
+async def size_crew_v1(s: MissionPlanStateV1) -> Mapping[str, Any]:
+    content = await _chat(
+        system=(
+            "You are a mission planner. Given the objective below, reply with "
+            "the optimal crew size as a single integer between 2 and 8. No "
+            "other text."
+        ),
+        user=s.objective,
+    )
+    digits = "".join(c for c in content if c.isdigit())
+    n = int(digits) if digits else 4
+    return {"crew_size": max(2, min(8, n)), "trace": ["size_crew"]}
+
+
+async def draft_timeline_v1(s: MissionPlanStateV1) -> Mapping[str, Any]:
+    content = await _chat(
+        system=(
+            "You are a mission planner. Given the objective and crew size, "
+            "draft a high-level timeline as a single sentence covering launch, "
+            "lunar transit, surface operations, and return. No preamble."
+        ),
+        user=f"Objective: {s.objective}\nCrew size: {s.crew_size}",
+    )
+    return {"timeline": content, "trace": ["draft_timeline"]}
+
+
+def build_graph_v1(checkpointer: SQLiteCheckpointer) -> CompiledGraph[MissionPlanStateV1]:
+    return (
+        GraphBuilder(MissionPlanStateV1)
+        .add_node("define_objective", define_objective_v1)
+        .add_node("size_crew", size_crew_v1)
+        .add_node("draft_timeline", draft_timeline_v1)
+        .add_edge("define_objective", "size_crew")
+        .add_edge("size_crew", "draft_timeline")
+        .add_edge("draft_timeline", END)
+        .set_entry("define_objective")
+        .with_checkpointer(checkpointer)
+        .compile()
+    )
+
+
+# ---------------------------------------------------------------------------
+# Phase 2: v2 schema + migration + v2 graph
+# ---------------------------------------------------------------------------
+# v2 adds a ``risk_assessment`` field and a new ``assess_risks`` node at
+# the end of the pipeline. The migration backfills ``risk_assessment``
+# with an empty string for v1 records — the new node will fill it in
+# when resume executes.
+
+
+class MissionPlanStateV2(State):
+    schema_version: ClassVar[str] = "v2"
+
+    destination: str = ""
+    objective: str = ""
+    crew_size: int = 0
+    timeline: str = ""
+    risk_assessment: str = ""  # NEW in v2
+    trace: Annotated[list[str], append] = Field(default_factory=list)
+
+
+def migrate_v1_to_v2(state_dict: dict[str, Any]) -> dict[str, Any]:
+    """Backfill the new ``risk_assessment`` field with an empty string.
+
+    Pure function: takes the saved state as a dict, returns the dict at
+    the new schema. The engine reads the v1 record, applies this
+    function, and re-deserializes against MissionPlanStateV2.
+    """
+    return {**state_dict, "risk_assessment": ""}
+
+
+async def define_objective_v2(s: MissionPlanStateV2) -> Mapping[str, Any]:
+    # Same body as v1; included so v2 builds a complete graph. When
+    # resuming a saved record whose ``define_objective`` already
+    # completed, the engine skips this node and starts from the first
+    # un-completed step.
+    return await define_objective_v1(s)  # type: ignore[arg-type]
+
+
+async def size_crew_v2(s: MissionPlanStateV2) -> Mapping[str, Any]:
+    return await size_crew_v1(s)  # type: ignore[arg-type]
+
+
+async def draft_timeline_v2(s: MissionPlanStateV2) -> Mapping[str, Any]:
+    return await draft_timeline_v1(s)  # type: ignore[arg-type]
+
+
+async def assess_risks_v2(s: MissionPlanStateV2) -> Mapping[str, Any]:
+    """The new step v2 introduces — names the top risk for the plan."""
+    content = await _chat(
+        system=(
+            "You are a mission planner. Given the timeline below, identify "
+            "the single highest-priority risk in one short sentence. No "
+            "preamble."
+        ),
+        user=s.timeline,
+    )
+    return {"risk_assessment": content, "trace": ["assess_risks"]}
+
+
+def build_graph(checkpointer: SQLiteCheckpointer | None = None) -> CompiledGraph[MissionPlanStateV2]:
+    """Build the v2 graph with checkpointing and migration registered.
+
+    The smoke test calls this with no checkpointer; main() passes a real
+    one. Either path produces a compilable graph.
+    """
+    builder = (
+        GraphBuilder(MissionPlanStateV2)
+        .add_node("define_objective", define_objective_v2)
+        .add_node("size_crew", size_crew_v2)
+        .add_node("draft_timeline", draft_timeline_v2)
+        .add_node("assess_risks", assess_risks_v2)
+        .add_edge("define_objective", "size_crew")
+        .add_edge("size_crew", "draft_timeline")
+        .add_edge("draft_timeline", "assess_risks")
+        .add_edge("assess_risks", END)
+        .set_entry("define_objective")
+        .with_state_migration("v1", "v2", migrate_v1_to_v2)
+    )
+    if checkpointer is not None:
+        builder = builder.with_checkpointer(checkpointer)
+    return builder.compile()
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+async def main() -> None:
+    destination = "Lunar South Pole"
+
+    # SQLite checkpointer in JSON mode (the migration-eligible
+    # serialization). A real app would point at a persistent path; for
+    # the demo a temp file keeps the workspace clean.
+    db_dir = tempfile.mkdtemp(prefix="oa-checkpoint-demo-")
+    db_path = Path(db_dir) / "checkpoints.sqlite"
+    checkpointer = SQLiteCheckpointer(path=db_path, serialization="json")
+
+    print("=" * 72)
+    print("Phase 1 — invoke v1 graph; checkpoints save after every node")
+    print("=" * 72)
+    print()
+    print(f"  destination:       {destination}")
+    print(f"  checkpoint db:     {db_path}")
+    print()
+
+    # Pass a deterministic correlation_id so phase 2 can find the
+    # invocation's saved records via the checkpoint filter. Without a
+    # caller-supplied correlation_id, invoke() generates a UUIDv4.
+    run_id = "demo-mission-plan-1"
+
+    graph_v1 = build_graph_v1(checkpointer)
+    initial_v1 = MissionPlanStateV1(destination=destination)
+    final_v1 = await graph_v1.invoke(initial_v1, correlation_id=run_id)
+    await graph_v1.drain()
+
+    # Look up the saved record's invocation_id by correlation_id. The
+    # invocation_id is generated by invoke() and isn't exposed on the
+    # returned state; finding it through the checkpointer's list API
+    # is the canonical lookup path.
+    summaries = list(await checkpointer.list(CheckpointFilter(correlation_id=run_id)))
+    assert summaries, "expected at least one saved checkpoint"
+    invocation_id = summaries[-1].invocation_id
+
+    print("v1 result:")
+    print(f"  objective:  {final_v1.objective}")
+    print(f"  crew_size:  {final_v1.crew_size}")
+    print(f"  timeline:   {final_v1.timeline}")
+    print()
+    print(f"  v1 invocation_id: {invocation_id}")
+    print()
+
+    print("=" * 72)
+    print("Phase 2 — invoke v2 graph with resume; v1->v2 migration runs")
+    print("=" * 72)
+    print()
+    print("  v2 adds:    risk_assessment field + assess_risks node")
+    print("  migration:  backfills risk_assessment='' for v1 records")
+    print()
+
+    graph_v2 = build_graph(checkpointer)
+    # Resume from the v1 invocation. The engine reads the saved record,
+    # applies migrate_v1_to_v2, re-deserializes against
+    # MissionPlanStateV2, and continues at the first uncompleted node
+    # (assess_risks — the v1 pipeline's three nodes are all in
+    # completed_positions, the new v2 node is not).
+    final_v2 = await graph_v2.invoke(
+        MissionPlanStateV2(destination=destination),
+        resume_invocation=invocation_id,
+    )
+    await graph_v2.drain()
+
+    print("v2 result after resume:")
+    print(f"  objective:        {final_v2.objective}")
+    print(f"  crew_size:        {final_v2.crew_size}")
+    print(f"  timeline:         {final_v2.timeline}")
+    print(f"  risk_assessment:  {final_v2.risk_assessment}")
+    print()
+    print(f"  trace: {final_v2.trace}")
+    print()
+    print(
+        "The v1 nodes appear once each in v1's trace and NOT in v2's "
+        "trace — they were skipped on resume because completed_positions "
+        "already covered them. Only assess_risks ran in phase 2."
+    )
+
+    if _provider_instance is not None:
+        await _provider_instance.aclose()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/README.md b/examples/README.md
index f797367..624a9a6 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -72,6 +72,18 @@ backend, prompt fetch + render with template variables,
 `with_active_prompt` context-var propagation for observability,
 multimodal `UserMessage` carrying both text and image content blocks.
 
+### [`08-checkpointing-and-migration/`](./08-checkpointing-and-migration/main.py)
+
+A lunar-mission planning pipeline that checkpoints after every step,
+then resumes the saved record under an upgraded state schema. Phase
+one invokes a v1 graph against `MissionPlanStateV1`; the
+`SQLiteCheckpointer` (JSON mode) writes records to a temp DB. Phase
+two registers a v1→v2 migration backfilling a new `risk_assessment`
+field, builds a v2 graph with one new node, and resumes from the v1
+invocation. Demonstrates: `SQLiteCheckpointer(serialization="json")`,
+`with_checkpointer`, save-on-completed-event, `State.schema_version`,
+`with_state_migration`, `invoke(resume_invocation=...)`.
+
 ## Configuration
 
 All demos configure their LLM client via env vars; OpenAI public-API
diff --git a/tests/test_examples_smoke.py b/tests/test_examples_smoke.py
index 527871f..adb9cdb 100644
--- a/tests/test_examples_smoke.py
+++ b/tests/test_examples_smoke.py
@@ -38,6 +38,7 @@
     "05-fan-out-with-retry",
     "06-parallel-branches",
     "07-multimodal-prompt",
+    "08-checkpointing-and-migration",
 ]
 
 

From c47ad81e442c45481c153b441ac8e85efc8d68c8 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 12:06:45 -0700
Subject: [PATCH 6/6] chore(examples): tighten 05/07/08 from PR-review feedback

- 05 fan-out-with-retry: _timings.clear() at the top of main() so a
  REPL or repeated-main() driver doesn't accumulate timings across
  invocations. Module-level retention was an oversight.
- 07 multimodal-prompt: replace the two type/shape asserts in
  caption() with an explicit isinstance check that raises
  RuntimeError. Asserts strip under python -O; the new shape narrows
  for pyright AND fails loudly if PromptManager's return contract
  drifts.
- 08 checkpointing-and-migration: switch main() from tempfile.mkdtemp
  to tempfile.TemporaryDirectory wrapping the phase 1/2 logic. The
  SQLite DB + temp folder are now cleaned up on both the happy path
  and any raised exception, instead of leaving /tmp/oa-checkpoint-demo-*
  behind across runs. The provider's aclose() call also moves into a
  try/finally inside that block, so it runs even when a phase raises.
---
 examples/05-fan-out-with-retry/main.py        |   4 +
 examples/07-multimodal-prompt/main.py         |   8 +-
 .../08-checkpointing-and-migration/main.py    | 154 +++++++++---------
 3 files changed, 88 insertions(+), 78 deletions(-)

diff --git a/examples/05-fan-out-with-retry/main.py b/examples/05-fan-out-with-retry/main.py
index 3aa7234..88cb485 100644
--- a/examples/05-fan-out-with-retry/main.py
+++ b/examples/05-fan-out-with-retry/main.py
@@ -260,6 +260,10 @@ def build_graph() -> CompiledGraph[BatchState]:
 
 
 async def main() -> None:
+    # Reset module-level capture so a REPL or repeated-main() driver
+    # doesn't accumulate timings across invocations.
+    _timings.clear()
+
     graph = build_graph()
 
     initial = BatchState(headlines=HEADLINES)
diff --git a/examples/07-multimodal-prompt/main.py b/examples/07-multimodal-prompt/main.py
index aa2d617..48028f9 100644
--- a/examples/07-multimodal-prompt/main.py
+++ b/examples/07-multimodal-prompt/main.py
@@ -138,9 +138,13 @@ async def caption(s: CaptionState) -> Mapping[str, Any]:
     # UserMessage. Pull out the text and compose a multimodal user
     # message that also carries the image.
     rendered_msg = rendered.messages[0]
-    assert isinstance(rendered_msg, UserMessage)
+    if not isinstance(rendered_msg, UserMessage) or not isinstance(rendered_msg.content, str):
+        raise RuntimeError(
+            "PromptManager.render() returned an unexpected shape; expected a single "
+            f"UserMessage with str content, got {type(rendered_msg).__name__} "
+            f"with content type {type(rendered_msg.content).__name__}"
+        )
     rendered_text = rendered_msg.content
-    assert isinstance(rendered_text, str)
 
     multimodal_message = UserMessage(
         content=[
diff --git a/examples/08-checkpointing-and-migration/main.py b/examples/08-checkpointing-and-migration/main.py
index c97c150..73e7ee6 100644
--- a/examples/08-checkpointing-and-migration/main.py
+++ b/examples/08-checkpointing-and-migration/main.py
@@ -257,82 +257,84 @@ async def main() -> None:
     destination = "Lunar South Pole"
 
     # SQLite checkpointer in JSON mode (the migration-eligible
-    # serialization). A real app would point at a persistent path; for
-    # the demo a temp file keeps the workspace clean.
-    db_dir = tempfile.mkdtemp(prefix="oa-checkpoint-demo-")
-    db_path = Path(db_dir) / "checkpoints.sqlite"
-    checkpointer = SQLiteCheckpointer(path=db_path, serialization="json")
-
-    print("=" * 72)
-    print("Phase 1 — invoke v1 graph; checkpoints save after every node")
-    print("=" * 72)
-    print()
-    print(f"  destination:       {destination}")
-    print(f"  checkpoint db:     {db_path}")
-    print()
-
-    # Pass a deterministic correlation_id so phase 2 can find the
-    # invocation's saved records via the checkpoint filter. Without a
-    # caller-supplied correlation_id, invoke() generates a UUIDv4.
-    run_id = "demo-mission-plan-1"
-
-    graph_v1 = build_graph_v1(checkpointer)
-    initial_v1 = MissionPlanStateV1(destination=destination)
-    final_v1 = await graph_v1.invoke(initial_v1, correlation_id=run_id)
-    await graph_v1.drain()
-
-    # Look up the saved record's invocation_id by correlation_id. The
-    # invocation_id is generated by invoke() and isn't exposed on the
-    # returned state; finding it through the checkpointer's list API
-    # is the canonical lookup path.
-    summaries = list(await checkpointer.list(CheckpointFilter(correlation_id=run_id)))
-    assert summaries, "expected at least one saved checkpoint"
-    invocation_id = summaries[-1].invocation_id
-
-    print("v1 result:")
-    print(f"  objective:  {final_v1.objective}")
-    print(f"  crew_size:  {final_v1.crew_size}")
-    print(f"  timeline:   {final_v1.timeline}")
-    print()
-    print(f"  v1 invocation_id: {invocation_id}")
-    print()
-
-    print("=" * 72)
-    print("Phase 2 — invoke v2 graph with resume; v1->v2 migration runs")
-    print("=" * 72)
-    print()
-    print("  v2 adds:    risk_assessment field + assess_risks node")
-    print("  migration:  backfills risk_assessment='' for v1 records")
-    print()
-
-    graph_v2 = build_graph(checkpointer)
-    # Resume from the v1 invocation. The engine reads the saved record,
-    # applies migrate_v1_to_v2, re-deserializes against
-    # MissionPlanStateV2, and continues at the first uncompleted node
-    # (assess_risks — the v1 pipeline's three nodes are all in
-    # completed_positions, the new v2 node is not).
-    final_v2 = await graph_v2.invoke(
-        MissionPlanStateV2(destination=destination),
-        resume_invocation=invocation_id,
-    )
-    await graph_v2.drain()
-
-    print("v2 result after resume:")
-    print(f"  objective:        {final_v2.objective}")
-    print(f"  crew_size:        {final_v2.crew_size}")
-    print(f"  timeline:         {final_v2.timeline}")
-    print(f"  risk_assessment:  {final_v2.risk_assessment}")
-    print()
-    print(f"  trace: {final_v2.trace}")
-    print()
-    print(
-        "The v1 nodes appear once each in v1's trace and NOT in v2's "
-        "trace — they were skipped on resume because completed_positions "
-        "already covered them. Only assess_risks ran in phase 2."
-    )
-
-    if _provider_instance is not None:
-        await _provider_instance.aclose()
+    # serialization). A real app would point at a persistent path; the
+    # demo uses TemporaryDirectory so the DB file + folder get cleaned
+    # up on exit (happy path or exception) without leaving cruft in /tmp.
+    with tempfile.TemporaryDirectory(prefix="oa-checkpoint-demo-") as db_dir:
+        db_path = Path(db_dir) / "checkpoints.sqlite"
+        checkpointer = SQLiteCheckpointer(path=db_path, serialization="json")
+
+        try:
+            print("=" * 72)
+            print("Phase 1 — invoke v1 graph; checkpoints save after every node")
+            print("=" * 72)
+            print()
+            print(f"  destination:       {destination}")
+            print(f"  checkpoint db:     {db_path}")
+            print()
+
+            # Pass a deterministic correlation_id so phase 2 can find the
+            # invocation's saved records via the checkpoint filter. Without a
+            # caller-supplied correlation_id, invoke() generates a UUIDv4.
+            run_id = "demo-mission-plan-1"
+
+            graph_v1 = build_graph_v1(checkpointer)
+            initial_v1 = MissionPlanStateV1(destination=destination)
+            final_v1 = await graph_v1.invoke(initial_v1, correlation_id=run_id)
+            await graph_v1.drain()
+
+            # Look up the saved record's invocation_id by correlation_id. The
+            # invocation_id is generated by invoke() and isn't exposed on the
+            # returned state; finding it through the checkpointer's list API
+            # is the canonical lookup path.
+            summaries = list(await checkpointer.list(CheckpointFilter(correlation_id=run_id)))
+            assert summaries, "expected at least one saved checkpoint"
+            invocation_id = summaries[-1].invocation_id
+
+            print("v1 result:")
+            print(f"  objective:  {final_v1.objective}")
+            print(f"  crew_size:  {final_v1.crew_size}")
+            print(f"  timeline:   {final_v1.timeline}")
+            print()
+            print(f"  v1 invocation_id: {invocation_id}")
+            print()
+
+            print("=" * 72)
+            print("Phase 2 — invoke v2 graph with resume; v1->v2 migration runs")
+            print("=" * 72)
+            print()
+            print("  v2 adds:    risk_assessment field + assess_risks node")
+            print("  migration:  backfills risk_assessment='' for v1 records")
+            print()
+
+            graph_v2 = build_graph(checkpointer)
+            # Resume from the v1 invocation. The engine reads the saved record,
+            # applies migrate_v1_to_v2, re-deserializes against
+            # MissionPlanStateV2, and continues at the first uncompleted node
+            # (assess_risks — the v1 pipeline's three nodes are all in
+            # completed_positions, the new v2 node is not).
+            final_v2 = await graph_v2.invoke(
+                MissionPlanStateV2(destination=destination),
+                resume_invocation=invocation_id,
+            )
+            await graph_v2.drain()
+
+            print("v2 result after resume:")
+            print(f"  objective:        {final_v2.objective}")
+            print(f"  crew_size:        {final_v2.crew_size}")
+            print(f"  timeline:         {final_v2.timeline}")
+            print(f"  risk_assessment:  {final_v2.risk_assessment}")
+            print()
+            print(f"  trace: {final_v2.trace}")
+            print()
+            print(
+                "The v1 nodes appear once each in v1's trace and NOT in v2's "
+                "trace — they were skipped on resume because completed_positions "
+                "already covered them. Only assess_risks ran in phase 2."
+            )
+        finally:
+            if _provider_instance is not None:
+                await _provider_instance.aclose()
 
 
 if __name__ == "__main__":