From e534b10f4579c75a83183dd5010acf89b4a835bb Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 12:50:25 -0700
Subject: [PATCH 1/5] chore(examples): fold spec coverage gaps

Spec agent's coverage review (review-examples-coverage thread) flagged
one major gap (tool-calling, follow-on) and several smaller gaps that
fold naturally into existing examples. This commit closes the folds.

- 00 hello-world: pass ``RuntimeConfig(temperature=0.0)`` to every
  ``complete()`` call. Surfaces the per-call sampling knob and makes
  the demo's routing reproducible.
- 05 fan-out-with-retry: add ``error_policy`` param to ``build_graph``
  with a ``COLLECT_MODE`` env-var toggle in main(); new
  ``instance_errors`` state field for the ``errors_field`` collection.
  Add ``fan_out_config_observer`` that reads
  ``NodeEvent.fan_out_config`` on the fan-out node's dispatch event
  and prints the resolved item_count/concurrency/error_policy.
- 06 parallel-branches: add ``branch_attribution_observer`` that
  reads ``NodeEvent.branch_name`` on inner-node events and prints
  which branch each inner step came from. Outermost nodes (receive,
  enrich, present) have ``branch_name=None``.
- 07 multimodal-prompt: wire ``PromptManager`` with a primary +
  fallback ``FilesystemPromptBackend`` to demonstrate composite-
  backend setup. Add second prompt ``identify-mission.j2`` and a
  second node ``identify`` that uses the caption from the first
  node. Wrap the whole invoke in ``with_active_prompt_group(...)``
  so an observability ``group_name`` propagates onto both LLM calls'
  spans. New ``_build_image_block`` helper switches between
  ``ImageSourceURL`` (default) and ``ImageSourceInline`` (when
  ``IMAGE_PATH`` env var is set) with media_type inferred from the
  file extension.
- 08 checkpointing-and-migration: docstring note on
  ``migrate_v1_to_v2`` explaining how a v3 schema would compose via
  BFS chain resolution + the chain-ambiguity error category. No
  code change.

Tool-calling lands as a new example (09-tool-use) in the next
commit on this branch.
---
 examples/00-hello-world/main.py               |  17 +-
 examples/05-fan-out-with-retry/main.py        |  69 ++++-
 examples/06-parallel-branches/main.py         |  23 ++
 examples/07-multimodal-prompt/main.py         | 274 ++++++++++++++----
 .../prompts/production/identify-mission.j2    |  13 +
 .../production/caption-lunar-image.j2         |  11 +
 .../08-checkpointing-and-migration/main.py    |  13 +
 7 files changed, 354 insertions(+), 66 deletions(-)
 create mode 100644 examples/07-multimodal-prompt/prompts/production/identify-mission.j2
 create mode 100644 examples/07-multimodal-prompt/prompts_fallback/production/caption-lunar-image.j2

diff --git a/examples/00-hello-world/main.py b/examples/00-hello-world/main.py
index 53eb2de..b6ce0ed 100644
--- a/examples/00-hello-world/main.py
+++ b/examples/00-hello-world/main.py
@@ -12,6 +12,9 @@
   - Pydantic class (``Classification``, ``Summary``): typed
     instance on ``Response.parsed``.
   - JSON Schema dict (``research``): raw dict on ``Response.parsed``.
+- ``RuntimeConfig`` for per-call sampling knobs — every ``complete()``
+  here passes ``config=RuntimeConfig(temperature=0.0)`` so repeated
+  runs are as reproducible as the provider allows.
 - Conditional routing on a parsed field (``route`` reads
   ``state.classification.intent``).
 - ``attach_observer`` for boundary visibility.
@@ -49,7 +52,7 @@
     append,
     merge,
 )
-from openarmature.llm import OpenAIProvider, UserMessage
+from openarmature.llm import OpenAIProvider, RuntimeConfig, UserMessage
 
 
 # Pydantic schemas the model is constrained to produce. Passing a
@@ -84,6 +87,15 @@ class PipelineState(State):
 # builders, IDE inspection) import this module without running main().
 _provider_instance: OpenAIProvider | None = None
 
+# Per-call sampling knobs. The demo locks the model at temperature 0
+# so the routing classification (and the rest of the run) reproduces
+# across invocations — useful for tutorial output, less appropriate
+# for production where some sampling variety is desirable.
+# RuntimeConfig also surfaces max_tokens, top_p, and seed; only
+# temperature is set here so the others fall through to provider
+# defaults.
+_DETERMINISTIC = RuntimeConfig(temperature=0.0)
+
 
 def _get_provider() -> OpenAIProvider:
     global _provider_instance
@@ -113,6 +125,7 @@ async def classify(state: PipelineState) -> Mapping[str, Any]:
             )
         ],
         response_schema=Classification,
+        config=_DETERMINISTIC,
     )
     return {"classification": response.parsed, "metadata": {"classified_by": "llm"}}
 
@@ -140,6 +153,7 @@ async def research(state: PipelineState) -> Mapping[str, Any]:
             "required": ["topics", "follow_up_questions"],
             "additionalProperties": False,
         },
+        config=_DETERMINISTIC,
     )
     return {
         "research_plan": response.parsed,
@@ -161,6 +175,7 @@ async def summarize(state: PipelineState) -> Mapping[str, Any]:
             )
         ],
         response_schema=Summary,
+        config=_DETERMINISTIC,
     )
     return {
         "summary": response.parsed,
diff --git a/examples/05-fan-out-with-retry/main.py b/examples/05-fan-out-with-retry/main.py
index 88cb485..c20538e 100644
--- a/examples/05-fan-out-with-retry/main.py
+++ b/examples/05-fan-out-with-retry/main.py
@@ -27,12 +27,24 @@
   per-instance: a failure on headline 3 doesn't restart headlines 0-2.
 - ``concurrency=3`` caps how many instances run in flight at once. Use
   this to be polite to the upstream API.
+- ``error_policy`` defaults to ``"fail_fast"`` — the first instance
+  failure (after retries exhaust) raises and cancels siblings. Set
+  the ``COLLECT_MODE`` env var to switch to ``"collect"``: each
+  instance runs independently and per-instance failures land in
+  ``state.instance_errors`` instead of aborting the batch. The
+  ``errors_field="instance_errors"`` knob names where the records go.
 - A ``TimingRecord`` is captured per instance via an ``on_complete``
   callback. ``TimingRecord`` carries the per-call duration but not the
   ``fan_out_index`` — that index lives on observer NodeEvents instead.
   The demo prints captured durations in completion order plus a
   wall-clock vs sum-of-durations comparison that shows concurrency
   actually parallelized the work.
+- A ``fan_out_config_observer`` reads ``NodeEvent.fan_out_config`` on
+  the fan-out node's dispatch event. Inner-instance events carry
+  ``fan_out_index`` but not ``fan_out_config``; the config lives on
+  the fan-out node's own started / completed pair and gives observers
+  a record of the resolved item_count, concurrency, and error_policy
+  at dispatch time.
 
 **Configuration** (env vars; OpenAI defaults shown):
 
@@ -61,6 +73,7 @@
     END,
     CompiledGraph,
     GraphBuilder,
+    NodeEvent,
     State,
     append,
 )
@@ -114,11 +127,14 @@ async def _chat(system: str, user: str) -> str:
 
 class BatchState(State):
     """Outer graph: list of headlines goes in, parallel lists of summaries
-    and topic tags come out."""
+    and topic tags come out. ``instance_errors`` only populates under
+    ``error_policy="collect"`` — each failed instance contributes one
+    record naming its ``fan_out_index`` and the exception category."""
 
     headlines: list[str] = Field(default_factory=list)
     summaries: Annotated[list[str], append] = Field(default_factory=list)
     topics: Annotated[list[str], append] = Field(default_factory=list)
+    instance_errors: Annotated[list[dict[str, Any]], append] = Field(default_factory=list[dict[str, Any]])
     trace: Annotated[list[str], append] = Field(default_factory=list)
 
 
@@ -216,7 +232,16 @@ async def present(s: BatchState) -> Mapping[str, Any]:
     return {"trace": ["present"]}
 
 
-def build_graph() -> CompiledGraph[BatchState]:
+def build_graph(error_policy: str = "fail_fast") -> CompiledGraph[BatchState]:
+    """Build the fan-out demo graph.
+
+    ``error_policy`` switches between ``"fail_fast"`` (default; first
+    exhausted-retry failure raises and cancels the rest) and
+    ``"collect"`` (each instance runs independently; failures land in
+    ``state.instance_errors`` and the batch produces partial results).
+    The smoke test calls this with no argument, exercising the default
+    path; main() lets the COLLECT_MODE env var flip to collect.
+    """
     headline_subgraph = build_headline_subgraph()
 
     retry = RetryMiddleware(
@@ -244,6 +269,8 @@ def build_graph() -> CompiledGraph[BatchState]:
             extra_outputs={"topics": "topic"},
             concurrency=3,
             instance_middleware=(retry, timing),
+            error_policy=error_policy,
+            errors_field="instance_errors",
         )
         .add_node("present", present)
         .add_edge("announce", "headline_runs")
@@ -254,6 +281,30 @@ def build_graph() -> CompiledGraph[BatchState]:
     )
 
 
+async def fan_out_config_observer(event: NodeEvent) -> None:
+    """Print the fan-out node's resolved config when its dispatch event
+    fires.
+
+    NodeEvent carries ``fan_out_config`` ONLY on the fan-out node's own
+    started / completed pair (the dispatch wrapper); inner-instance
+    events carry ``fan_out_index`` but not ``fan_out_config``. Reading
+    the config gives observability layers a record of how the dispatch
+    actually resolved at runtime — useful when ``count`` or
+    ``concurrency`` are callable resolvers whose value isn't visible
+    in code.
+    """
+    if event.fan_out_config is None:
+        return
+    if event.phase != "started":
+        return
+    cfg = event.fan_out_config
+    print(
+        f"  [observer] fan-out node {event.node_name!r} dispatching: "
+        f"item_count={cfg.item_count} concurrency={cfg.concurrency} "
+        f"error_policy={cfg.error_policy!r}"
+    )
+
+
 # ---------------------------------------------------------------------------
 # Main
 # ---------------------------------------------------------------------------
@@ -264,12 +315,19 @@ async def main() -> None:
     # doesn't accumulate timings across invocations.
     _timings.clear()
 
-    graph = build_graph()
+    # Any non-empty COLLECT_MODE (``1``, but also ``0``) switches the
+    # fan-out error policy from the default fail_fast to collect. Under
+    # collect, each instance runs independently and per-instance failures
+    # (after retries exhaust) land in state.instance_errors instead of aborting the batch.
+    error_policy = "collect" if os.environ.get("COLLECT_MODE") else "fail_fast"
+    graph = build_graph(error_policy=error_policy)
+    graph.attach_observer(fan_out_config_observer)
 
     initial = BatchState(headlines=HEADLINES)
 
     print("=" * 72)
     print(f"Summarizing {len(HEADLINES)} headlines in parallel (concurrency=3)")
+    print(f"error_policy={error_policy!r}")
     print("=" * 72)
     print()
 
@@ -284,6 +342,11 @@ async def main() -> None:
             print(f"       summary: {s}")
             print(f"       topic:   {t}")
             print()
+        if final.instance_errors:
+            print(f"Captured {len(final.instance_errors)} per-instance error(s):")
+            for err in final.instance_errors:
+                print(f"  {err}")
+            print()
         print("Per-instance timings (in completion order):")
         for nth, record in enumerate(_timings):
             print(f"  #{nth}  {record.duration_ms:7.1f} ms  outcome={record.outcome}")
diff --git a/examples/06-parallel-branches/main.py b/examples/06-parallel-branches/main.py
index b53c80b..78a2ed1 100644
--- a/examples/06-parallel-branches/main.py
+++ b/examples/06-parallel-branches/main.py
@@ -35,6 +35,12 @@
   mapping (not in completion order). The three branches here write
   disjoint parent fields, so the order doesn't affect the result —
   but the property holds and would matter if they overlapped.
+- A ``branch_attribution_observer`` reads ``NodeEvent.branch_name``
+  on inner-node events. ``branch_name`` is populated only for
+  events INSIDE a branch's subgraph; outermost nodes (receive,
+  enrich, present) have ``branch_name=None``. This is the
+  per-event attribution that lets observability backends route
+  metrics / spans by branch.
 
 **Configuration** (env vars; OpenAI defaults shown):
 
@@ -64,6 +70,7 @@
     BranchSpec,
     CompiledGraph,
     GraphBuilder,
+    NodeEvent,
     State,
     append,
 )
@@ -233,6 +240,21 @@ async def present(s: ArticleState) -> Mapping[str, Any]:
     return {"trace": ["present"]}
 
 
+async def branch_attribution_observer(event: NodeEvent) -> None:
+    """Print which branch each inner-node event came from.
+
+    NodeEvent carries ``branch_name`` on events from nodes that
+    execute INSIDE a parallel-branches branch — it's the per-event
+    attribution that says "this came from branch X." Outermost-graph
+    nodes (receive, enrich, present) carry no branch_name. The
+    observer skips events with no branch attribution and prints
+    ``(branch=…) node_name`` for the rest.
+    """
+    if event.branch_name is None or event.phase != "started":
+        return
+    print(f"  [observer] (branch={event.branch_name}) inner node {event.node_name!r} started")
+
+
 def build_graph() -> CompiledGraph[ArticleState]:
     summary = build_summary_subgraph()
     sentiment = build_sentiment_subgraph()
@@ -287,6 +309,7 @@ def build_graph() -> CompiledGraph[ArticleState]:
 
 async def main() -> None:
     graph = build_graph()
+    graph.attach_observer(branch_attribution_observer)
 
     print("=" * 72)
     print("Lunar-mission article enrichment — three independent analyses in parallel")
diff --git a/examples/07-multimodal-prompt/main.py b/examples/07-multimodal-prompt/main.py
index 48028f9..7c8d1a4 100644
--- a/examples/07-multimodal-prompt/main.py
+++ b/examples/07-multimodal-prompt/main.py
@@ -1,55 +1,72 @@
-"""openarmature demo: caption a historical lunar photograph using a
-versioned prompt template plus a multimodal user message.
-
-**Use case:** Given a photograph from a lunar mission and the mission's
-name, describe what's visible in the image. The text instructions are
-loaded from a versioned prompt template on disk so they can be edited,
-diffed, and rolled out independently of the code. The image is passed
-to the model alongside the rendered text as a multimodal user message.
-
-This is the "prompt management + image input" shape — two openarmature
-surfaces that compose cleanly. The prompt manager gives you traceable,
-hashable, version-tagged instruction text; content blocks give you the
-multimodal payload alongside it.
+"""openarmature demo: caption and identify a lunar mission photograph
+using versioned prompt templates, a fallback prompt backend, and a
+multimodal user message.
+
+**Use case:** Given a photograph from a lunar mission, run two prompts
+in sequence: first describe what's visible (``caption-lunar-image``),
+then use that caption alongside the same image to identify the specific
+mission (``identify-mission``). Both prompts are versioned templates on
+disk; both renders are grouped under one observability ``PromptGroup``
+so a trace UI can render them as a single logical unit.
+
+The image can come from a public URL (default) or a local file (set
+``IMAGE_PATH`` to use the inline base64 source instead). The
+``PromptManager`` is wired with a primary + fallback
+``FilesystemPromptBackend`` to demonstrate composite-backend
+configuration; the fallback path fires only when the primary raises
+``PromptStoreUnavailable`` (e.g., a remote Langfuse backend off-line).
 
 **What's interesting in the implementation:**
 
-- ``FilesystemPromptBackend`` loads ``caption-lunar-image.j2`` from
-  ``prompts/production/``. The layout is ``<root>/<label>/<name>.j2``;
-  the ``label`` ("production" here) is the rollout channel.
-- ``PromptManager(backend)`` wraps the backend. ``manager.get(name,
-  variables={...})`` fetches and renders in one call, returning a
-  ``PromptResult`` whose ``messages`` carries the rendered text and
-  whose ``template_hash`` / ``rendered_hash`` identify exactly which
-  template+variables produced this output.
-- ``with_active_prompt(result)`` is a context manager. While it's
-  active, OTel observers see ``openarmature.prompt.*`` attributes
-  stamped onto any LLM-call span fired inside the block. No OTel
-  observer is attached in this demo (keeps the output focused on the
-  caption), but the wrapping is the canonical pattern for production.
+- ``PromptManager(primary, fallback)`` accepts multiple backends. On
+  every ``fetch``, the manager tries them in order: if a backend
+  raises ``PromptStoreUnavailable`` the manager continues to the
+  next; if it raises ``PromptNotFound`` the chain stops (the name is
+  legitimately missing). The typical production shape is "Langfuse
+  primary + local-filesystem fallback".
+- ``FilesystemPromptBackend`` uses the ``<root>/<label>/<name>.j2``
+  layout. The demo ships two prompts (``caption-lunar-image``,
+  ``identify-mission``) under the primary backend's ``production``
+  label, plus a sibling backend rooted at a different folder for the
+  fallback demonstration.
+- ``PromptGroup(group_name=..., members=[result_a, result_b])`` wraps
+  two ``PromptResult`` instances under one observability identifier.
+  ``with_active_prompt_group(group)`` propagates the group name via
+  ContextVar; OTel observers stamp ``openarmature.prompt.group_name``
+  onto every LLM-call span fired inside.
+- ``with_active_prompt(result)`` (inside the group's scope) propagates
+  the per-call prompt identifiers — name, version, label,
+  template_hash, rendered_hash. The two layers compose: spans inside
+  the group see both the group identifier AND the per-call prompt
+  identifiers.
 - The rendered text becomes a ``TextBlock`` inside a multimodal
-  ``UserMessage``; the image is a sibling ``ImageBlock`` carrying an
-  ``ImageSourceURL``. The provider passes both to the model in one
-  call.
+  ``UserMessage``; the image is a sibling ``ImageBlock``. The image
+  source is ``ImageSourceURL(url=...)`` by default; setting
+  ``IMAGE_PATH`` switches to ``ImageSourceInline(base64_data=...)``
+  with the file's bytes base64-encoded and an inferred ``media_type``.
 
 **Configuration** (env vars; OpenAI defaults shown):
 
 - ``LLM_BASE_URL`` defaults to ``https://api.openai.com``. **Host root only.**
 - ``LLM_MODEL`` defaults to ``gpt-4o-mini`` (a vision-capable model).
 - ``LLM_API_KEY`` required (empty for local servers that don't authenticate).
-- ``IMAGE_URL`` overrides the default image. Default is a public-domain
+- ``IMAGE_URL`` overrides the default URL. Default is a public-domain
   NASA photograph of Buzz Aldrin on the lunar surface.
+- ``IMAGE_PATH`` overrides the URL with a local file path. The file's
+  bytes go to the model via ``ImageSourceInline`` (base64) instead.
 
 Run with:
 
     uv sync --group examples
     cd examples/07-multimodal-prompt
     LLM_API_KEY=sk-... uv run python main.py
+    LLM_API_KEY=sk-... IMAGE_PATH=./my-photo.jpg uv run python main.py
 """
 
 from __future__ import annotations
 
 import asyncio
+import base64
 import os
 from collections.abc import Mapping
 from pathlib import Path
@@ -66,6 +83,8 @@
 )
 from openarmature.llm import (
     ImageBlock,
+    ImageSource,
+    ImageSourceInline,
     ImageSourceURL,
     OpenAIProvider,
     TextBlock,
@@ -73,8 +92,11 @@
 )
 from openarmature.prompts import (
     FilesystemPromptBackend,
+    PromptGroup,
     PromptManager,
+    PromptResult,
     with_active_prompt,
+    with_active_prompt_group,
 )
 
 # ---------------------------------------------------------------------------
@@ -103,8 +125,27 @@ def _get_provider() -> OpenAIProvider:
 
 # Build the prompt manager once at import time. The manager is cheap to
 # construct, holds no per-call state, and is safe to share across nodes.
-_PROMPT_ROOT = Path(__file__).parent / "prompts"
-_PROMPT_MANAGER = PromptManager(FilesystemPromptBackend(_PROMPT_ROOT))
+#
+# Two backends are wired here:
+#   - primary: ``prompts/`` — ships caption-lunar-image and
+#     identify-mission.
+#   - fallback: ``prompts_fallback/`` — ships a backup
+#     caption-lunar-image only. The fallback path fires when the
+#     primary raises ``PromptStoreUnavailable`` (e.g., a remote
+#     primary like Langfuse times out); ``PromptNotFound`` from
+#     primary stops the chain (the name is legitimately missing).
+#
+# In this demo both prompts live in primary, so the fallback path
+# isn't exercised at runtime. The construction-time setup is the
+# demonstrated thing; production code would replace primary with a
+# remote backend (LangfusePromptBackend etc.) while keeping the
+# filesystem one as the offline safety net.
+_PROMPT_ROOT_PRIMARY = Path(__file__).parent / "prompts"
+_PROMPT_ROOT_FALLBACK = Path(__file__).parent / "prompts_fallback"
+_PROMPT_MANAGER = PromptManager(
+    FilesystemPromptBackend(_PROMPT_ROOT_PRIMARY),
+    FilesystemPromptBackend(_PROMPT_ROOT_FALLBACK),
+)
 
 
 # ---------------------------------------------------------------------------
@@ -113,30 +154,59 @@ def _get_provider() -> OpenAIProvider:
 
 
 class CaptionState(State):
-    image_url: str
+    # Exactly one of ``image_url`` / ``image_path`` is set when the
+    # demo runs; the helper below picks the right ImageSource shape.
+    image_url: str = ""
+    image_path: str = ""
     mission: str
     caption: str = ""
-    prompt_version: str = ""
-    template_hash: str = ""
+    identified_mission: str = ""
+    group_name: str = ""
     trace: Annotated[list[str], append] = Field(default_factory=list)
 
 
 # ---------------------------------------------------------------------------
-# Node
+# Image source helper
 # ---------------------------------------------------------------------------
+# The image arrives either as a URL (default) or a local file path
+# (``IMAGE_PATH`` env var). The helper picks the right ``ImageSource``
+# shape: ``ImageSourceURL`` passes the URL through to the model
+# unchanged; for ``ImageSourceInline`` the helper reads the file,
+# base64-encodes it, and sets the required ``ImageBlock.media_type``.
+
+_EXTENSION_TO_MEDIA_TYPE = {
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".png": "image/png",
+    ".webp": "image/webp",
+    ".gif": "image/gif",
+}
+
+
+def _build_image_block(image_url: str, image_path: str) -> ImageBlock:
+    if image_path:
+        data = Path(image_path).read_bytes()
+        encoded = base64.b64encode(data).decode("ascii")
+        ext = Path(image_path).suffix.lower()
+        media_type = _EXTENSION_TO_MEDIA_TYPE.get(ext)
+        if media_type is None:
+            raise RuntimeError(
+                f"image extension {ext!r} not recognized; supported: "
+                f"{sorted(_EXTENSION_TO_MEDIA_TYPE.keys())}"
+            )
+        source: ImageSource = ImageSourceInline(base64_data=encoded)
+        return ImageBlock(source=source, media_type=media_type)
+    return ImageBlock(source=ImageSourceURL(url=image_url))
 
 
-async def caption(s: CaptionState) -> Mapping[str, Any]:
-    # Load + render the template in one call. ``variables`` are strict:
-    # an undefined name in the template raises PromptRenderError.
-    rendered = await _PROMPT_MANAGER.get(
-        "caption-lunar-image",
-        variables={"mission": s.mission},
-    )
+# ---------------------------------------------------------------------------
+# Nodes
+# ---------------------------------------------------------------------------
 
-    # The PromptResult's messages list carries the rendered text as a
-    # UserMessage. Pull out the text and compose a multimodal user
-    # message that also carries the image.
+
+def _extract_rendered_text(rendered: PromptResult) -> str:
+    """Pull the rendered text out of a single-UserMessage PromptResult,
+    failing loudly if the contract shape changes."""
     rendered_msg = rendered.messages[0]
     if not isinstance(rendered_msg, UserMessage) or not isinstance(rendered_msg.content, str):
         raise RuntimeError(
@@ -144,37 +214,75 @@ async def caption(s: CaptionState) -> Mapping[str, Any]:
             f"UserMessage with str content, got {type(rendered_msg).__name__} "
             f"with content type {type(rendered_msg.content).__name__}"
         )
-    rendered_text = rendered_msg.content
+    return rendered_msg.content
+
+
+async def caption(s: CaptionState) -> Mapping[str, Any]:
+    # Each node fetches + renders its own prompt. ``get`` is the
+    # convenience shorthand for ``render(await fetch(...))``.
+    rendered = await _PROMPT_MANAGER.get(
+        "caption-lunar-image",
+        variables={"mission": s.mission},
+    )
+    rendered_text = _extract_rendered_text(rendered)
 
     multimodal_message = UserMessage(
         content=[
             TextBlock(text=rendered_text),
-            ImageBlock(source=ImageSourceURL(url=s.image_url)),
+            _build_image_block(s.image_url, s.image_path),
         ],
     )
 
-    # ``with_active_prompt`` propagates the prompt identifiers via
-    # ContextVar to any observer that cares. An OTel observer would
-    # stamp openarmature.prompt.{name,version,label,template_hash,
-    # rendered_hash} on the LLM-call span fired inside this block. No
-    # observer is attached in this demo, but the wrapping is the
-    # canonical pattern; leaving it out drops the audit trail.
+    # ``with_active_prompt`` propagates the per-call prompt
+    # identifiers (name, version, label, template_hash,
+    # rendered_hash) via ContextVar. An OTel observer would stamp
+    # those onto the LLM-call span fired inside the block. The
+    # outer ``with_active_prompt_group`` (set in main()) ALSO stamps
+    # a ``group_name`` onto the same span — the two layers compose
+    # so observers see both per-call AND per-group attribution.
     with with_active_prompt(rendered):
         response = await _get_provider().complete([multimodal_message])
 
     return {
         "caption": (response.message.content or "").strip(),
-        "prompt_version": rendered.version,
-        "template_hash": rendered.template_hash,
         "trace": ["caption"],
     }
 
 
+async def identify(s: CaptionState) -> Mapping[str, Any]:
+    # Uses the caption produced by the previous node — so the render
+    # happens here, not in main(). Same with_active_prompt wrapping;
+    # the outer group context from main() still applies.
+    rendered = await _PROMPT_MANAGER.get(
+        "identify-mission",
+        variables={"caption": s.caption},
+    )
+    rendered_text = _extract_rendered_text(rendered)
+
+    multimodal_message = UserMessage(
+        content=[
+            TextBlock(text=rendered_text),
+            _build_image_block(s.image_url, s.image_path),
+        ],
+    )
+
+    with with_active_prompt(rendered):
+        response = await _get_provider().complete([multimodal_message])
+
+    identified = (response.message.content or "").strip().removeprefix("Mission:").strip()
+    return {
+        "identified_mission": identified,
+        "trace": ["identify"],
+    }
+
+
 def build_graph() -> CompiledGraph[CaptionState]:
     return (
         GraphBuilder(CaptionState)
         .add_node("caption", caption)
-        .add_edge("caption", END)
+        .add_node("identify", identify)
+        .add_edge("caption", "identify")
+        .add_edge("identify", END)
         .set_entry("caption")
         .compile()
     )
@@ -187,24 +295,66 @@ def build_graph() -> CompiledGraph[CaptionState]:
 
 async def main() -> None:
     image_url = os.environ.get("IMAGE_URL", DEFAULT_IMAGE_URL)
+    image_path = os.environ.get("IMAGE_PATH", "")
     mission = os.environ.get("MISSION", DEFAULT_MISSION)
 
     print("=" * 72)
-    print("Caption a lunar photograph using a versioned prompt template")
+    print("Caption + identify a lunar photograph")
     print("=" * 72)
     print()
     print(f"  mission:   {mission}")
-    print(f"  image_url: {image_url}")
+    if image_path:
+        print(f"  image:     {image_path} (inline / base64)")
+    else:
+        print(f"  image:     {image_url} (url)")
     print()
 
+    # Pre-render both prompts with placeholder variables so the
+    # PromptGroup can be built ONCE at invoke entry and set as the
+    # outer observability context for the whole pipeline. The actual
+    # per-call renders happen inside the nodes, picking up the real
+    # ``caption`` variable that's only known after the first node
+    # completes. The group's ``members`` list is a metadata hint
+    # naming the two prompt slots; per-call wrapping inside the
+    # nodes carries the exact-rendered identity for each call.
+    caption_member = await _PROMPT_MANAGER.get(
+        "caption-lunar-image",
+        variables={"mission": mission},
+    )
+    identify_placeholder = await _PROMPT_MANAGER.get(
+        "identify-mission",
+        variables={"caption": "(provided at runtime)"},
+    )
+    group = PromptGroup(
+        group_name="lunar-image-analysis",
+        members=[caption_member, identify_placeholder],
+    )
+
     graph = build_graph()
     try:
-        final = await graph.invoke(CaptionState(image_url=image_url, mission=mission))
-        print(f"  prompt:    caption-lunar-image @ {final.prompt_version}")
-        print(f"  template:  {final.template_hash}")
+        # ``with_active_prompt_group`` propagates the group_name to
+        # observers for the duration of the invoke. Inside the nodes,
+        # ``with_active_prompt`` adds the per-call prompt identifiers
+        # alongside it — both layers stamp attributes on the same
+        # LLM-call span.
+        with with_active_prompt_group(group):
+            final = await graph.invoke(
+                CaptionState(
+                    image_url=image_url if not image_path else "",
+                    image_path=image_path,
+                    mission=mission,
+                    group_name=group.group_name,
+                )
+            )
+
+        print(f"  group:       {final.group_name}")
+        print(f"  caption-prompt:   {caption_member.name} @ {caption_member.version}")
+        print(f"  identify-prompt:  {identify_placeholder.name} @ {identify_placeholder.version}")
         print()
         print("  caption:")
         print(f"    {final.caption}")
+        print()
+        print(f"  identified mission:  {final.identified_mission}")
     finally:
         await graph.drain()
         if _provider_instance is not None:
diff --git a/examples/07-multimodal-prompt/prompts/production/identify-mission.j2 b/examples/07-multimodal-prompt/prompts/production/identify-mission.j2
new file mode 100644
index 0000000..ae81af6
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts/production/identify-mission.j2
@@ -0,0 +1,13 @@
+You are looking at a historical photograph from a lunar mission.
+
+The caption below was written by another system; use it as a hint
+but rely on the image itself for the answer.
+
+Caption hint: {{ caption }}
+
+Identify the specific mission shown in the image (e.g., "Apollo 11",
+"Apollo 13", "Artemis II") in EXACTLY one short line of the form:
+
+  Mission: <name>
+
+No preamble; no extra commentary; no markdown.
diff --git a/examples/07-multimodal-prompt/prompts_fallback/production/caption-lunar-image.j2 b/examples/07-multimodal-prompt/prompts_fallback/production/caption-lunar-image.j2
new file mode 100644
index 0000000..2689661
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts_fallback/production/caption-lunar-image.j2
@@ -0,0 +1,11 @@
+You are looking at a photograph from {{ mission }}.
+
+Briefly describe the image: who is visible, where they are, and what
+they appear to be doing. One sentence; no preamble.
+
+(This is the fallback variant. It exists so the demo's
+PromptManager configuration shows TWO backends; the primary backend
+above ships the canonical longer caption-lunar-image prompt and is
+always tried first. This fallback fires only if the primary backend
+raises PromptStoreUnavailable — typical for a remote primary like
+Langfuse with a local filesystem fallback.)
diff --git a/examples/08-checkpointing-and-migration/main.py b/examples/08-checkpointing-and-migration/main.py
index 73e7ee6..38a7058 100644
--- a/examples/08-checkpointing-and-migration/main.py
+++ b/examples/08-checkpointing-and-migration/main.py
@@ -191,6 +191,19 @@ def migrate_v1_to_v2(state_dict: dict[str, Any]) -> dict[str, Any]:
     Pure function: takes the saved state as a dict, returns the dict at
     the new schema. The engine reads the v1 record, applies this
     function, and re-deserializes against MissionPlanStateV2.
+
+    Multi-version chains: a third schema (v3) would add a second
+    migration function (``migrate_v2_to_v3``) and a second
+    ``builder.with_state_migration("v2", "v3", migrate_v2_to_v3)``
+    call. The framework's MigrationRegistry runs a BFS over the
+    registered edges to find the shortest chain from the saved
+    record's ``schema_version`` to the current state class's. A v1
+    record loaded under a v3 graph would run v1->v2 then v2->v3
+    automatically; no caller-side composition required. If two
+    distinct edges with the same ``(from, to)`` pair exist, or two
+    distinct shortest paths exist for one resolution, the registry
+    raises ``CheckpointStateMigrationChainAmbiguous`` at registration
+    or resume time.
     """
     return {**state_dict, "risk_assessment": ""}
 

From 89f13b1f8e91940754e73e091351be2dffcc675f Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 13:14:54 -0700
Subject: [PATCH 2/5] feat(examples): add 09-tool-use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the major gap from the spec coverage review: tool-calling
was absent from the demo set even though structured output (the
other half of complete()'s arity) ships in 00. 09 demonstrates
the full tool-calling contract — Tool definitions, complete with
tools, ToolCall parsing, dispatch, ToolMessage round-trip, and
the multi-turn loop.

Use case: a lunar-mission assistant with two tools.

- ``lookup_mission(name)``: reads a baked-in dict of
  Apollo / Artemis records (launch / splashdown dates, crew,
  outcome). Stand-in for a real fact lookup against a doc store.
- ``compute_delta_v(initial_altitude_km, final_altitude_km)``:
  Hohmann transfer arithmetic between two circular Earth orbits.
  Returns a JSON record with the two burns and the total.

Both Tool definitions use JSON Schema object parameters with
``required`` properties and ``additionalProperties=False``. The
default question naturally exercises both: a factual recall about
Apollo 13 plus a delta-v computation for a free-return-style
injection.

Graph shape: a three-node cycle with a conditional edge.

  call_llm → [route_after_llm]
                  ├── if assistant.tool_calls present → dispatch_tools
                  │     ├── parse each ToolCall
                  │     ├── invoke local function
                  │     ├── append ToolMessage(content, tool_call_id)
                  │     └── → call_llm  (cycle)
                  └── else → present → END

A ``MAX_TURNS=5`` hard cap on ``state.turn`` prevents runaway
loops if the model never settles to a plain ``finish_reason="stop"``
response.

Smoke test list grows to ten demos.
---
 examples/09-tool-use/main.py | 393 +++++++++++++++++++++++++++++++++++
 examples/README.md           |  15 ++
 tests/test_examples_smoke.py |   1 +
 3 files changed, 409 insertions(+)
 create mode 100644 examples/09-tool-use/main.py

diff --git a/examples/09-tool-use/main.py b/examples/09-tool-use/main.py
new file mode 100644
index 0000000..d8d7c6e
--- /dev/null
+++ b/examples/09-tool-use/main.py
@@ -0,0 +1,393 @@
+"""openarmature demo: a lunar-mission assistant that calls local Python
+functions as tools to answer fact and physics questions about Apollo /
+Artemis missions.
+
+**Use case:** A user asks something that mixes a factual recall ("when
+did Apollo 13 splash down?") with a small computation ("what's the
+delta-v for a Hohmann transfer from a 300 km Earth orbit to lunar
+distance?"). Neither belongs in the model's prompt — facts get stale and
+arithmetic is unreliable from the model alone — so the agent defines two
+local tools and lets the model call them.
+
+The agent loops: send messages + tools to the model, dispatch any
+``tool_calls`` the model emits, feed the results back as
+``ToolMessage`` entries, and call the model again. Loop terminates when
+the model returns content with ``finish_reason="stop"`` (or after a
+hard turn cap).
+
+**What's interesting in the implementation:**
+
+- ``Tool(name, description, parameters)`` defines each function as a
+  JSON Schema for the model. Both tools below use the standard
+  ``type: object`` shape with ``required`` properties; the model
+  receives this through ``complete(messages, tools=TOOLS)`` and
+  decides which (if any) to invoke.
+- The model's response carries ``finish_reason="tool_calls"`` and
+  populates ``response.message.tool_calls`` with parsed
+  ``ToolCall(id, name, arguments)`` records. The framework guarantees
+  ``arguments`` is a parsed dict matching the tool's parameters
+  schema (or ``None`` only under ``finish_reason="error"``).
+- The dispatcher node parses each ``ToolCall``, runs the matching
+  local Python function, and appends one
+  ``ToolMessage(content=..., tool_call_id=...)`` per call. Spec
+  requires the ``tool_call_id`` round-trip exactly so the model can
+  pair its requests with the responses.
+- The loop is just a conditional edge on the graph: ``call_llm`` →
+  ``dispatch_tools`` → back to ``call_llm`` when the model wants
+  more tools, or → ``present`` when it's done. No special "agent
+  framework" abstraction — tool-calling composes with the existing
+  graph mechanics.
+- A ``MAX_TURNS`` cap prevents runaway loops if a model stays in
+  tool-calling forever. Production agents typically pair the cap with
+  an explicit termination tool or a fallback summarization step.
+
+**Configuration** (env vars; OpenAI defaults shown):
+
+- ``LLM_BASE_URL`` defaults to ``https://api.openai.com``. **Host root only.**
+- ``LLM_MODEL`` defaults to ``gpt-4o-mini``.
+- ``LLM_API_KEY`` required (empty for local servers that don't authenticate).
+
+Run with:
+
+    uv sync --group examples
+    cd examples/09-tool-use
+    LLM_API_KEY=sk-... uv run python main.py
+    LLM_API_KEY=sk-... uv run python main.py "When was Apollo 17 launched?"
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import math
+import os
+import sys
+from collections.abc import Mapping
+from typing import Annotated, Any
+
+from pydantic import Field
+
+from openarmature.graph import (
+    END,
+    CompiledGraph,
+    GraphBuilder,
+    State,
+    append,
+)
+from openarmature.llm import (
+    AssistantMessage,
+    Message,
+    OpenAIProvider,
+    SystemMessage,
+    Tool,
+    ToolMessage,
+    UserMessage,
+)
+
+# ---------------------------------------------------------------------------
+# Provider
+# ---------------------------------------------------------------------------
+
+_provider_instance: OpenAIProvider | None = None
+
+
+def _get_provider() -> OpenAIProvider:
+    global _provider_instance
+    if _provider_instance is None:
+        _provider_instance = OpenAIProvider(
+            base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"),
+            model=os.environ.get("LLM_MODEL", "gpt-4o-mini"),
+            api_key=os.environ.get("LLM_API_KEY") or None,
+        )
+    return _provider_instance
+
+
+# ---------------------------------------------------------------------------
+# Tool 1 — lookup_mission: read a small baked-in fact-record for a
+# named lunar mission. Stand-in for a real lookup against a doc store
+# or knowledge base.
+# ---------------------------------------------------------------------------
+
+LUNAR_MISSIONS: dict[str, dict[str, str]] = {
+    "Apollo 11": {
+        "launch_date": "1969-07-16",
+        "splashdown_date": "1969-07-24",
+        "commander": "Neil Armstrong",
+        "lunar_module_pilot": "Buzz Aldrin",
+        "command_module_pilot": "Michael Collins",
+        "result": "First crewed lunar landing.",
+    },
+    "Apollo 13": {
+        "launch_date": "1970-04-11",
+        "splashdown_date": "1970-04-17",
+        "commander": "Jim Lovell",
+        "lunar_module_pilot": "Fred Haise",
+        "command_module_pilot": "Jack Swigert",
+        "result": (
+            "Aborted lunar landing after service-module oxygen tank rupture; "
+            "safe return via free-return trajectory."
+        ),
+    },
+    "Apollo 17": {
+        "launch_date": "1972-12-07",
+        "splashdown_date": "1972-12-19",
+        "commander": "Eugene Cernan",
+        "lunar_module_pilot": "Harrison Schmitt",
+        "command_module_pilot": "Ronald Evans",
+        "result": "Final Apollo lunar landing.",
+    },
+    "Artemis II": {
+        "launch_date": "2026-04-01",
+        "splashdown_date": "2026-04-10",
+        "commander": "Reid Wiseman",
+        "lunar_module_pilot": "n/a (no surface landing)",
+        "command_module_pilot": "Victor Glover",
+        "result": (
+            "First crewed lunar flyby of the Artemis program; tested Orion "
+            "spacecraft on a free-return trajectory."
+        ),
+    },
+}
+
+
+def lookup_mission(name: str) -> str:
+    record = LUNAR_MISSIONS.get(name)
+    if record is None:
+        known = ", ".join(sorted(LUNAR_MISSIONS.keys()))
+        return f"Unknown mission {name!r}. Known missions: {known}."
+    return json.dumps(record)
+
+
+# ---------------------------------------------------------------------------
+# Tool 2 — compute_delta_v: Hohmann transfer delta-v between two
+# circular orbits around a body with known gravitational parameter.
+# The textbook formula; rough but illustrative.
+# ---------------------------------------------------------------------------
+
+EARTH_RADIUS_KM = 6378.0
+EARTH_MU_KM3_S2 = 398600.4418  # Standard gravitational parameter for Earth.
+
+
+def compute_delta_v(initial_altitude_km: float, final_altitude_km: float) -> str:
+    """Hohmann transfer delta-v between two circular Earth orbits, given
+    as altitudes in km above the surface (0 = surface, 300 = LEO, 384400 =
+    roughly lunar distance). Returns a JSON record with the two burns and
+    the total; raises ValueError if a radius is non-positive or non-finite."""
+    r1 = initial_altitude_km + EARTH_RADIUS_KM
+    r2 = final_altitude_km + EARTH_RADIUS_KM
+    if not (math.isfinite(r1) and math.isfinite(r2) and r1 > 0 and r2 > 0):
+        raise ValueError("orbit radii must be finite and positive (altitude > -6378 km)")
+    mu = EARTH_MU_KM3_S2
+    dv1 = abs(math.sqrt(mu / r1) * (math.sqrt(2 * r2 / (r1 + r2)) - 1))
+    dv2 = abs(math.sqrt(mu / r2) * (1 - math.sqrt(2 * r1 / (r1 + r2))))
+    record = {
+        "first_burn_km_s": round(dv1, 3),
+        "second_burn_km_s": round(dv2, 3),
+        "total_delta_v_km_s": round(dv1 + dv2, 3),
+        "note": "Hohmann transfer between two coplanar circular Earth orbits.",
+    }
+    return json.dumps(record)
+
+
+# ---------------------------------------------------------------------------
+# Tool definitions for the LLM
+# ---------------------------------------------------------------------------
+
+TOOLS: list[Tool] = [
+    Tool(
+        name="lookup_mission",
+        description="Look up factual record for a named historical or upcoming lunar mission.",
+        parameters={
+            "type": "object",
+            "properties": {
+                "name": {
+                    "type": "string",
+                    "description": "Mission name (e.g., 'Apollo 11', 'Artemis II').",
+                }
+            },
+            "required": ["name"],
+            "additionalProperties": False,
+        },
+    ),
+    Tool(
+        name="compute_delta_v",
+        description=(
+            "Compute the Hohmann transfer delta-v between two circular Earth orbits "
+            "given their altitudes above Earth's surface in km. Returns the two burns "
+            "and the total delta-v in km/s."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "initial_altitude_km": {
+                    "type": "number",
+                    "description": "Altitude of the starting circular orbit above Earth's surface, in km.",
+                },
+                "final_altitude_km": {
+                    "type": "number",
+                    "description": "Altitude of the destination circular orbit above Earth's surface, in km.",
+                },
+            },
+            "required": ["initial_altitude_km", "final_altitude_km"],
+            "additionalProperties": False,
+        },
+    ),
+]
+
+
+def dispatch(name: str, arguments: dict[str, Any]) -> str:
+    """Route a tool call to its local Python function.
+
+    Returns a string the agent loop wraps in a ``ToolMessage`` and
+    feeds back to the model. Unknown tool names produce an error
+    string rather than raising; the model handles the error in the
+    next turn.
+    """
+    if name == "lookup_mission":
+        return lookup_mission(arguments["name"])
+    if name == "compute_delta_v":
+        return compute_delta_v(
+            initial_altitude_km=float(arguments["initial_altitude_km"]),
+            final_altitude_km=float(arguments["final_altitude_km"]),
+        )
+    return f"Unknown tool {name!r}."
+
+
+# ---------------------------------------------------------------------------
+# State
+# ---------------------------------------------------------------------------
+
+MAX_TURNS = 5
+
+
+class AgentState(State):
+    question: str
+    messages: list[Message] = Field(default_factory=list[Message])
+    final_answer: str = ""
+    tool_call_count: int = 0
+    turn: int = 0
+    trace: Annotated[list[str], append] = Field(default_factory=list)
+
+
+# ---------------------------------------------------------------------------
+# Nodes
+# ---------------------------------------------------------------------------
+
+
+async def call_llm(s: AgentState) -> Mapping[str, Any]:
+    response = await _get_provider().complete(s.messages, tools=TOOLS)
+    return {
+        "messages": [*s.messages, response.message],
+        "turn": s.turn + 1,
+        "trace": [f"call_llm[turn={s.turn + 1}]"],
+    }
+
+
+async def dispatch_tools(s: AgentState) -> Mapping[str, Any]:
+    last = s.messages[-1]
+    if not isinstance(last, AssistantMessage) or not last.tool_calls:
+        raise RuntimeError("dispatch_tools entered without a tool-calling assistant message")
+    tool_messages: list[Message] = []
+    for tc in last.tool_calls:
+        result_text = dispatch(tc.name, tc.arguments or {})
+        tool_messages.append(ToolMessage(content=result_text, tool_call_id=tc.id))
+    return {
+        "messages": [*s.messages, *tool_messages],
+        "tool_call_count": s.tool_call_count + len(tool_messages),
+        "trace": [f"dispatch_tools[{len(tool_messages)}]"],
+    }
+
+
+async def present(s: AgentState) -> Mapping[str, Any]:
+    last = s.messages[-1]
+    if isinstance(last, AssistantMessage) and last.content:
+        return {"final_answer": last.content, "trace": ["present"]}
+    return {
+        "final_answer": "(model exited without final content)",
+        "trace": ["present"],
+    }
+
+
+def route_after_llm(s: AgentState) -> str:
+    # Hard turn cap: cut the loop even if the model wants more tools.
+    # Production agents typically pair this with a fallback summarize
+    # step that asks the model to "wrap up with what you have."
+    if s.turn >= MAX_TURNS:
+        return "present"
+    last = s.messages[-1]
+    if isinstance(last, AssistantMessage) and last.tool_calls:
+        return "dispatch_tools"
+    return "present"
+
+
+def build_graph() -> CompiledGraph[AgentState]:
+    return (
+        GraphBuilder(AgentState)
+        .add_node("call_llm", call_llm)
+        .add_node("dispatch_tools", dispatch_tools)
+        .add_node("present", present)
+        .add_conditional_edge("call_llm", route_after_llm)
+        .add_edge("dispatch_tools", "call_llm")
+        .add_edge("present", END)
+        .set_entry("call_llm")
+        .compile()
+    )
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+DEFAULT_QUESTION = (
+    "Tell me about Apollo 13. Then, separately, if I were planning a similar "
+    "free-return-style mission and wanted to inject from a 300 km parking orbit "
+    "to apogee at the Moon's mean distance (384,400 km above Earth's surface), "
+    "roughly how much delta-v would that take?"
+)
+
+
+async def main() -> None:
+    question = " ".join(sys.argv[1:]) or DEFAULT_QUESTION
+
+    initial_messages: list[Message] = [
+        SystemMessage(
+            content=(
+                "You are a helpful lunar-mission assistant. You have access to "
+                "two tools: lookup_mission (factual records for named missions) "
+                "and compute_delta_v (Hohmann transfer arithmetic between two "
+                "Earth orbits). Use them when the answer benefits. Cite the tool "
+                "outputs in your final summary."
+            )
+        ),
+        UserMessage(content=question),
+    ]
+
+    print("=" * 72)
+    print("Lunar-mission assistant — tool-calling loop")
+    print("=" * 72)
+    print()
+    print(f"  question: {question}")
+    print()
+
+    graph = build_graph()
+    try:
+        final = await graph.invoke(AgentState(question=question, messages=initial_messages))
+        print(f"  turns:     {final.turn}")
+        print(f"  tools used: {final.tool_call_count}")
+        print()
+        print("  trace:")
+        for step in final.trace:
+            print(f"    - {step}")
+        print()
+        print("  final answer:")
+        for line in final.final_answer.splitlines() or [""]:
+            print(f"    {line}")
+    finally:
+        await graph.drain()
+        if _provider_instance is not None:
+            await _provider_instance.aclose()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/README.md b/examples/README.md
index 624a9a6..355e380 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -84,6 +84,21 @@ invocation. Demonstrates: `SQLiteCheckpointer(serialization="json")`,
 `with_checkpointer`, save-on-completed-event, `State.schema_version`,
 `with_state_migration`, `invoke(resume_invocation=...)`.
 
+### [`09-tool-use/`](./09-tool-use/main.py)
+
+A lunar-mission assistant that calls local Python tools to answer
+questions mixing fact recall and physics arithmetic. Defines two
+tools (`lookup_mission` reading a baked-in record store,
+`compute_delta_v` doing a Hohmann transfer), passes them to the
+model via `complete(tools=...)`, dispatches `assistant.tool_calls`
+to the local functions, and feeds the results back as
+`ToolMessage` entries. The agent loop is a graph cycle:
+`call_llm → dispatch_tools → call_llm` via a conditional edge, with
+a hard turn cap to prevent runaway loops. Demonstrates: `Tool`
+definitions with JSON Schema parameters, `complete(tools=...)`,
+parsing `ToolCall` records, `ToolMessage(tool_call_id=...)`
+round-trip, multi-turn tool-calling loop as a graph cycle.
+
 ## Configuration
 
 All demos configure their LLM client via env vars; OpenAI public-API
diff --git a/tests/test_examples_smoke.py b/tests/test_examples_smoke.py
index adb9cdb..9f2f844 100644
--- a/tests/test_examples_smoke.py
+++ b/tests/test_examples_smoke.py
@@ -39,6 +39,7 @@
     "06-parallel-branches",
     "07-multimodal-prompt",
     "08-checkpointing-and-migration",
+    "09-tool-use",
 ]
 
 

From 776d27735562918fb8283c7f76d027e3690acc98 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 13:39:05 -0700
Subject: [PATCH 3/5] chore(examples): PR-review fixes for 05/07/09
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 09 tool-use: handle ``ToolCall.arguments is None`` (provider-
  reported parse error path) and wrap dispatch in a try/except that
  catches KeyError/ValueError/TypeError from bad args. Errors surface
  as ToolMessage content so the model can retry or give up gracefully
  rather than crashing the graph.
- 07 multimodal-prompt: add ``prompts_fallback/production/
  identify-mission.j2`` so the fallback backend actually covers both
  prompts. Without it, a primary outage would let the caption call
  fall through but break the identify call with PromptNotFound.
  Update the in-code comment that previously said fallback shipped
  only one prompt.
- 05 fan-out-with-retry: fix BatchState docstring that referenced
  ``branch_errors`` (the parallel-branches-side name) — the actual
  field is ``instance_errors``.
---
 examples/05-fan-out-with-retry/main.py            |  2 +-
 examples/07-multimodal-prompt/main.py             | 11 ++++++-----
 .../production/identify-mission.j2                | 15 +++++++++++++++
 examples/09-tool-use/main.py                      | 15 ++++++++++++++-
 4 files changed, 36 insertions(+), 7 deletions(-)
 create mode 100644 examples/07-multimodal-prompt/prompts_fallback/production/identify-mission.j2

diff --git a/examples/05-fan-out-with-retry/main.py b/examples/05-fan-out-with-retry/main.py
index c20538e..35348a8 100644
--- a/examples/05-fan-out-with-retry/main.py
+++ b/examples/05-fan-out-with-retry/main.py
@@ -127,7 +127,7 @@ async def _chat(system: str, user: str) -> str:
 
 class BatchState(State):
     """Outer graph: list of headlines goes in, parallel lists of summaries
-    and topic tags come out. ``branch_errors`` only populates under
+    and topic tags come out. ``instance_errors`` only populates under
     ``error_policy="collect"`` — each failed instance contributes one
     record naming its ``fan_out_index`` and the exception category."""
 
diff --git a/examples/07-multimodal-prompt/main.py b/examples/07-multimodal-prompt/main.py
index 7c8d1a4..56b5968 100644
--- a/examples/07-multimodal-prompt/main.py
+++ b/examples/07-multimodal-prompt/main.py
@@ -129,11 +129,12 @@ def _get_provider() -> OpenAIProvider:
 # Two backends are wired here:
 #   - primary: ``prompts/`` — ships caption-lunar-image and
 #     identify-mission.
-#   - fallback: ``prompts_fallback/`` — ships a backup
-#     caption-lunar-image only. The fallback path fires when the
-#     primary raises ``PromptStoreUnavailable`` (e.g., a remote
-#     primary like Langfuse times out); ``PromptNotFound`` from
-#     primary stops the chain (the name is legitimately missing).
+#   - fallback: ``prompts_fallback/`` — ships shorter variants of
+#     BOTH prompts so the safety net actually covers the whole
+#     pipeline. The fallback path fires when the primary raises
+#     ``PromptStoreUnavailable`` (e.g., a remote primary like
+#     Langfuse times out); ``PromptNotFound`` from primary stops the
+#     chain (the name is legitimately missing).
 #
 # In this demo both prompts live in primary, so the fallback path
 # isn't exercised at runtime. The construction-time setup is the
diff --git a/examples/07-multimodal-prompt/prompts_fallback/production/identify-mission.j2 b/examples/07-multimodal-prompt/prompts_fallback/production/identify-mission.j2
new file mode 100644
index 0000000..5110b84
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts_fallback/production/identify-mission.j2
@@ -0,0 +1,15 @@
+You are looking at a photograph from a lunar mission. The caption
+hint below was written by another system.
+
+Caption hint: {{ caption }}
+
+In ONE short line, name the specific mission shown:
+
+  Mission: <name>
+
+No preamble.
+
+(This is the fallback variant. The primary backend ships the
+canonical identify-mission prompt and is always tried first; this
+file exists so a primary-backend outage doesn't break the demo
+pipeline.)
diff --git a/examples/09-tool-use/main.py b/examples/09-tool-use/main.py
index d8d7c6e..baaab89 100644
--- a/examples/09-tool-use/main.py
+++ b/examples/09-tool-use/main.py
@@ -289,7 +289,20 @@ async def dispatch_tools(s: AgentState) -> Mapping[str, Any]:
         raise RuntimeError("dispatch_tools entered without a tool-calling assistant message")
     tool_messages: list[Message] = []
     for tc in last.tool_calls:
-        result_text = dispatch(tc.name, tc.arguments or {})
+        # ToolCall.arguments is None only under provider-reported
+        # finish_reason="error" (unparseable args). In a real agent the
+        # model sees the error string and either retries or bails;
+        # either way the loop doesn't crash.
+        if tc.arguments is None:
+            result_text = (
+                f"Tool {tc.name!r} could not be invoked: arguments were "
+                f"unparseable. Retry with valid JSON arguments."
+            )
+        else:
+            try:
+                result_text = dispatch(tc.name, tc.arguments)
+            except (KeyError, ValueError, TypeError) as exc:
+                result_text = f"Tool {tc.name!r} failed with {type(exc).__name__}: {exc}"
         tool_messages.append(ToolMessage(content=result_text, tool_call_id=tc.id))
     return {
         "messages": [*s.messages, *tool_messages],

From 833163d8783049f79c370ec3b75f8e880d223fac Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 13:56:44 -0700
Subject: [PATCH 4/5] chore(examples): 05 collect-mode failure + 07 PromptGroup
 rework
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address the two non-trivial items from spec PR #52 review.

- 05 fan-out-with-retry: ``COLLECT_MODE`` previously flipped the
  fan-out's error_policy but the demo had no failure path to
  exercise the new branch — ``instance_errors`` always stayed empty.
  Add a sentinel-detection in ``summarize`` that raises
  ``ProviderUnavailable`` (transient category) on headlines containing
  ``[FORCE_FAIL]``. Under ``COLLECT_MODE=1``, main() prepends one
  sentinel headline so retry exhausts on that instance, the failure
  lands in ``instance_errors``, and the rest of the batch completes.
  Default (fail_fast) keeps the headline list clean so the happy path
  runs unchanged. Print loop now handles partial summaries / topics
  lists by aligning successes to original indices via the
  ``fan_out_index`` carried on each error record.
- 07 multimodal-prompt: PromptGroup used a placeholder-render pattern
  for the second prompt because the original design had the second
  call depend on the first's output. Spec flagged this as teaching
  the wrong PromptGroup mental model. Restructured to two INDEPENDENT
  analyses of the same image: ``describe-surface`` and
  ``describe-equipment``, both taking only ``mission`` as a variable.
  Both prompts render up front with real variables; PromptGroup
  contains two genuine PromptResults; no placeholder identity sneaks
  into the group's metadata. State renamed (``CaptionState`` →
  ``AnalysisState``, ``caption`` → ``surface_description``,
  ``identified_mission`` → ``equipment_description``); nodes renamed
  (``caption`` → ``describe_surface``, ``identify`` →
  ``describe_equipment``); prompt files renamed accordingly in both
  primary and fallback backends.
---
 examples/05-fan-out-with-retry/main.py        |  53 ++++++-
 examples/07-multimodal-prompt/main.py         | 131 +++++++++---------
 .../prompts/production/caption-lunar-image.j2 |  10 --
 .../prompts/production/describe-equipment.j2  |   6 +
 .../prompts/production/describe-surface.j2    |   5 +
 .../prompts/production/identify-mission.j2    |  13 --
 .../production/caption-lunar-image.j2         |  11 --
 .../production/describe-equipment.j2          |   6 +
 .../production/describe-surface.j2            |   6 +
 .../production/identify-mission.j2            |  15 --
 examples/README.md                            |  17 ++-
 11 files changed, 144 insertions(+), 129 deletions(-)
 delete mode 100644 examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2
 create mode 100644 examples/07-multimodal-prompt/prompts/production/describe-equipment.j2
 create mode 100644 examples/07-multimodal-prompt/prompts/production/describe-surface.j2
 delete mode 100644 examples/07-multimodal-prompt/prompts/production/identify-mission.j2
 delete mode 100644 examples/07-multimodal-prompt/prompts_fallback/production/caption-lunar-image.j2
 create mode 100644 examples/07-multimodal-prompt/prompts_fallback/production/describe-equipment.j2
 create mode 100644 examples/07-multimodal-prompt/prompts_fallback/production/describe-surface.j2
 delete mode 100644 examples/07-multimodal-prompt/prompts_fallback/production/identify-mission.j2

diff --git a/examples/05-fan-out-with-retry/main.py b/examples/05-fan-out-with-retry/main.py
index 35348a8..9620c7f 100644
--- a/examples/05-fan-out-with-retry/main.py
+++ b/examples/05-fan-out-with-retry/main.py
@@ -33,6 +33,11 @@
   instance runs independently and per-instance failures land in
   ``state.instance_errors`` instead of aborting the batch. The
   ``errors_field="instance_errors"`` knob names where the records go.
+  Under COLLECT_MODE, the demo prepends a sentinel headline
+  (``[FORCE_FAIL] ...``) that ``summarize`` raises
+  ``ProviderUnavailable`` on; retry exhausts, the error lands in
+  ``instance_errors``, and the rest of the batch completes. Without
+  the sentinel, ``COLLECT_MODE`` would have nothing to capture.
 - A ``TimingRecord`` is captured per instance via an ``on_complete``
   callback. ``TimingRecord`` carries the per-call duration but not the
   ``fan_out_index`` — that index lives on observer NodeEvents instead.
@@ -83,7 +88,7 @@
     TimingRecord,
     deterministic_backoff,
 )
-from openarmature.llm import OpenAIProvider, SystemMessage, UserMessage
+from openarmature.llm import OpenAIProvider, ProviderUnavailable, SystemMessage, UserMessage
 
 _provider_instance: OpenAIProvider | None = None
 
@@ -153,6 +158,16 @@ class HeadlineState(State):
 
 
 async def summarize(s: HeadlineState) -> Mapping[str, Any]:
+    # Sentinel for the COLLECT_MODE demo. Raising a transient error
+    # (ProviderUnavailable carries the ``provider_unavailable``
+    # category, which retry's default classifier recognizes as
+    # retryable) lets the retry middleware exhaust its 3 attempts;
+    # the final failure then surfaces according to the fan-out's
+    # error_policy. Under fail_fast (default), the batch aborts.
+    # Under collect, the failure lands in instance_errors and the
+    # batch produces partial results.
+    if "[FORCE_FAIL]" in s.headline:
+        raise ProviderUnavailable("synthetic failure: provider unavailable (COLLECT_MODE demo)")
     content = await _chat(
         system=(
             "Rewrite the headline as one short sentence (~15 words) that would work as a lead. No preamble."
@@ -323,10 +338,23 @@ async def main() -> None:
     graph = build_graph(error_policy=error_policy)
     graph.attach_observer(fan_out_config_observer)
 
-    initial = BatchState(headlines=HEADLINES)
+    # Under COLLECT_MODE, prepend a deliberately-failing headline so
+    # the collect path is exercised end-to-end: retry middleware
+    # exhausts on the sentinel, the failure lands in
+    # state.instance_errors, and the rest of the batch completes.
+    # Default (fail_fast) keeps the headline list clean so the demo's
+    # happy path runs to completion.
+    if error_policy == "collect":
+        headlines = [
+            "[FORCE_FAIL] Synthetic failing headline for the COLLECT_MODE demo",
+            *HEADLINES,
+        ]
+    else:
+        headlines = list(HEADLINES)
+    initial = BatchState(headlines=headlines)
 
     print("=" * 72)
-    print(f"Summarizing {len(HEADLINES)} headlines in parallel (concurrency=3)")
+    print(f"Summarizing {len(headlines)} headlines in parallel (concurrency=3)")
     print(f"error_policy={error_policy!r}")
     print("=" * 72)
     print()
@@ -335,12 +363,23 @@ async def main() -> None:
     try:
         final = await graph.invoke(initial)
         wall_ms = (time.monotonic() - wall_start) * 1000.0
+        # Under collect, failed instances are absent from summaries /
+        # topics (their projections don't fire on failure). Pull the
+        # failed fan_out_indices out of instance_errors so the print
+        # loop can align successes to original positions and mark the
+        # gaps for the reader.
+        failed_indices = {int(e["fan_out_index"]) for e in final.instance_errors}
+        success_iter = iter(zip(final.summaries, final.topics, strict=True))
         print("Results (in input order):")
         print()
-        for i, (h, s, t) in enumerate(zip(final.headlines, final.summaries, final.topics, strict=True)):
-            print(f"  [{i}] {h}")
-            print(f"       summary: {s}")
-            print(f"       topic:   {t}")
+        for i, headline in enumerate(final.headlines):
+            print(f"  [{i}] {headline}")
+            if i in failed_indices:
+                print("       (failed after retries; see instance_errors below)")
+            else:
+                s, t = next(success_iter)
+                print(f"       summary: {s}")
+                print(f"       topic:   {t}")
             print()
         if final.instance_errors:
             print(f"Captured {len(final.instance_errors)} per-instance error(s):")
diff --git a/examples/07-multimodal-prompt/main.py b/examples/07-multimodal-prompt/main.py
index 56b5968..697e559 100644
--- a/examples/07-multimodal-prompt/main.py
+++ b/examples/07-multimodal-prompt/main.py
@@ -1,13 +1,14 @@
-"""openarmature demo: caption and identify a lunar mission photograph
-using versioned prompt templates, a fallback prompt backend, and a
-multimodal user message.
-
-**Use case:** Given a photograph from a lunar mission, run two prompts
-in sequence: first describe what's visible (``caption-lunar-image``),
-then use that caption alongside the same image to identify the specific
-mission (``identify-mission``). Both prompts are versioned templates on
-disk; both renders are grouped under one observability ``PromptGroup``
-so a trace UI can render them as a single logical unit.
+"""openarmature demo: two independent analyses of a lunar-mission
+photograph using versioned prompt templates, a fallback prompt
+backend, and a multimodal user message.
+
+**Use case:** Given a photograph from a lunar mission, run two
+independent analyses: describe the lunar surface visible
+(``describe-surface``) and identify the equipment (``describe-equipment``).
+Both prompts take the mission name as their only variable; neither
+depends on the other's output. Both renders are grouped under one
+observability ``PromptGroup`` so a trace UI can render the analyses
+as one logical unit.
 
 The image can come from a public URL (default) or a local file (set
 ``IMAGE_PATH`` to use the inline base64 source instead). The
@@ -25,13 +26,17 @@
   legitimately missing). The typical production shape is "Langfuse
   primary + local-filesystem fallback".
 - ``FilesystemPromptBackend`` uses the ``<root>/<label>/<name>.j2``
-  layout. The demo ships two prompts (``caption-lunar-image``,
-  ``identify-mission``) under the primary backend's ``production``
-  label, plus a sibling backend rooted at a different folder for the
-  fallback demonstration.
+  layout. The demo ships two prompts (``describe-surface``,
+  ``describe-equipment``) under the primary backend's ``production``
+  label, plus matching variants in the fallback backend so the safety
+  net covers both prompts.
 - ``PromptGroup(group_name=..., members=[result_a, result_b])`` wraps
   two ``PromptResult`` instances under one observability identifier.
-  ``with_active_prompt_group(group)`` propagates the group name via
+  Because the prompts are INDEPENDENT analyses of the same input,
+  both can be rendered upfront with real variables — no placeholder
+  renders, no asymmetric "first call computes the second's input"
+  shape.
+- ``with_active_prompt_group(group)`` propagates the group name via
   ContextVar; OTel observers stamp ``openarmature.prompt.group_name``
   onto every LLM-call span fired inside.
 - ``with_active_prompt(result)`` (inside the group's scope) propagates
@@ -127,11 +132,11 @@ def _get_provider() -> OpenAIProvider:
 # construct, holds no per-call state, and is safe to share across nodes.
 #
 # Two backends are wired here:
-#   - primary: ``prompts/`` — ships caption-lunar-image and
-#     identify-mission.
+#   - primary: ``prompts/`` — ships describe-surface and
+#     describe-equipment.
 #   - fallback: ``prompts_fallback/`` — ships shorter variants of
-#     BOTH prompts so the safety net actually covers the whole
-#     pipeline. The fallback path fires when the primary raises
+#     both prompts so the safety net covers the whole pipeline. The
+#     fallback path fires when the primary raises
 #     ``PromptStoreUnavailable`` (e.g., a remote primary like
 #     Langfuse times out); ``PromptNotFound`` from primary stops the
 #     chain (the name is legitimately missing).
@@ -154,14 +159,14 @@ def _get_provider() -> OpenAIProvider:
 # ---------------------------------------------------------------------------
 
 
-class CaptionState(State):
+class AnalysisState(State):
     # Exactly one of ``image_url`` / ``image_path`` is set when the
     # demo runs; the helper below picks the right ImageSource shape.
     image_url: str = ""
     image_path: str = ""
     mission: str
-    caption: str = ""
-    identified_mission: str = ""
+    surface_description: str = ""
+    equipment_description: str = ""
     group_name: str = ""
     trace: Annotated[list[str], append] = Field(default_factory=list)
 
@@ -218,11 +223,12 @@ def _extract_rendered_text(rendered: PromptResult) -> str:
     return rendered_msg.content
 
 
-async def caption(s: CaptionState) -> Mapping[str, Any]:
-    # Each node fetches + renders its own prompt. ``get`` is the
-    # convenience shorthand for ``render(await fetch(...))``.
+async def describe_surface(s: AnalysisState) -> Mapping[str, Any]:
+    # Each node fetches + renders its own prompt. Both prompts take
+    # only the ``mission`` variable, so neither depends on the other's
+    # output — the two analyses are independent.
     rendered = await _PROMPT_MANAGER.get(
-        "caption-lunar-image",
+        "describe-surface",
         variables={"mission": s.mission},
     )
     rendered_text = _extract_rendered_text(rendered)
@@ -245,18 +251,15 @@ async def caption(s: CaptionState) -> Mapping[str, Any]:
         response = await _get_provider().complete([multimodal_message])
 
     return {
-        "caption": (response.message.content or "").strip(),
-        "trace": ["caption"],
+        "surface_description": (response.message.content or "").strip(),
+        "trace": ["describe_surface"],
     }
 
 
-async def identify(s: CaptionState) -> Mapping[str, Any]:
-    # Uses the caption produced by the previous node — so the render
-    # happens here, not in main(). Same with_active_prompt wrapping;
-    # the outer group context from main() still applies.
+async def describe_equipment(s: AnalysisState) -> Mapping[str, Any]:
     rendered = await _PROMPT_MANAGER.get(
-        "identify-mission",
-        variables={"caption": s.caption},
+        "describe-equipment",
+        variables={"mission": s.mission},
     )
     rendered_text = _extract_rendered_text(rendered)
 
@@ -270,21 +273,20 @@ async def identify(s: CaptionState) -> Mapping[str, Any]:
     with with_active_prompt(rendered):
         response = await _get_provider().complete([multimodal_message])
 
-    identified = (response.message.content or "").strip().removeprefix("Mission:").strip()
     return {
-        "identified_mission": identified,
-        "trace": ["identify"],
+        "equipment_description": (response.message.content or "").strip(),
+        "trace": ["describe_equipment"],
     }
 
 
-def build_graph() -> CompiledGraph[CaptionState]:
+def build_graph() -> CompiledGraph[AnalysisState]:
     return (
-        GraphBuilder(CaptionState)
-        .add_node("caption", caption)
-        .add_node("identify", identify)
-        .add_edge("caption", "identify")
-        .add_edge("identify", END)
-        .set_entry("caption")
+        GraphBuilder(AnalysisState)
+        .add_node("describe_surface", describe_surface)
+        .add_node("describe_equipment", describe_equipment)
+        .add_edge("describe_surface", "describe_equipment")
+        .add_edge("describe_equipment", END)
+        .set_entry("describe_surface")
         .compile()
     )
 
@@ -300,7 +302,7 @@ async def main() -> None:
     mission = os.environ.get("MISSION", DEFAULT_MISSION)
 
     print("=" * 72)
-    print("Caption + identify a lunar photograph")
+    print("Lunar-mission image analysis (surface + equipment)")
     print("=" * 72)
     print()
     print(f"  mission:   {mission}")
@@ -310,25 +312,21 @@ async def main() -> None:
         print(f"  image:     {image_url} (url)")
     print()
 
-    # Pre-render both prompts with placeholder variables so the
-    # PromptGroup can be built ONCE at invoke entry and set as the
-    # outer observability context for the whole pipeline. The actual
-    # per-call renders happen inside the nodes, picking up the real
-    # ``caption`` variable that's only known after the first node
-    # completes. The group's ``members`` list is a metadata hint
-    # naming the two prompt slots; per-call wrapping inside the
-    # nodes carries the exact-rendered identity for each call.
-    caption_member = await _PROMPT_MANAGER.get(
-        "caption-lunar-image",
+    # Pre-render both prompts with the real ``mission`` variable so
+    # the PromptGroup can be built once at invoke entry. Both renders
+    # are honest — the nodes use the same fetch+render path inside,
+    # so no placeholder identities sneak into the group's metadata.
+    surface_member = await _PROMPT_MANAGER.get(
+        "describe-surface",
         variables={"mission": mission},
     )
-    identify_placeholder = await _PROMPT_MANAGER.get(
-        "identify-mission",
-        variables={"caption": "(provided at runtime)"},
+    equipment_member = await _PROMPT_MANAGER.get(
+        "describe-equipment",
+        variables={"mission": mission},
     )
     group = PromptGroup(
         group_name="lunar-image-analysis",
-        members=[caption_member, identify_placeholder],
+        members=[surface_member, equipment_member],
     )
 
     graph = build_graph()
@@ -340,7 +338,7 @@ async def main() -> None:
         # LLM-call span.
         with with_active_prompt_group(group):
             final = await graph.invoke(
-                CaptionState(
+                AnalysisState(
                     image_url=image_url if not image_path else "",
                     image_path=image_path,
                     mission=mission,
@@ -348,14 +346,15 @@ async def main() -> None:
                 )
             )
 
-        print(f"  group:       {final.group_name}")
-        print(f"  caption-prompt:   {caption_member.name} @ {caption_member.version}")
-        print(f"  identify-prompt:  {identify_placeholder.name} @ {identify_placeholder.version}")
+        print(f"  group:                {final.group_name}")
+        print(f"  describe-surface:     {surface_member.name} @ {surface_member.version}")
+        print(f"  describe-equipment:   {equipment_member.name} @ {equipment_member.version}")
         print()
-        print("  caption:")
-        print(f"    {final.caption}")
+        print("  surface description:")
+        print(f"    {final.surface_description}")
         print()
-        print(f"  identified mission:  {final.identified_mission}")
+        print("  equipment description:")
+        print(f"    {final.equipment_description}")
     finally:
         await graph.drain()
         if _provider_instance is not None:
diff --git a/examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2 b/examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2
deleted file mode 100644
index 931dac0..0000000
--- a/examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2
+++ /dev/null
@@ -1,10 +0,0 @@
-You are looking at a historical photograph from {{ mission }}.
-
-Describe what's visible in the image in one tight paragraph (~3 sentences).
-Cover three things in order:
-
-1. The subject of the photo — who or what is in focus.
-2. The environment — lunar surface details, equipment, lighting.
-3. Anything distinctive that identifies the era or this specific mission.
-
-No preamble; no markdown; no headers.
diff --git a/examples/07-multimodal-prompt/prompts/production/describe-equipment.j2 b/examples/07-multimodal-prompt/prompts/production/describe-equipment.j2
new file mode 100644
index 0000000..7627048
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts/production/describe-equipment.j2
@@ -0,0 +1,6 @@
+You are looking at a photograph from {{ mission }}.
+
+Identify and describe the spacecraft, equipment, instruments, or other
+human-made artifacts visible in the image — what they are and what
+they're for. One tight paragraph (~3 sentences); no preamble; no
+markdown; no headers.
diff --git a/examples/07-multimodal-prompt/prompts/production/describe-surface.j2 b/examples/07-multimodal-prompt/prompts/production/describe-surface.j2
new file mode 100644
index 0000000..8bd359d
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts/production/describe-surface.j2
@@ -0,0 +1,5 @@
+You are looking at a photograph from {{ mission }}.
+
+Describe the lunar surface features visible in the image — terrain,
+shadows, the regolith's texture, the horizon line. One tight paragraph
+(~3 sentences); no preamble; no markdown; no headers.
diff --git a/examples/07-multimodal-prompt/prompts/production/identify-mission.j2 b/examples/07-multimodal-prompt/prompts/production/identify-mission.j2
deleted file mode 100644
index ae81af6..0000000
--- a/examples/07-multimodal-prompt/prompts/production/identify-mission.j2
+++ /dev/null
@@ -1,13 +0,0 @@
-You are looking at a historical photograph from a lunar mission.
-
-The caption below was written by another system; use it as a hint
-but rely on the image itself for the answer.
-
-Caption hint: {{ caption }}
-
-Identify the specific mission shown in the image (e.g., "Apollo 11",
-"Apollo 13", "Artemis II") in EXACTLY one short line of the form:
-
-  Mission: <name>
-
-No preamble; no extra commentary; no markdown.
diff --git a/examples/07-multimodal-prompt/prompts_fallback/production/caption-lunar-image.j2 b/examples/07-multimodal-prompt/prompts_fallback/production/caption-lunar-image.j2
deleted file mode 100644
index 2689661..0000000
--- a/examples/07-multimodal-prompt/prompts_fallback/production/caption-lunar-image.j2
+++ /dev/null
@@ -1,11 +0,0 @@
-You are looking at a photograph from {{ mission }}.
-
-Briefly describe the image: who is visible, where they are, and what
-they appear to be doing. One sentence; no preamble.
-
-(This is the fallback variant. It exists so the demo's
-PromptManager configuration shows TWO backends; the primary backend
-above ships the canonical longer caption-lunar-image prompt and is
-always tried first. This fallback fires only if the primary backend
-raises PromptStoreUnavailable — typical for a remote primary like
-Langfuse with a local filesystem fallback.)
diff --git a/examples/07-multimodal-prompt/prompts_fallback/production/describe-equipment.j2 b/examples/07-multimodal-prompt/prompts_fallback/production/describe-equipment.j2
new file mode 100644
index 0000000..4ebbc1f
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts_fallback/production/describe-equipment.j2
@@ -0,0 +1,6 @@
+Briefly identify the spacecraft or equipment visible in this {{ mission }}
+photo. One sentence; no preamble.
+
+(This is the fallback variant; the primary backend ships the canonical
+longer describe-equipment prompt and is always tried first. The fallback
+fires when the primary raises PromptStoreUnavailable.)
diff --git a/examples/07-multimodal-prompt/prompts_fallback/production/describe-surface.j2 b/examples/07-multimodal-prompt/prompts_fallback/production/describe-surface.j2
new file mode 100644
index 0000000..533da32
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts_fallback/production/describe-surface.j2
@@ -0,0 +1,6 @@
+Briefly describe the lunar surface visible in this {{ mission }} photo.
+One sentence; no preamble.
+
+(This is the fallback variant; the primary backend ships the canonical
+longer describe-surface prompt and is always tried first. The fallback
+fires when the primary raises PromptStoreUnavailable.)
diff --git a/examples/07-multimodal-prompt/prompts_fallback/production/identify-mission.j2 b/examples/07-multimodal-prompt/prompts_fallback/production/identify-mission.j2
deleted file mode 100644
index 5110b84..0000000
--- a/examples/07-multimodal-prompt/prompts_fallback/production/identify-mission.j2
+++ /dev/null
@@ -1,15 +0,0 @@
-You are looking at a photograph from a lunar mission. The caption
-hint below was written by another system.
-
-Caption hint: {{ caption }}
-
-In ONE short line, name the specific mission shown:
-
-  Mission: <name>
-
-No preamble.
-
-(This is the fallback variant. The primary backend ships the
-canonical identify-mission prompt and is always tried first; this
-file exists so a primary-backend outage doesn't break the demo
-pipeline.)
diff --git a/examples/README.md b/examples/README.md
index 355e380..78d33b9 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -63,13 +63,16 @@ per-branch middleware.
 
 ### [`07-multimodal-prompt/`](./07-multimodal-prompt/main.py)
 
-Caption a historical lunar photograph using a versioned prompt
-template plus a multimodal user message. The prompt text is loaded
-from a Jinja2 template on disk via `FilesystemPromptBackend`; the
-image is passed alongside the rendered text as an `ImageBlock` in a
-multimodal `UserMessage`. Demonstrates: `PromptManager` + filesystem
-backend, prompt fetch + render with template variables,
-`with_active_prompt` context-var propagation for observability,
+Two independent analyses of a lunar-mission photograph — describe
+the surface, describe the equipment — using versioned prompt
+templates and a multimodal user message. Templates load from
+`FilesystemPromptBackend` with a primary + fallback chain; both
+renders are grouped under one observability `PromptGroup` so a trace
+UI can render them as one logical unit. Image source switches
+between `ImageSourceURL` and `ImageSourceInline(base64_data=...)`
+via the `IMAGE_PATH` env var. Demonstrates: `PromptManager` with
+composite backends, prompt fetch + render with template variables,
+`PromptGroup` + `with_active_prompt_group`, `with_active_prompt` nesting,
 multimodal `UserMessage` carrying both text and image content blocks.
 
 ### [`08-checkpointing-and-migration/`](./08-checkpointing-and-migration/main.py)

From 4ea1657c1c5b766c4e966546b7dfa6d5316de3ad Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Mon, 18 May 2026 14:11:55 -0700
Subject: [PATCH 5/5] chore(examples): soften 00 determinism wording + clarify
 09 loop exit

- 00 hello-world: docstring + module comment described
  temperature=0.0 as making the run "reproduce deterministically",
  which over-promises. LLM APIs don't guarantee strict determinism
  even at temp 0 (provider-side batching, GPU sampling heuristics,
  model-version drift). Reworded to "reduces sampling variance" and
  "as reproducible as the API allows" so the pedagogical point
  (RuntimeConfig is the tuning knob) lands without an inaccurate
  guarantee. ``_DETERMINISTIC`` variable name kept as a recognizable
  shorthand for the demo.
- 09 tool-use: docstring said the loop terminates when
  ``finish_reason="stop"``, but the route function actually checks
  whether the last AssistantMessage carries any ``tool_calls``.
  finish_reason isn't tracked in state. Reworded to match the
  implementation: "loop terminates when the assistant message has
  no tool_calls (the model is done requesting tools) or after a
  hard turn cap."
---
 examples/00-hello-world/main.py | 15 +++++++++------
 examples/09-tool-use/main.py    |  6 +++---
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/examples/00-hello-world/main.py b/examples/00-hello-world/main.py
index b6ce0ed..0aa0c8a 100644
--- a/examples/00-hello-world/main.py
+++ b/examples/00-hello-world/main.py
@@ -13,8 +13,10 @@
     instance on ``Response.parsed``.
   - JSON Schema dict (``research``): raw dict on ``Response.parsed``.
 - ``RuntimeConfig`` for per-call sampling knobs — every ``complete()``
-  here passes ``config=RuntimeConfig(temperature=0.0)`` so the run
-  reproduces deterministically.
+  here passes ``config=RuntimeConfig(temperature=0.0)`` to reduce
+  sampling variance across runs. Temperature 0 isn't a strict
+  determinism guarantee (providers vary at the infra level) but it's
+  the standard tuning knob for "as reproducible as the API allows."
 - Conditional routing on a parsed field (``route`` reads
   ``state.classification.intent``).
 - ``attach_observer`` for boundary visibility.
@@ -87,10 +89,11 @@ class PipelineState(State):
 # builders, IDE inspection) import this module without running main().
 _provider_instance: OpenAIProvider | None = None
 
-# Per-call sampling knobs. The demo locks the model at temperature 0
-# so the routing classification (and the rest of the run) reproduces
-# across invocations — useful for tutorial output, less appropriate
-# for production where some sampling variety is desirable.
+# Per-call sampling knobs. The demo sets temperature 0 to reduce
+# variance across invocations — the run is "as reproducible as the
+# API allows" but not strictly deterministic (providers vary at the
+# infra level even at temp 0). Useful for tutorial output; production
+# usually wants some sampling variety.
 # RuntimeConfig also surfaces max_tokens, top_p, and seed; only
 # temperature is set here so the others fall through to provider
 # defaults.
diff --git a/examples/09-tool-use/main.py b/examples/09-tool-use/main.py
index baaab89..9fa91ff 100644
--- a/examples/09-tool-use/main.py
+++ b/examples/09-tool-use/main.py
@@ -11,9 +11,9 @@
 
 The agent loops: send messages + tools to the model, dispatch any
 ``tool_calls`` the model emits, feed the results back as
-``ToolMessage`` entries, and call the model again. Loop terminates when
-the model returns content with ``finish_reason="stop"`` (or after a
-hard turn cap).
+``ToolMessage`` entries, and call the model again. Loop terminates
+when the assistant message has no ``tool_calls`` (the model is done
+requesting tools) or after a hard turn cap.
 
 **What's interesting in the implementation:**