diff --git a/examples/00-hello-world/main.py b/examples/00-hello-world/main.py
index 53eb2de..0aa0c8a 100644
--- a/examples/00-hello-world/main.py
+++ b/examples/00-hello-world/main.py
@@ -12,6 +12,11 @@
   - Pydantic class (``Classification``, ``Summary``): typed
     instance on ``Response.parsed``.
   - JSON Schema dict (``research``): raw dict on ``Response.parsed``.
+- ``RuntimeConfig`` for per-call sampling knobs — every ``complete()``
+  here passes ``config=RuntimeConfig(temperature=0.0)`` to reduce
+  sampling variance across runs. Temperature 0 isn't a strict
+  determinism guarantee (providers vary at the infra level) but it's
+  the standard tuning knob for "as reproducible as the API allows."
 - Conditional routing on a parsed field (``route`` reads
   ``state.classification.intent``).
 - ``attach_observer`` for boundary visibility.
@@ -49,7 +54,7 @@
     append,
     merge,
 )
-from openarmature.llm import OpenAIProvider, UserMessage
+from openarmature.llm import OpenAIProvider, RuntimeConfig, UserMessage
 
 
 # Pydantic schemas the model is constrained to produce. Passing a
@@ -84,6 +89,16 @@ class PipelineState(State):
 # builders, IDE inspection) import this module without running main().
 _provider_instance: OpenAIProvider | None = None
 
+# Per-call sampling knobs. The demo sets temperature 0 to reduce
+# variance across invocations — the run is "as reproducible as the
+# API allows" but not strictly deterministic (providers vary at the
+# infra level even at temp 0). Useful for tutorial output; production
+# usually wants some sampling variety.
+# RuntimeConfig also surfaces max_tokens, top_p, and seed; only
+# temperature is set here so the others fall through to provider
+# defaults.
+_DETERMINISTIC = RuntimeConfig(temperature=0.0)
+
 
 def _get_provider() -> OpenAIProvider:
     global _provider_instance
@@ -113,6 +128,7 @@ async def classify(state: PipelineState) -> Mapping[str, Any]:
             )
         ],
         response_schema=Classification,
+        config=_DETERMINISTIC,
     )
     return {"classification": response.parsed, "metadata": {"classified_by": "llm"}}
 
@@ -140,6 +156,7 @@ async def research(state: PipelineState) -> Mapping[str, Any]:
             "required": ["topics", "follow_up_questions"],
             "additionalProperties": False,
         },
+        config=_DETERMINISTIC,
     )
     return {
         "research_plan": response.parsed,
@@ -161,6 +178,7 @@ async def summarize(state: PipelineState) -> Mapping[str, Any]:
             )
         ],
         response_schema=Summary,
+        config=_DETERMINISTIC,
     )
     return {
         "summary": response.parsed,
diff --git a/examples/05-fan-out-with-retry/main.py b/examples/05-fan-out-with-retry/main.py
index 88cb485..9620c7f 100644
--- a/examples/05-fan-out-with-retry/main.py
+++ b/examples/05-fan-out-with-retry/main.py
@@ -27,12 +27,29 @@
   per-instance: a failure on headline 3 doesn't restart headlines 0-2.
 - ``concurrency=3`` caps how many instances run in flight at once. Use
   this to be polite to the upstream API.
+- ``error_policy`` defaults to ``"fail_fast"`` — the first instance
+  failure (after retries exhaust) raises and cancels siblings. Set
+  the ``COLLECT_MODE`` env var to switch to ``"collect"``: each
+  instance runs independently and per-instance failures land in
+  ``state.instance_errors`` instead of aborting the batch. The
+  ``errors_field="instance_errors"`` knob names where the records go.
+  Under COLLECT_MODE, the demo prepends a sentinel headline
+  (``[FORCE_FAIL] ...``) that ``summarize`` raises
+  ``ProviderUnavailable`` on; retry exhausts, the error lands in
+  ``instance_errors``, and the rest of the batch completes. Without
+  the sentinel, ``COLLECT_MODE`` would have nothing to capture.
 - A ``TimingRecord`` is captured per instance via an ``on_complete``
   callback. ``TimingRecord`` carries the per-call duration but not the
   ``fan_out_index`` — that index lives on observer NodeEvents instead.
   The demo prints captured durations in completion order plus a
   wall-clock vs sum-of-durations comparison that shows concurrency
   actually parallelized the work.
+- A ``fan_out_config_observer`` reads ``NodeEvent.fan_out_config`` on
+  the fan-out node's dispatch event. Inner-instance events carry
+  ``fan_out_index`` but not ``fan_out_config``; the config lives on
+  the fan-out node's own started / completed pair and gives observers
+  a record of the resolved item_count, concurrency, and error_policy
+  at dispatch time.
 
 **Configuration** (env vars; OpenAI defaults shown):
 
@@ -61,6 +78,7 @@
     END,
     CompiledGraph,
     GraphBuilder,
+    NodeEvent,
     State,
     append,
 )
@@ -70,7 +88,7 @@
     TimingRecord,
     deterministic_backoff,
 )
-from openarmature.llm import OpenAIProvider, SystemMessage, UserMessage
+from openarmature.llm import OpenAIProvider, ProviderUnavailable, SystemMessage, UserMessage
 
 _provider_instance: OpenAIProvider | None = None
 
@@ -114,11 +132,14 @@ async def _chat(system: str, user: str) -> str:
 
 class BatchState(State):
     """Outer graph: list of headlines goes in, parallel lists of summaries
-    and topic tags come out."""
+    and topic tags come out. ``instance_errors`` only populates under
+    ``error_policy="collect"`` — each failed instance contributes one
+    record naming its ``fan_out_index`` and the exception category."""
 
     headlines: list[str] = Field(default_factory=list)
     summaries: Annotated[list[str], append] = Field(default_factory=list)
     topics: Annotated[list[str], append] = Field(default_factory=list)
+    instance_errors: Annotated[list[dict[str, Any]], append] = Field(default_factory=list[dict[str, Any]])
     trace: Annotated[list[str], append] = Field(default_factory=list)
 
 
@@ -137,6 +158,16 @@ class HeadlineState(State):
 
 
 async def summarize(s: HeadlineState) -> Mapping[str, Any]:
+    # Sentinel for the COLLECT_MODE demo. Raising a transient error
+    # (ProviderUnavailable carries the ``provider_unavailable``
+    # category, which retry's default classifier recognizes as
+    # retryable) lets the retry middleware exhaust its 3 attempts;
+    # the final failure then surfaces according to the fan-out's
+    # error_policy. Under fail_fast (default), the batch aborts.
+    # Under collect, the failure lands in instance_errors and the
+    # batch produces partial results.
+    if "[FORCE_FAIL]" in s.headline:
+        raise ProviderUnavailable("synthetic failure: provider unavailable (COLLECT_MODE demo)")
     content = await _chat(
         system=(
             "Rewrite the headline as one short sentence (~15 words) that would work as a lead. No preamble."
@@ -216,7 +247,16 @@ async def present(s: BatchState) -> Mapping[str, Any]:
     return {"trace": ["present"]}
 
 
-def build_graph() -> CompiledGraph[BatchState]:
+def build_graph(error_policy: str = "fail_fast") -> CompiledGraph[BatchState]:
+    """Build the fan-out demo graph.
+
+    ``error_policy`` switches between ``"fail_fast"`` (default; first
+    exhausted-retry failure raises and cancels the rest) and
+    ``"collect"`` (each instance runs independently; failures land in
+    ``state.instance_errors`` and the batch produces partial results).
+    The smoke test calls this with no argument, exercising the default
+    path; main() lets the COLLECT_MODE env var flip to collect.
+    """
     headline_subgraph = build_headline_subgraph()
 
     retry = RetryMiddleware(
@@ -244,6 +284,8 @@ def build_graph() -> CompiledGraph[BatchState]:
             extra_outputs={"topics": "topic"},
             concurrency=3,
             instance_middleware=(retry, timing),
+            error_policy=error_policy,
+            errors_field="instance_errors",
         )
         .add_node("present", present)
         .add_edge("announce", "headline_runs")
@@ -254,6 +296,30 @@ def build_graph() -> CompiledGraph[BatchState]:
     )
 
 
+async def fan_out_config_observer(event: NodeEvent) -> None:
+    """Print the fan-out node's resolved config when its dispatch event
+    fires.
+
+    NodeEvent carries ``fan_out_config`` ONLY on the fan-out node's own
+    started / completed pair (the dispatch wrapper); inner-instance
+    events carry ``fan_out_index`` but not ``fan_out_config``. Reading
+    the config gives observability layers a record of how the dispatch
+    actually resolved at runtime — useful when ``count`` or
+    ``concurrency`` are callable resolvers whose value isn't visible
+    in code.
+    """
+    if event.fan_out_config is None:
+        return
+    if event.phase != "started":
+        return
+    cfg = event.fan_out_config
+    print(
+        f"  [observer] fan-out node {event.node_name!r} dispatching: "
+        f"item_count={cfg.item_count} concurrency={cfg.concurrency} "
+        f"error_policy={cfg.error_policy!r}"
+    )
+
+
 # ---------------------------------------------------------------------------
 # Main
 # ---------------------------------------------------------------------------
@@ -264,12 +330,32 @@ async def main() -> None:
     # doesn't accumulate timings across invocations.
     _timings.clear()
 
-    graph = build_graph()
-
-    initial = BatchState(headlines=HEADLINES)
+    # Any non-empty COLLECT_MODE (e.g. =1; "0" counts too) switches the
+    # fan-out error policy from the default fail_fast to collect: each
+    # instance runs independently and per-instance failures (after retries
+    # exhaust) land in state.instance_errors instead of aborting the batch.
+    error_policy = "collect" if os.environ.get("COLLECT_MODE") else "fail_fast"
+    graph = build_graph(error_policy=error_policy)
+    graph.attach_observer(fan_out_config_observer)
+
+    # Under COLLECT_MODE, prepend a deliberately-failing headline so
+    # the collect path is exercised end-to-end: retry middleware
+    # exhausts on the sentinel, the failure lands in
+    # state.instance_errors, and the rest of the batch completes.
+    # Default (fail_fast) keeps the headline list clean so the demo's
+    # happy path runs to completion.
+    if error_policy == "collect":
+        headlines = [
+            "[FORCE_FAIL] Synthetic failing headline for the COLLECT_MODE demo",
+            *HEADLINES,
+        ]
+    else:
+        headlines = list(HEADLINES)
+    initial = BatchState(headlines=headlines)
 
     print("=" * 72)
-    print(f"Summarizing {len(HEADLINES)} headlines in parallel (concurrency=3)")
+    print(f"Summarizing {len(headlines)} headlines in parallel (concurrency=3)")
+    print(f"error_policy={error_policy!r}")
     print("=" * 72)
     print()
 
@@ -277,12 +363,28 @@ async def main() -> None:
     try:
         final = await graph.invoke(initial)
         wall_ms = (time.monotonic() - wall_start) * 1000.0
+        # Under collect, failed instances are absent from summaries /
+        # topics (their projections don't fire on failure). Pull the
+        # failed fan_out_indices out of instance_errors so the print
+        # loop can align successes to original positions and mark the
+        # gaps for the reader.
+        failed_indices = {int(e["fan_out_index"]) for e in final.instance_errors}
+        success_iter = iter(zip(final.summaries, final.topics, strict=True))
         print("Results (in input order):")
         print()
-        for i, (h, s, t) in enumerate(zip(final.headlines, final.summaries, final.topics, strict=True)):
-            print(f"  [{i}] {h}")
-            print(f"       summary: {s}")
-            print(f"       topic:   {t}")
+        for i, headline in enumerate(final.headlines):
+            print(f"  [{i}] {headline}")
+            if i in failed_indices:
+                print("       (failed after retries; see instance_errors below)")
+            else:
+                s, t = next(success_iter)
+                print(f"       summary: {s}")
+                print(f"       topic:   {t}")
+            print()
+        if final.instance_errors:
+            print(f"Captured {len(final.instance_errors)} per-instance error(s):")
+            for err in final.instance_errors:
+                print(f"  {err}")
             print()
         print("Per-instance timings (in completion order):")
         for nth, record in enumerate(_timings):
diff --git a/examples/06-parallel-branches/main.py b/examples/06-parallel-branches/main.py
index b53c80b..78a2ed1 100644
--- a/examples/06-parallel-branches/main.py
+++ b/examples/06-parallel-branches/main.py
@@ -35,6 +35,12 @@
   mapping (not in completion order). The three branches here write
   disjoint parent fields, so the order doesn't affect the result —
   but the property holds and would matter if they overlapped.
+- A ``branch_attribution_observer`` reads ``NodeEvent.branch_name``
+  on inner-node events. ``branch_name`` is populated only for
+  events INSIDE a branch's subgraph; outermost nodes (receive,
+  enrich, present) have ``branch_name=None``. This is the
+  per-event attribution that lets observability backends route
+  metrics / spans by branch.
 
 **Configuration** (env vars; OpenAI defaults shown):
 
@@ -64,6 +70,7 @@
     BranchSpec,
     CompiledGraph,
     GraphBuilder,
+    NodeEvent,
     State,
     append,
 )
@@ -233,6 +240,21 @@ async def present(s: ArticleState) -> Mapping[str, Any]:
     return {"trace": ["present"]}
 
 
+async def branch_attribution_observer(event: NodeEvent) -> None:
+    """Print which branch each inner-node event came from.
+
+    NodeEvent carries ``branch_name`` on events from nodes that
+    execute INSIDE a parallel-branches branch — it's the per-event
+    attribution that says "this came from branch X." Outermost-graph
+    nodes (receive, enrich, present) carry no branch_name. The
+    observer skips events with no branch attribution and prints
+    ``(branch=…) node_name`` for the rest.
+    """
+    if event.branch_name is None or event.phase != "started":
+        return
+    print(f"  [observer] (branch={event.branch_name}) inner node {event.node_name!r} started")
+
+
 def build_graph() -> CompiledGraph[ArticleState]:
     summary = build_summary_subgraph()
     sentiment = build_sentiment_subgraph()
@@ -287,6 +309,7 @@ def build_graph() -> CompiledGraph[ArticleState]:
 
 async def main() -> None:
     graph = build_graph()
+    graph.attach_observer(branch_attribution_observer)
 
     print("=" * 72)
     print("Lunar-mission article enrichment — three independent analyses in parallel")
diff --git a/examples/07-multimodal-prompt/main.py b/examples/07-multimodal-prompt/main.py
index 48028f9..697e559 100644
--- a/examples/07-multimodal-prompt/main.py
+++ b/examples/07-multimodal-prompt/main.py
@@ -1,55 +1,77 @@
-"""openarmature demo: caption a historical lunar photograph using a
-versioned prompt template plus a multimodal user message.
-
-**Use case:** Given a photograph from a lunar mission and the mission's
-name, describe what's visible in the image. The text instructions are
-loaded from a versioned prompt template on disk so they can be edited,
-diffed, and rolled out independently of the code. The image is passed
-to the model alongside the rendered text as a multimodal user message.
-
-This is the "prompt management + image input" shape — two openarmature
-surfaces that compose cleanly. The prompt manager gives you traceable,
-hashable, version-tagged instruction text; content blocks give you the
-multimodal payload alongside it.
+"""openarmature demo: two independent analyses of a lunar-mission
+photograph using versioned prompt templates, a fallback prompt
+backend, and a multimodal user message.
+
+**Use case:** Given a photograph from a lunar mission, run two
+independent analyses: describe the lunar surface visible
+(``describe-surface``) and identify the equipment (``describe-equipment``).
+Both prompts take the mission name as their only variable; neither
+depends on the other's output. Both renders are grouped under one
+observability ``PromptGroup`` so a trace UI can render the analyses
+as one logical unit.
+
+The image can come from a public URL (default) or a local file (set
+``IMAGE_PATH`` to use the inline base64 source instead). The
+``PromptManager`` is wired with a primary + fallback
+``FilesystemPromptBackend`` to demonstrate composite-backend
+configuration; the fallback path fires only when the primary raises
+``PromptStoreUnavailable`` (e.g., a remote Langfuse backend is offline).
 
 **What's interesting in the implementation:**
 
-- ``FilesystemPromptBackend`` loads ``caption-lunar-image.j2`` from
-  ``prompts/production/``. The layout is ``<root>/<label>/<name>.j2``;
-  the ``label`` ("production" here) is the rollout channel.
-- ``PromptManager(backend)`` wraps the backend. ``manager.get(name,
-  variables={...})`` fetches and renders in one call, returning a
-  ``PromptResult`` whose ``messages`` carries the rendered text and
-  whose ``template_hash`` / ``rendered_hash`` identify exactly which
-  template+variables produced this output.
-- ``with_active_prompt(result)`` is a context manager. While it's
-  active, OTel observers see ``openarmature.prompt.*`` attributes
-  stamped onto any LLM-call span fired inside the block. No OTel
-  observer is attached in this demo (keeps the output focused on the
-  caption), but the wrapping is the canonical pattern for production.
+- ``PromptManager(primary, fallback)`` accepts multiple backends. On
+  every ``fetch``, the manager tries them in order: if a backend
+  raises ``PromptStoreUnavailable`` the manager continues to the
+  next; if it raises ``PromptNotFound`` the chain stops (the name is
+  legitimately missing). The typical production shape is "Langfuse
+  primary + local-filesystem fallback".
+- ``FilesystemPromptBackend`` uses the ``<root>/<label>/<name>.j2``
+  layout. The demo ships two prompts (``describe-surface``,
+  ``describe-equipment``) under the primary backend's ``production``
+  label, plus matching variants in the fallback backend so the safety
+  net covers both prompts.
+- ``PromptGroup(group_name=..., members=[result_a, result_b])`` wraps
+  two ``PromptResult`` instances under one observability identifier.
+  Because the prompts are INDEPENDENT analyses of the same input,
+  both can be rendered upfront with real variables — no placeholder
+  renders, no asymmetric "first call computes the second's input"
+  shape.
+- ``with_active_prompt_group(group)`` propagates the group name via
+  ContextVar; OTel observers stamp ``openarmature.prompt.group_name``
+  onto every LLM-call span fired inside.
+- ``with_active_prompt(result)`` (inside the group's scope) propagates
+  the per-call prompt identifiers — name, version, label,
+  template_hash, rendered_hash. The two layers compose: spans inside
+  the group see both the group identifier AND the per-call prompt
+  identifiers.
 - The rendered text becomes a ``TextBlock`` inside a multimodal
-  ``UserMessage``; the image is a sibling ``ImageBlock`` carrying an
-  ``ImageSourceURL``. The provider passes both to the model in one
-  call.
+  ``UserMessage``; the image is a sibling ``ImageBlock``. The image
+  source is ``ImageSourceURL(url=...)`` by default; setting
+  ``IMAGE_PATH`` switches to ``ImageSourceInline(base64_data=...)``
+  with the file's bytes base64-encoded and an inferred ``media_type``.
 
 **Configuration** (env vars; OpenAI defaults shown):
 
 - ``LLM_BASE_URL`` defaults to ``https://api.openai.com``. **Host root only.**
 - ``LLM_MODEL`` defaults to ``gpt-4o-mini`` (a vision-capable model).
 - ``LLM_API_KEY`` required (empty for local servers that don't authenticate).
-- ``IMAGE_URL`` overrides the default image. Default is a public-domain
+- ``IMAGE_URL`` overrides the default URL. Default is a public-domain
   NASA photograph of Buzz Aldrin on the lunar surface.
+- ``IMAGE_PATH`` overrides the URL with a local file path. The file's
+  bytes go to the model via ``ImageSourceInline`` (base64) instead.
 
 Run with:
 
     uv sync --group examples
     cd examples/07-multimodal-prompt
     LLM_API_KEY=sk-... uv run python main.py
+    LLM_API_KEY=sk-... IMAGE_PATH=./my-photo.jpg uv run python main.py
 """
 
 from __future__ import annotations
 
 import asyncio
+import base64
 import os
 from collections.abc import Mapping
 from pathlib import Path
@@ -66,6 +88,8 @@
 )
 from openarmature.llm import (
     ImageBlock,
+    ImageSource,
+    ImageSourceInline,
     ImageSourceURL,
     OpenAIProvider,
     TextBlock,
@@ -73,8 +97,11 @@
 )
 from openarmature.prompts import (
     FilesystemPromptBackend,
+    PromptGroup,
     PromptManager,
+    PromptResult,
     with_active_prompt,
+    with_active_prompt_group,
 )
 
 # ---------------------------------------------------------------------------
@@ -103,8 +130,28 @@ def _get_provider() -> OpenAIProvider:
 
 # Build the prompt manager once at import time. The manager is cheap to
 # construct, holds no per-call state, and is safe to share across nodes.
-_PROMPT_ROOT = Path(__file__).parent / "prompts"
-_PROMPT_MANAGER = PromptManager(FilesystemPromptBackend(_PROMPT_ROOT))
+#
+# Two backends are wired here:
+#   - primary: ``prompts/`` — ships describe-surface and
+#     describe-equipment.
+#   - fallback: ``prompts_fallback/`` — ships shorter variants of
+#     both prompts so the safety net covers the whole pipeline. The
+#     fallback path fires when the primary raises
+#     ``PromptStoreUnavailable`` (e.g., a remote primary like
+#     Langfuse times out); ``PromptNotFound`` from primary stops the
+#     chain (the name is legitimately missing).
+#
+# In this demo both prompts live in primary, so the fallback path
+# isn't exercised at runtime. The construction-time setup is the
+# demonstrated thing; production code would replace primary with a
+# remote backend (LangfusePromptBackend etc.) while keeping the
+# filesystem one as the offline safety net.
+_PROMPT_ROOT_PRIMARY = Path(__file__).parent / "prompts"
+_PROMPT_ROOT_FALLBACK = Path(__file__).parent / "prompts_fallback"
+_PROMPT_MANAGER = PromptManager(
+    FilesystemPromptBackend(_PROMPT_ROOT_PRIMARY),
+    FilesystemPromptBackend(_PROMPT_ROOT_FALLBACK),
+)
 
 
 # ---------------------------------------------------------------------------
@@ -112,31 +159,60 @@ def _get_provider() -> OpenAIProvider:
 # ---------------------------------------------------------------------------
 
 
-class CaptionState(State):
-    image_url: str
+class AnalysisState(State):
+    # Exactly one of ``image_url`` / ``image_path`` is set when the
+    # demo runs; the helper below picks the right ImageSource shape.
+    image_url: str = ""
+    image_path: str = ""
     mission: str
-    caption: str = ""
-    prompt_version: str = ""
-    template_hash: str = ""
+    surface_description: str = ""
+    equipment_description: str = ""
+    group_name: str = ""
     trace: Annotated[list[str], append] = Field(default_factory=list)
 
 
 # ---------------------------------------------------------------------------
-# Node
+# Image source helper
 # ---------------------------------------------------------------------------
+# The image arrives either as a URL (default) or a local file path
+# (``IMAGE_PATH`` env var). The helper picks the right ``ImageSource``
+# shape: ``ImageSourceURL`` passes the URL through to the model
+# unchanged; for a path the helper reads and base64-encodes the file into
+# ``ImageSourceInline``, which requires ``media_type`` on the ``ImageBlock``.
+
+_EXTENSION_TO_MEDIA_TYPE = {
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".png": "image/png",
+    ".webp": "image/webp",
+    ".gif": "image/gif",
+}
+
+
+def _build_image_block(image_url: str, image_path: str) -> ImageBlock:
+    """Build the ImageBlock: inline base64 when ``image_path`` is set, URL otherwise."""
+    if image_path:
+        ext = Path(image_path).suffix.lower()
+        media_type = _EXTENSION_TO_MEDIA_TYPE.get(ext)
+        if media_type is None:  # reject before reading: never load an unsupported file
+            raise RuntimeError(
+                f"image extension {ext!r} not recognized; supported: "
+                f"{sorted(_EXTENSION_TO_MEDIA_TYPE.keys())}"
+            )
+        encoded = base64.b64encode(Path(image_path).read_bytes()).decode("ascii")
+        source: ImageSource = ImageSourceInline(base64_data=encoded)
+        return ImageBlock(source=source, media_type=media_type)
+    return ImageBlock(source=ImageSourceURL(url=image_url))
 
 
-async def caption(s: CaptionState) -> Mapping[str, Any]:
-    # Load + render the template in one call. ``variables`` are strict:
-    # an undefined name in the template raises PromptRenderError.
-    rendered = await _PROMPT_MANAGER.get(
-        "caption-lunar-image",
-        variables={"mission": s.mission},
-    )
+# ---------------------------------------------------------------------------
+# Nodes
+# ---------------------------------------------------------------------------
+
 
-    # The PromptResult's messages list carries the rendered text as a
-    # UserMessage. Pull out the text and compose a multimodal user
-    # message that also carries the image.
+def _extract_rendered_text(rendered: PromptResult) -> str:
+    """Pull the rendered text out of a single-UserMessage PromptResult,
+    failing loudly if the contract shape changes."""
     rendered_msg = rendered.messages[0]
     if not isinstance(rendered_msg, UserMessage) or not isinstance(rendered_msg.content, str):
         raise RuntimeError(
@@ -144,38 +220,73 @@ async def caption(s: CaptionState) -> Mapping[str, Any]:
             f"UserMessage with str content, got {type(rendered_msg).__name__} "
             f"with content type {type(rendered_msg.content).__name__}"
         )
-    rendered_text = rendered_msg.content
+    return rendered_msg.content
+
+
+async def describe_surface(s: AnalysisState) -> Mapping[str, Any]:
+    # Each node fetches + renders its own prompt. Both prompts take
+    # only the ``mission`` variable, so neither depends on the other's
+    # output — the two analyses are independent.
+    rendered = await _PROMPT_MANAGER.get(
+        "describe-surface",
+        variables={"mission": s.mission},
+    )
+    rendered_text = _extract_rendered_text(rendered)
+
+    multimodal_message = UserMessage(
+        content=[
+            TextBlock(text=rendered_text),
+            _build_image_block(s.image_url, s.image_path),
+        ],
+    )
+
+    # ``with_active_prompt`` propagates the per-call prompt
+    # identifiers (name, version, label, template_hash,
+    # rendered_hash) via ContextVar. An OTel observer would stamp
+    # those onto the LLM-call span fired inside the block. The
+    # outer ``with_active_prompt_group`` (set in main()) ALSO stamps
+    # a ``group_name`` onto the same span — the two layers compose
+    # so observers see both per-call AND per-group attribution.
+    with with_active_prompt(rendered):
+        response = await _get_provider().complete([multimodal_message])
+
+    return {
+        "surface_description": (response.message.content or "").strip(),
+        "trace": ["describe_surface"],
+    }
+
+
+async def describe_equipment(s: AnalysisState) -> Mapping[str, Any]:
+    rendered = await _PROMPT_MANAGER.get(
+        "describe-equipment",
+        variables={"mission": s.mission},
+    )
+    rendered_text = _extract_rendered_text(rendered)
 
     multimodal_message = UserMessage(
         content=[
             TextBlock(text=rendered_text),
-            ImageBlock(source=ImageSourceURL(url=s.image_url)),
+            _build_image_block(s.image_url, s.image_path),
         ],
     )
 
-    # ``with_active_prompt`` propagates the prompt identifiers via
-    # ContextVar to any observer that cares. An OTel observer would
-    # stamp openarmature.prompt.{name,version,label,template_hash,
-    # rendered_hash} on the LLM-call span fired inside this block. No
-    # observer is attached in this demo, but the wrapping is the
-    # canonical pattern; leaving it out drops the audit trail.
     with with_active_prompt(rendered):
         response = await _get_provider().complete([multimodal_message])
 
     return {
-        "caption": (response.message.content or "").strip(),
-        "prompt_version": rendered.version,
-        "template_hash": rendered.template_hash,
-        "trace": ["caption"],
+        "equipment_description": (response.message.content or "").strip(),
+        "trace": ["describe_equipment"],
     }
 
 
-def build_graph() -> CompiledGraph[CaptionState]:
+def build_graph() -> CompiledGraph[AnalysisState]:
     return (
-        GraphBuilder(CaptionState)
-        .add_node("caption", caption)
-        .add_edge("caption", END)
-        .set_entry("caption")
+        GraphBuilder(AnalysisState)
+        .add_node("describe_surface", describe_surface)
+        .add_node("describe_equipment", describe_equipment)
+        .add_edge("describe_surface", "describe_equipment")
+        .add_edge("describe_equipment", END)
+        .set_entry("describe_surface")
         .compile()
     )
 
@@ -187,24 +298,63 @@ def build_graph() -> CompiledGraph[CaptionState]:
 
 async def main() -> None:
     image_url = os.environ.get("IMAGE_URL", DEFAULT_IMAGE_URL)
+    image_path = os.environ.get("IMAGE_PATH", "")
     mission = os.environ.get("MISSION", DEFAULT_MISSION)
 
     print("=" * 72)
-    print("Caption a lunar photograph using a versioned prompt template")
+    print("Lunar-mission image analysis (surface + equipment)")
     print("=" * 72)
     print()
     print(f"  mission:   {mission}")
-    print(f"  image_url: {image_url}")
+    if image_path:
+        print(f"  image:     {image_path} (inline / base64)")
+    else:
+        print(f"  image:     {image_url} (url)")
     print()
 
+    # Pre-render both prompts with the real ``mission`` variable so
+    # the PromptGroup can be built once at invoke entry. Both renders
+    # are honest — the nodes use the same fetch+render path inside,
+    # so no placeholder identities sneak into the group's metadata.
+    surface_member = await _PROMPT_MANAGER.get(
+        "describe-surface",
+        variables={"mission": mission},
+    )
+    equipment_member = await _PROMPT_MANAGER.get(
+        "describe-equipment",
+        variables={"mission": mission},
+    )
+    group = PromptGroup(
+        group_name="lunar-image-analysis",
+        members=[surface_member, equipment_member],
+    )
+
     graph = build_graph()
     try:
-        final = await graph.invoke(CaptionState(image_url=image_url, mission=mission))
-        print(f"  prompt:    caption-lunar-image @ {final.prompt_version}")
-        print(f"  template:  {final.template_hash}")
+        # ``with_active_prompt_group`` propagates the group_name to
+        # observers for the duration of the invoke. Inside the nodes,
+        # ``with_active_prompt`` adds the per-call prompt identifiers
+        # alongside it — both layers stamp attributes on the same
+        # LLM-call span.
+        with with_active_prompt_group(group):
+            final = await graph.invoke(
+                AnalysisState(
+                    image_url=image_url if not image_path else "",
+                    image_path=image_path,
+                    mission=mission,
+                    group_name=group.group_name,
+                )
+            )
+
+        print(f"  group:                {final.group_name}")
+        print(f"  describe-surface:     {surface_member.name} @ {surface_member.version}")
+        print(f"  describe-equipment:   {equipment_member.name} @ {equipment_member.version}")
+        print()
+        print("  surface description:")
+        print(f"    {final.surface_description}")
         print()
-        print("  caption:")
-        print(f"    {final.caption}")
+        print("  equipment description:")
+        print(f"    {final.equipment_description}")
     finally:
         await graph.drain()
         if _provider_instance is not None:
diff --git a/examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2 b/examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2
deleted file mode 100644
index 931dac0..0000000
--- a/examples/07-multimodal-prompt/prompts/production/caption-lunar-image.j2
+++ /dev/null
@@ -1,10 +0,0 @@
-You are looking at a historical photograph from {{ mission }}.
-
-Describe what's visible in the image in one tight paragraph (~3 sentences).
-Cover three things in order:
-
-1. The subject of the photo — who or what is in focus.
-2. The environment — lunar surface details, equipment, lighting.
-3. Anything distinctive that identifies the era or this specific mission.
-
-No preamble; no markdown; no headers.
diff --git a/examples/07-multimodal-prompt/prompts/production/describe-equipment.j2 b/examples/07-multimodal-prompt/prompts/production/describe-equipment.j2
new file mode 100644
index 0000000..7627048
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts/production/describe-equipment.j2
@@ -0,0 +1,6 @@
+You are looking at a photograph from {{ mission }}.
+
+Identify and describe the spacecraft, equipment, instruments, or other
+human-made artifacts visible in the image — what they are and what
+they're for. One tight paragraph (~3 sentences); no preamble; no
+markdown; no headers.
diff --git a/examples/07-multimodal-prompt/prompts/production/describe-surface.j2 b/examples/07-multimodal-prompt/prompts/production/describe-surface.j2
new file mode 100644
index 0000000..8bd359d
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts/production/describe-surface.j2
@@ -0,0 +1,5 @@
+You are looking at a photograph from {{ mission }}.
+
+Describe the lunar surface features visible in the image — terrain,
+shadows, the regolith's texture, the horizon line. One tight paragraph
+(~3 sentences); no preamble; no markdown; no headers.
diff --git a/examples/07-multimodal-prompt/prompts_fallback/production/describe-equipment.j2 b/examples/07-multimodal-prompt/prompts_fallback/production/describe-equipment.j2
new file mode 100644
index 0000000..4ebbc1f
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts_fallback/production/describe-equipment.j2
@@ -0,0 +1,6 @@
+Briefly identify the spacecraft or equipment visible in this {{ mission }}
+photo. One sentence; no preamble.
+
+{# This is the fallback variant; the primary backend ships the canonical
+longer describe-equipment prompt and is always tried first. The fallback
+fires when the primary raises PromptStoreUnavailable. #}
diff --git a/examples/07-multimodal-prompt/prompts_fallback/production/describe-surface.j2 b/examples/07-multimodal-prompt/prompts_fallback/production/describe-surface.j2
new file mode 100644
index 0000000..533da32
--- /dev/null
+++ b/examples/07-multimodal-prompt/prompts_fallback/production/describe-surface.j2
@@ -0,0 +1,6 @@
+Briefly describe the lunar surface visible in this {{ mission }} photo.
+One sentence; no preamble.
+
+{# This is the fallback variant; the primary backend ships the canonical
+longer describe-surface prompt and is always tried first. The fallback
+fires when the primary raises PromptStoreUnavailable. #}
diff --git a/examples/08-checkpointing-and-migration/main.py b/examples/08-checkpointing-and-migration/main.py
index 73e7ee6..38a7058 100644
--- a/examples/08-checkpointing-and-migration/main.py
+++ b/examples/08-checkpointing-and-migration/main.py
@@ -191,6 +191,19 @@ def migrate_v1_to_v2(state_dict: dict[str, Any]) -> dict[str, Any]:
     Pure function: takes the saved state as a dict, returns the dict at
     the new schema. The engine reads the v1 record, applies this
     function, and re-deserializes against MissionPlanStateV2.
+
+    Multi-version chains: a third schema (v3) would add a second
+    migration function (``migrate_v2_to_v3``) and a second
+    ``builder.with_state_migration("v2", "v3", migrate_v2_to_v3)``
+    call. The framework's MigrationRegistry runs a BFS over the
+    registered edges to find the shortest chain from the saved
+    record's ``schema_version`` to the current state class's. A v1
+    record loaded under a v3 graph would run v1->v2 then v2->v3
+    automatically; no caller-side composition required. If two
+    distinct edges with the same ``(from, to)`` pair exist, or two
+    distinct shortest paths exist for one resolution, the registry
+    raises ``CheckpointStateMigrationChainAmbiguous`` at registration
+    or resume time.
     """
     return {**state_dict, "risk_assessment": ""}
 
diff --git a/examples/09-tool-use/main.py b/examples/09-tool-use/main.py
new file mode 100644
index 0000000..9fa91ff
--- /dev/null
+++ b/examples/09-tool-use/main.py
@@ -0,0 +1,406 @@
+"""openarmature demo: a lunar-mission assistant that calls local Python
+functions as tools to answer fact and physics questions about Apollo /
+Artemis missions.
+
+**Use case:** A user asks something that mixes a factual recall ("when
+did Apollo 13 splash down?") with a small computation ("what's the
+delta-v for a Hohmann transfer from a 300 km Earth orbit to lunar
+distance?"). Neither belongs in the model's prompt — facts get stale and
+arithmetic is unreliable from the model alone — so the agent defines two
+local tools and lets the model call them.
+
+The agent loops: send messages + tools to the model, dispatch any
+``tool_calls`` the model emits, feed the results back as
+``ToolMessage`` entries, and call the model again. The loop terminates
+when the assistant message has no ``tool_calls`` (the model is done
+requesting tools) or after a hard turn cap.
+
+**What's interesting in the implementation:**
+
+- ``Tool(name, description, parameters)`` defines each function as a
+  JSON Schema for the model. Both tools below use the standard
+  ``type: object`` shape with ``required`` properties; the model
+  receives this through ``complete(messages, tools=TOOLS)`` and
+  decides which (if any) to invoke.
+- The model's response carries ``finish_reason="tool_calls"`` and
+  populates ``response.message.tool_calls`` with parsed
+  ``ToolCall(id, name, arguments)`` records. The framework guarantees
+  ``arguments`` is a parsed dict matching the tool's parameters
+  schema (or ``None`` only under ``finish_reason="error"``).
+- The dispatcher node reads each ``ToolCall``, runs the matching
+  local Python function, and appends one
+  ``ToolMessage(content=..., tool_call_id=...)`` per call. The spec
+  requires the ``tool_call_id`` to round-trip exactly so the model can
+  pair its requests with the responses.
+- The loop is just a conditional edge on the graph: ``call_llm`` →
+  ``dispatch_tools`` → back to ``call_llm`` when the model wants
+  more tools, or → ``present`` when it's done. No special "agent
+  framework" abstraction — tool-calling composes with the existing
+  graph mechanics.
+- A ``MAX_TURNS`` cap prevents runaway loops if a model stays in
+  tool-calling forever. Production agents typically pair the cap with
+  an explicit termination tool or a fallback summarization step.
+
+**Configuration** (env vars; OpenAI defaults shown):
+
+- ``LLM_BASE_URL`` defaults to ``https://api.openai.com``. **Host root only.**
+- ``LLM_MODEL`` defaults to ``gpt-4o-mini``.
+- ``LLM_API_KEY`` required (empty for local servers that don't authenticate).
+
+Run with:
+
+    uv sync --group examples
+    cd examples/09-tool-use
+    LLM_API_KEY=sk-... uv run python main.py
+    LLM_API_KEY=sk-... uv run python main.py "When was Apollo 17 launched?"
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import math
+import os
+import sys
+from collections.abc import Mapping
+from typing import Annotated, Any
+
+from pydantic import Field
+
+from openarmature.graph import (
+    END,
+    CompiledGraph,
+    GraphBuilder,
+    State,
+    append,
+)
+from openarmature.llm import (
+    AssistantMessage,
+    Message,
+    OpenAIProvider,
+    SystemMessage,
+    Tool,
+    ToolMessage,
+    UserMessage,
+)
+
+# ---------------------------------------------------------------------------
+# Provider
+# ---------------------------------------------------------------------------
+
+_provider_instance: OpenAIProvider | None = None  # built lazily by _get_provider(); closed in main()
+
+
+def _get_provider() -> OpenAIProvider:
+    """Return the shared provider, building it from env vars on the first call."""
+    global _provider_instance
+    if _provider_instance is None:
+        base_url = os.environ.get("LLM_BASE_URL", "https://api.openai.com")
+        model = os.environ.get("LLM_MODEL", "gpt-4o-mini")
+        api_key = os.environ.get("LLM_API_KEY") or None
+        _provider_instance = OpenAIProvider(base_url=base_url, model=model, api_key=api_key)
+    return _provider_instance
+
+
+# ---------------------------------------------------------------------------
+# Tool 1 — lookup_mission: read a small baked-in fact-record for a
+# named lunar mission. Stand-in for a real lookup against a doc store
+# or knowledge base.
+# ---------------------------------------------------------------------------
+
+LUNAR_MISSIONS: dict[str, dict[str, str]] = {  # mission name -> record served by lookup_mission
+    "Apollo 11": {
+        "launch_date": "1969-07-16",
+        "splashdown_date": "1969-07-24",
+        "commander": "Neil Armstrong",
+        "lunar_module_pilot": "Buzz Aldrin",
+        "command_module_pilot": "Michael Collins",
+        "result": "First crewed lunar landing.",
+    },
+    "Apollo 13": {
+        "launch_date": "1970-04-11",
+        "splashdown_date": "1970-04-17",
+        "commander": "Jim Lovell",
+        "lunar_module_pilot": "Fred Haise",
+        "command_module_pilot": "Jack Swigert",
+        "result": (
+            "Aborted lunar landing after service-module oxygen tank rupture; "
+            "safe return via free-return trajectory."
+        ),
+    },
+    "Apollo 17": {
+        "launch_date": "1972-12-07",
+        "splashdown_date": "1972-12-19",
+        "commander": "Eugene Cernan",
+        "lunar_module_pilot": "Harrison Schmitt",
+        "command_module_pilot": "Ronald Evans",
+        "result": "Final Apollo lunar landing.",
+    },
+    "Artemis II": {
+        "launch_date": "2026-04-01",
+        "splashdown_date": "2026-04-10",
+        "commander": "Reid Wiseman",
+        "lunar_module_pilot": "n/a (no surface landing)",
+        "command_module_pilot": "Victor Glover",
+        "result": (
+            "First crewed lunar flyby of the Artemis program; tested Orion "
+            "spacecraft on a free-return trajectory."
+        ),
+    },
+}
+
+
+def lookup_mission(name: str) -> str:
+    """Return the mission's record as JSON, or an error string listing the known names."""
+    if name in LUNAR_MISSIONS:
+        return json.dumps(LUNAR_MISSIONS[name])
+    known = ", ".join(sorted(LUNAR_MISSIONS))
+    return f"Unknown mission {name!r}. Known missions: {known}."
+
+
+# ---------------------------------------------------------------------------
+# Tool 2 — compute_delta_v: Hohmann transfer delta-v between two
+# circular orbits around a body with known gravitational parameter.
+# The textbook formula; rough but illustrative.
+# ---------------------------------------------------------------------------
+
+EARTH_RADIUS_KM = 6378.0  # added to an altitude to get the orbit radius from Earth's center
+EARTH_MU_KM3_S2 = 398600.4418  # Standard gravitational parameter for Earth.
+
+
+def compute_delta_v(initial_altitude_km: float, final_altitude_km: float) -> str:
+    """Return a JSON record of the two Hohmann-transfer burns and their total.
+    Altitudes are km above Earth's surface (0 = surface, 300 = LEO; the Moon's
+    ~384400 km mean distance is center-to-center, so ~378000 km altitude).
+    Raises ValueError when either orbit radius is not positive."""
+    r1 = initial_altitude_km + EARTH_RADIUS_KM
+    r2 = final_altitude_km + EARTH_RADIUS_KM
+    if r1 <= 0 or r2 <= 0:  # r == 0 used to escape as an uncaught ZeroDivisionError
+        raise ValueError(f"altitudes must be above {-EARTH_RADIUS_KM} km (Earth's center)")
+    dv1 = abs(math.sqrt(EARTH_MU_KM3_S2 / r1) * (math.sqrt(2 * r2 / (r1 + r2)) - 1))
+    dv2 = abs(math.sqrt(EARTH_MU_KM3_S2 / r2) * (1 - math.sqrt(2 * r1 / (r1 + r2))))
+    return json.dumps(
+        {
+            "first_burn_km_s": round(dv1, 3),
+            "second_burn_km_s": round(dv2, 3),
+            "total_delta_v_km_s": round(dv1 + dv2, 3),
+            "note": "Hohmann transfer between two coplanar circular Earth orbits.",
+        }
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tool definitions for the LLM
+# ---------------------------------------------------------------------------
+
+TOOLS: list[Tool] = [  # handed to the model via complete(..., tools=TOOLS) in call_llm
+    Tool(
+        name="lookup_mission",
+        description="Look up factual record for a named historical or upcoming lunar mission.",
+        parameters={
+            "type": "object",
+            "properties": {
+                "name": {
+                    "type": "string",
+                    "description": "Mission name (e.g., 'Apollo 11', 'Artemis II').",
+                }
+            },
+            "required": ["name"],
+            "additionalProperties": False,
+        },
+    ),
+    Tool(
+        name="compute_delta_v",
+        description=(
+            "Compute the Hohmann transfer delta-v between two circular Earth orbits "
+            "given their altitudes above Earth's surface in km. Returns the two burns "
+            "and the total delta-v in km/s."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "initial_altitude_km": {
+                    "type": "number",
+                    "description": "Altitude of the starting circular orbit above Earth's surface, in km.",
+                },
+                "final_altitude_km": {
+                    "type": "number",
+                    "description": "Altitude of the destination circular orbit above Earth's surface, in km.",
+                },
+            },
+            "required": ["initial_altitude_km", "final_altitude_km"],
+            "additionalProperties": False,
+        },
+    ),
+]
+
+
+def dispatch(name: str, arguments: dict[str, Any]) -> str:
+    """Run the local Python function behind a tool call.
+
+    The returned string is what the agent loop wraps in a
+    ``ToolMessage`` for the model. A name that matches no tool yields
+    an error string instead of an exception, so the model can react
+    to it on its next turn.
+    """
+    if name == "compute_delta_v":
+        # Coerce to float before the call; a non-numeric value raises here.
+        initial = float(arguments["initial_altitude_km"])
+        final = float(arguments["final_altitude_km"])
+        return compute_delta_v(initial_altitude_km=initial, final_altitude_km=final)
+    if name == "lookup_mission":
+        return lookup_mission(arguments["name"])
+    return f"Unknown tool {name!r}."
+
+
+# ---------------------------------------------------------------------------
+# State
+# ---------------------------------------------------------------------------
+
+MAX_TURNS = 5  # route_after_llm stops dispatching tools once state.turn reaches this
+
+
+class AgentState(State):
+    question: str  # the user's question, as given to main()
+    messages: list[Message] = Field(default_factory=list[Message])  # nodes return the full list
+    final_answer: str = ""  # filled in by ``present``
+    tool_call_count: int = 0  # ToolMessages produced so far
+    turn: int = 0  # incremented by each ``call_llm`` run
+    trace: Annotated[list[str], append] = Field(default_factory=list)  # each node adds one entry
+
+
+# ---------------------------------------------------------------------------
+# Nodes
+# ---------------------------------------------------------------------------
+
+
+async def call_llm(s: AgentState) -> Mapping[str, Any]:
+    """Send the transcript and ``TOOLS`` to the model; record its reply as a new turn."""
+    turn = s.turn + 1
+    reply = (await _get_provider().complete(s.messages, tools=TOOLS)).message
+    history = [*s.messages, reply]
+    step = f"call_llm[turn={turn}]"
+    return {"messages": history, "turn": turn, "trace": [step]}
+
+
+async def dispatch_tools(s: AgentState) -> Mapping[str, Any]:
+    """Run every tool call on the last assistant message; append one ToolMessage each."""
+    last = s.messages[-1]
+    if not isinstance(last, AssistantMessage) or not last.tool_calls:
+        raise RuntimeError("dispatch_tools entered without a tool-calling assistant message")
+    tool_messages: list[Message] = []
+    for tc in last.tool_calls:
+        # ToolCall.arguments is None only under provider-reported
+        # finish_reason="error" (unparseable args). That, and any tool failure
+        # (incl. arithmetic ones, e.g. division by zero), goes back as text.
+        if tc.arguments is None:
+            result_text = (
+                f"Tool {tc.name!r} could not be invoked: arguments were "
+                f"unparseable. Retry with valid JSON arguments."
+            )
+        else:
+            try:
+                result_text = dispatch(tc.name, tc.arguments)
+            except (KeyError, ValueError, TypeError, ArithmeticError) as exc:
+                result_text = f"Tool {tc.name!r} failed with {type(exc).__name__}: {exc}"
+        tool_messages.append(ToolMessage(content=result_text, tool_call_id=tc.id))
+    return {
+        "messages": [*s.messages, *tool_messages],
+        "tool_call_count": s.tool_call_count + len(tool_messages),
+        "trace": [f"dispatch_tools[{len(tool_messages)}]"],
+    }
+
+
+async def present(s: AgentState) -> Mapping[str, Any]:
+    """Publish the last assistant message's text as the final answer."""
+    last = s.messages[-1]
+    answer = "(model exited without final content)"
+    if isinstance(last, AssistantMessage) and last.content:
+        # Only a non-empty assistant reply replaces the placeholder.
+        answer = last.content
+    return {"final_answer": answer, "trace": ["present"]}
+
+
+def route_after_llm(s: AgentState) -> str:
+    """Name the node that follows ``call_llm``: ``dispatch_tools`` or ``present``."""
+    # Hard turn cap: once it is reached the loop ends even if the model
+    # asked for more tools. Production agents typically add a fallback
+    # summarize step that asks the model to "wrap up with what you have."
+    if s.turn < MAX_TURNS:
+        last = s.messages[-1]
+        if isinstance(last, AssistantMessage) and last.tool_calls:
+            return "dispatch_tools"
+    return "present"
+
+
+def build_graph() -> CompiledGraph[AgentState]:
+    """Compile the agent loop: call_llm <-> dispatch_tools, finishing at present."""
+    builder = GraphBuilder(AgentState)
+    builder = builder.add_node("call_llm", call_llm)
+    builder = builder.add_node("dispatch_tools", dispatch_tools)
+    builder = builder.add_node("present", present)
+    # route_after_llm chooses between dispatch_tools and present.
+    builder = builder.add_conditional_edge("call_llm", route_after_llm)
+    builder = builder.add_edge("dispatch_tools", "call_llm")
+    builder = builder.add_edge("present", END)
+    builder = builder.set_entry("call_llm")
+    return builder.compile()
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+DEFAULT_QUESTION = (  # used by main() when no question is given on the command line
+    "Tell me about Apollo 13. Then, separately, if I were planning a similar "
+    "free-return-style mission and wanted to inject from a 300 km parking orbit "
+    "to apogee at the Moon's mean distance (384,400 km above Earth's surface), "
+    "roughly how much delta-v would that take?"
+)
+
+
+async def main() -> None:  # CLI entry point: run one question through the graph and print it
+    question = " ".join(sys.argv[1:]) or DEFAULT_QUESTION  # no CLI args -> default question
+
+    initial_messages: list[Message] = [
+        SystemMessage(
+            content=(
+                "You are a helpful lunar-mission assistant. You have access to "
+                "two tools: lookup_mission (factual records for named missions) "
+                "and compute_delta_v (Hohmann transfer arithmetic between two "
+                "Earth orbits). Use them when the answer benefits. Cite the tool "
+                "outputs in your final summary."
+            )
+        ),
+        UserMessage(content=question),
+    ]
+
+    print("=" * 72)
+    print("Lunar-mission assistant — tool-calling loop")
+    print("=" * 72)
+    print()
+    print(f"  question: {question}")
+    print()
+
+    graph = build_graph()
+    try:
+        final = await graph.invoke(AgentState(question=question, messages=initial_messages))
+        print(f"  turns:     {final.turn}")
+        print(f"  tools used: {final.tool_call_count}")
+        print()
+        print("  trace:")
+        for step in final.trace:
+            print(f"    - {step}")
+        print()
+        print("  final answer:")
+        for line in final.final_answer.splitlines() or [""]:  # empty answer still prints a line
+            print(f"    {line}")
+    finally:  # runs whether or not the invoke succeeded
+        await graph.drain()
+        if _provider_instance is not None:  # only close a provider that was actually created
+            await _provider_instance.aclose()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/README.md b/examples/README.md
index 624a9a6..78d33b9 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -63,13 +63,16 @@ per-branch middleware.
 
 ### [`07-multimodal-prompt/`](./07-multimodal-prompt/main.py)
 
-Caption a historical lunar photograph using a versioned prompt
-template plus a multimodal user message. The prompt text is loaded
-from a Jinja2 template on disk via `FilesystemPromptBackend`; the
-image is passed alongside the rendered text as an `ImageBlock` in a
-multimodal `UserMessage`. Demonstrates: `PromptManager` + filesystem
-backend, prompt fetch + render with template variables,
-`with_active_prompt` context-var propagation for observability,
+Two independent analyses of a lunar-mission photograph — describe
+the surface, describe the equipment — using versioned prompt
+templates and a multimodal user message. Templates load from
+`FilesystemPromptBackend` with a primary + fallback chain; both
+renders are grouped under one observability `PromptGroup` so a trace
+UI can render them as one logical unit. Image source switches
+between `ImageSourceURL` and `ImageSourceInline(base64_data=...)`
+via env var. Demonstrates: `PromptManager` with composite backends,
+prompt fetch + render with template variables, `PromptGroup` +
+`with_active_prompt_group`, `with_active_prompt` nesting,
 multimodal `UserMessage` carrying both text and image content blocks.
 
 ### [`08-checkpointing-and-migration/`](./08-checkpointing-and-migration/main.py)
@@ -84,6 +87,21 @@ invocation. Demonstrates: `SQLiteCheckpointer(serialization="json")`,
 `with_checkpointer`, save-on-completed-event, `State.schema_version`,
 `with_state_migration`, `invoke(resume_invocation=...)`.
 
+### [`09-tool-use/`](./09-tool-use/main.py)
+
+A lunar-mission assistant that calls local Python tools to answer
+questions mixing fact recall and physics arithmetic. Defines two
+tools (`lookup_mission` reading a baked-in record store,
+`compute_delta_v` doing a Hohmann transfer), passes them to the
+model via `complete(tools=...)`, dispatches `assistant.tool_calls`
+to the local functions, and feeds the results back as
+`ToolMessage` entries. The agent loop is a graph cycle:
+`call_llm → dispatch_tools → call_llm` via a conditional edge, with
+a hard turn cap to prevent runaway loops. Demonstrates: `Tool`
+definitions with JSON Schema parameters, `complete(tools=...)`,
+parsing `ToolCall` records, `ToolMessage(tool_call_id=...)`
+round-trip, multi-turn tool-calling loop as a graph cycle.
+
 ## Configuration
 
 All demos configure their LLM client via env vars; OpenAI public-API
diff --git a/tests/test_examples_smoke.py b/tests/test_examples_smoke.py
index adb9cdb..9f2f844 100644
--- a/tests/test_examples_smoke.py
+++ b/tests/test_examples_smoke.py
@@ -39,6 +39,7 @@
     "06-parallel-branches",
     "07-multimodal-prompt",
     "08-checkpointing-and-migration",
+    "09-tool-use",
 ]