LunarCommand · chris-colinsky · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,24 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). The
 
 ## [Unreleased]
 
+### Added
+
+- **Structured output (proposal 0016, spec v0.14.0).** `Provider.complete()` now accepts an optional `response_schema` parameter — either a JSON Schema dict or a Pydantic `BaseModel` subclass. When supplied, the provider constrains the model's output to the schema and populates `Response.parsed` with the validated value (`dict` for dict-schema input, a `BaseModel` instance for class input). A new `StructuredOutputInvalid` error category (non-transient by default) is raised on JSON parse failure or schema validation failure; it carries the requested schema, the raw response content, and a failure description.
+- **`OpenAIProvider` native response_format wire path.** When `response_schema` is supplied, the chat-completions request body carries `response_format: { type: "json_schema", json_schema: { name, schema, strict } }`. The `strict` flag is determined by a deep recursive walk over the schema (object-property required-coverage rule across `anyOf` / `oneOf` / `allOf` and `$ref` targets, with cycle protection); unresolvable refs fall through to `strict: false`. The `name` field uses `schema.title` when present, otherwise a deterministic sha256-prefix hash.
+- **`OpenAIProvider` prompt-augmentation fallback.** Constructor flag `force_prompt_augmentation_fallback: bool` (default `False`) and read-only inspect property `uses_prompt_augmentation_fallback: bool`. When the flag is on, structured-output calls build a fresh message list with a system directive containing the serialized schema, omit `response_format` from the wire, and validate the response post-receive. The caller's original `messages` list is never mutated. Use for OpenAI-compatible servers (older vLLM, some LM Studio releases, llama.cpp variants) that reject or silently ignore `response_format`.
+- **Provider-agnostic schema helpers.** `openarmature.llm.validate_response_schema(schema)` (raises `ProviderInvalidRequest` when the schema is not a dict with a top-level `type: "object"`) and `openarmature.llm.strict_mode_supported(schema)` (the deep-tree strict-mode constraint check) are exported for reuse by future Anthropic/Gemini providers.
+- **Capability-agnostic conformance harness helpers.** `tests/conformance/harness/wire.py` adds `match_wire_body` (recursive deep-equal with `"*"` wildcard support), `assert_response_format_absent`, `assert_system_references_schema`, and `assert_error_carries` for the `expected_wire_request[_checks]` and `expected.raises.carries.{...}` fixture shapes. Used by the 0016 fixtures; available for the upcoming 0014 / 0015 / 0017 fixture sets.
+- **Runtime dependency: `jsonschema>=4.0`.** Used by the dict-schema validation path. The Pydantic-class path uses Pydantic's native validator and does not need `jsonschema`.
+
+### Changed
+
+- **Pinned spec version: 0.10.0 → 0.15.0.** Adopts the skip-ahead governance principle: the submodule jumps across v0.11.0–v0.15.0 (proposals 0009, 0011, 0014, 0015, 0016, 0017) in one bump. Only the surface introduced by proposal 0016 is implemented in this changelog entry; fixtures from 0011 / 0014 / 0015 / 0017 are marked deferred-skip in the conformance suite and will be unmarked as their respective PRs land.
+
+### Notes
+
+- **Release gate: do not tag until all of {0011, 0014, 0015, 0016, 0017} are merged.** This batch implements one proposal per PR and lands a consolidated release after the fifth PR. Cutting a release tag before the batch is complete would ship a partial spec implementation against the v0.15.0 pin.
+- **Pre-1.0 MINOR.** Existing free-form callers (no `response_schema`) see no behavior change — the new field defaults to `None`, the wire body omits `response_format`, and `Response.parsed` remains absent.
+
 ## [0.5.0] — 2026-05-10
 
 First release on real PyPI. Catches the implementation up from spec v0.5.x to v0.10.0 across six phases — the spec accepted eight proposals while the python lib was at v0.3.1, and v0.5.0 lands all of them in one curated drop.

diff --git a/README.md b/README.md
@@ -55,26 +55,27 @@ The OpenTelemetry mapping mandates a private `TracerProvider`. That prevents the
 
 ## Hello World
 
-About fifty lines that show the engine in action. Three reducer policies declared on one state class. Routing as a pure function of state, not a hidden state machine. An observer attached at compile time that sees every node boundary the engine emits. No LLM, no API key, no boilerplate. Copy it, run it, watch the events fire. Requires Python 3.12 or later.
+About sixty lines that show the engine in action. Three reducer policies declared on one state class. An LLM call that returns a typed object, not a string. Conditional routing as a pure function of state, not a hidden state machine. An observer attached at compile time that sees every node boundary the engine emits. Requires Python 3.12 or later and an OpenAI-compatible endpoint (defaults to the OpenAI public API; works against any local server too).
 
 ```python
 import asyncio
-from typing import Annotated
-
-from openarmature.graph import (
-    END,
-    GraphBuilder,
-    NodeEvent,
-    State,
-    append,
-    merge,
-)
-from pydantic import Field
+import os
+from collections.abc import Mapping
+from typing import Annotated, Any, Literal
+
+from openarmature.graph import END, GraphBuilder, NodeEvent, State, append, merge
+from openarmature.llm import OpenAIProvider, UserMessage
+from pydantic import BaseModel, Field
+
+
+class Classification(BaseModel):
+    intent: Literal["research", "summarize"]
+    rationale: str
 
 
 class PipelineState(State):
     query: str                                                # last_write_wins (default)
-    classification: str = ""                                  # last_write_wins
+    classification: Classification | None = None              # last_write_wins
     sources: Annotated[list[str], append] = Field(            # appends across writes
         default_factory=list
     )
@@ -83,30 +84,32 @@ class PipelineState(State):
     )
 
 
-async def classify(state: PipelineState) -> dict:
-    decision = "research" if "?" in state.query else "summarize"
-    return {
-        "classification": decision,
-        "metadata": {"classified_by": "rule"},
-    }
+provider = OpenAIProvider(
+    base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com/v1"),
+    model=os.environ.get("LLM_MODEL", "gpt-4o-mini"),
+    api_key=os.environ.get("LLM_API_KEY"),
+)
+
+
+async def classify(state: PipelineState) -> Mapping[str, Any]:
+    response = await provider.complete(
+        [UserMessage(content=f"Route to 'research' or 'summarize': {state.query!r}")],
+        response_schema=Classification,
+    )
+    return {"classification": response.parsed, "metadata": {"classified_by": "llm"}}
 
 
-async def research(state: PipelineState) -> dict:
-    return {
-        "sources": ["wikipedia", "arxiv"],
-        "metadata": {"tool": "search"},
-    }
+async def research(state: PipelineState) -> Mapping[str, Any]:
+    return {"sources": ["wikipedia", "arxiv"], "metadata": {"tool": "search"}}
 
 
-async def summarize(state: PipelineState) -> dict:
-    return {
-        "sources": ["cache"],
-        "metadata": {"tool": "summarizer"},
-    }
+async def summarize(state: PipelineState) -> Mapping[str, Any]:
+    return {"sources": ["cache"], "metadata": {"tool": "summarizer"}}
 
 
 def route(state: PipelineState) -> str:
-    return state.classification
+    assert state.classification is not None
+    return state.classification.intent
 
 
 async def trace(event: NodeEvent) -> None:
@@ -127,22 +130,25 @@ graph = (
 )
 graph.attach_observer(trace)
 
+
 async def main() -> None:
     try:
-        await graph.invoke(PipelineState(query="what is RAG?"))
+        final = await graph.invoke(PipelineState(query="what is RAG?"))
+        print(f"\nclassification: {final.classification}")
     finally:
         await graph.drain()
 
 
 asyncio.run(main())
-# classify: sources=[]
-# research: sources=['wikipedia', 'arxiv']
 ```
 
-A few things to notice in this short example:
+Set `LLM_API_KEY=sk-...` and run. To swap providers, point `LLM_BASE_URL` and `LLM_MODEL` at OpenRouter, vLLM, LM Studio, llama.cpp — anything that speaks the OpenAI Chat Completions wire format. The example also lives at [`examples/00-hello-world/main.py`](./examples/00-hello-world/main.py); see [`examples/`](./examples/) for more runnable demos.
+
+A few things to notice:
 
 - **Three reducer policies on one state schema.** `query` and `classification` get the default `last_write_wins`. `sources` is `Annotated[list[str], append]`, so successive writes concatenate. `metadata` is `Annotated[dict[str, str], merge]`, so successive writes shallow-merge. The merge policy lives on the schema, once.
-- **Conditional routing as a state function.** `route` reads `state.classification` and returns a node name. The graph engine doesn't care that this happens to be deterministic; it would accept an LLM-driven router with the same shape.
+- **Structured output as a typed object.** `provider.complete(..., response_schema=Classification)` returns `Response.parsed` as a validated `Classification` instance, not a string the caller has to JSON-parse and re-validate. Pass a JSON Schema dict instead of a class for the raw form.
+- **Conditional routing on a parsed field.** `route` reads `state.classification.intent` and returns the next node's name. The graph engine doesn't care that the discriminator came from an LLM; it would accept a deterministic rule with the same shape.
 - **Observer sees both phases.** `trace` filters to `completed` events for brevity; the engine also delivers `started` events.
 - **The graph either compiles or it doesn't.** Remove `.set_entry()` and `.compile()` raises `NoDeclaredEntry` before `invoke()` runs.
 

diff --git a/examples/00-hello-world/main.py b/examples/00-hello-world/main.py
@@ -0,0 +1,137 @@
+"""Hello-world demo: a 3-node graph that classifies a query with an LLM
+(via structured output) and routes to one of two follow-up nodes.
+
+**Demonstrates:**
+
+- Typed ``State`` with three reducer policies (``last_write_wins``,
+  ``append``, ``merge``).
+- ``OpenAIProvider`` from ``openarmature.llm`` against any
+  OpenAI-compatible endpoint.
+- Structured output via a Pydantic class — the model's response comes
+  back as a validated ``Classification`` instance, not a string.
+- Conditional routing as a pure function of state (``route``).
+- ``attach_observer`` for boundary visibility.
+
+**Configuration** (env vars; OpenAI defaults shown):
+
+- ``LLM_BASE_URL`` — defaults to ``https://api.openai.com/v1``.
+- ``LLM_MODEL`` — defaults to ``gpt-4o-mini``.
+- ``LLM_API_KEY`` — your OpenAI API key. Required for OpenAI; may be
+  empty for local servers that don't authenticate.
+
+Run with:
+
+    uv sync --group examples
+    LLM_API_KEY=sk-... uv run python examples/00-hello-world/main.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+from collections.abc import Mapping
+from typing import Annotated, Any, Literal
+
+from pydantic import BaseModel, Field
+
+from openarmature.graph import (
+    END,
+    CompiledGraph,
+    GraphBuilder,
+    NodeEvent,
+    State,
+    append,
+    merge,
+)
+from openarmature.llm import OpenAIProvider, UserMessage
+
+
+class Classification(BaseModel):
+    """The Pydantic schema the model is constrained to produce.
+
+    Passed as ``response_schema`` to ``provider.complete()``; the
+    framework converts to JSON Schema, instructs the provider to
+    return matching content, validates the response, and yields a
+    ``Classification`` instance via ``Response.parsed``.
+    """
+
+    intent: Literal["research", "summarize"]
+    rationale: str
+
+
+class PipelineState(State):
+    query: str
+    classification: Classification | None = None
+    sources: Annotated[list[str], append] = Field(default_factory=list)
+    metadata: Annotated[dict[str, str], merge] = Field(default_factory=dict)
+
+
+_provider = OpenAIProvider(
+    base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com/v1"),
+    model=os.environ.get("LLM_MODEL", "gpt-4o-mini"),
+    api_key=os.environ.get("LLM_API_KEY"),
+)
+
+
+async def classify(state: PipelineState) -> Mapping[str, Any]:
+    response = await _provider.complete(
+        [
+            UserMessage(
+                content=(
+                    f"Route this query to either 'research' (look something up) or "
+                    f"'summarize' (condense known material): {state.query!r}"
+                )
+            )
+        ],
+        response_schema=Classification,
+    )
+    return {"classification": response.parsed, "metadata": {"classified_by": "llm"}}
+
+
+async def research(state: PipelineState) -> Mapping[str, Any]:
+    return {"sources": ["wikipedia", "arxiv"], "metadata": {"tool": "search"}}
+
+
+async def summarize(state: PipelineState) -> Mapping[str, Any]:
+    return {"sources": ["cache"], "metadata": {"tool": "summarizer"}}
+
+
+def route(state: PipelineState) -> str:
+    if state.classification is None:
+        raise RuntimeError("classify did not populate state.classification")
+    return state.classification.intent
+
+
+async def trace(event: NodeEvent) -> None:
+    if event.phase == "completed" and event.error is None:
+        print(f"{event.node_name}: sources={event.post_state.sources}")
+
+
+def build_graph() -> CompiledGraph[PipelineState]:
+    return (
+        GraphBuilder(PipelineState)
+        .add_node("classify", classify)
+        .add_node("research", research)
+        .add_node("summarize", summarize)
+        .add_conditional_edge("classify", route)
+        .add_edge("research", END)
+        .add_edge("summarize", END)
+        .set_entry("classify")
+        .compile()
+    )
+
+
+async def main() -> None:
+    graph = build_graph()
+    graph.attach_observer(trace)
+    try:
+        final = await graph.invoke(PipelineState(query="what is RAG?"))
+        print(f"\nclassification: {final.classification}")
+        print(f"sources: {final.sources}")
+        print(f"metadata: {final.metadata}")
+    finally:
+        await graph.drain()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/README.md b/examples/README.md
@@ -6,6 +6,18 @@ End-to-end demo projects for `openarmature`. Each is a standalone
 
 ## Demos
 
+### [`00-hello-world/`](./00-hello-world/main.py)
+
+Classify a query with an LLM and route to one of two follow-up
+nodes. Demonstrates: typed `State` with three reducer policies, the
+`OpenAIProvider` from `openarmature.llm`, structured output via a
+Pydantic class (`response_schema=Classification` → `Response.parsed`
+as a `Classification` instance), conditional routing on a parsed
+field, and a compile-time observer.
+
+Configured via env vars (`LLM_BASE_URL`, `LLM_MODEL`, `LLM_API_KEY`);
+defaults to the OpenAI public API with `gpt-4o-mini`.
+
 ### [`01-linear-pipeline/`](./01-linear-pipeline/main.py)
 
 Minimal two-node graph (`plan → write`). Demonstrates: typed `State`,

diff --git a/openarmature-spec b/openarmature-spec
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,6 +24,7 @@ classifiers = [
 dependencies = [
     "pydantic>=2.7",
     "httpx>=0.27",
+    "jsonschema>=4.0",
 ]
 
 [project.optional-dependencies]
@@ -46,7 +47,7 @@ Repository = "https://github.com/LunarCommand/openarmature-python"
 Specification = "https://github.com/LunarCommand/openarmature-spec"
 
 [tool.openarmature]
-spec_version = "0.10.0"
+spec_version = "0.15.0"
 
 [dependency-groups]
 dev = [

diff --git a/src/openarmature/llm/__init__.py b/src/openarmature/llm/__init__.py
@@ -30,6 +30,7 @@
     PROVIDER_MODEL_NOT_LOADED,
     PROVIDER_RATE_LIMIT,
     PROVIDER_UNAVAILABLE,
+    STRUCTURED_OUTPUT_INVALID,
     TRANSIENT_CATEGORIES,
     LlmProviderError,
     ProviderAuthentication,
@@ -39,6 +40,7 @@
     ProviderModelNotLoaded,
     ProviderRateLimit,
     ProviderUnavailable,
+    StructuredOutputInvalid,
 )
 from .messages import (
     AssistantMessage,
@@ -49,9 +51,15 @@
     ToolMessage,
     UserMessage,
 )
-from .provider import Provider, validate_message_list, validate_tools
+from .provider import (
+    Provider,
+    strict_mode_supported,
+    validate_message_list,
+    validate_response_schema,
+    validate_tools,
+)
 from .providers import OpenAIProvider, classify_http_error, parse_retry_after
-from .response import FinishReason, Response, RuntimeConfig, Usage
+from .response import FinishReason, ParsedValue, Response, RuntimeConfig, Usage
 
 __all__ = [
     "PROVIDER_AUTHENTICATION",
@@ -61,12 +69,14 @@
     "PROVIDER_MODEL_NOT_LOADED",
     "PROVIDER_RATE_LIMIT",
     "PROVIDER_UNAVAILABLE",
+    "STRUCTURED_OUTPUT_INVALID",
     "TRANSIENT_CATEGORIES",
     "AssistantMessage",
     "FinishReason",
     "LlmProviderError",
     "Message",
     "OpenAIProvider",
+    "ParsedValue",
     "Provider",
     "ProviderAuthentication",
     "ProviderInvalidModel",
@@ -77,6 +87,7 @@
     "ProviderUnavailable",
     "Response",
     "RuntimeConfig",
+    "StructuredOutputInvalid",
     "SystemMessage",
     "Tool",
     "ToolCall",
@@ -85,6 +96,8 @@
     "UserMessage",
     "classify_http_error",
     "parse_retry_after",
+    "strict_mode_supported",
     "validate_message_list",
+    "validate_response_schema",
     "validate_tools",
 ]