LunarCommand · chris-colinsky · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,24 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). The
 
 ## [Unreleased]
 
+### Added
+
+- **Structured output (proposal 0016, spec v0.14.0).** `Provider.complete()` now accepts an optional `response_schema` parameter — either a JSON Schema dict or a Pydantic `BaseModel` subclass. When supplied, the provider constrains the model's output to the schema and populates `Response.parsed` with the validated value (`dict` for dict-schema input, a `BaseModel` instance for class input). A new `StructuredOutputInvalid` error category (non-transient by default) is raised on JSON parse failure or schema validation failure; it carries the requested schema, the raw response content, and a failure description.
+- **`OpenAIProvider` native response_format wire path.** When `response_schema` is supplied, the chat-completions request body carries `response_format: { type: "json_schema", json_schema: { name, schema, strict } }`. The `strict` flag is determined by a deep recursive walk over the schema (object-property required-coverage rule across `anyOf` / `oneOf` / `allOf` and `$ref` targets, with cycle protection); unresolvable refs fall through to `strict: false`. The `name` field uses `schema.title` when present, otherwise a deterministic sha256-prefix hash.
+- **`OpenAIProvider` prompt-augmentation fallback.** Adds the constructor flag `force_prompt_augmentation_fallback: bool` (default `False`) and the read-only introspection property `uses_prompt_augmentation_fallback: bool`. When the flag is on, structured-output calls build a fresh message list with a system directive containing the serialized schema, omit `response_format` from the wire request, and validate the response after it is received. The caller's original `messages` list is never mutated. Use this for OpenAI-compatible servers (older vLLM, some LM Studio releases, llama.cpp variants) that reject or silently ignore `response_format`.
+- **Provider-agnostic schema helpers.** `openarmature.llm.validate_response_schema(schema)` (raises `ProviderInvalidRequest` when the schema is not a dict with a top-level `type: "object"`) and `openarmature.llm.strict_mode_supported(schema)` (the deep-tree strict-mode constraint check) are exported for reuse by future Anthropic/Gemini providers.
+- **Capability-agnostic conformance harness helpers.** `tests/conformance/harness/wire.py` adds `match_wire_body` (recursive deep-equal with `"*"` wildcard support), `assert_response_format_absent`, `assert_system_references_schema`, and `assert_error_carries` for the `expected_wire_request[_checks]` and `expected.raises.carries.{...}` fixture shapes. Used by the 0016 fixtures; available for the upcoming 0014 / 0015 / 0017 fixture sets.
+- **Runtime dependency: `jsonschema>=4.0`.** Used by the dict-schema validation path. The Pydantic-class path uses Pydantic's native validator and does not need `jsonschema`.
+
+### Changed
+
+- **Pinned spec version: 0.10.0 → 0.15.0.** Adopts the skip-ahead governance principle: the submodule jumps across v0.11.0–v0.15.0 (proposals 0009, 0011, 0014, 0015, 0016, 0017) in one bump. Only the surface introduced by proposal 0016 is implemented in this changelog entry; fixtures from 0011 / 0014 / 0015 / 0017 are marked deferred-skip in the conformance suite and will be unmarked as their respective PRs land.
+
+### Notes
+
+- **Release gate: do not tag until all of {0011, 0014, 0015, 0016, 0017} are merged.** This batch implements one proposal per PR and lands a consolidated release after the fifth PR. Cutting a release tag before the batch is complete would ship a partial spec implementation against the v0.15.0 pin.
+- **Pre-1.0 MINOR.** Existing free-form callers (no `response_schema`) see no behavior change — the new field defaults to `None`, the wire body omits `response_format`, and `Response.parsed` remains absent.
+
 ## [0.5.0] — 2026-05-10
 
 First release on real PyPI. Catches the implementation up from spec v0.5.x to v0.10.0 across six phases — the spec accepted eight proposals while the python lib was at v0.3.1, and v0.5.0 lands all of them in one curated drop.

diff --git a/openarmature-spec b/openarmature-spec
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,6 +24,7 @@ classifiers = [
 dependencies = [
     "pydantic>=2.7",
     "httpx>=0.27",
+    "jsonschema>=4.0",
 ]
 
 [project.optional-dependencies]
@@ -46,7 +47,7 @@ Repository = "https://github.com/LunarCommand/openarmature-python"
 Specification = "https://github.com/LunarCommand/openarmature-spec"
 
 [tool.openarmature]
-spec_version = "0.10.0"
+spec_version = "0.15.0"
 
 [dependency-groups]
 dev = [

diff --git a/src/openarmature/llm/__init__.py b/src/openarmature/llm/__init__.py
@@ -30,6 +30,7 @@
     PROVIDER_MODEL_NOT_LOADED,
     PROVIDER_RATE_LIMIT,
     PROVIDER_UNAVAILABLE,
+    STRUCTURED_OUTPUT_INVALID,
     TRANSIENT_CATEGORIES,
     LlmProviderError,
     ProviderAuthentication,
@@ -39,6 +40,7 @@
     ProviderModelNotLoaded,
     ProviderRateLimit,
     ProviderUnavailable,
+    StructuredOutputInvalid,
 )
 from .messages import (
     AssistantMessage,
@@ -49,9 +51,15 @@
     ToolMessage,
     UserMessage,
 )
-from .provider import Provider, validate_message_list, validate_tools
+from .provider import (
+    Provider,
+    strict_mode_supported,
+    validate_message_list,
+    validate_response_schema,
+    validate_tools,
+)
 from .providers import OpenAIProvider, classify_http_error, parse_retry_after
-from .response import FinishReason, Response, RuntimeConfig, Usage
+from .response import FinishReason, ParsedValue, Response, RuntimeConfig, Usage
 
 __all__ = [
     "PROVIDER_AUTHENTICATION",
@@ -61,12 +69,14 @@
     "PROVIDER_MODEL_NOT_LOADED",
     "PROVIDER_RATE_LIMIT",
     "PROVIDER_UNAVAILABLE",
+    "STRUCTURED_OUTPUT_INVALID",
     "TRANSIENT_CATEGORIES",
     "AssistantMessage",
     "FinishReason",
     "LlmProviderError",
     "Message",
     "OpenAIProvider",
+    "ParsedValue",
     "Provider",
     "ProviderAuthentication",
     "ProviderInvalidModel",
@@ -77,6 +87,7 @@
     "ProviderUnavailable",
     "Response",
     "RuntimeConfig",
+    "StructuredOutputInvalid",
     "SystemMessage",
     "Tool",
     "ToolCall",
@@ -85,6 +96,8 @@
     "UserMessage",
     "classify_http_error",
     "parse_retry_after",
+    "strict_mode_supported",
     "validate_message_list",
+    "validate_response_schema",
     "validate_tools",
 ]
diff --git a/src/openarmature/llm/errors.py b/src/openarmature/llm/errors.py
@@ -29,13 +29,20 @@
 PROVIDER_RATE_LIMIT = "provider_rate_limit"
 PROVIDER_INVALID_RESPONSE = "provider_invalid_response"
 PROVIDER_INVALID_REQUEST = "provider_invalid_request"
+STRUCTURED_OUTPUT_INVALID = "structured_output_invalid"
 
 
 # Per spec §7 "Retry classification": these three categories are
-# *transient* — a retry MAY succeed. The other four
+# *transient* — a retry MAY succeed. The other categories
 # (`provider_authentication`, `provider_invalid_model`,
-# `provider_invalid_request`, `provider_invalid_response`) are
-# non-transient and MUST NOT be retried by the default classifier.
+# `provider_invalid_request`, `provider_invalid_response`,
+# `structured_output_invalid`) are non-transient and MUST NOT be
+# retried by the default classifier.
+#
+# ``structured_output_invalid`` is explicitly non-transient by default
+# per §7: a model that fails schema compliance on a given prompt usually
+# fails the same way on retry. Users wanting retry-on-validation-failure
+# semantics MAY include it in a custom classifier's transient set.
 #
 # Note: ``finish_reason: "error"`` is also transient per spec §7, but
 # that's a Response-level signal rather than an exception category, so
@@ -130,6 +137,45 @@ class ProviderInvalidRequest(LlmProviderError):
     category = PROVIDER_INVALID_REQUEST
 
 
+# Non-transient by default — a model that fails schema compliance on a
+# given prompt usually fails the same way on retry. The default
+# RetryMiddleware classifier does NOT retry this category. Users wanting
+# retry-on-validation-failure semantics MAY include the category in a
+# custom classifier's transient set.
+#
+# Distinct from ProviderInvalidResponse, which covers wire-shape
+# malformation. StructuredOutputInvalid is raised when the wire envelope
+# is fine but the content does not validate against the caller's schema.
+class StructuredOutputInvalid(LlmProviderError):
+    """Structured output failed JSON parsing or schema validation.
+
+    Raised when a ``complete()`` call requested a ``response_schema`` and
+    the provider's content was not valid JSON or did not validate.
+
+    Attributes:
+        response_schema: The JSON Schema requested.
+        raw_content: The raw response content the model produced.
+        failure_description: Description of the parse/validation failure.
+    """
+
+    category = STRUCTURED_OUTPUT_INVALID
+    response_schema: dict[str, Any]
+    raw_content: str
+    failure_description: str
+
+    def __init__(
+        self,
+        *args: Any,
+        response_schema: dict[str, Any],
+        raw_content: str,
+        failure_description: str,
+    ) -> None:
+        super().__init__(*args)
+        self.response_schema = response_schema
+        self.raw_content = raw_content
+        self.failure_description = failure_description
+
+
 __all__ = [
     "PROVIDER_AUTHENTICATION",
     "PROVIDER_INVALID_MODEL",
@@ -138,6 +184,7 @@ class ProviderInvalidRequest(LlmProviderError):
     "PROVIDER_MODEL_NOT_LOADED",
     "PROVIDER_RATE_LIMIT",
     "PROVIDER_UNAVAILABLE",
+    "STRUCTURED_OUTPUT_INVALID",
     "TRANSIENT_CATEGORIES",
     "LlmProviderError",
     "ProviderAuthentication",
@@ -147,4 +194,5 @@ class ProviderInvalidRequest(LlmProviderError):
     "ProviderModelNotLoaded",
     "ProviderRateLimit",
     "ProviderUnavailable",
+    "StructuredOutputInvalid",
 ]