From 861af60b285fa7af7f2fc4d5da1c5b160a96b97e Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Thu, 14 May 2026 19:01:58 -0700 Subject: [PATCH 01/24] chore: bump spec to v0.15.0; add jsonschema; skip deferred fixtures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spec submodule moves from v0.10.0 to v0.15.0 — covers the full 5-proposal batch (0011, 0014, 0015, 0016, 0017) in one bump per the skip-ahead governance principle. spec_version in pyproject.toml bumped to match. Adds jsonschema>=4.0 as a runtime dependency (used by the forthcoming structured-output validation path on the dict-schema side; Pydantic-class path uses its own validator). Adds skip markers to the conformance test files for fixtures whose runtime support lands in a later PR of the batch: - llm-provider 009-020 → 0015 multimodal (PR-2) - llm-provider 021-028 → 0016 structured output (this PR, wired up in a later commit) - pipeline-utilities 032-038 → 0011 parallel branches (PR-5) - pipeline-utilities 039-046 → 0014 state migration (PR-4) - graph-engine 021-observer-branch-name → 0011 parallel branches (PR-5) Skip markers also apply to test_fixture_parsing.py for the same set — the typed harness models in tests/conformance/harness/ don't yet know about the new directive shapes (state_migration, parallel branches state-schema variation, NodeEvent.branch_name); each deferring PR drops its own skip rows when it lands the harness work. 
--- openarmature-spec | 2 +- pyproject.toml | 3 +- tests/conformance/test_conformance.py | 15 +++ tests/conformance/test_fixture_parsing.py | 35 +++++ tests/conformance/test_llm_provider.py | 35 +++++ tests/conformance/test_pipeline_utilities.py | 29 ++++ uv.lock | 133 +++++++++++++++++++ 7 files changed, 250 insertions(+), 2 deletions(-) diff --git a/openarmature-spec b/openarmature-spec index ff86945..644c2a5 160000 --- a/openarmature-spec +++ b/openarmature-spec @@ -1 +1 @@ -Subproject commit ff86945747c9d4767e5ededad62bab1c9c4e244a +Subproject commit 644c2a5448f9d5f00205e696cef08ddf7c61bae2 diff --git a/pyproject.toml b/pyproject.toml index f865450..6dd7c3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ dependencies = [ "pydantic>=2.7", "httpx>=0.27", + "jsonschema>=4.0", ] [project.optional-dependencies] @@ -46,7 +47,7 @@ Repository = "https://github.com/LunarCommand/openarmature-python" Specification = "https://github.com/LunarCommand/openarmature-spec" [tool.openarmature] -spec_version = "0.10.0" +spec_version = "0.15.0" [dependency-groups] dev = [ diff --git a/tests/conformance/test_conformance.py b/tests/conformance/test_conformance.py index 904cb54..9f79246 100644 --- a/tests/conformance/test_conformance.py +++ b/tests/conformance/test_conformance.py @@ -64,6 +64,18 @@ def _fixture_id(path: Path) -> str: ] +# Fixtures whose implementation lands in a later PR of the 5-proposal +# batch (proposals 0011, 0014, 0015, 0016, 0017). Skip-marked here so a +# green test run at this commit means "everything we claim to implement +# passes." Each subsequent PR drops its own rows as it lands the +# underlying support. +_DEFERRED_FIXTURES: dict[str, str] = { + # proposal 0011 — parallel branches; adds ``branch_name`` to + # NodeEvent (PR-5 of the batch) + "021-observer-branch-name": "0011 parallel branches (PR-5)", +} + + # Node directives the legacy adapter doesn't (yet) translate. 
Phase 1+ will # either expand the adapter or replace it with the typed harness. _UNSUPPORTED_NODE_DIRECTIVES = frozenset( @@ -167,6 +179,9 @@ def _compile_subgraphs_map( @pytest.mark.parametrize("fixture_path", _STANDARD_RUNTIME_FIXTURES, ids=_fixture_id) async def test_runtime_fixture(fixture_path: Path) -> None: + fixture_id = fixture_path.stem + if fixture_id in _DEFERRED_FIXTURES: + pytest.skip(f"{fixture_id}: {_DEFERRED_FIXTURES[fixture_id]}") spec = _load(fixture_path) # ``cases:`` form (e.g., 020-observer-edge-error-events): each entry diff --git a/tests/conformance/test_fixture_parsing.py b/tests/conformance/test_fixture_parsing.py index a949f9c..ed85e19 100644 --- a/tests/conformance/test_fixture_parsing.py +++ b/tests/conformance/test_fixture_parsing.py @@ -25,6 +25,35 @@ def _id(case: tuple[str, Path]) -> str: _FIXTURES = list(discover_fixtures()) +# Fixtures whose typed-harness directives land in a later PR of the +# 5-proposal batch. The fixture parsers / round-trippers need the new +# directive shapes (state_migration, parallel_branches, NodeEvent +# branch_name) to succeed; those shapes ship with their respective PRs. +# Keyed by the test ID format ``<capability>/<fixture-stem>``.
+_DEFERRED_FIXTURES: dict[str, str] = { + # proposal 0011 — parallel branches (PR-5) + "graph-engine/021-observer-branch-name": "0011 parallel branches (PR-5)", + "pipeline-utilities/032-parallel-branches-basic": "0011 parallel branches (PR-5)", + "pipeline-utilities/033-parallel-branches-fail-fast": "0011 parallel branches (PR-5)", + "pipeline-utilities/034-parallel-branches-collect": "0011 parallel branches (PR-5)", + "pipeline-utilities/035-parallel-branches-different-state-schemas": "0011 parallel branches (PR-5)", + "pipeline-utilities/036-parallel-branches-with-branch-middleware-retry": "0011 parallel branches (PR-5)", + "pipeline-utilities/037-parallel-branches-determinism": "0011 parallel branches (PR-5)", + "pipeline-utilities/038-parallel-branches-compose-with-fan-out": "0011 parallel branches (PR-5)", + # proposal 0014 — state migration (PR-4) + "pipeline-utilities/039-state-migration-additive-field": "0014 state migration (PR-4)", + "pipeline-utilities/040-state-migration-chain": "0014 state migration (PR-4)", + "pipeline-utilities/041-state-migration-missing": "0014 state migration (PR-4)", + "pipeline-utilities/042-state-migration-versions-match-no-op": "0014 state migration (PR-4)", + "pipeline-utilities/043-state-migration-parent-states-migrated": "0014 state migration (PR-4)", + "pipeline-utilities/044-state-migration-post-migration-deserialization-fails": ( + "0014 state migration (PR-4)" + ), + "pipeline-utilities/045-state-migration-no-path-in-registry": "0014 state migration (PR-4)", + "pipeline-utilities/046-state-migration-function-raises": "0014 state migration (PR-4)", +} + + def test_inventory_is_non_empty() -> None: """Sanity guard. The spec submodule should expose 68+ fixtures across the four capabilities. 
If discover returns zero, the submodule pin is @@ -38,6 +67,9 @@ def test_fixture_parses(case: tuple[str, Path]) -> None: discriminator routes to ``LlmProviderFixture``, ``CasesFixture``, or ``GraphFixture`` based on top-level keys; ``extra="forbid"`` rejects any unknown top-level field.""" + case_id = _id(case) + if case_id in _DEFERRED_FIXTURES: + pytest.skip(f"{case_id}: {_DEFERRED_FIXTURES[case_id]}") _, path = case load_fixture(path) @@ -47,6 +79,9 @@ def test_fixture_round_trips(case: tuple[str, Path]) -> None: """Parse → ``model_dump`` → re-parse → equal. Exit criterion for Phase 0 per the implementation plan: catches dropped fields the user intended to use later.""" + case_id = _id(case) + if case_id in _DEFERRED_FIXTURES: + pytest.skip(f"{case_id}: {_DEFERRED_FIXTURES[case_id]}") _, path = case parsed = load_fixture(path) dumped = parsed.model_dump(exclude_none=True) diff --git a/tests/conformance/test_llm_provider.py b/tests/conformance/test_llm_provider.py index f485281..44f7213 100644 --- a/tests/conformance/test_llm_provider.py +++ b/tests/conformance/test_llm_provider.py @@ -48,6 +48,38 @@ ) +# Fixtures whose implementation lands in a later PR of the 5-proposal batch. +# Skip-marked here so a green test run at this commit means "everything we +# claim to implement passes." Each subsequent PR drops its own rows as it +# lands the underlying support. 
+_DEFERRED_FIXTURES: dict[str, str] = { + # proposal 0015 — multimodal images (PR-2 of the batch) + "009-content-blocks-text-only-equivalence": "0015 multimodal images (PR-2)", + "010-content-blocks-image-url": "0015 multimodal images (PR-2)", + "011-content-blocks-image-inline-base64": "0015 multimodal images (PR-2)", + "012-content-blocks-image-detail-hint": "0015 multimodal images (PR-2)", + "013-content-blocks-mixed-order-preserved": "0015 multimodal images (PR-2)", + "014-content-blocks-validation-empty-sequence": "0015 multimodal images (PR-2)", + "015-content-blocks-validation-empty-text-block": "0015 multimodal images (PR-2)", + "016-content-blocks-unsupported-by-model": "0015 multimodal images (PR-2)", + "017-content-blocks-system-message-text-only": "0015 multimodal images (PR-2)", + "018-content-blocks-image-source-missing": "0015 multimodal images (PR-2)", + "019-content-blocks-invalid-detail-value": "0015 multimodal images (PR-2)", + "020-content-blocks-inline-image-missing-media-type": "0015 multimodal images (PR-2)", + # proposal 0016 — structured output (this PR; wired up later in the + # commit sequence). These rows are removed in the commit that drives + # the structured-output fixtures. 
+ "021-structured-output-success": "0016 structured output (this PR; not yet wired)", + "022-structured-output-parse-failure": "0016 structured output (this PR; not yet wired)", + "023-structured-output-validation-failure": "0016 structured output (this PR; not yet wired)", + "024-structured-output-non-transient": "0016 structured output (this PR; not yet wired)", + "025-structured-output-with-tool-calls": "0016 structured output (this PR; not yet wired)", + "026-structured-output-openai-wire-mapping-native": "0016 structured output (this PR; not yet wired)", + "027-structured-output-openai-wire-mapping-fallback": "0016 structured output (this PR; not yet wired)", + "028-structured-output-no-schema-regression": "0016 structured output (this PR; not yet wired)", +} + + def _fixture_paths() -> list[Path]: return sorted(CONFORMANCE_DIR.glob("[0-9][0-9][0-9]-*.yaml")) @@ -240,6 +272,9 @@ def _assert_raises_matches( @pytest.mark.parametrize("fixture_path", _fixture_paths(), ids=_fixture_id) async def test_llm_provider_fixture(fixture_path: Path) -> None: + fixture_id = fixture_path.stem + if fixture_id in _DEFERRED_FIXTURES: + pytest.skip(f"{fixture_id}: {_DEFERRED_FIXTURES[fixture_id]}") spec = _load(fixture_path) if "cases" in spec: diff --git a/tests/conformance/test_pipeline_utilities.py b/tests/conformance/test_pipeline_utilities.py index a4aed36..877dd18 100644 --- a/tests/conformance/test_pipeline_utilities.py +++ b/tests/conformance/test_pipeline_utilities.py @@ -90,6 +90,32 @@ def _fixture_id(path: Path) -> str: return path.stem +# Fixtures whose implementation lands in a later PR of the 5-proposal +# batch (proposals 0011, 0014, 0015, 0016, 0017). Skip-marked here so a +# green test run at this commit means "everything we claim to implement +# passes." Each subsequent PR drops its own rows as it lands the +# underlying support. 
+_DEFERRED_FIXTURES: dict[str, str] = { + # proposal 0011 — parallel branches (PR-5 of the batch) + "032-parallel-branches-basic": "0011 parallel branches (PR-5)", + "033-parallel-branches-fail-fast": "0011 parallel branches (PR-5)", + "034-parallel-branches-collect": "0011 parallel branches (PR-5)", + "035-parallel-branches-different-state-schemas": "0011 parallel branches (PR-5)", + "036-parallel-branches-with-branch-middleware-retry": "0011 parallel branches (PR-5)", + "037-parallel-branches-determinism": "0011 parallel branches (PR-5)", + "038-parallel-branches-compose-with-fan-out": "0011 parallel branches (PR-5)", + # proposal 0014 — state migration (PR-4 of the batch) + "039-state-migration-additive-field": "0014 state migration (PR-4)", + "040-state-migration-chain": "0014 state migration (PR-4)", + "041-state-migration-missing": "0014 state migration (PR-4)", + "042-state-migration-versions-match-no-op": "0014 state migration (PR-4)", + "043-state-migration-parent-states-migrated": "0014 state migration (PR-4)", + "044-state-migration-post-migration-deserialization-fails": "0014 state migration (PR-4)", + "045-state-migration-no-path-in-registry": "0014 state migration (PR-4)", + "046-state-migration-function-raises": "0014 state migration (PR-4)", +} + + def _unsupported_directive(spec: dict[str, Any]) -> str | None: """Return the first node directive the driver can't translate yet.""" @@ -317,6 +343,9 @@ async def test_pipeline_utility_fixture( fixture_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: + fixture_id = fixture_path.stem + if fixture_id in _DEFERRED_FIXTURES: + pytest.skip(f"{fixture_id}: {_DEFERRED_FIXTURES[fixture_id]}") spec = _load(fixture_path) # Cases-shape fixtures (014, 016, 018-019, 021-023): each case is diff --git a/uv.lock b/uv.lock index 3ed1042..c24ddf9 100644 --- a/uv.lock +++ b/uv.lock @@ -24,6 +24,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, ] +[[package]] +name = "attrs" +version = "26.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9a/8e/82a0fe20a541c03148528be8cac2408564a6c9a0cc7e9171802bc1d26985/attrs-26.1.0.tar.gz", hash = "sha256:d03ceb89cb322a8fd706d4fb91940737b6642aa36998fe130a9bc96c985eff32", size = 952055, upload-time = "2026-03-19T14:22:25.026Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" }, +] + [[package]] name = "babel" version = "2.18.0" @@ -518,6 +527,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/e9/1f9ada30cef7b05e74bb06f52127e7a724976c225f46adb65c37b1dadfb6/jiter-0.14.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67f00d94b281174144d6532a04b66a12cb866cbdc47c3af3bfe2973677f9861a", size = 349613, upload-time = "2026-04-10T14:28:40.066Z" }, ] +[[package]] +name = "jsonschema" +version = "4.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + [[package]] name = "markdown" version = "3.10.2" @@ -853,6 +889,7 @@ version = "0.5.0" source = { editable = "." 
} dependencies = [ { name = "httpx" }, + { name = "jsonschema" }, { name = "pydantic" }, ] @@ -889,6 +926,7 @@ examples = [ [package.metadata] requires-dist = [ { name = "httpx", specifier = ">=0.27" }, + { name = "jsonschema", specifier = ">=4.0" }, { name = "opentelemetry-api", marker = "extra == 'otel'", specifier = ">=1.27,<3" }, { name = "opentelemetry-instrumentation-logging", marker = "extra == 'otel'", specifier = ">=0.62.0b1" }, { name = "opentelemetry-sdk", marker = "extra == 'otel'", specifier = ">=1.27,<3" }, @@ -1404,6 +1442,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/11/432f32f8097b03e3cd5fe57e88efb685d964e2e5178a48ed61e841f7fdce/pyyaml_env_tag-1.1-py3-none-any.whl", hash = "sha256:17109e1a528561e32f026364712fee1264bc2ea6715120891174ed1b980d2e04", size = 4722, upload-time = "2025-05-13T15:23:59.629Z" }, ] +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + [[package]] name = "requests" version = "2.34.0" @@ -1419,6 +1471,87 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/e6/e300fce5fe83c30520607a015dabd985df3251e188d234bfe9492e17a389/requests-2.34.0-py3-none-any.whl", hash = 
"sha256:917520a21b767485ce7c588f4ebb917c436b24a31231b44228715eaeb5a52c60", size = 73021, upload-time = "2026-05-11T19:29:49.923Z" }, ] +[[package]] +name = "rpds-py" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload-time = "2025-11-30T20:22:17.93Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload-time = "2025-11-30T20:22:19.297Z" }, + { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" }, + { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" }, + { url = "https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" }, + { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" }, + { url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" }, + { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload-time = "2025-11-30T20:22:37.271Z" }, + { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload-time = "2025-11-30T20:22:39.021Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload-time = "2025-11-30T20:22:40.493Z" }, + { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload-time = "2025-11-30T20:22:41.812Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", 
hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload-time = "2025-11-30T20:22:43.479Z" }, + { url = "https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload-time = "2025-11-30T20:22:44.819Z" }, + { url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload-time = "2025-11-30T20:22:46.103Z" }, + { url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload-time = "2025-11-30T20:22:47.458Z" }, + { url = "https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload-time = "2025-11-30T20:22:48.872Z" }, + { url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload-time = "2025-11-30T20:22:50.196Z" }, + { url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = 
"sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload-time = "2025-11-30T20:22:51.87Z" }, + { url = "https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload-time = "2025-11-30T20:22:53.341Z" }, + { url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload-time = "2025-11-30T20:22:54.778Z" }, + { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload-time = "2025-11-30T20:22:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload-time = "2025-11-30T20:22:58.2Z" }, + { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload-time = "2025-11-30T20:23:00.209Z" }, + { url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload-time = "2025-11-30T20:23:02.008Z" }, + { 
url = "https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload-time = "2025-11-30T20:23:03.43Z" }, + { url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload-time = "2025-11-30T20:23:04.878Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload-time = "2025-11-30T20:23:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload-time = "2025-11-30T20:23:07.825Z" }, + { url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload-time = "2025-11-30T20:23:09.228Z" }, + { url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload-time = "2025-11-30T20:23:11.186Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload-time = "2025-11-30T20:23:12.864Z" }, + { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload-time = "2025-11-30T20:23:14.638Z" }, + { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload-time = "2025-11-30T20:23:16.105Z" }, + { url = "https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload-time = "2025-11-30T20:23:17.539Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload-time = "2025-11-30T20:23:19.029Z" }, + { url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload-time = "2025-11-30T20:23:20.885Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload-time = "2025-11-30T20:23:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload-time = "2025-11-30T20:23:24.449Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, + { url = "https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099, upload-time = "2025-11-30T20:23:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192, upload-time = "2025-11-30T20:23:29.151Z" }, + { url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080, upload-time = "2025-11-30T20:23:30.785Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841, upload-time = "2025-11-30T20:23:32.209Z" }, + { url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670, upload-time = "2025-11-30T20:23:33.742Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005, upload-time = "2025-11-30T20:23:35.253Z" }, + { url = "https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112, upload-time = "2025-11-30T20:23:36.842Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049, upload-time = "2025-11-30T20:23:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661, upload-time = "2025-11-30T20:23:40.263Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606, upload-time = "2025-11-30T20:23:42.186Z" }, + { url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126, upload-time = "2025-11-30T20:23:44.086Z" }, + { url = "https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371, upload-time = "2025-11-30T20:23:46.004Z" }, + { url = "https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298, upload-time = "2025-11-30T20:23:47.696Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604, upload-time = "2025-11-30T20:23:49.501Z" }, + { url = "https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391, upload-time = "2025-11-30T20:23:50.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868, upload-time = "2025-11-30T20:23:52.494Z" }, + { url = "https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747, upload-time = "2025-11-30T20:23:54.036Z" }, + { url = "https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795, upload-time = "2025-11-30T20:23:55.556Z" }, + { url = "https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330, upload-time = "2025-11-30T20:23:57.033Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194, upload-time = "2025-11-30T20:23:58.637Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340, upload-time = "2025-11-30T20:24:00.2Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765, upload-time = "2025-11-30T20:24:01.759Z" }, + { url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834, upload-time = "2025-11-30T20:24:03.687Z" }, + { url = "https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470, upload-time = "2025-11-30T20:24:05.232Z" }, + { url = "https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630, upload-time = "2025-11-30T20:24:06.878Z" }, + { url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148, upload-time = "2025-11-30T20:24:08.445Z" }, + { url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030, upload-time = "2025-11-30T20:24:10.956Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570, upload-time = "2025-11-30T20:24:12.735Z" }, + { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, +] + [[package]] name = "ruff" version = "0.15.11" From e9298af31a9946b43357dc29155af8ebd7a43d38 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Thu, 14 May 2026 19:08:26 -0700 Subject: [PATCH 02/24] feat(llm): add StructuredOutputInvalid error category MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the structured_output_invalid canonical category. Raised when a complete() call requested a response_schema and the provider's returned content could not be parsed as JSON OR did not validate against the schema. The exception carries response_schema, raw_content, and failure_description attributes for caller introspection. Non-transient by default — NOT added to TRANSIENT_CATEGORIES. The default RetryMiddleware classifier will not retry this category; callers wanting retry-on-validation-failure can include the category in a custom classifier's transient set. 
--- src/openarmature/llm/__init__.py | 4 +++ src/openarmature/llm/errors.py | 54 ++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/src/openarmature/llm/__init__.py b/src/openarmature/llm/__init__.py index b2585bb..dc7637a 100644 --- a/src/openarmature/llm/__init__.py +++ b/src/openarmature/llm/__init__.py @@ -30,6 +30,7 @@ PROVIDER_MODEL_NOT_LOADED, PROVIDER_RATE_LIMIT, PROVIDER_UNAVAILABLE, + STRUCTURED_OUTPUT_INVALID, TRANSIENT_CATEGORIES, LlmProviderError, ProviderAuthentication, @@ -39,6 +40,7 @@ ProviderModelNotLoaded, ProviderRateLimit, ProviderUnavailable, + StructuredOutputInvalid, ) from .messages import ( AssistantMessage, @@ -61,6 +63,7 @@ "PROVIDER_MODEL_NOT_LOADED", "PROVIDER_RATE_LIMIT", "PROVIDER_UNAVAILABLE", + "STRUCTURED_OUTPUT_INVALID", "TRANSIENT_CATEGORIES", "AssistantMessage", "FinishReason", @@ -77,6 +80,7 @@ "ProviderUnavailable", "Response", "RuntimeConfig", + "StructuredOutputInvalid", "SystemMessage", "Tool", "ToolCall", diff --git a/src/openarmature/llm/errors.py b/src/openarmature/llm/errors.py index a3c75f9..5859eb5 100644 --- a/src/openarmature/llm/errors.py +++ b/src/openarmature/llm/errors.py @@ -29,13 +29,20 @@ PROVIDER_RATE_LIMIT = "provider_rate_limit" PROVIDER_INVALID_RESPONSE = "provider_invalid_response" PROVIDER_INVALID_REQUEST = "provider_invalid_request" +STRUCTURED_OUTPUT_INVALID = "structured_output_invalid" # Per spec §7 "Retry classification": these three categories are -# *transient* — a retry MAY succeed. The other four +# *transient* — a retry MAY succeed. The other categories # (`provider_authentication`, `provider_invalid_model`, -# `provider_invalid_request`, `provider_invalid_response`) are -# non-transient and MUST NOT be retried by the default classifier. +# `provider_invalid_request`, `provider_invalid_response`, +# `structured_output_invalid`) are non-transient and MUST NOT be +# retried by the default classifier. 
+# +# ``structured_output_invalid`` is explicitly non-transient by default +# per §7: a model that fails schema compliance on a given prompt usually +# fails the same way on retry. Users wanting retry-on-validation-failure +# semantics MAY include it in a custom classifier's transient set. # # Note: ``finish_reason: "error"`` is also transient per spec §7, but # that's a Response-level signal rather than an exception category, so @@ -130,6 +137,45 @@ class ProviderInvalidRequest(LlmProviderError): category = PROVIDER_INVALID_REQUEST +# Non-transient by default — a model that fails schema compliance on a +# given prompt usually fails the same way on retry. The default +# RetryMiddleware classifier does NOT retry this category. Users wanting +# retry-on-validation-failure semantics MAY include the category in a +# custom classifier's transient set. +# +# Distinct from ProviderInvalidResponse, which covers wire-shape +# malformation. StructuredOutputInvalid is raised when the wire envelope +# is fine but the content does not validate against the caller's schema. +class StructuredOutputInvalid(LlmProviderError): + """Raised when a ``complete()`` call requested a ``response_schema`` + and the provider's content could not be parsed as JSON or did not + validate against the schema. + + Attributes: + response_schema: The JSON Schema requested. + raw_content: The raw response content the model produced. + failure_description: A description of the parse or validation + failure. 
+ """ + + category = STRUCTURED_OUTPUT_INVALID + response_schema: dict[str, Any] + raw_content: str + failure_description: str + + def __init__( + self, + *args: Any, + response_schema: dict[str, Any], + raw_content: str, + failure_description: str, + ) -> None: + super().__init__(*args) + self.response_schema = response_schema + self.raw_content = raw_content + self.failure_description = failure_description + + __all__ = [ "PROVIDER_AUTHENTICATION", "PROVIDER_INVALID_MODEL", @@ -138,6 +184,7 @@ class ProviderInvalidRequest(LlmProviderError): "PROVIDER_MODEL_NOT_LOADED", "PROVIDER_RATE_LIMIT", "PROVIDER_UNAVAILABLE", + "STRUCTURED_OUTPUT_INVALID", "TRANSIENT_CATEGORIES", "LlmProviderError", "ProviderAuthentication", @@ -147,4 +194,5 @@ class ProviderInvalidRequest(LlmProviderError): "ProviderModelNotLoaded", "ProviderRateLimit", "ProviderUnavailable", + "StructuredOutputInvalid", ] From 069cfc81c529e5e8b5175f3394775f81d5972521 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Thu, 14 May 2026 19:10:02 -0700 Subject: [PATCH 03/24] feat(llm): add Response.parsed field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the parsed field to the Response record. Default None, populated by structured-output calls (response_schema set on complete() and the model returned structured content). The runtime type is a discriminated union over dict (when the caller passed a JSON-Schema dict) and BaseModel instance (when the caller passed a Pydantic class). Pydantic Response config now allows arbitrary types so a BaseModel instance can sit in the parsed slot. No public surface change for free-form callers — parsed defaults to None and remains None when response_schema is not supplied. 
--- src/openarmature/llm/__init__.py | 3 +- src/openarmature/llm/response.py | 47 ++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/src/openarmature/llm/__init__.py b/src/openarmature/llm/__init__.py index dc7637a..458b240 100644 --- a/src/openarmature/llm/__init__.py +++ b/src/openarmature/llm/__init__.py @@ -53,7 +53,7 @@ ) from .provider import Provider, validate_message_list, validate_tools from .providers import OpenAIProvider, classify_http_error, parse_retry_after -from .response import FinishReason, Response, RuntimeConfig, Usage +from .response import FinishReason, ParsedValue, Response, RuntimeConfig, Usage __all__ = [ "PROVIDER_AUTHENTICATION", @@ -70,6 +70,7 @@ "LlmProviderError", "Message", "OpenAIProvider", + "ParsedValue", "Provider", "ProviderAuthentication", "ProviderInvalidModel", diff --git a/src/openarmature/llm/response.py b/src/openarmature/llm/response.py index 8626ecf..8b789c7 100644 --- a/src/openarmature/llm/response.py +++ b/src/openarmature/llm/response.py @@ -27,6 +27,14 @@ from .messages import AssistantMessage +# ``parsed`` may carry either a raw dict (when the caller passed a +# JSON-Schema dict as response_schema) or a Pydantic model instance +# (when the caller passed a BaseModel subclass). The latter is a +# per-language ergonomic — the runtime shape mirrors what the caller +# requested. Absent (None) on calls without response_schema and on +# tool-call responses regardless of whether response_schema was set. +ParsedValue = dict[str, Any] | BaseModel | None + # The five spec §6 finish-reason values. Modeled as a Literal union so # pydantic rejects unknown values at parse time — provider responses # carrying a non-standard value surface as ``provider_invalid_response`` @@ -51,24 +59,40 @@ class Usage(BaseModel): class Response(BaseModel): """The result of a ``Provider.complete()`` call. - - ``message`` is the assistant message returned by the model. - Always ``role: "assistant"``. 
May carry ``tool_calls``. - - ``finish_reason`` is one of the five canonical values - (``"stop"`` / ``"length"`` / ``"tool_calls"`` / - ``"content_filter"`` / ``"error"``). - - ``usage`` is the token record (all ``None`` if the provider - didn't report usage). - - ``raw`` is the parsed provider response, populated on every - successful return. Carries everything the provider returned — - the normalized fields above are derived from it. + Attributes: + message: The assistant message returned by the model. + Always ``role: "assistant"``. May carry ``tool_calls``. + finish_reason: One of ``"stop"``, ``"length"``, ``"tool_calls"``, + ``"content_filter"``, ``"error"``. + usage: The token record (all ``None`` if the provider didn't + report usage). + raw: The parsed provider response, populated on every successful + return. Carries everything the provider returned; the + normalized fields above are derived from it. + parsed: The parsed-and-validated structured value when the call + supplied a ``response_schema`` and the model returned + structured content. ``None`` otherwise. The runtime type + depends on the schema form the caller passed: ``dict`` for + a JSON-Schema dict input, a ``BaseModel`` instance for a + Pydantic class input. """ - model_config = ConfigDict(extra="forbid") + # ``parsed`` is absent (None) on calls that didn't supply a + # response_schema, and on responses whose finish_reason is + # "tool_calls" — the tool-call path and the structured-content + # path are mutually exclusive at the response level. + # + # message.content carries the model's content string verbatim. + # parsed is the post-receive deserialization of that content + # against the schema; the provider's content string is NOT + # re-serialized from parsed. 
+ model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True) message: AssistantMessage finish_reason: FinishReason usage: Usage raw: dict[str, Any] + parsed: ParsedValue = None class RuntimeConfig(BaseModel): @@ -90,6 +114,7 @@ class RuntimeConfig(BaseModel): __all__ = [ "FinishReason", + "ParsedValue", "Response", "RuntimeConfig", "Usage", From 5a889c7d90ed26de7e6f3c85fcd6b5249e930a91 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Thu, 14 May 2026 19:23:12 -0700 Subject: [PATCH 04/24] feat(llm): validate_response_schema + strict_mode_supported helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two provider-agnostic helpers in openarmature.llm.provider used by structured-output Provider implementations: - validate_response_schema(schema) — pre-send structural check that the value is a dict and its top-level type is "object". Raises ProviderInvalidRequest on failure. - strict_mode_supported(schema) — whether the schema satisfies the strict-mode constraint set (additionalProperties not true, properties fully covered by required) across the full schema tree. Walks anyOf/oneOf/allOf branches and follows $ref targets with cycle protection. An unresolvable $ref or unknown shape returns False (conservative fail). Both are exported from openarmature.llm so OpenAI-compatible providers and any future Anthropic/Gemini provider share the same constraint heuristic. 
--- src/openarmature/llm/__init__.py | 10 +- src/openarmature/llm/provider.py | 156 ++++++++++++++++++++++++++++++- 2 files changed, 164 insertions(+), 2 deletions(-) diff --git a/src/openarmature/llm/__init__.py b/src/openarmature/llm/__init__.py index 458b240..2b91e9f 100644 --- a/src/openarmature/llm/__init__.py +++ b/src/openarmature/llm/__init__.py @@ -51,7 +51,13 @@ ToolMessage, UserMessage, ) -from .provider import Provider, validate_message_list, validate_tools +from .provider import ( + Provider, + strict_mode_supported, + validate_message_list, + validate_response_schema, + validate_tools, +) from .providers import OpenAIProvider, classify_http_error, parse_retry_after from .response import FinishReason, ParsedValue, Response, RuntimeConfig, Usage @@ -90,6 +96,8 @@ "UserMessage", "classify_http_error", "parse_retry_after", + "strict_mode_supported", "validate_message_list", + "validate_response_schema", "validate_tools", ] diff --git a/src/openarmature/llm/provider.py b/src/openarmature/llm/provider.py index 60d6f32..ed496ab 100644 --- a/src/openarmature/llm/provider.py +++ b/src/openarmature/llm/provider.py @@ -36,7 +36,7 @@ from __future__ import annotations from collections.abc import Sequence -from typing import Protocol +from typing import Any, Protocol, cast from .errors import ProviderInvalidRequest from .messages import ( @@ -157,8 +157,162 @@ def validate_tools(tools: Sequence[Tool] | None) -> None: seen.add(t.name) +# --------------------------------------------------------------------------- +# Schema helpers — used by structured-output Provider implementations +# --------------------------------------------------------------------------- + + +# Spec llm-provider §5 requires the response_schema argument to +# complete() to be a valid JSON Schema with a top-level type "object". 
+# The pre-send check here is the structural minimum; deeper validity +# (recursive JSON Schema correctness, vendor extensions) is delegated +# to the runtime validator at parse time. +def validate_response_schema(schema: object) -> None: + """Pre-send validation for a JSON Schema passed as the + ``response_schema`` argument to ``complete()``. + + Raises :class:`ProviderInvalidRequest` if the schema is not a dict + or does not declare a top-level object type. + """ + if not isinstance(schema, dict): + raise ProviderInvalidRequest(f"response_schema: MUST be a dict (got {type(schema).__name__})") + schema_dict = cast("dict[str, Any]", schema) + schema_type = schema_dict.get("type") + if schema_type != "object": + raise ProviderInvalidRequest( + f"response_schema: top-level type MUST be 'object' (got {schema_type!r})" + ) + + +# Strict mode (OpenAI's response_format strict:true and the analogous +# native-decoding paths in Anthropic / Gemini) requires the schema to +# satisfy two rules at every nested level: +# 1. additionalProperties is NOT true (false or absent). +# 2. every key in `properties` is listed in `required`. +# strict_mode_supported() walks the schema tree (object properties, +# array items, anyOf/oneOf/allOf branches, $ref targets with cycle +# protection) and returns True only if BOTH rules hold across the full +# tree. An unresolvable $ref or unknown-shape branch returns False — +# the safer choice when we can't statically verify the constraint. +def strict_mode_supported(schema: dict[str, Any]) -> bool: + """Whether a JSON Schema satisfies the strict-mode constraints used + by native-decoding LLM wire paths. + + Returns True iff for every nested (sub)schema in the tree + ``additionalProperties`` is not ``true`` and every key in + ``properties`` appears in ``required``. False on any violation, on + an unresolvable ``$ref``, or on an unknown shape. + + Args: + schema: The root JSON Schema dict. 
+ + Returns: + ``True`` if the schema cleanly supports strict mode; ``False`` + otherwise. + """ + return _strict_mode_check(schema, root=schema, visited=set()) + + +def _strict_mode_check( + schema: Any, + *, + root: dict[str, Any], + visited: set[str], +) -> bool: + if not isinstance(schema, dict): + return False + schema_dict = cast("dict[str, Any]", schema) + + # $ref resolution. Cycle protection: a $ref already in `visited` + # has been (or is being) validated up the chain; returning True + # avoids infinite recursion without weakening the rule. + ref = schema_dict.get("$ref") + if isinstance(ref, str): + if ref in visited: + return True + visited.add(ref) + target = _resolve_ref(ref, root) + if target is None: + return False + return _strict_mode_check(target, root=root, visited=visited) + + # Combinator branches — every branch must independently satisfy + # the strict-mode constraints. anyOf/oneOf/allOf members may + # themselves be arbitrary schemas; recursing handles nested + # objects inside each. 
+ for combinator in ("anyOf", "oneOf", "allOf"): + branches = schema_dict.get(combinator) + if branches is None: + continue + if not isinstance(branches, list): + return False + for branch in cast("list[Any]", branches): + if not _strict_mode_check(branch, root=root, visited=visited): + return False + + schema_type = schema_dict.get("type") + is_object_type = schema_type == "object" or ( + isinstance(schema_type, list) and "object" in cast("list[Any]", schema_type) + ) + is_array_type = schema_type == "array" or ( + isinstance(schema_type, list) and "array" in cast("list[Any]", schema_type) + ) + + if is_object_type: + if schema_dict.get("additionalProperties") is True: + return False + properties = schema_dict.get("properties") + if properties is not None and not isinstance(properties, dict): + return False + properties_dict = cast("dict[str, Any]", properties or {}) + required = schema_dict.get("required") + if required is not None and not isinstance(required, list): + return False + required_set: set[str] = set(cast("list[str]", required or [])) + for prop_name, prop_schema in properties_dict.items(): + if prop_name not in required_set: + return False + if not _strict_mode_check(prop_schema, root=root, visited=visited): + return False + + if is_array_type: + items = schema_dict.get("items") + if isinstance(items, dict): + if not _strict_mode_check(items, root=root, visited=visited): + return False + elif isinstance(items, list): + # Tuple-form items: each entry is its own schema. + for item in cast("list[Any]", items): + if not _strict_mode_check(item, root=root, visited=visited): + return False + + return True + + +# Internal-only $ref resolver. Handles JSON Pointer fragments rooted +# at the document (`#/$defs/Foo`, `#/definitions/Foo`); external refs +# (anything not starting with `#/`) are unresolvable here and return +# None. JSON Pointer escape rules (`~0` for `~`, `~1` for `/`) are +# unescaped per RFC 6901. 
+def _resolve_ref(ref: str, root: dict[str, Any]) -> dict[str, Any] | None: + if not ref.startswith("#/"): + return None + parts = ref[2:].split("/") + current: Any = root + for part in parts: + decoded = part.replace("~1", "/").replace("~0", "~") + if not isinstance(current, dict) or decoded not in cast("dict[str, Any]", current): + return None + current = cast("dict[str, Any]", current)[decoded] + if isinstance(current, dict): + return cast("dict[str, Any]", current) + return None + + __all__ = [ "Provider", + "strict_mode_supported", "validate_message_list", + "validate_response_schema", "validate_tools", ] From 8203605556d50a4ea0781b0373878c8cf6a8497b Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Thu, 14 May 2026 19:23:59 -0700 Subject: [PATCH 05/24] feat(llm): Provider Protocol gains response_schema parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the Provider Protocol's complete() method signature to accept an optional response_schema parameter. Accepts either a JSON Schema dict or a Pydantic BaseModel subclass; the implementation converts the class form to a JSON Schema at the boundary. Free-form callers (response_schema=None or absent) see no behavior change — the parameter defaults to None and the v0.4.0 contract is preserved. OpenAIProvider's complete() still has the v0.4.0 signature; the next commit wires the response_schema parameter through it. 
--- src/openarmature/llm/provider.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/openarmature/llm/provider.py b/src/openarmature/llm/provider.py index ed496ab..04fe6e9 100644 --- a/src/openarmature/llm/provider.py +++ b/src/openarmature/llm/provider.py @@ -38,6 +38,8 @@ from collections.abc import Sequence from typing import Any, Protocol, cast +from pydantic import BaseModel + from .errors import ProviderInvalidRequest from .messages import ( AssistantMessage, @@ -67,15 +69,26 @@ async def complete( messages: Sequence[Message], tools: Sequence[Tool] | None = None, config: RuntimeConfig | None = None, + response_schema: dict[str, Any] | type[BaseModel] | None = None, ) -> Response: """Perform a single completion call. - ``messages`` MUST NOT be mutated. ``complete()`` does NOT loop - on tool calls — if the response's ``finish_reason`` is - ``"tool_calls"``, the caller is responsible for executing the - tools and making a follow-on call with ``tool`` messages - appended. ``complete()`` does NOT retry; transient errors - propagate. + Args: + messages: The conversation to send. MUST NOT be mutated by + the implementation. + tools: Optional tool definitions the model may call. + config: Optional per-call sampling parameters. + response_schema: Optional JSON Schema (dict) or Pydantic + model class describing the expected output shape. When + supplied, the implementation constrains the model's + output to the schema and populates ``Response.parsed`` + with the validated value. + + Returns: + A :class:`Response` carrying the assistant message, finish + reason, usage, raw payload, and (when ``response_schema`` + was supplied and the model returned structured content) + the parsed structured value. """ ... 
From 467f74b9ed665f4772b29d5fa2a5b970b902709d Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Thu, 14 May 2026 19:30:32 -0700 Subject: [PATCH 06/24] feat(llm/openai): native response_format wire path + Pydantic overload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Threads response_schema through OpenAIProvider.complete() → _do_complete() → _parse_response(). Accepts either a JSON Schema dict OR a Pydantic BaseModel subclass; the latter is converted via model_json_schema() at the boundary. Native wire path: when response_schema is supplied, the request body includes response_format: { type: "json_schema", json_schema: { name, schema, strict } }. The name field comes from schema.title when non-empty, otherwise a deterministic sha256 hash of the schema. The strict flag is set per strict_mode_supported() — true only when the schema cleanly satisfies the constraints across the full tree. Post-receive: parses message.content as JSON, then validates against the schema. Dict-input path validates with jsonschema and returns a dict. BaseModel-class-input path validates with model.model_validate() and returns a BaseModel instance. Either way, JSON parse failure or schema validation failure raises StructuredOutputInvalid carrying the schema, raw content, and failure description. parsed is absent on tool-call responses regardless of whether response_schema was supplied (mutually exclusive paths). Free-form calls (response_schema=None) see no behavior change — body omits response_format, parsed stays None. The prompt-augmentation fallback path is the next commit. 
--- src/openarmature/llm/providers/openai.py | 156 +++++++++++++++++++++-- 1 file changed, 144 insertions(+), 12 deletions(-) diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py index f720990..e5be1d7 100644 --- a/src/openarmature/llm/providers/openai.py +++ b/src/openarmature/llm/providers/openai.py @@ -40,13 +40,15 @@ from __future__ import annotations +import hashlib import json import uuid from collections.abc import Sequence from typing import Any, Literal, cast import httpx -from pydantic import ValidationError +import jsonschema +from pydantic import BaseModel, ValidationError from openarmature.graph.events import NodeEvent from openarmature.graph.state import State @@ -66,6 +68,7 @@ ProviderModelNotLoaded, ProviderRateLimit, ProviderUnavailable, + StructuredOutputInvalid, ) from ..messages import ( AssistantMessage, @@ -75,8 +78,13 @@ ToolCall, UserMessage, ) -from ..provider import validate_message_list, validate_tools -from ..response import FinishReason, Response, RuntimeConfig, Usage +from ..provider import ( + strict_mode_supported, + validate_message_list, + validate_response_schema, + validate_tools, +) +from ..response import FinishReason, ParsedValue, Response, RuntimeConfig, Usage class OpenAIProvider: @@ -191,18 +199,27 @@ async def complete( messages: Sequence[Message], tools: Sequence[Tool] | None = None, config: RuntimeConfig | None = None, + response_schema: dict[str, Any] | type[BaseModel] | None = None, ) -> Response: """Single completion call. Pre-send validation runs first (per-message Pydantic + - list-level invariants). HTTP errors map to canonical - provider-error categories. The successful 200 body is parsed - into a :class:`Response` — failure to parse raises - ``provider_invalid_response``. + list-level invariants + response_schema shape check). HTTP + errors map to canonical provider-error categories. 
The + successful 200 body is parsed into a :class:`Response` — + failure to parse raises ``provider_invalid_response``; failure + to validate the response content against ``response_schema`` + raises ``structured_output_invalid``. + + When ``response_schema`` is supplied as a Pydantic BaseModel + subclass, ``Response.parsed`` is a validated instance of that + class; when supplied as a JSON Schema dict, + ``Response.parsed`` is the deserialized dict. """ validate_message_list(messages) validate_tools(tools) - body = self._build_request_body(messages, tools, config) + schema_dict, schema_class = _normalize_response_schema(response_schema) + body = self._build_request_body(messages, tools, config, schema_dict) # Spec observability §5.5 LLM provider span: when an # observability backend is active in the current invocation, @@ -226,7 +243,7 @@ async def complete( dispatch(_make_llm_event("started", call_id=call_id, model=self.model)) try: - response = await self._do_complete(body) + response = await self._do_complete(body, schema_dict, schema_class) except Exception as exc: if dispatch is not None: dispatch(_make_llm_event("completed", call_id=call_id, model=self.model, error=exc)) @@ -244,7 +261,12 @@ async def complete( ) return response - async def _do_complete(self, body: dict[str, Any]) -> Response: + async def _do_complete( + self, + body: dict[str, Any], + schema_dict: dict[str, Any] | None, + schema_class: type[BaseModel] | None, + ) -> Response: """Wire-call helper: separated from ``complete()`` so the LLM-provider span hook in ``complete()`` can wrap success and failure paths uniformly.""" @@ -262,7 +284,7 @@ async def _do_complete(self, body: dict[str, Any]) -> Response: raise ProviderInvalidResponse("POST /v1/chat/completions returned non-JSON body") from exc if not isinstance(payload_raw, dict): raise ProviderInvalidResponse("POST /v1/chat/completions returned a non-object body") - return self._parse_response(cast("dict[str, Any]", payload_raw)) + return 
self._parse_response(cast("dict[str, Any]", payload_raw), schema_dict, schema_class) # ------------------------------------------------------------------ # Request building (spec §8.1) @@ -273,6 +295,7 @@ def _build_request_body( messages: Sequence[Message], tools: Sequence[Tool] | None, config: RuntimeConfig | None, + schema_dict: dict[str, Any] | None, ) -> dict[str, Any]: body: dict[str, Any] = { "model": self.model, @@ -295,13 +318,27 @@ def _build_request_body( extras = config.model_extra or {} for k, v in extras.items(): body.setdefault(k, v) + if schema_dict is not None: + body["response_format"] = { + "type": "json_schema", + "json_schema": { + "name": _derive_schema_name(schema_dict), + "schema": schema_dict, + "strict": strict_mode_supported(schema_dict), + }, + } return body # ------------------------------------------------------------------ # Response parsing (spec §8.2) # ------------------------------------------------------------------ - def _parse_response(self, payload: dict[str, Any]) -> Response: + def _parse_response( + self, + payload: dict[str, Any], + schema_dict: dict[str, Any] | None, + schema_class: type[BaseModel] | None, + ) -> Response: try: choices = cast("list[dict[str, Any]]", payload["choices"]) choice = choices[0] @@ -358,11 +395,20 @@ def _parse_response(self, payload: dict[str, Any]) -> Response: except ValidationError as exc: raise ProviderInvalidResponse(f"invalid usage record: {exc}") from exc + # Structured-output parsing. parsed is absent when no schema + # was requested AND when the response is a tool-call response + # — the tool-call path and structured-content path are + # mutually exclusive at the response level. 
+ parsed: ParsedValue = None + if schema_dict is not None and finish_reason_typed != "tool_calls": + parsed = _parse_and_validate(assistant_msg.content, schema_dict, schema_class) + return Response( message=assistant_msg, finish_reason=finish_reason_typed, usage=usage, raw=payload, + parsed=parsed, ) @@ -371,6 +417,92 @@ def _parse_response(self, payload: dict[str, Any]) -> Response: # --------------------------------------------------------------------------- +# Normalize a response_schema argument to a dict (plus the optional +# BaseModel subclass form for the post-parse instance return). Accepts +# either form per the Provider Protocol; raises ProviderInvalidRequest +# on invalid shapes (non-dict, non-object-top-level for the dict form; +# pre-validated by validate_response_schema). +def _normalize_response_schema( + response_schema: dict[str, Any] | type[BaseModel] | None, +) -> tuple[dict[str, Any] | None, type[BaseModel] | None]: + if response_schema is None: + return None, None + if isinstance(response_schema, type): + # Per the Protocol signature, the only class form accepted is + # a BaseModel subclass; non-BaseModel classes will AttributeError + # on model_json_schema below. + schema_dict = response_schema.model_json_schema() + validate_response_schema(schema_dict) + return schema_dict, response_schema + validate_response_schema(response_schema) + return response_schema, None + + +# Derive a stable identifier for the JSON Schema for OpenAI's +# response_format.json_schema.name field. Uses the schema's `title` +# when present (and a valid identifier-shaped string); otherwise +# derives a deterministic short hash so the same schema always +# produces the same name across calls. 
+def _derive_schema_name(schema: dict[str, Any]) -> str: + title = schema.get("title") + if isinstance(title, str) and title: + return title + canonical = json.dumps(schema, sort_keys=True).encode("utf-8") + return f"oa_schema_{hashlib.sha256(canonical).hexdigest()[:16]}" + + +# Parse the model's content string as JSON, then validate against +# the schema. The dict-schema path uses jsonschema; the BaseModel-class +# path uses Pydantic's native validator (which produces an instance +# of the supplied class). +def _parse_and_validate( + content: str, + schema_dict: dict[str, Any], + schema_class: type[BaseModel] | None, +) -> ParsedValue: + try: + loaded = json.loads(content) + except json.JSONDecodeError as exc: + raise StructuredOutputInvalid( + "response content is not valid JSON", + response_schema=schema_dict, + raw_content=content, + failure_description=str(exc), + ) from exc + if not isinstance(loaded, dict): + raise StructuredOutputInvalid( + "response JSON is not an object", + response_schema=schema_dict, + raw_content=content, + failure_description=f"top-level type is {type(loaded).__name__}, expected object", + ) + parsed_dict = cast("dict[str, Any]", loaded) + + # Pydantic-class path: validate and return the BaseModel instance. + if schema_class is not None: + try: + return schema_class.model_validate(parsed_dict) + except ValidationError as exc: + raise StructuredOutputInvalid( + "response failed Pydantic validation", + response_schema=schema_dict, + raw_content=content, + failure_description=str(exc), + ) from exc + + # Dict-schema path: jsonschema validation, return the dict. 
+ try: + jsonschema.validate(instance=parsed_dict, schema=schema_dict) + except jsonschema.ValidationError as exc: + raise StructuredOutputInvalid( + "response failed JSON Schema validation", + response_schema=schema_dict, + raw_content=content, + failure_description=exc.message, + ) from exc + return parsed_dict + + def _message_to_wire(msg: Message) -> dict[str, Any]: """Spec §8.1 request mapping for one message.""" if isinstance(msg, SystemMessage): From 16fb3c456b5c818a81fb36597245d8ceb12c7c1b Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Thu, 14 May 2026 19:34:02 -0700 Subject: [PATCH 07/24] feat(llm/openai): prompt-augmentation fallback + inspect property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the prompt-augmentation fallback for OpenAI-compatible servers that don't implement response_format (older vLLM, some LM Studio releases, llama.cpp variants). Constructor: force_prompt_augmentation_fallback: bool = False When True, structured-output calls build the wire body by augmenting the message list with a system directive that includes the serialized JSON Schema, and omit response_format entirely. Native path is the default (False). Inspect property: uses_prompt_augmentation_fallback -> bool Read-only; lets callers verify which wire path is active without poking private state. _augment_messages_with_schema_directive returns a fresh list. When the first message is system, its content is extended with the schema directive (preserving caller intent); otherwise a new system message is prepended. The caller's original messages list is NOT mutated — Message instances are reused unchanged (immutable Pydantic models). Response parsing is unchanged from the native path: parse + validate post-receive raise StructuredOutputInvalid on failure. parsed is populated identically whether the wire took the native or fallback route. 
--- src/openarmature/llm/providers/openai.py | 67 +++++++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py index e5be1d7..5c9e4a5 100644 --- a/src/openarmature/llm/providers/openai.py +++ b/src/openarmature/llm/providers/openai.py @@ -106,9 +106,16 @@ def __init__( api_key: str | None = None, transport: httpx.AsyncBaseTransport | None = None, timeout: float = 60.0, + force_prompt_augmentation_fallback: bool = False, ) -> None: self.base_url = base_url.rstrip("/") self.model = model + # ``force_prompt_augmentation_fallback`` switches structured-output + # calls from the native response_format wire path to the + # prompt-augmentation fallback. Used for older OpenAI-compatible + # servers (some vLLM/LM Studio/llama.cpp versions) that reject + # or silently ignore response_format. + self._force_prompt_augmentation_fallback = force_prompt_augmentation_fallback self._headers: dict[str, str] = {"Content-Type": "application/json"} if api_key is not None: self._headers["Authorization"] = f"Bearer {api_key}" @@ -121,6 +128,14 @@ def __init__( timeout=timeout, ) + @property + def uses_prompt_augmentation_fallback(self) -> bool: + """Whether ``complete(response_schema=...)`` builds the wire + body via prompt augmentation (``True``) or the native + ``response_format`` path (``False``). + """ + return self._force_prompt_augmentation_fallback + async def aclose(self) -> None: """Close the underlying HTTP client. 
Optional — async clients garbage-collect cleanly, but explicit close is RECOMMENDED in @@ -219,7 +234,22 @@ async def complete( validate_message_list(messages) validate_tools(tools) schema_dict, schema_class = _normalize_response_schema(response_schema) - body = self._build_request_body(messages, tools, config, schema_dict) + # On the fallback path, the wire-side messages list is an + # augmented COPY of the caller's messages — original messages + # MUST NOT be mutated. _augment_messages_with_schema_directive + # builds a fresh list; the original instances are reused + # (immutable Pydantic models) so the caller's sequence is + # untouched. + wire_messages: Sequence[Message] = messages + if schema_dict is not None and self._force_prompt_augmentation_fallback: + wire_messages = _augment_messages_with_schema_directive(messages, schema_dict) + body = self._build_request_body( + wire_messages, + tools, + config, + schema_dict, + include_response_format=not self._force_prompt_augmentation_fallback, + ) # Spec observability §5.5 LLM provider span: when an # observability backend is active in the current invocation, @@ -296,6 +326,7 @@ def _build_request_body( tools: Sequence[Tool] | None, config: RuntimeConfig | None, schema_dict: dict[str, Any] | None, + include_response_format: bool = True, ) -> dict[str, Any]: body: dict[str, Any] = { "model": self.model, @@ -318,7 +349,9 @@ def _build_request_body( extras = config.model_extra or {} for k, v in extras.items(): body.setdefault(k, v) - if schema_dict is not None: + # response_format is omitted entirely on the fallback path — + # the schema travels in the augmented system message instead. + if schema_dict is not None and include_response_format: body["response_format"] = { "type": "json_schema", "json_schema": { @@ -503,6 +536,36 @@ def _parse_and_validate( return parsed_dict +_SCHEMA_DIRECTIVE_TEMPLATE = ( + "You MUST return only valid JSON that conforms to the following JSON Schema. 
" + "Do not include prose, markdown fences, or any text outside the JSON object.\n\n" + "JSON Schema:\n{schema_json}" +) + + +# Construct a fresh message list with a schema directive added. The +# directive is appended to the existing system message's content when +# present, or prepended as a new system message otherwise. The caller's +# original list is never mutated; Message instances are reused because +# they are immutable Pydantic models. The serialized schema appears +# verbatim in the directive so callers that need to verify the directive +# references the schema (conformance harnesses, observability spans) +# can substring-match the canonical JSON form. +def _augment_messages_with_schema_directive( + messages: Sequence[Message], + schema_dict: dict[str, Any], +) -> list[Message]: + directive = _SCHEMA_DIRECTIVE_TEMPLATE.format(schema_json=json.dumps(schema_dict, sort_keys=True)) + out: list[Message] = list(messages) + if out and isinstance(out[0], SystemMessage): + existing = out[0] + merged = SystemMessage(content=f"{existing.content}\n\n{directive}") + out[0] = merged + else: + out.insert(0, SystemMessage(content=directive)) + return out + + def _message_to_wire(msg: Message) -> dict[str, Any]: """Spec §8.1 request mapping for one message.""" if isinstance(msg, SystemMessage): From 66aa908008f488f3ee8f641dfc6737ab3ecdb3c4 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Thu, 14 May 2026 19:44:32 -0700 Subject: [PATCH 08/24] test(conformance): capability-agnostic harness helpers for wire + carries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds tests/conformance/harness/wire.py with helpers used by structured- output and content-block fixtures (and any future capability fixtures that need the same shapes): - match_wire_body(actual, expected) — recursive deep-equal with "*" wildcard support for string slots. - assert_response_format_absent(body) — asserts the wire body has no response_format key. 
- assert_system_references_schema(body, schema) — asserts the first message in the body is a system message whose content contains the canonical-JSON form of the schema as a substring. - assert_error_carries(exc, carries) — introspects a raised exception's attributes against an expected_carries block; supports _present / _mentions / literal-equal forms; handles the raw_response_content → raw_content fixture-vs-impl naming alias. Extends test_llm_provider.py to drive these from the existing fixture loop: - response_schema is read from call_spec and threaded through provider.complete(). - expected_wire_request literal compare + expected_wire_request_checks sibling checks fire after each captured chat-completions request. - caller_messages_unmodified takes a model_dump snapshot pre-call and asserts byte-equality post-call. - expected.response.parsed is compared for equality. - expected.raises.carries is fed to assert_error_carries. - retry_middleware: block wraps the call in a default-classifier retry simulator (transient = TRANSIENT_CATEGORIES membership); the captured-request count provides provider_call_count. - mock_provider.capabilities.supports_native_response_format: false constructs the provider with force_prompt_augmentation_fallback=True. The 0016 structured-output fixtures (021–028) remain skipped at this commit. The next commit removes their skip markers. 
--- tests/conformance/harness/__init__.py | 12 ++ tests/conformance/harness/wire.py | 178 +++++++++++++++++++++++++ tests/conformance/test_llm_provider.py | 166 ++++++++++++++++++++++- 3 files changed, 350 insertions(+), 6 deletions(-) create mode 100644 tests/conformance/harness/wire.py diff --git a/tests/conformance/harness/__init__.py b/tests/conformance/harness/__init__.py index 3b2b3b0..87d4649 100644 --- a/tests/conformance/harness/__init__.py +++ b/tests/conformance/harness/__init__.py @@ -24,6 +24,13 @@ ) from .loader import discover_fixtures, load_fixture from .skip import SkipReason +from .wire import ( + assert_error_carries, + assert_response_format_absent, + assert_system_references_schema, + match_wire_body, + request_body, +) __all__ = [ "CasesFixture", @@ -31,6 +38,11 @@ "GraphFixture", "LlmProviderFixture", "SkipReason", + "assert_error_carries", + "assert_response_format_absent", + "assert_system_references_schema", "discover_fixtures", "load_fixture", + "match_wire_body", + "request_body", ] diff --git a/tests/conformance/harness/wire.py b/tests/conformance/harness/wire.py new file mode 100644 index 0000000..bce1479 --- /dev/null +++ b/tests/conformance/harness/wire.py @@ -0,0 +1,178 @@ +"""Generic helpers for conformance fixtures that assert on captured wire +requests and on the attributes of raised exceptions. + +These helpers are capability-agnostic: any fixture format that uses +``expected_wire_request`` (literal compare with wildcards), +``expected_wire_request_checks`` (sibling boolean checks), or +``expected.raises.carries`` (error-attribute introspection) can drive +into the same helpers. + +The ``"*"`` literal in an ``expected_wire_request`` string slot is a +wildcard: the actual value MUST be present and a non-empty string, but +the specific value is exempted from literal comparison. 
This convention +is documented in the spec's llm-provider conformance fixtures +(021/026/027) and inherited by any future capability that needs the +same shape. +""" + +from __future__ import annotations + +import json +from collections.abc import Mapping +from typing import Any, cast + +import httpx + +WILDCARD = "*" + + +def request_body(captured: httpx.Request) -> dict[str, Any]: + """Decode a captured httpx request's body as a JSON object.""" + parsed = json.loads(captured.content) + if not isinstance(parsed, dict): + raise AssertionError(f"wire body is not a JSON object: {parsed!r}") + return cast("dict[str, Any]", parsed) + + +def match_wire_body( + actual: Any, + expected: Any, + *, + path: str = "$", +) -> None: + """Recursive deep-equal between an actual wire-body value and an + expected shape. Strings equal to ``"*"`` in the expected value match + any non-empty string in the actual value. Keys present in + ``expected`` MUST be present in ``actual`` and equal; keys present + in ``actual`` but absent from ``expected`` are allowed. + + Raises :class:`AssertionError` with a JSON-pointer-style path on + mismatch. 
+ """ + if isinstance(expected, str) and expected == WILDCARD: + if not (isinstance(actual, str) and actual): + raise AssertionError( + f"wire mismatch at {path}: expected non-empty string (wildcard), got {actual!r}" + ) + return + + if isinstance(expected, Mapping): + if not isinstance(actual, Mapping): + raise AssertionError(f"wire mismatch at {path}: expected object, got {type(actual).__name__}") + expected_map = cast("Mapping[str, Any]", expected) + actual_map = cast("Mapping[str, Any]", actual) + for key, exp_v in expected_map.items(): + if key not in actual_map: + raise AssertionError(f"wire mismatch at {path}: missing key {key!r}") + match_wire_body(actual_map[key], exp_v, path=f"{path}.{key}") + return + + if isinstance(expected, list): + if not isinstance(actual, list): + raise AssertionError(f"wire mismatch at {path}: expected list, got {type(actual).__name__}") + expected_list = cast("list[Any]", expected) + actual_list = cast("list[Any]", actual) + if len(actual_list) != len(expected_list): + raise AssertionError( + f"wire mismatch at {path}: length differs " + f"(actual={len(actual_list)}, expected={len(expected_list)})" + ) + for idx, (a, e) in enumerate(zip(actual_list, expected_list, strict=True)): + match_wire_body(a, e, path=f"{path}[{idx}]") + return + + if actual != expected: + raise AssertionError(f"wire mismatch at {path}: actual={actual!r}, expected={expected!r}") + + +def assert_response_format_absent(body: Mapping[str, Any]) -> None: + """Assert the wire body has no ``response_format`` key.""" + if "response_format" in body: + raise AssertionError( + f"wire check failed: response_format present (value={body['response_format']!r}), expected absent" + ) + + +def assert_system_references_schema(body: Mapping[str, Any], schema: Mapping[str, Any]) -> None: + """Assert the first wire message is a system message whose content + references the supplied JSON Schema (via substring match of the + canonical-JSON form). 
+ """ + messages = body.get("messages") + if not isinstance(messages, list) or not messages: + raise AssertionError( + "wire check failed: expected a non-empty messages list to verify system-message presence" + ) + first = cast("list[Any]", messages)[0] + if not isinstance(first, dict): + raise AssertionError( + f"wire check failed: first message is not an object (got {first!r}), " + "cannot verify schema-directive reference" + ) + first_dict = cast("dict[str, Any]", first) + if first_dict.get("role") != "system": + raise AssertionError( + f"wire check failed: first message is not system (got {first_dict!r}), " + "cannot verify schema-directive reference" + ) + content = first_dict.get("content") + if not isinstance(content, str): + raise AssertionError( + f"wire check failed: system message content is not a string (got {type(content).__name__})" + ) + schema_json = json.dumps(schema, sort_keys=True) + if schema_json not in content: + raise AssertionError( + "wire check failed: system message content does not contain the serialized schema; " + f"content={content!r}" + ) + + +def assert_error_carries(exc: BaseException, carries: Mapping[str, Any]) -> None: + """Introspect attributes of a raised exception against an + expected-carries block. Supported keys: + + - ``<attr>_present: true`` — attribute MUST be set to a + truthy non-None value (e.g., ``response_schema_present``, + ``failure_description_present``). + - ``<attr>: <value>`` — attribute value equals the supplied + value (e.g., ``raw_response_content: '...'``). + - ``<attr>_mentions: <substring>`` — string attribute value + contains the supplied substring (e.g., + ``failure_description_mentions: 'age'``). 
+ """ + for key, expected in carries.items(): + if key.endswith("_present"): + attr = key[: -len("_present")] + actual = _get_carries_attr(exc, attr) + if bool(expected) and (actual is None or actual == ""): + raise AssertionError(f"carries check failed: expected {attr!r} to be present, got {actual!r}") + if not bool(expected) and (actual is not None and actual != ""): + raise AssertionError(f"carries check failed: expected {attr!r} to be absent, got {actual!r}") + elif key.endswith("_mentions"): + attr = key[: -len("_mentions")] + actual = _get_carries_attr(exc, attr) + if not isinstance(actual, str): + raise AssertionError( + f"carries check failed: {attr!r} is not a string (got {type(actual).__name__}); " + f"cannot substring-match {expected!r}" + ) + if expected not in actual: + raise AssertionError( + f"carries check failed: {attr!r}={actual!r} does not mention {expected!r}" + ) + else: + actual = _get_carries_attr(exc, key) + if actual != expected: + raise AssertionError( + f"carries check failed: {key!r} actual={actual!r}, expected={expected!r}" + ) + + +def _get_carries_attr(exc: BaseException, name: str) -> Any: + # Allow fixture-naming-friendly aliases for the carries block. The + # spec fixtures use ``raw_response_content`` (the wire-side label); + # the Python exception class names its attribute ``raw_content``. 
+ aliases = {"raw_response_content": "raw_content"} + canonical = aliases.get(name, name) + return getattr(exc, canonical, None) diff --git a/tests/conformance/test_llm_provider.py b/tests/conformance/test_llm_provider.py index 44f7213..a4632f7 100644 --- a/tests/conformance/test_llm_provider.py +++ b/tests/conformance/test_llm_provider.py @@ -19,7 +19,7 @@ from __future__ import annotations import json -from collections.abc import Iterator, Mapping +from collections.abc import Awaitable, Callable, Iterator, Mapping from pathlib import Path from typing import Any, cast @@ -29,6 +29,7 @@ from pydantic import ValidationError from openarmature.llm import ( + TRANSIENT_CATEGORIES, AssistantMessage, LlmProviderError, Message, @@ -43,6 +44,14 @@ UserMessage, ) +from .harness import ( + assert_error_carries, + assert_response_format_absent, + assert_system_references_schema, + match_wire_body, + request_body, +) + CONFORMANCE_DIR = ( Path(__file__).resolve().parents[2] / "openarmature-spec" / "spec" / "llm-provider" / "conformance" ) @@ -147,11 +156,18 @@ def _build_provider( mock_provider_cfg.get("responses") or [], ) transport, captured = _build_handler(responses) + # ``capabilities.supports_native_response_format: false`` switches + # the provider into prompt-augmentation fallback mode for structured + # output. Absent or true ⇒ native path (default). 
+ capabilities = cast("Mapping[str, Any]", mock_provider_cfg.get("capabilities") or {}) + supports_native = capabilities.get("supports_native_response_format", True) + force_fallback = supports_native is False provider = OpenAIProvider( base_url="http://mock-llm.test", model=model, api_key="test-key", transport=transport, + force_prompt_augmentation_fallback=force_fallback, ) return provider, captured @@ -209,6 +225,94 @@ def _build_tools(raw_list: list[Mapping[str, Any]] | None) -> list[Tool] | None: # --------------------------------------------------------------------------- +async def _maybe_with_retry( + operation: Callable[[], Awaitable[Any]], + retry_cfg: Mapping[str, Any] | None, +) -> Any: + """Optionally wrap an LLM-provider call in retry-middleware + semantics. The harness simulates RetryMiddleware's default + classifier (transient if exc.category is in TRANSIENT_CATEGORIES, + non-transient otherwise) without dragging the graph-engine into + LLM-provider conformance. ``classifier`` other than ``"default"`` + is not yet supported — raises AssertionError. + """ + if retry_cfg is None: + return await operation() + classifier = retry_cfg.get("classifier", "default") + if classifier != "default": + raise AssertionError(f"retry_middleware classifier {classifier!r} not yet supported") + max_attempts = int(retry_cfg.get("max_attempts", 1)) + attempts = 0 + while True: + attempts += 1 + try: + return await operation() + except LlmProviderError as exc: + if attempts >= max_attempts: + raise + if exc.category not in TRANSIENT_CATEGORIES: + raise + + +def _assert_wire_expectations( + *, + call_spec: Mapping[str, Any], + captured: list[httpx.Request], + wire_count_before: int, + response_schema: Any, +) -> None: + """Apply ``expected_wire_request`` literal compare and + ``expected_wire_request_checks`` sibling-check blocks. Both + operate on the most-recent captured chat-completions request. 
+ """ + expected_wire = cast("Mapping[str, Any] | None", call_spec.get("expected_wire_request")) + checks = cast( + "Mapping[str, Any] | None", + call_spec.get("expected_wire_request_checks"), + ) + if expected_wire is None and checks is None: + return + last_request = _last_chat_completions_request(captured, wire_count_before) + if last_request is None: + raise AssertionError( + "expected_wire_request[_checks] supplied, but no chat-completions request was captured" + ) + body = request_body(last_request) + if expected_wire is not None: + match_wire_body(body, expected_wire) + if checks is not None: + for key, value in checks.items(): + if key == "response_format_absent": + if value is True: + assert_response_format_absent(body) + elif key == "system_message_content_references_schema": + if value is True: + if not isinstance(response_schema, dict): + raise AssertionError( + "system_message_content_references_schema " + "requires a dict response_schema on the call" + ) + assert_system_references_schema(body, cast("dict[str, Any]", response_schema)) + else: + raise AssertionError(f"unknown expected_wire_request_checks key: {key!r}") + + +def _last_chat_completions_request( + captured: list[httpx.Request], + since: int, +) -> httpx.Request | None: + """Pick the most recent /v1/chat/completions request captured at or + after ``since`` (the wire-count baseline before this call started). + The mock transport sees other requests too (e.g., /v1/models on + ready()); skipping non-chat URLs keeps the wire-shape assertions + targeted at the operation under test. + """ + for req in reversed(captured[since:]): + if req.url.path == "/v1/chat/completions": + return req + return None + + def _assert_response_matches(actual: Response, expected: Mapping[str, Any]) -> None: """Verify ``actual`` matches the fixture's ``expected.response`` block. 
``raw_check.required_keys`` is enforced as a presence-only @@ -250,6 +354,17 @@ def _assert_response_matches(actual: Response, expected: Mapping[str, Any]) -> N required = cast("list[str]", raw_check.get("required_keys") or []) for key in required: assert key in actual.raw, f"raw missing required key {key!r}" + if "parsed" in expected: + expected_parsed = expected["parsed"] + actual_parsed = actual.parsed + # BaseModel-class fixture cases would surface a BaseModel + # instance on actual.parsed; the fixtures here only use the + # dict-schema form, so a dict equality compare is sufficient. + # Future fixtures driving the Pydantic-class overload can + # extend this with a model_dump() comparison. + assert actual_parsed == expected_parsed, ( + f"parsed mismatch: actual={actual_parsed!r}, expected={expected_parsed!r}" + ) def _assert_raises_matches( @@ -298,10 +413,10 @@ async def _run_one_case(spec: Mapping[str, Any]) -> None: - top-level ``mock_provider:`` configures the wire mock """ mock_cfg = cast("Mapping[str, Any]", spec.get("mock_provider") or {}) - provider, _captured = _build_provider(mock_cfg) + provider, captured = _build_provider(mock_cfg) try: for call_spec in _iter_calls(spec): - await _run_one_call(provider, call_spec) + await _run_one_call(provider, call_spec, captured) finally: await provider.aclose() @@ -328,9 +443,15 @@ def _iter_calls(spec: Mapping[str, Any]) -> Iterator[Mapping[str, Any]]: raise AssertionError("fixture has neither `calls` nor `call` block") -async def _run_one_call(provider: OpenAIProvider, call_spec: Mapping[str, Any]) -> None: +async def _run_one_call( + provider: OpenAIProvider, + call_spec: Mapping[str, Any], + captured: list[httpx.Request], +) -> None: operation = call_spec.get("operation", "complete") expected = cast("Mapping[str, Any]", call_spec.get("expected") or {}) + response_schema = call_spec.get("response_schema") + retry_mw_cfg = cast("Mapping[str, Any] | None", call_spec.get("retry_middleware")) if operation == 
"complete": # Per spec §3 "Validation timing" — complete() validates at @@ -340,6 +461,7 @@ async def _run_one_call(provider: OpenAIProvider, call_spec: Mapping[str, Any]) # complete() with a malformed input raises"), so wrap the # construction in the raises path so a pydantic ValidationError # surfaces as ProviderInvalidRequest. + wire_count_before = len(captured) if "raises" in expected: with pytest.raises(LlmProviderError) as excinfo: try: @@ -349,13 +471,45 @@ async def _run_one_call(provider: OpenAIProvider, call_spec: Mapping[str, Any]) tools = _build_tools(cast("list[Mapping[str, Any]] | None", call_spec.get("tools"))) except ValidationError as ve: raise ProviderInvalidRequest(str(ve)) from ve - await provider.complete(messages, tools) + await _maybe_with_retry( + lambda: provider.complete(messages, tools, response_schema=response_schema), + retry_mw_cfg, + ) _assert_raises_matches(excinfo, expected["raises"]) + carries = cast( + "Mapping[str, Any] | None", + cast("Mapping[str, Any]", expected["raises"]).get("carries"), + ) + if carries: + assert_error_carries(excinfo.value, carries) else: messages = [_build_message(m) for m in cast("list[Mapping[str, Any]]", call_spec["messages"])] + messages_snapshot = [m.model_dump(mode="json") for m in messages] tools = _build_tools(cast("list[Mapping[str, Any]] | None", call_spec.get("tools"))) - response = await provider.complete(messages, tools) + response = await _maybe_with_retry( + lambda: provider.complete(messages, tools, response_schema=response_schema), + retry_mw_cfg, + ) _assert_response_matches(response, cast("Mapping[str, Any]", expected.get("response") or {})) + if expected.get("caller_messages_unmodified") is True: + post_snapshot = [m.model_dump(mode="json") for m in messages] + assert post_snapshot == messages_snapshot, ( + "caller_messages_unmodified: messages list mutated by complete()" + ) + + wire_count_after = len(captured) + provider_call_count = wire_count_after - wire_count_before + 
expected_call_count = expected.get("provider_call_count") + if expected_call_count is not None: + assert provider_call_count == expected_call_count, ( + f"provider_call_count: actual={provider_call_count}, expected={expected_call_count}" + ) + _assert_wire_expectations( + call_spec=call_spec, + captured=captured, + wire_count_before=wire_count_before, + response_schema=response_schema, + ) return if operation == "ready": From adf617c3b250478b757e810a040d385720e63eb4 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Thu, 14 May 2026 19:54:34 -0700 Subject: [PATCH 09/24] test: drive 0016 fixtures 021-028 + add structured-output unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes the deferred-fixture skip markers for the 8 structured-output conformance fixtures (021–028). All pass against the OpenAIProvider + harness extensions landed in earlier commits. Adds tests/unit/test_structured_output.py covering bits the conformance fixtures don't exercise directly: - validate_response_schema edge cases: non-dict, non-object top-level, missing type. - strict_mode_supported: required-coverage rule, additionalProperties true, nested-object violation, anyOf branch violation, internal $ref resolution, unresolvable $ref, $ref cycle (self-referential schema). - _derive_schema_name: title-when-present, hash-fallback, determinism, empty-title behavior. - _augment_messages_with_schema_directive: prepend-when-no-system, extend-existing-system, caller-list-not-mutated, serialized-schema- substring. - Pydantic-class overload: class-in returns validated BaseModel instance; pydantic ValidationError wraps in StructuredOutputInvalid; wire body produced from class equals wire body produced from the equivalent .model_json_schema() dict. - uses_prompt_augmentation_fallback inspect property: False by default, True when constructor flag is set. 
--- tests/conformance/test_llm_provider.py | 11 - tests/unit/test_structured_output.py | 401 +++++++++++++++++++++++++ 2 files changed, 401 insertions(+), 11 deletions(-) create mode 100644 tests/unit/test_structured_output.py diff --git a/tests/conformance/test_llm_provider.py b/tests/conformance/test_llm_provider.py index a4632f7..0ea21c3 100644 --- a/tests/conformance/test_llm_provider.py +++ b/tests/conformance/test_llm_provider.py @@ -75,17 +75,6 @@ "018-content-blocks-image-source-missing": "0015 multimodal images (PR-2)", "019-content-blocks-invalid-detail-value": "0015 multimodal images (PR-2)", "020-content-blocks-inline-image-missing-media-type": "0015 multimodal images (PR-2)", - # proposal 0016 — structured output (this PR; wired up later in the - # commit sequence). These rows are removed in the commit that drives - # the structured-output fixtures. - "021-structured-output-success": "0016 structured output (this PR; not yet wired)", - "022-structured-output-parse-failure": "0016 structured output (this PR; not yet wired)", - "023-structured-output-validation-failure": "0016 structured output (this PR; not yet wired)", - "024-structured-output-non-transient": "0016 structured output (this PR; not yet wired)", - "025-structured-output-with-tool-calls": "0016 structured output (this PR; not yet wired)", - "026-structured-output-openai-wire-mapping-native": "0016 structured output (this PR; not yet wired)", - "027-structured-output-openai-wire-mapping-fallback": "0016 structured output (this PR; not yet wired)", - "028-structured-output-no-schema-regression": "0016 structured output (this PR; not yet wired)", } diff --git a/tests/unit/test_structured_output.py b/tests/unit/test_structured_output.py new file mode 100644 index 0000000..484ec2b --- /dev/null +++ b/tests/unit/test_structured_output.py @@ -0,0 +1,401 @@ +"""Focused tests for the structured-output surface. 
+ +The conformance suite (``tests/conformance/test_llm_provider.py``) +covers the spec's behavioral surface end-to-end against fixtures +021–028. These unit tests fill gaps the conformance fixtures don't +exercise directly: the strict-mode heuristic's tree-walk edge cases +(anyOf, $ref, cycles), the schema-name derivation, the +message-augmentation directive helper, and the Pydantic-class +overload's class-in → instance-out shape. +""" + +from __future__ import annotations + +import json +from typing import Any + +import httpx +import pytest +from pydantic import BaseModel + +from openarmature.llm import ( + OpenAIProvider, + ProviderInvalidRequest, + StructuredOutputInvalid, + SystemMessage, + UserMessage, + strict_mode_supported, + validate_response_schema, +) +from openarmature.llm.providers.openai import ( + _augment_messages_with_schema_directive, + _derive_schema_name, +) + +# --------------------------------------------------------------------------- +# validate_response_schema +# --------------------------------------------------------------------------- + + +def test_validate_response_schema_accepts_object_top_level() -> None: + schema = {"type": "object", "properties": {"x": {"type": "integer"}}, "required": ["x"]} + validate_response_schema(schema) # no raise + + +def test_validate_response_schema_rejects_non_dict() -> None: + with pytest.raises(ProviderInvalidRequest, match="MUST be a dict"): + validate_response_schema("not a dict") # type: ignore[arg-type] + + +def test_validate_response_schema_rejects_non_object_top_level() -> None: + with pytest.raises(ProviderInvalidRequest, match="top-level type MUST be 'object'"): + validate_response_schema({"type": "string"}) + + +def test_validate_response_schema_rejects_missing_type() -> None: + with pytest.raises(ProviderInvalidRequest, match="top-level type MUST be 'object'"): + validate_response_schema({"properties": {"x": {"type": "integer"}}}) + + +# 
--------------------------------------------------------------------------- +# strict_mode_supported +# --------------------------------------------------------------------------- + + +def test_strict_mode_all_required_passes() -> None: + schema = { + "type": "object", + "properties": {"a": {"type": "string"}, "b": {"type": "integer"}}, + "required": ["a", "b"], + "additionalProperties": False, + } + assert strict_mode_supported(schema) is True + + +def test_strict_mode_missing_required_fails() -> None: + schema = { + "type": "object", + "properties": {"a": {"type": "string"}, "b": {"type": "integer"}}, + "required": ["a"], # "b" not required → violates strict + "additionalProperties": False, + } + assert strict_mode_supported(schema) is False + + +def test_strict_mode_additional_properties_true_fails() -> None: + schema = { + "type": "object", + "properties": {"a": {"type": "string"}}, + "required": ["a"], + "additionalProperties": True, + } + assert strict_mode_supported(schema) is False + + +def test_strict_mode_recurses_into_nested_object() -> None: + schema: dict[str, Any] = { + "type": "object", + "properties": { + "outer": { + "type": "object", + "properties": {"inner": {"type": "string"}}, + "required": [], # nested object violates rule + }, + }, + "required": ["outer"], + } + assert strict_mode_supported(schema) is False + + +def test_strict_mode_anyof_branch_must_satisfy() -> None: + # anyOf member violating the constraint → False + schema = { + "type": "object", + "properties": { + "x": { + "anyOf": [ + {"type": "string"}, + {"type": "object", "properties": {"y": {"type": "string"}}}, # no required + ] + }, + }, + "required": ["x"], + } + assert strict_mode_supported(schema) is False + + +def test_strict_mode_resolves_internal_ref() -> None: + schema = { + "type": "object", + "$defs": { + "Inner": { + "type": "object", + "properties": {"a": {"type": "string"}}, + "required": ["a"], + } + }, + "properties": {"inner": {"$ref": "#/$defs/Inner"}}, + 
"required": ["inner"], + } + assert strict_mode_supported(schema) is True + + +def test_strict_mode_unresolvable_ref_fails() -> None: + schema = { + "type": "object", + "properties": {"x": {"$ref": "https://example.com/external-schema.json"}}, + "required": ["x"], + } + assert strict_mode_supported(schema) is False + + +def test_strict_mode_handles_ref_cycle() -> None: + # Self-referential schema: each entry has a "children" key pointing + # back to the same definition. Without cycle protection this would + # recurse forever. + schema = { + "type": "object", + "$defs": { + "Node": { + "type": "object", + "properties": { + "value": {"type": "string"}, + "children": {"$ref": "#/$defs/Node"}, + }, + "required": ["value", "children"], + } + }, + "properties": {"root": {"$ref": "#/$defs/Node"}}, + "required": ["root"], + } + assert strict_mode_supported(schema) is True + + +# --------------------------------------------------------------------------- +# _derive_schema_name +# --------------------------------------------------------------------------- + + +def test_derive_schema_name_uses_title_when_present() -> None: + schema: dict[str, Any] = {"type": "object", "title": "PersonRecord", "properties": {}, "required": []} + assert _derive_schema_name(schema) == "PersonRecord" + + +def test_derive_schema_name_falls_back_to_hash_when_no_title() -> None: + schema = {"type": "object", "properties": {"x": {"type": "integer"}}, "required": ["x"]} + name = _derive_schema_name(schema) + assert name.startswith("oa_schema_") + assert len(name) == len("oa_schema_") + 16 + + +def test_derive_schema_name_is_deterministic() -> None: + schema = {"type": "object", "properties": {"a": {"type": "string"}}, "required": ["a"]} + assert _derive_schema_name(schema) == _derive_schema_name(schema) + + +def test_derive_schema_name_ignores_empty_title() -> None: + schema = {"type": "object", "title": "", "properties": {"x": {"type": "string"}}, "required": ["x"]} + assert 
_derive_schema_name(schema).startswith("oa_schema_") + + +# --------------------------------------------------------------------------- +# _augment_messages_with_schema_directive +# --------------------------------------------------------------------------- + + +SAMPLE_SCHEMA: dict[str, object] = { + "type": "object", + "properties": {"x": {"type": "integer"}}, + "required": ["x"], +} + + +def test_augment_prepends_when_no_system_message() -> None: + original = [UserMessage(content="hello")] + out = _augment_messages_with_schema_directive(original, SAMPLE_SCHEMA) + assert len(out) == 2 + assert isinstance(out[0], SystemMessage) + assert isinstance(out[1], UserMessage) + assert out[1] is original[0] # user message reused unchanged + + +def test_augment_extends_existing_system_message() -> None: + original = [SystemMessage(content="you are helpful"), UserMessage(content="hello")] + out = _augment_messages_with_schema_directive(original, SAMPLE_SCHEMA) + assert len(out) == 2 + assert isinstance(out[0], SystemMessage) + assert "you are helpful" in out[0].content + assert "JSON Schema" in out[0].content + + +def test_augment_does_not_mutate_caller_list() -> None: + original = [UserMessage(content="hello")] + snapshot = [m.model_dump(mode="json") for m in original] + _augment_messages_with_schema_directive(original, SAMPLE_SCHEMA) + after = [m.model_dump(mode="json") for m in original] + assert after == snapshot + + +def test_augment_includes_serialized_schema_substring() -> None: + out = _augment_messages_with_schema_directive([UserMessage(content="x")], SAMPLE_SCHEMA) + schema_json = json.dumps(SAMPLE_SCHEMA, sort_keys=True) + assert schema_json in out[0].content + + +# --------------------------------------------------------------------------- +# Pydantic-class overload +# --------------------------------------------------------------------------- + + +class PersonModel(BaseModel): + name: str + age: int + + +def _mock_chat_completion_response(content: str) -> 
httpx.MockTransport: + def handler(request: httpx.Request) -> httpx.Response: + body = { + "id": "test", + "object": "chat.completion", + "created": 1700000000, + "model": "test-model", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": content}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10}, + } + return httpx.Response(200, content=json.dumps(body).encode("utf-8")) + + return httpx.MockTransport(handler) + + +async def test_pydantic_class_returns_validated_instance() -> None: + transport = _mock_chat_completion_response('{"name":"Alice","age":30}') + provider = OpenAIProvider( + base_url="http://mock-llm.test", + model="test-model", + api_key="test-key", + transport=transport, + ) + try: + response = await provider.complete( + [UserMessage(content="generate a person")], + response_schema=PersonModel, + ) + finally: + await provider.aclose() + assert isinstance(response.parsed, PersonModel) + assert response.parsed.name == "Alice" + assert response.parsed.age == 30 + + +async def test_pydantic_validation_failure_wraps_in_structured_output_invalid() -> None: + # "thirty" is not a valid int for the age field. + transport = _mock_chat_completion_response('{"name":"Alice","age":"thirty"}') + provider = OpenAIProvider( + base_url="http://mock-llm.test", + model="test-model", + api_key="test-key", + transport=transport, + ) + try: + with pytest.raises(StructuredOutputInvalid) as excinfo: + await provider.complete( + [UserMessage(content="generate a person")], + response_schema=PersonModel, + ) + finally: + await provider.aclose() + err = excinfo.value + assert err.raw_content == '{"name":"Alice","age":"thirty"}' + assert "age" in err.failure_description + + +async def test_pydantic_class_wire_body_matches_dict_form() -> None: + # The wire body produced by class-in MUST equal the wire body + # produced by passing the equivalent JSON Schema dict. 
+ captured_class: list[httpx.Request] = [] + captured_dict: list[httpx.Request] = [] + + def handler_class(request: httpx.Request) -> httpx.Response: + captured_class.append(request) + return httpx.Response( + 200, + content=json.dumps( + { + "id": "x", + "object": "chat.completion", + "created": 0, + "model": "test-model", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": '{"name":"A","age":1}'}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}, + } + ).encode("utf-8"), + ) + + def handler_dict(request: httpx.Request) -> httpx.Response: + captured_dict.append(request) + return handler_class(request) + + transport_class = httpx.MockTransport(handler_class) + transport_dict = httpx.MockTransport(handler_dict) + + p_class = OpenAIProvider( + base_url="http://mock-llm.test", + model="test-model", + api_key="test-key", + transport=transport_class, + ) + p_dict = OpenAIProvider( + base_url="http://mock-llm.test", + model="test-model", + api_key="test-key", + transport=transport_dict, + ) + + schema_from_class = PersonModel.model_json_schema() + + try: + await p_class.complete([UserMessage(content="x")], response_schema=PersonModel) + await p_dict.complete([UserMessage(content="x")], response_schema=schema_from_class) + finally: + await p_class.aclose() + await p_dict.aclose() + + body_class = json.loads(captured_class[0].content) + body_dict = json.loads(captured_dict[0].content) + assert body_class["response_format"] == body_dict["response_format"] + + +# --------------------------------------------------------------------------- +# uses_prompt_augmentation_fallback inspect property +# --------------------------------------------------------------------------- + + +def test_inspect_property_native_default() -> None: + provider = OpenAIProvider( + base_url="http://mock-llm.test", + model="test-model", + api_key="test-key", + ) + assert provider.uses_prompt_augmentation_fallback is 
False + + +def test_inspect_property_fallback_when_forced() -> None: + provider = OpenAIProvider( + base_url="http://mock-llm.test", + model="test-model", + api_key="test-key", + force_prompt_augmentation_fallback=True, + ) + assert provider.uses_prompt_augmentation_fallback is True From 1d1e2df86c96114f23d7175f9005cff26525c1bb Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Thu, 14 May 2026 19:56:28 -0700 Subject: [PATCH 10/24] docs: changelog entry for proposal 0016 under [Unreleased] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documents the structured-output surface added in this PR: the response_schema parameter, Response.parsed field, StructuredOutputInvalid error category, OpenAIProvider native + fallback wire paths, the provider-agnostic schema helpers, the capability-agnostic conformance harness extensions, and the jsonschema runtime dependency. Also records: - Spec pin bump 0.10.0 → 0.15.0 (skip-ahead governance) with per-proposal deferred-skip in the conformance suite until each PR lands. - Release gate: do not tag the consolidated release until all five PRs of the batch (0011, 0014, 0015, 0016, 0017) are merged. --- CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 337b3ee..58bc816 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,24 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). The ## [Unreleased] +### Added + +- **Structured output (proposal 0016, spec v0.14.0).** `Provider.complete()` now accepts an optional `response_schema` parameter — either a JSON Schema dict or a Pydantic `BaseModel` subclass. When supplied, the provider constrains the model's output to the schema and populates `Response.parsed` with the validated value (`dict` for dict-schema input, a `BaseModel` instance for class input). 
New `StructuredOutputInvalid` error category (non-transient by default) raises on JSON parse failure or schema validation failure; carries the requested schema, the raw response content, and a failure description. +- **`OpenAIProvider` native response_format wire path.** When `response_schema` is supplied, the chat-completions request body carries `response_format: { type: "json_schema", json_schema: { name, schema, strict } }`. The `strict` flag is determined by a deep recursive walk over the schema (object-property required-coverage rule across `anyOf` / `oneOf` / `allOf` and `$ref` targets, with cycle protection); unresolvable refs fall through to `strict: false`. The `name` field uses `schema.title` when present, otherwise a deterministic sha256-prefix hash. +- **`OpenAIProvider` prompt-augmentation fallback.** Constructor flag `force_prompt_augmentation_fallback: bool` (default `False`) and read-only inspect property `uses_prompt_augmentation_fallback: bool`. When the flag is on, structured-output calls build a fresh message list with a system directive containing the serialized schema, omit `response_format` from the wire, and validate the response post-receive. The caller's original `messages` list is never mutated. Use for OpenAI-compatible servers (older vLLM, some LM Studio releases, llama.cpp variants) that reject or silently ignore `response_format`. +- **Provider-agnostic schema helpers.** `openarmature.llm.validate_response_schema(schema)` (raises `ProviderInvalidRequest` when the schema is not a dict with a top-level `type: "object"`) and `openarmature.llm.strict_mode_supported(schema)` (the deep-tree strict-mode constraint check) are exported for reuse by future Anthropic/Gemini providers. 
+- **Capability-agnostic conformance harness helpers.** `tests/conformance/harness/wire.py` adds `match_wire_body` (recursive deep-equal with `"*"` wildcard support), `assert_response_format_absent`, `assert_system_references_schema`, and `assert_error_carries` for the `expected_wire_request[_checks]` and `expected.raises.carries.{...}` fixture shapes. Used by the 0016 fixtures; available for the upcoming 0014 / 0015 / 0017 fixture sets. +- **Runtime dependency: `jsonschema>=4.0`.** Used by the dict-schema validation path. The Pydantic-class path uses Pydantic's native validator and does not need `jsonschema`. + +### Changed + +- **Pinned spec version: 0.10.0 → 0.15.0.** Adopts the skip-ahead governance principle: the submodule jumps across v0.11.0–v0.15.0 (proposals 0009, 0011, 0014, 0015, 0016, 0017) in one bump. Only the surface introduced by proposal 0016 is implemented in this changelog entry; fixtures from 0011 / 0014 / 0015 / 0017 are marked deferred-skip in the conformance suite and unmark as their respective PRs land. + +### Notes + +- **Release gate: do not tag until all of {0011, 0014, 0015, 0016, 0017} are merged.** This batch implements one proposal per PR and lands a consolidated release after the fifth PR. Cutting a release tag before the batch is complete would ship a partial spec implementation against the v0.15.0 pin. +- **Pre-1.0 MINOR.** Existing free-form callers (no `response_schema`) see no behavior change — the new field defaults to `None`, the wire body omits `response_format`, and `Response.parsed` remains absent. + ## [0.5.0] — 2026-05-10 First release on real PyPI. Catches the implementation up from spec v0.5.x to v0.10.0 across six phases — the spec accepted eight proposals while the python lib was at v0.3.1, and v0.5.0 lands all of them in one curated drop. 
From 03bcf23d54bc5a959502c72bf8918209558b0c46 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 11:07:45 -0700 Subject: [PATCH 11/24] fix(llm): address Copilot review on PR #42 Addresses the 8 Copilot review threads on the structured-output PR: - strict_mode_supported now requires additionalProperties to be EXPLICITLY false (not just missing-or-false). Missing implies the JSON Schema default of permitting extras, which OpenAI's strict mode rejects. Pydantic's .model_json_schema() omits the key by default, so the class-input path would have 400ed against OpenAI even with conformance fixtures passing. - _normalize_response_schema now raises ProviderInvalidRequest when the class form is not a BaseModel subclass, instead of letting AttributeError leak from model_json_schema. - validate_response_schema now runs jsonschema.Draft202012Validator .check_schema() at the boundary, wrapping SchemaError as ProviderInvalidRequest. Malformed schemas now fail at the API boundary instead of escaping at decode time. - _derive_schema_name now regex-checks the title against OpenAI's name constraint (^[a-zA-Z0-9_-]{1,64}$) and falls back to the hashed name when the title doesn't match. Sanitizing-in-place would silently mutate user intent; the hash is a more honest fallback. - Two comments claiming Message instances are immutable Pydantic models were updated. The models are not configured with frozen=True; the safety actually comes from the helpers not modifying them in place. - match_wire_body now fails on extra keys in actual. The previous permissive default defeated the point of expected_wire_request being a literal compare; partial assertions continue to live in the sibling expected_wire_request_checks block. - _iter_calls now propagates expected_wire_request, expected_wire_request_checks, response_schema, and retry_middleware from sibling-of-call into the call dict. Only expected was being copied before.
Cases-form fixtures with case-level wire expectations were silently running without those assertions. The _iter_calls fix surfaced two pre-existing gaps in the harness's handling of cases-shape fixtures, fixed inline: - The harness was never wiring config from the call spec into provider.complete(); fixture 005's runtime_config_passthrough case was effectively a no-op. - OpenAIProvider was using json.dumps default formatting for tool_call.function.arguments (with spaces after colons), which doesn't match the canonical compact form OpenAI emits or the spec's fixture 005 expectations. Switched to compact form. New unit tests cover the missing-additionalProperties strict-mode case, the non-BaseModel class rejection, the malformed JSON Schema rejection, and the title-falls-back hash cases. --- src/openarmature/llm/provider.py | 21 +++++-- src/openarmature/llm/providers/openai.py | 47 ++++++++++----- tests/conformance/harness/wire.py | 10 +++- tests/conformance/test_llm_provider.py | 36 +++++++++--- tests/unit/test_structured_output.py | 73 +++++++++++++++++++++++- 5 files changed, 157 insertions(+), 30 deletions(-) diff --git a/src/openarmature/llm/provider.py b/src/openarmature/llm/provider.py index 04fe6e9..8786a89 100644 --- a/src/openarmature/llm/provider.py +++ b/src/openarmature/llm/provider.py @@ -38,6 +38,7 @@ from collections.abc import Sequence from typing import Any, Protocol, cast +import jsonschema from pydantic import BaseModel from .errors import ProviderInvalidRequest @@ -184,8 +185,9 @@ def validate_response_schema(schema: object) -> None: """Pre-send validation for a JSON Schema passed as the ``response_schema`` argument to ``complete()``. - Raises :class:`ProviderInvalidRequest` if the schema is not a dict - or does not declare a top-level object type. + Raises :class:`ProviderInvalidRequest` if the schema is not a dict, + does not declare a top-level object type, or is not a valid JSON + Schema document. 
""" if not isinstance(schema, dict): raise ProviderInvalidRequest(f"response_schema: MUST be a dict (got {type(schema).__name__})") @@ -195,12 +197,23 @@ def validate_response_schema(schema: object) -> None: raise ProviderInvalidRequest( f"response_schema: top-level type MUST be 'object' (got {schema_type!r})" ) + # Full JSON Schema validity check at the boundary so a malformed + # schema raises ProviderInvalidRequest here instead of escaping as + # jsonschema.SchemaError at decode time. ValidationError covers + # instance-against-schema failures and is handled separately on the + # parse path. + try: + jsonschema.Draft202012Validator.check_schema(schema_dict) + except jsonschema.SchemaError as exc: + raise ProviderInvalidRequest(f"response_schema: not a valid JSON Schema: {exc.message}") from exc # Strict mode (OpenAI's response_format strict:true and the analogous # native-decoding paths in Anthropic / Gemini) requires the schema to # satisfy two rules at every nested level: -# 1. additionalProperties is NOT true (false or absent). +# 1. additionalProperties is EXPLICITLY false. OpenAI rejects schemas +# where the key is absent, since absence means JSON Schema's +# default of permitting extras. # 2. every key in `properties` is listed in `required`. 
# strict_mode_supported() walks the schema tree (object properties, # array items, anyOf/oneOf/allOf branches, $ref targets with cycle @@ -272,7 +285,7 @@ def _strict_mode_check( ) if is_object_type: - if schema_dict.get("additionalProperties") is True: + if schema_dict.get("additionalProperties") is not False: return False properties = schema_dict.get("properties") if properties is not None and not isinstance(properties, dict): diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py index 5c9e4a5..61788c6 100644 --- a/src/openarmature/llm/providers/openai.py +++ b/src/openarmature/llm/providers/openai.py @@ -42,6 +42,7 @@ import hashlib import json +import re import uuid from collections.abc import Sequence from typing import Any, Literal, cast @@ -237,9 +238,8 @@ async def complete( # On the fallback path, the wire-side messages list is an # augmented COPY of the caller's messages — original messages # MUST NOT be mutated. _augment_messages_with_schema_directive - # builds a fresh list; the original instances are reused - # (immutable Pydantic models) so the caller's sequence is - # untouched. + # builds a fresh list and does not modify the reused Message + # instances in place; the caller's sequence is untouched. wire_messages: Sequence[Message] = messages if schema_dict is not None and self._force_prompt_augmentation_fallback: wire_messages = _augment_messages_with_schema_directive(messages, schema_dict) @@ -461,9 +461,15 @@ def _normalize_response_schema( if response_schema is None: return None, None if isinstance(response_schema, type): - # Per the Protocol signature, the only class form accepted is - # a BaseModel subclass; non-BaseModel classes will AttributeError - # on model_json_schema below. + # Defensive runtime check: the Protocol signature accepts + # type[BaseModel], but Python doesn't enforce that at the call + # boundary. 
Reject non-BaseModel classes with a canonical error + # instead of letting AttributeError leak from model_json_schema. + if not issubclass(response_schema, BaseModel): # pyright: ignore[reportUnnecessaryIsInstance] + raise ProviderInvalidRequest( + f"response_schema: class form MUST be a Pydantic BaseModel subclass " + f"(got {response_schema.__name__})" + ) schema_dict = response_schema.model_json_schema() validate_response_schema(schema_dict) return schema_dict, response_schema @@ -471,14 +477,22 @@ def _normalize_response_schema( return response_schema, None +# OpenAI's response_format.json_schema.name field is restricted to +# letters, digits, underscores, and dashes with a max length of 64 +# characters. A JSON Schema title can be any string ("Person Record", +# "User's Profile", etc.), so verbatim use risks a 400 on the wire. +_OPENAI_SCHEMA_NAME_RE = re.compile(r"^[a-zA-Z0-9_-]{1,64}$") + + # Derive a stable identifier for the JSON Schema for OpenAI's # response_format.json_schema.name field. Uses the schema's `title` -# when present (and a valid identifier-shaped string); otherwise -# derives a deterministic short hash so the same schema always -# produces the same name across calls. +# when it satisfies the provider's name constraints; otherwise derives +# a deterministic short hash so the same schema always produces the +# same name across calls. Sanitizing-in-place would silently mutate +# user intent; the hash is a more honest fallback. def _derive_schema_name(schema: dict[str, Any]) -> str: title = schema.get("title") - if isinstance(title, str) and title: + if isinstance(title, str) and _OPENAI_SCHEMA_NAME_RE.match(title): return title canonical = json.dumps(schema, sort_keys=True).encode("utf-8") return f"oa_schema_{hashlib.sha256(canonical).hexdigest()[:16]}" @@ -546,9 +560,11 @@ def _parse_and_validate( # Construct a fresh message list with a schema directive added. 
The # directive is appended to the existing system message's content when # present, or prepended as a new system message otherwise. The caller's -# original list is never mutated; Message instances are reused because -# they are immutable Pydantic models. The serialized schema appears -# verbatim in the directive so callers that need to verify the directive +# original list is never mutated; Message instances are reused, and +# this helper does not modify them in place (the message models are +# not frozen Pydantic models, so the safety is structural, not +# enforced by the type). The serialized schema appears verbatim in +# the directive so callers that need to verify the directive # references the schema (conformance harnesses, observability spans) # can substring-match the canonical JSON form. def _augment_messages_with_schema_directive( @@ -585,7 +601,10 @@ def _message_to_wire(msg: Message) -> dict[str, Any]: "type": "function", "function": { "name": tc.name, - "arguments": json.dumps(tc.arguments or {}), + # Canonical compact form (no inter-token spaces). Matches + # the spec's wire-mapping fixture (005, cases shape) and + # the form OpenAI itself emits. + "arguments": json.dumps(tc.arguments or {}, separators=(",", ":")), }, } for tc in msg.tool_calls diff --git a/tests/conformance/harness/wire.py b/tests/conformance/harness/wire.py index bce1479..12fef3e 100644 --- a/tests/conformance/harness/wire.py +++ b/tests/conformance/harness/wire.py @@ -42,9 +42,10 @@ def match_wire_body( ) -> None: """Recursive deep-equal between an actual wire-body value and an expected shape. Strings equal to ``"*"`` in the expected value match - any non-empty string in the actual value. Keys present in - ``expected`` MUST be present in ``actual`` and equal; keys present - in ``actual`` but absent from ``expected`` are allowed. + any non-empty string in the actual value. 
``expected_wire_request`` + is a literal compare: keys present in ``actual`` but absent from + ``expected`` are NOT allowed. Partial assertions belong in the + sibling ``expected_wire_request_checks`` block. Raises :class:`AssertionError` with a JSON-pointer-style path on mismatch. @@ -61,6 +62,9 @@ def match_wire_body( raise AssertionError(f"wire mismatch at {path}: expected object, got {type(actual).__name__}") expected_map = cast("Mapping[str, Any]", expected) actual_map = cast("Mapping[str, Any]", actual) + extra = set(actual_map) - set(expected_map) + if extra: + raise AssertionError(f"wire mismatch at {path}: unexpected extra keys in actual: {sorted(extra)}") for key, exp_v in expected_map.items(): if key not in actual_map: raise AssertionError(f"wire mismatch at {path}: missing key {key!r}") diff --git a/tests/conformance/test_llm_provider.py b/tests/conformance/test_llm_provider.py index 0ea21c3..6cfc497 100644 --- a/tests/conformance/test_llm_provider.py +++ b/tests/conformance/test_llm_provider.py @@ -37,6 +37,7 @@ ProviderInvalidRequest, ProviderRateLimit, Response, + RuntimeConfig, SystemMessage, Tool, ToolCall, @@ -410,23 +411,40 @@ async def _run_one_case(spec: Mapping[str, Any]) -> None: await provider.aclose() +# Keys that may live as siblings to a ``call:`` block in a cases-shape +# fixture but are conceptually call-level metadata. ``_iter_calls`` +# copies these from the case into the yielded call so the test runner +# sees them in one place. +_CASE_LEVEL_CALL_KEYS = ( + "expected", + "expected_wire_request", + "expected_wire_request_checks", + "response_schema", + "retry_middleware", +) + + def _iter_calls(spec: Mapping[str, Any]) -> Iterator[Mapping[str, Any]]: - """Yield each call dict with its ``expected`` block attached. + """Yield each call dict with its case-level metadata attached. Two shapes the fixtures use: - ``calls: [{operation, messages, expected, ...}]`` — call and expected are siblings inside each call entry. 
- ``call: {operation, messages, ...}`` + sibling ``expected: ...`` - — the case-shape, where expected lives alongside the call. - Both are normalised here to a flat dict where ``expected`` is on - the call. + (and possibly ``expected_wire_request:``, ``response_schema:``, + ``retry_middleware:``) — the case-shape, where call-level + metadata lives alongside the call. All sibling keys in + ``_CASE_LEVEL_CALL_KEYS`` are folded into the call dict here so + the runner reads them from one place. The nested ``call`` block + takes precedence when both are present. """ if "calls" in spec: yield from cast("list[Mapping[str, Any]]", spec["calls"]) elif "call" in spec: call = dict(cast("Mapping[str, Any]", spec["call"])) - if "expected" in spec and "expected" not in call: - call["expected"] = spec["expected"] + for key in _CASE_LEVEL_CALL_KEYS: + if key in spec and key not in call: + call[key] = spec[key] yield call else: raise AssertionError("fixture has neither `calls` nor `call` block") @@ -441,6 +459,8 @@ async def _run_one_call( expected = cast("Mapping[str, Any]", call_spec.get("expected") or {}) response_schema = call_spec.get("response_schema") retry_mw_cfg = cast("Mapping[str, Any] | None", call_spec.get("retry_middleware")) + config_block = call_spec.get("config") + config = RuntimeConfig(**cast("Mapping[str, Any]", config_block)) if config_block else None if operation == "complete": # Per spec §3 "Validation timing" — complete() validates at @@ -461,7 +481,7 @@ async def _run_one_call( except ValidationError as ve: raise ProviderInvalidRequest(str(ve)) from ve await _maybe_with_retry( - lambda: provider.complete(messages, tools, response_schema=response_schema), + lambda: provider.complete(messages, tools, config, response_schema=response_schema), retry_mw_cfg, ) _assert_raises_matches(excinfo, expected["raises"]) @@ -476,7 +496,7 @@ async def _run_one_call( messages_snapshot = [m.model_dump(mode="json") for m in messages] tools = 
_build_tools(cast("list[Mapping[str, Any]] | None", call_spec.get("tools"))) response = await _maybe_with_retry( - lambda: provider.complete(messages, tools, response_schema=response_schema), + lambda: provider.complete(messages, tools, config, response_schema=response_schema), retry_mw_cfg, ) _assert_response_matches(response, cast("Mapping[str, Any]", expected.get("response") or {})) diff --git a/tests/unit/test_structured_output.py b/tests/unit/test_structured_output.py index 484ec2b..3521fc0 100644 --- a/tests/unit/test_structured_output.py +++ b/tests/unit/test_structured_output.py @@ -57,6 +57,21 @@ def test_validate_response_schema_rejects_missing_type() -> None: validate_response_schema({"properties": {"x": {"type": "integer"}}}) +def test_validate_response_schema_rejects_malformed_schema() -> None: + # `"type": "foobar"` is not a valid JSON Schema type keyword; the + # boundary check should catch this and raise ProviderInvalidRequest + # rather than letting jsonschema.SchemaError leak at parse time. + with pytest.raises(ProviderInvalidRequest, match="not a valid JSON Schema"): + validate_response_schema( + { + "type": "object", + "properties": {"x": {"type": "foobar"}}, + "required": ["x"], + "additionalProperties": False, + } + ) + + # --------------------------------------------------------------------------- # strict_mode_supported # --------------------------------------------------------------------------- @@ -92,6 +107,18 @@ def test_strict_mode_additional_properties_true_fails() -> None: assert strict_mode_supported(schema) is False +def test_strict_mode_missing_additional_properties_fails() -> None: + # OpenAI strict mode requires additionalProperties: false to be + # EXPLICITLY set; absence (the default for Pydantic-derived schemas) + # is not strict-compatible. 
+ schema = { + "type": "object", + "properties": {"a": {"type": "string"}}, + "required": ["a"], + } + assert strict_mode_supported(schema) is False + + def test_strict_mode_recurses_into_nested_object() -> None: schema: dict[str, Any] = { "type": "object", @@ -132,10 +159,12 @@ def test_strict_mode_resolves_internal_ref() -> None: "type": "object", "properties": {"a": {"type": "string"}}, "required": ["a"], + "additionalProperties": False, } }, "properties": {"inner": {"$ref": "#/$defs/Inner"}}, "required": ["inner"], + "additionalProperties": False, } assert strict_mode_supported(schema) is True @@ -153,7 +182,7 @@ def test_strict_mode_handles_ref_cycle() -> None: # Self-referential schema: each entry has a "children" key pointing # back to the same definition. Without cycle protection this would # recurse forever. - schema = { + schema: dict[str, Any] = { "type": "object", "$defs": { "Node": { @@ -163,10 +192,12 @@ def test_strict_mode_handles_ref_cycle() -> None: "children": {"$ref": "#/$defs/Node"}, }, "required": ["value", "children"], + "additionalProperties": False, } }, "properties": {"root": {"$ref": "#/$defs/Node"}}, "required": ["root"], + "additionalProperties": False, } assert strict_mode_supported(schema) is True @@ -198,6 +229,28 @@ def test_derive_schema_name_ignores_empty_title() -> None: assert _derive_schema_name(schema).startswith("oa_schema_") +def test_derive_schema_name_falls_back_on_title_with_spaces() -> None: + # OpenAI's name field rejects spaces; the hash fallback fires. + schema = { + "type": "object", + "title": "Person Record", + "properties": {"x": {"type": "string"}}, + "required": ["x"], + } + assert _derive_schema_name(schema).startswith("oa_schema_") + + +def test_derive_schema_name_falls_back_on_title_too_long() -> None: + # OpenAI's name field has a 64-char cap; longer titles fall back. 
+ schema = { + "type": "object", + "title": "A" * 65, + "properties": {"x": {"type": "string"}}, + "required": ["x"], + } + assert _derive_schema_name(schema).startswith("oa_schema_") + + # --------------------------------------------------------------------------- # _augment_messages_with_schema_directive # --------------------------------------------------------------------------- @@ -273,6 +326,24 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.MockTransport(handler) +async def test_non_basemodel_class_raises_provider_invalid_request() -> None: + transport = _mock_chat_completion_response('{"x":1}') + provider = OpenAIProvider( + base_url="http://mock-llm.test", + model="test-model", + api_key="test-key", + transport=transport, + ) + try: + with pytest.raises(ProviderInvalidRequest, match="BaseModel subclass"): + await provider.complete( + [UserMessage(content="x")], + response_schema=str, # type: ignore[arg-type] + ) + finally: + await provider.aclose() + + async def test_pydantic_class_returns_validated_instance() -> None: transport = _mock_chat_completion_response('{"name":"Alice","age":30}') provider = OpenAIProvider( From 2d39a56ad8c1280c90753fa28e57c41b84138476 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 11:16:16 -0700 Subject: [PATCH 12/24] docs: upgrade hello-world to demo structured output Replaces the no-LLM hello-world in README.md with a version that makes a real LLM call via OpenAIProvider and uses a Pydantic class as the response_schema. The resulting Response.parsed flows through state as a typed Classification instance and drives the conditional edge that routes between research and summarize. Defaults to OpenAI public API (gpt-4o-mini) with env-var config: LLM_BASE_URL, LLM_MODEL, LLM_API_KEY. A trailing line in the README calls out OpenRouter, vLLM, LM Studio, llama.cpp as drop-in swaps via base_url/model. 
The example also lands as a runnable file at examples/00-hello-world/main.py and is added to the smoke test suite. examples/README.md gets a corresponding entry. --- README.md | 76 ++++++++++-------- examples/00-hello-world/main.py | 137 ++++++++++++++++++++++++++++++++ examples/README.md | 12 +++ tests/test_examples_smoke.py | 1 + 4 files changed, 191 insertions(+), 35 deletions(-) create mode 100644 examples/00-hello-world/main.py diff --git a/README.md b/README.md index ce99685..f3412bd 100644 --- a/README.md +++ b/README.md @@ -55,26 +55,27 @@ The OpenTelemetry mapping mandates a private `TracerProvider`. That prevents the ## Hello World -About fifty lines that show the engine in action. Three reducer policies declared on one state class. Routing as a pure function of state, not a hidden state machine. An observer attached at compile time that sees every node boundary the engine emits. No LLM, no API key, no boilerplate. Copy it, run it, watch the events fire. Requires Python 3.12 or later. +About sixty lines that show the engine in action. Three reducer policies declared on one state class. An LLM call that returns a typed object, not a string. Conditional routing as a pure function of state, not a hidden state machine. An observer attached at compile time that sees every node boundary the engine emits. Requires Python 3.12 or later and an OpenAI-compatible endpoint (defaults to OpenAI public API; works against any local server too). 
```python import asyncio -from typing import Annotated - -from openarmature.graph import ( - END, - GraphBuilder, - NodeEvent, - State, - append, - merge, -) -from pydantic import Field +import os +from collections.abc import Mapping +from typing import Annotated, Any, Literal + +from openarmature.graph import END, GraphBuilder, NodeEvent, State, append, merge +from openarmature.llm import OpenAIProvider, UserMessage +from pydantic import BaseModel, Field + + +class Classification(BaseModel): + intent: Literal["research", "summarize"] + rationale: str class PipelineState(State): query: str # last_write_wins (default) - classification: str = "" # last_write_wins + classification: Classification | None = None # last_write_wins sources: Annotated[list[str], append] = Field( # appends across writes default_factory=list ) @@ -83,30 +84,32 @@ class PipelineState(State): ) -async def classify(state: PipelineState) -> dict: - decision = "research" if "?" in state.query else "summarize" - return { - "classification": decision, - "metadata": {"classified_by": "rule"}, - } +provider = OpenAIProvider( + base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com/v1"), + model=os.environ.get("LLM_MODEL", "gpt-4o-mini"), + api_key=os.environ.get("LLM_API_KEY"), +) + + +async def classify(state: PipelineState) -> Mapping[str, Any]: + response = await provider.complete( + [UserMessage(content=f"Route to 'research' or 'summarize': {state.query!r}")], + response_schema=Classification, + ) + return {"classification": response.parsed, "metadata": {"classified_by": "llm"}} -async def research(state: PipelineState) -> dict: - return { - "sources": ["wikipedia", "arxiv"], - "metadata": {"tool": "search"}, - } +async def research(state: PipelineState) -> Mapping[str, Any]: + return {"sources": ["wikipedia", "arxiv"], "metadata": {"tool": "search"}} -async def summarize(state: PipelineState) -> dict: - return { - "sources": ["cache"], - "metadata": {"tool": "summarizer"}, - } +async 
def summarize(state: PipelineState) -> Mapping[str, Any]: + return {"sources": ["cache"], "metadata": {"tool": "summarizer"}} def route(state: PipelineState) -> str: - return state.classification + assert state.classification is not None + return state.classification.intent async def trace(event: NodeEvent) -> None: @@ -127,22 +130,25 @@ graph = ( ) graph.attach_observer(trace) + async def main() -> None: try: - await graph.invoke(PipelineState(query="what is RAG?")) + final = await graph.invoke(PipelineState(query="what is RAG?")) + print(f"\nclassification: {final.classification}") finally: await graph.drain() asyncio.run(main()) -# classify: sources=[] -# research: sources=['wikipedia', 'arxiv'] ``` -A few things to notice in this short example: +Set `LLM_API_KEY=sk-...` and run. To swap providers, point `LLM_BASE_URL` and `LLM_MODEL` at OpenRouter, vLLM, LM Studio, llama.cpp — anything that speaks the OpenAI Chat Completions wire format. The example also lives at [`examples/00-hello-world/main.py`](./examples/00-hello-world/main.py); see [`examples/`](./examples/) for more runnable demos. + +A few things to notice: - **Three reducer policies on one state schema.** `query` and `classification` get the default `last_write_wins`. `sources` is `Annotated[list[str], append]`, so successive writes concatenate. `metadata` is `Annotated[dict[str, str], merge]`, so successive writes shallow-merge. The merge policy lives on the schema, once. -- **Conditional routing as a state function.** `route` reads `state.classification` and returns a node name. The graph engine doesn't care that this happens to be deterministic; it would accept an LLM-driven router with the same shape. +- **Structured output as a typed object.** `provider.complete(..., response_schema=Classification)` returns `Response.parsed` as a validated `Classification` instance, not a string the caller has to JSON-parse and re-validate. Pass a JSON Schema dict instead of a class for the raw form. 
+- **Conditional routing on a parsed field.** `route` reads `state.classification.intent` and returns the next node's name. The graph engine doesn't care the discriminator came from an LLM; it would accept a deterministic rule with the same shape. - **Observer sees both phases.** `trace` filters to `completed` events for brevity; the engine also delivers `started` events. - **The graph either compiles or it doesn't.** Remove `.set_entry()` and `.compile()` raises `NoDeclaredEntry` before `invoke()` runs. diff --git a/examples/00-hello-world/main.py b/examples/00-hello-world/main.py new file mode 100644 index 0000000..b239828 --- /dev/null +++ b/examples/00-hello-world/main.py @@ -0,0 +1,137 @@ +"""Hello-world demo: a 3-node graph that classifies a query with an LLM +(via structured output) and routes to one of two follow-up nodes. + +**Demonstrates:** + +- Typed ``State`` with three reducer policies (``last_write_wins``, + ``append``, ``merge``). +- ``OpenAIProvider`` from ``openarmature.llm`` against any + OpenAI-compatible endpoint. +- Structured output via a Pydantic class — the model's response comes + back as a validated ``Classification`` instance, not a string. +- Conditional routing as a pure function of state (``route``). +- ``attach_observer`` for boundary visibility. + +**Configuration** (env vars; OpenAI defaults shown): + +- ``LLM_BASE_URL`` — defaults to ``https://api.openai.com/v1``. +- ``LLM_MODEL`` — defaults to ``gpt-4o-mini``. +- ``LLM_API_KEY`` — required (your OpenAI API key, or empty for + local servers that don't authenticate). + +Run with: + + uv sync --group examples + LLM_API_KEY=sk-... 
uv run python examples/00-hello-world/main.py +""" + +from __future__ import annotations + +import asyncio +import os +from collections.abc import Mapping +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, Field + +from openarmature.graph import ( + END, + CompiledGraph, + GraphBuilder, + NodeEvent, + State, + append, + merge, +) +from openarmature.llm import OpenAIProvider, UserMessage + + +class Classification(BaseModel): + """The Pydantic schema the model is constrained to produce. + + Passed as ``response_schema`` to ``provider.complete()``; the + framework converts to JSON Schema, instructs the provider to + return matching content, validates the response, and yields a + ``Classification`` instance via ``Response.parsed``. + """ + + intent: Literal["research", "summarize"] + rationale: str + + +class PipelineState(State): + query: str + classification: Classification | None = None + sources: Annotated[list[str], append] = Field(default_factory=list) + metadata: Annotated[dict[str, str], merge] = Field(default_factory=dict) + + +_provider = OpenAIProvider( + base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com/v1"), + model=os.environ.get("LLM_MODEL", "gpt-4o-mini"), + api_key=os.environ.get("LLM_API_KEY"), +) + + +async def classify(state: PipelineState) -> Mapping[str, Any]: + response = await _provider.complete( + [ + UserMessage( + content=( + f"Route this query to either 'research' (look something up) or " + f"'summarize' (condense known material): {state.query!r}" + ) + ) + ], + response_schema=Classification, + ) + return {"classification": response.parsed, "metadata": {"classified_by": "llm"}} + + +async def research(state: PipelineState) -> Mapping[str, Any]: + return {"sources": ["wikipedia", "arxiv"], "metadata": {"tool": "search"}} + + +async def summarize(state: PipelineState) -> Mapping[str, Any]: + return {"sources": ["cache"], "metadata": {"tool": "summarizer"}} + + +def route(state: PipelineState) -> str: 
+ if state.classification is None: + raise RuntimeError("classify did not populate state.classification") + return state.classification.intent + + +async def trace(event: NodeEvent) -> None: + if event.phase == "completed" and event.error is None: + print(f"{event.node_name}: sources={event.post_state.sources}") + + +def build_graph() -> CompiledGraph[PipelineState]: + return ( + GraphBuilder(PipelineState) + .add_node("classify", classify) + .add_node("research", research) + .add_node("summarize", summarize) + .add_conditional_edge("classify", route) + .add_edge("research", END) + .add_edge("summarize", END) + .set_entry("classify") + .compile() + ) + + +async def main() -> None: + graph = build_graph() + graph.attach_observer(trace) + try: + final = await graph.invoke(PipelineState(query="what is RAG?")) + print(f"\nclassification: {final.classification}") + print(f"sources: {final.sources}") + print(f"metadata: {final.metadata}") + finally: + await graph.drain() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/README.md b/examples/README.md index c6c47ca..2c52d7c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -6,6 +6,18 @@ End-to-end demo projects for `openarmature`. Each is a standalone ## Demos +### [`00-hello-world/`](./00-hello-world/main.py) + +Classify a query with an LLM and route to one of two follow-up +nodes. Demonstrates: typed `State` with three reducer policies, the +`OpenAIProvider` from `openarmature.llm`, structured output via a +Pydantic class (`response_schema=Classification` → `Response.parsed` +as a `Classification` instance), conditional routing on a parsed +field, and a compile-time observer. + +Configured via env vars (`LLM_BASE_URL`, `LLM_MODEL`, `LLM_API_KEY`); +defaults to OpenAI public API with `gpt-4o-mini`. + ### [`01-linear-pipeline/`](./01-linear-pipeline/main.py) Minimal two-node graph (`plan → write`). 
Demonstrates: typed `State`, diff --git a/tests/test_examples_smoke.py b/tests/test_examples_smoke.py index 785e776..9a0bac3 100644 --- a/tests/test_examples_smoke.py +++ b/tests/test_examples_smoke.py @@ -30,6 +30,7 @@ EXAMPLES_DIR = Path(__file__).parent.parent / "examples" DEMOS = [ + "00-hello-world", "01-linear-pipeline", "02-routing-and-subgraphs", "03-explicit-subgraph-mapping", From e7cb0de8239af2017008fa5f343a215ec157abd2 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 11:28:17 -0700 Subject: [PATCH 13/24] fix(examples): hello-world base_url default and observer trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs surfaced during live validation against OpenAI: - The default LLM_BASE_URL was https://api.openai.com/v1, but our OpenAIProvider's wire path posts to /v1/chat/completions itself. httpx URL join produced https://api.openai.com/v1/v1/chat/completions → 404. Convention is base_url = host root; impl adds /v1. Default now matches; doc-string + README comment make it explicit. - The observer trace fired on the OpenAIProvider LLM-span event (sentinel namespace, post_state=None) and crashed accessing .sources. Added a post_state is not None guard. 
--- README.md | 4 ++-- examples/00-hello-world/main.py | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f3412bd..5b7daca 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ class PipelineState(State): provider = OpenAIProvider( - base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com/v1"), + base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"), # host root; impl adds /v1 model=os.environ.get("LLM_MODEL", "gpt-4o-mini"), api_key=os.environ.get("LLM_API_KEY"), ) @@ -113,7 +113,7 @@ def route(state: PipelineState) -> str: async def trace(event: NodeEvent) -> None: - if event.phase == "completed" and event.error is None: + if event.phase == "completed" and event.error is None and event.post_state is not None: print(f"{event.node_name}: sources={event.post_state.sources}") diff --git a/examples/00-hello-world/main.py b/examples/00-hello-world/main.py index b239828..ed75e86 100644 --- a/examples/00-hello-world/main.py +++ b/examples/00-hello-world/main.py @@ -14,7 +14,9 @@ **Configuration** (env vars; OpenAI defaults shown): -- ``LLM_BASE_URL`` — defaults to ``https://api.openai.com/v1``. +- ``LLM_BASE_URL`` — defaults to ``https://api.openai.com``. **Host + root only** — the impl adds ``/v1/chat/completions`` and + ``/v1/models`` itself, so do NOT include ``/v1`` in this value. - ``LLM_MODEL`` — defaults to ``gpt-4o-mini``. - ``LLM_API_KEY`` — required (your OpenAI API key, or empty for local servers that don't authenticate). 
@@ -67,7 +69,7 @@ class PipelineState(State): _provider = OpenAIProvider( - base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com/v1"), + base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"), model=os.environ.get("LLM_MODEL", "gpt-4o-mini"), api_key=os.environ.get("LLM_API_KEY"), ) @@ -103,7 +105,10 @@ def route(state: PipelineState) -> str: async def trace(event: NodeEvent) -> None: - if event.phase == "completed" and event.error is None: + # OpenAIProvider emits NodeEvent-shaped events for LLM-span + # tracking under a sentinel namespace; those have post_state=None. + # Filter to events that carry a state snapshot before reading it. + if event.phase == "completed" and event.error is None and event.post_state is not None: print(f"{event.node_name}: sources={event.post_state.sources}") From c9326e86822070b3d70c3b1654edbcd897e79b9e Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 11:43:33 -0700 Subject: [PATCH 14/24] docs(examples): wire research + summarize nodes to real LLM calls The hello-world's research and summarize nodes were returning hard-coded source lists. Replaces both with real provider.complete() calls that emit typed structured output, so the example demonstrates the value of a structured-output pipeline end-to-end instead of just the framework's plumbing. The example now exercises both response_schema forms in one demo: - classify and summarize use Pydantic classes (Classification, Summary); Response.parsed comes back as a validated instance. - research uses a raw JSON Schema dict; Response.parsed comes back as a plain dict. State gains two intermediate-artifact fields (research_plan, summary). Final output prints whichever branch fired, in addition to the existing sources/metadata. The reducer-policy story stays intact (last_write_wins on the LLM outputs, append on sources, merge on metadata). 
Live-validated against OpenAI gpt-4o-mini; both branches verified (structured class instance + structured dict on Response.parsed). --- README.md | 47 ++++++++++++--- examples/00-hello-world/main.py | 103 +++++++++++++++++++++++++------- 2 files changed, 121 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 5b7daca..ef3a6a6 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ The OpenTelemetry mapping mandates a private `TracerProvider`. That prevents the ## Hello World -About sixty lines that show the engine in action. Three reducer policies declared on one state class. An LLM call that returns a typed object, not a string. Conditional routing as a pure function of state, not a hidden state machine. An observer attached at compile time that sees every node boundary the engine emits. Requires Python 3.12 or later and an OpenAI-compatible endpoint (defaults to OpenAI public API; works against any local server too). +About a hundred lines that show the engine in action. Three reducer policies declared on one state class. Three LLM calls each returning typed structured output (Pydantic class on two, raw JSON Schema dict on the third). Conditional routing as a pure function of state, not a hidden state machine. An observer attached at compile time that sees every node boundary the engine emits. Requires Python 3.12 or later and an OpenAI-compatible endpoint (defaults to OpenAI public API; works against any local server too). 
```python import asyncio @@ -73,9 +73,16 @@ class Classification(BaseModel): rationale: str +class Summary(BaseModel): + one_liner: str + confidence: float + + class PipelineState(State): query: str # last_write_wins (default) - classification: Classification | None = None # last_write_wins + classification: Classification | None = None # set by classify + research_plan: dict[str, Any] | None = None # set by research (dict-schema form) + summary: Summary | None = None # set by summarize sources: Annotated[list[str], append] = Field( # appends across writes default_factory=list ) @@ -94,17 +101,37 @@ provider = OpenAIProvider( async def classify(state: PipelineState) -> Mapping[str, Any]: response = await provider.complete( [UserMessage(content=f"Route to 'research' or 'summarize': {state.query!r}")], - response_schema=Classification, + response_schema=Classification, # class → instance ) return {"classification": response.parsed, "metadata": {"classified_by": "llm"}} async def research(state: PipelineState) -> Mapping[str, Any]: - return {"sources": ["wikipedia", "arxiv"], "metadata": {"tool": "search"}} + response = await provider.complete( + [UserMessage(content=f"Plan research for {state.query!r}: list topics + follow-ups.")], + response_schema={ # dict → dict + "type": "object", + "properties": { + "topics": {"type": "array", "items": {"type": "string"}}, + "follow_up_questions": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["topics", "follow_up_questions"], + "additionalProperties": False, + }, + ) + return { + "research_plan": response.parsed, + "sources": ["wikipedia", "arxiv"], + "metadata": {"tool": "research"}, + } async def summarize(state: PipelineState) -> Mapping[str, Any]: - return {"sources": ["cache"], "metadata": {"tool": "summarizer"}} + response = await provider.complete( + [UserMessage(content=f"Summarize {state.query!r} in one sentence with confidence 0-1.")], + response_schema=Summary, # class → instance + ) + return 
{"summary": response.parsed, "sources": ["cache"], "metadata": {"tool": "summarize"}} def route(state: PipelineState) -> str: @@ -135,6 +162,10 @@ async def main() -> None: try: final = await graph.invoke(PipelineState(query="what is RAG?")) print(f"\nclassification: {final.classification}") + if final.research_plan is not None: + print(f"research_plan: {final.research_plan}") + if final.summary is not None: + print(f"summary: {final.summary}") finally: await graph.drain() @@ -142,12 +173,12 @@ async def main() -> None: asyncio.run(main()) ``` -Set `LLM_API_KEY=sk-...` and run. To swap providers, point `LLM_BASE_URL` and `LLM_MODEL` at OpenRouter, vLLM, LM Studio, llama.cpp — anything that speaks the OpenAI Chat Completions wire format. The example also lives at [`examples/00-hello-world/main.py`](./examples/00-hello-world/main.py); see [`examples/`](./examples/) for more runnable demos. +Set `LLM_API_KEY=sk-...` and run. To swap providers, point `LLM_BASE_URL` and `LLM_MODEL` at OpenRouter, vLLM, LM Studio, llama.cpp, or anything else that speaks the OpenAI Chat Completions wire format. The example also lives at [`examples/00-hello-world/main.py`](./examples/00-hello-world/main.py); see [`examples/`](./examples/) for more runnable demos. A few things to notice: -- **Three reducer policies on one state schema.** `query` and `classification` get the default `last_write_wins`. `sources` is `Annotated[list[str], append]`, so successive writes concatenate. `metadata` is `Annotated[dict[str, str], merge]`, so successive writes shallow-merge. The merge policy lives on the schema, once. -- **Structured output as a typed object.** `provider.complete(..., response_schema=Classification)` returns `Response.parsed` as a validated `Classification` instance, not a string the caller has to JSON-parse and re-validate. Pass a JSON Schema dict instead of a class for the raw form. 
+- **Three reducer policies on one state schema.** `query` / `classification` / `research_plan` / `summary` get the default `last_write_wins`. `sources` is `Annotated[list[str], append]`, so successive writes concatenate. `metadata` is `Annotated[dict[str, str], merge]`, so successive writes shallow-merge. The merge policy lives on the schema, once. +- **Structured output, two forms.** `response_schema=Classification` (a Pydantic class) returns `Response.parsed` as a validated `Classification` instance, typed end-to-end. `response_schema={...}` (a raw JSON Schema dict) returns `Response.parsed` as a plain dict. Same wire shape underneath; pick the form that fits. - **Conditional routing on a parsed field.** `route` reads `state.classification.intent` and returns the next node's name. The graph engine doesn't care the discriminator came from an LLM; it would accept a deterministic rule with the same shape. - **Observer sees both phases.** `trace` filters to `completed` events for brevity; the engine also delivers `started` events. - **The graph either compiles or it doesn't.** Remove `.set_entry()` and `.compile()` raises `NoDeclaredEntry` before `invoke()` runs. diff --git a/examples/00-hello-world/main.py b/examples/00-hello-world/main.py index ed75e86..55952e5 100644 --- a/examples/00-hello-world/main.py +++ b/examples/00-hello-world/main.py @@ -1,5 +1,6 @@ -"""Hello-world demo: a 3-node graph that classifies a query with an LLM -(via structured output) and routes to one of two follow-up nodes. +"""Hello-world demo: a 3-node graph where each node makes an LLM call +with structured output. Classify a query, then either plan research or +write a one-sentence summary. **Demonstrates:** @@ -7,18 +8,21 @@ ``append``, ``merge``). - ``OpenAIProvider`` from ``openarmature.llm`` against any OpenAI-compatible endpoint. -- Structured output via a Pydantic class — the model's response comes - back as a validated ``Classification`` instance, not a string. 
-- Conditional routing as a pure function of state (``route``). +- Both ``response_schema`` forms: + - Pydantic class (``Classification``, ``Summary``): typed + instance on ``Response.parsed``. + - JSON Schema dict (``research``): raw dict on ``Response.parsed``. +- Conditional routing on a parsed field (``route`` reads + ``state.classification.intent``). - ``attach_observer`` for boundary visibility. **Configuration** (env vars; OpenAI defaults shown): -- ``LLM_BASE_URL`` — defaults to ``https://api.openai.com``. **Host - root only** — the impl adds ``/v1/chat/completions`` and +- ``LLM_BASE_URL``: defaults to ``https://api.openai.com``. **Host + root only**; the impl adds ``/v1/chat/completions`` and ``/v1/models`` itself, so do NOT include ``/v1`` in this value. -- ``LLM_MODEL`` — defaults to ``gpt-4o-mini``. -- ``LLM_API_KEY`` — required (your OpenAI API key, or empty for +- ``LLM_MODEL``: defaults to ``gpt-4o-mini``. +- ``LLM_API_KEY``: required (your OpenAI API key, or empty for local servers that don't authenticate). Run with: @@ -48,22 +52,28 @@ from openarmature.llm import OpenAIProvider, UserMessage +# Pydantic schemas the model is constrained to produce. Passing a +# class as ``response_schema`` makes the framework convert to JSON +# Schema, instruct the provider to return matching content, validate +# the response, and yield an instance via ``Response.parsed``. class Classification(BaseModel): - """The Pydantic schema the model is constrained to produce. - - Passed as ``response_schema`` to ``provider.complete()``; the - framework converts to JSON Schema, instructs the provider to - return matching content, validates the response, and yields a - ``Classification`` instance via ``Response.parsed``. - """ - intent: Literal["research", "summarize"] rationale: str +class Summary(BaseModel): + one_liner: str + confidence: float + + +# State holds intermediate artifacts from each LLM call. 
``research`` +# uses a dict schema (rather than a class), so its parsed value is a +# raw dict, typed here as ``dict[str, Any] | None``. class PipelineState(State): query: str classification: Classification | None = None + research_plan: dict[str, Any] | None = None + summary: Summary | None = None sources: Annotated[list[str], append] = Field(default_factory=list) metadata: Annotated[dict[str, str], merge] = Field(default_factory=dict) @@ -76,12 +86,15 @@ class PipelineState(State): async def classify(state: PipelineState) -> Mapping[str, Any]: + # response_schema=class form: parsed comes back as a Classification + # instance. The model picks the branch (research vs summarize) and + # the routing function below reads it as a typed field. response = await _provider.complete( [ UserMessage( content=( - f"Route this query to either 'research' (look something up) or " - f"'summarize' (condense known material): {state.query!r}" + f"Route this query to either 'research' (find new information) " + f"or 'summarize' (condense known material): {state.query!r}" ) ) ], @@ -91,11 +104,55 @@ async def classify(state: PipelineState) -> Mapping[str, Any]: async def research(state: PipelineState) -> Mapping[str, Any]: - return {"sources": ["wikipedia", "arxiv"], "metadata": {"tool": "search"}} + # response_schema=dict form: parsed comes back as a plain dict. + # Same wire shape as the class form: the framework converts a + # class via .model_json_schema() under the hood. Use dict when + # you want raw shape without declaring a Pydantic model. + response = await _provider.complete( + [ + UserMessage( + content=( + f"Plan research for the query {state.query!r}. List up to 3 " + f"specific topics to investigate and up to 3 follow-up questions." 
+ ) + ) + ], + response_schema={ + "type": "object", + "properties": { + "topics": {"type": "array", "items": {"type": "string"}}, + "follow_up_questions": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["topics", "follow_up_questions"], + "additionalProperties": False, + }, + ) + return { + "research_plan": response.parsed, + "sources": ["wikipedia", "arxiv"], + "metadata": {"tool": "research"}, + } async def summarize(state: PipelineState) -> Mapping[str, Any]: - return {"sources": ["cache"], "metadata": {"tool": "summarizer"}} + # Pydantic-class form again: parsed is a Summary instance with + # a typed one_liner and a confidence float. + response = await _provider.complete( + [ + UserMessage( + content=( + f"Summarize {state.query!r} in one sentence. Set confidence " + f"between 0 and 1 reflecting how well-established the answer is." + ) + ) + ], + response_schema=Summary, + ) + return { + "summary": response.parsed, + "sources": ["cache"], + "metadata": {"tool": "summarize"}, + } def route(state: PipelineState) -> str: @@ -132,6 +189,10 @@ async def main() -> None: try: final = await graph.invoke(PipelineState(query="what is RAG?")) print(f"\nclassification: {final.classification}") + if final.research_plan is not None: + print(f"research_plan: {final.research_plan}") + if final.summary is not None: + print(f"summary: {final.summary}") print(f"sources: {final.sources}") print(f"metadata: {final.metadata}") finally: From 1ae405be7c76e35456c2c39bddd79f83c3b4bc0c Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 11:56:36 -0700 Subject: [PATCH 15/24] docs: structured output concepts page + model-providers updates Adds docs/concepts/llms.md covering how LLM calls fit into the graph model: LLM calls as async IO inside nodes, structured output (both response_schema forms + native/fallback wire paths + strict mode), routing on parsed fields, and errors at the LLM boundary. 
Nav entry added to mkdocs.yml's Concepts section; concepts/index.md TOC extended. Updates docs/model-providers/index.md: Protocol signature now shows the response_schema parameter; errors table adds StructuredOutputInvalid; new Structured output section walks through both response_schema forms, the native/fallback wire paths, and strict-mode constraints. Updates docs/model-providers/authoring.md: skeleton's complete() signature now matches the Protocol (response_schema parameter); a new "Structured output" entry in Beyond the skeleton points custom- provider authors at validate_response_schema and strict_mode_supported. mkdocs builds clean in strict mode; the runnable example in the new Structured output section is verified by tests/test_docs_examples.py. --- docs/concepts/index.md | 2 + docs/concepts/llms.md | 278 ++++++++++++++++++++++++++++++ docs/model-providers/authoring.md | 15 ++ docs/model-providers/index.md | 110 ++++++++++-- mkdocs.yml | 1 + 5 files changed, 394 insertions(+), 12 deletions(-) create mode 100644 docs/concepts/llms.md diff --git a/docs/concepts/index.md b/docs/concepts/index.md index ac99588..aa63ccb 100644 --- a/docs/concepts/index.md +++ b/docs/concepts/index.md @@ -12,6 +12,8 @@ the framework, or jump to whichever concept you need. data seam. - [Fan-out](fan-out.md): running the same subgraph many times in parallel, results merged back deterministically. +- [LLMs](llms.md): how LLM calls fit into nodes, structured output, + routing on parsed fields, errors at the LLM boundary. - [Observability](observability.md): node-boundary hooks, OTel mapping, log correlation. - [Checkpointing](checkpointing.md): save state at each node boundary, diff --git a/docs/concepts/llms.md b/docs/concepts/llms.md new file mode 100644 index 0000000..3a44981 --- /dev/null +++ b/docs/concepts/llms.md @@ -0,0 +1,278 @@ +# LLMs + +The graph engine has no concept of LLMs or tools. 
A node is just an +async function that reads typed state and returns a partial update. +Calling an LLM is one of the things a node can do during that call, the +same way it might read a file, hit a database, or invoke an internal +service. This page covers the patterns that emerge once you start +mixing LLM calls into graph nodes. + +## LLM calls are async IO inside a node + +Construct one [`Provider`](../reference/llm.md) at startup and share it +across nodes. Each `complete()` call carries the full message list and +returns a [`Response`](../reference/llm.md); the provider is stateless +and reentrant, so multiple nodes (or fan-out instances) can call into +it concurrently without coordination. + +```python +import os +from openarmature.llm import OpenAIProvider, UserMessage + +provider = OpenAIProvider( + base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"), + model="gpt-4o-mini", + api_key=os.environ["LLM_API_KEY"], +) + + +async def analyze(state: AnalysisState) -> dict: + response = await provider.complete( + [UserMessage(content=state.text)], + ) + return {"raw": response.message.content} +``` + +The provider goes wherever your application's other long-lived +dependencies go: module-level constant, dependency-injection +container, factory function. It does not need to be constructed per +call, and because construction is cheap (no eager network calls), +import-time setup is fine. + +A real graph hits LLMs from multiple nodes. The conventional shape: + +```python +async def classify(state): # one provider call + response = await provider.complete(...) + return {...} + +async def extract(state): # another provider call + response = await provider.complete(...) + return {...} + +async def synthesize(state): # a third + response = await provider.complete(...) + return {...} +``` + +The graph composes the order; the provider sees three independent +stateless calls.
Conversational memory (if you want it) is the +caller's responsibility: thread it through state and pass the +accumulated message list into each call. + +## Structured output + +Every LLM-using node that produces typed data ends up with the same +shape: render a prompt, call the model, parse the response as JSON, +validate it against the expected schema, retry on parse or validation +failure. Five steps of boilerplate that differ only in the schema and +the prompt. + +Structured output collapses that into one parameter. Pass a +`response_schema` to `complete()` and the provider: + +1. Tells the model on the wire to produce schema-conforming output. +2. Parses and validates the response against the schema. +3. Surfaces the validated value on `Response.parsed`. +4. Raises `StructuredOutputInvalid` on parse or validation failure. + +Two forms are accepted: a Pydantic class (typed-instance return) and a +JSON Schema dict (raw-dict return). Same wire shape underneath. + +### Pydantic class form + +```python +from pydantic import BaseModel + +class Classification(BaseModel): + intent: Literal["research", "summarize"] + rationale: str + + +async def classify(state): + response = await provider.complete( + [UserMessage(content=f"Route this query: {state.query!r}")], + response_schema=Classification, + ) + return {"classification": response.parsed} +``` + +`Response.parsed` is a validated `Classification` instance. Field +access is statically typed (`response.parsed.intent` returns +`Literal["research", "summarize"]`); the framework calls +`.model_json_schema()` under the hood to derive the wire body and +`.model_validate()` to deserialize the response. 
+ +### JSON Schema dict form + +```python +async def research(state): + response = await provider.complete( + [UserMessage(content=f"Plan research: {state.query!r}")], + response_schema={ + "type": "object", + "properties": { + "topics": {"type": "array", "items": {"type": "string"}}, + "follow_up_questions": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["topics", "follow_up_questions"], + "additionalProperties": False, + }, + ) + return {"research_plan": response.parsed} +``` + +`Response.parsed` is a `dict[str, Any]` populated per the schema. Use +this when the shape is dynamic, generated, or borrowed from another +system that already speaks JSON Schema. + +### Wire paths: native and fallback + +Real `OpenAIProvider` traffic uses OpenAI's native `response_format` +field on the request body, so the model produces schema-conforming +output in one trip. Some OpenAI-compatible servers (older vLLM, some +LM Studio releases, llama.cpp variants) either reject `response_format` +with a 400 or silently ignore it. For those, construct the provider +with `force_prompt_augmentation_fallback=True`: + +```python +provider = OpenAIProvider( + base_url="http://localhost:8000", + model="some-local-model", + force_prompt_augmentation_fallback=True, # opt into the fallback +) +``` + +In fallback mode the provider prepends a system directive containing +the serialized schema, omits `response_format` from the wire, and +parses-and-validates the response post-receive. The behavioral contract +is identical: `Response.parsed` populates the same way; failures raise +`StructuredOutputInvalid` the same way. The +`uses_prompt_augmentation_fallback` read-only property lets callers +inspect which path is active. + +### Strict mode + +OpenAI's native path supports a `strict: true` flag that engages the +model's schema-constrained decoding (the model literally cannot emit +non-conforming tokens). 
It applies only when the schema satisfies +specific constraints: `additionalProperties` explicitly `false` on every +object, every key in `properties` listed in `required`, no +unresolvable `$ref` targets. + +`strict_mode_supported(schema)` performs the deep recursive check. The +provider passes `strict: true` to the wire when the schema satisfies +it, and `strict: false` otherwise. Either way, the provider validates +the response post-receive against the supplied schema. Strict is a +wire-level optimization, not a correctness requirement. + +If you control the schema, prefer making it strict-compatible: +explicit `additionalProperties: false` plus `required` covering every +property. Pydantic-derived schemas may need a tweak to satisfy this +(`model_config = ConfigDict(extra="forbid")` on the class). + +## Routing on parsed fields + +A conditional edge is a function `state -> str` that names the next +node. The string can come from anywhere: a hard-coded rule, a lookup +table, the parsed output of an LLM call. The graph engine doesn't +distinguish. + +This means LLM-driven routing and deterministic routing have the same +shape. A classifier node writes its parsed `Classification` to state; +the conditional edge reads `state.classification.intent` and returns +that string. The branches don't know whether the LLM or a regex +produced the discriminator. + +```python +async def classify(state): + response = await provider.complete( + [UserMessage(content=f"Route: {state.query!r}")], + response_schema=Classification, + ) + return {"classification": response.parsed} + + +def route(state) -> str: + return state.classification.intent + + +builder.add_conditional_edge("classify", route) +``` + +The same `route` function could read a feature flag, a config lookup, +or `"research" if "?" in state.query else "summarize"`. The branch +nodes don't change. Swapping a rule-based router for an LLM-based one +is a one-node change. 
+ +## Errors at the LLM boundary + +Every provider call can fail. The +[`openarmature.llm` reference](../reference/llm.md) lists the canonical +error categories; this section covers how they compose with the rest +of the graph. + +**Transient categories** (retry MAY succeed): +`ProviderRateLimit`, `ProviderUnavailable`, `ProviderModelNotLoaded`. +These are the canonical "wrap a node in `RetryMiddleware`" set; the +default classifier picks them up automatically via +`TRANSIENT_CATEGORIES`. + +**Non-transient categories** (retry without changing the request will +not succeed): `ProviderAuthentication`, `ProviderInvalidModel`, +`ProviderInvalidRequest`, `ProviderInvalidResponse`, +`StructuredOutputInvalid`. These propagate up as `NodeException` so +the graph's error-recovery middleware (or the caller of `invoke()`) +can handle them. + +`StructuredOutputInvalid` is the new one and worth a note. It fires +when a model returns content that fails to parse as JSON, or parses +but fails to validate against the supplied schema. The exception +carries the requested `response_schema`, the `raw_content` the model +produced, and a `failure_description`. It is non-transient by default +because a model that emits non-conforming output on a given prompt +usually emits the same non-conforming output on retry. Useful retry +strategies for this case involve changing the prompt or doubling +`max_tokens` rather than re-issuing the same call; that's a +middleware concern, not the provider's default. 
+ +```python +from openarmature.llm import StructuredOutputInvalid + +async def classify_with_diagnostics(state): + try: + response = await provider.complete( + [UserMessage(content=...)], + response_schema=Classification, + ) + except StructuredOutputInvalid as exc: + log.warning( + "schema-validation failure on classify", + extra={ + "raw_content": exc.raw_content, + "failure": exc.failure_description, + }, + ) + raise + return {"classification": response.parsed} +``` + +Callers wanting to retry validation failures specifically can +construct a `RetryMiddleware` with a custom classifier that adds +`structured_output_invalid` to the transient set. The default +classifier won't do this for them. + +## Where to next + +- [Model Providers](../model-providers/index.md) for the provider + contract, the shipped `OpenAIProvider`, and the canonical error + categories. +- [Authoring a Provider](../model-providers/authoring.md) for writing + a provider against a non-OpenAI wire format (Anthropic Messages, + Bedrock, internal gateway). +- [API reference: `openarmature.llm`](../reference/llm.md) for the + full surface: message types, `Response`, `RuntimeConfig`, every + error class, validation helpers. +- [Examples: `00-hello-world`](https://github.com/LunarCommand/openarmature-python/tree/main/examples/00-hello-world) + for a runnable graph exercising both `response_schema` forms in one + pipeline. 
diff --git a/docs/model-providers/authoring.md b/docs/model-providers/authoring.md index 0bb32f2..44be475 100644 --- a/docs/model-providers/authoring.md +++ b/docs/model-providers/authoring.md @@ -23,6 +23,7 @@ from collections.abc import Sequence from typing import Any import httpx +from pydantic import BaseModel from openarmature.llm import ( AssistantMessage, Message, @@ -64,7 +65,13 @@ class MyProvider: messages: Sequence[Message], tools: Sequence[Tool] | None = None, config: RuntimeConfig | None = None, + response_schema: dict[str, Any] | type[BaseModel] | None = None, ) -> Response: + # response_schema support is an optional capability; a skeleton + # provider can raise ProviderInvalidRequest when it's set, or + # ignore it and return free-form text. A production provider + # would wire it through to native response_format support or + # the prompt-augmentation fallback. See ``openarmature.llm.OpenAIProvider``. validate_message_list(messages) validate_tools(tools) @@ -183,6 +190,14 @@ of: - **Tool calls.** Wire-mapping the `tool_calls` array on `AssistantMessage` to the Provider's expected shape, parsing tool results back from `ToolMessage`s. +- **Structured output.** Threading `response_schema` through the + request body (native `response_format` if the underlying wire + supports it; prompt-augmentation fallback otherwise) and validating + the response against the schema before returning. Populate + `Response.parsed` with the validated value; + raise `StructuredOutputInvalid` on parse or validation failure. + Use `validate_response_schema` and `strict_mode_supported` from + `openarmature.llm` to share the provider-agnostic boundary checks. - **Observability spans.** Opt-in `started`/`completed` events around the wire call so the OTel observer can build LLM spans. - **Lenient response parsing** under `finish_reason="error"`. 
diff --git a/docs/model-providers/index.md b/docs/model-providers/index.md index e2df9fb..89a68e0 100644 --- a/docs/model-providers/index.md +++ b/docs/model-providers/index.md @@ -23,8 +23,9 @@ A Provider implements two async methods: ```python from collections.abc import Sequence -from typing import Protocol +from typing import Any, Protocol +from pydantic import BaseModel from openarmature.llm import Message, Response, RuntimeConfig, Tool @@ -35,6 +36,7 @@ class Provider(Protocol): messages: Sequence[Message], tools: Sequence[Tool] | None = None, config: RuntimeConfig | None = None, + response_schema: dict[str, Any] | type[BaseModel] | None = None, ) -> Response: ... ``` @@ -42,7 +44,9 @@ class Provider(Protocol): check, typically called once before invoking the graph. - **`complete()`** performs a single completion call and returns the full `Response`: message, finish reason, token usage, raw wire - payload. + payload, and (when `response_schema` is supplied) a parsed + structured value on `Response.parsed`. See + [Structured output](#structured-output) below. 
### Behaviour guarantees @@ -60,21 +64,103 @@ class Provider(Protocol): ## Errors -Seven canonical error categories cover every failure mode: +Eight canonical error categories cover every failure mode: -| Error | Trigger | -| --------------------------- | --------------------------------------------- | -| `ProviderAuthentication` | 401 / 403 (bad key, expired token) | -| `ProviderUnavailable` | 5xx, network failure, timeout | -| `ProviderInvalidModel` | Bound model doesn't exist on the provider | -| `ProviderModelNotLoaded` | Model known but not currently serving | -| `ProviderRateLimit` | 429 (with `Retry-After` exposed) | -| `ProviderInvalidResponse` | 200 OK that fails to parse | -| `ProviderInvalidRequest` | Malformed request (per-message or list-level) | +| Error | Trigger | +| --------------------------- | ---------------------------------------------------------------------- | +| `ProviderAuthentication` | 401 / 403 (bad key, expired token) | +| `ProviderUnavailable` | 5xx, network failure, timeout | +| `ProviderInvalidModel` | Bound model doesn't exist on the provider | +| `ProviderModelNotLoaded` | Model known but not currently serving | +| `ProviderRateLimit` | 429 (with `Retry-After` exposed) | +| `ProviderInvalidResponse` | 200 OK that fails to parse | +| `ProviderInvalidRequest` | Malformed request (per-message or list-level) | +| `StructuredOutputInvalid` | Response failed to parse as JSON or failed to validate against schema | Three of these (`Unavailable`, `RateLimit`, `ModelNotLoaded`) are exported in `TRANSIENT_CATEGORIES`, the canonical "safe to retry" set used by the default retry-middleware classifier. +`StructuredOutputInvalid` is non-transient by default; see +[Structured output](#structured-output) below. + +## Structured output + +`complete()` accepts an optional `response_schema` argument that +constrains the model's output to a caller-supplied shape. 
When set, the +provider tells the model on the wire to produce conforming output, +parses and validates the response, and surfaces the validated value on +`Response.parsed`. Parse or validation failures raise +`StructuredOutputInvalid`. + +Two `response_schema` forms are accepted: a Pydantic class +(typed-instance return) and a raw JSON Schema dict (dict return). Same +wire shape underneath; pick the form that fits the call site. + +```python +from typing import Literal + +from pydantic import BaseModel +from openarmature.llm import OpenAIProvider, UserMessage + + +class Classification(BaseModel): + intent: Literal["research", "summarize"] + rationale: str + + +# Class form: parsed comes back as a Classification instance. +async def classify(provider: OpenAIProvider) -> Classification: + response = await provider.complete( + [UserMessage(content="Route: 'what is RAG?'")], + response_schema=Classification, + ) + assert isinstance(response.parsed, Classification) + return response.parsed + + +# Dict form: parsed comes back as a plain dict. +async def plan_research(provider: OpenAIProvider) -> dict: + response = await provider.complete( + [UserMessage(content="Plan research for: 'what is RAG?'")], + response_schema={ + "type": "object", + "properties": {"topics": {"type": "array", "items": {"type": "string"}}}, + "required": ["topics"], + "additionalProperties": False, + }, + ) + assert isinstance(response.parsed, dict) + return response.parsed +``` + +For how structured output fits into LLM-using node patterns +(routing on parsed fields, error handling, retry composition), see the +[LLMs concept page](../concepts/llms.md). + +### Native and fallback wire paths + +`OpenAIProvider` uses OpenAI's native `response_format` field on the +request body by default. Some OpenAI-compatible servers (older vLLM, +some LM Studio releases, llama.cpp variants) either reject +`response_format` or silently ignore it.
Construct the provider with +`force_prompt_augmentation_fallback=True` to switch to a +prompt-augmentation path that prepends a system directive with the +serialized schema and parses-and-validates post-receive. The behavioral +contract is identical across both paths; the +`uses_prompt_augmentation_fallback` read-only property lets callers +inspect which path is active. + +### Strict mode + +OpenAI's native path supports a `strict: true` flag that engages +schema-constrained decoding. It applies only when the schema satisfies +specific constraints: `additionalProperties` explicitly `false` on +every object, every key in `properties` listed in `required`, no +unresolvable `$ref` targets. `strict_mode_supported(schema)` (exported +from `openarmature.llm`) performs the deep recursive check; the +provider passes `strict: true` to the wire when the schema satisfies +it, and `strict: false` otherwise. Either way, the provider validates +the response post-receive. ## A minimal example diff --git a/mkdocs.yml b/mkdocs.yml index b7a00b2..0a81d59 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -94,6 +94,7 @@ nav: - Graphs: concepts/graphs.md - Composition: concepts/composition.md - Fan-out: concepts/fan-out.md + - LLMs: concepts/llms.md - Observability: concepts/observability.md - Checkpointing: concepts/checkpointing.md - Model Providers: From 58f6c2fdca97ff9856f854fd2fe9bde426750214 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 12:04:36 -0700 Subject: [PATCH 16/24] docs: fix Provider.complete docstring Returns rendering The Returns block on Provider.complete started with "A :class:Response carrying ...", which mkdocstrings' Google-parser misread as a name-type pair: it pulled out "A" as the Name column entry and split the multi-line description across three table rows. 
Moving the return-value sentence into the prose summary at the top of the docstring (matching the pattern OpenAIProvider.complete already uses) renders cleanly: no spurious Name column entry, single description block. --- src/openarmature/llm/provider.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/openarmature/llm/provider.py b/src/openarmature/llm/provider.py index 8786a89..327f653 100644 --- a/src/openarmature/llm/provider.py +++ b/src/openarmature/llm/provider.py @@ -74,6 +74,11 @@ async def complete( ) -> Response: """Perform a single completion call. + Returns a :class:`Response` carrying the assistant message, + finish reason, usage, and raw payload. When ``response_schema`` + is supplied and the model returns structured content, + ``Response.parsed`` carries the validated value. + Args: messages: The conversation to send. MUST NOT be mutated by the implementation. @@ -84,12 +89,6 @@ async def complete( supplied, the implementation constrains the model's output to the schema and populates ``Response.parsed`` with the validated value. - - Returns: - A :class:`Response` carrying the assistant message, finish - reason, usage, raw payload, and (when ``response_schema`` - was supplied and the model returned structured content) - the parsed structured value. """ ... From 8ed334c0140319cb134fd727f71dc1d3f9587a1d Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 13:21:48 -0700 Subject: [PATCH 17/24] fix(llm): second CoPilot review pass on PR #42 Addresses 19 review threads from the second CoPilot pass; about half were duplicates of the same underlying issue: - examples/00-hello-world/main.py + README hello-world: api_key now uses `os.environ.get("LLM_API_KEY") or None` so an exported-but- empty env var falls through to no-auth (matters for local servers that reject an empty bearer header). - Both examples now close the OpenAIProvider in the finally block alongside graph.drain(). 
Long-running consumers that copy the snippet had been leaking the underlying httpx.AsyncClient. - errors.py header dropped the hard-coded "seven canonical categories" count after StructuredOutputInvalid landed. - strict_mode_supported docstring and the surrounding spec-anchor comment block both updated to match the implementation: additionalProperties must be EXPLICITLY false (an omitted key counts as non-strict, since JSON Schema's default permits extras). - _resolve_ref now handles ref == "#" as the document root before rejecting external refs. Root-recursive schemas that use the bare JSON-Pointer-root form now resolve correctly. Unit test added. - _strict_mode_check tightened to return False on unrecognized shapes (empty {}, const-only, enum-only, unknown keywords) instead of falling through to True. Primitive types (string/integer/ number/boolean/null) classified as terminal-strict-compatible. Two unit tests added. - _build_request_body now explicitly strips response_format from the body when the provider is in fallback mode. RuntimeConfig is extra="allow", so a caller could have piped response_format through the extras loop past the include_response_format gate. - provider.py module docstring's summary signature line updated to match the Protocol's response_schema parameter. - validate_response_schema's spec-anchor comment updated to reflect that JSON Schema validity is now checked at the boundary via Draft202012Validator.check_schema(), not delegated to parse time. - test_pydantic_class_wire_body_matches_dict_form: widened the assertion from response_format-only to full body equality, so any regression in the class-input wire mapping (not just response_format) gets caught. - test_inspect_property_native_default and test_inspect_property_fallback_when_forced converted to async with try/finally + aclose() to match the rest of the file's provider-lifecycle pattern. 
--- README.md | 3 +- examples/00-hello-world/main.py | 6 ++- src/openarmature/llm/errors.py | 14 +++--- src/openarmature/llm/provider.py | 55 +++++++++++++++++++----- src/openarmature/llm/providers/openai.py | 7 +++ tests/unit/test_structured_output.py | 54 ++++++++++++++++++++--- 6 files changed, 115 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index ef3a6a6..ff8cc86 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ class PipelineState(State): provider = OpenAIProvider( base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"), # host root; impl adds /v1 model=os.environ.get("LLM_MODEL", "gpt-4o-mini"), - api_key=os.environ.get("LLM_API_KEY"), + api_key=os.environ.get("LLM_API_KEY") or None, # empty → no-auth ) @@ -168,6 +168,7 @@ async def main() -> None: print(f"summary: {final.summary}") finally: await graph.drain() + await provider.aclose() asyncio.run(main()) diff --git a/examples/00-hello-world/main.py b/examples/00-hello-world/main.py index 55952e5..90a7628 100644 --- a/examples/00-hello-world/main.py +++ b/examples/00-hello-world/main.py @@ -81,7 +81,10 @@ class PipelineState(State): _provider = OpenAIProvider( base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"), model=os.environ.get("LLM_MODEL", "gpt-4o-mini"), - api_key=os.environ.get("LLM_API_KEY"), + # ``or None`` so an exported-but-empty LLM_API_KEY falls through to + # no-auth (matters for local servers like vLLM that reject an empty + # bearer header). + api_key=os.environ.get("LLM_API_KEY") or None, ) @@ -197,6 +200,7 @@ async def main() -> None: print(f"metadata: {final.metadata}") finally: await graph.drain() + await _provider.aclose() if __name__ == "__main__": diff --git a/src/openarmature/llm/errors.py b/src/openarmature/llm/errors.py index 5859eb5..14eedc3 100644 --- a/src/openarmature/llm/errors.py +++ b/src/openarmature/llm/errors.py @@ -1,15 +1,15 @@ -# Spec: realizes llm-provider §7 (seven canonical error categories). 
+# Spec: realizes llm-provider §7 (canonical error categories). """Errors raised by an llm-provider implementation. -A provider call (``ready()`` or ``complete()``) MAY raise one of -seven canonical category errors. Each error class carries a -``category`` class attribute matching the canonical string identifier -so callers can dispatch on the category without matching exception -types directly. +A provider call (``ready()`` or ``complete()``) MAY raise one of the +canonical category errors documented below. Each error class carries +a ``category`` class attribute matching the canonical string +identifier so callers can dispatch on the category without matching +exception types directly. This module is also the single source of truth for the canonical -category strings — :data:`TRANSIENT_CATEGORIES` lives here, and +category strings; :data:`TRANSIENT_CATEGORIES` lives here, and ``openarmature.graph.middleware.retry``'s default classifier imports it. """ diff --git a/src/openarmature/llm/provider.py b/src/openarmature/llm/provider.py index 327f653..e8b5984 100644 --- a/src/openarmature/llm/provider.py +++ b/src/openarmature/llm/provider.py @@ -14,9 +14,11 @@ A successful return implies the next ``complete()`` would not raise errors that surface mismatched configuration or unloaded state. -- ``async complete(messages, tools=None, config=None) -> Response`` - — performs a single completion. Stateless, reentrant, MUST NOT - mutate its inputs. +- ``async complete(messages, tools=None, config=None, response_schema=None) -> Response`` + performs a single completion. Stateless, reentrant, MUST NOT mutate + its inputs. When ``response_schema`` is supplied (a JSON Schema + dict or Pydantic class), the implementation constrains the model's + output and populates ``Response.parsed``. This module also exports :func:`validate_message_list`: a list-level invariant check that complements per-message Pydantic validation. 
A @@ -177,9 +179,12 @@ def validate_tools(tools: Sequence[Tool] | None) -> None: # Spec llm-provider §5 requires the response_schema argument to # complete() to be a valid JSON Schema with a top-level type "object". -# The pre-send check here is the structural minimum; deeper validity -# (recursive JSON Schema correctness, vendor extensions) is delegated -# to the runtime validator at parse time. +# The boundary check here validates BOTH constraints: structural +# (must be a dict with top-level type: "object") AND full JSON Schema +# validity via Draft202012Validator.check_schema(). The runtime +# validator on the parse path only handles instance-against-schema +# failures; malformed schemas fail here rather than escaping at decode +# time as jsonschema.SchemaError. def validate_response_schema(schema: object) -> None: """Pre-send validation for a JSON Schema passed as the ``response_schema`` argument to ``complete()``. @@ -224,9 +229,11 @@ def strict_mode_supported(schema: dict[str, Any]) -> bool: by native-decoding LLM wire paths. Returns True iff for every nested (sub)schema in the tree - ``additionalProperties`` is not ``true`` and every key in - ``properties`` appears in ``required``. False on any violation, on - an unresolvable ``$ref``, or on an unknown shape. + ``additionalProperties`` is explicitly ``false`` (an omitted key + counts as non-strict, since JSON Schema's default is to permit + extras) and every key in ``properties`` appears in ``required``. + False on any violation, on an unresolvable ``$ref``, or on an + unknown shape. Args: schema: The root JSON Schema dict. @@ -238,6 +245,13 @@ def strict_mode_supported(schema: dict[str, Any]) -> bool: return _strict_mode_check(schema, root=schema, visited=set()) +# JSON Schema primitive types: terminal-strict-compatible because they +# carry no nested structure to verify. 
Object/array types have their +# own branch checks; anything else (const, enum, unknown keywords, +# empty {}) is conservatively non-strict. +_PRIMITIVE_TYPES = frozenset({"string", "integer", "number", "boolean", "null"}) + + def _strict_mode_check( schema: Any, *, @@ -311,7 +325,23 @@ def _strict_mode_check( if not _strict_mode_check(item, root=root, visited=visited): return False - return True + # Determine whether the schema declared a shape we know how to + # verify. Object/array branches above already returned False on + # any internal violation; reaching here means all internal checks + # passed. Combinators with all branches passing are likewise OK. + # Primitive types are terminal. Anything else (empty schema, + # `const`/`enum`-only, unknown keywords) is conservatively + # non-strict — the walker can't statically verify it. + has_combinator = any(k in schema_dict for k in ("anyOf", "oneOf", "allOf")) + if is_object_type or is_array_type or has_combinator: + return True + if isinstance(schema_type, str) and schema_type in _PRIMITIVE_TYPES: + return True + if isinstance(schema_type, list) and all( + isinstance(t, str) and t in _PRIMITIVE_TYPES for t in cast("list[Any]", schema_type) + ): + return True + return False # Internal-only $ref resolver. Handles JSON Pointer fragments rooted @@ -320,6 +350,11 @@ def _strict_mode_check( # None. JSON Pointer escape rules (`~0` for `~`, `~1` for `/`) are # unescaped per RFC 6901. def _resolve_ref(ref: str, root: dict[str, Any]) -> dict[str, Any] | None: + # Bare "#" is the JSON Pointer for the document root; "#/" prefixes + # an internal path. Anything else (external URIs, relative refs we + # can't resolve without a base) we treat as unresolvable. 
+ if ref == "#": + return root if not ref.startswith("#/"): return None parts = ref[2:].split("/") diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py index 61788c6..be776d9 100644 --- a/src/openarmature/llm/providers/openai.py +++ b/src/openarmature/llm/providers/openai.py @@ -360,6 +360,13 @@ def _build_request_body( "strict": strict_mode_supported(schema_dict), }, } + elif not include_response_format: + # On the fallback path the §8.5.1 contract is "response_format + # MUST NOT be on the wire." RuntimeConfig is extra="allow" so + # a caller could pass response_format through via the extras + # loop above; strip it here so the fallback contract holds + # regardless of caller-supplied extras. + body.pop("response_format", None) return body # ------------------------------------------------------------------ diff --git a/tests/unit/test_structured_output.py b/tests/unit/test_structured_output.py index 3521fc0..fbc1bc1 100644 --- a/tests/unit/test_structured_output.py +++ b/tests/unit/test_structured_output.py @@ -178,6 +178,44 @@ def test_strict_mode_unresolvable_ref_fails() -> None: assert strict_mode_supported(schema) is False +def test_strict_mode_empty_property_schema_fails() -> None: + # A property schema of {} (matches anything) cannot be statically + # verified as strict-compatible. The walker should return False + # rather than fall through to True. + schema: dict[str, Any] = { + "type": "object", + "properties": {"x": {}}, + "required": ["x"], + "additionalProperties": False, + } + assert strict_mode_supported(schema) is False + + +def test_strict_mode_primitive_property_passes() -> None: + # Primitive types (string, integer, number, boolean, null) carry no + # nested structure to verify, so they are terminal-strict-compatible. 
+ schema: dict[str, Any] = { + "type": "object", + "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, + "required": ["name", "age"], + "additionalProperties": False, + } + assert strict_mode_supported(schema) is True + + +def test_strict_mode_resolves_bare_root_ref() -> None: + # JSON Pointer "#" is a valid reference to the document root + # (RFC 6901). A schema using $ref: "#" for self-recursion should + # resolve through and inherit the root's strict-mode status. + schema: dict[str, Any] = { + "type": "object", + "properties": {"value": {"type": "string"}, "self": {"$ref": "#"}}, + "required": ["value", "self"], + "additionalProperties": False, + } + assert strict_mode_supported(schema) is True + + def test_strict_mode_handles_ref_cycle() -> None: # Self-referential schema: each entry has a "children" key pointing # back to the same definition. Without cycle protection this would @@ -445,7 +483,7 @@ def handler_dict(request: httpx.Request) -> httpx.Response: body_class = json.loads(captured_class[0].content) body_dict = json.loads(captured_dict[0].content) - assert body_class["response_format"] == body_dict["response_format"] + assert body_class == body_dict # --------------------------------------------------------------------------- @@ -453,20 +491,26 @@ def handler_dict(request: httpx.Request) -> httpx.Response: # --------------------------------------------------------------------------- -def test_inspect_property_native_default() -> None: +async def test_inspect_property_native_default() -> None: provider = OpenAIProvider( base_url="http://mock-llm.test", model="test-model", api_key="test-key", ) - assert provider.uses_prompt_augmentation_fallback is False + try: + assert provider.uses_prompt_augmentation_fallback is False + finally: + await provider.aclose() -def test_inspect_property_fallback_when_forced() -> None: +async def test_inspect_property_fallback_when_forced() -> None: provider = OpenAIProvider( 
base_url="http://mock-llm.test", model="test-model", api_key="test-key", force_prompt_augmentation_fallback=True, ) - assert provider.uses_prompt_augmentation_fallback is True + try: + assert provider.uses_prompt_augmentation_fallback is True + finally: + await provider.aclose() From 1b5fbb0c54b9e730a484340931e860c9d703cbd7 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 13:37:50 -0700 Subject: [PATCH 18/24] fix: third CoPilot review pass on PR #42 Addresses 5 remaining review threads (3 substantive, 2 stale on already-fixed code): - LlmProviderResponseAssertion (the typed assertion model in harness/expectations.py) now lists `parsed: Any | None`. The runtime assertion in test_llm_provider.py already handled it, but the typed parser had it under extra="forbid" and would have rejected any future case-shape LLM fixture using `parsed`. The 021-028 fixtures slip past today on `calls:` form's permissive `LlmCallSpec.expected: dict[str, Any]`; this lines the two paths up. - docs/model-providers/authoring.md skeleton comment tightened: removed the "ignore it and return free-form text" option from the response_schema guidance. A provider that silently drops the parameter violates the Protocol contract; callers expect either Response.parsed populated or StructuredOutputInvalid raised. Now only two valid options surfaced: raise ProviderInvalidRequest until implemented, or wire it through. - docs/concepts/llms.md softened the static-typing claim in the Pydantic-class form section. Response.parsed is `dict[str, Any] | BaseModel | None`, so a type checker won't narrow from `response_schema=Classification` alone. The page now separates the runtime guarantee (validated instance) from static access (requires cast/isinstance/typed assignment); generic Response[T] flagged as a follow-up. 
The two stale threads (examples/00-hello-world/main.py provider cleanup, test_structured_output.py provider cleanup) were already fixed in commit 8ed334c; replies sent + threads resolved without code changes. --- docs/concepts/llms.md | 17 ++++++++++++----- docs/model-providers/authoring.md | 13 ++++++++----- tests/conformance/harness/expectations.py | 6 ++++++ 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/docs/concepts/llms.md b/docs/concepts/llms.md index 3a44981..2c4fee9 100644 --- a/docs/concepts/llms.md +++ b/docs/concepts/llms.md @@ -97,11 +97,18 @@ async def classify(state): return {"classification": response.parsed} ``` -`Response.parsed` is a validated `Classification` instance. Field -access is statically typed (`response.parsed.intent` returns -`Literal["research", "summarize"]`); the framework calls -`.model_json_schema()` under the hood to derive the wire body and -`.model_validate()` to deserialize the response. +`Response.parsed` is a validated `Classification` instance at +runtime; the framework calls `.model_json_schema()` under the hood +to derive the wire body and `.model_validate()` to deserialize the +response. + +Static typing is shallower. `Response.parsed` is annotated as +`dict[str, Any] | BaseModel | None`, so a type checker won't narrow +to `Classification` from the `response_schema=Classification` +argument alone. Callers that want static field access either +`cast(Classification, response.parsed)`, narrow with `isinstance`, +or assign the value into a typed local. Generic `Response[T]` is on +the table as a follow-up. 
### JSON Schema dict form diff --git a/docs/model-providers/authoring.md b/docs/model-providers/authoring.md index 44be475..8500665 100644 --- a/docs/model-providers/authoring.md +++ b/docs/model-providers/authoring.md @@ -67,11 +67,14 @@ class MyProvider: config: RuntimeConfig | None = None, response_schema: dict[str, Any] | type[BaseModel] | None = None, ) -> Response: - # response_schema support is an optional capability; a skeleton - # provider can raise ProviderInvalidRequest when it's set, or - # ignore it and return free-form text. A production provider - # would wire it through to native response_format support or - # the prompt-augmentation fallback. See ``openarmature.llm.OpenAIProvider``. + # response_schema is part of the Protocol; a skeleton provider + # MUST NOT silently ignore it — callers expect either + # Response.parsed populated or a StructuredOutputInvalid raise. + # Until the wire path is implemented, raise + # ProviderInvalidRequest when response_schema is set. A + # production provider wires it through to native response_format + # support or the prompt-augmentation fallback; see + # ``openarmature.llm.OpenAIProvider``. validate_message_list(messages) validate_tools(tools) diff --git a/tests/conformance/harness/expectations.py b/tests/conformance/harness/expectations.py index 640bbd7..5d1b2b7 100644 --- a/tests/conformance/harness/expectations.py +++ b/tests/conformance/harness/expectations.py @@ -71,6 +71,12 @@ class LlmProviderResponseAssertion(_ForbidExtras): finish_reason: str | None = None usage: dict[str, Any] | None = None raw_check: dict[str, Any] | None = None + # `parsed` was introduced by proposal 0016 — the runtime asserts + # equality against ``Response.parsed``. Typed as Any | None because + # the fixture-side value can be a dict (dict-schema input form), + # a model_dump-equivalent dict (class-schema form), or None + # (tool-call response or no-schema call). 
+ parsed: Any | None = None class LlmProviderRaisesAssertion(BaseModel): From cddb2b1085f238b437eb808476cdfa2a5609356a Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 14:22:05 -0700 Subject: [PATCH 19/24] fix: fourth CoPilot review pass on PR #42 Addresses 6 review threads, several of which surfaced second-order issues from previous rounds: - openai.py complete(): the fallback flag was driving include_response_format=False for every call, including free-form ones. That triggered the response_format strip on calls that weren't structured-output at all, clobbering caller-supplied RuntimeConfig extras. Gating the flag on schema_dict being set so free-form calls preserve extras. Unit test added. - src/openarmature/__init__.py + tests/test_smoke.py: bumped __spec_version__ from "0.10.0" to "0.15.0" to match the pyproject.toml [tool.openarmature].spec_version bump. AGENTS.md flags these three values as required to stay in sync; the submodule-bump commit missed the runtime sources. - _strict_mode_check array branch: {"type": "array"} without `items` no longer returns True. Unconstrained array content is the array analog of an object with no additionalProperties: false: the walker can't statically verify nested shapes, so strict mode rejects. Unit test added. - docs/model-providers/authoring.md: skeleton's complete() now actually enforces what its comment promised. Added `if response_schema is not None: raise ProviderInvalidRequest` to the body and surfaced the exception in the import list, so a provider copied from the skeleton can't silently violate the Protocol contract. - docs/concepts/llms.md Pydantic-class snippet: added `from typing import Literal` so the example is copy-paste- runnable (the snippet uses Literal in the class but only imported BaseModel). 
- tests/unit/test_structured_output.py nested-recursion tests: test_strict_mode_recurses_into_nested_object and test_strict_mode_anyof_branch_must_satisfy were short-circuiting at the root because the root schema itself failed strict rules. Tightened both root schemas so the recursive walk actually fires; the tests now guard the recursion they claim to. --- docs/concepts/llms.md | 2 + docs/model-providers/authoring.md | 5 ++ src/openarmature/__init__.py | 2 +- src/openarmature/llm/provider.py | 9 +++ src/openarmature/llm/providers/openai.py | 5 +- tests/test_smoke.py | 2 +- tests/unit/test_structured_output.py | 79 ++++++++++++++++++++++-- 7 files changed, 96 insertions(+), 8 deletions(-) diff --git a/docs/concepts/llms.md b/docs/concepts/llms.md index 2c4fee9..fad0f84 100644 --- a/docs/concepts/llms.md +++ b/docs/concepts/llms.md @@ -82,6 +82,8 @@ JSON Schema dict (raw-dict return). Same wire shape underneath. ### Pydantic class form ```python +from typing import Literal + from pydantic import BaseModel class Classification(BaseModel): diff --git a/docs/model-providers/authoring.md b/docs/model-providers/authoring.md index 8500665..f25ebaa 100644 --- a/docs/model-providers/authoring.md +++ b/docs/model-providers/authoring.md @@ -27,6 +27,7 @@ from pydantic import BaseModel from openarmature.llm import ( AssistantMessage, Message, + ProviderInvalidRequest, ProviderInvalidResponse, ProviderUnavailable, Response, @@ -75,6 +76,10 @@ class MyProvider: # production provider wires it through to native response_format # support or the prompt-augmentation fallback; see # ``openarmature.llm.OpenAIProvider``. 
+ if response_schema is not None: + raise ProviderInvalidRequest( + "response_schema is not supported by this provider" + ) validate_message_list(messages) validate_tools(tools) diff --git a/src/openarmature/__init__.py b/src/openarmature/__init__.py index 5a14a50..69af6a1 100644 --- a/src/openarmature/__init__.py +++ b/src/openarmature/__init__.py @@ -1,4 +1,4 @@ """OpenArmature — workflow framework for LLM pipelines and tool-calling agents.""" __version__ = "0.5.0" -__spec_version__ = "0.10.0" +__spec_version__ = "0.15.0" diff --git a/src/openarmature/llm/provider.py b/src/openarmature/llm/provider.py index e8b5984..0c19f96 100644 --- a/src/openarmature/llm/provider.py +++ b/src/openarmature/llm/provider.py @@ -316,6 +316,11 @@ def _strict_mode_check( if is_array_type: items = schema_dict.get("items") + # Missing or unrecognized items: contents are unconstrained and + # may include shapes the walker can't statically verify. Strict + # mode rejects that case. + if items is None: + return False if isinstance(items, dict): if not _strict_mode_check(items, root=root, visited=visited): return False @@ -324,6 +329,10 @@ def _strict_mode_check( for item in cast("list[Any]", items): if not _strict_mode_check(item, root=root, visited=visited): return False + else: + # items present but not dict or list (e.g. items: true) is + # not a strict-compatible shape. + return False # Determine whether the schema declared a shape we know how to # verify. 
Object/array branches above already returned False on diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py index be776d9..4793a94 100644 --- a/src/openarmature/llm/providers/openai.py +++ b/src/openarmature/llm/providers/openai.py @@ -248,7 +248,10 @@ async def complete( tools, config, schema_dict, - include_response_format=not self._force_prompt_augmentation_fallback, + # The fallback only governs structured-output calls; free- + # form calls (schema_dict is None) must preserve any + # caller-supplied response_format from RuntimeConfig extras. + include_response_format=(schema_dict is None or not self._force_prompt_augmentation_fallback), ) # Spec observability §5.5 LLM provider span: when an diff --git a/tests/test_smoke.py b/tests/test_smoke.py index fe06c38..a6d763e 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -3,4 +3,4 @@ def test_package_versions() -> None: assert openarmature.__version__ == "0.5.0" - assert openarmature.__spec_version__ == "0.10.0" + assert openarmature.__spec_version__ == "0.15.0" diff --git a/tests/unit/test_structured_output.py b/tests/unit/test_structured_output.py index fbc1bc1..69bd236 100644 --- a/tests/unit/test_structured_output.py +++ b/tests/unit/test_structured_output.py @@ -12,7 +12,7 @@ from __future__ import annotations import json -from typing import Any +from typing import Any, cast import httpx import pytest @@ -120,33 +120,49 @@ def test_strict_mode_missing_additional_properties_fails() -> None: def test_strict_mode_recurses_into_nested_object() -> None: + # Root is strict-compatible (additionalProperties: false, all + # properties in required) so the walk DOES reach the nested + # object. The nested object violates the rule; breaking the + # recursion would break this test rather than be hidden by a + # root-level fail. 
schema: dict[str, Any] = { "type": "object", "properties": { "outer": { "type": "object", "properties": {"inner": {"type": "string"}}, - "required": [], # nested object violates rule + "required": [], # nested object violates the rule + "additionalProperties": False, }, }, "required": ["outer"], + "additionalProperties": False, } assert strict_mode_supported(schema) is False def test_strict_mode_anyof_branch_must_satisfy() -> None: - # anyOf member violating the constraint → False - schema = { + # Root is strict-compatible so the walk reaches the anyOf branches. + # One branch is a non-strict object (no required, no + # additionalProperties: false) — the failure must come from there, + # not from the root. + schema: dict[str, Any] = { "type": "object", "properties": { "x": { "anyOf": [ {"type": "string"}, - {"type": "object", "properties": {"y": {"type": "string"}}}, # no required + { + "type": "object", + "properties": {"y": {"type": "string"}}, + # no required, no additionalProperties: false → + # branch violation + }, ] }, }, "required": ["x"], + "additionalProperties": False, } assert strict_mode_supported(schema) is False @@ -191,6 +207,18 @@ def test_strict_mode_empty_property_schema_fails() -> None: assert strict_mode_supported(schema) is False +def test_strict_mode_array_without_items_fails() -> None: + # An array without items has unconstrained content; the walker + # can't statically verify nested shapes, so strict mode rejects. + schema: dict[str, Any] = { + "type": "object", + "properties": {"tags": {"type": "array"}}, + "required": ["tags"], + "additionalProperties": False, + } + assert strict_mode_supported(schema) is False + + def test_strict_mode_primitive_property_passes() -> None: # Primitive types (string, integer, number, boolean, null) carry no # nested structure to verify, so they are terminal-strict-compatible. 
@@ -514,3 +542,44 @@ async def test_inspect_property_fallback_when_forced() -> None: assert provider.uses_prompt_augmentation_fallback is True finally: await provider.aclose() + + +async def test_fallback_mode_preserves_response_format_on_free_form_calls() -> None: + # The fallback gate is structured-output-only. A free-form call + # (response_schema=None) on a fallback-mode provider must preserve + # a caller-supplied ``response_format`` from RuntimeConfig extras, + # because the fallback contract only governs structured-output + # calls. + from openarmature.llm import RuntimeConfig + + transport = _mock_chat_completion_response('{"ok":true}') + provider = OpenAIProvider( + base_url="http://mock-llm.test", + model="test-model", + api_key="test-key", + transport=transport, + force_prompt_augmentation_fallback=True, + ) + captured_body_response_format: dict[str, Any] | None = None + original_post = provider._client.post + + async def capturing_post(*args: Any, **kwargs: Any) -> Any: + nonlocal captured_body_response_format + body = kwargs.get("json") + if isinstance(body, dict): + captured_body_response_format = cast("dict[str, Any]", body).get("response_format") + return await original_post(*args, **kwargs) + + # Avoid touching the captured-request shape directly; intercept at + # the client.post level so we see the constructed JSON body. 
+ provider._client.post = capturing_post # type: ignore[method-assign] + try: + caller_extra = {"type": "json_object"} + config = RuntimeConfig(response_format=caller_extra) # type: ignore[call-arg] + await provider.complete( + [UserMessage(content="hello")], + config=config, + ) + finally: + await provider.aclose() + assert captured_body_response_format == caller_extra From b283f9728b02cbba6d4e57672811238bb6247089 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 14:36:14 -0700 Subject: [PATCH 20/24] docs: cumulative strict-mode constraint list + spec-version-drift test Captures two follow-ups surfaced by the four CoPilot review rounds: - docs/concepts/llms.md "Strict mode" section expanded into the full constraint list. After four rounds of tightening the strict_mode_supported heuristic, the rule set is stable and the user-facing surface should list it directly rather than make callers read provider.py. The page frames the list as the authoritative set: anything not on it trips to non-strict. - docs/model-providers/index.md "Strict mode" subsection trimmed and now links into the concepts page for the full list, following the established split (concepts/ owns the deep-dive, model-providers/ stays terse). - tests/test_smoke.py adds test_spec_version_matches_pyproject: reads pyproject.toml's [tool.openarmature].spec_version and asserts it equals openarmature.__spec_version__. AGENTS.md flags these as required to stay in sync; the previous smoke test only checked internal consistency between __spec_version__ and its asserted value, so the pyproject side could drift silently (and did, in the original submodule-bump commit). 
--- docs/concepts/llms.md | 44 ++++++++++++++++++++++++----------- docs/model-providers/index.md | 16 ++++++------- tests/test_smoke.py | 16 +++++++++++++ 3 files changed, 54 insertions(+), 22 deletions(-) diff --git a/docs/concepts/llms.md b/docs/concepts/llms.md index fad0f84..05a01f7 100644 --- a/docs/concepts/llms.md +++ b/docs/concepts/llms.md @@ -163,22 +163,38 @@ inspect which path is active. ### Strict mode OpenAI's native path supports a `strict: true` flag that engages the -model's schema-constrained decoding (the model literally cannot emit -non-conforming tokens). It applies only when the schema satisfies -specific constraints: `additionalProperties` explicitly `false` on every -object, every key in `properties` listed in `required`, no -unresolvable `$ref` targets. - -`strict_mode_supported(schema)` performs the deep recursive check. The -provider passes `strict: true` to the wire when the schema satisfies -it, and `strict: false` otherwise. Either way, the provider validates -the response post-receive against the supplied schema. Strict is a +model's schema-constrained decoding: the model literally cannot emit +non-conforming tokens. The framework decides `strict: true` vs +`strict: false` automatically based on whether your schema satisfies +strict-mode constraints. Either way, the framework validates the +response post-receive against the supplied schema; strict is a wire-level optimization, not a correctness requirement. -If you control the schema, prefer making it strict-compatible: -explicit `additionalProperties: false` plus `required` covering every -property. Pydantic-derived schemas may need a tweak to satisfy this -(`model_config = ConfigDict(extra="forbid")` on the class). +`strict_mode_supported(schema)` (exported from `openarmature.llm`) +performs the deep recursive check. The heuristic is conservative — +anything not on the list below trips to `strict: false`: + +- Top-level schema is `type: "object"`. 
+- For every nested object: `additionalProperties` is **explicitly** + `false`, and every key in `properties` is listed in `required`. +- For every nested array: `items` is present and points to a + verifiable schema (dict, or tuple-form list of dicts). +- Every branch of `anyOf` / `oneOf` / `allOf` independently satisfies + the above. +- Internal `$ref` targets (`#/...` or bare `#`) resolve and their + resolved schema passes. External refs (any other URI) and `$ref` + cycles are handled conservatively. +- Primitive types (`string`, `integer`, `number`, `boolean`, `null`) + are accepted as terminal: no nested structure to verify. +- Empty `{}` schemas and unrecognized-keyword schemas (`const`-only, + `enum`-only, etc.) trip to non-strict; the walker can't statically + verify them. + +If you control the schema and want strict mode, the easiest path is to +set `additionalProperties: false` and put every property in `required` +on every object. Pydantic-derived schemas may need `model_config = +ConfigDict(extra="forbid")` on the class to get the +`additionalProperties: false` in the generated JSON Schema. ## Routing on parsed fields diff --git a/docs/model-providers/index.md b/docs/model-providers/index.md index 89a68e0..d074d43 100644 --- a/docs/model-providers/index.md +++ b/docs/model-providers/index.md @@ -153,14 +153,14 @@ inspect which path is active. ### Strict mode OpenAI's native path supports a `strict: true` flag that engages -schema-constrained decoding. It applies only when the schema satisfies -specific constraints: `additionalProperties` explicitly `false` on -every object, every key in `properties` listed in `required`, no -unresolvable `$ref` targets. `strict_mode_supported(schema)` (exported -from `openarmature.llm`) performs the deep recursive check; the -provider passes `strict: true` to the wire when the schema satisfies -it, and `strict: false` otherwise. Either way, the provider validates -the response post-receive. +schema-constrained decoding. 
The provider passes `strict: true` when +the schema satisfies the strict-mode constraints and `strict: false` +otherwise; the full constraint list lives on the +[LLMs concepts page](../concepts/llms.md#strict-mode). +`strict_mode_supported(schema)` is exported from `openarmature.llm` +for callers wanting to check the heuristic directly. Either way, the +provider validates the response post-receive against the supplied +schema. ## A minimal example diff --git a/tests/test_smoke.py b/tests/test_smoke.py index a6d763e..f9ff27a 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1,6 +1,22 @@ +import tomllib +from pathlib import Path + import openarmature def test_package_versions() -> None: assert openarmature.__version__ == "0.5.0" assert openarmature.__spec_version__ == "0.15.0" + + +def test_spec_version_matches_pyproject() -> None: + # AGENTS.md flags __spec_version__, pyproject.toml's + # [tool.openarmature].spec_version, and the submodule pin as + # required to stay in sync. The test_package_versions check above + # only verifies internal consistency between __spec_version__ and + # its asserted value, so the pyproject side can drift undetected. + # This test catches that class of three-place drift. + pyproject_path = Path(__file__).resolve().parent.parent / "pyproject.toml" + config = tomllib.loads(pyproject_path.read_text()) + pyproject_spec_version = config["tool"]["openarmature"]["spec_version"] + assert openarmature.__spec_version__ == pyproject_spec_version From 4c12add8d3ea8d0ecf65cbb59b5f0ff8b08f02af Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 15:14:44 -0700 Subject: [PATCH 21/24] fix: fifth CoPilot review pass on PR #42 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses 4 review threads: - examples/00-hello-world/main.py: provider construction moved from module level to a lazy _get_provider() helper backed by a module global. 
Avoids opening an httpx.AsyncClient when tooling imports the module without running main() — the smoke test now doesn't trigger construction across 6 example loads. main()'s finally only closes when the cached instance is set. - src/openarmature/llm/provider.py: validate_response_schema now walks all $ref values via _check_refs_resolvable and raises ProviderInvalidRequest for any non-internal-resolvable ref. Draft202012Validator.check_schema doesn't traverse refs, so previously an external ref slipped past the boundary and surfaced as a raw referencing-library exception at validate time. Pre-validation surfaces the clean category at the API boundary. - src/openarmature/llm/providers/openai.py: _parse_and_validate now also catches jsonschema.SchemaError and maps it to StructuredOutputInvalid. Safety net for any schema-side exception (including ref-resolution failures) that pre- validation might miss. - tests/unit/test_structured_output.py: - test_strict_mode_unresolvable_ref_fails: root tightened with additionalProperties: false so the walk reaches the $ref branch (was short-circuiting at the root). - Added test_validate_response_schema_rejects_external_ref covering the new pre-validation path. - tests/test_smoke.py: added test_spec_version_matches_submodule_pin shelling to `git -C openarmature-spec describe --tags --exact-match HEAD` and asserting it equals v{__spec_version__}. Skips cleanly when the submodule isn't a git checkout (installed-package CI lanes). Completes the three-place drift check from AGENTS.md (__spec_version__ ↔ pyproject ↔ submodule pin). 
--- examples/00-hello-world/main.py | 36 ++++++++++++++-------- src/openarmature/llm/provider.py | 28 +++++++++++++++++ src/openarmature/llm/providers/openai.py | 11 +++++++ tests/test_smoke.py | 38 +++++++++++++++++++++--- tests/unit/test_structured_output.py | 22 +++++++++++++- 5 files changed, 118 insertions(+), 17 deletions(-) diff --git a/examples/00-hello-world/main.py b/examples/00-hello-world/main.py index 90a7628..9d6ba7d 100644 --- a/examples/00-hello-world/main.py +++ b/examples/00-hello-world/main.py @@ -78,21 +78,32 @@ class PipelineState(State): metadata: Annotated[dict[str, str], merge] = Field(default_factory=dict) -_provider = OpenAIProvider( - base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"), - model=os.environ.get("LLM_MODEL", "gpt-4o-mini"), - # ``or None`` so an exported-but-empty LLM_API_KEY falls through to - # no-auth (matters for local servers like vLLM that reject an empty - # bearer header). - api_key=os.environ.get("LLM_API_KEY") or None, -) +# Lazy initialization: the provider is constructed on first call from +# inside a node body, not at import time. That avoids opening an +# httpx.AsyncClient connection pool when tools (test harnesses, doc +# builders, IDE inspection) import this module without running main(). +_provider_instance: OpenAIProvider | None = None + + +def _get_provider() -> OpenAIProvider: + global _provider_instance + if _provider_instance is None: + _provider_instance = OpenAIProvider( + base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"), + model=os.environ.get("LLM_MODEL", "gpt-4o-mini"), + # ``or None`` so an exported-but-empty LLM_API_KEY falls + # through to no-auth (matters for local servers like vLLM + # that reject an empty bearer header). + api_key=os.environ.get("LLM_API_KEY") or None, + ) + return _provider_instance async def classify(state: PipelineState) -> Mapping[str, Any]: # response_schema=class form: parsed comes back as a Classification # instance. 
The model picks the branch (research vs summarize) and # the routing function below reads it as a typed field. - response = await _provider.complete( + response = await _get_provider().complete( [ UserMessage( content=( @@ -111,7 +122,7 @@ async def research(state: PipelineState) -> Mapping[str, Any]: # Same wire shape as the class form: the framework converts a # class via .model_json_schema() under the hood. Use dict when # you want raw shape without declaring a Pydantic model. - response = await _provider.complete( + response = await _get_provider().complete( [ UserMessage( content=( @@ -140,7 +151,7 @@ async def research(state: PipelineState) -> Mapping[str, Any]: async def summarize(state: PipelineState) -> Mapping[str, Any]: # Pydantic-class form again: parsed is a Summary instance with # a typed one_liner and a confidence float. - response = await _provider.complete( + response = await _get_provider().complete( [ UserMessage( content=( @@ -200,7 +211,8 @@ async def main() -> None: print(f"metadata: {final.metadata}") finally: await graph.drain() - await _provider.aclose() + if _provider_instance is not None: + await _provider_instance.aclose() if __name__ == "__main__": diff --git a/src/openarmature/llm/provider.py b/src/openarmature/llm/provider.py index 0c19f96..8b832b0 100644 --- a/src/openarmature/llm/provider.py +++ b/src/openarmature/llm/provider.py @@ -210,6 +210,34 @@ def validate_response_schema(schema: object) -> None: jsonschema.Draft202012Validator.check_schema(schema_dict) except jsonschema.SchemaError as exc: raise ProviderInvalidRequest(f"response_schema: not a valid JSON Schema: {exc.message}") from exc + # check_schema() validates the schema's own syntax but does not + # traverse $ref targets. Walk all refs in the schema and confirm + # each resolves to a subschema within the document, so external or + # broken refs fail here rather than escaping at parse time as + # raw referencing-library exceptions. 
+ _check_refs_resolvable(schema_dict) + + +def _check_refs_resolvable(schema: dict[str, Any]) -> None: + """Walk the schema tree and raise ProviderInvalidRequest for any + $ref value that cannot be resolved internally.""" + + def walk(node: Any) -> None: + if isinstance(node, dict): + node_dict = cast("dict[str, Any]", node) + ref = node_dict.get("$ref") + if isinstance(ref, str) and _resolve_ref(ref, schema) is None: + raise ProviderInvalidRequest( + f"response_schema: unresolvable $ref {ref!r}; only internal " + "refs (#/... or #) are supported by the provider's validator" + ) + for value in node_dict.values(): + walk(value) + elif isinstance(node, list): + for item in cast("list[Any]", node): + walk(item) + + walk(schema) # Strict mode (OpenAI's response_format strict:true and the analogous diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py index 4793a94..5a8bab7 100644 --- a/src/openarmature/llm/providers/openai.py +++ b/src/openarmature/llm/providers/openai.py @@ -557,6 +557,17 @@ def _parse_and_validate( raw_content=content, failure_description=exc.message, ) from exc + except jsonschema.SchemaError as exc: + # Safety net: validate_response_schema's pre-validation should + # have caught this, but any schema-side exception (including + # ref-resolution failures via the `referencing` library) MUST + # still map to the canonical taxonomy rather than leak raw. 
+ raise StructuredOutputInvalid( + "response could not be validated against the supplied schema", + response_schema=schema_dict, + raw_content=content, + failure_description=str(exc), + ) from exc return parsed_dict diff --git a/tests/test_smoke.py b/tests/test_smoke.py index f9ff27a..9e47599 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1,6 +1,9 @@ +import subprocess import tomllib from pathlib import Path +import pytest + import openarmature @@ -12,11 +15,38 @@ def test_package_versions() -> None: def test_spec_version_matches_pyproject() -> None: # AGENTS.md flags __spec_version__, pyproject.toml's # [tool.openarmature].spec_version, and the submodule pin as - # required to stay in sync. The test_package_versions check above - # only verifies internal consistency between __spec_version__ and - # its asserted value, so the pyproject side can drift undetected. - # This test catches that class of three-place drift. + # required to stay in sync. This test catches the pyproject ↔ + # runtime drift class; test_spec_version_matches_submodule_pin + # below catches the submodule side. pyproject_path = Path(__file__).resolve().parent.parent / "pyproject.toml" config = tomllib.loads(pyproject_path.read_text()) pyproject_spec_version = config["tool"]["openarmature"]["spec_version"] assert openarmature.__spec_version__ == pyproject_spec_version + + +def test_spec_version_matches_submodule_pin() -> None: + # The submodule's git HEAD must be at the v{__spec_version__} + # tag, completing the three-place drift check from AGENTS.md. + # Skips cleanly when the submodule isn't a git checkout (e.g., + # installed-package CI lanes pulling from PyPI sdists). 
+ spec_dir = Path(__file__).resolve().parent.parent / "openarmature-spec" + if not (spec_dir / ".git").exists(): + pytest.skip("openarmature-spec is not a git checkout") + try: + result = subprocess.run( + ["git", "-C", str(spec_dir), "describe", "--tags", "--exact-match", "HEAD"], + capture_output=True, + text=True, + check=True, + ) + except subprocess.CalledProcessError: + pytest.fail( + "submodule HEAD is not at any tag; bump it to " + f"v{openarmature.__spec_version__} or update __spec_version__" + ) + submodule_tag = result.stdout.strip() + expected = f"v{openarmature.__spec_version__}" + assert submodule_tag == expected, ( + f"submodule pinned at {submodule_tag}, but __spec_version__ is " + f"{openarmature.__spec_version__} (expected tag {expected})" + ) diff --git a/tests/unit/test_structured_output.py b/tests/unit/test_structured_output.py index 69bd236..7e7889e 100644 --- a/tests/unit/test_structured_output.py +++ b/tests/unit/test_structured_output.py @@ -57,6 +57,22 @@ def test_validate_response_schema_rejects_missing_type() -> None: validate_response_schema({"properties": {"x": {"type": "integer"}}}) +def test_validate_response_schema_rejects_external_ref() -> None: + # External or otherwise unresolvable $refs would surface at + # validate() time as raw referencing-library exceptions; the + # boundary check should reject them with the canonical + # ProviderInvalidRequest category. 
+ with pytest.raises(ProviderInvalidRequest, match="unresolvable"): + validate_response_schema( + { + "type": "object", + "properties": {"x": {"$ref": "https://example.com/schema.json"}}, + "required": ["x"], + "additionalProperties": False, + } + ) + + def test_validate_response_schema_rejects_malformed_schema() -> None: # `"type": "foobar"` is not a valid JSON Schema type keyword; the # boundary check should catch this and raise ProviderInvalidRequest @@ -186,10 +202,14 @@ def test_strict_mode_resolves_internal_ref() -> None: def test_strict_mode_unresolvable_ref_fails() -> None: - schema = { + # Root is strict-compatible so the walk reaches the $ref inside + # properties.x. The external ref is unresolvable, so the walker + # returns False from the ref branch (not from a root-level fail). + schema: dict[str, Any] = { "type": "object", "properties": {"x": {"$ref": "https://example.com/external-schema.json"}}, "required": ["x"], + "additionalProperties": False, } assert strict_mode_supported(schema) is False From b8ffc43152392ef165451b23e7ede3bd2da7400b Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 15:18:50 -0700 Subject: [PATCH 22/24] fix(test): replace git-describe submodule check with CHANGELOG parse The git-describe-based submodule check from the previous commit passed locally but failed in CI because actions/checkout pins the submodule to its recorded SHA without fetching the spec repo's tags. `git describe --tags --exact-match` then finds nothing and the test fails with "submodule HEAD is not at any tag." Switching to parsing openarmature-spec/CHANGELOG.md: the spec follows Keep a Changelog, so the first non-[Unreleased] `## [X.Y.Z]` heading is the version at the pinned commit. This works regardless of CI tag-fetch state and catches the same drift class (submodule moved to a different release). Skips cleanly when CHANGELOG.md isn't present (installed-package lanes that don't ship the submodule checkout). 
--- tests/test_smoke.py | 56 +++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 9e47599..bc8392c 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1,4 +1,4 @@ -import subprocess +import re import tomllib from pathlib import Path @@ -16,7 +16,7 @@ def test_spec_version_matches_pyproject() -> None: # AGENTS.md flags __spec_version__, pyproject.toml's # [tool.openarmature].spec_version, and the submodule pin as # required to stay in sync. This test catches the pyproject ↔ - # runtime drift class; test_spec_version_matches_submodule_pin + # runtime drift class; test_spec_version_matches_submodule_changelog # below catches the submodule side. pyproject_path = Path(__file__).resolve().parent.parent / "pyproject.toml" config = tomllib.loads(pyproject_path.read_text()) @@ -24,29 +24,31 @@ def test_spec_version_matches_pyproject() -> None: assert openarmature.__spec_version__ == pyproject_spec_version -def test_spec_version_matches_submodule_pin() -> None: - # The submodule's git HEAD must be at the v{__spec_version__} - # tag, completing the three-place drift check from AGENTS.md. - # Skips cleanly when the submodule isn't a git checkout (e.g., - # installed-package CI lanes pulling from PyPI sdists). 
- spec_dir = Path(__file__).resolve().parent.parent / "openarmature-spec" - if not (spec_dir / ".git").exists(): - pytest.skip("openarmature-spec is not a git checkout") - try: - result = subprocess.run( - ["git", "-C", str(spec_dir), "describe", "--tags", "--exact-match", "HEAD"], - capture_output=True, - text=True, - check=True, - ) - except subprocess.CalledProcessError: - pytest.fail( - "submodule HEAD is not at any tag; bump it to " - f"v{openarmature.__spec_version__} or update __spec_version__" - ) - submodule_tag = result.stdout.strip() - expected = f"v{openarmature.__spec_version__}" - assert submodule_tag == expected, ( - f"submodule pinned at {submodule_tag}, but __spec_version__ is " - f"{openarmature.__spec_version__} (expected tag {expected})" +# Keep a Changelog heading: ``## [0.15.0]`` (with optional trailing +# date). The ``[Unreleased]`` entry uses a non-numeric tag and is +# skipped by this pattern. +_CHANGELOG_VERSION_RE = re.compile(r"^## \[(\d+\.\d+\.\d+)\]") + + +def test_spec_version_matches_submodule_changelog() -> None: + # Third value AGENTS.md flags: the submodule pin (the spec + # checkout the parent repo records). We verify by reading the + # spec's CHANGELOG.md at the pinned commit and asserting the + # latest versioned entry equals __spec_version__. CHANGELOG + # parsing is more robust than ``git describe`` (no tag-fetch + # dependency, works in any checkout shape) and the spec follows + # Keep a Changelog so the format is stable. 
+ changelog_path = Path(__file__).resolve().parent.parent / "openarmature-spec" / "CHANGELOG.md" + if not changelog_path.exists(): + pytest.skip("openarmature-spec/CHANGELOG.md is not present") + for line in changelog_path.read_text().splitlines(): + match = _CHANGELOG_VERSION_RE.match(line) + if match: + submodule_latest = match.group(1) + break + else: + pytest.fail("could not find a versioned heading in openarmature-spec/CHANGELOG.md") + assert openarmature.__spec_version__ == submodule_latest, ( + f"submodule's CHANGELOG latest is {submodule_latest}, but " + f"__spec_version__ is {openarmature.__spec_version__}" ) From 578322d899a618f5178bbcd3bc69dcb0d5addac1 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 15:23:39 -0700 Subject: [PATCH 23/24] fix(test): satisfy CodeQL on the changelog-parsing test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeQL flagged the for/else: pytest.fail() pattern as a potentially-uninitialized-local-variable warning because it doesn't model pytest.fail as NoReturn — the analyzer sees a path where submodule_latest is referenced after the loop without ever being bound. Pulling the parse into _read_latest_spec_version_from_changelog that explicitly returns the version or raises AssertionError. Eliminates the unreachable-after-fail pattern and reads cleaner. --- tests/test_smoke.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index bc8392c..19f0ef0 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -30,6 +30,19 @@ def test_spec_version_matches_pyproject() -> None: _CHANGELOG_VERSION_RE = re.compile(r"^## \[(\d+\.\d+\.\d+)\]") +def _read_latest_spec_version_from_changelog(path: Path) -> str: + """Return the first non-``[Unreleased]`` versioned heading from a + Keep-a-Changelog file. 
Raises :class:`AssertionError` if no + versioned heading is present (the file is malformed for our + purposes). + """ + for line in path.read_text().splitlines(): + match = _CHANGELOG_VERSION_RE.match(line) + if match: + return match.group(1) + raise AssertionError(f"no versioned heading found in {path}") + + def test_spec_version_matches_submodule_changelog() -> None: # Third value AGENTS.md flags: the submodule pin (the spec # checkout the parent repo records). We verify by reading the @@ -41,13 +54,7 @@ def test_spec_version_matches_submodule_changelog() -> None: changelog_path = Path(__file__).resolve().parent.parent / "openarmature-spec" / "CHANGELOG.md" if not changelog_path.exists(): pytest.skip("openarmature-spec/CHANGELOG.md is not present") - for line in changelog_path.read_text().splitlines(): - match = _CHANGELOG_VERSION_RE.match(line) - if match: - submodule_latest = match.group(1) - break - else: - pytest.fail("could not find a versioned heading in openarmature-spec/CHANGELOG.md") + submodule_latest = _read_latest_spec_version_from_changelog(changelog_path) assert openarmature.__spec_version__ == submodule_latest, ( f"submodule's CHANGELOG latest is {submodule_latest}, but " f"__spec_version__ is {openarmature.__spec_version__}" From e5f1426d7bed30b8e11fb41fe4f4c8d45713b76b Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 15 May 2026 15:42:23 -0700 Subject: [PATCH 24/24] fix: seventh CoPilot review pass on PR #42 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six second-order correctness fixes surfaced by the round-7 review, mostly hardening _resolve_ref, _check_refs_resolvable, and the Pydantic-class validation path. - _resolve_ref now distinguishes "unresolvable" (path doesn't exist / external ref) from "resolved to non-dict" via a module-level _UNRESOLVABLE sentinel. 
Boolean schemas (true/false) are valid JSON Schema subschemas; a $ref to one was being incorrectly rejected as ProviderInvalidRequest. Now resolves cleanly and strict-mode still returns False on bool targets (the correct conservative answer). - validate_response_schema's metaschema check now uses jsonschema.validators.validator_for(schema) instead of the hard-coded Draft 2020-12. A valid draft-07 schema (e.g. tuple- form items, common in tooling) was being rejected at the boundary but accepted at runtime. Boundary and runtime now agree. - _resolve_ref percent-decodes JSON Pointer tokens before applying the ~1 / ~0 unescape pair. Per RFC 6901 §6, a JSON Pointer in a URI fragment is percent-encoded; refs like #/$defs/Name%20With%20Spaces now resolve correctly. - _check_refs_resolvable now walks only known subschema-bearing keywords (properties, patternProperties, additionalProperties, items, prefixItems, contains, if/then/else, allOf/anyOf/oneOf/not, $defs/definitions, dependentSchemas, propertyNames, unevaluatedItems, unevaluatedProperties). A "$ref" key under data positions (default, const, enum, $comment, x-* extensions) is data, not a schema reference, and is no longer incorrectly resolved. - docs/concepts/llms.md "LLM calls are async IO inside a node" section reframed: module-level provider construction leaks the httpx.AsyncClient in tooling/test/docs-build imports. The page now documents application-startup / lifecycle-managed construction (lazy on-first-use plus aclose in finally / shutdown hook), matching the pattern the hello-world example was made lazy for. - _parse_and_validate's Pydantic-class path now runs jsonschema.validate against the generated JSON Schema BEFORE calling model_validate. Pydantic's default model_validate is coercive (accepts "30" for an int field), which diverged from the strict dict-schema path. Both paths now apply the same jsonschema check first; model_validate then constructs the typed instance. 
- jsonschema.ValidationError's failure description now includes exc.json_path (e.g. "$.age: '30' is not of type 'integer'"). The bare exc.message lost the field name, breaking caller diagnostics for the missing-field / wrong-type-at-path cases. Five new unit tests cover the bool-ref, draft-07, percent-encoded ref, ref-under-data, and Pydantic-coercion-rejection cases. --- docs/concepts/llms.md | 55 +++++++--- src/openarmature/llm/provider.py | 128 ++++++++++++++++++----- src/openarmature/llm/providers/openai.py | 35 ++++++- tests/unit/test_structured_output.py | 86 +++++++++++++++ 4 files changed, 261 insertions(+), 43 deletions(-) diff --git a/docs/concepts/llms.md b/docs/concepts/llms.md index 05a01f7..c44b23d 100644 --- a/docs/concepts/llms.md +++ b/docs/concepts/llms.md @@ -9,35 +9,60 @@ mixing LLM calls into graph nodes. ## LLM calls are async IO inside a node -Construct one [`Provider`](../reference/llm.md) at startup and share it -across nodes. Each `complete()` call carries the full message list and -returns a [`Response`](../reference/llm.md); the provider is stateless -and reentrant, so multiple nodes (or fan-out instances) can call into -it concurrently without coordination. +Construct one [`Provider`](../reference/llm.md) when your application +owns its lifecycle (entry-point coroutine, FastAPI startup event, +lazy on-first-use) and share it across nodes. Each `complete()` call +carries the full message list and returns a +[`Response`](../reference/llm.md); the provider is stateless and +reentrant, so multiple nodes (or fan-out instances) can call into it +concurrently without coordination. + +`OpenAIProvider` eagerly opens an `httpx.AsyncClient` in its +constructor; that client must be closed with `await provider.aclose()` +to release the connection pool. 
Constructing the provider as a +module-level side effect (`provider = OpenAIProvider(...)` at the top +of the file) leaks the client in tooling, tests, and docs-build +processes that import the module without running your shutdown path. +Prefer lazy construction or an explicit lifecycle hook. ```python import os from openarmature.llm import OpenAIProvider, UserMessage -provider = OpenAIProvider( - base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"), - model="gpt-4o-mini", - api_key=os.environ["LLM_API_KEY"], -) +_provider_instance: OpenAIProvider | None = None + + +def _get_provider() -> OpenAIProvider: + global _provider_instance + if _provider_instance is None: + _provider_instance = OpenAIProvider( + base_url=os.environ.get("LLM_BASE_URL", "https://api.openai.com"), + model="gpt-4o-mini", + api_key=os.environ["LLM_API_KEY"], + ) + return _provider_instance async def analyze(state: AnalysisState) -> dict: - response = await provider.complete( + response = await _get_provider().complete( [UserMessage(content=state.text)], ) return {"raw": response.message.content} + + +async def main() -> None: + try: + ... # build graph, invoke + finally: + if _provider_instance is not None: + await _provider_instance.aclose() ``` The provider goes wherever your application's other long-lived -dependencies go: module-level constant, dependency-injection -container, factory function. It does not need to be constructed per -call, and constructing it cheaply (no eager network calls) means -import-time setup is fine. +dependencies go (dependency-injection container, factory, lazy +module-level cache), and you close it on the same lifecycle hook you +use for those. A FastAPI app uses `app.on_event("shutdown")`; a +script uses a `try/finally` around the entry-point coroutine. A real graph hits LLMs from multiple nodes. 
The conventional shape: diff --git a/src/openarmature/llm/provider.py b/src/openarmature/llm/provider.py index 8b832b0..8b475b1 100644 --- a/src/openarmature/llm/provider.py +++ b/src/openarmature/llm/provider.py @@ -39,8 +39,10 @@ from collections.abc import Sequence from typing import Any, Protocol, cast +from urllib.parse import unquote import jsonschema +from jsonschema.validators import validator_for from pydantic import BaseModel from .errors import ProviderInvalidRequest @@ -207,7 +209,13 @@ def validate_response_schema(schema: object) -> None: # instance-against-schema failures and is handled separately on the # parse path. try: - jsonschema.Draft202012Validator.check_schema(schema_dict) + # Pick the validator class the runtime would use, so the + # boundary check uses the same metaschema as + # jsonschema.validate(). validator_for reads the schema's + # $schema URL; absent that, it defaults to the latest + # supported draft. + validator_cls = validator_for(schema_dict) + validator_cls.check_schema(schema_dict) except jsonschema.SchemaError as exc: raise ProviderInvalidRequest(f"response_schema: not a valid JSON Schema: {exc.message}") from exc # check_schema() validates the schema's own syntax but does not @@ -218,24 +226,73 @@ def validate_response_schema(schema: object) -> None: _check_refs_resolvable(schema_dict) +# Subschema-bearing keywords by container shape. Used by +# _check_refs_resolvable to walk only positions that the runtime +# treats as schemas. Anything outside these is data (default, const, +# enum, annotations like description / $comment, unknown / extension +# keywords like x-*) where a nested "$ref" key is just a value. 
+_SINGLE_SUBSCHEMA_KEYWORDS = frozenset( + { + "additionalProperties", + "propertyNames", + "items", + "contains", + "if", + "then", + "else", + "not", + "unevaluatedItems", + "unevaluatedProperties", + } +) +_MAP_OF_SUBSCHEMA_KEYWORDS = frozenset( + { + "properties", + "patternProperties", + "$defs", + "definitions", + "dependentSchemas", + } +) +_LIST_OF_SUBSCHEMA_KEYWORDS = frozenset( + { + "allOf", + "anyOf", + "oneOf", + "prefixItems", + } +) + + def _check_refs_resolvable(schema: dict[str, Any]) -> None: - """Walk the schema tree and raise ProviderInvalidRequest for any - $ref value that cannot be resolved internally.""" + """Walk subschema positions in the document and raise + ProviderInvalidRequest for any $ref value that cannot be resolved + internally. Skips data positions (default, const, enum, + annotations, unknown / extension keywords) where a "$ref" key is + just a value and the runtime would not try to resolve it. + """ def walk(node: Any) -> None: - if isinstance(node, dict): - node_dict = cast("dict[str, Any]", node) - ref = node_dict.get("$ref") - if isinstance(ref, str) and _resolve_ref(ref, schema) is None: - raise ProviderInvalidRequest( - f"response_schema: unresolvable $ref {ref!r}; only internal " - "refs (#/... or #) are supported by the provider's validator" - ) - for value in node_dict.values(): + if not isinstance(node, dict): + return + node_dict = cast("dict[str, Any]", node) + ref = node_dict.get("$ref") + if isinstance(ref, str) and _resolve_ref(ref, schema) is _UNRESOLVABLE: + raise ProviderInvalidRequest( + f"response_schema: unresolvable $ref {ref!r}; only internal " + "refs (#/... 
or #) are supported by the provider's validator" + ) + for key, value in node_dict.items(): + if key in _SINGLE_SUBSCHEMA_KEYWORDS: walk(value) - elif isinstance(node, list): - for item in cast("list[Any]", node): - walk(item) + elif key in _MAP_OF_SUBSCHEMA_KEYWORDS: + if isinstance(value, dict): + for subschema in cast("dict[str, Any]", value).values(): + walk(subschema) + elif key in _LIST_OF_SUBSCHEMA_KEYWORDS: + if isinstance(value, list): + for subschema in cast("list[Any]", value): + walk(subschema) walk(schema) @@ -299,8 +356,11 @@ def _strict_mode_check( return True visited.add(ref) target = _resolve_ref(ref, root) - if target is None: + if target is _UNRESOLVABLE: return False + # _strict_mode_check on a non-dict (e.g. boolean subschema) + # returns False via the `if not isinstance(schema, dict)` line + # at the top — the conservative answer for strict-mode compat. return _strict_mode_check(target, root=root, visited=visited) # Combinator branches — every branch must independently satisfy @@ -381,29 +441,43 @@ def _strict_mode_check( return False +# Sentinel returned by _resolve_ref when the ref isn't internally +# resolvable (path doesn't exist in the document, or the ref is +# external / relative). Distinguishing "unresolvable" from "resolved +# to a non-dict value" matters because boolean subschemas (true / +# false) are valid JSON Schema and we want to surface them to the +# caller rather than reject them at the boundary. +_UNRESOLVABLE: Any = object() + + # Internal-only $ref resolver. Handles JSON Pointer fragments rooted # at the document (`#/$defs/Foo`, `#/definitions/Foo`); external refs -# (anything not starting with `#/`) are unresolvable here and return -# None. JSON Pointer escape rules (`~0` for `~`, `~1` for `/`) are -# unescaped per RFC 6901. -def _resolve_ref(ref: str, root: dict[str, Any]) -> dict[str, Any] | None: +# (anything but bare `#` or a `#/...` path) return the unresolvable sentinel.
+# JSON Pointer escape rules (`~0` for `~`, `~1` for `/`) are unescaped +# per RFC 6901. +def _resolve_ref(ref: str, root: dict[str, Any]) -> Any: # Bare "#" is the JSON Pointer for the document root; "#/" prefixes # an internal path. Anything else (external URIs, relative refs we - # can't resolve without a base) we treat as unresolvable. + # can't resolve without a base) is treated as unresolvable. if ref == "#": return root if not ref.startswith("#/"): - return None + return _UNRESOLVABLE parts = ref[2:].split("/") current: Any = root for part in parts: - decoded = part.replace("~1", "/").replace("~0", "~") + # Per RFC 6901 §6: a JSON Pointer used as a URI fragment is + # percent-encoded; percent-decoding happens BEFORE the + # `~1` / `~0` JSON-Pointer unescape pair. + decoded = unquote(part).replace("~1", "/").replace("~0", "~") if not isinstance(current, dict) or decoded not in cast("dict[str, Any]", current): - return None + return _UNRESOLVABLE current = cast("dict[str, Any]", current)[decoded] - if isinstance(current, dict): - return cast("dict[str, Any]", current) - return None + # Return whatever's at the resolved path — dict, bool, or otherwise. + # Callers decide what to do with non-dict subschemas: strict-mode + # validation conservatively rejects them; ref-resolvability + # validation accepts them. + return current __all__ = [ diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py index 5a8bab7..682dfa9 100644 --- a/src/openarmature/llm/providers/openai.py +++ b/src/openarmature/llm/providers/openai.py @@ -537,6 +537,29 @@ def _parse_and_validate( # Pydantic-class path: validate and return the BaseModel instance. if schema_class is not None: + # Validate against the generated JSON Schema FIRST so the + # class path enforces the same strict per-type checks as the + # dict path. 
Pydantic's default model_validate is coercive + # (it accepts "30" for an int field), which would silently + # accept responses that fail the wire schema. Running + # jsonschema first matches the dict-schema path's strictness; + # model_validate then constructs the typed instance. + try: + jsonschema.validate(instance=parsed_dict, schema=schema_dict) + except jsonschema.ValidationError as exc: + raise StructuredOutputInvalid( + "response failed JSON Schema validation", + response_schema=schema_dict, + raw_content=content, + failure_description=_format_jsonschema_failure(exc), + ) from exc + except jsonschema.SchemaError as exc: + raise StructuredOutputInvalid( + "response could not be validated against the supplied schema", + response_schema=schema_dict, + raw_content=content, + failure_description=str(exc), + ) from exc try: return schema_class.model_validate(parsed_dict) except ValidationError as exc: @@ -555,7 +578,7 @@ def _parse_and_validate( "response failed JSON Schema validation", response_schema=schema_dict, raw_content=content, - failure_description=exc.message, + failure_description=_format_jsonschema_failure(exc), ) from exc except jsonschema.SchemaError as exc: # Safety net: validate_response_schema's pre-validation should @@ -571,6 +594,16 @@ def _parse_and_validate( return parsed_dict +def _format_jsonschema_failure(exc: jsonschema.ValidationError) -> str: + """jsonschema.ValidationError.message describes the value mismatch + (e.g., "'30' is not of type 'integer'") but doesn't include the + failing field path. Prefix with ``json_path`` (e.g., ``$.age``) so + the failure_description string carries both, in the same format on + the dict-schema and class-schema paths. + """ + return f"{exc.json_path}: {exc.message}" + + _SCHEMA_DIRECTIVE_TEMPLATE = ( "You MUST return only valid JSON that conforms to the following JSON Schema.
" "Do not include prose, markdown fences, or any text outside the JSON object.\n\n" diff --git a/tests/unit/test_structured_output.py b/tests/unit/test_structured_output.py index 7e7889e..aaf8045 100644 --- a/tests/unit/test_structured_output.py +++ b/tests/unit/test_structured_output.py @@ -57,6 +57,20 @@ def test_validate_response_schema_rejects_missing_type() -> None: validate_response_schema({"properties": {"x": {"type": "integer"}}}) +def test_validate_response_schema_accepts_ref_to_boolean_subschema() -> None: + # Boolean true/false are valid JSON Schema subschemas. A $ref + # whose target is a boolean must resolve cleanly (not raise + # ProviderInvalidRequest as if it were unresolvable). + schema: dict[str, Any] = { + "type": "object", + "$defs": {"Any": True}, + "properties": {"x": {"$ref": "#/$defs/Any"}}, + "required": ["x"], + "additionalProperties": False, + } + validate_response_schema(schema) # no raise + + def test_validate_response_schema_rejects_external_ref() -> None: # External or otherwise unresolvable $refs would surface at # validate() time as raw referencing-library exceptions; the @@ -73,6 +87,53 @@ def test_validate_response_schema_rejects_external_ref() -> None: ) +def test_validate_response_schema_ignores_ref_under_data_keywords() -> None: + # JSON Schema permits arbitrary data under keywords like + # ``default``, ``const``, ``enum``, ``$comment``, and unknown / + # extension keywords (``x-*``). A ``"$ref"`` key in those positions + # is data, not a schema reference, and must not be resolved. + schema: dict[str, Any] = { + "type": "object", + "properties": { + "x": { + "type": "string", + "default": {"$ref": "this-is-data-not-a-ref"}, + } + }, + "required": ["x"], + "additionalProperties": False, + } + validate_response_schema(schema) # no raise + + +def test_validate_response_schema_accepts_percent_encoded_ref() -> None: + # JSON Pointer fragments are URI-encoded; spaces in a $defs key + # appear as %20 on the wire. 
_resolve_ref must percent-decode + # before applying JSON Pointer's ~0/~1 unescape rules. + schema: dict[str, Any] = { + "type": "object", + "$defs": {"Name With Spaces": {"type": "string"}}, + "properties": {"x": {"$ref": "#/$defs/Name%20With%20Spaces"}}, + "required": ["x"], + "additionalProperties": False, + } + validate_response_schema(schema) # no raise + + +def test_validate_response_schema_accepts_draft07_schema() -> None: + # A schema declaring draft-07 (still common in tooling) must pass + # the boundary check via the draft-07 metaschema rather than be + # rejected by a hard-coded 2020-12 metaschema. + schema: dict[str, Any] = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": {"x": {"type": "string"}}, + "required": ["x"], + "additionalProperties": False, + } + validate_response_schema(schema) # no raise + + def test_validate_response_schema_rejects_malformed_schema() -> None: # `"type": "foobar"` is not a valid JSON Schema type keyword; the # boundary check should catch this and raise ProviderInvalidRequest @@ -450,6 +511,31 @@ async def test_pydantic_class_returns_validated_instance() -> None: assert response.parsed.age == 30 +async def test_pydantic_class_path_rejects_coercible_string_for_int() -> None: + # The dict-schema path rejects {"age": "30"} against an integer + # field via strict jsonschema validation. The class path was + # previously accepting the same input via Pydantic's default + # coercive model_validate ("30" → 30). Both paths now run + # jsonschema first, so both reject the coercion case. 
+ transport = _mock_chat_completion_response('{"name":"Alice","age":"30"}') + provider = OpenAIProvider( + base_url="http://mock-llm.test", + model="test-model", + api_key="test-key", + transport=transport, + ) + try: + with pytest.raises(StructuredOutputInvalid) as excinfo: + await provider.complete( + [UserMessage(content="generate a person")], + response_schema=PersonModel, + ) + finally: + await provider.aclose() + assert "age" in excinfo.value.failure_description + assert "integer" in excinfo.value.failure_description + + async def test_pydantic_validation_failure_wraps_in_structured_output_invalid() -> None: # "thirty" is not a valid int for the age field. transport = _mock_chat_completion_response('{"name":"Alice","age":"thirty"}')