From e7ab8860063da3b1fdfc2d6690a86ecabc43391b Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 29 May 2026 12:48:56 -0700 Subject: [PATCH 1/2] Bump spec pin to v0.31.0 + 0039/0041 fixtures Bumps the spec submodule from v0.27.1 to v0.31.0 (proposals 0037 Anthropic, 0039 caller invocation_id, 0040 open-span, 0041 reserved keys) and wires conformance to python's current state for v0.11.0. conformance.toml: spec_pin to v0.31.0 + entries for 0037 / 0039 / 0040 / 0041. 0039 and 0041 are implemented since 0.11.0; 0037 and 0040 stay not-yet (Anthropic provider + #22 open-span are out of scope for this PR). __spec_version__ in src and pyproject likewise bumps; AGENTS.md regenerated. Conformance harness updates: defer the Anthropic fixtures in both the cross-capability parser (llm-provider/033-042) and the llm-provider runner (llm-provider/033-043; 043 is the OpenAI-side thinking-block stripping, which waits with the Anthropic provider); defer observability/034 (waits on #22); defer observability/035-036 from the cross-capability parser (the langfuse_trace shape isn't modeled; the derivation is pinned by unit tests against the same vector). The _run_fixture_028 runner recognises both the 0034 prefix-rejection and 0041 exact-name rejection patterns; the mid-invocation augment_metadata case waits on #22's harness primitive. Full suite green: 962 passed, 170 skipped. 
--- conformance.toml | 18 +++++++- openarmature-spec | 2 +- pyproject.toml | 2 +- src/openarmature/AGENTS.md | 9 ++-- src/openarmature/__init__.py | 2 +- tests/conformance/test_fixture_parsing.py | 55 +++++++++++++++++++++++ tests/conformance/test_llm_provider.py | 18 +++++++- tests/conformance/test_observability.py | 16 ++++++- tests/test_smoke.py | 2 +- 9 files changed, 113 insertions(+), 11 deletions(-) diff --git a/conformance.toml b/conformance.toml index d8b0f13..23fcf39 100644 --- a/conformance.toml +++ b/conformance.toml @@ -29,7 +29,7 @@ [manifest] implementation = "openarmature-python" -spec_pin = "v0.27.1" +spec_pin = "v0.31.0" # Status values: # implemented — shipped behavior matches the proposal's contract @@ -175,3 +175,19 @@ since = "0.10.0" [proposals."0036"] status = "implemented" since = "0.10.0" + +# Spec v0.28.0-v0.31.0 (proposals 0037, 0039, 0040, 0041). 0038 +# (Gemini) is mid-accept on spec side and not in v0.31.0 yet. +[proposals."0037"] +status = "not-yet" + +[proposals."0039"] +status = "implemented" +since = "0.11.0" + +[proposals."0040"] +status = "not-yet" + +[proposals."0041"] +status = "implemented" +since = "0.11.0" diff --git a/openarmature-spec b/openarmature-spec index e61fb08..064a39a 160000 --- a/openarmature-spec +++ b/openarmature-spec @@ -1 +1 @@ -Subproject commit e61fb0846ca83c6eaa2391d457c89adc9d588670 +Subproject commit 064a39ae77394fb965541823a1ed40b50b806b00 diff --git a/pyproject.toml b/pyproject.toml index a84ac1a..925529f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec" openarmature = "openarmature.cli:main" [tool.openarmature] -spec_version = "0.27.1" +spec_version = "0.31.0" [dependency-groups] dev = [ diff --git a/src/openarmature/AGENTS.md b/src/openarmature/AGENTS.md index 67e614b..efd410a 100644 --- a/src/openarmature/AGENTS.md +++ b/src/openarmature/AGENTS.md @@ -1,6 +1,6 @@ # OpenArmature — Agent documentation -*This is the 
agent guide bundled with the openarmature Python package, version 0.10.0 (spec v0.27.1). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.* +*This is the agent guide bundled with the openarmature Python package, version 0.10.0 (spec v0.31.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.* ## TL;DR @@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents ## Capability contracts -_Sourced from openarmature-spec v0.27.1. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._ +_Sourced from openarmature-spec v0.31.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._ ### Capability: `graph-engine` @@ -327,8 +327,9 @@ parent invocation's trace as nested spans. Implementations MUST also support an its own trace and the parent's dispatch span carries an OTel `Link` to that new trace. **Correlation ID.** A per-invocation identifier that flows across observability backends. 
-Distinct from `invocation_id` — the framework-generated `invocation_id` correlates spans within -a single backend, while `correlation_id` is application-supplied (or auto-generated when absent) +Distinct from `invocation_id` — the `invocation_id` (caller-supplied or framework-generated, per +§5.1) correlates spans within a single backend, while `correlation_id` is application-supplied +(or auto-generated when absent) and is intended to be visible in every backend the implementation emits to. A user running an LLM workflow with both an OTel backend (system traces, logs) and a Langfuse backend (LLM-specific traces) uses the `correlation_id` as a join key between them: find a slow request diff --git a/src/openarmature/__init__.py b/src/openarmature/__init__.py index e667fd8..62e2f48 100644 --- a/src/openarmature/__init__.py +++ b/src/openarmature/__init__.py @@ -25,4 +25,4 @@ """ __version__ = "0.10.0" -__spec_version__ = "0.27.1" +__spec_version__ = "0.31.0" diff --git a/tests/conformance/test_fixture_parsing.py b/tests/conformance/test_fixture_parsing.py index 424cc8b..9740f97 100644 --- a/tests/conformance/test_fixture_parsing.py +++ b/tests/conformance/test_fixture_parsing.py @@ -115,6 +115,61 @@ def _id(case: tuple[str, Path]) -> str: "prompt-management/016-prompt-observability-entities-propagation": ( "Cases shape models live in the PM-specific capability harness" ), + # Proposal 0037 (Anthropic Messages mapping) shipped in spec v0.28.0 + # but python marks it not-yet in conformance.toml — the Anthropic + # provider isn't implemented in this release. Defer the + # cross-capability parse tests for the 033-042 fixtures until that + # lands; the openai-strips-thinking-blocks side (043) is in + # test_llm_provider.py's own deferral. 
+ "llm-provider/033-anthropic-basic-message-round-trip": ( + "Anthropic provider not implemented (0037 not-yet in conformance.toml)" + ), + "llm-provider/034-anthropic-tool-call-flow": ( + "Anthropic provider not implemented (0037 not-yet in conformance.toml)" + ), + "llm-provider/035-anthropic-image-content-blocks": ( + "Anthropic provider not implemented (0037 not-yet in conformance.toml)" + ), + "llm-provider/036-anthropic-tool-choice-modes": ( + "Anthropic provider not implemented (0037 not-yet in conformance.toml)" + ), + "llm-provider/037-anthropic-runtime-config-mapping": ( + "Anthropic provider not implemented (0037 not-yet in conformance.toml)" + ), + "llm-provider/038-anthropic-max-tokens-required": ( + "Anthropic provider not implemented (0037 not-yet in conformance.toml)" + ), + "llm-provider/039-anthropic-error-mapping": ( + "Anthropic provider not implemented (0037 not-yet in conformance.toml)" + ), + "llm-provider/040-anthropic-structured-output-native": ( + "Anthropic provider not implemented (0037 not-yet in conformance.toml)" + ), + "llm-provider/041-anthropic-structured-output-fallback": ( + "Anthropic provider not implemented (0037 not-yet in conformance.toml)" + ), + "llm-provider/042-anthropic-thinking-block-round-trip": ( + "Anthropic provider not implemented (0037 not-yet in conformance.toml)" + ), + # Proposal 0040 (open-span metadata update) — task #22 implements + # the §6 augmentation-event mechanism + un-defers 029/030 + 034. + "observability/034-caller-metadata-open-span-update-serial": ( + "Open-span augmentation-event mechanism lands with #22 (0040 not-yet)" + ), + # Proposal 0039 (caller-supplied invocation_id) Langfuse trace.id + # derivation fixtures use the langfuse_trace expected shape the + # cross-capability parser doesn't model. 
The derivation itself is + # pinned by unit tests in test_observability_langfuse_adapter.py + # against the same spec vector fixture 036 uses + # (sha256("run_abc123")[:16].hex == 29b50a6c08dabfeaeb1696301f4fabe1); + # wiring into the langfuse-specific conformance harness is a + # follow-up. + "observability/035-caller-invocation-id-uuid": ( + "Cross-capability parser doesn't model langfuse_trace; derivation pinned by unit tests" + ), + "observability/036-caller-invocation-id-non-uuid": ( + "Cross-capability parser doesn't model langfuse_trace; derivation pinned by unit tests" + ), } diff --git a/tests/conformance/test_llm_provider.py b/tests/conformance/test_llm_provider.py index 7b723a8..46feab1 100644 --- a/tests/conformance/test_llm_provider.py +++ b/tests/conformance/test_llm_provider.py @@ -65,7 +65,23 @@ # Skip-marked here so a green test run at this commit means "everything we # claim to implement passes." Each subsequent PR drops its own rows as it # lands the underlying support. -_DEFERRED_FIXTURES: dict[str, str] = {} +_DEFERRED_FIXTURES: dict[str, str] = { + # Proposal 0037 (Anthropic Messages mapping) shipped in spec v0.28.0 + # but python marks it not-yet in conformance.toml — the Anthropic + # provider isn't implemented in this release. 043 (the OpenAI side + # stripping anthropic thinking-block content) waits with it. 
+ "033-anthropic-basic-message-round-trip": "Anthropic provider not implemented (0037 not-yet)", + "034-anthropic-tool-call-flow": "Anthropic provider not implemented (0037 not-yet)", + "035-anthropic-image-content-blocks": "Anthropic provider not implemented (0037 not-yet)", + "036-anthropic-tool-choice-modes": "Anthropic provider not implemented (0037 not-yet)", + "037-anthropic-runtime-config-mapping": "Anthropic provider not implemented (0037 not-yet)", + "038-anthropic-max-tokens-required": "Anthropic provider not implemented (0037 not-yet)", + "039-anthropic-error-mapping": "Anthropic provider not implemented (0037 not-yet)", + "040-anthropic-structured-output-native": "Anthropic provider not implemented (0037 not-yet)", + "041-anthropic-structured-output-fallback": "Anthropic provider not implemented (0037 not-yet)", + "042-anthropic-thinking-block-round-trip": "Anthropic provider not implemented (0037 not-yet)", + "043-openai-strips-thinking-blocks": "Anthropic provider not implemented (0037 not-yet)", +} def _fixture_paths() -> list[Path]: diff --git a/tests/conformance/test_observability.py b/tests/conformance/test_observability.py index 7ce6167..88f5004 100644 --- a/tests/conformance/test_observability.py +++ b/tests/conformance/test_observability.py @@ -884,6 +884,16 @@ async def _run_fixture_028(spec: Mapping[str, Any]) -> None: cases = cast("list[dict[str, Any]]", spec["cases"]) for case in cases: case_name = cast("str", case["name"]) + # Cases using the `augment_metadata` directive exercise §3.4 + # mid-invocation rejection at set_invocation_metadata. The + # augment_metadata harness primitive (per fixture 034) lands + # with proposal 0040 / task #22; skip until then. + nodes_check = cast("dict[str, Any]", case.get("nodes", {})) + if any( + isinstance(n, dict) and "augment_metadata" in cast("dict[str, Any]", n) + for n in nodes_check.values() + ): + continue try: # Build a minimal graph from the case's nodes/edges. 
The # fixture's node is a noop update — we never expect it to @@ -921,7 +931,11 @@ async def _body(_s: Any) -> dict[str, Any]: caller_metadata = cast("dict[str, Any]", case["caller_metadata"]) try: - with pytest.raises(ValueError, match="reserved namespace prefix"): + # Covers both rejection paths: the prefix-namespace + # rejection (openarmature.* / gen_ai.*, from 0034) and + # the exact-key-name rejection (0041's §8.4 reserved + # set). Both error messages contain "reserved". + with pytest.raises(ValueError, match="reserved"): await graph.invoke(state_cls(), metadata=caller_metadata) finally: otel_observer.shutdown() diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 66533ec..1c41fde 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -9,7 +9,7 @@ def test_package_versions() -> None: assert openarmature.__version__ == "0.10.0" - assert openarmature.__spec_version__ == "0.27.1" + assert openarmature.__spec_version__ == "0.31.0" def test_spec_version_matches_pyproject() -> None: From c82d1f158be3f3f9c3c465932abe9e8d936521ec Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Fri, 29 May 2026 13:00:36 -0700 Subject: [PATCH 2/2] Align conformance.toml note + visible 028 skip From PR #95 review: - Rewrite the conformance.toml convention block to acknowledge that feature PRs which bump the spec submodule pin update this file too, setting `since` to the upcoming release version (matches the v0.10.0 cycle's pattern in PR #85 / #88, and how this PR works). - Emit warnings.warn for 028's deferred augment_metadata case so pytest's end-of-run summary surfaces the deferred coverage by name rather than silently passing. 
--- conformance.toml | 11 +++++++---- tests/conformance/test_observability.py | 11 ++++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/conformance.toml b/conformance.toml index 23fcf39..850cce0 100644 --- a/conformance.toml +++ b/conformance.toml @@ -22,10 +22,13 @@ # between pinned-spec and spec-head is the consumer's job (e.g., the # spec docs site computes the difference and renders accordingly). # -# Convention: this file is only updated as part of release PRs. Between -# releases, the manifest reflects the most-recently-published version -# so external readers never see a `since` referring to an unreleased -# pre-tag commit. +# Convention: this file is updated as part of release PRs AND as part +# of feature PRs that bump the spec submodule pin (the manifest guard +# requires entries for every Accepted proposal in the pinned spec, so +# a submodule bump forces this file to update too). Such bump PRs set +# `since` to the upcoming release version; between the bump PR and the +# matching tag, external readers will see a `since` referring to the +# upcoming, unreleased version. [manifest] implementation = "openarmature-python" diff --git a/tests/conformance/test_observability.py b/tests/conformance/test_observability.py index 88f5004..21588dc 100644 --- a/tests/conformance/test_observability.py +++ b/tests/conformance/test_observability.py @@ -887,12 +887,21 @@ async def _run_fixture_028(spec: Mapping[str, Any]) -> None: # Cases using the `augment_metadata` directive exercise §3.4 # mid-invocation rejection at set_invocation_metadata. The # augment_metadata harness primitive (per fixture 034) lands - # with proposal 0040 / task #22; skip until then. + # with proposal 0040 / task #22; surface the deferral via + # warnings.warn so pytest's end-of-run summary lists it (rather + # than silently passing) and continue to the other cases. 
nodes_check = cast("dict[str, Any]", case.get("nodes", {})) if any( isinstance(n, dict) and "augment_metadata" in cast("dict[str, Any]", n) for n in nodes_check.values() ): + import warnings # noqa: PLC0415 + + warnings.warn( + f"028 case {case_name!r} deferred: augment_metadata harness primitive " + f"lands with proposal 0040 / #22", + stacklevel=2, + ) continue try: # Build a minimal graph from the case's nodes/edges. The