From e3095a75602f57213a7f0bff60b36f336bd68a91 Mon Sep 17 00:00:00 2001
From: donalddellapietra <Donald.della.pietra@gmail.com>
Date: Sun, 7 Jun 2026 20:45:51 -0400
Subject: [PATCH 1/7] fix(integrations): fail closed on redirect_to_safe in
 non-substituting adapters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A redirect_to_safe violation returns action="redirected" with
allowed=True (intentional: LangGraph substitutes the safe tool and the
agent flow continues). But every adapter except LangGraph gated tool
execution on `if check.blocked`, which is False on a redirect — so the
guard rolled the unsafe call out of the trace and then the adapter ran
the original unsafe tool anyway. A safety control silently degraded to a
no-op on agents/crewai/vercel_ai/claude_agent/google_adk/mcp.

Add CheckResult.stop_original (blocked OR redirected) and gate the
"run the original tool" decision on it in every adapter that cannot
transparently substitute. Those adapters now fail *closed* (refuse the
unsafe call) instead of fail-open. LangGraph is unchanged: it branches
on .redirected first and performs the substitution.

allowed/blocked/redirected semantics are unchanged, so the existing
CheckResult-level tests still hold. Adds a regression test asserting
stop_original is True on redirect and False on a clean pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 sponsio/integrations/agents.py       | 12 ++++++++++--
 sponsio/integrations/base.py         | 21 +++++++++++++++++++++
 sponsio/integrations/claude_agent.py |  6 +++++-
 sponsio/integrations/crewai.py       |  8 ++++++--
 sponsio/integrations/google_adk.py   |  3 ++-
 sponsio/integrations/mcp.py          | 12 ++++++++----
 sponsio/integrations/vercel_ai.py    |  4 +++-
 tests/test_redirect_to_safe.py       | 28 ++++++++++++++++++++++++++++
 8 files changed, 83 insertions(+), 11 deletions(-)

diff --git a/sponsio/integrations/agents.py b/sponsio/integrations/agents.py
index 8d3ffc9..02fe563 100644
--- a/sponsio/integrations/agents.py
+++ b/sponsio/integrations/agents.py
@@ -130,7 +130,11 @@ def wrap_tool(self, tool: Any) -> Any:
             async def guarded_async(*args: Any, **kwargs: Any) -> Any:
                 check = guard.guard_before(tool_name, kwargs)
                 guard.last_check = check
-                if check.blocked:
+                # ``stop_original`` folds in ``redirected``: this adapter
+                # does not implement transparent tool substitution, so a
+                # ``redirect_to_safe`` redirect fails closed (refuse)
+                # rather than running the unsafe call.
+                if check.stop_original:
                     msg = select_agent_message(
                         check.det_violations, fallback="Contract violation"
                     )
@@ -151,7 +155,11 @@ async def guarded_async(*args: Any, **kwargs: Any) -> Any:
             def guarded_sync(*args: Any, **kwargs: Any) -> Any:
                 check = guard.guard_before(tool_name, kwargs)
                 guard.last_check = check
-                if check.blocked:
+                # ``stop_original`` folds in ``redirected``: this adapter
+                # does not implement transparent tool substitution, so a
+                # ``redirect_to_safe`` redirect fails closed (refuse)
+                # rather than running the unsafe call.
+                if check.stop_original:
                     msg = select_agent_message(
                         check.det_violations, fallback="Contract violation"
                     )
diff --git a/sponsio/integrations/base.py b/sponsio/integrations/base.py
index 258e2da..1268043 100644
--- a/sponsio/integrations/base.py
+++ b/sponsio/integrations/base.py
@@ -313,6 +313,27 @@ def redirected(self) -> bool:
         """True if any det violation returned a redirect outcome."""
         return any(r.action == "redirected" for r in self.det_violations)
 
+    @property
+    def stop_original(self) -> bool:
+        """True when the adapter must NOT execute the original tool call.
+
+        Folds hard blocks together with redirects. A ``redirect_to_safe``
+        violation rolls the original ``unsafe`` call out of the trace and
+        sets ``redirected_to``; an adapter that runs the original call
+        anyway would execute the exact action the contract forbade — a
+        fail-*open* hole, the worst outcome for an enforcement layer.
+
+        Adapters that implement transparent substitution (LangGraph)
+        MUST branch on ``redirected`` / ``redirected_to`` *first* and
+        invoke the safe tool. Adapters that don't (yet) support
+        substitution MUST gate execution on ``stop_original`` so a
+        redirect fails *closed* (the unsafe call is refused) instead of
+        falling through to ``if check.blocked``, which is False on a
+        redirect. ``escalated`` is intentionally excluded — see
+        ``guard_before`` for why escalation does not gate execution.
+        """
+        return self.blocked or self.redirected
+
     @property
     def needs_retry(self) -> bool:
         """True if any sto violation returned a retry with feedback."""
diff --git a/sponsio/integrations/claude_agent.py b/sponsio/integrations/claude_agent.py
index 0a64ec4..a2dd042 100644
--- a/sponsio/integrations/claude_agent.py
+++ b/sponsio/integrations/claude_agent.py
@@ -112,7 +112,11 @@ async def pre_tool_hook(
             check = guard.guard_before(tool_name, tool_input)
             guard.last_check = check
 
-            if check.blocked:
+            # ``stop_original`` folds in ``redirected``: this hook denies
+            # via the SDK permission system and has no substitution path,
+            # so a redirect fails closed (denied) rather than running the
+            # unsafe call.
+            if check.stop_original:
                 # Prefer the structured ``agent_msg`` from OutcomeBuilder
                 # — it's already phrased to steer the model toward
                 # abandoning this action. Falls back to the legacy
diff --git a/sponsio/integrations/crewai.py b/sponsio/integrations/crewai.py
index db5c458..31efef3 100644
--- a/sponsio/integrations/crewai.py
+++ b/sponsio/integrations/crewai.py
@@ -102,7 +102,10 @@ def on_tool_start(self, context: Any) -> Any:
         )
         self.last_check = check
 
-        if check.blocked:
+        # ``stop_original`` folds in ``redirected``: CrewAI's adapter has
+        # no transparent-substitution path, so a redirect fails closed
+        # (returns the rejection) rather than executing the unsafe tool.
+        if check.stop_original:
             msg = select_agent_message(
                 check.det_violations, fallback="Contract violation detected"
             )
@@ -186,7 +189,8 @@ def make_guarded(orig: Any, name: str):
                 def guarded(*args: Any, **kwargs: Any) -> Any:
                     call_args = kwargs if kwargs else {"args": list(args)}
                     check = guard.guard_before(name, call_args)
-                    if check.blocked:
+                    # Fail closed on redirect too (no substitution path here).
+                    if check.stop_original:
                         msg = select_agent_message(
                             check.det_violations, fallback="contract violated"
                         )
diff --git a/sponsio/integrations/google_adk.py b/sponsio/integrations/google_adk.py
index 300ada1..6f8a103 100644
--- a/sponsio/integrations/google_adk.py
+++ b/sponsio/integrations/google_adk.py
@@ -93,7 +93,8 @@ def wrap_tool(self, tool: Callable[..., Any]) -> Callable[..., Any]:
             async def guarded_async(*args: Any, **kwargs: Any) -> Any:
                 check = guard.guard_before(tool_name, _call_args(tool, args, kwargs))
                 guard.last_check = check
-                if check.blocked:
+                # Fail closed on redirect too (no substitution path here).
+                if check.stop_original:
                     return _blocked_result(check)
 
                 result = await tool(*args, **kwargs)
diff --git a/sponsio/integrations/mcp.py b/sponsio/integrations/mcp.py
index fede799..2b1798b 100644
--- a/sponsio/integrations/mcp.py
+++ b/sponsio/integrations/mcp.py
@@ -142,13 +142,17 @@ async def call_tool(self, tool_name: str, arguments: dict | None = None) -> dict
         # that proxy this to an LLM (Claude Desktop, custom orchestrators)
         # can show the agent-tuned phrasing while keeping the legacy
         # ``violations`` array of log-formatted strings for back-compat.
-        blocked = [r for r in results if r.action == "blocked"]
-        if blocked:
+        # Treat ``redirected`` the same as ``blocked`` here: this proxy
+        # has no transparent-substitution path, so a ``redirect_to_safe``
+        # redirect must refuse the unsafe call rather than fall through
+        # and execute it (a fail-open hole).
+        stopped = [r for r in results if r.action in ("blocked", "redirected")]
+        if stopped:
             return {
                 "error": "Blocked by behavioral contract",
-                "violations": [r.message for r in blocked],
+                "violations": [r.message for r in stopped],
                 "agent_messages": [
-                    r.agent_msg for r in blocked if getattr(r, "agent_msg", "")
+                    r.agent_msg for r in stopped if getattr(r, "agent_msg", "")
                 ],
             }
 
diff --git a/sponsio/integrations/vercel_ai.py b/sponsio/integrations/vercel_ai.py
index 3ec36e7..299cde8 100644
--- a/sponsio/integrations/vercel_ai.py
+++ b/sponsio/integrations/vercel_ai.py
@@ -120,7 +120,9 @@ async def wrap_tool(self, call: Any, next_fn: Any) -> Any:
                 check = guard.guard_before(tool_name, kwargs)
                 guard.last_check = check
 
-                if check.blocked:
+                # ``stop_original`` folds in ``redirected``: no transparent
+                # substitution path here, so a redirect fails closed.
+                if check.stop_original:
                     msg = select_agent_message(
                         check.det_violations, fallback="Contract violation"
                     )
diff --git a/tests/test_redirect_to_safe.py b/tests/test_redirect_to_safe.py
index 0d39243..c642e55 100644
--- a/tests/test_redirect_to_safe.py
+++ b/tests/test_redirect_to_safe.py
@@ -96,6 +96,34 @@ def test_unconditional_redirect_fires_on_first_call(self) -> None:
         # ``allowed`` stays True so adapters know the agent flow can
         # continue (with the substituted tool, not the original).
         assert result.allowed is True
+        # ...but ``stop_original`` is True so an adapter that cannot
+        # substitute fails *closed* (refuses the unsafe call) instead of
+        # reading ``allowed``/``blocked`` and running the original.
+        assert result.stop_original is True
+
+    def test_stop_original_fail_closed_contract(self) -> None:
+        """A redirect must never let the original ``unsafe`` call run.
+
+        ``blocked`` is False on a redirect (by design), so any adapter
+        that gated execution on ``if check.blocked`` alone would
+        fail open. ``stop_original`` is the safe gate: True for both
+        blocks and redirects, False on a clean pass.
+        """
+        guard = Sponsio(
+            agent_id="bot",
+            contracts=[
+                contract("redirect rm to trash").guarantees(
+                    redirect_to_safe("rm_rf", "trash")
+                )
+            ],
+            mode="enforce",
+            verbose=False,
+        )
+        redirected = guard.guard_before("rm_rf", {"path": "/tmp/x"})
+        assert redirected.stop_original is True
+        # A clean call does not stop.
+        clean = guard.guard_before("read_file", {"path": "/tmp/x"})
+        assert clean.stop_original is False
 
     def test_other_tools_pass_through(self) -> None:
         guard = Sponsio(

From 820e5e7e33b8ceaa606dc8ab11cc039e5de72a07 Mon Sep 17 00:00:00 2001
From: donalddellapietra <Donald.della.pietra@gmail.com>
Date: Sun, 7 Jun 2026 20:48:39 -0400
Subject: [PATCH 2/7] fix(ts/sdk): align Eq with Python value-equality for
 composite values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Python evaluator compares Eq operands with `left == right` (value
equality); the TS evaluator used `l === r`, which compares arrays and
objects by reference. The v0.2 Term abstraction makes value-equality
reachable via `Eq(ArgValue(...), CtxValue(...))`, so a list- or
object-valued arg that is equal-by-value (`[1,2] == [1,2]` → True in
Python) compared False in TS on the same trace — a cross-language
divergence on the deterministic core.

Replace the `===` eq path with a `valuesEqual` deep structural
comparison (element-wise for arrays, key-wise for objects, identity for
primitives). le/lt/ge/gt are unchanged. Adds a parity regression test.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 ts/packages/sdk/src/__tests__/parity.test.ts | 52 ++++++++++++++++++++
 ts/packages/sdk/src/core/evaluator.ts        | 46 ++++++++++++++++-
 2 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/ts/packages/sdk/src/__tests__/parity.test.ts b/ts/packages/sdk/src/__tests__/parity.test.ts
index 8ef732f..c2bd69f 100644
--- a/ts/packages/sdk/src/__tests__/parity.test.ts
+++ b/ts/packages/sdk/src/__tests__/parity.test.ts
@@ -16,6 +16,7 @@ import {
   Atom, G, Implies, X, F, And,
   type Valuation,
 } from "../index.js";
+import { Eq, ArgValue, CtxValue, Const, predKey } from "../core/formula.js";
 import {
   deadline,
   requiredStepsCompletion,
@@ -348,12 +349,63 @@ function testDegeneratePatternRejection() {
   }
 }
 
+// ─────────────────────────────────────────────────────────────────────────
+// Eq value-equality parity (composite values)
+// ─────────────────────────────────────────────────────────────────────────
+//
+// Python ``_safe_compare`` eq uses ``left == right``; the TS evaluator
+// used ``l === r``, which compares arrays/objects by reference. With the
+// Term abstraction, ``Eq(ArgValue(...), CtxValue(...))`` over list-valued
+// args could pass in Python (``[1,2] == [1,2]`` is True) and fail in TS.
+// ``valuesEqual`` now closes the gap with deep comparison.
+function testEqValueEquality() {
+  console.log("[Eq value-equality parity]");
+
+  const key = (p: string, ...a: string[]) => predKey(p, ...a);
+  const eqArgCtx = new Eq(
+    new ArgValue("book", "seats"),
+    new CtxValue("expected_seats"),
+  );
+
+  // Equal-by-value arrays: Python True, TS now True (was False).
+  assert(
+    evaluate(eqArgCtx, [
+      {
+        [key("arg_value", "book", "seats")]: [1, 2, 3],
+        [key("ctx_value", "expected_seats")]: [1, 2, 3],
+      },
+    ] as unknown as Valuation[]) === true,
+    "Eq over equal arrays is True (deep equality)",
+  );
+
+  // Different arrays: False.
+  assert(
+    evaluate(eqArgCtx, [
+      {
+        [key("arg_value", "book", "seats")]: [1, 2, 3],
+        [key("ctx_value", "expected_seats")]: [1, 2],
+      },
+    ] as unknown as Valuation[]) === false,
+    "Eq over unequal arrays is False",
+  );
+
+  // Scalar equality still works.
+  const eqScalar = new Eq(new ArgValue("pay", "amount"), new Const(50));
+  assert(
+    evaluate(eqScalar, [
+      { [key("arg_value", "pay", "amount")]: 50 },
+    ] as unknown as Valuation[]) === true,
+    "Eq over equal scalars is True",
+  );
+}
+
 console.log("=== TS↔Python Parity Regression Tests ===\n");
 testBoundedEventuallyDeadline();
 testRequiredStepsCompletion();
 testGuardBeforeRollback();
 testDeadlineNlParity();
 testDegeneratePatternRejection();
+testEqValueEquality();
 
 console.log(`\n${"=".repeat(40)}`);
 console.log(`Results: ${passed} passed, ${failed} failed`);
diff --git a/ts/packages/sdk/src/core/evaluator.ts b/ts/packages/sdk/src/core/evaluator.ts
index f00b492..fee756d 100644
--- a/ts/packages/sdk/src/core/evaluator.ts
+++ b/ts/packages/sdk/src/core/evaluator.ts
@@ -76,12 +76,54 @@ function resolveArith(expr: Term, state: Valuation): unknown {
   }
 }
 
+/**
+ * Structural value-equality, approximating Python's `==` for the value
+ * shapes that flow through grounding (numbers, strings, booleans, arrays,
+ * plain objects); primitives use `===`, so Python's `True == 1` differs.
+ *
+ * The naive `l === r` diverged from the Python evaluator (`left ==
+ * right`) for composite values: an `Eq(ArgValue(...), CtxValue(...))`
+ * over list- or object-valued args compares by *value* in Python
+ * (`[1] == [1]` is True) but `===` compares arrays/objects by
+ * *reference* in JS (`[1] === [1]` is False). With the v0.2 Term
+ * abstraction making value-equality reachable, that gap could pass a
+ * contract in Python and fail it in TS on the same trace. `valuesEqual`
+ * closes it with element-/key-wise deep comparison.
+ */
+function valuesEqual(a: unknown, b: unknown): boolean {
+  if (a === b) return true;
+  if (a === null || b === null || a === undefined || b === undefined) {
+    return false;
+  }
+  if (Array.isArray(a) || Array.isArray(b)) {
+    if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length) {
+      return false;
+    }
+    return a.every((x, i) => valuesEqual(x, b[i]));
+  }
+  if (typeof a === "object" && typeof b === "object") {
+    const ka = Object.keys(a as object);
+    const kb = Object.keys(b as object);
+    if (ka.length !== kb.length) return false;
+    return ka.every(
+      (k) =>
+        Object.prototype.hasOwnProperty.call(b, k) &&
+        valuesEqual(
+          (a as Record<string, unknown>)[k],
+          (b as Record<string, unknown>)[k],
+        ),
+    );
+  }
+  return false;
+}
+
 /**
  * Compare two resolved values with the canonical "missing" semantics.
  *
  * If either operand is undefined / null, the comparison is False (the
  * comparison cannot decide). Same for type errors (mismatched types).
- * This is the Hoare-vacuity convention.
+ * This is the Hoare-vacuity convention. `eq` uses `valuesEqual` for
+ * Python `==` parity on composite values.
  */
 function safeCompare(op: string, left: unknown, right: unknown): boolean {
   if (left === undefined || left === null) return false;
@@ -96,7 +138,7 @@ function safeCompare(op: string, left: unknown, right: unknown): boolean {
       case "lt": return l < r;
       case "ge": return l >= r;
       case "gt": return l > r;
-      case "eq": return l === r;
+      case "eq": return valuesEqual(l, r);
     }
   } catch {
     return false;

From 46d438ddfa41a351191bd856c01b85b7d763bf4d Mon Sep 17 00:00:00 2001
From: donalddellapietra <Donald.della.pietra@gmail.com>
Date: Sun, 7 Jun 2026 20:49:46 -0400
Subject: [PATCH 3/7] fix(ts/sdk): guard createRequire for non-Node runtimes
 (Cloudflare Workers)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

@sponsio/sdk built modules call `createRequire(import.meta.url)` at the
top level of config-loader and pack-loader to lazily pull in the
optional `yaml` package. On Cloudflare Workers (and Deno Deploy)
`import.meta.url` is `undefined`, so `createRequire(undefined)` throws at
module-evaluation time and takes the entire Worker bundle down — even
when the app never loads YAML config or packs. This is the crash the
sponsio-demo repo worked around with a patch-package patch against the
shipped dist (`createRequire(import.meta.url ?? "file:///sponsio-noop.js")`).

Upstream the fix to source, but more elegantly than the dist patch:
build the require lazily on first use (memoized getRequireCjs) instead
of eagerly at import. A Worker bundle that never touches YAML now never
calls createRequire at all; the `?? "file:///sponsio-noop.js"` fallback
keeps it from throwing in the rare case it is reached on such a runtime.
This lets the demo drop its patch-package patch once it bumps the SDK.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 ts/packages/sdk/src/core/config-loader.ts | 18 ++++++++++++++++--
 ts/packages/sdk/src/core/pack-loader.ts   | 16 ++++++++++++++--
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/ts/packages/sdk/src/core/config-loader.ts b/ts/packages/sdk/src/core/config-loader.ts
index 29aadaa..4048c43 100644
--- a/ts/packages/sdk/src/core/config-loader.ts
+++ b/ts/packages/sdk/src/core/config-loader.ts
@@ -43,7 +43,21 @@ import { dirname, resolve as resolvePath } from "node:path";
 // Use createRequire so we can lazily load the optional `yaml` package
 // without breaking non-yaml users (ESM dynamic import would force the
 // entire constructor path async, which we don't want).
-const requireCjs = createRequire(import.meta.url);
+//
+// Built lazily on first use rather than at module load. On runtimes
+// where the SDK is bundled but YAML config is never used (Cloudflare
+// Workers, Deno Deploy), `import.meta.url` is `undefined` and an eager
+// `createRequire(undefined)` throws at import time, taking the whole
+// bundle down. Deferring the call means a Worker that never loads YAML
+// never touches `createRequire`; the `?? "file:///sponsio-noop.js"`
+// fallback avoids the throw in the rare case it runs on such a runtime.
+let _requireCjs: ReturnType<typeof createRequire> | null = null;
+function getRequireCjs(): ReturnType<typeof createRequire> {
+  if (_requireCjs === null) {
+    _requireCjs = createRequire(import.meta.url ?? "file:///sponsio-noop.js");
+  }
+  return _requireCjs;
+}
 
 export interface LoadedConfig {
   /**
@@ -114,7 +128,7 @@ type YamlLib = {
 
 function loadYamlLib(): YamlLib {
   try {
-    return requireCjs("yaml") as YamlLib;
+    return getRequireCjs()("yaml") as YamlLib;
   } catch {
     throw new Error(
       "[sponsio] config loading requires the `yaml` package. " +
diff --git a/ts/packages/sdk/src/core/pack-loader.ts b/ts/packages/sdk/src/core/pack-loader.ts
index 58b1c88..b99f6d1 100644
--- a/ts/packages/sdk/src/core/pack-loader.ts
+++ b/ts/packages/sdk/src/core/pack-loader.ts
@@ -26,14 +26,26 @@ import { createRequire } from "node:module";
 import { fileURLToPath } from "node:url";
 import type { SkippedItem } from "./config-loader.js";
 
-const requireCjs = createRequire(import.meta.url);
+// Lazily built on first use, with a fallback URL. On runtimes where
+// `import.meta.url` is `undefined` (Cloudflare Workers, Deno Deploy),
+// an eager `createRequire(undefined)` throws at module load and breaks
+// the whole bundle even when YAML packs are never loaded. Deferring the
+// call spares Worker bundles that never touch packs; the fallback URL
+// avoids the throw if the call is reached. See config-loader.
+let _requireCjs: ReturnType<typeof createRequire> | null = null;
+function getRequireCjs(): ReturnType<typeof createRequire> {
+  if (_requireCjs === null) {
+    _requireCjs = createRequire(import.meta.url ?? "file:///sponsio-noop.js");
+  }
+  return _requireCjs;
+}
 
 interface YamlLib {
   parse: (src: string) => unknown;
 }
 
 function loadYamlLib(): YamlLib {
-  return requireCjs("yaml") as YamlLib;
+  return getRequireCjs()("yaml") as YamlLib;
 }
 
 /**

From 477b819d87a6af9a62cb0d8252b1c14bd43d263d Mon Sep 17 00:00:00 2001
From: donalddellapietra <Donald.della.pietra@gmail.com>
Date: Sun, 7 Jun 2026 20:51:09 -0400
Subject: [PATCH 4/7] docs(core): document filter_tools cost, workflow_step
 batch caveat, and footguns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Doc/comment-only clarifications from the v0.2 review (no behavior change):

* filter_tools: note it is O(candidates × trace_length) per call —
  rollback_last_event resets the verifier so each probe re-grounds the
  whole trace — and to call it once per turn + bound the trace with
  rotate_session. (Snapshot/restore fast path tracked as follow-up.)
* workflow_step: document the end-of-trace weak-next vacuity — a trigger
  on the final event of a batch-verified trace incurs no violation
  (self-corrects in live enforce mode; matters only for verify/replay).
* evaluator._warned_missing_vars: note it is process-global, lock-free,
  one-shot, and does not re-fire across sessions/tests.
* Var: warn that ==/<=/< etc. build comparison AST nodes (not bools),
  so == does not value-compare two Var instances (hashing still works).
* grounding arg_value: note raw arg values are retained for the whole
  trace; bound with rotate_session for large payloads.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 sponsio/formulas/evaluator.py |  8 ++++++++
 sponsio/formulas/formula.py   |  9 +++++++++
 sponsio/integrations/base.py  | 11 +++++++++++
 sponsio/patterns/library.py   | 11 +++++++++++
 sponsio/tracer/grounding.py   |  7 +++++++
 5 files changed, 46 insertions(+)

diff --git a/sponsio/formulas/evaluator.py b/sponsio/formulas/evaluator.py
index 8dca0df..ad579c1 100644
--- a/sponsio/formulas/evaluator.py
+++ b/sponsio/formulas/evaluator.py
@@ -72,6 +72,14 @@
     }
 )
 
+# Process-global, append-only, lock-free. It backs "warn at most once
+# per missing Var key for the lifetime of the process" — deliberately
+# coarse: the warning is a developer aid, not session state, so a benign
+# data race on ``add`` (which never loses the warning, only its exact
+# ordering) is acceptable and not worth taking the monitor's RLock for.
+# Consequence: warnings do not re-fire across sessions or test cases in
+# the same interpreter. Tests that assert on the warning should reset
+# this set in a fixture.
 _warned_missing_vars: set[str] = set()
 
 
diff --git a/sponsio/formulas/formula.py b/sponsio/formulas/formula.py
index eed790e..b9c7c0f 100644
--- a/sponsio/formulas/formula.py
+++ b/sponsio/formulas/formula.py
@@ -298,6 +298,15 @@ class Var(FormulaMixin, Term):
 
     Examples: ``Var("cost")``, ``Var("count", "tool")``.
 
+    Note: ``==`` / ``<`` / ``<=`` / ``>`` / ``>=`` are overloaded to
+    *build comparison AST nodes* (``Var("x") == 5`` returns
+    ``Eq(Var("x"), Const(5))``), SQLAlchemy-column style — they do NOT
+    return a bool. So ``Var("x") == Var("x")`` is a truthy ``Eq`` node,
+    not ``True``; don't rely on ``==`` to value-compare two ``Var``
+    instances or to dedupe them in ordinary code. Hashing still works
+    (the frozen-dataclass ``__hash__`` is based on ``name``/``args``),
+    so ``Var`` is usable as a dict key / set member.
+
     Attributes:
         name: Variable name.
         args: Optional positional arguments for parameterized variables.
diff --git a/sponsio/integrations/base.py b/sponsio/integrations/base.py
index 1268043..dedfe47 100644
--- a/sponsio/integrations/base.py
+++ b/sponsio/integrations/base.py
@@ -1374,6 +1374,17 @@ def filter_tools(self, candidates: list[str]) -> list[str]:
         via ``guard_before``. Treat ``filter_tools`` as a first-line
         defence, not a replacement for ``guard_before``.
 
+        Cost: O(len(candidates) × trace_length) per call. Each probe
+        appends a synthetic event and ``rollback_last_event`` resets the
+        verifier, so the *next* probe re-grounds the whole trace from
+        scratch rather than incrementally. That's fine for typical tool
+        menus and session lengths, but on a long-running agent with a
+        wide toolset it is the one spot that gives up the otherwise
+        incremental O(ΔN) grounding — call it once per turn, not per
+        candidate-per-turn, and lean on ``rotate_session`` to bound the
+        trace length. (A snapshot/restore fast path that avoids the full
+        re-ground is tracked as a follow-up.)
+
         Args:
             candidates: Tool names the framework would normally expose
                 to the agent for the next turn.
diff --git a/sponsio/patterns/library.py b/sponsio/patterns/library.py
index fe450d8..51a5a6a 100644
--- a/sponsio/patterns/library.py
+++ b/sponsio/patterns/library.py
@@ -299,6 +299,17 @@ def workflow_step(
     Returns:
         A ``DetFormula`` (NOT marked liveness — X is one-step bounded
         and the runtime can decide it after a single event, unlike F).
+
+    Caveat (end-of-trace): under weak finite-trace semantics ``X`` is
+    vacuously true at the last position, so a ``trigger`` that fires on
+    the *final* event of a batch-verified trace incurs no violation
+    (there is no "next" event to inspect). In incremental enforce mode
+    this self-corrects — when the next event arrives, a non-matching
+    next action is blocked and rolled back, effectively forcing
+    ``next_action`` — but a whole-trace ``verify`` / replay will
+    silently pass a trailing trigger. Mirrors the ``rotate_session``
+    liveness caveat; relevant only to post-hoc batch checks, not live
+    guarding.
     """
     if not isinstance(trigger, Atom) or not isinstance(next_action, Atom):
         raise TypeError(
diff --git a/sponsio/tracer/grounding.py b/sponsio/tracer/grounding.py
index 6a6a779..7507671 100644
--- a/sponsio/tracer/grounding.py
+++ b/sponsio/tracer/grounding.py
@@ -467,6 +467,13 @@ def ground_event(
         # Emit unconditionally for every arg field on every tool_call —
         # Terms read by direct key lookup, not by content_atoms
         # extraction.
+        #
+        # Memory note: this stores the raw arg value (by reference) into
+        # the per-timestep valuation, which is retained for the whole
+        # trace. For tools with large or deeply-nested args that keeps
+        # those objects alive until ``reset`` / ``rotate_session``. Fine
+        # for typical scalar args; if a tool passes megabyte payloads,
+        # bound the trace with ``rotate_session``.
         if event.args:
             for _field, _val in event.args.items():
                 v[pred_key("arg_value", event.tool, _field)] = _val

From 4aec07086a7ff780d463ea828820c271326b08fe Mon Sep 17 00:00:00 2001
From: donalddellapietra <Donald.della.pietra@gmail.com>
Date: Sun, 7 Jun 2026 20:52:12 -0400
Subject: [PATCH 5/7] docs: repair broken artifacts from the v0.2 em-dash sweep
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The v0.2 cosmetic em-dash→period pass mangled a few spots beyond the
intended restyle. Fix only the genuinely broken ones (not a wholesale
revert, which would clash with deliberate rewordings):

* library.py: an orphaned comment line ("# . Sponsio doesn't hard-code")
  where the subject got stranded onto the previous line.
* monitor.rotate_session docstring: the whole body was re-indented by
  one space and contained a broken ". lose visibility" fragment;
  restore 8-space indentation and reflow the sentences.
* module docstring first-lines that read as typos in help()/IDEs:
  "Pattern library. the...", "RuntimeMonitor. intercepts...",
  "BaseGuard. unified..." → proper em-dash openers.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 sponsio/integrations/base.py |  2 +-
 sponsio/patterns/library.py  |  6 +--
 sponsio/runtime/monitor.py   | 84 ++++++++++++++++++------------------
 3 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/sponsio/integrations/base.py b/sponsio/integrations/base.py
index dedfe47..7e7d2b4 100644
--- a/sponsio/integrations/base.py
+++ b/sponsio/integrations/base.py
@@ -1,4 +1,4 @@
-"""BaseGuard. unified parent class for all framework integrations.
+"""BaseGuard — unified parent class for all framework integrations.
 
 Every framework adapter (LangGraph, MCP, CrewAI, etc.) inherits from
 BaseGuard. The base class owns all contract logic:
diff --git a/sponsio/patterns/library.py b/sponsio/patterns/library.py
index 51a5a6a..9858341 100644
--- a/sponsio/patterns/library.py
+++ b/sponsio/patterns/library.py
@@ -1,4 +1,4 @@
-"""Pattern library. the constraint primitive layer.
+"""Pattern library — the constraint primitive layer.
 
 Patterns are the building blocks the rest of Sponsio compiles to: each
 function takes plain string args and returns a ``DetFormula`` (an LTL
@@ -1951,8 +1951,8 @@ def delegation_depth_limit(max_depth: int, desc: str = "") -> DetFormula:
 #
 # Covers the runtime half of **ASI-03** (identity), **ASI-06** (memory
 # poisoning via content-source gating), and **ASI-07** (inter-agent
-# comm via msg_verified gating). Users supply their own key convention
-# . Sponsio doesn't hard-code "caller_id" vs "source" vs "msg_sender"
+# comm via msg_verified gating). Users supply their own key convention;
+# Sponsio doesn't hard-code "caller_id" vs "source" vs "msg_sender"
 # because each team has their own tagging scheme.
 # ---------------------------------------------------------------------------
 
diff --git a/sponsio/runtime/monitor.py b/sponsio/runtime/monitor.py
index 56c5032..a6c4c87 100644
--- a/sponsio/runtime/monitor.py
+++ b/sponsio/runtime/monitor.py
@@ -1,4 +1,4 @@
-"""RuntimeMonitor. intercepts agent actions and enforces det contracts.
+"""RuntimeMonitor — intercepts agent actions and enforces det contracts.
 
 This is the central enforcement point.  Every agent action flows through
 ``check_action()``, which runs the deterministic evaluation pipeline:
@@ -377,47 +377,47 @@ def reset(self) -> None:
     def rotate_session(self) -> dict:
         """Begin a new session window; return a summary of what was flushed.
 
-         This is the **supported** way to bound memory in long-running
-         agents (24/7 service agents, always-on schedulers) without
-         losing contract enforcement. It behaves exactly like
-         :meth:`reset`. trace, log, spans, verifier cache, and atom
-         caches are all cleared; contracts on the underlying
-         :class:`~sponsio.models.system.System` are **not** touched.
-         The only difference is intent signalling and the return value:
-         callers get back the headline metrics of the window that just
-         closed so they can plumb them into audit logs / dashboards
-         before the numbers go away.
-
-         Why not just keep using :meth:`reset`?
-         ``reset`` reads as "something went wrong, start over".
-         ``rotate_session`` is the name you want to see at a quarterly
-         review. "we rotate every 1000 turns to cap memory; here's the
-         hand-off record."
-
-         Liveness caveat
-         ---------------
-         Formulas that span the **entire trace**. ``F(tool)`` /
-         ``always_followed_by(a, b)`` / whole-trace ``rate_limit(tool, N)``
-        . lose visibility across the rotation boundary. Concretely: if
-         ``response`` was promised before ``rotate_session`` and still
-         hasn't happened, the post-rotation verifier won't see the
-         original ``trigger`` and can never fire the liveness violation.
-         To avoid silently eating obligations, this method refuses to
-         rotate while ``finish_session`` hasn't been called on a guard
-         with pending liveness obligations. but since ``RuntimeMonitor``
-         doesn't know about guard-level ``finish_session``, the check
-         has to happen one layer up. See
-         :meth:`sponsio.integrations.base.BaseGuard.rotate_session` for
-         the guard-side handling: run ``finish_session`` first, then
-         rotate.
-
-         Returns
-         -------
-         dict
-             ``{"events": int, "turns": int, "log_entries": int,
-             "violations_cleared": 0}`` (``violations_cleared`` is always
-             0 at the monitor layer. violations are tracked by
-             :class:`~sponsio.integrations.base.BaseGuard`, not here).
+        This is the **supported** way to bound memory in long-running
+        agents (24/7 service agents, always-on schedulers) without
+        losing contract enforcement. It behaves exactly like
+        :meth:`reset`: trace, log, spans, verifier cache, and atom
+        caches are all cleared; contracts on the underlying
+        :class:`~sponsio.models.system.System` are **not** touched.
+        The only difference is intent signalling and the return value:
+        callers get back the headline metrics of the window that just
+        closed so they can plumb them into audit logs / dashboards
+        before the numbers go away.
+
+        Why not just keep using :meth:`reset`?
+        ``reset`` reads as "something went wrong, start over".
+        ``rotate_session`` is the name you want to see at a quarterly
+        review: "we rotate every 1000 turns to cap memory; here's the
+        hand-off record."
+
+        Liveness caveat
+        ---------------
+        Formulas that span the **entire trace** — ``F(tool)`` /
+        ``always_followed_by(a, b)`` / whole-trace ``rate_limit(tool, N)``
+        — lose visibility across the rotation boundary. Concretely: if
+        ``response`` was promised before ``rotate_session`` and still
+        hasn't happened, the post-rotation verifier won't see the
+        original ``trigger`` and can never fire the liveness violation.
+        To avoid silently eating obligations, this method refuses to
+        rotate while ``finish_session`` hasn't been called on a guard
+        with pending liveness obligations — but since ``RuntimeMonitor``
+        doesn't know about guard-level ``finish_session``, the check
+        has to happen one layer up. See
+        :meth:`sponsio.integrations.base.BaseGuard.rotate_session` for
+        the guard-side handling: run ``finish_session`` first, then
+        rotate.
+
+        Returns
+        -------
+        dict
+            ``{"events": int, "turns": int, "log_entries": int,
+            "violations_cleared": 0}`` (``violations_cleared`` is always
+            0 at the monitor layer — violations are tracked by
+            :class:`~sponsio.integrations.base.BaseGuard`, not here).
         """
         with self._lock:
             summary = {

From ea9670dd3c8f8502a73c31746da695d8eabb569c Mon Sep 17 00:00:00 2001
From: donalddellapietra <Donald.della.pietra@gmail.com>
Date: Sun, 7 Jun 2026 21:22:10 -0400
Subject: [PATCH 6/7] test: fix suite-wide setup errors and a false-positive
 sync failure

Two pre-existing test-infra bugs that made a full `pytest tests/` run
unusable (628 passed / 1684 errors); fixed independently of the v0.2
review changes.

1. conftest `_reset_rich_style_cache` (autouse) walks `gc.get_objects()`
   and calls `isinstance(obj, Style)` on every live object. CPython's
   isinstance consults the instance's `__class__` attribute, and once an
   openai-touching test has run, the SDK leaves lazy-import proxies in
   memory whose `__class__` getter imports `sounddevice` (voice helpers)
   or raises `OpenAIError`. That exception escaped the fixture and
   errored the *setup* of ~1680 unrelated tests. Wrap the isinstance
   probe in try/except so any object that objects to introspection is
   treated as "not a Style".

2. test_openclaw_artifact_sync compared the canonical plugin's npm build
   output (`plugins/sponsio-openclaw/dist/`, gitignored) against the
   committed bundled copy. On a fresh checkout that hasn't run
   `npm run build`, the source dist is absent and the test failed with
   "source missing". Skip when the build isn't present (CI builds first,
   so the check still fires there); a built tree that actually drifts
   still fails.

Full suite now: 2296 passed, 26 skipped, 0 errors (random ordering).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 tests/conftest.py                    | 16 +++++++++++++++-
 tests/test_openclaw_artifact_sync.py | 16 ++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index e410e4a..aabdfb3 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -76,7 +76,21 @@ def _reset_rich_style_cache():
     if hasattr(Color, "parse") and hasattr(Color.parse, "cache_clear"):
         Color.parse.cache_clear()
     for obj in gc.get_objects():
-        if isinstance(obj, Style):
+        # ``isinstance(obj, Style)`` reads ``obj.__class__`` (CPython
+        # consults the instance's ``__class__`` attribute, not just
+        # ``type(obj)``). Some live objects are lazy-import proxies
+        # whose ``__class__`` getter has side effects — e.g. once an
+        # openai-touching test has run, the openai SDK leaves proxies
+        # for its optional submodules in memory, and probing their
+        # ``__class__`` tries to import ``sounddevice`` (voice helpers)
+        # or raises ``OpenAIError``. That used to escape this autouse
+        # fixture and error the *setup* of every subsequent test. Treat
+        # any object that objects to introspection as "not a Style".
+        try:
+            is_style = isinstance(obj, Style)
+        except Exception:
+            continue
+        if is_style:
             try:
                 obj._ansi = None
             except Exception:
diff --git a/tests/test_openclaw_artifact_sync.py b/tests/test_openclaw_artifact_sync.py
index eb3ff42..94314f6 100644
--- a/tests/test_openclaw_artifact_sync.py
+++ b/tests/test_openclaw_artifact_sync.py
@@ -23,6 +23,8 @@
 
 from pathlib import Path
 
+import pytest
+
 REPO_ROOT = Path(__file__).resolve().parents[1]
 SRC_ROOT = REPO_ROOT / "plugins" / "sponsio-openclaw"
 DST_ROOT = REPO_ROOT / "sponsio" / "plugin" / "openclaw_artifact"
@@ -38,6 +40,20 @@
 
 def test_runtime_artifact_matches_canonical_plugin():
     """Every file the sync script copies must be byte-identical."""
+    # The canonical ``dist/`` is an npm build output and is gitignored.
+    # A fresh source checkout that hasn't run ``npm run build`` has
+    # nothing to compare against, so this guard can't run — skip rather
+    # than fail with a spurious "source missing". CI builds the plugin
+    # before pytest, so the sync check still fires there; a *built* tree
+    # that has actually drifted (bundled copy missing, or bytes differ)
+    # still fails below.
+    if not (SRC_ROOT / "dist" / "index.js").exists():
+        pytest.skip(
+            "canonical plugin not built — plugins/sponsio-openclaw/dist/ is "
+            "absent. Run `cd plugins/sponsio-openclaw && npm install && "
+            "npm run build` to enable this sync check."
+        )
+
     diffs: list[str] = []
     missing: list[str] = []
     for rel in _SYNCED_FILES:

From 1c07712bdcae5b12e48db93d90727595c30360b4 Mon Sep 17 00:00:00 2001
From: donalddellapietra <Donald.della.pietra@gmail.com>
Date: Sun, 7 Jun 2026 21:42:13 -0400
Subject: [PATCH 7/7] test: use asyncio.run instead of deprecated
 get_event_loop().run_until_complete

asyncio.get_event_loop() emits a DeprecationWarning ("There is no current
event loop") when no loop is running, which it isn't in these synchronous
tests. asyncio.run(coro) is the modern equivalent: it creates a fresh
loop, runs the coroutine, and closes the loop. Clears the suite's last
warning; full run is now 2296 passed / 26 skipped / 0 warnings.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 tests/test_claude_agent_integration.py | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/tests/test_claude_agent_integration.py b/tests/test_claude_agent_integration.py
index 5c978a6..2156526 100644
--- a/tests/test_claude_agent_integration.py
+++ b/tests/test_claude_agent_integration.py
@@ -56,9 +56,7 @@ def test_pre_tool_hook_blocks_violation(self):
             "agent_type": "test",
         }
 
-        result = asyncio.get_event_loop().run_until_complete(
-            pre_hook(input_data, "id_1", None)
-        )
+        result = asyncio.run(pre_hook(input_data, "id_1", None))
 
         assert result.get("hookSpecificOutput", {}).get("permissionDecision") == "deny"
         assert "Sponsio" in result.get("hookSpecificOutput", {}).get(
@@ -85,9 +83,7 @@ def test_pre_tool_hook_allows_compliant_call(self):
             "agent_id": "test",
             "agent_type": "test",
         }
-        result1 = asyncio.get_event_loop().run_until_complete(
-            pre_hook(input_check, "id_1", None)
-        )
+        result1 = asyncio.run(pre_hook(input_check, "id_1", None))
         assert result1 == {}
 
         # Record that tool completed
@@ -104,9 +100,7 @@ def test_pre_tool_hook_allows_compliant_call(self):
             "agent_id": "test",
             "agent_type": "test",
         }
-        result2 = asyncio.get_event_loop().run_until_complete(
-            pre_hook(input_refund, "id_2", None)
-        )
+        result2 = asyncio.run(pre_hook(input_refund, "id_2", None))
         assert result2 == {}
 
     def test_pre_tool_hook_rate_limit(self):
@@ -129,16 +123,12 @@ def test_pre_tool_hook_rate_limit(self):
         }
 
         # First call — allowed
-        r1 = asyncio.get_event_loop().run_until_complete(
-            pre_hook(input_data, "id_1", None)
-        )
+        r1 = asyncio.run(pre_hook(input_data, "id_1", None))
         assert r1 == {}
         guard.guard_after("issue_refund", "done")
 
         # Second call — blocked
-        r2 = asyncio.get_event_loop().run_until_complete(
-            pre_hook(input_data, "id_2", None)
-        )
+        r2 = asyncio.run(pre_hook(input_data, "id_2", None))
         assert r2.get("hookSpecificOutput", {}).get("permissionDecision") == "deny"
 
     def test_last_check_updated(self):
@@ -162,7 +152,7 @@ def test_last_check_updated(self):
             "agent_type": "test",
         }
 
-        asyncio.get_event_loop().run_until_complete(pre_hook(input_data, "id_1", None))
+        asyncio.run(pre_hook(input_data, "id_1", None))
         assert guard.last_check is not None
         assert guard.last_check.allowed
 
@@ -198,9 +188,7 @@ def test_system_message_contains_tool_name(self):
             "agent_type": "test",
         }
 
-        result = asyncio.get_event_loop().run_until_complete(
-            pre_hook(input_data, "id_1", None)
-        )
+        result = asyncio.run(pre_hook(input_data, "id_1", None))
 
         msg = result.get("systemMessage", "")
         assert "issue_refund" in msg