SponsioLabs · yfxiao16 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/sponsio/formulas/evaluator.py b/sponsio/formulas/evaluator.py
@@ -72,6 +72,14 @@
     }
 )
 
+# Process-global, append-only, lock-free. It backs "warn once per
+# missing Var key for the lifetime of the process" (best-effort) —
+# deliberately coarse: the warning is a developer aid, not session
+# state, so a benign data race on ``add`` (never loses the warning; at
+# worst racing threads may each emit it) is not worth taking the
+# monitor's RLock for. Consequence: warnings do not re-fire across
+# sessions or test cases in the same interpreter. Tests that assert on
+# the warning should reset this set in a fixture.
 _warned_missing_vars: set[str] = set()
 
 

diff --git a/sponsio/formulas/formula.py b/sponsio/formulas/formula.py
@@ -298,6 +298,15 @@ class Var(FormulaMixin, Term):
 
     Examples: ``Var("cost")``, ``Var("count", "tool")``.
 
+    Note: ``==`` / ``<`` / ``<=`` / ``>`` / ``>=`` are overloaded to
+    *build comparison AST nodes* (``Var("x") == 5`` returns
+    ``Eq(Var("x"), Const(5))``), SQLAlchemy-column style — they do NOT
+    return a bool. So ``Var("x") == Var("x")`` is a truthy ``Eq`` node,
+    not ``True`` — and so is ``Var("x") == Var("y")``; don't rely on
+    ``==`` to value-compare or dedupe ``Var`` instances in ordinary
+    code. Hashing still works (the frozen-dataclass ``__hash__`` is based
+    on ``name``/``args``), so ``Var`` is usable as a dict key / set member.
+
     Attributes:
         name: Variable name.
         args: Optional positional arguments for parameterized variables.

diff --git a/sponsio/integrations/agents.py b/sponsio/integrations/agents.py
@@ -130,7 +130,11 @@ def wrap_tool(self, tool: Any) -> Any:
             async def guarded_async(*args: Any, **kwargs: Any) -> Any:
                 check = guard.guard_before(tool_name, kwargs)
                 guard.last_check = check
-                if check.blocked:
+                # ``stop_original`` folds in ``redirected``: this adapter
+                # does not implement transparent tool substitution, so a
+                # ``redirect_to_safe`` redirect fails closed (refuse)
+                # rather than running the unsafe call.
+                if check.stop_original:
                     msg = select_agent_message(
                         check.det_violations, fallback="Contract violation"
                     )
@@ -151,7 +155,11 @@ async def guarded_async(*args: Any, **kwargs: Any) -> Any:
             def guarded_sync(*args: Any, **kwargs: Any) -> Any:
                 check = guard.guard_before(tool_name, kwargs)
                 guard.last_check = check
-                if check.blocked:
+                # ``stop_original`` folds in ``redirected``: this adapter
+                # does not implement transparent tool substitution, so a
+                # ``redirect_to_safe`` redirect fails closed (refuse)
+                # rather than running the unsafe call.
+                if check.stop_original:
                     msg = select_agent_message(
                         check.det_violations, fallback="Contract violation"
                     )

diff --git a/sponsio/integrations/base.py b/sponsio/integrations/base.py
@@ -1,4 +1,4 @@
-"""BaseGuard. unified parent class for all framework integrations.
+"""BaseGuard — unified parent class for all framework integrations.
 
 Every framework adapter (LangGraph, MCP, CrewAI, etc.) inherits from
 BaseGuard. The base class owns all contract logic:
@@ -313,6 +313,27 @@ def redirected(self) -> bool:
         """True if any det violation returned a redirect outcome."""
         return any(r.action == "redirected" for r in self.det_violations)
 
+    @property
+    def stop_original(self) -> bool:
+        """True when the adapter must NOT execute the original tool call.
+
+        Folds hard blocks together with redirects. A ``redirect_to_safe``
+        violation rolls the original ``unsafe`` call out of the trace and
+        sets ``redirected_to``; an adapter that runs the original call
+        anyway would execute the exact action the contract forbade — a
+        fail-*open* hole, the worst outcome for an enforcement layer.
+
+        Adapters that implement transparent substitution (LangGraph)
+        MUST branch on ``redirected`` / ``redirected_to`` *first* and
+        invoke the safe tool. Adapters that don't (yet) support
+        substitution MUST gate execution on ``stop_original`` so a
+        redirect fails *closed* (the unsafe call is refused) instead of
+        falling through to ``if check.blocked``, which is False on a
+        redirect. ``escalated`` is intentionally excluded — see
+        ``guard_before`` for why escalation does not gate execution.
+        """
+        return self.blocked or self.redirected
+
     @property
     def needs_retry(self) -> bool:
         """True if any sto violation returned a retry with feedback."""
@@ -1353,6 +1374,17 @@ def filter_tools(self, candidates: list[str]) -> list[str]:
         via ``guard_before``. Treat ``filter_tools`` as a first-line
         defence, not a replacement for ``guard_before``.
 
+        Cost: O(len(candidates) × trace_length) per call. Each probe
+        appends a synthetic event and ``rollback_last_event`` resets the
+        verifier, so the *next* probe re-grounds the whole trace from
+        scratch rather than incrementally. That's fine for typical tool
+        menus and session lengths, but on a long-running agent with a
+        wide toolset it is the one spot that gives up the otherwise
+        incremental O(ΔN) grounding — call it once per turn, not per
+        candidate-per-turn, and lean on ``rotate_session`` to bound the
+        trace length. (A snapshot/restore fast path that avoids the full
+        re-ground is tracked as a follow-up.)
+
         Args:
             candidates: Tool names the framework would normally expose
                 to the agent for the next turn.

diff --git a/sponsio/integrations/claude_agent.py b/sponsio/integrations/claude_agent.py
@@ -112,7 +112,11 @@ async def pre_tool_hook(
             check = guard.guard_before(tool_name, tool_input)
             guard.last_check = check
 
-            if check.blocked:
+            # ``stop_original`` folds in ``redirected``: this hook denies
+            # via the SDK permission system and has no substitution path,
+            # so a redirect fails closed (denied) rather than running the
+            # unsafe call.
+            if check.stop_original:
                 # Prefer the structured ``agent_msg`` from OutcomeBuilder
                 # — it's already phrased to steer the model toward
                 # abandoning this action. Falls back to the legacy

diff --git a/sponsio/integrations/crewai.py b/sponsio/integrations/crewai.py
@@ -102,7 +102,10 @@ def on_tool_start(self, context: Any) -> Any:
         )
         self.last_check = check
 
-        if check.blocked:
+        # ``stop_original`` folds in ``redirected``: CrewAI's adapter has
+        # no transparent-substitution path, so a redirect fails closed
+        # (returns the rejection) rather than executing the unsafe tool.
+        if check.stop_original:
             msg = select_agent_message(
                 check.det_violations, fallback="Contract violation detected"
             )
@@ -186,7 +189,8 @@ def make_guarded(orig: Any, name: str):
                 def guarded(*args: Any, **kwargs: Any) -> Any:
                     call_args = kwargs if kwargs else {"args": list(args)}
                     check = guard.guard_before(name, call_args)
-                    if check.blocked:
+                    # Fail closed on redirect too (no substitution path here).
+                    if check.stop_original:
                         msg = select_agent_message(
                             check.det_violations, fallback="contract violated"
                         )

diff --git a/sponsio/integrations/google_adk.py b/sponsio/integrations/google_adk.py
@@ -93,7 +93,8 @@ def wrap_tool(self, tool: Callable[..., Any]) -> Callable[..., Any]:
             async def guarded_async(*args: Any, **kwargs: Any) -> Any:
                 check = guard.guard_before(tool_name, _call_args(tool, args, kwargs))
                 guard.last_check = check
-                if check.blocked:
+                # Fail closed on redirect too (no substitution path here).
+                if check.stop_original:
                     return _blocked_result(check)
 
                 result = await tool(*args, **kwargs)

diff --git a/sponsio/integrations/mcp.py b/sponsio/integrations/mcp.py
@@ -142,13 +142,17 @@ async def call_tool(self, tool_name: str, arguments: dict | None = None) -> dict
         # that proxy this to an LLM (Claude Desktop, custom orchestrators)
         # can show the agent-tuned phrasing while keeping the legacy
         # ``violations`` array of log-formatted strings for back-compat.
-        blocked = [r for r in results if r.action == "blocked"]
-        if blocked:
+        # Treat ``redirected`` the same as ``blocked`` here: this proxy
+        # has no transparent-substitution path, so a ``redirect_to_safe``
+        # redirect must refuse the unsafe call rather than fall through
+        # and execute it (a fail-open hole).
+        stopped = [r for r in results if r.action in ("blocked", "redirected")]
+        if stopped:
             return {
                 "error": "Blocked by behavioral contract",
-                "violations": [r.message for r in blocked],
+                "violations": [r.message for r in stopped],
                 "agent_messages": [
-                    r.agent_msg for r in blocked if getattr(r, "agent_msg", "")
+                    r.agent_msg for r in stopped if getattr(r, "agent_msg", "")
                 ],
             }
 

diff --git a/sponsio/integrations/vercel_ai.py b/sponsio/integrations/vercel_ai.py
@@ -120,7 +120,9 @@ async def wrap_tool(self, call: Any, next_fn: Any) -> Any:
                 check = guard.guard_before(tool_name, kwargs)
                 guard.last_check = check
 
-                if check.blocked:
+                # ``stop_original`` folds in ``redirected``: no transparent
+                # substitution path here, so a redirect fails closed.
+                if check.stop_original:
                     msg = select_agent_message(
                         check.det_violations, fallback="Contract violation"
                     )

diff --git a/sponsio/patterns/library.py b/sponsio/patterns/library.py
@@ -1,4 +1,4 @@
-"""Pattern library. the constraint primitive layer.
+"""Pattern library — the constraint primitive layer.
 
 Patterns are the building blocks the rest of Sponsio compiles to: each
 function takes plain string args and returns a ``DetFormula`` (an LTL
@@ -299,6 +299,17 @@ def workflow_step(
     Returns:
         A ``DetFormula`` (NOT marked liveness — X is one-step bounded
         and the runtime can decide it after a single event, unlike F).
+
+    Caveat (end-of-trace): under weak finite-trace semantics ``X`` is
+    vacuously true at the last position, so a ``trigger`` that fires on
+    the *final* event of a batch-verified trace incurs no violation
+    (there is no "next" event to inspect). In incremental enforce mode
+    this self-corrects — when the next event arrives, a non-matching
+    next action is blocked and rolled back, effectively forcing
+    ``next_action`` — but a whole-trace ``verify`` / replay will
+    silently pass a trailing trigger. Mirrors the ``rotate_session``
+    liveness caveat; relevant only to post-hoc batch checks, not live
+    guarding.
     """
     if not isinstance(trigger, Atom) or not isinstance(next_action, Atom):
         raise TypeError(
@@ -1940,8 +1951,8 @@ def delegation_depth_limit(max_depth: int, desc: str = "") -> DetFormula:
 #
 # Covers the runtime half of **ASI-03** (identity), **ASI-06** (memory
 # poisoning via content-source gating), and **ASI-07** (inter-agent
-# comm via msg_verified gating). Users supply their own key convention
-# . Sponsio doesn't hard-code "caller_id" vs "source" vs "msg_sender"
+# comm via msg_verified gating). Users supply their own key convention;
+# Sponsio doesn't hard-code "caller_id" vs "source" vs "msg_sender"
 # because each team has their own tagging scheme.
 # ---------------------------------------------------------------------------
 

diff --git a/sponsio/runtime/monitor.py b/sponsio/runtime/monitor.py
@@ -1,4 +1,4 @@
-"""RuntimeMonitor. intercepts agent actions and enforces det contracts.
+"""RuntimeMonitor — intercepts agent actions and enforces det contracts.
 
 This is the central enforcement point.  Every agent action flows through
 ``check_action()``, which runs the deterministic evaluation pipeline:
@@ -377,47 +377,47 @@ def reset(self) -> None:
     def rotate_session(self) -> dict:
         """Begin a new session window; return a summary of what was flushed.
 
-         This is the **supported** way to bound memory in long-running
-         agents (24/7 service agents, always-on schedulers) without
-         losing contract enforcement. It behaves exactly like
-         :meth:`reset`. trace, log, spans, verifier cache, and atom
-         caches are all cleared; contracts on the underlying
-         :class:`~sponsio.models.system.System` are **not** touched.
-         The only difference is intent signalling and the return value:
-         callers get back the headline metrics of the window that just
-         closed so they can plumb them into audit logs / dashboards
-         before the numbers go away.
-
-         Why not just keep using :meth:`reset`?
-         ``reset`` reads as "something went wrong, start over".
-         ``rotate_session`` is the name you want to see at a quarterly
-         review. "we rotate every 1000 turns to cap memory; here's the
-         hand-off record."
-
-         Liveness caveat
-         ---------------
-         Formulas that span the **entire trace**. ``F(tool)`` /
-         ``always_followed_by(a, b)`` / whole-trace ``rate_limit(tool, N)``
-        . lose visibility across the rotation boundary. Concretely: if
-         ``response`` was promised before ``rotate_session`` and still
-         hasn't happened, the post-rotation verifier won't see the
-         original ``trigger`` and can never fire the liveness violation.
-         To avoid silently eating obligations, this method refuses to
-         rotate while ``finish_session`` hasn't been called on a guard
-         with pending liveness obligations. but since ``RuntimeMonitor``
-         doesn't know about guard-level ``finish_session``, the check
-         has to happen one layer up. See
-         :meth:`sponsio.integrations.base.BaseGuard.rotate_session` for
-         the guard-side handling: run ``finish_session`` first, then
-         rotate.
-
-         Returns
-         -------
-         dict
-             ``{"events": int, "turns": int, "log_entries": int,
-             "violations_cleared": 0}`` (``violations_cleared`` is always
-             0 at the monitor layer. violations are tracked by
-             :class:`~sponsio.integrations.base.BaseGuard`, not here).
+        This is the **supported** way to bound memory in long-running
+        agents (24/7 service agents, always-on schedulers) without
+        losing contract enforcement. It behaves exactly like
+        :meth:`reset`: trace, log, spans, verifier cache, and atom
+        caches are all cleared; contracts on the underlying
+        :class:`~sponsio.models.system.System` are **not** touched.
+        The only difference is intent signalling and the return value:
+        callers get back the headline metrics of the window that just
+        closed so they can plumb them into audit logs / dashboards
+        before the numbers go away.
+
+        Why not just keep using :meth:`reset`?
+        ``reset`` reads as "something went wrong, start over".
+        ``rotate_session`` is the name you want to see at a quarterly
+        review: "we rotate every 1000 turns to cap memory; here's the
+        hand-off record."
+
+        Liveness caveat
+        ---------------
+        Formulas that span the **entire trace** — ``F(tool)`` /
+        ``always_followed_by(a, b)`` / whole-trace ``rate_limit(tool, N)``
+        — lose visibility across the rotation boundary. Concretely: if
+        ``response`` was promised before ``rotate_session`` and still
+        hasn't happened, the post-rotation verifier won't see the
+        original ``trigger`` and can never fire the liveness violation.
+        To avoid silently eating obligations, do not rotate while a
+        guard has pending liveness obligations and ``finish_session``
+        hasn't been called. ``RuntimeMonitor`` doesn't know about
+        guard-level ``finish_session``, so this method cannot enforce
+        that itself; the check has to happen one layer up. See
+        :meth:`sponsio.integrations.base.BaseGuard.rotate_session` for
+        the guard-side handling: run ``finish_session`` first, then
+        rotate.
+
+        Returns
+        -------
+        dict
+            ``{"events": int, "turns": int, "log_entries": int,
+            "violations_cleared": 0}`` (``violations_cleared`` is always
+            0 at the monitor layer — violations are tracked by
+            :class:`~sponsio.integrations.base.BaseGuard`, not here).
         """
         with self._lock:
             summary = {

diff --git a/sponsio/tracer/grounding.py b/sponsio/tracer/grounding.py
@@ -467,6 +467,13 @@ def ground_event(
         # Emit unconditionally for every arg field on every tool_call —
         # Terms read by direct key lookup, not by content_atoms
         # extraction.
+        #
+        # Memory note: this stores the raw arg value (by reference) into
+        # the per-timestep valuation, which is retained for the whole
+        # trace. For tools with large or deeply-nested args that keeps
+        # those objects alive until ``reset`` / ``rotate_session``. Fine
+        # for typical scalar args; if a tool passes megabyte payloads,
+        # bound the trace with ``rotate_session``.
         if event.args:
             for _field, _val in event.args.items():
                 v[pred_key("arg_value", event.tool, _field)] = _val

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -76,7 +76,21 @@ def _reset_rich_style_cache():
     if hasattr(Color, "parse") and hasattr(Color.parse, "cache_clear"):
         Color.parse.cache_clear()
     for obj in gc.get_objects():
-        if isinstance(obj, Style):
+        # ``isinstance(obj, Style)`` reads ``obj.__class__`` (CPython
+        # consults the instance's ``__class__`` attribute, not just
+        # ``type(obj)``). Some live objects are lazy-import proxies
+        # whose ``__class__`` getter has side effects — e.g. once an
+        # openai-touching test has run, the openai SDK leaves proxies
+        # for its optional submodules in memory, and probing their
+        # ``__class__`` tries to import ``sounddevice`` (voice helpers)
+        # or raises ``OpenAIError``. That used to escape this autouse
+        # fixture and error the *setup* of every subsequent test. Treat
+        # any object that objects to introspection as "not a Style".
+        try:
+            is_style = isinstance(obj, Style)
+        except Exception:
+            continue
+        if is_style:
             try:
                 obj._ansi = None
             except Exception: