Skip to content
Merged
8 changes: 8 additions & 0 deletions sponsio/formulas/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@
}
)

# Process-global, append-only, lock-free. It backs "warn at most once
# per missing Var key for the lifetime of the process" — deliberately
# coarse: the warning is a developer aid, not session state, so a benign
# data race on ``add`` (which never loses the warning, only its exact
# ordering) is acceptable and not worth taking the monitor's RLock for.
# Consequence: warnings do not re-fire across sessions or test cases in
# the same interpreter. Tests that assert on the warning should reset
# this set in a fixture.
_warned_missing_vars: set[str] = set()


Expand Down
9 changes: 9 additions & 0 deletions sponsio/formulas/formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,15 @@ class Var(FormulaMixin, Term):

Examples: ``Var("cost")``, ``Var("count", "tool")``.

Note: ``==`` / ``<`` / ``<=`` / ``>`` / ``>=`` are overloaded to
*build comparison AST nodes* (``Var("x") == 5`` returns
``Eq(Var("x"), Const(5))``), SQLAlchemy-column style — they do NOT
return a bool. So ``Var("x") == Var("x")`` is a truthy ``Eq`` node,
not ``True``; don't rely on ``==`` to value-compare two ``Var``
instances or to dedupe them in ordinary code. Hashing still works
(the frozen-dataclass ``__hash__`` is based on ``name``/``args``),
so ``Var`` is usable as a dict key / set member.

Attributes:
name: Variable name.
args: Optional positional arguments for parameterized variables.
Expand Down
12 changes: 10 additions & 2 deletions sponsio/integrations/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,11 @@ def wrap_tool(self, tool: Any) -> Any:
async def guarded_async(*args: Any, **kwargs: Any) -> Any:
check = guard.guard_before(tool_name, kwargs)
guard.last_check = check
if check.blocked:
# ``stop_original`` folds in ``redirected``: this adapter
# does not implement transparent tool substitution, so a
# ``redirect_to_safe`` redirect fails closed (refuse)
# rather than running the unsafe call.
if check.stop_original:
msg = select_agent_message(
check.det_violations, fallback="Contract violation"
)
Expand All @@ -151,7 +155,11 @@ async def guarded_async(*args: Any, **kwargs: Any) -> Any:
def guarded_sync(*args: Any, **kwargs: Any) -> Any:
check = guard.guard_before(tool_name, kwargs)
guard.last_check = check
if check.blocked:
# ``stop_original`` folds in ``redirected``: this adapter
# does not implement transparent tool substitution, so a
# ``redirect_to_safe`` redirect fails closed (refuse)
# rather than running the unsafe call.
if check.stop_original:
msg = select_agent_message(
check.det_violations, fallback="Contract violation"
)
Expand Down
34 changes: 33 additions & 1 deletion sponsio/integrations/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""BaseGuard. unified parent class for all framework integrations.
"""BaseGuard — unified parent class for all framework integrations.

Every framework adapter (LangGraph, MCP, CrewAI, etc.) inherits from
BaseGuard. The base class owns all contract logic:
Expand Down Expand Up @@ -313,6 +313,27 @@ def redirected(self) -> bool:
"""True if any det violation returned a redirect outcome."""
return any(r.action == "redirected" for r in self.det_violations)

@property
def stop_original(self) -> bool:
"""True when the adapter must NOT execute the original tool call.

Folds hard blocks together with redirects. A ``redirect_to_safe``
violation rolls the original ``unsafe`` call out of the trace and
sets ``redirected_to``; an adapter that runs the original call
anyway would execute the exact action the contract forbade — a
fail-*open* hole, the worst outcome for an enforcement layer.

Adapters that implement transparent substitution (LangGraph)
MUST branch on ``redirected`` / ``redirected_to`` *first* and
invoke the safe tool. Adapters that don't (yet) support
substitution MUST gate execution on ``stop_original`` so a
redirect fails *closed* (the unsafe call is refused) instead of
falling through to ``if check.blocked``, which is False on a
redirect. ``escalated`` is intentionally excluded — see
``guard_before`` for why escalation does not gate execution.
"""
return self.blocked or self.redirected

@property
def needs_retry(self) -> bool:
"""True if any sto violation returned a retry with feedback."""
Expand Down Expand Up @@ -1353,6 +1374,17 @@ def filter_tools(self, candidates: list[str]) -> list[str]:
via ``guard_before``. Treat ``filter_tools`` as a first-line
defence, not a replacement for ``guard_before``.

Cost: O(len(candidates) × trace_length) per call. Each probe
appends a synthetic event and ``rollback_last_event`` resets the
verifier, so the *next* probe re-grounds the whole trace from
scratch rather than incrementally. That's fine for typical tool
menus and session lengths, but on a long-running agent with a
wide toolset it is the one spot that gives up the otherwise
incremental O(ΔN) grounding — call it once per turn, not per
candidate-per-turn, and lean on ``rotate_session`` to bound the
trace length. (A snapshot/restore fast path that avoids the full
re-ground is tracked as a follow-up.)

Args:
candidates: Tool names the framework would normally expose
to the agent for the next turn.
Expand Down
6 changes: 5 additions & 1 deletion sponsio/integrations/claude_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,11 @@ async def pre_tool_hook(
check = guard.guard_before(tool_name, tool_input)
guard.last_check = check

if check.blocked:
# ``stop_original`` folds in ``redirected``: this hook denies
# via the SDK permission system and has no substitution path,
# so a redirect fails closed (denied) rather than running the
# unsafe call.
if check.stop_original:
# Prefer the structured ``agent_msg`` from OutcomeBuilder
# — it's already phrased to steer the model toward
# abandoning this action. Falls back to the legacy
Expand Down
8 changes: 6 additions & 2 deletions sponsio/integrations/crewai.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@ def on_tool_start(self, context: Any) -> Any:
)
self.last_check = check

if check.blocked:
# ``stop_original`` folds in ``redirected``: CrewAI's adapter has
# no transparent-substitution path, so a redirect fails closed
# (returns the rejection) rather than executing the unsafe tool.
if check.stop_original:
msg = select_agent_message(
check.det_violations, fallback="Contract violation detected"
)
Expand Down Expand Up @@ -186,7 +189,8 @@ def make_guarded(orig: Any, name: str):
def guarded(*args: Any, **kwargs: Any) -> Any:
call_args = kwargs if kwargs else {"args": list(args)}
check = guard.guard_before(name, call_args)
if check.blocked:
# Fail closed on redirect too (no substitution path here).
if check.stop_original:
msg = select_agent_message(
check.det_violations, fallback="contract violated"
)
Expand Down
3 changes: 2 additions & 1 deletion sponsio/integrations/google_adk.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ def wrap_tool(self, tool: Callable[..., Any]) -> Callable[..., Any]:
async def guarded_async(*args: Any, **kwargs: Any) -> Any:
check = guard.guard_before(tool_name, _call_args(tool, args, kwargs))
guard.last_check = check
if check.blocked:
# Fail closed on redirect too (no substitution path here).
if check.stop_original:
return _blocked_result(check)

result = await tool(*args, **kwargs)
Expand Down
12 changes: 8 additions & 4 deletions sponsio/integrations/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,17 @@ async def call_tool(self, tool_name: str, arguments: dict | None = None) -> dict
# that proxy this to an LLM (Claude Desktop, custom orchestrators)
# can show the agent-tuned phrasing while keeping the legacy
# ``violations`` array of log-formatted strings for back-compat.
blocked = [r for r in results if r.action == "blocked"]
if blocked:
# Treat ``redirected`` the same as ``blocked`` here: this proxy
# has no transparent-substitution path, so a ``redirect_to_safe``
# redirect must refuse the unsafe call rather than fall through
# and execute it (a fail-open hole).
stopped = [r for r in results if r.action in ("blocked", "redirected")]
if stopped:
return {
"error": "Blocked by behavioral contract",
"violations": [r.message for r in blocked],
"violations": [r.message for r in stopped],
"agent_messages": [
r.agent_msg for r in blocked if getattr(r, "agent_msg", "")
r.agent_msg for r in stopped if getattr(r, "agent_msg", "")
],
}

Expand Down
4 changes: 3 additions & 1 deletion sponsio/integrations/vercel_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,9 @@ async def wrap_tool(self, call: Any, next_fn: Any) -> Any:
check = guard.guard_before(tool_name, kwargs)
guard.last_check = check

if check.blocked:
# ``stop_original`` folds in ``redirected``: no transparent
# substitution path here, so a redirect fails closed.
if check.stop_original:
msg = select_agent_message(
check.det_violations, fallback="Contract violation"
)
Expand Down
17 changes: 14 additions & 3 deletions sponsio/patterns/library.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Pattern library. the constraint primitive layer.
"""Pattern library — the constraint primitive layer.

Patterns are the building blocks the rest of Sponsio compiles to: each
function takes plain string args and returns a ``DetFormula`` (an LTL
Expand Down Expand Up @@ -299,6 +299,17 @@ def workflow_step(
Returns:
A ``DetFormula`` (NOT marked liveness — X is one-step bounded
and the runtime can decide it after a single event, unlike F).

Caveat (end-of-trace): under weak finite-trace semantics ``X`` is
vacuously true at the last position, so a ``trigger`` that fires on
the *final* event of a batch-verified trace incurs no violation
(there is no "next" event to inspect). In incremental enforce mode
this self-corrects — when the next event arrives, a non-matching
next action is blocked and rolled back, effectively forcing
``next_action`` — but a whole-trace ``verify`` / replay will
silently pass a trailing trigger. Mirrors the ``rotate_session``
liveness caveat; relevant only to post-hoc batch checks, not live
guarding.
"""
if not isinstance(trigger, Atom) or not isinstance(next_action, Atom):
raise TypeError(
Expand Down Expand Up @@ -1940,8 +1951,8 @@ def delegation_depth_limit(max_depth: int, desc: str = "") -> DetFormula:
#
# Covers the runtime half of **ASI-03** (identity), **ASI-06** (memory
# poisoning via content-source gating), and **ASI-07** (inter-agent
# comm via msg_verified gating). Users supply their own key convention
# . Sponsio doesn't hard-code "caller_id" vs "source" vs "msg_sender"
# comm via msg_verified gating). Users supply their own key convention;
# Sponsio doesn't hard-code "caller_id" vs "source" vs "msg_sender"
# because each team has their own tagging scheme.
# ---------------------------------------------------------------------------

Expand Down
84 changes: 42 additions & 42 deletions sponsio/runtime/monitor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""RuntimeMonitor. intercepts agent actions and enforces det contracts.
"""RuntimeMonitor — intercepts agent actions and enforces det contracts.

This is the central enforcement point. Every agent action flows through
``check_action()``, which runs the deterministic evaluation pipeline:
Expand Down Expand Up @@ -377,47 +377,47 @@ def reset(self) -> None:
def rotate_session(self) -> dict:
"""Begin a new session window; return a summary of what was flushed.

This is the **supported** way to bound memory in long-running
agents (24/7 service agents, always-on schedulers) without
losing contract enforcement. It behaves exactly like
:meth:`reset`. trace, log, spans, verifier cache, and atom
caches are all cleared; contracts on the underlying
:class:`~sponsio.models.system.System` are **not** touched.
The only difference is intent signalling and the return value:
callers get back the headline metrics of the window that just
closed so they can plumb them into audit logs / dashboards
before the numbers go away.

Why not just keep using :meth:`reset`?
``reset`` reads as "something went wrong, start over".
``rotate_session`` is the name you want to see at a quarterly
review. "we rotate every 1000 turns to cap memory; here's the
hand-off record."

Liveness caveat
---------------
Formulas that span the **entire trace**. ``F(tool)`` /
``always_followed_by(a, b)`` / whole-trace ``rate_limit(tool, N)``
. lose visibility across the rotation boundary. Concretely: if
``response`` was promised before ``rotate_session`` and still
hasn't happened, the post-rotation verifier won't see the
original ``trigger`` and can never fire the liveness violation.
To avoid silently eating obligations, this method refuses to
rotate while ``finish_session`` hasn't been called on a guard
with pending liveness obligations. but since ``RuntimeMonitor``
doesn't know about guard-level ``finish_session``, the check
has to happen one layer up. See
:meth:`sponsio.integrations.base.BaseGuard.rotate_session` for
the guard-side handling: run ``finish_session`` first, then
rotate.

Returns
-------
dict
``{"events": int, "turns": int, "log_entries": int,
"violations_cleared": 0}`` (``violations_cleared`` is always
0 at the monitor layer. violations are tracked by
:class:`~sponsio.integrations.base.BaseGuard`, not here).
This is the **supported** way to bound memory in long-running
agents (24/7 service agents, always-on schedulers) without
losing contract enforcement. It behaves exactly like
:meth:`reset`: trace, log, spans, verifier cache, and atom
caches are all cleared; contracts on the underlying
:class:`~sponsio.models.system.System` are **not** touched.
The only difference is intent signalling and the return value:
callers get back the headline metrics of the window that just
closed so they can plumb them into audit logs / dashboards
before the numbers go away.

Why not just keep using :meth:`reset`?
``reset`` reads as "something went wrong, start over".
``rotate_session`` is the name you want to see at a quarterly
review: "we rotate every 1000 turns to cap memory; here's the
hand-off record."

Liveness caveat
---------------
Formulas that span the **entire trace** — ``F(tool)`` /
``always_followed_by(a, b)`` / whole-trace ``rate_limit(tool, N)``
— lose visibility across the rotation boundary. Concretely: if
``response`` was promised before ``rotate_session`` and still
hasn't happened, the post-rotation verifier won't see the
original ``trigger`` and can never fire the liveness violation.
To avoid silently eating obligations, this method refuses to
rotate while ``finish_session`` hasn't been called on a guard
with pending liveness obligations — but since ``RuntimeMonitor``
doesn't know about guard-level ``finish_session``, the check
has to happen one layer up. See
:meth:`sponsio.integrations.base.BaseGuard.rotate_session` for
the guard-side handling: run ``finish_session`` first, then
rotate.

Returns
-------
dict
``{"events": int, "turns": int, "log_entries": int,
"violations_cleared": 0}`` (``violations_cleared`` is always
0 at the monitor layer — violations are tracked by
:class:`~sponsio.integrations.base.BaseGuard`, not here).
"""
with self._lock:
summary = {
Expand Down
7 changes: 7 additions & 0 deletions sponsio/tracer/grounding.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,13 @@ def ground_event(
# Emit unconditionally for every arg field on every tool_call —
# Terms read by direct key lookup, not by content_atoms
# extraction.
#
# Memory note: this stores the raw arg value (by reference) into
# the per-timestep valuation, which is retained for the whole
# trace. For tools with large or deeply-nested args that keeps
# those objects alive until ``reset`` / ``rotate_session``. Fine
# for typical scalar args; if a tool passes megabyte payloads,
# bound the trace with ``rotate_session``.
if event.args:
for _field, _val in event.args.items():
v[pred_key("arg_value", event.tool, _field)] = _val
Expand Down
16 changes: 15 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,21 @@ def _reset_rich_style_cache():
if hasattr(Color, "parse") and hasattr(Color.parse, "cache_clear"):
Color.parse.cache_clear()
for obj in gc.get_objects():
if isinstance(obj, Style):
# ``isinstance(obj, Style)`` reads ``obj.__class__`` (CPython
# consults the instance's ``__class__`` attribute, not just
# ``type(obj)``). Some live objects are lazy-import proxies
# whose ``__class__`` getter has side effects — e.g. once an
# openai-touching test has run, the openai SDK leaves proxies
# for its optional submodules in memory, and probing their
# ``__class__`` tries to import ``sounddevice`` (voice helpers)
# or raises ``OpenAIError``. That used to escape this autouse
# fixture and error the *setup* of every subsequent test. Treat
# any object that objects to introspection as "not a Style".
try:
is_style = isinstance(obj, Style)
except Exception:
continue
if is_style:
try:
obj._ansi = None
except Exception:
Expand Down
Loading