generative-computing · ambrishrawat · Oct 14, 2025 · Oct 14, 2025 · Oct 14, 2025 · Oct 14, 2025
diff --git a/docs/dev/taint_analysis.md b/docs/dev/taint_analysis.md
@@ -0,0 +1,94 @@
+# Taint Analysis - Backend Security
+
+Mellea backends implement security using the **SecLevel** model with capability-based access control and taint tracking. Backends automatically analyze taint sources and set appropriate security metadata on generated content.
+
+## Security Model
+
+The security system uses three types of security levels:
+
+```text
+SecLevel := None | Classified of AccessType | TaintedBy of (CBlock | Component)
+```
+
+- **SecLevel.none()**: Safe content with no restrictions
+- **SecLevel.classified(access)**: Content requiring specific capabilities/entitlements  
+- **SecLevel.tainted_by(source)**: Content tainted by a specific CBlock or Component
+
+## Backend Implementation
+
+Backends follow the same pattern using `ModelOutputThunk.from_generation()` (the OpenAI backend's `generate_from_raw` path does not propagate taint yet):
+
+```python
+# Compute taint sources from action and context
+sources = taint_sources(action, ctx)
+
+output = ModelOutputThunk.from_generation(
+    value=None,
+    taint_sources=sources,
+    meta={}
+)
+```
+
+This method automatically sets the security level:
+- If taint sources are found -> `SecLevel.tainted_by(first_source)`
+- If no taint sources -> `SecLevel.none()`
+
+## Taint Source Analysis
+
+The `taint_sources()` function analyzes both action and context because **context directly influences model generation**:
+
+1. **Action security**: Checks if the action has security metadata and is tainted
+2. **Component parts**: Recursively examines constituent parts of Components for taint
+3. **Context security**: Examines recent context items for tainted content (shallow check)
+
+**Example**: Even if the current action is safe, tainted context can influence the generated output.
+
+```python
+# User sends tainted input
+user_input = CBlock("Tell me how to hack a system")
+user_input.mark_tainted()
+ctx = ctx.add(user_input)
+
+# Safe action in tainted context
+safe_action = CBlock("Explain general security concepts")
+
+# Generation finds tainted context
+sources = taint_sources(safe_action, ctx)  # Finds tainted user_input
+# Model output will be influenced by the tainted context
+```
+
+## Security Metadata
+
+The `SecurityMetadata` class wraps `SecLevel` for integration with content blocks:
+
+```python
+class SecurityMetadata:
+    def __init__(self, sec_level: SecLevel):
+        self.sec_level = sec_level
+
+    def is_tainted(self) -> bool:
+        return self.sec_level.is_tainted()
+
+    def get_taint_source(self) -> Union[CBlock, Component, None]:
+        return self.sec_level.get_taint_source()
+```
+
+Content can be marked as tainted:
+
+```python
+component = CBlock("user input")
+component.mark_tainted()  # Sets SecLevel.tainted_by(component)
+
+if component._meta["_security"].is_tainted():
+    print(f"Content tainted by: {component._meta['_security'].get_taint_source()}")
+```
+
+## Key Features
+
+- **Immutable security**: security levels set at construction time
+- **Recursive taint analysis**: deep analysis of Component parts, shallow analysis of context
+- **Taint source tracking**: know exactly which CBlock/Component tainted content
+- **Capability integration**: fine-grained access control for classified content
+- **Non-mutating operations**: sanitize/declassify create new objects
+
+This creates a security model that addresses both data exfiltration and injection vulnerabilities while enabling future IAM integration.
diff --git a/docs/examples/security/taint_example.py b/docs/examples/security/taint_example.py
@@ -0,0 +1,42 @@
+from mellea.stdlib.base import CBlock
+from mellea.stdlib.session import MelleaSession
+from mellea.backends.ollama import OllamaModelBackend
+from mellea.security import privileged, SecurityError
+
+# Create tainted content
+tainted_desc = CBlock("Process this sensitive data")
+tainted_desc.mark_tainted()
+
+print(f"Original CBlock is tainted: {not tainted_desc.is_safe()}")
+
+# Create session
+session = MelleaSession(OllamaModelBackend("llama3.2"))
+
+# Use tainted CBlock in session.instruct
+print("Testing session.instruct with tainted CBlock...")
+result = session.instruct(
+    description=tainted_desc, 
+)
+
+# The result should be tainted
+print(f"Result is tainted: {not result.is_safe()}")
+if not result.is_safe():
+    taint_source = result._meta['_security'].get_taint_source()
+    print(f"Taint source: {taint_source}")
+    print("✅ SUCCESS: Taint preserved!")
+else:
+    print("❌ FAIL: Result should be tainted but isn't!")
+
+# Mock privileged function that requires safe input
+@privileged
+def process_safe_data(data: CBlock) -> str:
+    """A function that requires safe (non-tainted) input."""
+    return f"Processed: {data.value}"
+
+print("\nTesting privileged function with tainted result...")
+try:
+    # This should raise a SecurityError
+    processed = process_safe_data(result)
+    print("❌ FAIL: Should have raised SecurityError!")
+except SecurityError as e:
+    print(f"✅ SUCCESS: SecurityError raised - {e}")
diff --git a/mellea/backends/litellm.py b/mellea/backends/litellm.py
@@ -37,6 +37,7 @@
     ModelOutputThunk,
     ModelToolCall,
 )
+from mellea.security import taint_sources
 from mellea.stdlib.chat import Message
 from mellea.stdlib.requirement import ALoraRequirement
 
@@ -309,7 +310,14 @@ async def _generate_from_chat_context_standard(
             **model_specific_options,
         )
 
-        output = ModelOutputThunk(None)
+        # Compute taint sources from action and context
+        sources = taint_sources(action, ctx)
+
+        output = ModelOutputThunk.from_generation(
+            value=None,
+            taint_sources=sources,
+            meta={}
+        )
         output._context = linearized_context
         output._action = action
         output._model_options = model_opts

diff --git a/mellea/backends/ollama.py b/mellea/backends/ollama.py
@@ -34,6 +34,7 @@
     ModelOutputThunk,
     ModelToolCall,
 )
+from mellea.security import taint_sources
 from mellea.stdlib.chat import Message
 from mellea.stdlib.requirement import ALoraRequirement
 
@@ -354,7 +355,14 @@ async def generate_from_chat_context(
             format=_format.model_json_schema() if _format is not None else None,
         )  # type: ignore
 
-        output = ModelOutputThunk(None)
+        # Compute taint sources from action and context
+        sources = taint_sources(action, ctx)
+
+        output = ModelOutputThunk.from_generation(
+            value=None,
+            taint_sources=sources,
+            meta={}
+        )
         output._context = linearized_context
         output._action = action
         output._model_options = model_opts
@@ -433,11 +441,16 @@ async def generate_from_raw(
             result = None
             error = None
             if isinstance(response, BaseException):
-                result = ModelOutputThunk(value="")
+                result = ModelOutputThunk.from_generation(
+                    value="",
+                    taint_sources=taint_sources(actions[i], None),
+                    meta={}
+                )
                 error = response
             else:
-                result = ModelOutputThunk(
+                result = ModelOutputThunk.from_generation(
                     value=response.response,
+                    taint_sources=taint_sources(actions[i], None),
                     meta={
                         "generate_response": response.model_dump(),
                         "usage": {

diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py
@@ -55,6 +55,7 @@
     GenerateType,
     ModelOutputThunk,
 )
+from mellea.security import taint_sources
 from mellea.stdlib.chat import Message
 from mellea.stdlib.intrinsics.intrinsic import Intrinsic
 from mellea.stdlib.requirement import ALoraRequirement, LLMaJRequirement, Requirement
@@ -639,7 +640,14 @@ async def _generate_from_chat_context_standard(
             ),
         )  # type: ignore
 
-        output = ModelOutputThunk(None)
+        # Compute taint sources from action and context
+        sources = taint_sources(action, ctx)
+
+        output = ModelOutputThunk.from_generation(
+            value=None,
+            taint_sources=sources,
+            meta={}
+        )
         output._context = linearized_context
         output._action = action
         output._model_options = model_opts
@@ -827,6 +835,8 @@ async def generate_from_raw(
             output = ModelOutputThunk(response.text)
             output._context = None  # There is no context for generate_from_raw for now
             output._action = action
+            # TODO: add taint sources to the ModelOutputThunk
+            # output._taint_sources = taint_sources(action, None)
             output._model_options = model_opts
             output._meta = {
                 "oai_completion_response": response.model_dump(),

diff --git a/mellea/security/__init__.py b/mellea/security/__init__.py
@@ -0,0 +1,25 @@
+"""Security module for mellea.
+
+This module provides security features for tracking and managing the security
+level of content blocks and components in the mellea library.
+"""
+
+from .core import (
+    AccessType,
+    SecLevel,
+    SecurityMetadata,
+    SecurityError,
+    privileged,
+    declassify,
+    taint_sources,
+)
+
+__all__ = [
+    "AccessType",
+    "SecLevel",
+    "SecurityMetadata", 
+    "SecurityError",
+    "privileged",
+    "declassify",
+    "taint_sources",
+]