generative-computing · nrfulton · Nov 20, 2025 · Nov 17, 2025 · Nov 17, 2025 · Nov 18, 2025
diff --git a/docs/examples/tools/interpreter_example.py b/docs/examples/tools/interpreter_example.py
@@ -0,0 +1,77 @@
+from mellea.stdlib.tools import code_interpreter, local_code_interpreter
+from mellea import start_session, MelleaSession
+from mellea.backends.types import ModelOption
+from mellea.backends.model_ids import OPENAI_GPT_OSS_20B
+from mellea.stdlib.reqlib.tools import uses_tool, tool_arg_validator
+
+
+def example_1(m: MelleaSession):
+    """Run the code interpreter tool directly, with no LLM in the loop.
+
+    Executes the snippet ``print(1+1)`` through ``code_interpreter`` and prints
+    whatever that call returns. The session ``m`` is not used by this example.
+    """
+    interpreter_output = code_interpreter("print(1+1)")
+    print(interpreter_output)
+
+
+# Now let's ask the LLM to make a plot.
+
+
+def example_2(m: MelleaSession):
+    """Ask the model for a plot while only offering it the local code interpreter.
+
+    The tool is passed via ``ModelOption.TOOLS``; no requirement forces the
+    model to call it. The value returned by ``instruct`` is printed as-is.
+    """
+    tool_options = {ModelOption.TOOLS: [local_code_interpreter]}
+    response = m.instruct(
+        description="Make a plot of y=x^2", model_options=tool_options
+    )
+    print(response)
+
+
+# Notice that the model did not actually generate a plot. Let's force tool use:
+
+
+def example_3(m: MelleaSession):
+    """Require a call to the local code interpreter, then run the generated code.
+
+    A ``uses_tool`` requirement is attached to the instruction and
+    ``tool_calls=True`` is passed to ``instruct``. The ``code`` argument of the
+    resulting tool call is printed first; the tool is then invoked and its
+    result printed.
+    """
+    tool_options = {ModelOption.TOOLS: [local_code_interpreter]}
+    response = m.instruct(
+        description="Use the code interpreter tool to make a plot of y=x^2.",
+        requirements=[uses_tool(local_code_interpreter)],
+        model_options=tool_options,
+        tool_calls=True,
+    )
+
+    # Look the tool call up once and reuse it for inspection and execution.
+    interpreter_call = response.tool_calls["local_code_interpreter"]
+    code = interpreter_call.args["code"]
+    print(f"Going to execute the following code:\n```python\n{code}\n```")
+
+    # Call the tool.
+    outcome = interpreter_call.call_func()
+    print(outcome)
+
+
+# Notice that the model did make a plot, but it just "showed" the plot.
+# We would actually like this to be written out to a file.
+
+
+def example_4(m: MelleaSession):
+    """Require a tool call whose code saves the plot to a file, then run it.
+
+    Two requirements are attached to the instruction: ``uses_tool`` for the
+    local code interpreter, and a ``tool_arg_validator`` on the tool call's
+    ``code`` argument. The validator is a plain substring check: the code must
+    contain ``/tmp/output.png`` and must not contain ``plt.show()``. The
+    generated code is printed, the tool is invoked, and its result is printed.
+    """
+    tool_use_check = uses_tool(local_code_interpreter)
+    output_file_check = tool_arg_validator(
+        "The plot should be written to /tmp/output.png",
+        tool_name=local_code_interpreter,
+        arg_name="code",
+        validation_fn=lambda code_snippet: "/tmp/output.png" in code_snippet
+        and "plt.show()" not in code_snippet,
+    )
+    tool_options = {ModelOption.TOOLS: [local_code_interpreter]}
+
+    response = m.instruct(
+        description="Use the code interpreter tool to make a plot of y=x^2.",
+        requirements=[tool_use_check, output_file_check],
+        model_options=tool_options,
+        tool_calls=True,
+    )
+
+    # Look the tool call up once and reuse it for inspection and execution.
+    interpreter_call = response.tool_calls["local_code_interpreter"]
+    code = interpreter_call.args["code"]
+    print(f"Going to execute the following code:\n```python\n{code}\n```")
+
+    # Call the tool.
+    outcome = interpreter_call.call_func()
+    print(outcome)
+
+
+# Alternative: start the session with an explicit backend and model instead of
+# calling start_session() with no arguments:
+# m = start_session(backend_name="ollama", model_id=OPENAI_GPT_OSS_20B)
+m = start_session()
+# Only example_4 is run here; pass `m` to example_1..example_3 to try the others.
+example_4(m)
diff --git a/mellea/stdlib/reqlib/python.py b/mellea/stdlib/reqlib/python.py
@@ -12,204 +12,15 @@
 from mellea.helpers.fancy_logger import FancyLogger
 from mellea.stdlib.base import Context
 from mellea.stdlib.requirement import Requirement, ValidationResult
+from mellea.stdlib.tools.interpreter import (
+    ExecutionEnvironment,
+    LLMSandboxEnvironment,
+    StaticAnalysisEnvironment,
+    UnsafeEnvironment,
+)
 
 logger = FancyLogger.get_logger()
 
-# region execution backends
-
-
-@dataclass
-class ExecutionResult:
-    """Result of code execution."""
-
-    success: bool
-    message: str | None = None
-    error: str | None = None
-    skipped: bool = False
-
-
-class ExecutionEnvironment(ABC):
-    """Abstract environment for executing Python code."""
-
-    def __init__(self, allowed_imports: list[str] | None = None):
-        """Initialize with optional import restrictions.
-
-        Args:
-            allowed_imports: List of allowed import modules. None means any import is allowed.
-        """
-        self.allowed_imports = allowed_imports
-
-    @abstractmethod
-    def execute(self, code: str, timeout: int) -> ExecutionResult:
-        """Execute code and return result."""
-
-
-class SafeEnvironment(ExecutionEnvironment):
-    """Safe environment that validates but does not execute code."""
-
-    def execute(self, code: str, timeout: int) -> ExecutionResult:
-        """Validate code syntax and imports without executing."""
-        try:
-            ast.parse(code)
-        except SyntaxError as e:
-            return ExecutionResult(success=False, error=str(e))
-
-        if self.allowed_imports:
-            unauthorized = _get_unauthorized_imports(code, self.allowed_imports)
-            if unauthorized:
-                return ExecutionResult(
-                    success=False,
-                    error=f"Unauthorized imports detected: {', '.join(unauthorized)}",
-                )
-
-        return ExecutionResult(
-            success=True,
-            skipped=True,
-            message="Code validated but not executed (safe mode)",
-        )
-
-
-class UnsafeEnvironment(ExecutionEnvironment):
-    """Unsafe environment that executes code directly with subprocess."""
-
-    def execute(self, code: str, timeout: int) -> ExecutionResult:
-        """Execute code with subprocess after checking imports."""
-        if self.allowed_imports:
-            unauthorized = _get_unauthorized_imports(code, self.allowed_imports)
-            if unauthorized:
-                return ExecutionResult(
-                    success=False,
-                    error=f"Unauthorized imports detected: {', '.join(unauthorized)}",
-                )
-
-        return self._execute_subprocess(code, timeout)
-
-    def _execute_subprocess(self, code: str, timeout: int) -> ExecutionResult:
-        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
-            f.write(code)
-            temp_file = f.name
-
-        try:
-            # Execute code using the same Python interpreter and environment as the current process
-            # This ensures the code has access to all installed packages and dependencies
-            result = subprocess.run(
-                [sys.executable, temp_file],
-                capture_output=True,
-                text=True,
-                timeout=timeout,
-            )
-
-            if result.returncode == 0:
-                message = "Code executed successfully"
-                if result.stdout.strip():
-                    message += f"\nOutput: {result.stdout.strip()}"
-                return ExecutionResult(success=True, message=message)
-            else:
-                return ExecutionResult(
-                    success=False,
-                    error=f"Execution failed with error: {result.stderr[:200]}",
-                )
-        except subprocess.TimeoutExpired:
-            return ExecutionResult(
-                success=False, error=f"Execution timed out after {timeout} seconds"
-            )
-        except Exception as e:
-            return ExecutionResult(success=False, error=f"Execution error: {e!s}")
-        finally:
-            try:
-                Path(temp_file).unlink()
-            except Exception:
-                pass
-
-
-class LLMSandboxEnvironment(ExecutionEnvironment):
-    """Environment using llm-sandbox for secure Docker-based execution."""
-
-    def execute(self, code: str, timeout: int) -> ExecutionResult:
-        """Execute code using llm-sandbox."""
-        if self.allowed_imports:
-            unauthorized = _get_unauthorized_imports(code, self.allowed_imports)
-            if unauthorized:
-                return ExecutionResult(
-                    success=False,
-                    error=f"Unauthorized imports detected: {', '.join(unauthorized)}",
-                )
-
-        try:
-            from llm_sandbox import SandboxSession
-        except ImportError:
-            return ExecutionResult(
-                success=False,
-                error="llm-sandbox not installed. Install with: uv add 'llm-sandbox[docker]'",
-            )
-
-        try:
-            with SandboxSession(
-                lang="python", verbose=False, keep_template=False
-            ) as session:
-                result = session.run(code, timeout=timeout)
-
-                if result.exit_code == 0:
-                    message = "Code executed successfully in sandbox"
-                    if (
-                        hasattr(result, "stdout")
-                        and result.stdout
-                        and result.stdout.strip()
-                    ):
-                        message += f"\nOutput: {result.stdout.strip()}"
-                    return ExecutionResult(success=True, message=message)
-                else:
-                    if result.stderr:
-                        error_msg = f"Sandbox execution failed: {result.stderr[:200]}"
-                    else:
-                        # Log unknown error details for debugging
-                        logger.warning(
-                            f"Sandbox execution failed without stderr. Exit code: {result.exit_code}, "
-                            f"Available attributes: {[attr for attr in dir(result) if not attr.startswith('_')]}"
-                        )
-                        error_msg = f"Sandbox execution failed with exit code {result.exit_code} (no error details available)"
-                    return ExecutionResult(success=False, error=error_msg)
-
-        except Exception as e:
-            return ExecutionResult(
-                success=False, error=f"Sandbox execution error: {e!s}"
-            )
-
-
-def _get_unauthorized_imports(code: str, allowed_imports: list[str]) -> list[str]:
-    """Get list of unauthorized imports used in code."""
-    unauthorized: list[str] = []
-    try:
-        tree = ast.parse(code)
-    except SyntaxError:
-        return unauthorized
-
-    for node in ast.walk(tree):
-        if isinstance(node, ast.Import):
-            for alias in node.names:
-                base_module = alias.name.split(".")[0]
-                if (
-                    base_module not in allowed_imports
-                    and base_module not in unauthorized
-                ):
-                    unauthorized.append(base_module)
-        elif isinstance(node, ast.ImportFrom):
-            if node.module:
-                base_module = node.module.split(".")[0]
-                if (
-                    base_module not in allowed_imports
-                    and base_module not in unauthorized
-                ):
-                    unauthorized.append(base_module)
-    return unauthorized
-
-
-def _check_allowed_imports(code: str, allowed_imports: list[str]) -> bool:
-    """Check if code only uses allowed imports."""
-    return len(_get_unauthorized_imports(code, allowed_imports)) == 0
-
-
-# endregion
 
 # region code extraction
 
@@ -328,11 +139,11 @@ def _python_executes_without_error(
     elif allow_unsafe:
         environment = UnsafeEnvironment(allowed_imports=allowed_imports)
     else:
-        environment = SafeEnvironment(allowed_imports=allowed_imports)
+        environment = StaticAnalysisEnvironment(allowed_imports=allowed_imports)
 
     result = environment.execute(code, timeout)
     return ValidationResult(
-        result=result.success, reason=result.message or result.error
+        result=result.success, reason=result.to_validationresult_reason()
     )