diff --git a/nodes/src/nodes/tool_shell/IGlobal.py b/nodes/src/nodes/tool_shell/IGlobal.py new file mode 100644 index 000000000..e01e25741 --- /dev/null +++ b/nodes/src/nodes/tool_shell/IGlobal.py @@ -0,0 +1,99 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2024 RocketRide Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +""" +Shell tool node - global (shared) state. + +Reads config and stores execution defaults (working dir, timeout, env vars, +output cap) and the command allowlist for IInstance tool methods. 
class IGlobal(IGlobalBase):
    """Global (shared) state for tool_shell.

    ``beginGlobal`` reads the node configuration once and stores the execution
    defaults plus the compiled command allowlist. The class-level values below
    are what each attribute holds before ``beginGlobal`` runs and after
    ``endGlobal`` resets it.
    """

    # Default working directory for commands; None when none is configured.
    working_dir: str | None = None
    # Configured execution timeout in seconds.
    timeout: int = DEFAULT_TIMEOUT
    # Per-stream (stdout / stderr) output cap in bytes.
    max_output_bytes: int = DEFAULT_MAX_OUTPUT_BYTES
    # Environment variables defined in the node configuration.
    env_vars: dict[str, str] | None = None
    # Whether per-call, agent-supplied environment variables are honoured.
    allow_external_env: bool = False
    # Whether a call may request shell mode.
    allow_shell: bool = False
    # Compiled allowlist regexes; None or empty means no allowlist is enforced.
    command_patterns: list[re.Pattern] | None = None

    @staticmethod
    def _config_flag(cfg, key: str) -> bool:
        """Read a boolean option from *cfg*, treating textual values strictly.

        ``bool('false')`` is ``True`` in Python, so a flag that arrives as a
        string would otherwise switch a security-relevant option on. Strings
        enable the flag only when they spell an affirmative value; every other
        string leaves it off. Non-string values keep plain truthiness.
        """
        raw = cfg.get(key, False)
        if isinstance(raw, str):
            return raw.strip().lower() in ('1', 'true', 'yes', 'on')
        return bool(raw)

    def beginGlobal(self) -> None:
        """Load node config into instance state; refuses to start with a broken allowlist.

        Nothing is loaded when the endpoint was opened in ``OPEN_MODE.CONFIG``.

        Raises:
            ValueError: At least one allowlist pattern failed to compile and
                no pattern compiled successfully.
        """
        if self.IEndpoint.endpoint.openMode == OPEN_MODE.CONFIG:
            return

        cfg = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig)
        self.working_dir = parse_working_dir(cfg)
        self.timeout = parse_timeout(cfg)
        self.max_output_bytes = parse_max_output(cfg)
        self.env_vars = parse_env_vars(cfg)
        self.allow_external_env = self._config_flag(cfg, 'allowExternalEnv')
        self.allow_shell = self._config_flag(cfg, 'allowShell')

        invalid_pattern_errors: list[str] = []

        def _on_invalid_pattern(msg: str) -> None:
            """Record a pattern compile failure and emit a warning."""
            invalid_pattern_errors.append(msg)
            warning(msg)

        compiled_patterns = parse_command_patterns(cfg, on_invalid=_on_invalid_pattern)
        # An empty pattern list disables allowlist enforcement, so an allowlist
        # that was configured but is entirely broken must stop startup instead
        # of degrading to "allow everything".
        if invalid_pattern_errors and not compiled_patterns:
            raise ValueError(
                f'commandAllowlist is configured but every pattern failed to compile; refusing to start with a non-functional allowlist (would silently allow all commands). First error: {invalid_pattern_errors[0]}'
            )
        self.command_patterns = compiled_patterns

    def endGlobal(self) -> None:
        """Reset shared state to defaults when the node tears down."""
        self.working_dir = None
        self.timeout = DEFAULT_TIMEOUT
        self.max_output_bytes = DEFAULT_MAX_OUTPUT_BYTES
        self.env_vars = None
        self.allow_external_env = False
        self.allow_shell = False
        self.command_patterns = None
class IInstance(IInstanceBase):
    """Per-call instance for the shell tool; exposes the ``execute`` tool function."""

    IGlobal: IGlobal

    @tool_function(
        input_schema={
            'type': 'object',
            'required': ['command'],
            'properties': {
                'command': {
                    'type': 'string',
                    'description': 'Command to execute. By default it is parsed (shlex) into argv and run without a shell — pipes, redirects, globs, and "&&" do not work. Set "use_shell": true (only if the node permits it) to enable shell features. Example: "npm run build" or "ls -la /tmp".',
                },
                'working_dir': {
                    'type': 'string',
                    'description': 'Optional working directory for this call. Overrides the node-level default. Must be an existing directory.',
                },
                'env': {
                    'type': 'object',
                    'description': 'Optional environment variables to inject for this call. Layered over the host environment; node-configured vars take precedence.',
                    'additionalProperties': {'type': 'string'},
                },
                'timeout': {
                    'type': 'integer',
                    'description': 'Optional timeout in seconds for this call. Capped by the node configuration.',
                    'minimum': 1,
                },
                'use_shell': {
                    'type': 'boolean',
                    'description': 'If true, run the command via the host shell (enables pipes, redirects, globs, "&&"). Only honored when the node has shell mode enabled in its configuration; otherwise the call is rejected.',
                    'default': False,
                },
                'confirm_destructive': {
                    'type': 'boolean',
                    'description': 'Required to permit destructive operations like "rm -r", "dd of=", "mkfs", "find -delete", "shred", "git clean -f", "chmod 000", "truncate -s 0". Without this flag, such commands are rejected. Only checked in argv mode.',
                    'default': False,
                },
            },
        },
        output_schema={
            'type': 'object',
            'properties': {
                'stdout': {'type': 'string', 'description': 'Captured stdout (UTF-8, possibly truncated).'},
                'stderr': {'type': 'string', 'description': 'Captured stderr (UTF-8, possibly truncated).'},
                'exit_code': {
                    'type': 'integer',
                    'description': 'Process exit code. -1 indicates a timeout, 127 indicates the shell could not be launched.',
                },
                'timed_out': {'type': 'boolean', 'description': 'True if the command was killed due to timeout.'},
                'truncated': {
                    'type': 'boolean',
                    'description': 'True if stdout or stderr was truncated to fit the size cap.',
                },
            },
        },
        description=lambda self: (
            'Execute a shell command on the host and return stdout, stderr, and exit code. '
            'Use for build scripts (npm/pip/make), package management, file operations, process management, '
            'environment inspection, and host-installed git operations. '
            f'Timeout: {self.IGlobal.timeout}s (max {MAX_TIMEOUT}s). '
            f'Default working directory: {self.IGlobal.working_dir or "host process CWD"}. '
            'For portable git operations that do not depend on the host having git installed, prefer the Git node.'
        ),
    )
    def execute(self, args):
        """Execute a shell command on the host.

        Validation order: input shape, allowlist, shell-mode permission,
        working directory, timeout, ``env`` shape, and (argv mode only) the
        destructive-operation gate. The command is then handed to
        ``execute_command`` and its result dict is returned unchanged.

        Raises:
            ValueError: Any validation step rejects the call.
        """
        if not isinstance(args, dict):
            raise ValueError('Tool input must be a JSON object (dict)')

        command = args.get('command')
        if not isinstance(command, str) or not command.strip():
            raise ValueError('"command" is required and must be a non-empty string')

        self._validate_command(command)

        use_shell = self._resolve_flag(args, 'use_shell')
        if use_shell and not self.IGlobal.allow_shell:
            raise ValueError('"use_shell" is not permitted by the node configuration. Enable "Allow shell mode" in the node configuration to run commands through the host shell.')

        cwd = self._resolve_cwd(args.get('working_dir'))
        timeout = self._resolve_timeout(args.get('timeout'))
        call_env = args.get('env')
        if call_env is not None and not isinstance(call_env, dict):
            raise ValueError('"env" must be a JSON object of string values')

        env = build_environment(
            base_env=None,
            config_env=self.IGlobal.env_vars or {},
            call_env=call_env,
            allow_external_env=self.IGlobal.allow_external_env,
        )

        if use_shell:
            command_to_run: str | list[str] = command
        else:
            try:
                argv = shlex.split(command)
            except ValueError as exc:
                raise ValueError(f'Could not parse command into argv: {exc}') from exc
            if not argv:
                raise ValueError('"command" must contain at least one token after parsing')

            destructive, label = is_destructive_argv(argv)
            # The flag is only read when the gate actually fires, so a call
            # that never trips the gate is not affected by its value.
            if destructive and not self._resolve_flag(args, 'confirm_destructive'):
                raise ValueError(f'Command appears to perform a destructive operation ({label}). Pass "confirm_destructive": true in the call args to acknowledge and proceed.')
            command_to_run = argv

        return execute_command(
            command_to_run,
            cwd=cwd,
            env=env,
            timeout=timeout,
            max_output_bytes=self.IGlobal.max_output_bytes,
            use_shell=use_shell,
        )

    def _resolve_flag(self, args: dict, name: str) -> bool:
        """Read the boolean call argument *name* without trusting truthiness.

        ``bool('false')`` is ``True``, so a string-typed "false" would
        otherwise turn a safety flag on. Booleans pass through, a missing or
        ``None`` value is ``False``, the strings "true"/"false" (any case) are
        parsed, and anything else is rejected.

        Raises:
            ValueError: The value is not a boolean or a "true"/"false" string.
        """
        raw = args.get(name, False)
        if raw is None or isinstance(raw, bool):
            return bool(raw)
        if isinstance(raw, str):
            text = raw.strip().lower()
            if text == 'true':
                return True
            if text == 'false':
                return False
        raise ValueError(f'"{name}" must be a boolean')

    def _validate_command(self, command: str) -> None:
        """Reject commands that don't fully match any configured allowlist regex.

        With no patterns configured every command is accepted.

        Raises:
            ValueError: Patterns are configured and none of them matches.
        """
        # Use fullmatch (not search) so that an unanchored pattern like
        # "git status" cannot be smuggled past via "git status; rm -rf /".
        patterns = self.IGlobal.command_patterns or []
        if patterns and not any(p.fullmatch(command) for p in patterns):
            raise ValueError('Command is not permitted by the configured allowlist.')

    def _resolve_cwd(self, override: object) -> str | None:
        """Pick the per-call cwd override (validated) or fall back to the configured default.

        A missing or blank override selects the configured default.

        Raises:
            ValueError: The override is not a string, or the chosen path is
                not an existing directory.
        """
        if override is None:
            return self._validated_default_cwd()
        if not isinstance(override, str):
            raise ValueError('"working_dir" must be a string')
        path = override.strip()
        if not path:
            return self._validated_default_cwd()
        if not os.path.isdir(path):
            raise ValueError(f'working_dir does not exist or is not a directory: {path!r}')
        return path

    def _validated_default_cwd(self) -> str | None:
        """Return the configured default cwd after verifying it exists, or None if unset.

        Raises:
            ValueError: A default is configured but is not an existing directory.
        """
        default = self.IGlobal.working_dir
        if default is None:
            return None
        if not os.path.isdir(default):
            raise ValueError(f'working_dir does not exist or is not a directory: {default!r}')
        return default

    def _resolve_timeout(self, override: object) -> int:
        """Coerce a per-call timeout override and clamp it to the configured maximum.

        Without an override the configured timeout is used. An override can
        only shorten the timeout; larger values are clamped to the configured one.

        Raises:
            ValueError: The override is not integer-like or is not positive.
        """
        if override is None:
            return self.IGlobal.timeout
        # bool is an int subclass; "timeout": true must not silently mean 1 second.
        if isinstance(override, bool):
            raise ValueError('"timeout" must be an integer (seconds)')
        try:
            value = int(override)
        except (TypeError, ValueError, OverflowError) as exc:
            # OverflowError covers int(float('inf')).
            raise ValueError('"timeout" must be an integer (seconds)') from exc
        if value <= 0:
            raise ValueError('"timeout" must be a positive integer')
        return min(value, self.IGlobal.timeout)
Useful for build scripts, package managers, file operations, and host-installed git. + +By default the command is parsed into argv and executed without invoking a shell — pipes, redirects, globs, and `&&` are not interpreted, eliminating the shell-injection class. Shell mode is opt-in via node configuration. + +## As a tool + +When connected to an agent, exposes one function under the configured server name (default: `shell`): + +| Function | Description | +| --------------- | ------------------------------------------------------ | +| `shell.execute` | Run a command and return stdout, stderr, and exit code | + +**`shell.execute` parameters:** + +| Parameter | Required | Description | +| --------------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `command` | yes | Command to execute. By default parsed into argv and run without a shell | +| `working_dir` | no | Working directory for this call. Overrides the node default | +| `env` | no | Object of environment variables to inject for this call | +| `timeout` | no | Per-call timeout in seconds (capped by node configuration) | +| `use_shell` | no | If `true`, run via the host shell to enable pipes, redirects, globs, `&&`. Only honoured when the node has shell mode enabled | +| `confirm_destructive` | no | Required to permit destructive operations (`rm -r`, `dd of=`, `mkfs`, `find -delete`, `shred`, `git clean -f`, `chmod 000`, `truncate -s 0`). Only checked in argv mode | + +`exit_code` is the process return code. `-1` indicates a timeout kill; `127` indicates the command could not be launched. 
+ +## Configuration + +| Field | Default | Description | +| ----------------------------- | --------- | ------------------------------------------------------------------------------------------------------------------------------------------ | +| Default working directory | host CWD | Working directory used when the agent does not provide one | +| Execution timeout (seconds) | `30` | Max seconds a command may run (max 1800). On timeout the entire process tree is killed | +| Max output size (bytes) | `1048576` | Per-stream cap on stdout and stderr; output beyond this is streamed and discarded so memory stays bounded | +| Allow shell mode | off | Permits agents to set `use_shell: true` per call. Off by default — the safer argv mode handles most use cases without shell-injection risk | +| Allow agent-supplied env vars | off | Whether the agent may inject env vars per call. Off by default — `LD_PRELOAD`/`PATH`/`NODE_OPTIONS` can redirect execution | +| Environment variables | — | Variables injected into every command. Override agent-supplied vars of the same name | +| Command allowlist | — | Regex patterns. If non-empty, the full command must match at least one (`re.fullmatch`). Use `.*` for substring matches | + +## Notes + +- Commands run directly on the host with the privileges of the running process — no sandbox. Use the allowlist to restrict commands and avoid deploying in untrusted environments. +- The destructive-verb gate only fires in argv mode. In shell mode the gate is disabled because the shell can express equivalent operations (redirects, expansions) that flat-string analysis cannot reliably detect — operators who enable shell mode are accepting that responsibility. +- An allowlist whose patterns all fail to compile is rejected at startup (fail-closed); individual invalid patterns are dropped with a warning. +- For portable git operations that don't require git on the host, prefer the Git node. 
diff --git a/nodes/src/nodes/tool_shell/__init__.py b/nodes/src/nodes/tool_shell/__init__.py new file mode 100644 index 000000000..dbc02b165 --- /dev/null +++ b/nodes/src/nodes/tool_shell/__init__.py @@ -0,0 +1,29 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2024 RocketRide Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ============================================================================= + +"""Shell tool node package: exposes a host shell-execution tool to agents.""" + +from .IGlobal import IGlobal +from .IInstance import IInstance + +__all__ = ['IGlobal', 'IInstance'] diff --git a/nodes/src/nodes/tool_shell/config_parser.py b/nodes/src/nodes/tool_shell/config_parser.py new file mode 100644 index 000000000..0ca711cfd --- /dev/null +++ b/nodes/src/nodes/tool_shell/config_parser.py @@ -0,0 +1,128 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2024 RocketRide Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +""" +Pure-Python config parsing helpers for tool_shell. + +Kept free of rocketlib/runtime imports so the helpers can be unit-tested +in isolation. 
DEFAULT_TIMEOUT = 30
MAX_TIMEOUT = 1800
DEFAULT_MAX_OUTPUT_BYTES = 1024 * 1024


def _parse_int(raw: object, default: int) -> int:
    """Return ``int(raw)``, or *default* when *raw* is None or not integer-like.

    OverflowError is handled as well as TypeError/ValueError because
    ``int(float('inf'))`` raises it.
    """
    if raw is None:
        return default
    try:
        return int(raw)
    except (TypeError, ValueError, OverflowError):
        return default


def parse_working_dir(cfg: dict) -> str | None:
    """Return the configured working directory, or None when unset/blank."""
    raw = cfg.get('workingDir')
    if raw is None:
        return None
    val = str(raw).strip()
    return val or None


def parse_timeout(cfg: dict) -> int:
    """Return the configured timeout in seconds, clamped to [1, MAX_TIMEOUT].

    A missing or non-integer value yields DEFAULT_TIMEOUT.
    """
    value = _parse_int(cfg.get('timeout'), DEFAULT_TIMEOUT)
    return max(1, min(value, MAX_TIMEOUT))


def parse_max_output(cfg: dict) -> int:
    """Return the per-stream output cap in bytes (minimum 1 KiB).

    A missing or non-integer value yields DEFAULT_MAX_OUTPUT_BYTES.
    """
    return max(1024, _parse_int(cfg.get('maxOutputBytes'), DEFAULT_MAX_OUTPUT_BYTES))


def parse_env_vars(cfg: dict) -> dict[str, str]:
    """Parse the envVars config rows into a {name: str(value)} mapping.

    Rows without a ``get`` method and rows with a blank ``envName`` are
    skipped. A ``None`` value becomes the empty string.
    """
    env: dict[str, str] = {}
    for row in _coerce_list(cfg.get('envVars')):
        if not hasattr(row, 'get'):
            continue
        name = str(row.get('envName') or '').strip()
        if not name:
            continue
        value = row.get('envValue')
        env[name] = '' if value is None else str(value)
    return env


def parse_command_patterns(
    cfg: dict,
    *,
    on_invalid: Callable[[str], None] | None = None,
) -> list[re.Pattern]:
    """Compile the command allowlist regexes.

    Each allowlist row is either a mapping carrying ``commandPattern`` or a
    plain pattern string. Plain strings are accepted so that an allowlist
    written as ``["git .*"]`` is enforced rather than silently dropped, since
    an empty result means no allowlist is applied at all. Blank patterns and
    ``None`` rows are skipped.

    *on_invalid* is invoked with a human-readable message for each pattern
    that fails to compile and for each row of an unsupported type. It
    defaults to a no-op so the helper stays a pure function for tests.
    """
    patterns: list[re.Pattern] = []
    for row in _coerce_list(cfg.get('commandAllowlist')):
        if row is None:
            continue
        if isinstance(row, str):
            pat_str = row.strip()
        elif hasattr(row, 'get'):
            pat_str = str(row.get('commandPattern') or '').strip()
        else:
            if on_invalid is not None:
                on_invalid(f'Ignoring command allowlist entry of unsupported type {type(row).__name__}: {row!r}')
            continue
        if not pat_str:
            continue
        try:
            patterns.append(re.compile(pat_str))
        except re.error as e:
            if on_invalid is not None:
                on_invalid(f'Invalid command allowlist regex {pat_str!r}: {e}')
    return patterns


def _coerce_list(raw: object) -> list:
    """Accept a list directly or a JSON-encoded list string; otherwise return []."""
    if raw is None:
        return []
    if isinstance(raw, list):
        return raw
    try:
        parsed = json.loads(str(raw))
    except (json.JSONDecodeError, TypeError, ValueError):
        return []
    return parsed if isinstance(parsed, list) else []
Use to run scripts, manage processes, install packages, and interact with the operating system via the command line.", "Common use cases: build scripts (npm run build, make), package management (npm/pip install), process management, file operations (cp, mv, mkdir, find), environment inspection (env, which), and host-installed git operations.", "Note: For portable git operations that do not depend on the host environment having git installed, use the Git node instead."], + "tile": ["Tool: ${parameters.tool_shell.serverName}.execute"], + "lanes": {}, + "preconfig": { + "default": "default", + "profiles": { + "default": { + "title": "Default", + "serverName": "shell" + } + } + }, + "fields": { + "tool_shell.serverName": { + "hidden": true, + "type": "string", + "title": "Server name", + "description": "Namespace prefix for the tool: .execute", + "default": "shell" + }, + "tool_shell.workingDir": { + "type": "string", + "title": "Default working directory", + "description": "Default working directory for command execution. The agent may override per-call. If empty, the host process CWD is used.", + "default": "", + "optional": true + }, + "tool_shell.timeout": { + "type": "integer", + "title": "Execution timeout (seconds)", + "description": "Maximum seconds a command may run before it is killed. Default 30s, max 1800s.", + "default": 30, + "minimum": 1, + "maximum": 1800, + "optional": true + }, + "tool_shell.maxOutputBytes": { + "type": "integer", + "title": "Max output size (bytes)", + "description": "Maximum size applied to each of stdout and stderr individually; each stream is truncated beyond this limit. Default 1 MiB.", + "default": 1048576, + "minimum": 1024, + "optional": true + }, + "tool_shell.allowExternalEnv": { + "type": "boolean", + "title": "Allow agent-supplied env vars", + "description": "If enabled, the agent may inject additional environment variables per call. Variables defined in this node's config always take precedence. 
Defaults off because env vars like LD_PRELOAD, PATH, or NODE_OPTIONS can redirect command execution.", + "default": false, + "enum": [ + [true, "Yes"], + [false, "No"] + ] + }, + "tool_shell.allowShell": { + "type": "boolean", + "title": "Allow shell mode", + "description": "When off (default), commands are parsed into argv and executed without invoking a shell — pipes, redirects, globs, and '&&' are not interpreted, eliminating the shell-injection class. Enable only if commands genuinely need shell features; once enabled, agents can request shell mode per call via use_shell=true.", + "default": false, + "enum": [ + [true, "Yes"], + [false, "No"] + ] + }, + "tool_shell.envName": { + "type": "string", + "title": "Variable name", + "default": "" + }, + "tool_shell.envValue": { + "type": "string", + "title": "Value", + "default": "" + }, + "tool_shell.envVars": { + "title": "Environment variables", + "description": "Environment variables injected into every command. These override any same-named variables supplied by the agent.", + "type": "array", + "optional": true, + "minItems": 0, + "items": { + "type": "object", + "properties": ["tool_shell.envName", "tool_shell.envValue"] + } + }, + "tool_shell.commandPattern": { + "type": "string", + "title": "Command pattern (regex)", + "default": "" + }, + "tool_shell.commandAllowlist": { + "title": "Command allowlist", + "description": "Regex patterns for allowed commands. The full command string must match at least one pattern (re.fullmatch) to run; use .* for substring matching (e.g. 'npm .*'). 
If empty, any command is allowed.", + "type": "array", + "optional": true, + "minItems": 0, + "items": { + "type": "object", + "properties": ["tool_shell.commandPattern"] + } + } + }, + "shape": [ + { + "section": "Pipe", + "title": "Shell", + "properties": ["type", "tool_shell.workingDir", "tool_shell.timeout", "tool_shell.maxOutputBytes", "tool_shell.allowShell", "tool_shell.allowExternalEnv", "tool_shell.envVars", "tool_shell.commandAllowlist"] + } + ] +} diff --git a/nodes/src/nodes/tool_shell/shell_executor.py b/nodes/src/nodes/tool_shell/shell_executor.py new file mode 100644 index 000000000..ce82290f7 --- /dev/null +++ b/nodes/src/nodes/tool_shell/shell_executor.py @@ -0,0 +1,260 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2024 RocketRide Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
_TRUNCATED_MARKER = '\n...[truncated]'
_READ_CHUNK_SIZE = 4096
_IS_WINDOWS = sys.platform == 'win32'
# Seconds to wait for the pipe readers once the process tree has been killed
# on timeout, before returning whatever output was captured so far.
_READER_JOIN_GRACE = 5.0


def _rm_recursive(argv: list[str]) -> bool:
    """``rm`` invocation that recursively deletes (``-r`` / ``-rf`` / ``--recursive``)."""
    if argv[0] != 'rm':
        return False
    for tok in argv[1:]:
        if tok == '--recursive':
            return True
        # Short-option cluster such as -rf / -fR.
        if tok.startswith('-') and not tok.startswith('--') and ('r' in tok or 'R' in tok):
            return True
    return False


def _git_clean_force(argv: list[str]) -> bool:
    """``git clean`` with a force flag (the only way it actually deletes)."""
    if argv[0] != 'git' or len(argv) < 2 or argv[1] != 'clean':
        return False
    return any(tok == '--force' or (tok.startswith('-') and not tok.startswith('--') and 'f' in tok) for tok in argv[2:])


def _truncate_to_zero(argv: list[str]) -> bool:
    """``truncate`` to size 0 — wipes file contents in place.

    Recognises ``-s 0``, ``-s0``, ``--size 0`` and ``--size=0``.
    """
    if argv[0] != 'truncate':
        return False
    for i, tok in enumerate(argv[1:], start=1):
        if tok in ('-s', '--size') and i + 1 < len(argv) and argv[i + 1] == '0':
            return True
        if tok in ('-s0', '--size=0'):
            return True
    return False


_DESTRUCTIVE_CHECKS: list[tuple[str, Callable[[list[str]], bool]]] = [
    ('rm -r', _rm_recursive),
    ('dd of=', lambda a: a[0] == 'dd' and any(t.startswith('of=') for t in a[1:])),
    ('mkfs', lambda a: a[0] == 'mkfs' or a[0].startswith('mkfs.')),
    ('find -delete', lambda a: a[0] == 'find' and '-delete' in a[1:]),
    ('shred', lambda a: a[0] == 'shred'),
    ('git clean -f', _git_clean_force),
    ('chmod 000', lambda a: a[0] == 'chmod' and '000' in a[1:]),
    ('truncate -s 0', _truncate_to_zero),
]


def is_destructive_argv(argv: list[str]) -> tuple[bool, str | None]:
    """Return ``(True, label)`` if *argv* matches a known-destructive pattern.

    Used by the IInstance layer to gate destructive operations behind an
    explicit ``confirm_destructive`` token in the call args. Only meaningful
    in argv mode — shell-mode commands aren't analysed because the shell can
    express equivalent operations (redirects, expansions) that flat-string
    analysis cannot reliably detect.

    The program name is reduced to its basename before matching, so a
    path-qualified invocation such as ``/bin/rm -rf x`` is gated exactly like
    ``rm -rf x``. Only the program named by ``argv[0]`` is inspected.
    """
    if not argv:
        return False, None
    program = argv[0]
    if isinstance(program, str):
        program = os.path.basename(program)
    normalized = [program, *argv[1:]]
    for label, check in _DESTRUCTIVE_CHECKS:
        try:
            if check(normalized):
                return True, label
        except (IndexError, AttributeError):
            continue
    return False, None


def _kill_process_tree(proc: subprocess.Popen) -> None:
    """Force-kill *proc* and every descendant it spawned through the shell."""
    if _IS_WINDOWS:
        # taskkill /T walks the process tree; /F forces termination.
        # Falls back to proc.kill() if taskkill itself can't run.
        try:
            subprocess.run(
                ['taskkill', '/T', '/F', '/PID', str(proc.pid)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=5,
                check=False,
            )
        except (OSError, subprocess.SubprocessError):
            try:
                proc.kill()
            except OSError:
                pass
    else:
        try:
            os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
        except OSError:
            # Covers ProcessLookupError / PermissionError (both OSError
            # subclasses); fall back to killing just the direct child.
            try:
                proc.kill()
            except OSError:
                pass


def _launch_failure(message: str) -> dict:
    """Build the result dict reported when the child could not be started."""
    return {
        'stdout': '',
        'stderr': message,
        'exit_code': 127,
        'timed_out': False,
        'truncated': False,
    }


def execute_command(
    command: str | list[str],
    *,
    cwd: str | None,
    env: dict[str, str],
    timeout: int,
    max_output_bytes: int,
    use_shell: bool = False,
) -> dict:
    """Run *command* and capture its result.

    With ``use_shell=False`` (default), *command* must be an argv list and
    the child is spawned directly without a shell — eliminating the entire
    shell-injection class. With ``use_shell=True``, *command* is a string
    interpreted by the host shell (enabling pipes, redirects, globs, ``&&``).

    Stdout and stderr are streamed through reader threads and capped at
    ``max_output_bytes`` per stream so a runaway command cannot exhaust
    engine memory. Once a stream's buffer reaches the cap, further chunks
    are drained and discarded so the child can finish writing without
    blocking on a full OS pipe (preserving its natural exit code).

    Returns:
        A dict with ``stdout``, ``stderr``, ``exit_code``, ``timed_out`` and
        ``truncated``. ``exit_code`` is ``-1`` after a timeout kill and
        ``127`` when the child could not be launched at all (missing
        executable, permission denied, invalid argument, ...).
    """
    # Spawn the child in its own process group/session so we can later kill
    # the entire tree on timeout — otherwise shell-spawned grandchildren
    # outlive proc.kill() and keep our reader threads blocked on their pipes.
    popen_kwargs: dict = {
        'shell': use_shell,
        'cwd': cwd,
        'env': env,
        'stdout': subprocess.PIPE,
        'stderr': subprocess.PIPE,
    }
    if _IS_WINDOWS:
        popen_kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
    else:
        popen_kwargs['start_new_session'] = True

    try:
        proc = subprocess.Popen(command, **popen_kwargs)
    except FileNotFoundError as exc:
        return _launch_failure(f'Command not available: {exc}')
    except (OSError, ValueError) as exc:
        # PermissionError, NotADirectoryError, embedded NUL bytes, illegal
        # env names: every one of them means the command never started.
        return _launch_failure(f'Command could not be launched: {exc}')

    stdout_buf = bytearray()
    stderr_buf = bytearray()
    capped = {'stdout': False, 'stderr': False}

    def _drain(stream, buf: bytearray, key: str) -> None:
        """Append chunks to *buf* up to the cap; discard anything beyond it."""
        try:
            while True:
                chunk = stream.read(_READ_CHUNK_SIZE)
                if not chunk:
                    return
                remaining = max_output_bytes - len(buf)
                if remaining <= 0:
                    capped[key] = True
                    continue
                if len(chunk) > remaining:
                    buf.extend(chunk[:remaining])
                    capped[key] = True
                    continue
                buf.extend(chunk)
        except (ValueError, OSError):
            return

    readers = [
        (threading.Thread(target=_drain, args=(proc.stdout, stdout_buf, 'stdout'), daemon=True), proc.stdout),
        (threading.Thread(target=_drain, args=(proc.stderr, stderr_buf, 'stderr'), daemon=True), proc.stderr),
    ]
    for thread, _stream in readers:
        thread.start()

    timed_out = False
    try:
        exit_code = proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        timed_out = True
        _kill_process_tree(proc)
        proc.wait()
        exit_code = -1

    # After a timeout kill the pipes normally hit EOF at once; the bounded
    # join keeps a descendant that survived the kill from blocking this call
    # indefinitely. On a normal exit the readers are awaited to EOF so no
    # output is lost.
    join_timeout = _READER_JOIN_GRACE if timed_out else None
    for thread, stream in readers:
        thread.join(join_timeout)
        if not thread.is_alive():
            # Reader is done with the pipe; release the descriptor now
            # instead of leaving it to garbage collection.
            stream.close()

    stdout_text = bytes(stdout_buf).decode('utf-8', errors='replace')
    stderr_text = bytes(stderr_buf).decode('utf-8', errors='replace')
    if capped['stdout']:
        stdout_text += _TRUNCATED_MARKER
    if capped['stderr']:
        stderr_text += _TRUNCATED_MARKER

    return {
        'stdout': stdout_text,
        'stderr': stderr_text,
        'exit_code': exit_code,
        'timed_out': timed_out,
        'truncated': capped['stdout'] or capped['stderr'],
    }


def build_environment(
    base_env: dict[str, str] | None,
    config_env: dict[str, str],
    call_env: dict[str, str] | None,
    *,
    allow_external_env: bool,
) -> dict[str, str]:
    """Merge the environment sources into a new dict.

    Layers, lowest to highest precedence: *base_env* (``os.environ`` when
    None), then *call_env* — applied only when *allow_external_env* is true,
    skipping keys that are empty or not strings and turning ``None`` values
    into ``''`` — then *config_env*, which always wins.
    """
    merged: dict[str, str] = dict(base_env if base_env is not None else os.environ)
    if allow_external_env and call_env:
        for k, v in call_env.items():
            if not isinstance(k, str) or not k:
                continue
            merged[k] = '' if v is None else str(v)
    merged.update(config_env)
    return merged
class TestParseWorkingDir:
    """Behaviour of parse_working_dir."""

    def test_missing_returns_none(self):
        """An absent workingDir key yields None."""
        resolved = parse_working_dir({})
        assert resolved is None

    def test_strips_whitespace(self):
        """Leading and trailing whitespace is removed from the path."""
        config = {'workingDir': ' /tmp '}
        assert parse_working_dir(config) == '/tmp'

    def test_empty_string_returns_none(self):
        """A value made only of whitespace is treated as unset."""
        config = {'workingDir': ' '}
        assert parse_working_dir(config) is None


class TestParseTimeout:
    """Behaviour of parse_timeout."""

    def test_missing_returns_default(self):
        """An absent timeout key yields DEFAULT_TIMEOUT."""
        resolved = parse_timeout({})
        assert resolved == DEFAULT_TIMEOUT

    def test_valid_value(self):
        """An in-range integer passes through unchanged."""
        config = {'timeout': 60}
        assert parse_timeout(config) == 60

    def test_string_numeric_accepted(self):
        """A numeric string is converted to its integer value."""
        config = {'timeout': '120'}
        assert parse_timeout(config) == 120

    def test_clamps_above_max(self):
        """A value beyond MAX_TIMEOUT is reduced to MAX_TIMEOUT."""
        oversized = MAX_TIMEOUT * 10
        assert parse_timeout({'timeout': oversized}) == MAX_TIMEOUT

    def test_clamps_below_min(self):
        """Zero and negative values are raised to 1."""
        for raw in (0, -5):
            assert parse_timeout({'timeout': raw}) == 1

    def test_invalid_falls_back_to_default(self):
        """Unparseable and None values yield DEFAULT_TIMEOUT."""
        for raw in ('not-a-number', None):
            assert parse_timeout({'timeout': raw}) == DEFAULT_TIMEOUT


class TestParseMaxOutput:
    """Behaviour of parse_max_output."""

    def test_missing_returns_default(self):
        """An absent maxOutputBytes key yields DEFAULT_MAX_OUTPUT_BYTES."""
        resolved = parse_max_output({})
        assert resolved == DEFAULT_MAX_OUTPUT_BYTES

    def test_enforces_minimum(self):
        """A value under 1 KiB is raised to 1024."""
        config = {'maxOutputBytes': 100}
        assert parse_max_output(config) == 1024

    def test_passes_through_large_values(self):
        """A large value is returned as given."""
        five_mib = 5 * 1024 * 1024
        assert parse_max_output({'maxOutputBytes': five_mib}) == 5 * 1024 * 1024

    def test_invalid_falls_back_to_default(self):
        """An unparseable value yields DEFAULT_MAX_OUTPUT_BYTES."""
        config = {'maxOutputBytes': 'huge'}
        assert parse_max_output(config) == DEFAULT_MAX_OUTPUT_BYTES


class TestParseEnvVars:
    """Behaviour of parse_env_vars."""

    def test_missing_returns_empty(self):
        """An absent envVars key yields an empty mapping."""
        resolved = parse_env_vars({})
        assert resolved == {}

    def test_parses_array_of_rows(self):
        """Every well-formed row contributes one name/value entry."""
        rows = [
            {'envName': 'FOO', 'envValue': 'bar'},
            {'envName': 'BAZ', 'envValue': 'qux'},
        ]
        assert parse_env_vars({'envVars': rows}) == {'FOO': 'bar', 'BAZ': 'qux'}

    def test_skips_blank_names(self):
        """A row whose envName is blank is left out."""
        rows = [
            {'envName': ' ', 'envValue': 'ignored'},
            {'envName': 'KEEP', 'envValue': 'yes'},
        ]
        assert parse_env_vars({'envVars': rows}) == {'KEEP': 'yes'}

    def test_coerces_value_to_string(self):
        """A non-string value is converted to its string form."""
        rows = [{'envName': 'N', 'envValue': 42}]
        assert parse_env_vars({'envVars': rows}) == {'N': '42'}

    def test_preserves_falsy_non_none_values(self):
        """0, False and '' survive as their own string forms."""
        rows = [
            {'envName': 'ZERO', 'envValue': 0},
            {'envName': 'FLAG', 'envValue': False},
            {'envName': 'EMPTY', 'envValue': ''},
        ]
        expected = {'ZERO': '0', 'FLAG': 'False', 'EMPTY': ''}
        assert parse_env_vars({'envVars': rows}) == expected

    def test_none_value_becomes_empty_string(self):
        """A None value is stored as the empty string."""
        rows = [{'envName': 'NIL', 'envValue': None}]
        assert parse_env_vars({'envVars': rows}) == {'NIL': ''}

    def test_accepts_json_encoded_array_string(self):
        """The array may also arrive as a JSON-encoded string."""
        encoded = '[{"envName": "X", "envValue": "y"}]'
        assert parse_env_vars({'envVars': encoded}) == {'X': 'y'}

    def test_malformed_json_returns_empty(self):
        """A string that is not valid JSON yields an empty mapping."""
        config = {'envVars': 'not json'}
        assert parse_env_vars(config) == {}

    def test_skips_non_dict_rows(self):
        """Rows that are not mappings are left out."""
        rows = ['not-a-dict', {'envName': 'OK', 'envValue': 'v'}]
        assert parse_env_vars({'envVars': rows}) == {'OK': 'v'}


class TestParseCommandPatterns:
    """Behaviour of parse_command_patterns."""

    def test_missing_returns_empty(self):
        """An absent commandAllowlist key yields no patterns."""
        compiled = parse_command_patterns({})
        assert compiled == []

    def test_compiles_valid_regexes(self):
        """Each valid regex is compiled, in the configured order."""
        rows = [
            {'commandPattern': r'^npm '},
            {'commandPattern': r'^git status'},
        ]
        compiled = parse_command_patterns({'commandAllowlist': rows})
        assert len(compiled) == 2
        npm_pattern, git_status_pattern = compiled[0], compiled[1]
        assert npm_pattern.search('npm install lodash')
        assert git_status_pattern.search('git status')
        assert not git_status_pattern.search('git push')

    def test_invalid_regex_reported_and_skipped(self):
        """A regex that fails to compile is reported via on_invalid and dropped."""
        reported: list[str] = []
        rows = [
            {'commandPattern': r'(unbalanced'},
            {'commandPattern': r'^ls$'},
        ]
        compiled = parse_command_patterns({'commandAllowlist': rows}, on_invalid=reported.append)
        assert len(compiled) == 1
        assert compiled[0].search('ls')
        assert len(reported) == 1
        assert 'unbalanced' in reported[0]

    def test_skips_blank_patterns(self):
        """A blank pattern is left out."""
        rows = [{'commandPattern': ' '}, {'commandPattern': '^echo'}]
        compiled = parse_command_patterns({'commandAllowlist': rows})
        assert len(compiled) == 1

    def test_accepts_json_encoded_array_string(self):
        """The array may also arrive as a JSON-encoded string."""
        encoded = '[{"commandPattern": "^echo"}]'
        compiled = parse_command_patterns({'commandAllowlist': encoded})
        assert len(compiled) == 1
        assert compiled[0].search('echo hi')

    def test_malformed_json_returns_empty(self):
        """A string that is not valid JSON yields no patterns."""
        config = {'commandAllowlist': 'nope'}
        assert parse_command_patterns(config) == []
PY = sys.executable


def _run(command, *, cwd=None, env=None, timeout=10, max_output_bytes=4096, **extra):
    """Call execute_command with the arguments most tests share.

    ``env=None`` means a copy of the host environment; any other value is
    passed through untouched.
    """
    return execute_command(
        command,
        cwd=cwd,
        env=dict(os.environ) if env is None else env,
        timeout=timeout,
        max_output_bytes=max_output_bytes,
        **extra,
    )


class TestExecuteCommandBasics:
    """Happy paths of execute_command in argv mode (the safer default)."""

    def test_captures_stdout(self):
        """What the child prints to stdout ends up in the result."""
        outcome = _run([PY, '-c', "print('hello')"])
        assert outcome['exit_code'] == 0
        assert 'hello' in outcome['stdout']
        assert outcome['timed_out'] is False
        assert outcome['truncated'] is False

    def test_captures_stderr(self):
        """What the child writes to stderr ends up in the result."""
        outcome = _run([PY, '-c', "import sys; sys.stderr.write('oops')"])
        assert outcome['exit_code'] == 0
        assert 'oops' in outcome['stderr']

    def test_propagates_nonzero_exit_code(self):
        """The child's non-zero exit status is reported unchanged."""
        outcome = _run([PY, '-c', 'import sys; sys.exit(2)'])
        assert outcome['exit_code'] == 2
        assert outcome['timed_out'] is False

    def test_argv_mode_does_not_interpret_shell_metacharacters(self):
        """Shell metacharacters given as argv tokens arrive as plain arguments."""
        # A shell would treat '|' as a pipe into 'evil'; in argv mode both
        # are ordinary positional arguments of python -c.
        argv = [PY, '-c', 'import sys; print(sys.argv[1:])', '|', 'evil']
        outcome = _run(argv)
        assert outcome['exit_code'] == 0
        assert "'|'" in outcome['stdout']
        assert "'evil'" in outcome['stdout']


class TestExecuteCommandShellMode:
    """Opt-in shell mode keeps pipes/redirects/globs available."""

    def test_shell_mode_interprets_metacharacters(self):
        """With use_shell=True the shell chains both commands joined by '&&'."""
        first = f'"{PY}" -c "print(\'first\')"'
        second = f'"{PY}" -c "print(\'second\')"'
        outcome = _run(f'{first} && {second}', use_shell=True)
        assert outcome['exit_code'] == 0
        assert 'first' in outcome['stdout']
        assert 'second' in outcome['stdout']


class TestExecuteCommandTimeout:
    """Termination driven by the timeout."""

    def test_kills_long_running_command(self):
        """A command outliving the timeout is killed and flagged timed_out."""
        import time

        # Wall-clock measurement proves the kill returns promptly.
        began = time.monotonic()
        outcome = _run([PY, '-c', 'import time; time.sleep(5)'], timeout=1)
        elapsed = time.monotonic() - began
        assert outcome['timed_out'] is True
        assert outcome['exit_code'] == -1
        # If the kill did not take effect, the call would last as long as the
        # child's own 5 s sleep. The bound is generous to tolerate CI jitter
        # while still catching that regression.
        assert elapsed < 4.0, f'expected tree-kill to return fast; took {elapsed:.2f}s'


class TestExecuteCommandWorkingDir:
    """Overriding the working directory."""

    def test_runs_in_specified_directory(self, tmp_path):
        """The child starts in the directory passed as cwd."""
        outcome = _run([PY, '-c', 'import os; print(os.getcwd())'], cwd=str(tmp_path))
        assert outcome['exit_code'] == 0
        # Normalise both sides with realpath: the reported path can differ
        # in case (Windows) from the one pytest handed us.
        reported = os.path.realpath(outcome['stdout'].strip())
        assert reported == os.path.realpath(str(tmp_path))


class TestExecuteCommandEnvInjection:
    """Environment supplied per call."""

    def test_injects_env_var(self):
        """A variable placed in env is visible inside the child."""
        child_env = dict(os.environ)
        child_env['ROCKETRIDE_TEST_VAR'] = 'injected-value'
        script = "import os; print(os.environ.get('ROCKETRIDE_TEST_VAR', 'MISSING'))"
        outcome = _run([PY, '-c', script], env=child_env)
        assert outcome['exit_code'] == 0
        assert 'injected-value' in outcome['stdout']
        assert 'MISSING' not in outcome['stdout']


class TestExecuteCommandTruncation:
    """Caps applied to captured output."""

    def test_truncates_oversized_stdout(self):
        """Stdout larger than the cap is cut and marked."""
        # Roughly 4 KiB written against a 1 KiB cap.
        outcome = _run([PY, '-c', "print('x' * 4096)"], max_output_bytes=1024)
        assert outcome['exit_code'] == 0
        assert outcome['truncated'] is True
        assert outcome['stdout'].startswith('x' * 100)
        assert '[truncated]' in outcome['stdout']

    def test_streaming_buffer_stays_bounded_for_large_output(self):
        """The captured text never exceeds the cap, however much the child writes."""
        # Roughly 2 MiB written against a 4 KiB cap. This guards against the
        # previous capture_output=True behaviour, which buffered the whole
        # output in memory before truncating.
        script = "import sys; sys.stdout.write('y' * (2 * 1024 * 1024))"
        outcome = _run([PY, '-c', script], timeout=15, max_output_bytes=4096)
        assert outcome['exit_code'] == 0
        assert outcome['truncated'] is True
        # Expected shape: exactly cap bytes of payload followed by the marker.
        marker = '\n...[truncated]'
        captured = outcome['stdout']
        assert captured.endswith(marker)
        payload = captured[: len(captured) - len(marker)]
        assert len(payload) == 4096
        assert payload == 'y' * 4096


class TestIsDestructiveArgv:
    """is_destructive_argv recognises argv lists that perform destructive operations."""

    def test_empty_argv_is_not_destructive(self):
        """An empty argv is reported as harmless, with no label."""
        flagged, label = is_destructive_argv([])
        assert flagged is False
        assert label is None

    def test_plain_command_is_not_destructive(self):
        """An ordinary read-only command is not flagged."""
        assert is_destructive_argv(['ls', '-la'])[0] is False

    def test_rm_without_recursive_is_not_destructive(self):
        """`rm somefile` without a recursive flag is not flagged."""
        assert is_destructive_argv(['rm', 'somefile'])[0] is False

    def test_rm_recursive_short(self):
        """`rm -r dir` is flagged."""
        flagged, label = is_destructive_argv(['rm', '-r', 'dir'])
        assert flagged is True
        assert label == 'rm -r'

    def test_rm_recursive_combined_short(self):
        """`rm -rf dir` (clustered short flags) is flagged."""
        flagged, label = is_destructive_argv(['rm', '-rf', 'dir'])
        assert flagged is True
        assert label == 'rm -r'

    def test_rm_recursive_long(self):
        """`rm --recursive dir` is flagged."""
        flagged, label = is_destructive_argv(['rm', '--recursive', 'dir'])
        assert flagged is True
        assert label == 'rm -r'

    def test_dd_with_of(self):
        """`dd if=... of=...` is flagged."""
        flagged, label = is_destructive_argv(['dd', 'if=/dev/zero', 'of=/dev/sda'])
        assert flagged is True
        assert label == 'dd of='

    def test_mkfs_variants(self):
        """Both `mkfs` and `mkfs.ext4` are flagged."""
        plain = is_destructive_argv(['mkfs', '/dev/sda1'])[0]
        suffixed = is_destructive_argv(['mkfs.ext4', '/dev/sda1'])[0]
        assert plain is True
        assert suffixed is True

    def test_find_delete(self):
        """`find ... -delete` is flagged."""
        flagged, label = is_destructive_argv(['find', '.', '-name', '*.log', '-delete'])
        assert flagged is True
        assert label == 'find -delete'

    def test_git_clean_force(self):
        """`git clean -fd` is flagged."""
        flagged, label = is_destructive_argv(['git', 'clean', '-fd'])
        assert flagged is True
        assert label == 'git clean -f'

    def test_git_clean_dry_run_not_flagged(self):
        """`git clean -n` (dry run, no force flag) is not flagged."""
        assert is_destructive_argv(['git', 'clean', '-n'])[0] is False

    def test_truncate_to_zero(self):
        """`truncate -s 0 file` is flagged."""
        flagged, label = is_destructive_argv(['truncate', '-s', '0', 'file'])
        assert flagged is True
        assert label == 'truncate -s 0'

    def test_shred(self):
        """A bare `shred file` is flagged."""
        flagged, label = is_destructive_argv(['shred', 'file'])
        assert flagged is True
        assert label == 'shred'

    def test_chmod_000(self):
        """`chmod 000 file` is flagged."""
        flagged, label = is_destructive_argv(['chmod', '000', 'file'])
        assert flagged is True
        assert label == 'chmod 000'


class TestBuildEnvironment:
    """Precedence of env sources: config-defined > agent-supplied > base/host env."""

    def test_config_overrides_call_env(self):
        """A config-defined value wins over the agent-supplied one."""
        result = build_environment(
            base_env={},
            config_env={'KEY': 'from-config'},
            call_env={'KEY': 'from-agent'},
            allow_external_env=True,
        )
        assert result['KEY'] == 'from-config'

    def test_call_env_overrides_base_when_allowed(self):
        """With external env allowed, the agent value wins over the base value."""
        result = build_environment(
            base_env={'KEY': 'from-base'},
            config_env={},
            call_env={'KEY': 'from-agent'},
            allow_external_env=True,
        )
        assert result['KEY'] == 'from-agent'

    def test_call_env_ignored_when_external_disabled(self):
        """With external env disabled, nothing from call_env is applied."""
        result = build_environment(
            base_env={'KEY': 'from-base'},
            config_env={},
            call_env={'KEY': 'from-agent', 'EXTRA': 'agent-only'},
            allow_external_env=False,
        )
        assert result['KEY'] == 'from-base'
        assert 'EXTRA' not in result

    def test_config_env_added_even_when_external_disabled(self):
        """Config env applies whether or not external env is allowed."""
        result = build_environment(
            base_env={},
            config_env={'CFG': 'pinned'},
            call_env={'CFG': 'agent'},
            allow_external_env=False,
        )
        assert result['CFG'] == 'pinned'

    def test_falls_back_to_os_environ_when_base_is_none(self):
        """base_env=None makes the result inherit the host environment."""
        # Use whichever key os.environ happens to expose so the test does
        # not depend on a particular variable such as PATH being present.
        host_key = next(iter(os.environ), None)
        if host_key is None:
            return
        result = build_environment(
            base_env=None,
            config_env={},
            call_env=None,
            allow_external_env=True,
        )
        assert host_key in result

    def test_skips_invalid_call_env_entries(self):
        """A call_env entry with an empty key is left out."""
        result = build_environment(
            base_env={},
            config_env={},
            call_env={'': 'no-name', 'GOOD': 'yes'},
            allow_external_env=True,
        )
        assert '' not in result
        assert result['GOOD'] == 'yes'

    def test_coerces_none_value_to_empty_string(self):
        """A None value in call_env is stored as the empty string."""
        result = build_environment(
            base_env={},
            config_env={},
            call_env={'NONE_VAL': None},
            allow_external_env=True,
        )
        assert result['NONE_VAL'] == ''


class TestShellNotAvailable:
    """A command that cannot be launched is reported with exit_code 127."""

    def test_returns_127_when_shell_missing(self, monkeypatch):
        """FileNotFoundError raised by Popen is turned into exit_code 127."""
        import subprocess

        def raise_not_found(*args, **kwargs):
            """Popen replacement that always fails as if the binary were absent."""
            raise FileNotFoundError('no shell here')

        monkeypatch.setattr(subprocess, 'Popen', raise_not_found)

        outcome = _run('echo hi', env={})
        assert outcome['exit_code'] == 127
        assert outcome['timed_out'] is False
        assert 'no shell here' in outcome['stderr']