Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions nodes/src/nodes/tool_shell/IGlobal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# =============================================================================
# MIT License
# Copyright (c) 2024 RocketRide Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# =============================================================================

"""
Shell tool node - global (shared) state.

Reads config and stores execution defaults (working dir, timeout, env vars,
output cap) and the command allowlist for IInstance tool methods.
"""

from __future__ import annotations

import re

from ai.common.config import Config
from rocketlib import IGlobalBase, OPEN_MODE, warning

from .config_parser import (
DEFAULT_MAX_OUTPUT_BYTES,
DEFAULT_TIMEOUT,
MAX_TIMEOUT,
parse_command_patterns,
parse_env_vars,
parse_max_output,
parse_timeout,
parse_working_dir,
)


__all__ = ['IGlobal', 'DEFAULT_TIMEOUT', 'MAX_TIMEOUT', 'DEFAULT_MAX_OUTPUT_BYTES']


class IGlobal(IGlobalBase):
"""Global state for tool_shell.

The attributes below are the values in effect before ``beginGlobal`` loads
the node config and after ``endGlobal`` resets them.
"""

# Default working directory for commands; None means none is configured
# (the tool description then reports "host process CWD").
working_dir: str | None = None
# Command timeout in seconds; also the ceiling for per-call overrides.
timeout: int = DEFAULT_TIMEOUT
# Output size cap handed to the executor for each call.
max_output_bytes: int = DEFAULT_MAX_OUTPUT_BYTES
# Node-configured environment variables; None is treated as an empty mapping.
env_vars: dict[str, str] | None = None
# Passed to build_environment as ``allow_external_env``.
# NOTE(review): the class default is True, while beginGlobal falls back to
# False when 'allowExternalEnv' is absent from the config - confirm intended.
allow_external_env: bool = True
# Compiled allowlist regexes; None or an empty list disables allowlist checks.
command_patterns: list[re.Pattern] | None = None

def beginGlobal(self) -> None:
    """Load the node config into shared state, failing closed on a broken allowlist.

    Does nothing when the endpoint is opened in ``OPEN_MODE.CONFIG``.

    Raises:
        ValueError: If a command allowlist is configured but no usable pattern
            comes out of it, either because every pattern failed to compile or
            because every entry was blank. An empty compiled list makes the
            per-call allowlist check a no-op, so starting in that state would
            silently allow every command.
    """
    if self.IEndpoint.endpoint.openMode == OPEN_MODE.CONFIG:
        return

    cfg = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig)
    self.working_dir = parse_working_dir(cfg)
    self.timeout = parse_timeout(cfg)
    self.max_output_bytes = parse_max_output(cfg)
    self.env_vars = parse_env_vars(cfg)
    self.allow_external_env = bool(cfg.get('allowExternalEnv', False))

    invalid_pattern_errors: list[str] = []

    def _on_invalid_pattern(msg: str) -> None:
        """Record a pattern compile failure and emit a warning."""
        invalid_pattern_errors.append(msg)
        warning(msg)

    compiled_patterns = parse_command_patterns(cfg, on_invalid=_on_invalid_pattern)
    if not compiled_patterns:
        if invalid_pattern_errors:
            raise ValueError(
                f'commandAllowlist is configured but every pattern failed to compile; refusing to start with a non-functional allowlist (would silently allow all commands). First error: {invalid_pattern_errors[0]}'
            )
        # Blank entries never reach on_invalid, so the error list alone cannot
        # tell "no allowlist" apart from "allowlist with only blank patterns".
        # NOTE(review): assumes the raw entries live under 'commandAllowlist',
        # as the error message above implies - confirm against config_parser.
        if cfg.get('commandAllowlist'):
            raise ValueError(
                'commandAllowlist is configured but contains no usable pattern (every entry is blank); refusing to start with a non-functional allowlist (would silently allow all commands).'
            )
    self.command_patterns = compiled_patterns
Comment on lines +82 to +87
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Allowlist still fails open when configured with only blank patterns.

The fail-closed guard only triggers when at least one pattern produces a compile error (invalid_pattern_errors). However, parse_command_patterns silently skips blank/whitespace commandPattern entries without calling on_invalid, so a config like:

{"commandAllowlist": [{"commandPattern": ""}, {"commandPattern": "   "}]}

ends with compiled_patterns == [] and invalid_pattern_errors == []. No ValueError is raised, and IInstance._validate_command then short-circuits on the empty list and admits every command — exactly the silent-allow-all outcome this check is meant to prevent.

Tighten the guard to fire whenever an allowlist was configured but produced no usable patterns.

💡 Proposed fix
-        compiled_patterns = parse_command_patterns(cfg, on_invalid=_on_invalid_pattern)
-        if invalid_pattern_errors and not compiled_patterns:
-            raise ValueError(f'commandAllowlist is configured but every pattern failed to compile; refusing to start with a non-functional allowlist (would silently allow all commands). First error: {invalid_pattern_errors[0]}')
-        self.command_patterns = compiled_patterns
+        compiled_patterns = parse_command_patterns(cfg, on_invalid=_on_invalid_pattern)
+        raw_allowlist = cfg.get('commandAllowlist')
+        if raw_allowlist and not compiled_patterns:
+            first = invalid_pattern_errors[0] if invalid_pattern_errors else 'all entries were blank or invalid'
+            raise ValueError(
+                'commandAllowlist is configured but no usable patterns were compiled; '
+                f'refusing to start with a non-functional allowlist (would silently allow all commands). Detail: {first}'
+            )
+        self.command_patterns = compiled_patterns
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@nodes/src/nodes/tool_shell/IGlobal.py` around lines 82 - 85, The current
guard only raises if there are compile errors (invalid_pattern_errors) but
misses the case where a configured allowlist (commandAllowlist) yields no usable
patterns because entries were blank; update the check after
parse_command_patterns so that if the config indicates an allowlist was provided
(e.g., cfg.commandAllowlist / presence of any commandPattern entries) but
compiled_patterns is empty, raise the ValueError instead of allowing an empty
list; modify the logic around parse_command_patterns, invalid_pattern_errors and
compiled_patterns in IGlobal (the block that currently sets
self.command_patterns) to treat "configured but produced no usable patterns" as
a failure case so IInstance._validate_command cannot short-circuit and allow all
commands.


Comment thread
coderabbitai[bot] marked this conversation as resolved.
def endGlobal(self) -> None:
    """Restore every shared setting to its pre-load default on teardown."""
    # Settings whose default is "nothing configured".
    self.working_dir = None
    self.env_vars = None
    self.command_patterns = None

    # Settings with a concrete default value.
    self.timeout = DEFAULT_TIMEOUT
    self.max_output_bytes = DEFAULT_MAX_OUTPUT_BYTES
    self.allow_external_env = True
168 changes: 168 additions & 0 deletions nodes/src/nodes/tool_shell/IInstance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# =============================================================================
# MIT License
# Copyright (c) 2024 RocketRide Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# =============================================================================

"""
Shell tool node instance.

Exposes a single ``execute`` tool that runs a shell command on the host and
returns stdout, stderr, and exit code.
"""

from __future__ import annotations

import os

from rocketlib import IInstanceBase, tool_function

from .IGlobal import IGlobal, MAX_TIMEOUT
from .shell_executor import build_environment, execute_command


class IInstance(IInstanceBase):
"""Per-call instance for the shell tool; exposes the ``execute`` tool function."""

# Annotation only (no value is assigned here). The methods of this class read
# the shared settings (timeout, working_dir, env_vars, allow_external_env,
# max_output_bytes, command_patterns) through ``self.IGlobal``.
IGlobal: IGlobal

@tool_function(
input_schema={
'type': 'object',
'required': ['command'],
'properties': {
'command': {
'type': 'string',
'description': 'Shell command to execute (interpreted by the host shell). Example: "npm run build" or "ls -la /tmp".',
},
'working_dir': {
'type': 'string',
'description': 'Optional working directory for this call. Overrides the node-level default. Must be an existing directory.',
},
'env': {
'type': 'object',
'description': 'Optional environment variables to inject for this call. Layered over the host environment; node-configured vars take precedence.',
'additionalProperties': {'type': 'string'},
},
'timeout': {
'type': 'integer',
'description': 'Optional timeout in seconds for this call. Capped by the node configuration.',
'minimum': 1,
},
},
},
output_schema={
'type': 'object',
'properties': {
'stdout': {'type': 'string', 'description': 'Captured stdout (UTF-8, possibly truncated).'},
'stderr': {'type': 'string', 'description': 'Captured stderr (UTF-8, possibly truncated).'},
'exit_code': {
'type': 'integer',
'description': 'Process exit code. -1 indicates a timeout, 127 indicates the shell could not be launched.',
},
'timed_out': {'type': 'boolean', 'description': 'True if the command was killed due to timeout.'},
'truncated': {
'type': 'boolean',
'description': 'True if stdout or stderr was truncated to fit the size cap.',
},
},
},
description=lambda self: (
'Execute a shell command on the host and return stdout, stderr, and exit code. '
'Use for build scripts (npm/pip/make), package management, file operations, process management, '
'environment inspection, and host-installed git operations. '
f'Timeout: {self.IGlobal.timeout}s (max {MAX_TIMEOUT}s). '
f'Default working directory: {self.IGlobal.working_dir or "host process CWD"}. '
'For portable git operations that do not depend on the host having git installed, prefer the Git node.'
),
Comment on lines +87 to +94
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Tool description overstates the per‑call timeout ceiling.

_resolve_timeout clamps the per‑call override to self.IGlobal.timeout (line 168), not to MAX_TIMEOUT. The description string f'Timeout: {self.IGlobal.timeout}s (max {MAX_TIMEOUT}s). ' advertises MAX_TIMEOUT as the ceiling, which is what an LLM consumer will use when constructing tool calls. Requests with a timeout between IGlobal.timeout and MAX_TIMEOUT will be silently clamped down, contradicting the description.

Either drop the MAX_TIMEOUT mention from the per‑call description (since it’s really the configurable upper bound, not the per‑call ceiling), or change the clamp to MAX_TIMEOUT.

💡 Proposed fix (description-only)
-            f'Timeout: {self.IGlobal.timeout}s (max {MAX_TIMEOUT}s). '
+            f'Timeout: {self.IGlobal.timeout}s (per-call cap, also the maximum). '
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@nodes/src/nodes/tool_shell/IInstance.py` around lines 87 - 94, The
description in the description=lambda of IInstance currently advertises "max
{MAX_TIMEOUT}s" which is incorrect because _resolve_timeout clamps per-call
overrides to self.IGlobal.timeout, not MAX_TIMEOUT; update the description
string to remove the MAX_TIMEOUT parenthetical and instead state the configured
global timeout (e.g., "Timeout: {self.IGlobal.timeout}s (configured global
timeout)") so the text matches the actual behavior of _resolve_timeout and
IGlobal.timeout; alternatively, if you prefer the other approach, change
_resolve_timeout to clamp to MAX_TIMEOUT instead of self.IGlobal.timeout, but do
not leave the mismatch between the description lambda, _resolve_timeout,
IGlobal.timeout, and MAX_TIMEOUT.

)
def execute(self, args):
    """Validate the tool arguments, then run the command and return the executor's result.

    Raises:
        ValueError: If ``args`` is not a dict, ``command`` is missing or blank,
            ``env`` is neither absent nor a dict, or one of the validation
            helpers (allowlist, working directory, timeout) rejects its input.
    """
    if not isinstance(args, dict):
        raise ValueError('Tool input must be a JSON object (dict)')

    command = args.get('command')
    has_command = isinstance(command, str) and bool(command.strip())
    if not has_command:
        raise ValueError('"command" is required and must be a non-empty string')

    # The allowlist is checked before any other argument is looked at.
    self._validate_command(command)

    run_dir = self._resolve_cwd(args.get('working_dir'))
    seconds = self._resolve_timeout(args.get('timeout'))

    per_call_env = args.get('env')
    if not (per_call_env is None or isinstance(per_call_env, dict)):
        raise ValueError('"env" must be a JSON object of string values')

    merged_env = build_environment(
        base_env=None,
        config_env=self.IGlobal.env_vars or {},
        call_env=per_call_env,
        allow_external_env=self.IGlobal.allow_external_env,
    )

    result = execute_command(
        command,
        cwd=run_dir,
        env=merged_env,
        timeout=seconds,
        max_output_bytes=self.IGlobal.max_output_bytes,
    )
    return result

def _validate_command(self, command: str) -> None:
    """Raise ValueError unless ``command`` fully matches an allowlist regex.

    When no allowlist patterns are configured (None or empty), every command
    is accepted.
    """
    allowlist = self.IGlobal.command_patterns
    if not allowlist:
        return
    # fullmatch rather than search: an unanchored pattern such as
    # "git status" must not accept "git status; rm -rf /".
    for regex in allowlist:
        if regex.fullmatch(command) is not None:
            return
    raise ValueError('Command is not permitted by the configured allowlist.')
Comment thread
coderabbitai[bot] marked this conversation as resolved.

def _resolve_cwd(self, override: object) -> str | None:
    """Return the working directory to use for one call.

    A missing or blank override falls back to the validated node default. Any
    other override must be a string naming an existing directory; it is
    returned with surrounding whitespace stripped.

    Raises:
        ValueError: If the override is not a string, or names a path that is
            not an existing directory.
    """
    if override is not None and not isinstance(override, str):
        raise ValueError('"working_dir" must be a string')

    candidate = override.strip() if override else ''
    if candidate == '':
        return self._validated_default_cwd()

    if os.path.isdir(candidate):
        return candidate
    raise ValueError(f'working_dir does not exist or is not a directory: {candidate!r}')

def _validated_default_cwd(self) -> str | None:
    """Return the node-level working directory, or None when none is configured.

    Raises:
        ValueError: If a directory is configured but is not an existing directory.
    """
    configured = self.IGlobal.working_dir
    if configured is not None and not os.path.isdir(configured):
        raise ValueError(f'working_dir does not exist or is not a directory: {configured!r}')
    return configured

def _resolve_timeout(self, override: object) -> int:
    """Return the timeout in seconds to use for one call.

    Args:
        override: Per-call timeout supplied by the caller, or None to use the
            node-level timeout. Anything ``int()`` accepts is coerced.

    Returns:
        The node-level timeout when no override is given, otherwise the
        override clamped so it never exceeds the node-level timeout.

    Raises:
        ValueError: If the override cannot be converted to an integer or is
            not positive.
    """
    if override is None:
        return self.IGlobal.timeout
    try:
        value = int(override)
    except (TypeError, ValueError, OverflowError) as exc:
        # OverflowError: int(float('inf')) - report it like any other bad value
        # instead of letting an unexpected exception type escape.
        raise ValueError('"timeout" must be an integer (seconds)') from exc
    if value <= 0:
        raise ValueError('"timeout" must be a positive integer')
    # A per-call override may shorten the configured timeout, never extend it.
    return min(value, self.IGlobal.timeout)
69 changes: 69 additions & 0 deletions nodes/src/nodes/tool_shell/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
---
title: Shell
date: 2026-04-30
sidebar_position: 1
---

<head>
<title>Shell - RocketRide Documentation</title>
</head>

## What it does

Executes shell commands in the host environment. Use it to run scripts, manage processes, install packages, and interact with the operating system via the command line.

Common use cases:

- Build scripts: `npm run build`, `python setup.py install`, `make`
- Package management: `npm install`, `pip install`, `apt-get install`
- Process management: starting/stopping services, checking process status
- File operations: `cp`, `mv`, `rm`, `mkdir`, `find`, `grep`
- Environment inspection: `env`, `echo $PATH`, `which <binary>`
- Git operations (when git is available on the host): `git status`, `git add`, `git commit -m "message"`, `git push`, `git pull`, `git clone <url>`, `git log --oneline`, `git diff`, `git checkout -b <branch>`, `git merge <branch>`

> Note: For portable git operations that do not depend on the host environment having git installed, use the Git node instead.

## Tools

| Tool | Description |
| --------------- | ------------------------------------------------------------ |
| `shell.execute` | Run a shell command and return stdout, stderr, and exit code |

### shell.execute

| Parameter | Required | Description |
| ------------- | -------- | ------------------------------------------------------------------------------------------- |
| `command` | yes | Shell command to execute (interpreted by the host shell) |
| `working_dir` | no | Working directory for this call. Overrides the node default. Must be an existing directory. |
| `env` | no | Object of environment variables to inject for this call |
| `timeout` | no | Per-call timeout in seconds (capped by node configuration) |

**Response:**

```json
{
"stdout": "...",
"stderr": "...",
"exit_code": 0,
"timed_out": false,
"truncated": false
}
```

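`exit_code` is the process return code. `-1` indicates the command was killed due to timeout; `127` indicates the host shell could not be launched. Note that POSIX shells themselves also exit with `127` when the command is not found, so a `127` can mean either.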
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify typical shell semantics for exit code 127 without repository access.
set -euo pipefail

# 1) Command-not-found path (usually returns 127 from /bin/sh)
if command -v sh >/dev/null 2>&1; then
  sh -c 'definitely_not_a_real_command_12345' >/tmp/rr_out 2>/tmp/rr_err || true
  echo "sh command-not-found exit code: $?"
fi

# 2) Launch failure path (simulate missing shell binary)
python - <<'PY'
import subprocess
try:
    subprocess.run(["/definitely/missing/shell", "-c", "echo hi"], check=False)
except FileNotFoundError:
    print("launch failure produces FileNotFoundError before process exit code")
PY

Repository: rocketride-org/rocketride-server

Length of output: 177


🏁 Script executed:

cat -n nodes/src/nodes/tool_shell/README.md | sed -n '45,60p'

Repository: rocketride-org/rocketride-server

Length of output: 1166


🏁 Script executed:

#!/bin/bash
# Corrected verification: capture exit code BEFORE || operator
set -euo pipefail

# 1) Correct way to check command-not-found exit code
sh -c 'definitely_not_a_real_command_12345' 2>/dev/null || exit_code=$?
echo "sh command-not-found exit code: ${exit_code:-0}"

# 2) Verify on bash as well
bash -c 'definitely_not_a_real_command_12345' 2>/dev/null || exit_code=$?
echo "bash command-not-found exit code: ${exit_code:-0}"

Repository: rocketride-org/rocketride-server

Length of output: 150


Broaden exit_code=127 explanation to include command-not-found.

Line 53 states that 127 indicates the host shell could not be launched, but in standard POSIX shells, 127 is also (and more commonly) returned when a command is not found. The documentation should cover both cases to prevent troubleshooting confusion. Suggest: "127 indicates either a command was not found or the host shell could not be launched, depending on context."

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@nodes/src/nodes/tool_shell/README.md` at line 53, Update the README
description for the exit_code handling to broaden the meaning of 127: change the
current line that says "`127` indicates the host shell could not be launched" to
something like "`127` indicates either a command was not found or the host shell
could not be launched, depending on context" so readers understand both common
POSIX uses; target the `exit_code` documentation entry in
nodes/src/nodes/tool_shell/README.md and update the sentence that references
`127`.


## Configuration

| Field | Description |
| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- |
| Tool Namespace | Prefix for the tool name (default: `shell`) |
| Default working directory | Working directory used when the agent does not provide one. Defaults to the host process CWD |
| Execution timeout (seconds) | Maximum seconds a command may run (default 30, max 1800) |
| Max output size (bytes) | Cap on stdout and stderr each (default 1 MiB). Output beyond this is truncated |
| Allow agent-supplied env vars | Whether the agent may add env vars per call (default off). Node-defined vars always take precedence when on |
| Environment variables | Variables injected into every command |
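| Command allowlist | Regex patterns. If non-empty, the full command must match at least one pattern (re.fullmatch). Use `.*` for the variable part of a command, e.g. `npm .*` allows any command that starts with `npm` followed by a space. Note that `.*` also matches shell operators such as `;` and `&&`, so a wildcard pattern permits chained commands |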

## Security

This node executes commands directly on the host with the privileges of the running process. It does not sandbox the command. Use the command allowlist to restrict which commands can run, set a working directory to control where relative paths resolve (this does not prevent a command from reaching other paths), and avoid deploying this node in untrusted environments.
29 changes: 29 additions & 0 deletions nodes/src/nodes/tool_shell/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# =============================================================================
# MIT License
# Copyright (c) 2024 RocketRide Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# =============================================================================

"""Shell tool node package: exposes a host shell-execution tool to agents."""

from .IGlobal import IGlobal
from .IInstance import IInstance

__all__ = ['IGlobal', 'IInstance']
Loading
Loading