diff --git a/docs.json b/docs.json
index 00574e7..7f194fc 100644
--- a/docs.json
+++ b/docs.json
@@ -195,7 +195,8 @@
             "sdk/guides/llm-registry",
             "sdk/guides/llm-routing",
             "sdk/guides/llm-reasoning",
-            "sdk/guides/llm-image-input"
+            "sdk/guides/llm-image-input",
+            "sdk/guides/llm-responses-streaming"
           ]
         },
         {
@@ -349,4 +350,4 @@
     { "source": "/openhands/usage/prompting/prompting-best-practices", "destination": "/openhands/usage/tips/prompting-best-practices" },
     { "source": "/openhands/usage/feedback", "destination": "/openhands/usage/troubleshooting/feedback" }
   ]
-}
+}
\ No newline at end of file
diff --git a/sdk/guides/agent-server/api-sandbox.mdx b/sdk/guides/agent-server/api-sandbox.mdx
index 438391a..c58b0c1 100644
--- a/sdk/guides/agent-server/api-sandbox.mdx
+++ b/sdk/guides/agent-server/api-sandbox.mdx
@@ -23,7 +23,7 @@
 Usage: uv run examples/24_remote_convo_with_api_sandboxed_server.py
 
 Requirements:
-  - LITELLM_API_KEY: API key for LLM access
+  - LLM_API_KEY: API key for LLM access
   - RUNTIME_API_KEY: API key for runtime API access
 """
@@ -45,13 +45,13 @@ from openhands.workspace import APIRemoteWorkspace
 
 logger = get_logger(__name__)
 
-api_key = os.getenv("LITELLM_API_KEY")
-assert api_key, "LITELLM_API_KEY required"
+api_key = os.getenv("LLM_API_KEY")
+assert api_key, "LLM_API_KEY required"
 
 llm = LLM(
     usage_id="agent",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )
@@ -62,7 +62,7 @@ if not runtime_api_key:
 
 with APIRemoteWorkspace(
-    runtime_api_url="https://runtime.eval.all-hands.dev",
+    runtime_api_url=os.getenv("RUNTIME_API_URL", "https://runtime.eval.all-hands.dev"),
     runtime_api_key=runtime_api_key,
     server_image="ghcr.io/openhands/agent-server:main-python",
 ) as workspace:
diff --git a/sdk/guides/agent-server/docker-sandbox.mdx b/sdk/guides/agent-server/docker-sandbox.mdx
index e07b44d..6d76bde 100644
--- a/sdk/guides/agent-server/docker-sandbox.mdx
+++ b/sdk/guides/agent-server/docker-sandbox.mdx
@@ -44,7 +44,7 @@ assert api_key is not None, "LLM_API_KEY environment variable is not set."
 llm = LLM(
     usage_id="agent",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )
@@ -258,19 +258,30 @@ assert api_key is not None, "LLM_API_KEY environment variable is not set."
 llm = LLM(
     usage_id="agent",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )
 
 # Create a Docker-based remote workspace with extra ports for VSCode access
+
+
+def detect_platform():
+    """Detects the correct Docker platform string."""
+    import platform
+
+    machine = platform.machine().lower()
+    if "arm" in machine or "aarch64" in machine:
+        return "linux/arm64"
+    return "linux/amd64"
+
+
 with DockerWorkspace(
     base_image="nikolaik/python-nodejs:python3.12-nodejs22",
     host_port=18010,
-    # TODO: Change this to your platform if not linux/arm64
-    platform="linux/arm64",
+    platform=detect_platform(),
     extra_ports=True,  # Expose extra ports for VSCode and VNC
 ) as workspace:
-    """Extra ports allows you to access VSCode at localhost:8011"""
+    """Extra ports allow you to access VSCode at localhost:18011"""
 
     # Create agent
     agent = get_default_agent(
@@ -441,7 +452,7 @@ assert api_key is not None, "LLM_API_KEY environment variable is not set."
 llm = LLM(
     usage_id="agent",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )
@@ -458,7 +469,6 @@ def detect_platform():
 with DockerWorkspace(
     base_image="nikolaik/python-nodejs:python3.12-nodejs22",
     host_port=8010,
-    # TODO: Change this to your platform if not linux/arm64
     platform=detect_platform(),
     extra_ports=True,  # Expose extra ports for VSCode and VNC
 ) as workspace:
@@ -492,7 +502,7 @@ with DockerWorkspace(
     logger.info(f"\nšŸ“‹ Conversation ID: {conversation.state.id}")
     logger.info("šŸ“ Sending first message...")
     conversation.send_message(
-        "Could you go to https://all-hands.dev/ blog page and summarize main "
+        "Could you go to https://openhands.dev/ blog page and summarize main "
         "points of the latest blog?"
     )
     conversation.run()
diff --git a/sdk/guides/agent-server/local-server.mdx b/sdk/guides/agent-server/local-server.mdx
index 0c0b3b1..3aa9210 100644
--- a/sdk/guides/agent-server/local-server.mdx
+++ b/sdk/guides/agent-server/local-server.mdx
@@ -139,13 +139,13 @@ assert api_key is not None, "LLM_API_KEY environment variable is not set."
 llm = LLM(
     usage_id="agent",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )
 
 title_gen_llm = LLM(
     usage_id="title-gen-llm",
     model="litellm_proxy/openai/gpt-5-mini",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )
diff --git a/sdk/guides/custom-tools.mdx b/sdk/guides/custom-tools.mdx
index 8426c10..12a33b4 100644
--- a/sdk/guides/custom-tools.mdx
+++ b/sdk/guides/custom-tools.mdx
@@ -115,7 +115,7 @@ class GrepExecutor(ToolExecutor[GrepAction, GrepObservation]):
     def __init__(self, bash: BashExecutor):
         self.bash: BashExecutor = bash
 
-    def __call__(self, action: GrepAction) -> GrepObservation:
+    def __call__(self, action: GrepAction, conversation=None) -> GrepObservation:  # noqa: ARG002
         root = os.path.abspath(action.path)
         pat = shlex.quote(action.pattern)
         root_q = shlex.quote(root)
diff --git a/sdk/guides/llm-responses-streaming.mdx b/sdk/guides/llm-responses-streaming.mdx
new file mode 100644
index 0000000..c4ca998
--- /dev/null
+++ b/sdk/guides/llm-responses-streaming.mdx
@@ -0,0 +1,62 @@
+---
+title: Responses Streaming
+description: Stream token deltas from the OpenAI Responses API path via LiteLLM.
+---
+
+This example is available on GitHub: [examples/01_standalone_sdk/24_responses_streaming.py](https://github.com/All-Hands-AI/agent-sdk/blob/main/examples/01_standalone_sdk/24_responses_streaming.py)
+
+Enable live token streaming when using the OpenAI Responses API path. This guide shows how to:
+
+- Subscribe to streaming deltas from the model
+- Log streamed chunks to a JSONL file
+- Optionally render streaming visually or print deltas to stdout
+
+```python icon="python" expandable examples/01_standalone_sdk/24_responses_streaming.py
+```
+
+```bash Running the Example
+export LLM_API_KEY="your-openai-compatible-api-key"
+# Optional overrides
+# export LLM_MODEL="openhands/gpt-5-codex"
+# export LLM_BASE_URL="https://your-litellm-or-provider-base-url"
+
+cd agent-sdk
+uv run python examples/01_standalone_sdk/24_responses_streaming.py
+```
+
+### How It Works
+
+- Pass a token callback to `Conversation` to receive streaming chunks as they arrive:
+
+```python
+conversation = Conversation(
+    agent=agent,
+    workspace=os.getcwd(),
+    token_callbacks=[on_token],
+)
+```
+
+- Each chunk contains a delta: `text_delta` for content tokens or `arguments_delta` for tool-call arguments. The example logs a serialized record per chunk to `./logs/stream/*.jsonl`; a minimal `on_token` sketch appears at the end of this section.
+
+- For a visual live view, use the built-in streaming visualizer:
+
+```python
+from openhands.sdk.conversation.streaming_visualizer import create_streaming_visualizer
+
+visualizer = create_streaming_visualizer()
+conversation = Conversation(
+    agent=agent,
+    workspace=os.getcwd(),
+    token_callbacks=[on_token],
+    callbacks=[visualizer.on_event],
+    visualize=False,
+)
+```
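+
+The shape of `on_token` itself is up to you. As a rough sketch, a callback that appends one JSON record per chunk could look like the following; the field names `text_delta` and `arguments_delta` follow the description above, but treat the exact chunk type, fields, and log path as assumptions and see the example script on GitHub for the reference implementation:
+
+```python
+import json
+import os
+from datetime import datetime, timezone
+
+LOG_PATH = "./logs/stream/session.jsonl"  # hypothetical log location
+os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
+
+
+def on_token(chunk) -> None:
+    """Append one JSON record per streamed chunk (field names assumed)."""
+    record = {
+        "ts": datetime.now(timezone.utc).isoformat(),
+        "text_delta": getattr(chunk, "text_delta", None),
+        "arguments_delta": getattr(chunk, "arguments_delta", None),
+    }
+    with open(LOG_PATH, "a") as f:
+        f.write(json.dumps(record) + "\n")
+```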
+
+## Next Steps
+
+- **[Reasoning (Responses API)](/sdk/guides/llm-reasoning)** – Access model reasoning traces
+- **[LLM Routing](/sdk/guides/llm-routing)** – Route requests to different models
+- **[Image Input](/sdk/guides/llm-image-input)** – Send images to multimodal models
diff --git a/sdk/guides/llm-routing.mdx b/sdk/guides/llm-routing.mdx
index b76c392..0766af6 100644
--- a/sdk/guides/llm-routing.mdx
+++ b/sdk/guides/llm-routing.mdx
@@ -48,7 +48,7 @@ primary_llm = LLM(
 secondary_llm = LLM(
     usage_id="agent-secondary",
     model="litellm_proxy/mistral/devstral-small-2507",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=base_url,
     api_key=SecretStr(api_key),
 )
 multimodal_router = MultimodalRouter(
diff --git a/sdk/guides/metrics.mdx b/sdk/guides/metrics.mdx
index e8b7351..a960431 100644
--- a/sdk/guides/metrics.mdx
+++ b/sdk/guides/metrics.mdx
@@ -332,7 +332,7 @@ conversation.run()
 
 second_llm = LLM(
     usage_id="demo-secondary",
     model="litellm_proxy/anthropic/claude-sonnet-4-5-20250929",
-    base_url="https://llm-proxy.eval.all-hands.dev",
+    base_url=os.getenv("LLM_BASE_URL"),
     api_key=SecretStr(api_key),
 )
 conversation.llm_registry.add(second_llm)