Skip to content

Commit 7b98d96

Browse files
Rename metadata field to litellm_extra_body and add custom config support (#837)
Co-authored-by: openhands <[email protected]>
1 parent 6c7ad75 commit 7b98d96

File tree

8 files changed

+73
-64
lines changed

8 files changed

+73
-64
lines changed

openhands-sdk/openhands/sdk/agent/agent.py

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -171,13 +171,13 @@ def step(
171171
include=None,
172172
store=False,
173173
add_security_risk_prediction=self._add_security_risk_prediction,
174-
metadata=self.llm.metadata,
174+
extra_body=self.llm.litellm_extra_body,
175175
)
176176
else:
177177
llm_response = self.llm.completion(
178178
messages=_messages,
179179
tools=list(self.tools_map.values()),
180-
extra_body={"metadata": self.llm.metadata},
180+
extra_body=self.llm.litellm_extra_body,
181181
add_security_risk_prediction=self._add_security_risk_prediction,
182182
)
183183
except FunctionCallValidationError as e:

openhands-sdk/openhands/sdk/context/condenser/llm_summarizing_condenser.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -67,7 +67,7 @@ def get_condensation(self, view: View) -> Condensation:
6767

6868
llm_response = self.llm.completion(
6969
messages=messages,
70-
extra_body={"metadata": self.llm.metadata},
70+
extra_body=self.llm.litellm_extra_body,
7171
)
7272
# Extract summary from the LLMResponse message
7373
summary = None

openhands-sdk/openhands/sdk/llm/llm.py

Lines changed: 4 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -246,10 +246,12 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
246246
"telemetry, and spend tracking."
247247
),
248248
)
249-
metadata: dict[str, Any] = Field(
249+
litellm_extra_body: dict[str, Any] = Field(
250250
default_factory=dict,
251251
description=(
252-
"Additional metadata for the LLM instance. "
252+
"Additional key-value pairs to pass to litellm's extra_body parameter. "
253+
"This is useful for custom inference clusters that need additional "
254+
"metadata for logging, tracking, or routing purposes. "
253255
"Example structure: "
254256
"{'trace_version': '1.0.0', 'tags': ['model:gpt-4', 'agent:my-agent'], "
255257
"'session_id': 'session-123', 'trace_user_id': 'user-456'}"

openhands-sdk/openhands/sdk/llm/options/chat_options.py

Lines changed: 5 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -76,8 +76,12 @@ def select_chat_options(
7676
out.pop("tools", None)
7777
out.pop("tool_choice", None)
7878

79+
# Pass through litellm_extra_body if provided
80+
if llm.litellm_extra_body:
81+
out["extra_body"] = llm.litellm_extra_body
7982
# non litellm proxy special-case: keep `extra_body` off unless model requires it
80-
if "litellm_proxy" not in llm.model:
83+
# or user provided it
84+
elif "litellm_proxy" not in llm.model:
8185
out.pop("extra_body", None)
8286

8387
return out

openhands-sdk/openhands/sdk/llm/options/responses_options.py

Lines changed: 4 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -47,4 +47,8 @@ def select_responses_options(
4747
effort = llm.reasoning_effort or "high"
4848
out["reasoning"] = {"effort": effort, "summary": "detailed"}
4949

50+
# Pass through litellm_extra_body if provided
51+
if llm.litellm_extra_body:
52+
out["extra_body"] = llm.litellm_extra_body
53+
5054
return out

tests/sdk/context/condenser/test_llm_summarizing_condenser.py

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -64,7 +64,7 @@ def create_completion_result(content: str) -> LLMResponse:
6464
mock_llm.custom_tokenizer = None
6565
mock_llm.base_url = None
6666
mock_llm.reasoning_effort = None
67-
mock_llm.metadata = {}
67+
mock_llm.litellm_extra_body = {}
6868

6969
# Explicitly set pricing attributes required by LLM -> Telemetry wiring
7070
mock_llm.input_cost_per_token = None
Lines changed: 56 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,56 @@
1+
from unittest.mock import patch
2+
3+
from litellm.types.utils import ModelResponse
4+
5+
from openhands.sdk.llm import LLM, Message, TextContent
6+
7+
8+
def test_litellm_extra_body_passed_to_completion():
9+
"""Test that litellm_extra_body is correctly passed to litellm.completion()."""
10+
custom_extra_body = {
11+
"cluster_id": "prod-cluster-1",
12+
"routing_key": "high-priority",
13+
"user_tier": "premium",
14+
"custom_headers": {
15+
"X-Request-Source": "openhands-agent",
16+
},
17+
}
18+
19+
llm = LLM(model="gpt-4o", usage_id="test", litellm_extra_body=custom_extra_body)
20+
messages = [Message(role="user", content=[TextContent(text="Hello")])]
21+
22+
with patch("openhands.sdk.llm.llm.litellm_completion") as mock_completion:
23+
# Create a proper ModelResponse mock
24+
mock_response = ModelResponse(
25+
id="test-id",
26+
choices=[
27+
{
28+
"index": 0,
29+
"message": {"role": "assistant", "content": "Hello!"},
30+
"finish_reason": "stop",
31+
}
32+
],
33+
created=1234567890,
34+
model="gpt-4o",
35+
object="chat.completion",
36+
)
37+
mock_completion.return_value = mock_response
38+
39+
# Call completion
40+
llm.completion(messages=messages)
41+
42+
# Verify that litellm.completion was called with our extra_body
43+
mock_completion.assert_called_once()
44+
call_kwargs = mock_completion.call_args[1]
45+
46+
# Check that extra_body was passed correctly
47+
assert "extra_body" in call_kwargs
48+
assert call_kwargs["extra_body"] == custom_extra_body
49+
50+
# Verify specific custom fields were passed through
51+
assert call_kwargs["extra_body"]["cluster_id"] == "prod-cluster-1"
52+
assert call_kwargs["extra_body"]["routing_key"] == "high-priority"
53+
assert (
54+
call_kwargs["extra_body"]["custom_headers"]["X-Request-Source"]
55+
== "openhands-agent"
56+
)

tests/sdk/llm/test_llm_metadata.py

Lines changed: 0 additions & 57 deletions
This file was deleted.

0 commit comments

Comments (0)