diff --git a/examples/01_standalone_sdk/05_use_llm_registry.py b/examples/01_standalone_sdk/05_use_llm_registry.py index 6cad805467..326e061321 100644 --- a/examples/01_standalone_sdk/05_use_llm_registry.py +++ b/examples/01_standalone_sdk/05_use_llm_registry.py @@ -76,15 +76,12 @@ def conversation_callback(event: Event): print(f"Same LLM instance: {llm is same_llm}") # Demonstrate requesting a completion directly from an LLM -completion_response = llm.completion( +resp = llm.completion( messages=[ Message(role="user", content=[TextContent(text="Say hello in one word.")]) ] ) -# Access the response content -raw_response = completion_response.raw_response -if raw_response.choices and raw_response.choices[0].message: # type: ignore - content = raw_response.choices[0].message.content # type: ignore - print(f"Direct completion response: {content}") -else: - print("No response content available") +# Access the response content via OpenHands LLMResponse +msg = resp.message +texts = [c.text for c in msg.content if isinstance(c, TextContent)] +print(f"Direct completion response: {texts[0] if texts else str(msg)}") diff --git a/openhands-sdk/openhands/sdk/llm/utils/model_features.py b/openhands-sdk/openhands/sdk/llm/utils/model_features.py index 500ac93673..76b4afcf1d 100644 --- a/openhands-sdk/openhands/sdk/llm/utils/model_features.py +++ b/openhands-sdk/openhands/sdk/llm/utils/model_features.py @@ -1,68 +1,17 @@ from dataclasses import dataclass -from fnmatch import fnmatch - - -def normalize_model_name(model: str) -> str: - """Normalize a model string to a canonical, comparable name. - - Strategy: - - Trim whitespace - - Lowercase - - If there is a '/', keep only the basename after the last '/' - (handles prefixes like openrouter/, litellm_proxy/, anthropic/, etc.) - and treat ':' inside that basename as an Ollama-style variant tag to be removed - - There is no provider:model form; providers, when present, use 'provider/model' - - Drop a trailing "-gguf" suffix if present - - If basename starts with a known vendor prefix followed by '.', drop that prefix - (e.g., 'anthropic.claude-*' -> 'claude-*') - """ - raw = (model or "").strip().lower() - if "/" in raw: - name = raw.split("/")[-1] - if ":" in name: - # Drop Ollama-style variant tag in basename - name = name.split(":", 1)[0] - else: - # No '/', keep the whole raw name (we do not support provider:model) - name = raw - - # Drop common vendor prefixes embedded in the basename (bedrock style), once. - # Keep this list small and explicit to avoid accidental over-matching. - vendor_prefixes = { - "anthropic", - "meta", - "cohere", - "mistral", - "ai21", - "amazon", - } - if "." in name: - vendor, rest = name.split(".", 1) - if vendor in vendor_prefixes and rest: - name = rest - - if name.endswith("-gguf"): - name = name[: -len("-gguf")] - return name def model_matches(model: str, patterns: list[str]) -> bool: - """Return True if the model matches any of the glob patterns. + """Return True if any pattern appears as a substring in the raw model name. - If a pattern contains a '/', it is treated as provider-qualified and matched - against the full, lowercased model string (including provider prefix). - Otherwise, it is matched against the normalized basename. + Matching semantics: + - Case-insensitive substring search on full raw model string """ raw = (model or "").strip().lower() - name = normalize_model_name(model) for pat in patterns: - pat_l = pat.lower() - if "/" in pat_l: - if fnmatch(raw, pat_l): - return True - else: - if fnmatch(name, pat_l): - return True + token = pat.strip().lower() + if token in raw: + return True return False @@ -76,11 +25,12 @@ class ModelFeatures: # Pattern tables capturing current behavior. Keep patterns lowercase. + REASONING_EFFORT_PATTERNS: list[str] = [ # Mirror main behavior exactly (no unintended expansion) "o1-2024-12-17", - "o1*", # Match all o1 variants including o1-preview - "o3*", # Match all o3 variants + "o1", + "o3", "o3-2025-04-16", "o3-mini-2025-01-31", "o3-mini", @@ -89,45 +39,48 @@ class ModelFeatures: "gemini-2.5-flash", "gemini-2.5-pro", # OpenAI GPT-5 family (includes mini variants) - "gpt-5*", + "gpt-5", ] EXTENDED_THINKING_PATTERNS: list[str] = [ # Anthropic model family # We did not include sonnet 3.7 and 4 here as they don't brings # significant performance improvements for agents - "claude-sonnet-4-5*", - "claude-haiku-4-5*", + "claude-sonnet-4-5", + "claude-haiku-4-5", ] PROMPT_CACHE_PATTERNS: list[str] = [ - "claude-3-7-sonnet*", - "claude-3.7-sonnet*", + "claude-3-7-sonnet", + "claude-3.7-sonnet", "claude-sonnet-3-7-latest", - "claude-3-5-sonnet*", - "claude-3.5-sonnet*", - "claude-3-5-haiku*", - "claude-3.5-haiku*", - "claude-3-haiku-20240307*", - "claude-3-opus-20240229*", - "claude-sonnet-4*", - "claude-opus-4*", + "claude-3-5-sonnet", + "claude-3.5-sonnet", + "claude-3-5-haiku", + "claude-3.5-haiku", + "claude-3-haiku-20240307", + "claude-3-opus-20240229", + "claude-sonnet-4", + "claude-opus-4", + # Anthropic Haiku 4.5 variants (dot and dash) + "claude-haiku-4.5", + "claude-haiku-4-5", ] SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [ # o1 family doesn't support stop words - "o1*", + "o1", # grok-4 specific model name (basename) "grok-4-0709", "grok-code-fast-1", # DeepSeek R1 family - "deepseek-r1-0528*", + "deepseek-r1-0528", ] # Models that should use the OpenAI Responses API path by default RESPONSES_API_PATTERNS: list[str] = [ # OpenAI GPT-5 family (includes mini variants) - "gpt-5*", + "gpt-5", # OpenAI Codex (uses Responses API) "codex-mini-latest", ] diff --git a/tests/sdk/llm/test_model_features.py b/tests/sdk/llm/test_model_features.py index 60e05fe5c7..ae12efa5cc 100644 --- a/tests/sdk/llm/test_model_features.py +++ b/tests/sdk/llm/test_model_features.py @@ -3,40 +3,17 @@ from openhands.sdk.llm.utils.model_features import ( get_features, model_matches, - normalize_model_name, ) -@pytest.mark.parametrize( - "raw,expected", - [ - (" OPENAI/gpt-4o ", "gpt-4o"), - ("anthropic/claude-3-7-sonnet", "claude-3-7-sonnet"), - ("litellm_proxy/gemini-2.5-pro", "gemini-2.5-pro"), - ("qwen3-coder-480b-a35b-instruct", "qwen3-coder-480b-a35b-instruct"), - ("gpt-5", "gpt-5"), - ("openai/GLM-4.5-GGUF", "glm-4.5"), - ("openrouter/gpt-4o-mini", "gpt-4o-mini"), - ( - "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0", - "claude-3-5-sonnet-20241022-v2", - ), - ("", ""), - (None, ""), # type: ignore[arg-type] - ], -) -def test_normalize_model_name(raw, expected): - assert normalize_model_name(raw) == expected - - @pytest.mark.parametrize( "name,pattern,expected", [ - ("gpt-4o", "gpt-4o*", True), - ("openai/gpt-4o", "gpt-4o*", True), - ("litellm_proxy/gpt-4o-mini", "gpt-4o*", True), - ("claude-3-7-sonnet-20250219", "claude-3-7-sonnet*", True), - ("o1-2024-12-17", "o1*", True), + ("gpt-4o", "gpt-4o", True), + ("openai/gpt-4o", "gpt-4o", True), + ("litellm_proxy/gpt-4o-mini", "gpt-4o", True), + ("claude-3-7-sonnet-20250219", "claude-3-7-sonnet", True), + ("o1-2024-12-17", "o1", True), ("grok-4-0709", "grok-4-0709", True), ("grok-4-0801", "grok-4-0709", False), ], @@ -70,10 +47,22 @@ def test_reasoning_effort_support(model, expected_reasoning): ("claude-3-7-sonnet", True), ("claude-3-haiku-20240307", True), ("claude-3-opus-20240229", True), - # AWS Bedrock models + # AWS Bedrock model ids (provider-prefixed) ("bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0", True), ("bedrock/anthropic.claude-3-haiku-20240307-v1:0", True), + # Anthropic Haiku 4.5 variants (dot and dash) + ("claude-haiku-4.5", True), + ("claude-haiku-4-5", True), + ("us.anthropic.claude-haiku-4.5-20251001", True), + ("us.anthropic.claude-haiku-4-5-20251001", True), ("bedrock/anthropic.claude-3-opus-20240229-v1:0", True), + # Anthropic 4.5 variants (dash and dot) + ("claude-sonnet-4-5", True), + ("claude-sonnet-4.5", True), + # User-facing model names (no provider prefix) + ("anthropic.claude-3-5-sonnet-20241022", True), + ("anthropic.claude-3-haiku-20240307", True), + ("anthropic.claude-3-opus-20240229", True), ("gpt-4o", False), # OpenAI doesn't support explicit prompt caching ("gemini-1.5-pro", False), ("unknown-model", False), @@ -152,7 +141,7 @@ def test_get_features_with_version_suffixes(): def test_model_matches_multiple_patterns(): """Test model_matches with multiple patterns.""" - patterns = ["gpt-4*", "claude-3*", "gemini-*"] + patterns = ["gpt-4", "claude-3", "gemini-"] assert model_matches("gpt-4o", patterns) is True assert model_matches("claude-3-5-sonnet", patterns) is True @@ -160,31 +149,17 @@ def test_model_matches_multiple_patterns(): assert model_matches("llama-3.1-70b", patterns) is False -def test_model_matches_exact_match(): - """Test model_matches with exact patterns (no wildcards).""" +def test_model_matches_substring_semantics(): + """Test model_matches uses substring semantics (no globbing).""" patterns = ["gpt-4o", "claude-3-5-sonnet"] assert model_matches("gpt-4o", patterns) is True assert model_matches("claude-3-5-sonnet", patterns) is True - assert model_matches("gpt-4o-mini", patterns) is False + # Substring match: 'gpt-4o' matches 'gpt-4o-mini' + assert model_matches("gpt-4o-mini", patterns) is True assert model_matches("claude-3-haiku", patterns) is False -def test_normalize_model_name_edge_cases(): - """Test normalize_model_name with edge cases.""" - # Test with multiple slashes - assert normalize_model_name("provider/sub/model-name") == "model-name" - - # Test with colons and special characters - assert normalize_model_name("provider/model:version:tag") == "model" - - # Test with whitespace and case - assert normalize_model_name(" PROVIDER/Model-Name ") == "model-name" - - # Test with underscores and hyphens - assert normalize_model_name("provider/model_name-v1") == "model_name-v1" - - def test_get_features_unknown_model(): """Test get_features with completely unknown model.""" features = get_features("completely-unknown-model-12345") @@ -208,11 +183,10 @@ def test_get_features_empty_model(): def test_model_matches_with_provider_pattern(): - """Test model_matches with pattern containing '/' matches raw model string.""" - # Test pattern with '/' matches against raw model string (lines 43-44) - assert model_matches("openai/gpt-4", ["openai/*"]) - assert model_matches("anthropic/claude-3", ["anthropic/claude*"]) - assert not model_matches("openai/gpt-4", ["anthropic/*"]) + """model_matches uses substring on raw model name incl. provider prefixes.""" + assert model_matches("openai/gpt-4", ["openai/"]) + assert model_matches("anthropic/claude-3", ["anthropic/claude"]) + assert not model_matches("openai/gpt-4", ["anthropic/"]) def test_stop_words_grok_provider_prefixed(): @@ -236,3 +210,19 @@ def test_supports_stop_words_false_models(model): """Test models that don't support stop words.""" features = get_features(model) assert features.supports_stop_words is False + + +@pytest.mark.parametrize( + "model,expected_responses", + [ + ("gpt-5", True), + ("openai/gpt-5-mini", True), + ("codex-mini-latest", True), + ("openai/codex-mini-latest", True), + ("gpt-4o", False), + ("unknown-model", False), + ], +) +def test_responses_api_support(model, expected_responses): + features = get_features(model) + assert features.supports_responses_api is expected_responses