diff --git a/libs/openant-core/README.md b/libs/openant-core/README.md index 9d466ed..d43dcc8 100644 --- a/libs/openant-core/README.md +++ b/libs/openant-core/README.md @@ -348,6 +348,48 @@ For a repository with 1,000 units: --- +## Using non-Claude models via OpenRouter + +OpenRouter exposes an Anthropic-compatible endpoint, so OpenAnt can drive +Qwen, Kimi, MiniMax, DeepSeek and similar models without a separate SDK. +Two env vars + a `--model` argument are all that's needed: + +```bash +export OPENANT_LLM_BASE_URL=https://openrouter.ai/api/v1 +export OPENANT_LLM_API_KEY=sk-or-v1-... +openant scan /path/to/repo --output /tmp/results --model qwen/qwen-3-coder-480b +``` + +When `OPENANT_LLM_BASE_URL` is unset, every Anthropic client construction +falls back to the SDK defaults, so existing Claude setups behave exactly +as before. + +`--model` accepts: + +| Form | Example | Effect | +|------|---------|--------| +| Alias | `opus`, `sonnet` | Resolves to the canonical Claude ID. | +| Explicit Claude ID | `claude-opus-4-6` | Used verbatim. | +| Slash-form ID | `qwen/qwen-3-coder-480b` | Used verbatim against the configured endpoint. | +| OpenCode-style prefix | `openrouter/moonshotai/kimi-k2` | Leading `openrouter/` is stripped (becomes `moonshotai/kimi-k2`). | + +### Cost tracking for non-Claude models + +The hardcoded pricing table only covers Claude. For unknown model IDs, +OpenAnt defaults to `$0` per million tokens and prints a one-time warning +to stderr, so cost rollups stay honest rather than guessing. To plug in +real OpenRouter pricing, set `MODEL_PRICING_OVERRIDE` to a JSON object of +`{model_id: {input, output}}` per million tokens: + +```bash +export MODEL_PRICING_OVERRIDE='{"qwen/qwen-3-coder-480b": {"input": 0.4, "output": 1.6}}' +``` + +Override values take precedence over the built-in table, so you can also +use this to update Claude pricing without code changes. 
+ +--- + ## Supported Vulnerabilities | Type | Detection Pattern | Languages | diff --git a/libs/openant-core/core/analyzer.py b/libs/openant-core/core/analyzer.py index 7fb5966..46fbf7f 100644 --- a/libs/openant-core/core/analyzer.py +++ b/libs/openant-core/core/analyzer.py @@ -312,8 +312,11 @@ def run_analysis( checkpoint = StepCheckpoint("Analyze", output_dir) checkpoint.dir = checkpoint_path - # Select model - model_id = "claude-opus-4-6" if model == "opus" else "claude-sonnet-4-20250514" + # Select model. resolve_model_id() handles "opus"/"sonnet" aliases, + # passes through slash-form IDs verbatim, and strips a leading + # "openrouter/" prefix so OpenCode-style IDs work (see issue #9). + from utilities.llm_client import resolve_model_id + model_id = resolve_model_id(model) print(f"[Analyze] Model: {model_id}", file=sys.stderr) # Initialize client diff --git a/libs/openant-core/core/enhancer.py b/libs/openant-core/core/enhancer.py index fef1453..168a0e9 100644 --- a/libs/openant-core/core/enhancer.py +++ b/libs/openant-core/core/enhancer.py @@ -50,7 +50,11 @@ def enhance_dataset( # Configure global rate limiter configure_rate_limiter(backoff_seconds=float(backoff_seconds)) - model_id = "claude-sonnet-4-20250514" if model == "sonnet" else "claude-opus-4-6" + # resolve_model_id() handles "opus"/"sonnet" aliases, passes through + # slash-form IDs verbatim, and strips a leading "openrouter/" prefix + # so OpenCode-style IDs work (see issue #9). 
+ from utilities.llm_client import resolve_model_id + model_id = resolve_model_id(model) print(f"[Enhance] Mode: {mode}", file=sys.stderr) print(f"[Enhance] Model: {model_id}", file=sys.stderr) diff --git a/libs/openant-core/generate_report.py b/libs/openant-core/generate_report.py index 633cd9b..198d66e 100644 --- a/libs/openant-core/generate_report.py +++ b/libs/openant-core/generate_report.py @@ -29,9 +29,10 @@ import os from datetime import datetime -import anthropic from dotenv import load_dotenv +from utilities.llm_client import get_anthropic_client + # Load environment variables from .env file load_dotenv() @@ -198,11 +199,13 @@ def generate_remediation_guidance(findings: list) -> str: {findings_text} """ - api_key = os.getenv("ANTHROPIC_API_KEY") - if not api_key: - raise ValueError("ANTHROPIC_API_KEY not found in environment") + if not os.getenv("ANTHROPIC_API_KEY") and not os.getenv("OPENANT_LLM_API_KEY"): + raise ValueError( + "No API key found. Set ANTHROPIC_API_KEY, or for non-Claude " + "providers set OPENANT_LLM_API_KEY (and OPENANT_LLM_BASE_URL)." + ) - client = anthropic.Anthropic(api_key=api_key) + client = get_anthropic_client() response = client.messages.create( model=REPORT_MODEL, max_tokens=MAX_TOKENS, diff --git a/libs/openant-core/openant/cli.py b/libs/openant-core/openant/cli.py index b0ce345..9649d92 100644 --- a/libs/openant-core/openant/cli.py +++ b/libs/openant-core/openant/cli.py @@ -587,10 +587,9 @@ def cmd_report_data(args): and step reports — everything display-ready. 
""" import html as html_mod - import anthropic from core.schemas import success, error from core.step_report import step_context - from utilities.llm_client import get_global_tracker + from utilities.llm_client import get_anthropic_client, get_global_tracker results_path = args.results dataset_path = args.dataset @@ -810,7 +809,7 @@ def cmd_report_data(args): {findings_text} """ print("[Report] Generating remediation guidance (LLM)...", file=sys.stderr) - client = anthropic.Anthropic() + client = get_anthropic_client() response = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=4096, @@ -985,7 +984,18 @@ def main(): help="Enable Docker-isolated dynamic testing (off by default)") scan_p.add_argument("--no-skip-tests", action="store_true", help="Include test files in parsing (default: tests are skipped)") scan_p.add_argument("--limit", type=int, help="Max units to analyze") - scan_p.add_argument("--model", choices=["opus", "sonnet"], default="opus", help="Model (default: opus)") + scan_p.add_argument( + "--model", + default="opus", + help=( + "Model to use. Accepts 'opus' / 'sonnet' aliases, an explicit " + "Claude ID, or a slash-form ID for an OpenAI/OpenRouter-compatible " + "endpoint (e.g. 'qwen/qwen-3-coder-480b'). The 'openrouter/' " + "prefix is recognised and stripped (e.g. " + "'openrouter/moonshotai/kimi-k2'). Set OPENANT_LLM_BASE_URL + " + "OPENANT_LLM_API_KEY to route requests to a non-Anthropic provider." 
+ ), + ) scan_p.add_argument("--workers", type=int, default=8, help="Number of parallel workers for LLM steps (default: 8)") scan_p.add_argument("--repo-name", help="Repository name (org/repo)") @@ -1056,7 +1066,17 @@ def main(): help="Analyze units classified as exploitable or vulnerable_internal (safer, compensates for parser gaps)") exploit_group.add_argument("--exploitable-only", action="store_true", help="Analyze only units classified as exploitable (strict, use after parser entry point fixes)") - analyze_p.add_argument("--model", choices=["opus", "sonnet"], default="opus", help="Model (default: opus)") + analyze_p.add_argument( + "--model", + default="opus", + help=( + "Model to use. Accepts 'opus' / 'sonnet' aliases, an explicit " + "Claude ID, or a slash-form ID for an OpenAI/OpenRouter-compatible " + "endpoint (e.g. 'qwen/qwen-3-coder-480b'). The 'openrouter/' " + "prefix is recognised and stripped. Set OPENANT_LLM_BASE_URL + " + "OPENANT_LLM_API_KEY to route requests to a non-Anthropic provider." 
+ ), + ) analyze_p.add_argument("--workers", type=int, default=8, help="Number of parallel workers for LLM calls (default: 8)") analyze_p.add_argument("--checkpoint", help="Path to checkpoint directory for save/resume") diff --git a/libs/openant-core/report/generator.py b/libs/openant-core/report/generator.py index c996250..1f51e99 100644 --- a/libs/openant-core/report/generator.py +++ b/libs/openant-core/report/generator.py @@ -8,30 +8,28 @@ import os import re import sys -import anthropic +import anthropic # noqa: F401 — re-exported so monkeypatch tests can patch generator.anthropic.Anthropic from pathlib import Path from dotenv import load_dotenv from .schema import validate_pipeline_output, ValidationError +from utilities.llm_client import get_anthropic_client, get_pricing load_dotenv() PROMPTS_DIR = Path(__file__).parent / "prompts" MODEL = "claude-opus-4-6" -# Pricing per million tokens -_PRICING = { - "claude-opus-4-6": {"input": 15.00, "output": 75.00}, - "claude-opus-4-20250514": {"input": 15.00, "output": 75.00}, - "claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00}, -} -_DEFAULT_PRICING = {"input": 3.00, "output": 15.00} - def _extract_usage(response, model: str = MODEL) -> dict: - """Extract usage info from an Anthropic API response.""" + """Extract usage info from an Anthropic API response. + + Pricing is sourced from utilities.llm_client.get_pricing(), which + honours MODEL_PRICING_OVERRIDE and reports $0 (with a one-time stderr + warning) for unknown model IDs rather than guessing. 
+ """ usage = response.usage - pricing = _PRICING.get(model, _DEFAULT_PRICING) + pricing = get_pricing(model) input_cost = (usage.input_tokens / 1_000_000) * pricing["input"] output_cost = (usage.output_tokens / 1_000_000) * pricing["output"] return { @@ -54,11 +52,17 @@ def _merge_usage(usages: list[dict]) -> dict: def _check_api_key(): - """Check that ANTHROPIC_API_KEY is set.""" - if not os.environ.get("ANTHROPIC_API_KEY"): - print("Error: ANTHROPIC_API_KEY environment variable not set.", file=sys.stderr) - print("Set it with: export ANTHROPIC_API_KEY=sk-ant-...", file=sys.stderr) - sys.exit(1) + """Check that an API key is set (ANTHROPIC_API_KEY or OPENANT_LLM_API_KEY).""" + if os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("OPENANT_LLM_API_KEY"): + return + print("Error: no API key found.", file=sys.stderr) + print("Set ANTHROPIC_API_KEY=sk-ant-... for Claude, or", file=sys.stderr) + print( + "OPENANT_LLM_API_KEY + OPENANT_LLM_BASE_URL for non-Claude providers " + "(see README).", + file=sys.stderr, + ) + sys.exit(1) def load_prompt(name: str) -> str: @@ -136,7 +140,7 @@ def generate_summary_report(pipeline_data: dict) -> tuple[str, dict]: output_tokens, total_tokens, cost_usd. """ _check_api_key() - client = anthropic.Anthropic() + client = get_anthropic_client() summary_data = _compact_for_summary(pipeline_data) system_prompt = load_prompt("system") @@ -199,7 +203,7 @@ def generate_disclosure(vulnerability_data: dict, product_name: str) -> tuple[st (disclosure_text, usage_dict) """ _check_api_key() - client = anthropic.Anthropic() + client = get_anthropic_client() system_prompt = load_prompt("system") diff --git a/libs/openant-core/tests/test_llm_provider_routing.py b/libs/openant-core/tests/test_llm_provider_routing.py new file mode 100644 index 0000000..5e79154 --- /dev/null +++ b/libs/openant-core/tests/test_llm_provider_routing.py @@ -0,0 +1,197 @@ +"""Tests for the OpenRouter / non-Claude provider routing in llm_client. 
+ +Covers issue #9: + - OPENANT_LLM_BASE_URL / OPENANT_LLM_API_KEY are picked up by every + Anthropic client construction. + - --model accepts slash-form IDs verbatim and strips a leading + "openrouter/" prefix per OpenCode convention. + - Unknown model IDs default to $0 pricing with a one-time warning. + - MODEL_PRICING_OVERRIDE merges over the built-in pricing table. +""" +import os + +import pytest + +from utilities import llm_client +from utilities.llm_client import ( + get_anthropic_client, + get_pricing, + resolve_model_id, + TokenTracker, +) + + +@pytest.fixture(autouse=True) +def _clear_provider_env(monkeypatch): + """Strip OpenRouter env vars before each test to keep them isolated.""" + for var in ( + "OPENANT_LLM_BASE_URL", + "OPENANT_LLM_API_KEY", + "MODEL_PRICING_OVERRIDE", + ): + monkeypatch.delenv(var, raising=False) + monkeypatch.setattr(llm_client, "_unknown_model_warned", set()) + yield + + +class TestResolveModelId: + def test_opus_alias(self): + assert resolve_model_id("opus") == "claude-opus-4-6" + + def test_sonnet_alias(self): + assert resolve_model_id("sonnet") == "claude-sonnet-4-20250514" + + def test_explicit_claude_id_passes_through(self): + assert resolve_model_id("claude-opus-4-6") == "claude-opus-4-6" + + def test_slash_form_passes_through_verbatim(self): + assert ( + resolve_model_id("qwen/qwen-3-coder-480b") == "qwen/qwen-3-coder-480b" + ) + + def test_openrouter_prefix_stripped(self): + # OpenCode convention per Didier on issue #9: openrouter// + # collapses to / for the actual API call. + assert ( + resolve_model_id("openrouter/moonshotai/kimi-k2") + == "moonshotai/kimi-k2" + ) + + def test_openrouter_prefix_only_at_start(self): + # A "/openrouter/..." substring later in the ID is not stripped. 
+ assert ( + resolve_model_id("acme/openrouter/x") == "acme/openrouter/x" + ) + + def test_empty_string_returns_empty(self): + assert resolve_model_id("") == "" + + +class TestGetAnthropicClientEnvWiring: + def test_no_env_vars_passes_no_overrides(self, monkeypatch): + captured = {} + + class _Stub: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr(llm_client.anthropic, "Anthropic", _Stub) + get_anthropic_client(max_retries=5) + + assert "base_url" not in captured + assert "api_key" not in captured + assert captured["max_retries"] == 5 + + def test_env_vars_passed_through(self, monkeypatch): + monkeypatch.setenv( + "OPENANT_LLM_BASE_URL", "https://openrouter.ai/api/v1" + ) + monkeypatch.setenv("OPENANT_LLM_API_KEY", "sk-or-v1-test") + + captured = {} + + class _Stub: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr(llm_client.anthropic, "Anthropic", _Stub) + get_anthropic_client(max_retries=5) + + assert captured["base_url"] == "https://openrouter.ai/api/v1" + assert captured["api_key"] == "sk-or-v1-test" + assert captured["max_retries"] == 5 + + def test_explicit_kwargs_win_over_env(self, monkeypatch): + monkeypatch.setenv( + "OPENANT_LLM_BASE_URL", "https://openrouter.ai/api/v1" + ) + monkeypatch.setenv("OPENANT_LLM_API_KEY", "sk-or-v1-env") + + captured = {} + + class _Stub: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr(llm_client.anthropic, "Anthropic", _Stub) + get_anthropic_client(api_key="explicit-key", base_url="https://other") + + assert captured["base_url"] == "https://other" + assert captured["api_key"] == "explicit-key" + + def test_only_base_url_set(self, monkeypatch): + monkeypatch.setenv( + "OPENANT_LLM_BASE_URL", "https://openrouter.ai/api/v1" + ) + + captured = {} + + class _Stub: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr(llm_client.anthropic, "Anthropic", _Stub) + get_anthropic_client() + + assert 
captured["base_url"] == "https://openrouter.ai/api/v1" + assert "api_key" not in captured + + +class TestGetPricing: + def test_known_claude_model(self): + # Sonnet is in the built-in table at $3 input / $15 output per million. + pricing = get_pricing("claude-sonnet-4-20250514") + assert pricing == {"input": 3.0, "output": 15.0} + + def test_unknown_model_returns_zero_and_warns_once(self, capsys): + first = get_pricing("qwen/qwen-3-coder-480b") + second = get_pricing("qwen/qwen-3-coder-480b") + assert first == {"input": 0.0, "output": 0.0} + assert second == {"input": 0.0, "output": 0.0} + + err = capsys.readouterr().err + # Warning should appear exactly once even though we called twice. + assert err.count("qwen/qwen-3-coder-480b") == 1 + + def test_pricing_override_merges_over_default(self, monkeypatch): + monkeypatch.setenv( + "MODEL_PRICING_OVERRIDE", + '{"qwen/qwen-3-coder-480b": {"input": 0.4, "output": 1.6}}', + ) + pricing = get_pricing("qwen/qwen-3-coder-480b") + assert pricing == {"input": 0.4, "output": 1.6} + + def test_pricing_override_can_replace_known_claude_pricing(self, monkeypatch): + # Power users sometimes want to update Claude pricing without code + # changes — the override must take precedence over the built-in table. + monkeypatch.setenv( + "MODEL_PRICING_OVERRIDE", + '{"claude-sonnet-4-20250514": {"input": 99.0, "output": 99.0}}', + ) + pricing = get_pricing("claude-sonnet-4-20250514") + assert pricing == {"input": 99.0, "output": 99.0} + + def test_invalid_override_json_is_ignored(self, monkeypatch, capsys): + monkeypatch.setenv("MODEL_PRICING_OVERRIDE", "not json") + # Falls back to the built-in table without raising. 
+ pricing = get_pricing("claude-sonnet-4-20250514") + assert pricing == {"input": 3.0, "output": 15.0} + assert "MODEL_PRICING_OVERRIDE" in capsys.readouterr().err + + def test_non_object_override_is_ignored(self, monkeypatch, capsys): + monkeypatch.setenv("MODEL_PRICING_OVERRIDE", "[1, 2, 3]") + pricing = get_pricing("claude-sonnet-4-20250514") + assert pricing == {"input": 3.0, "output": 15.0} + assert "MODEL_PRICING_OVERRIDE" in capsys.readouterr().err + + +class TestTokenTrackerHonoursOverride: + def test_override_flows_through_record_call(self, monkeypatch): + monkeypatch.setenv( + "MODEL_PRICING_OVERRIDE", + '{"qwen/qwen-3-coder-480b": {"input": 1.0, "output": 2.0}}', + ) + tracker = TokenTracker() + result = tracker.record_call("qwen/qwen-3-coder-480b", 1_000_000, 1_000_000) + # 1M tokens * $1 input + 1M * $2 output = $3.00 + assert result["cost_usd"] == 3.0 diff --git a/libs/openant-core/tests/test_token_tracker.py b/libs/openant-core/tests/test_token_tracker.py index 08fdc9c..7e103c0 100644 --- a/libs/openant-core/tests/test_token_tracker.py +++ b/libs/openant-core/tests/test_token_tracker.py @@ -1,5 +1,5 @@ """Tests for TokenTracker.""" -from utilities.llm_client import TokenTracker, MODEL_PRICING +from utilities.llm_client import TokenTracker class TestTokenTracker: @@ -22,12 +22,21 @@ def test_record_call_known_model(self): expected_cost = (1000 / 1_000_000) * 3.0 + (500 / 1_000_000) * 15.0 assert result["cost_usd"] == round(expected_cost, 6) - def test_record_call_unknown_model_uses_default(self): + def test_record_call_unknown_model_yields_zero_cost(self, capsys, monkeypatch): + # Unknown model IDs now default to $0 (with a one-time stderr warning) + # rather than silently estimating with Sonnet rates. See issue #9 — + # once non-Claude models could enter the mix, the old fallback turned + # cost rollups into fiction. Set MODEL_PRICING_OVERRIDE for real prices. 
+ from utilities import llm_client + monkeypatch.setattr(llm_client, "_unknown_model_warned", set()) + tracker = TokenTracker() result = tracker.record_call("some-future-model", 100, 50) - default_pricing = MODEL_PRICING["default"] - expected_cost = (100 / 1_000_000) * default_pricing["input"] + (50 / 1_000_000) * default_pricing["output"] - assert result["cost_usd"] == round(expected_cost, 6) + assert result["cost_usd"] == 0.0 + + captured = capsys.readouterr() + assert "some-future-model" in captured.err + assert "MODEL_PRICING_OVERRIDE" in captured.err def test_cumulative_tracking(self): tracker = TokenTracker() diff --git a/libs/openant-core/utilities/__init__.py b/libs/openant-core/utilities/__init__.py index 7a4ec70..dcd6937 100644 --- a/libs/openant-core/utilities/__init__.py +++ b/libs/openant-core/utilities/__init__.py @@ -5,7 +5,11 @@ TokenTracker, get_global_tracker, reset_global_tracker, - MODEL_PRICING + get_anthropic_client, + get_pricing, + resolve_model_id, + MODEL_ALIASES, + MODEL_PRICING, ) from .json_corrector import JSONCorrector from .context_corrector import ContextCorrector @@ -20,6 +24,10 @@ 'get_global_tracker', 'reset_global_tracker', 'MODEL_PRICING', + 'MODEL_ALIASES', + 'get_anthropic_client', + 'get_pricing', + 'resolve_model_id', 'JSONCorrector', 'ContextCorrector', 'ContextReviewer', diff --git a/libs/openant-core/utilities/agentic_enhancer/agent.py b/libs/openant-core/utilities/agentic_enhancer/agent.py index 62061b7..f7ebd0d 100644 --- a/libs/openant-core/utilities/agentic_enhancer/agent.py +++ b/libs/openant-core/utilities/agentic_enhancer/agent.py @@ -16,7 +16,7 @@ import anthropic -from ..llm_client import TokenTracker, get_global_tracker +from ..llm_client import TokenTracker, get_anthropic_client, get_global_tracker from ..rate_limiter import get_rate_limiter from .repository_index import RepositoryIndex from .tools import TOOL_DEFINITIONS, ToolExecutor @@ -126,7 +126,7 @@ def __init__( self.tool_executor = ToolExecutor(index) 
self.entry_points = entry_points or set() self.reachability = reachability - self.client = client or anthropic.Anthropic(max_retries=5) + self.client = client or get_anthropic_client(max_retries=5) def analyze_unit( self, diff --git a/libs/openant-core/utilities/context_enhancer.py b/libs/openant-core/utilities/context_enhancer.py index 2ffbfe6..2b25fe7 100644 --- a/libs/openant-core/utilities/context_enhancer.py +++ b/libs/openant-core/utilities/context_enhancer.py @@ -25,7 +25,13 @@ import anthropic -from .llm_client import AnthropicClient, TokenTracker, get_global_tracker, reset_global_tracker +from .llm_client import ( + AnthropicClient, + TokenTracker, + get_anthropic_client, + get_global_tracker, + reset_global_tracker, +) from .agentic_enhancer import RepositoryIndex, enhance_unit_with_agent, load_index_from_file from .rate_limiter import get_rate_limiter, is_rate_limit_error, is_retryable_error @@ -585,7 +591,7 @@ def enhance_dataset_agentic( # which spawns a new httpx connection pool. With 1000+ units and 8 workers, # this exhausted file descriptors (macOS limit ~256). The httpx.Client # underlying anthropic.Anthropic is thread-safe, so sharing is correct. 
- shared_client = anthropic.Anthropic(max_retries=5) + shared_client = get_anthropic_client(max_retries=5) # Filter to unprocessed units units_to_process = [(i, unit) for i, unit in enumerate(units) if unit.get("id") not in processed_ids] diff --git a/libs/openant-core/utilities/finding_verifier.py b/libs/openant-core/utilities/finding_verifier.py index 2e66b7c..ecf1515 100644 --- a/libs/openant-core/utilities/finding_verifier.py +++ b/libs/openant-core/utilities/finding_verifier.py @@ -40,7 +40,7 @@ import anthropic -from .llm_client import TokenTracker, get_global_tracker +from .llm_client import TokenTracker, get_anthropic_client, get_global_tracker from .rate_limiter import get_rate_limiter # Null logger that discards all messages (used when no logger provided) @@ -271,7 +271,7 @@ def __init__( self.verbose = verbose self.app_context = app_context self.tool_executor = ToolExecutor(index) - self.client = client or anthropic.Anthropic(max_retries=5) + self.client = client or get_anthropic_client(max_retries=5) self.logger = logger or _null_logger self._use_logger = logger is not None diff --git a/libs/openant-core/utilities/llm_client.py b/libs/openant-core/utilities/llm_client.py index ea356bf..645976e 100644 --- a/libs/openant-core/utilities/llm_client.py +++ b/libs/openant-core/utilities/llm_client.py @@ -15,9 +15,18 @@ tracker = get_global_tracker() print(f"Total cost: ${tracker.total_cost_usd:.4f}") + +OpenRouter / non-Claude models: + Set OPENANT_LLM_BASE_URL and OPENANT_LLM_API_KEY to route every + anthropic.Anthropic(...) construction through a different endpoint + (e.g. https://openrouter.ai/api/v1). Use a slash-form --model value + (qwen/qwen-3-coder-480b) or the OpenCode-style openrouter/ prefix + (openrouter/moonshotai/kimi-k2 -> moonshotai/kimi-k2). 
""" +import json import os +import sys import threading from typing import Optional import anthropic @@ -28,6 +37,7 @@ # Pricing per million tokens (as of December 2024) MODEL_PRICING = { + "claude-opus-4-6": {"input": 15.00, "output": 75.00}, "claude-opus-4-20250514": {"input": 15.00, "output": 75.00}, "claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00}, # Fallback for unknown models (use Sonnet pricing as conservative estimate) @@ -35,6 +45,116 @@ } +# Model aliases used by --model on the CLI. Anything not in this map and +# without a "/" is left as-is so future Claude IDs keep working. +MODEL_ALIASES = { + "opus": "claude-opus-4-6", + "sonnet": "claude-sonnet-4-20250514", +} + + +def resolve_model_id(value: str) -> str: + """Resolve a --model argument to the literal ID sent to the API. + + Rules: + - "opus" / "sonnet" -> their canonical Claude IDs. + - A value containing "/" is passed through verbatim, with one + exception: a leading "openrouter/" prefix is stripped so that + OpenCode-style IDs like "openrouter/moonshotai/kimi-k2" work + out of the box (they become "moonshotai/kimi-k2"). See issue #9. + - Anything else is returned unchanged so explicit Claude IDs + (e.g. "claude-opus-4-6") still work. + """ + if not value: + return value + if value in MODEL_ALIASES: + return MODEL_ALIASES[value] + if "/" in value: + if value.startswith("openrouter/"): + return value[len("openrouter/"):] + return value + return value + + +_unknown_model_warned: set[str] = set() +_unknown_model_lock = threading.Lock() + + +def _warn_unknown_model_once(model: str) -> None: + """Warn at most once per process for each unpriced model ID.""" + with _unknown_model_lock: + if model in _unknown_model_warned: + return + _unknown_model_warned.add(model) + print( + f"[llm_client] No pricing entry for model '{model}'; cost rollups " + f"for this model will report $0.00. 
Set MODEL_PRICING_OVERRIDE to " + f"add it (see README).", + file=sys.stderr, + ) + + +def _load_pricing_override() -> dict: + """Parse MODEL_PRICING_OVERRIDE, returning {} on error.""" + raw = os.environ.get("MODEL_PRICING_OVERRIDE") + if not raw: + return {} + try: + parsed = json.loads(raw) + except (ValueError, TypeError) as exc: + print( + f"[llm_client] Could not parse MODEL_PRICING_OVERRIDE as JSON: {exc}", + file=sys.stderr, + ) + return {} + if not isinstance(parsed, dict): + print( + "[llm_client] MODEL_PRICING_OVERRIDE must be a JSON object " + "of {model_id: {input, output}}; ignoring.", + file=sys.stderr, + ) + return {} + return parsed + + +def get_pricing(model: str) -> dict: + """Return pricing for a model, honouring MODEL_PRICING_OVERRIDE. + + Override values take precedence over the built-in table. Unknown + models default to {input: 0, output: 0} and emit a one-time warning. + """ + override = _load_pricing_override() + if model in override: + return override[model] + if model in MODEL_PRICING: + return MODEL_PRICING[model] + _warn_unknown_model_once(model) + return {"input": 0.0, "output": 0.0} + + +def get_anthropic_client(**kwargs) -> "anthropic.Anthropic": + """Construct an anthropic.Anthropic() honouring OPENANT_LLM_* env vars. + + When OPENANT_LLM_BASE_URL is set, the SDK is pointed at that endpoint + (typically OpenRouter or a self-hosted Anthropic-compatible proxy). + When OPENANT_LLM_API_KEY is set, that key is used in place of + ANTHROPIC_API_KEY. Both fall through to the SDK's normal env handling + when unset, so existing setups behave identically. + + Any kwargs (e.g. max_retries=5, api_key=...) are forwarded; explicit + kwargs take precedence over the env var fallbacks. 
+ """ + base_url = os.environ.get("OPENANT_LLM_BASE_URL") + api_key = os.environ.get("OPENANT_LLM_API_KEY") + + if base_url and "base_url" not in kwargs: + kwargs["base_url"] = base_url + if api_key and "api_key" not in kwargs: + kwargs["api_key"] = api_key + + return anthropic.Anthropic(**kwargs) + + class TokenTracker: """ Tracks token usage and costs across LLM calls. @@ -70,8 +190,10 @@ def record_call(self, model: str, input_tokens: int, output_tokens: int) -> dict Returns: Dict with call details including cost """ - # Get pricing for model - pricing = MODEL_PRICING.get(model, MODEL_PRICING["default"]) + # Get pricing for model. get_pricing() honours MODEL_PRICING_OVERRIDE + # and falls back to {input: 0, output: 0} for unknown models with a + # one-time warning, rather than silently estimating with Sonnet rates. + pricing = get_pricing(model) # Calculate cost (pricing is per million tokens) input_cost = (input_tokens / 1_000_000) * pricing["input"] @@ -200,11 +322,14 @@ def __init__(self, model: str = "claude-opus-4-20250514", tracker: TokenTracker """ load_dotenv() - api_key = os.getenv("ANTHROPIC_API_KEY") - if not api_key: - raise ValueError("ANTHROPIC_API_KEY not found in environment") + # Either the OpenRouter override or ANTHROPIC_API_KEY must be set. + if not os.getenv("OPENANT_LLM_API_KEY") and not os.getenv("ANTHROPIC_API_KEY"): + raise ValueError( + "No API key found. Set ANTHROPIC_API_KEY, or for non-Claude " + "providers set OPENANT_LLM_API_KEY (and OPENANT_LLM_BASE_URL)." + ) - self.client = anthropic.Anthropic(api_key=api_key, max_retries=5) + self.client = get_anthropic_client(max_retries=5) self.model = model self.tracker = tracker or _global_tracker self.last_call = None # Store last call details