billion-token-one-task · HelloAnner · Apr 11, 2026 · Apr 11, 2026 · Apr 11, 2026 · Apr 11, 2026
diff --git a/.env.example b/.env.example
@@ -6,8 +6,54 @@
 # GitHub Personal Access Token — needs public_repo scope at minimum
 GITHUB_TOKEN=ghp_your-token-here
 
-# Kimi Code direct API key (required — OpenRouter is NOT supported due to content filter)
-KIMI_API_KEY=sk-kimi-your-key-here
+# === LLM Model Configuration ===
+# See docs/model-routing.md for full provider examples (OpenAI, DeepSeek, MiniMax, etc.)
+
+# Provider name — becomes the OpenClaw provider block key and model ID prefix
+LLM_PROVIDER=anthropic
+
+# OpenAI-compatible API endpoint for this provider
+LLM_BASE_URL=https://api.anthropic.com/v1
+
+# API key for the provider above
+LLM_API_KEY=sk-ant-your-key-here
+
+# Complex model (Opus-tier) — used by implementation sub-agents (code writing, debugging)
+LLM_MODEL_COMPLEX=claude-opus-4-6
+
+# Simple model (Sonnet-tier) — used by orchestrator/heartbeat (routing, file reads)
+LLM_MODEL_SIMPLE=claude-sonnet-4-6
+
+# Cost per million tokens in USD (used for dashboard spend display).
+# Per-model pricing — complex (Opus-tier) and simple (Sonnet-tier) can differ.
+# If per-model vars are not set, INPUT_COST_PER_M / OUTPUT_COST_PER_M are used as fallback.
+INPUT_COST_PER_M_COMPLEX=5.0     # Claude Opus 4.6 input ($5/M)
+OUTPUT_COST_PER_M_COMPLEX=25.0   # Claude Opus 4.6 output ($25/M)
+INPUT_COST_PER_M_SIMPLE=3.0      # Claude Sonnet 4.6 input ($3/M)
+OUTPUT_COST_PER_M_SIMPLE=15.0    # Claude Sonnet 4.6 output ($15/M)
+# Fallback if per-model vars are absent (set to your average expected price)
+INPUT_COST_PER_M=3.0
+OUTPUT_COST_PER_M=15.0
+
+# Context window and max output tokens (model-specific)
+LLM_CONTEXT_WINDOW=1000000
+LLM_MAX_TOKENS=32000
+
+# Public vars exposed to dashboard browser bundle (mirrors LLM_* above)
+NEXT_PUBLIC_LLM_PROVIDER=anthropic
+NEXT_PUBLIC_LLM_MODEL_COMPLEX=claude-opus-4-6
+NEXT_PUBLIC_LLM_MODEL_SIMPLE=claude-sonnet-4-6
+
+# === Budget Control ===
+# Total cumulative spend cap in USD. Agent pauses when reached. 0 = unlimited.
+BUDGET_USD_TOTAL=20.0
+
+# Per-model token caps (JSON map). Keys are BARE model names — matched across
+# all providers (e.g. `glm-4.6` covers `z-ai/glm-4.6`, `openrouter/glm-4.6`, etc.).
+# Value is total tokens (input + output). Missing key or value 0 = unlimited.
+# When a model exceeds its cap, the agent stops using it AND a red banner shows
+# at the top of the dashboard. See docs/model-routing.md.
+# MODEL_TOKEN_BUDGETS={"glm-4.6":20000000,"deepseek-chat":50000000,"claude-opus-4-6":10000000}
 
 # === Optional ===
 
@@ -18,3 +64,131 @@ GITHUB_EMAIL=billionclaw+clawoss@users.noreply.github.com
 # Dashboard (for dashboard-reporter skill)
 DASHBOARD_URL=https://clawoss-dashboard.vercel.app
 CLAW_API_KEY=your-shared-secret-here
+
+# Legacy / compatibility (no longer required — use LLM_* above instead)
+# KIMI_API_KEY=sk-kimi-your-key-here
+# MINIMAX_API_KEY=your-minimax-key-here
+
+# =============================================================================
+# Provider Quick Reference — copy the block you want and replace the defaults
+# Prices: April 2026. Verify at provider docs before setting budget.
+# =============================================================================
+
+# ── Google Gemini ─────────────────────────────────────────────────────────────
+# Docs: https://ai.google.dev/gemini-api/docs/pricing
+# Uses the OpenAI-compatible endpoint. Gemini 2.5 Pro pricing doubles for prompts over 200k tokens of context.
+# All models have free tier with limited requests.
+#
+# LLM_PROVIDER=google
+# LLM_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
+# LLM_API_KEY=your-gemini-key
+# LLM_MODEL_COMPLEX=gemini-2.5-pro
+# LLM_MODEL_SIMPLE=gemini-2.5-flash
+# INPUT_COST_PER_M_COMPLEX=1.25
+# OUTPUT_COST_PER_M_COMPLEX=10.0
+# INPUT_COST_PER_M_SIMPLE=0.30
+# OUTPUT_COST_PER_M_SIMPLE=2.50
+# LLM_CONTEXT_WINDOW=1000000
+# LLM_MAX_TOKENS=65536
+# NEXT_PUBLIC_LLM_PROVIDER=google
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=gemini-2.5-pro
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=gemini-2.5-flash
+
+# ── Mistral AI ───────────────────────────────────────────────────────────────
+# Docs: https://mistral.ai/pricing
+# Large 3 for complex, Small 3.1 for orchestration. Nemo ($0.02/M) is cheapest.
+#
+# LLM_PROVIDER=mistral
+# LLM_BASE_URL=https://api.mistral.ai/v1
+# LLM_API_KEY=your-mistral-key
+# LLM_MODEL_COMPLEX=mistral-large-3
+# LLM_MODEL_SIMPLE=mistral-small-3.1
+# INPUT_COST_PER_M_COMPLEX=2.0
+# OUTPUT_COST_PER_M_COMPLEX=6.0
+# INPUT_COST_PER_M_SIMPLE=0.20
+# OUTPUT_COST_PER_M_SIMPLE=0.60
+# LLM_CONTEXT_WINDOW=128000
+# LLM_MAX_TOKENS=32000
+# NEXT_PUBLIC_LLM_PROVIDER=mistral
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=mistral-large-3
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=mistral-small-3.1
+
+# ── DeepSeek ──────────────────────────────────────────────────────────────────
+# Docs: https://api-docs.deepseek.com/quick_start/pricing
+# deepseek-chat = V3.2 non-thinking | deepseek-reasoner = V3.2 thinking mode
+# Both are the same price; deepseek-reasoner supports 32K output tokens vs 8K for deepseek-chat.
+# Cache hit: $0.028/M input (90% off)
+#
+# LLM_PROVIDER=deepseek
+# LLM_BASE_URL=https://api.deepseek.com/v1
+# LLM_API_KEY=sk-your-deepseek-key
+# LLM_MODEL_COMPLEX=deepseek-reasoner     # thinking mode for complex tasks
+# LLM_MODEL_SIMPLE=deepseek-chat          # non-thinking for orchestration
+# INPUT_COST_PER_M_COMPLEX=0.28
+# OUTPUT_COST_PER_M_COMPLEX=0.42
+# INPUT_COST_PER_M_SIMPLE=0.28
+# OUTPUT_COST_PER_M_SIMPLE=0.42
+# LLM_CONTEXT_WINDOW=128000
+# LLM_MAX_TOKENS=32000
+# NEXT_PUBLIC_LLM_PROVIDER=deepseek
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=deepseek-reasoner
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=deepseek-chat
+
+# ── MiniMax ───────────────────────────────────────────────────────────────────
+# Docs: https://platform.minimax.io/docs/guides/pricing-paygo
+# highspeed variants are 2× price but lower latency
+#
+# LLM_PROVIDER=minimax
+# LLM_BASE_URL=https://api.minimaxi.com/v1
+# LLM_API_KEY=your-minimax-key
+# LLM_MODEL_COMPLEX=MiniMax-M2.7
+# LLM_MODEL_SIMPLE=MiniMax-M2.5
+# INPUT_COST_PER_M_COMPLEX=0.30
+# OUTPUT_COST_PER_M_COMPLEX=1.20
+# INPUT_COST_PER_M_SIMPLE=0.30
+# OUTPUT_COST_PER_M_SIMPLE=1.20
+# LLM_CONTEXT_WINDOW=204800
+# LLM_MAX_TOKENS=131072
+# NEXT_PUBLIC_LLM_PROVIDER=minimax
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=MiniMax-M2.7
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=MiniMax-M2.5
+
+# ── Kimi / Moonshot ───────────────────────────────────────────────────────────
+# Docs: https://platform.kimi.ai/docs/pricing/chat
+# kimi-k2.5 = latest coding model | moonshot-v1-32k = general purpose
+# Cache hit: $0.10/M input (vs $0.60/M cache miss)
+#
+# LLM_PROVIDER=moonshot
+# LLM_BASE_URL=https://api.moonshot.cn/v1
+# LLM_API_KEY=sk-your-moonshot-key
+# LLM_MODEL_COMPLEX=kimi-k2.5
+# LLM_MODEL_SIMPLE=moonshot-v1-32k
+# INPUT_COST_PER_M_COMPLEX=0.60
+# OUTPUT_COST_PER_M_COMPLEX=3.00
+# INPUT_COST_PER_M_SIMPLE=3.29
+# OUTPUT_COST_PER_M_SIMPLE=3.29
+# LLM_CONTEXT_WINDOW=131072
+# LLM_MAX_TOKENS=32000
+# NEXT_PUBLIC_LLM_PROVIDER=moonshot
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=kimi-k2.5
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=moonshot-v1-32k
+
+# ── GLM / Zhipu AI (Z.AI) ─────────────────────────────────────────────────────
+# Docs: https://docs.z.ai/guides/overview/pricing
+# International endpoint: api.z.ai/api/paas/v4 | China endpoint: open.bigmodel.cn/api/paas/v4
+# glm-4.7-flash is FREE — useful as the simple/orchestrator model
+#
+# LLM_PROVIDER=z-ai
+# LLM_BASE_URL=https://api.z.ai/api/paas/v4
+# LLM_API_KEY=your-zhipu-key
+# LLM_MODEL_COMPLEX=glm-4.7          # $0.60/$2.20 per M
+# LLM_MODEL_SIMPLE=glm-4.5-air       # $0.20/$1.10 per M (or glm-4.7-flash for free)
+# INPUT_COST_PER_M_COMPLEX=0.60
+# OUTPUT_COST_PER_M_COMPLEX=2.20
+# INPUT_COST_PER_M_SIMPLE=0.20
+# OUTPUT_COST_PER_M_SIMPLE=1.10
+# LLM_CONTEXT_WINDOW=128000
+# LLM_MAX_TOKENS=32000
+# NEXT_PUBLIC_LLM_PROVIDER=z-ai
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=glm-4.7
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=glm-4.5-air
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -59,10 +59,11 @@ The quality of ClawOSS output is 100% determined by its prompts. When strategy c
 - Review prompts regularly for cross-file consistency
 
 ## Model
-- MiniMax M2.7 via direct API (`https://api.minimaxi.com/v1`)
-- 204k context window, 131k max output
-- Fallback: Kimi Code k2p5
-- API key env var: `MINIMAX_API_KEY`
+- Configured via env vars: `LLM_PROVIDER` / `LLM_MODEL_COMPLEX` / `LLM_MODEL_SIMPLE`
+- Complex tasks (sub-agents): `LLM_MODEL_COMPLEX` (default: `claude-opus-4-6`, resolved as `anthropic/claude-opus-4-6`)
+- Simple tasks (orchestrator/heartbeat): `LLM_MODEL_SIMPLE` (default: `claude-sonnet-4-6`, resolved as `anthropic/claude-sonnet-4-6`)
+- API key: `LLM_API_KEY`; endpoint: `LLM_BASE_URL`
+- See `docs/model-routing.md` for provider examples and budget config
 
 ## Common Commands
 ```bash

diff --git a/config/openclaw.json b/config/openclaw.json
@@ -24,11 +24,11 @@
         ]
       },
       "model": {
-        "primary": "minimax/MiniMax-M2.7",
-        "fallbacks": ["kimi-coding/k2p5"]
+        "primary": "__LLM_PROVIDER__/__LLM_MODEL_SIMPLE__",
+        "fallbacks": ["__LLM_PROVIDER__/__LLM_MODEL_COMPLEX__"]
       },
       "subagents": {
-        "model": "minimax/MiniMax-M2.7",
+        "model": "__LLM_PROVIDER__/__LLM_MODEL_COMPLEX__",
         "maxConcurrent": 14,
         "archiveAfterMinutes": 1440,
         "maxChildrenPerAgent": 15,
@@ -42,7 +42,7 @@
         "default": true,
         "name": "ClawOSS",
         "workspace": "__WORKSPACE_PATH__",
-        "model": "minimax/MiniMax-M2.7",
+        "model": "__LLM_PROVIDER__/__LLM_MODEL_SIMPLE__",
         "tools": {
           "profile": "coding"
         },
@@ -51,7 +51,7 @@
         },
         "heartbeat": {
           "every": "5m",
-          "model": "minimax/MiniMax-M2.7",
+          "model": "__LLM_PROVIDER__/__LLM_MODEL_SIMPLE__",
           "session": "main",
           "target": "none",
           "prompt": "You are autonomous. Read HEARTBEAT.md and execute EVERY step 0-7. Do NOT just reply HEARTBEAT_OK.\n\nGOAL: MERGED PRs. Not submitted PRs \u2014 MERGED. A PR that never gets reviewed is zero output.\n\nPRIORITY ORDER (follow this EVERY cycle):\n1. NEW PRs FIRST: Fill all 10 impl slots with new implementations. Discover issues, triage, spawn.\n2. FOLLOW-UPS ONLY AFTER all 10 slots are full or no new work exists.\n3. The PR Monitor (always-on) handles simple follow-ups automatically. Main agent focuses on NEW work.\n\nTRUST-BUILDING: Stop spray-and-pray. Focus on 10-15 repos where we build reputation. Return to repos that merged our PRs. A repo that merged your PR is 10x more likely to merge the next one.\n\nFINDING REPOS: Discover target repos using CRITERIA, not a hardcoded list. Stars >= 200, active development, merge velocity > 0, review rate > 50%, open PRs < 50. Search: 'topic:llm/agent/rag/ai' + 'label:bug/help-wanted'. Check trusted repos FIRST.\n\nPR TYPES: Bug fixes, docs fixes, typo fixes, test additions. NOT features or refactors.\nMIX: 60% easy wins (docs, typos, tests) + 40% bug fixes. A merged typo fix > an unreviewed bug fix.\n\nCLA REPOS: CLAs require manual signing. Do not attempt to sign CLAs.\n\nQUALITY: Understand codebase deeply. Trace root causes. Read .github/workflows/. Target 25-100 LOC (max 200). PR descriptions: write like a human developer, not an AI. No 'This PR addresses...', 'Upon investigation...', 'I identified...'. Jump straight to what's broken and what you did.\n\nSUPERSESSION CHECK: Before starting ANY issue, check: is it assigned? Does it have linked PRs from other contributors? If yes, SKIP \u2014 working on superseded issues wastes cycles and annoys maintainers.\n\nDEDUP: Check for spawned_pending and lock files before spawning. Multiple PRs per repo is fine. ALWAYS use 'BillionClaw' explicitly \u2014 NEVER use @me. New PRs get priority over follow-ups. No daily caps \u2014 ship as many quality PRs as possible.\n\nFOLLOW UP on open PRs \u2014 bump stale ones, respond to reviews, merge approved ones.\n\nSCORING: Use P(merge) 0-100 weighted formula for prioritization. P(merge) >= 30 to attempt, >= 60 for priority spawning. Sort work queue by P(merge) descending. 10 impl/followup slots + 4 always-on (scout, PR monitor scan, PR monitor deep, PR analyst) = 14 total. NEVER reply HEARTBEAT_OK — there is ALWAYS work to do. If queue is empty, run discovery. If discovery finds nothing, check follow-ups. If no follow-ups, expand to new niches. The agent must ALWAYS be working on something.\n\nWEB SEARCH: You have web_search and web_fetch tools (Perplexity). Use them aggressively — search before implementing, search when stuck, search to validate approaches. 5-10 searches per cycle minimum.\n\nIf work exists, DO IT. NEVER idle. NEVER reply HEARTBEAT_OK. There is ALWAYS something to do.",
@@ -63,20 +63,29 @@
   "models": {
     "mode": "merge",
     "providers": {
-      "minimax": {
-        "baseUrl": "https://api.minimaxi.com/v1",
-        "apiKey": "${MINIMAX_API_KEY}",
+      "__LLM_PROVIDER__": {
+        "baseUrl": "__LLM_BASE_URL__",
+        "apiKey": "${LLM_API_KEY}",
         "api": "openai-completions",
         "authHeader": true,
         "models": [
           {
-            "id": "MiniMax-M2.7",
-            "name": "MiniMax M2.7",
+            "id": "__LLM_MODEL_COMPLEX__",
+            "name": "Complex Model (Opus-tier)",
             "reasoning": true,
             "input": ["text"],
-            "cost": { "input": 0.5, "output": 1.5, "cacheRead": 0.125, "cacheWrite": 0.5 },
-            "contextWindow": 204800,
-            "maxTokens": 131072
+            "cost": { "input": __INPUT_COST_PER_M_COMPLEX__, "output": __OUTPUT_COST_PER_M_COMPLEX__ },
+            "contextWindow": __LLM_CONTEXT_WINDOW__,
+            "maxTokens": __LLM_MAX_TOKENS__
+          },
+          {
+            "id": "__LLM_MODEL_SIMPLE__",
+            "name": "Simple Model (Sonnet-tier)",
+            "reasoning": false,
+            "input": ["text"],
+            "cost": { "input": __INPUT_COST_PER_M_SIMPLE__, "output": __OUTPUT_COST_PER_M_SIMPLE__ },
+            "contextWindow": __LLM_CONTEXT_WINDOW__,
+            "maxTokens": __LLM_MAX_TOKENS__
           }
         ]
       }