diff --git a/.env.example b/.env.example
index b449c5a..6f84c32 100644
--- a/.env.example
+++ b/.env.example
@@ -4,32 +4,59 @@
 # === Required ===
 
 # GitHub Personal Access Token — needs public_repo scope at minimum
+# IMPORTANT: must be a classic token (ghp_*). Fine-grained tokens (github_pat_*)
+# cannot create PRs in other people's repos. See docs/quickstart.md.
 GITHUB_TOKEN=ghp_your-token-here
 
-# Choose models in provider/model format.
-# Examples:
-# - openai/gpt-4.1
-# - openai/gpt-4.1-mini
-# - openrouter/openai/gpt-4.1-mini
-# - deepseek/deepseek-chat
-# - minimax/MiniMax-M2.7
-# - kimi/moonshot-v1-32k
-CLAWOSS_PRIMARY_MODEL=openai/gpt-4.1
-CLAWOSS_FALLBACK_MODEL=
-CLAWOSS_SUBAGENT_MODEL=openai/gpt-4.1-mini
-CLAWOSS_HEARTBEAT_MODEL=openai/gpt-4.1
-CLAWOSS_AGENT_MODEL=openai/gpt-4.1
-
-# Set the API key(s) for whichever provider(s) your selected models use.
-OPENAI_API_KEY=sk-openai-your-key-here
-OPENROUTER_API_KEY=
-DEEPSEEK_API_KEY=
-MINIMAX_API_KEY=
-KIMI_API_KEY=
-
-# Optional custom OpenAI-compatible endpoint.
-CUSTOM_OPENAI_API_KEY=
-CUSTOM_OPENAI_BASE_URL=
+# === LLM Model Configuration ===
+# See docs/model-routing.md for full provider examples (OpenAI, DeepSeek, MiniMax, etc.)
+# See the "Provider Quick Reference" section at the bottom of this file.
+
+# Provider name — becomes the OpenClaw provider block key and model ID prefix
+LLM_PROVIDER=anthropic
+
+# OpenAI-compatible API endpoint for this provider
+LLM_BASE_URL=https://api.anthropic.com/v1
+
+# API key for the provider above
+LLM_API_KEY=sk-ant-your-key-here
+
+# Complex model (Opus-tier) — used by implementation sub-agents (code writing, debugging)
+LLM_MODEL_COMPLEX=claude-opus-4-6
+
+# Simple model (Sonnet-tier) — used by orchestrator/heartbeat (routing, file reads)
+LLM_MODEL_SIMPLE=claude-sonnet-4-6
+
+# Cost per million tokens in USD (used for dashboard spend display).
+# Per-model pricing — complex (Opus-tier) and simple (Sonnet-tier) can differ.
+# If per-model vars are not set, INPUT_COST_PER_M / OUTPUT_COST_PER_M are used as fallback.
+INPUT_COST_PER_M_COMPLEX=5.0     # Claude Opus 4.6 input ($5/M)
+OUTPUT_COST_PER_M_COMPLEX=25.0   # Claude Opus 4.6 output ($25/M)
+INPUT_COST_PER_M_SIMPLE=3.0      # Claude Sonnet 4.6 input ($3/M)
+OUTPUT_COST_PER_M_SIMPLE=15.0    # Claude Sonnet 4.6 output ($15/M)
+# Fallback if per-model vars are absent (set to your average expected price)
+INPUT_COST_PER_M=3.0
+OUTPUT_COST_PER_M=15.0
+
+# Context window and max output tokens (model-specific)
+LLM_CONTEXT_WINDOW=1000000
+LLM_MAX_TOKENS=32000
+
+# Public vars exposed to dashboard browser bundle (mirrors LLM_* above)
+NEXT_PUBLIC_LLM_PROVIDER=anthropic
+NEXT_PUBLIC_LLM_MODEL_COMPLEX=claude-opus-4-6
+NEXT_PUBLIC_LLM_MODEL_SIMPLE=claude-sonnet-4-6
+
+# === Budget Control ===
+# Total cumulative spend cap in USD. Agent pauses when reached. 0 = unlimited.
+BUDGET_USD_TOTAL=20.0
+
+# Per-model token caps (JSON map). Keys are BARE model names — matched across
+# all providers (e.g. `glm-4.6` covers `z-ai/glm-4.6`, `openrouter/glm-4.6`, etc).
+# Value is total tokens (input + output). Missing key or value 0 = unlimited.
+# When a model exceeds its cap, the agent stops using it AND a red banner shows
+# at the top of the dashboard. See docs/model-routing.md.
+# MODEL_TOKEN_BUDGETS={"glm-4.6":20000000,"deepseek-chat":50000000,"claude-opus-4-6":10000000}
 
 # === Optional ===
 
@@ -42,8 +69,146 @@ DASHBOARD_URL=https://clawoss-dashboard.vercel.app
 CLAW_API_KEY=your-shared-secret-here
 
 # Override only if your clone lives outside the default detected path
-CLAWOSS_ROOT=/absolute/path/to/ClawOSS
+# CLAWOSS_ROOT=/absolute/path/to/ClawOSS
 
-# Structured event logging for decision/outcome/reflection pipelines
+# Structured event logging for decision/outcome/reflection pipelines (alpha)
 CLAWOSS_RECORD_DECISIONS=1
 CLAWOSS_RECORD_OUTCOMES=1
+
+# === Legacy / Alpha compatibility (optional overrides) ===
+# If set, these take priority over LLM_* above. Useful during the alpha
+# transition when you want to pin a specific full model ID.
+# CLAWOSS_PRIMARY_MODEL=anthropic/claude-opus-4-6
+# CLAWOSS_SUBAGENT_MODEL=anthropic/claude-opus-4-6
+# CLAWOSS_HEARTBEAT_MODEL=anthropic/claude-sonnet-4-6
+# CLAWOSS_AGENT_MODEL=anthropic/claude-sonnet-4-6
+# CLAWOSS_FALLBACK_MODEL=
+
+# Legacy per-provider API keys (no longer required — use LLM_API_KEY above).
+# Kept for historical alpha deployments.
+# KIMI_API_KEY=sk-kimi-your-key-here
+# MINIMAX_API_KEY=your-minimax-key-here
+
+# =============================================================================
+# Provider Quick Reference — copy the block you want and replace the defaults
+# Prices: April 2026. Verify at provider docs before setting budget.
+# =============================================================================
+
+# ── Google Gemini ─────────────────────────────────────────────────────────────
+# Docs: https://ai.google.dev/gemini-api/docs/pricing
+# Uses OpenAI-compatible endpoint. Prices below are the <=200k-token rates; 2.5 Pro prompts over 200k tokens are billed higher (input price doubles).
+# All models have free tier with limited requests.
+#
+# LLM_PROVIDER=google
+# LLM_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
+# LLM_API_KEY=your-gemini-key
+# LLM_MODEL_COMPLEX=gemini-2.5-pro
+# LLM_MODEL_SIMPLE=gemini-2.5-flash
+# INPUT_COST_PER_M_COMPLEX=1.25
+# OUTPUT_COST_PER_M_COMPLEX=10.0
+# INPUT_COST_PER_M_SIMPLE=0.30
+# OUTPUT_COST_PER_M_SIMPLE=2.50
+# LLM_CONTEXT_WINDOW=1000000
+# LLM_MAX_TOKENS=65536
+# NEXT_PUBLIC_LLM_PROVIDER=google
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=gemini-2.5-pro
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=gemini-2.5-flash
+
+# ── Mistral AI ───────────────────────────────────────────────────────────────
+# Docs: https://mistral.ai/pricing
+# Large 3 for complex, Small 3.1 for orchestration. Nemo ($0.02/M) is cheapest.
+#
+# LLM_PROVIDER=mistral
+# LLM_BASE_URL=https://api.mistral.ai/v1
+# LLM_API_KEY=your-mistral-key
+# LLM_MODEL_COMPLEX=mistral-large-3
+# LLM_MODEL_SIMPLE=mistral-small-3.1
+# INPUT_COST_PER_M_COMPLEX=2.0
+# OUTPUT_COST_PER_M_COMPLEX=6.0
+# INPUT_COST_PER_M_SIMPLE=0.20
+# OUTPUT_COST_PER_M_SIMPLE=0.60
+# LLM_CONTEXT_WINDOW=128000
+# LLM_MAX_TOKENS=32000
+# NEXT_PUBLIC_LLM_PROVIDER=mistral
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=mistral-large-3
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=mistral-small-3.1
+
+# ── DeepSeek ──────────────────────────────────────────────────────────────────
+# Docs: https://api-docs.deepseek.com/quick_start/pricing
+# deepseek-chat = V3.2 non-thinking | deepseek-reasoner = V3.2 thinking mode
+# Same price, reasoner supports 32K output vs 8K.
+# Cache hit: $0.028/M input (90% off)
+#
+# LLM_PROVIDER=deepseek
+# LLM_BASE_URL=https://api.deepseek.com/v1
+# LLM_API_KEY=sk-your-deepseek-key
+# LLM_MODEL_COMPLEX=deepseek-reasoner     # thinking mode for complex tasks
+# LLM_MODEL_SIMPLE=deepseek-chat          # non-thinking for orchestration
+# INPUT_COST_PER_M_COMPLEX=0.28
+# OUTPUT_COST_PER_M_COMPLEX=0.42
+# INPUT_COST_PER_M_SIMPLE=0.28
+# OUTPUT_COST_PER_M_SIMPLE=0.42
+# LLM_CONTEXT_WINDOW=128000
+# LLM_MAX_TOKENS=32000
+# NEXT_PUBLIC_LLM_PROVIDER=deepseek
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=deepseek-reasoner
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=deepseek-chat
+
+# ── MiniMax ───────────────────────────────────────────────────────────────────
+# Docs: https://platform.minimax.io/docs/guides/pricing-paygo
+# highspeed variants are 2× price but lower latency
+#
+# LLM_PROVIDER=minimax
+# LLM_BASE_URL=https://api.minimaxi.com/v1
+# LLM_API_KEY=your-minimax-key
+# LLM_MODEL_COMPLEX=MiniMax-M2.7
+# LLM_MODEL_SIMPLE=MiniMax-M2.5
+# INPUT_COST_PER_M_COMPLEX=0.30
+# OUTPUT_COST_PER_M_COMPLEX=1.20
+# INPUT_COST_PER_M_SIMPLE=0.30
+# OUTPUT_COST_PER_M_SIMPLE=1.20
+# LLM_CONTEXT_WINDOW=204800
+# LLM_MAX_TOKENS=131072
+# NEXT_PUBLIC_LLM_PROVIDER=minimax
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=MiniMax-M2.7
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=MiniMax-M2.5
+
+# ── Kimi / Moonshot ───────────────────────────────────────────────────────────
+# Docs: https://platform.kimi.ai/docs/pricing/chat
+# kimi-k2.5 = latest coding model | moonshot-v1-32k = general purpose
+# Cache hit: $0.10/M input (vs $0.60/M cache miss)
+#
+# LLM_PROVIDER=moonshot
+# LLM_BASE_URL=https://api.moonshot.cn/v1
+# LLM_API_KEY=sk-your-moonshot-key
+# LLM_MODEL_COMPLEX=kimi-k2.5
+# LLM_MODEL_SIMPLE=moonshot-v1-32k
+# INPUT_COST_PER_M_COMPLEX=0.60
+# OUTPUT_COST_PER_M_COMPLEX=3.00
+# INPUT_COST_PER_M_SIMPLE=3.29
+# OUTPUT_COST_PER_M_SIMPLE=3.29
+# LLM_CONTEXT_WINDOW=131072
+# LLM_MAX_TOKENS=32000
+# NEXT_PUBLIC_LLM_PROVIDER=moonshot
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=kimi-k2.5
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=moonshot-v1-32k
+
+# ── GLM / Zhipu AI (Z.AI) ─────────────────────────────────────────────────────
+# Docs: https://docs.z.ai/guides/overview/pricing
+# International endpoint: api.z.ai/api/paas/v4 | China endpoint: open.bigmodel.cn/api/paas/v4
+# glm-4.7-flash is FREE — useful as the simple/orchestrator model
+#
+# LLM_PROVIDER=z-ai
+# LLM_BASE_URL=https://api.z.ai/api/paas/v4
+# LLM_API_KEY=your-zhipu-key
+# LLM_MODEL_COMPLEX=glm-4.7          # $0.60/$2.20 per M
+# LLM_MODEL_SIMPLE=glm-4.5-air       # $0.20/$1.10 per M (or glm-4.7-flash for free)
+# INPUT_COST_PER_M_COMPLEX=0.60
+# OUTPUT_COST_PER_M_COMPLEX=2.20
+# INPUT_COST_PER_M_SIMPLE=0.20
+# OUTPUT_COST_PER_M_SIMPLE=1.10
+# LLM_CONTEXT_WINDOW=128000
+# LLM_MAX_TOKENS=32000
+# NEXT_PUBLIC_LLM_PROVIDER=z-ai
+# NEXT_PUBLIC_LLM_MODEL_COMPLEX=glm-4.7
+# NEXT_PUBLIC_LLM_MODEL_SIMPLE=glm-4.5-air
diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml
new file mode 100644
index 0000000..8820ddf
--- /dev/null
+++ b/.github/workflows/smoke.yml
@@ -0,0 +1,87 @@
+name: Smoke
+
+# Minimal pre-merge gate for Phase-1 demo deployment. Runs on every PR to
+# main or alpha/**. Kept intentionally small — if this goes red, the PR
+# shouldn't merge. Heavier tests live in validate.yml.
+
+on:
+  pull_request:
+    branches: [main, "alpha/**"]
+  push:
+    branches: [main, "alpha/**"]
+
+jobs:
+  bash-scripts:
+    name: Bash script syntax
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Syntax-check every shell script
+        run: |
+          set -e
+          fail=0
+          while IFS= read -r -d '' f; do
+            if ! bash -n "$f"; then
+              echo "::error file=$f::bash -n failed"
+              fail=1
+            fi
+          done < <(find scripts deploy -type f -name '*.sh' -print0)
+          exit "$fail"
+
+  env-example-parses:
+    name: .env.example is a valid env file
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Source .env.example
+        run: |
+          # .env.example ships provider snippets commented out — sourcing it
+          # should succeed and should set the required LLM_* vars. This catches
+          # accidental stray "$(...)" expansions or unbalanced quotes before
+          # they reach a user's .env.
+          set -a
+          # shellcheck disable=SC1091
+          . ./.env.example
+          set +a
+          for v in LLM_PROVIDER LLM_BASE_URL LLM_MODEL_COMPLEX LLM_MODEL_SIMPLE; do
+            if [ -z "${!v:-}" ]; then
+              echo "::error::.env.example did not set $v"
+              exit 1
+            fi
+          done
+          echo "LLM_PROVIDER=$LLM_PROVIDER LLM_MODEL_COMPLEX=$LLM_MODEL_COMPLEX LLM_MODEL_SIMPLE=$LLM_MODEL_SIMPLE"
+
+  openclaw-json-substitutes:
+    name: openclaw.json template substitution
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+
+      - name: Run validate-config.mjs
+        run: node scripts/validate-config.mjs || (echo "::error::validate-config.mjs failed" && exit 1)
+
+  docker-build:
+    name: Agent Docker image builds
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Build agent image (no push)
+        # The image pulls openclaw from npm and a few apt packages. We only
+        # care that the build graph succeeds — we do not run the container
+        # (that requires real LLM / GitHub credentials).
+        run: |
+          docker build \
+            -f deploy/docker/Dockerfile \
+            -t clawoss-agent:ci \
+            .
+
+      - name: Docker compose config lint
+        run: |
+          docker compose -f deploy/docker/docker-compose.yml config > /dev/null
diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml
index ff79afb..06ce2e5 100644
--- a/.github/workflows/validate.yml
+++ b/.github/workflows/validate.yml
@@ -2,9 +2,9 @@ name: Validate
 
 on:
   push:
-    branches: [main]
+    branches: [main, "alpha/**"]
   pull_request:
-    branches: [main]
+    branches: [main, "alpha/**"]
 
 jobs:
   validate-config:
@@ -17,8 +17,12 @@ jobs:
         with:
           node-version: "20"
 
-      - name: Validate openclaw.json
-        run: node -e "JSON.parse(require('fs').readFileSync('config/openclaw.json', 'utf8'))"
+      # openclaw.json uses __PLACEHOLDER__ tokens that are substituted by
+      # restart.sh / deploy/docker/entrypoint.sh. validate-config.mjs below
+      # runs the full post-substitution parse. This inline check just sanity-
+      # asserts the file exists and isn't empty.
+      - name: Ensure openclaw.json is present
+        run: test -s config/openclaw.json
 
       - name: Validate cron-jobs.json
         run: node -e "JSON.parse(require('fs').readFileSync('config/cron-jobs.json', 'utf8'))"
diff --git a/CLAUDE.md b/CLAUDE.md
index 526fe26..2e29505 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -59,10 +59,11 @@ The quality of ClawOSS output is 100% determined by its prompts. When strategy c
 - Review prompts regularly for cross-file consistency
 
 ## Model
-- MiniMax M2.7 via direct API (`https://api.minimaxi.com/v1`)
-- 204k context window, 131k max output
-- Fallback: Kimi Code k2p5
-- API key env var: `MINIMAX_API_KEY`
+- Configured via env vars: `LLM_PROVIDER` / `LLM_MODEL_COMPLEX` / `LLM_MODEL_SIMPLE`
+- Complex tasks (sub-agents): `LLM_MODEL_COMPLEX` (default: `claude-opus-4-6`, resolved as `anthropic/claude-opus-4-6`)
+- Simple tasks (orchestrator/heartbeat): `LLM_MODEL_SIMPLE` (default: `claude-sonnet-4-6`, resolved as `anthropic/claude-sonnet-4-6`)
+- API key: `LLM_API_KEY`; endpoint: `LLM_BASE_URL`
+- See `docs/model-routing.md` for provider examples and budget config
 
 ## Common Commands
 ```bash
diff --git a/config/openclaw.json b/config/openclaw.json
index a589ebb..4cf365c 100644
--- a/config/openclaw.json
+++ b/config/openclaw.json
@@ -24,12 +24,12 @@
         ]
       },
       "model": {
-        "primary": "minimax/MiniMax-M2.7",
-        "fallbacks": ["kimi-coding/k2p5"]
+        "primary": "__LLM_PROVIDER__/__LLM_MODEL_SIMPLE__",
+        "fallbacks": ["__LLM_PROVIDER__/__LLM_MODEL_COMPLEX__"]
       },
       "subagents": {
-        "model": "minimax/MiniMax-M2.7",
-        "maxConcurrent": 1,
+        "model": "__LLM_PROVIDER__/__LLM_MODEL_COMPLEX__",
+        "maxConcurrent": 14,
         "archiveAfterMinutes": 1440,
         "maxChildrenPerAgent": 15,
         "maxSpawnDepth": 2,
@@ -42,7 +42,7 @@
         "default": true,
         "name": "ClawOSS",
         "workspace": "__WORKSPACE_PATH__",
-        "model": "minimax/MiniMax-M2.7",
+        "model": "__LLM_PROVIDER__/__LLM_MODEL_SIMPLE__",
         "tools": {
           "profile": "coding"
         },
@@ -50,7 +50,8 @@
           "mode": "off"
         },
         "heartbeat": {
-          "model": "minimax/MiniMax-M2.7",
+          "every": "5m",
+          "model": "__LLM_PROVIDER__/__LLM_MODEL_SIMPLE__",
           "session": "main",
           "target": "none",
           "prompt": "External-controller mode. Read /home/ubuntu/projects/codex/ClawOSS/workspace/HEARTBEAT.md and follow the current prompt goal and output contract.",
@@ -62,20 +63,29 @@
   "models": {
     "mode": "merge",
     "providers": {
-      "minimax": {
-        "baseUrl": "https://api.minimaxi.com/v1",
-        "apiKey": "${MINIMAX_API_KEY}",
+      "__LLM_PROVIDER__": {
+        "baseUrl": "__LLM_BASE_URL__",
+        "apiKey": "${LLM_API_KEY}",
         "api": "openai-completions",
         "authHeader": true,
         "models": [
           {
-            "id": "MiniMax-M2.7",
-            "name": "MiniMax M2.7",
+            "id": "__LLM_MODEL_COMPLEX__",
+            "name": "Complex Model (Opus-tier)",
             "reasoning": true,
             "input": ["text"],
-            "cost": { "input": 0.5, "output": 1.5, "cacheRead": 0.125, "cacheWrite": 0.5 },
-            "contextWindow": 204800,
-            "maxTokens": 131072
+            "cost": { "input": __INPUT_COST_PER_M_COMPLEX__, "output": __OUTPUT_COST_PER_M_COMPLEX__ },
+            "contextWindow": __LLM_CONTEXT_WINDOW__,
+            "maxTokens": __LLM_MAX_TOKENS__
+          },
+          {
+            "id": "__LLM_MODEL_SIMPLE__",
+            "name": "Simple Model (Sonnet-tier)",
+            "reasoning": false,
+            "input": ["text"],
+            "cost": { "input": __INPUT_COST_PER_M_SIMPLE__, "output": __OUTPUT_COST_PER_M_SIMPLE__ },
+            "contextWindow": __LLM_CONTEXT_WINDOW__,
+            "maxTokens": __LLM_MAX_TOKENS__
           }
         ]
       }
diff --git a/dashboard/app/api/agent/health-check/route.ts b/dashboard/app/api/agent/health-check/route.ts
index 1618de5..fdcf10c 100644
--- a/dashboard/app/api/agent/health-check/route.ts
+++ b/dashboard/app/api/agent/health-check/route.ts
@@ -2,8 +2,31 @@ export const dynamic = "force-dynamic";
 
 import { NextResponse } from "next/server";
 import { db, ensureDb } from "@/lib/db";
-import { pullRequests, prReviews, agentLogs } from "@/lib/schema";
+import { pullRequests, prReviews, agentLogs, metricsTokens, settings } from "@/lib/schema";
 import { eq, sql, gte } from "drizzle-orm";
+import { bareModelName } from "@/lib/cost-models";
+
+/** Safe JSON.parse — returns undefined for empty/null input or malformed JSON; never throws. The result is cast to T unchecked. */
+function safeParseJson<T = unknown>(raw: string | undefined | null): T | undefined {
+  if (!raw) return undefined; // undefined, null and "" all short-circuit before parsing
+  try { return JSON.parse(raw) as T; } catch { return undefined; }
+}
+
+/** Coerce raw budget config (stored settings or parsed MODEL_TOKEN_BUDGETS) into a bare-name-keyed record of positive caps. */
+function normalizeModelBudgets(
+  raw: unknown
+): Record<string, number> {
+  if (!raw || typeof raw !== "object") return {}; // null, undefined and primitives carry no caps
+  const out: Record<string, number> = {};
+  for (const [k, v] of Object.entries(raw as Record<string, unknown>)) {
+    const cap = typeof v === "number" ? v : Number(v); // numeric strings such as "5000" are accepted
+    if (Number.isFinite(cap) && cap > 0) { // NaN, Infinity, 0 and negatives are dropped, leaving that model uncapped
+      const bare = bareModelName(k); // NOTE(review): helper is defined elsewhere — presumably strips the provider prefix; confirm
+      if (bare) out[bare] = cap; // a later key resolving to the same bare name overwrites the earlier cap
+    }
+  }
+  return out;
+}
 
 /**
  * Hard blocklist — repos where submitting PRs risks bans or reputation damage.
@@ -59,6 +82,64 @@ export async function GET() {
     const merged = mergedResult[0]?.count || 0;
     const open = openResult[0]?.count || 0;
 
+    // Budget check — cumulative spend vs totalBudgetUsd setting
+    let budgetExhausted = false;
+    let totalCostUsd = 0;
+    let totalBudgetUsd = 0;
+    // Per-model token budgets (bare-name keyed; matches across providers).
+    const modelUsage: Record<string, number> = {};
+    let modelCaps: Record<string, number> = {};
+    const exhaustedModels: { model: string; used: number; cap: number }[] = [];
+    try {
+      const costResult = await db
+        .select({ total: sql<number>`coalesce(sum(${metricsTokens.costUsd}), 0)` })
+        .from(metricsTokens);
+      totalCostUsd = costResult[0]?.total || 0;
+
+      // Budget from settings table (dashboard-editable), fallback to env var
+      const settingsRow = await db.query.settings.findFirst({
+        where: eq(settings.key, "dashboard_settings"),
+      });
+      const settingsVal = settingsRow?.value as {
+        totalBudgetUsd?: number;
+        modelTokenBudgets?: Record<string, number>;
+      } | null;
+      totalBudgetUsd =
+        settingsVal?.totalBudgetUsd ??
+        parseFloat(process.env.BUDGET_USD_TOTAL || "0");
+
+      budgetExhausted = totalBudgetUsd > 0 && totalCostUsd > totalBudgetUsd;
+
+      // Per-model token aggregation — group by bare model name so the same
+      // model served by different providers is merged (e.g. z-ai/glm-4.6
+      // and openrouter/glm-4.6 both accumulate into "glm-4.6").
+      const rawPerModel = await db
+        .select({
+          model: metricsTokens.model,
+          tokens: sql<number>`coalesce(sum(${metricsTokens.inputTokens} + ${metricsTokens.outputTokens}), 0)`,
+        })
+        .from(metricsTokens)
+        .groupBy(metricsTokens.model);
+
+      for (const row of rawPerModel) {
+        const bare = bareModelName(row.model ?? "");
+        if (!bare) continue;
+        modelUsage[bare] = (modelUsage[bare] ?? 0) + Number(row.tokens ?? 0);
+      }
+
+      // Resolve caps: settings table first, env var fallback.
+      modelCaps = normalizeModelBudgets(
+        settingsVal?.modelTokenBudgets ?? safeParseJson(process.env.MODEL_TOKEN_BUDGETS)
+      );
+
+      for (const [bare, cap] of Object.entries(modelCaps)) {
+        const used = modelUsage[bare] ?? 0;
+        if (used >= cap) exhaustedModels.push({ model: bare, used, cap });
+      }
+    } catch {
+      // non-critical — don't block health check
+    }
+
     // Today's PRs
     const todayResult = await db
       .select({ count: sql<number>`count(*)` })
@@ -166,6 +247,23 @@ export async function GET() {
     // Quick directives
     const directives: string[] = [];
 
+    if (budgetExhausted) {
+      directives.unshift(
+        `BUDGET EXHAUSTED: Spent $${totalCostUsd.toFixed(2)} of $${totalBudgetUsd.toFixed(2)} total budget. ` +
+        `STOP all new work immediately — do NOT spawn new implementations or submit PRs. ` +
+        `To resume: raise totalBudgetUsd in dashboard Settings or increase BUDGET_USD_TOTAL env var and restart.`
+      );
+    }
+
+    for (const m of exhaustedModels) {
+      directives.unshift(
+        `MODEL TOKEN BUDGET EXHAUSTED: ${m.model} used ${m.used.toLocaleString()}/${m.cap.toLocaleString()} tokens. ` +
+        `STOP using this model across ALL providers (matched by bare model name). ` +
+        `Do NOT spawn sub-agents that route to it. ` +
+        `To resume: raise modelTokenBudgets["${m.model}"] in dashboard Settings or MODEL_TOKEN_BUDGETS env var.`
+      );
+    }
+
     if (approvedPRs.length > 0) {
       directives.unshift("MERGE NOW: " + approvedPRs.length + " approved PR(s) ready to merge: " + approvedPRs.map((pr) => pr.repo + "#" + pr.number).join(", ") + ". Run `gh pr merge --squash` if CI passes, or comment asking maintainer to trigger CI.");
     }
@@ -204,6 +302,17 @@ export async function GET() {
 
     return NextResponse.json({
       healthy: directives.length === 0,
+      budget: {
+        totalCostUsd: Math.round(totalCostUsd * 10000) / 10000,
+        totalBudgetUsd,
+        remainingUsd: totalBudgetUsd > 0 ? Math.max(0, totalBudgetUsd - totalCostUsd) : null,
+        exhausted: budgetExhausted,
+      },
+      modelBudgets: {
+        exhausted: exhaustedModels,
+        usage: modelUsage,
+        caps: modelCaps,
+      },
       stats: {
         total,
         merged,
diff --git a/dashboard/app/api/agent/llm-health/route.ts b/dashboard/app/api/agent/llm-health/route.ts
new file mode 100644
index 0000000..fe8589e
--- /dev/null
+++ b/dashboard/app/api/agent/llm-health/route.ts
@@ -0,0 +1,128 @@
+export const dynamic = "force-dynamic";
+
+import { NextResponse } from "next/server";
+import { promises as fs } from "node:fs";
+import path from "node:path";
+import os from "node:os";
+
+/**
+ * Scans the latest openclaw session jsonl for the most recent LLM call
+ * outcome. Separates "agent alive but LLM is erroring" (e.g. upstream 401 /
+ * quota exhausted) from the existing heartbeat-based connection state, which
+ * only reflects whether the dashboard-reporter hook has fired — and the hook
+ * only fires on `agent_end`, which never happens if the very first LLM call
+ * fails.
+ *
+ * Every branch returns HTTP 200 with the same keys:
+ *  - state: "ok" | "errored" | "unknown", decided by the NEWEST assistant event
+ *  - session: basename of the inspected session file, or null
+ *  - lastCallAt: timestamp of the newest assistant event, or null
+ *  - lastSuccessAt: timestamp of the newest call that reported token usage
+ *  - lastError / lastErrorAt: newest error logged after that last success
+ *  - message: human-readable summary of `state`
+ */
+type SessionEvent = {
+  timestamp?: string;
+  message?: { role?: string; errorMessage?: string; usage?: { totalTokens?: number } };
+} | null;
+
+/** Builds the "unknown" payload shared by the no-session and failure paths. */
+function unknownLlmHealth(message: string) {
+  return NextResponse.json({
+    state: "unknown",
+    session: null,
+    message,
+    lastCallAt: null,
+    lastSuccessAt: null,
+    lastError: null,
+    lastErrorAt: null,
+  });
+}
+
+export async function GET() {
+  const sessionsRoot =
+    process.env.OPENCLAW_SESSIONS_DIR ||
+    path.join(os.homedir(), ".openclaw", "agents", "clawoss", "sessions");
+
+  try {
+    const entries = await fs.readdir(sessionsRoot, { withFileTypes: true });
+    const jsonlFiles = entries
+      .filter((e) => e.isFile() && e.name.endsWith(".jsonl"))
+      .map((e) => path.join(sessionsRoot, e.name));
+
+    if (jsonlFiles.length === 0) return unknownLlmHealth("No session file found");
+
+    // Pick the most recently modified session file
+    const stats = await Promise.all(
+      jsonlFiles.map(async (f) => ({ f, mtime: (await fs.stat(f)).mtimeMs }))
+    );
+    stats.sort((a, b) => b.mtime - a.mtime);
+    const latest = stats[0].f;
+
+    const content = await fs.readFile(latest, "utf8");
+    const lines = content.split("\n").filter(Boolean);
+
+    let lastCallAt: string | null = null;
+    let lastError: string | null = null;
+    let lastErrorAt: string | null = null;
+    let lastSuccessAt: string | null = null;
+    let state: "ok" | "errored" | "unknown" = "unknown";
+
+    // Single newest-to-oldest pass over the session log:
+    //  - the first assistant event reached (the newest) fixes `state` and
+    //    `lastCallAt`;
+    //  - the newest error is remembered while the walk continues backwards;
+    //  - the walk stops at the newest call that reported token usage.
+    for (let i = lines.length - 1; i >= 0; i--) {
+      let evt: SessionEvent;
+      try { evt = JSON.parse(lines[i]); } catch { continue; }
+      // `evt?.` guards a line that is the JSON literal `null`, which would
+      // otherwise throw and turn the whole probe into "Sessions unavailable".
+      const msg = evt?.message;
+      if (!msg || msg.role !== "assistant") continue;
+
+      const ts = evt?.timestamp ?? null;
+
+      // `state` is still "unknown" only for the newest assistant event. An
+      // event with neither an error nor usage (e.g. toolUse-only) counts as ok.
+      if (state === "unknown") {
+        lastCallAt = ts;
+        state = msg.errorMessage ? "errored" : "ok";
+      }
+
+      if (msg.errorMessage) {
+        if (!lastError) {
+          lastError = msg.errorMessage;
+          lastErrorAt = ts;
+        }
+        continue; // keep walking — an earlier successful call may exist
+      }
+
+      if ((msg.usage?.totalTokens ?? 0) > 0) {
+        lastSuccessAt = ts;
+        break;
+      }
+    }
+
+    return NextResponse.json({
+      state,
+      session: path.basename(latest),
+      lastCallAt,
+      lastSuccessAt,
+      lastError,
+      lastErrorAt,
+      message:
+        state === "errored"
+          ? `LLM call failing: ${lastError}`
+          : state === "ok"
+            ? "LLM calls succeeding"
+            : "No LLM calls recorded yet",
+    });
+  } catch (error) {
+    // Sessions dir missing (dashboard running outside openclaw container) —
+    // report unknown rather than 500 so the existing UI keeps working.
+    return unknownLlmHealth(
+      `Sessions unavailable: ${String((error as Error).message || error)}`
+    );
+  }
+}
diff --git a/dashboard/app/api/connection-status/route.ts b/dashboard/app/api/connection-status/route.ts
index db09ba2..acb5c0a 100644
--- a/dashboard/app/api/connection-status/route.ts
+++ b/dashboard/app/api/connection-status/route.ts
@@ -75,6 +75,41 @@ export async function GET() {
       ? lastMetric[0].timestamp.getTime() > oneHourAgo.getTime()
       : false;
 
+    // LLM health — probes the openclaw session jsonl directly so we can
+    // surface "agent alive but LLM is 4xx/5xx" even when no heartbeat has
+    // been ingested yet (the hook only fires on agent_end, which never
+    // happens if the very first LLM call fails).
+    let llm: {
+      state: "ok" | "errored" | "unknown";
+      message: string;
+      lastError: string | null;
+      lastErrorAt: string | null;
+      lastSuccessAt: string | null;
+    } = {
+      state: "unknown",
+      message: "LLM health probe unavailable",
+      lastError: null,
+      lastErrorAt: null,
+      lastSuccessAt: null,
+    };
+    try {
+      const origin = process.env.NEXT_PUBLIC_DASHBOARD_URL ||
+        `http://127.0.0.1:${process.env.PORT || 3000}`;
+      const res = await fetch(`${origin}/api/agent/llm-health`, { cache: "no-store" });
+      if (res.ok) {
+        const body = await res.json();
+        llm = {
+          state: body.state,
+          message: body.message,
+          lastError: body.lastError ?? null,
+          lastErrorAt: body.lastErrorAt ?? null,
+          lastSuccessAt: body.lastSuccessAt ?? null,
+        };
+      }
+    } catch {
+      // non-critical
+    }
+
     const response = {
       connection: {
         state: connectionState,
@@ -89,6 +124,7 @@ export async function GET() {
         errorsLastHour: recentErrors[0]?.count || 0,
         lastMetricAt: lastMetric[0]?.timestamp || null,
       },
+      llm,
       hasAnyData: hasHeartbeats || hasMetrics,
       runtime,
     };
diff --git a/dashboard/app/api/metrics/overview/route.ts b/dashboard/app/api/metrics/overview/route.ts
index 8f40138..0e70b7e 100644
--- a/dashboard/app/api/metrics/overview/route.ts
+++ b/dashboard/app/api/metrics/overview/route.ts
@@ -129,9 +129,16 @@ export async function GET() {
       // Estimate 70/30 input/output split for fallback
       inputTokensToday = Math.round(tokensUsedToday * 0.7);
       outputTokensToday = tokensUsedToday - inputTokensToday;
-      // Estimate cost using Kimi K2.5 average ($1.8/M tokens)
+      // Estimate cost using env-configured pricing (INPUT_COST_PER_M / OUTPUT_COST_PER_M)
       if (tokensUsedToday > 0 && costToday === 0) {
-        costToday = tokensUsedToday * (1.8 / 1_000_000);
+        const inputCostComplex = parseFloat(process.env.INPUT_COST_PER_M_COMPLEX || process.env.INPUT_COST_PER_M || "3.0");
+        const outputCostComplex = parseFloat(process.env.OUTPUT_COST_PER_M_COMPLEX || process.env.OUTPUT_COST_PER_M || "15.0");
+        const inputCostSimple = parseFloat(process.env.INPUT_COST_PER_M_SIMPLE || process.env.INPUT_COST_PER_M || "3.0");
+        const outputCostSimple = parseFloat(process.env.OUTPUT_COST_PER_M_SIMPLE || process.env.OUTPUT_COST_PER_M || "15.0");
+        // Weighted average: ~40% complex (sub-agents) + 60% simple (orchestrator)
+        const avgInputCostPerM = inputCostComplex * 0.4 + inputCostSimple * 0.6;
+        const avgOutputCostPerM = outputCostComplex * 0.4 + outputCostSimple * 0.6;
+        costToday = (inputTokensToday * avgInputCostPerM + outputTokensToday * avgOutputCostPerM) / 1_000_000;
       }
     }
 
diff --git a/dashboard/app/api/settings/route.ts b/dashboard/app/api/settings/route.ts
index a345dc9..949351e 100644
--- a/dashboard/app/api/settings/route.ts
+++ b/dashboard/app/api/settings/route.ts
@@ -5,6 +5,7 @@ import { db, ensureDb } from "@/lib/db";
 import { settings } from "@/lib/schema";
 import { eq } from "drizzle-orm";
 import type { DashboardSettings } from "@/lib/types";
+import { bareModelName } from "@/lib/cost-models";
 
 const DEFAULT_SETTINGS: DashboardSettings = {
   targetRepos: [],
@@ -21,8 +22,25 @@ const DEFAULT_SETTINGS: DashboardSettings = {
     onAgentOffline: true,
   },
   dailyBudgetUsd: 50,
+  totalBudgetUsd: 0, // 0 = unlimited; raise this in dashboard to cap spend
+  modelTokenBudgets: {}, // empty = no per-model caps; bare-name keys
+  modelComplex: process.env.LLM_MODEL_COMPLEX || "claude-opus-4-6",
+  modelSimple: process.env.LLM_MODEL_SIMPLE || "claude-sonnet-4-6",
 };
 
+/** Normalize incoming modelTokenBudgets: bare-name keys, finite values >= 0 (0 = unlimited); arrays and non-numeric values are dropped. */
+function normalizeModelTokenBudgets(raw: unknown): Record<string, number> {
+  if (!raw || typeof raw !== "object" || Array.isArray(raw)) return {};
+  const out: Record<string, number> = {};
+  for (const [k, v] of Object.entries(raw as Record<string, unknown>)) {
+    const bare = bareModelName(String(k));
+    // Coerce only non-blank strings: Number(true) is 1 and Number(null | "" | []) is 0, which would invent caps.
+    const n = typeof v === "number" ? v : typeof v === "string" && v.trim() !== "" ? Number(v) : NaN;
+    if (bare && Number.isFinite(n) && n >= 0) out[bare] = n;
+  }
+  return out;
+}
+
 export async function GET() {
   try {
     await ensureDb();
@@ -58,6 +76,9 @@ export async function PUT(request: Request) {
       : DEFAULT_SETTINGS;
 
     const updatedSettings = { ...currentSettings, ...body };
+    if (body && Object.prototype.hasOwnProperty.call(body, "modelTokenBudgets")) {
+      updatedSettings.modelTokenBudgets = normalizeModelTokenBudgets(body.modelTokenBudgets);
+    }
 
     await db
       .insert(settings)
diff --git a/dashboard/app/layout.tsx b/dashboard/app/layout.tsx
index 542aea9..7165653 100644
--- a/dashboard/app/layout.tsx
+++ b/dashboard/app/layout.tsx
@@ -4,6 +4,8 @@ import { ThemeProvider } from "next-themes";
 import { TooltipProvider } from "@/components/ui/tooltip";
 import { SidebarProvider } from "@/components/ui/sidebar";
 import { AppSidebar } from "@/components/layout/app-sidebar";
+import { ModelBudgetBanner } from "@/components/layout/model-budget-banner";
+import { LlmErrorBanner } from "@/components/layout/llm-error-banner";
 import "./globals.css";
 
 const spaceMono = Space_Mono({
@@ -48,7 +50,11 @@ export default function RootLayout({
           <TooltipProvider>
             <SidebarProvider>
               <AppSidebar />
-              <main className="flex-1 overflow-auto">{children}</main>
+              <main className="flex-1 overflow-auto">
+                <ModelBudgetBanner />
+                <LlmErrorBanner />
+                {children}
+              </main>
             </SidebarProvider>
           </TooltipProvider>
         </ThemeProvider>
diff --git a/dashboard/app/page.tsx b/dashboard/app/page.tsx
index 16e5a28..a2eabb9 100644
--- a/dashboard/app/page.tsx
+++ b/dashboard/app/page.tsx
@@ -126,7 +126,7 @@ export default function OverviewPage() {
             </span>
           </div>
           <div className="flex items-center gap-3 text-muted-foreground/40">
-            <span>billionclaw-demo</span>
+            <span>{process.env.NEXT_PUBLIC_LLM_MODEL_COMPLEX || "billionclaw-demo"}</span>
             <span className="text-muted-foreground/15">|</span>
             <span>parallel-agents</span>
             <span className="text-muted-foreground/15">|</span>
@@ -221,7 +221,9 @@ export default function OverviewPage() {
                 {connectionData.pipeline.errorsLastHour}
               </span></span>
               <span className="text-muted-foreground/10">|</span>
-              <span>source <span className="text-foreground/45">github/BillionClaw</span></span>
+              <span>source <span className="text-foreground/45">github/{process.env.GITHUB_USERNAME || "BillionClaw"}</span></span>
+              <span className="text-muted-foreground/10">|</span>
+              <span>model <span className="text-foreground/45">{process.env.NEXT_PUBLIC_LLM_MODEL_COMPLEX || "claude-opus-4-6"}</span></span>
               <span>cost <span className="text-foreground/45">$0.60/$3.00/M</span></span>
               <span className="text-muted-foreground/10">|</span>
               <span>pii <span className="text-foreground/45">off</span></span>
diff --git a/dashboard/components/layout/llm-error-banner.tsx b/dashboard/components/layout/llm-error-banner.tsx
new file mode 100644
index 0000000..b878c55
--- /dev/null
+++ b/dashboard/components/layout/llm-error-banner.tsx
@@ -0,0 +1,79 @@
+"use client";
+
+import { useEffect, useState } from "react";
+
+interface LlmState {
+  state: "ok" | "errored" | "unknown"; // the banner renders only when this is "errored"
+  message?: string; // not read by this component
+  lastError?: string | null; // shown verbatim in the banner when truthy
+  lastErrorAt?: string | null; // timestamp string, parsed with `new Date()` for the "last fail" label
+  lastSuccessAt?: string | null; // timestamp string, parsed with `new Date()` for the "last ok" label
+}
+
+const POLL_INTERVAL_MS = 30_000;
+
+/** Relative age of an ISO timestamp ("42s ago", "5m ago", "3h ago"); "" if missing or unparseable, future times clamp to "0s ago". */
+function formatRelative(iso: string | null | undefined): string {
+  const diffMs = iso ? Math.max(0, Date.now() - new Date(iso).getTime()) : NaN;
+  if (Number.isNaN(diffMs)) return ""; // an invalid date would otherwise render as "NaNh ago"
+  if (diffMs < 60_000) return `${Math.floor(diffMs / 1000)}s ago`;
+  return diffMs < 3600_000 ? `${Math.floor(diffMs / 60_000)}m ago` : `${Math.floor(diffMs / 3600_000)}h ago`;
+}
+
+/** Polls /api/connection-status every POLL_INTERVAL_MS and shows an amber banner while the reported LLM state is "errored". */
+export function LlmErrorBanner() {
+  const [llm, setLlm] = useState<LlmState | null>(null);
+  const [mounted, setMounted] = useState(false);
+
+  useEffect(() => {
+    let active = true;
+    setMounted(true);
+
+    const refresh = async () => {
+      try {
+        const response = await fetch("/api/connection-status", { cache: "no-store" });
+        if (!response.ok) return;
+        const payload = await response.json();
+        if (active) setLlm(payload.llm ?? null);
+      } catch {
+        // Deliberately ignored: a failed poll leaves the banner in its last known state.
+      }
+    };
+
+    refresh();
+    const timer = setInterval(refresh, POLL_INTERVAL_MS);
+    return () => {
+      active = false;
+      clearInterval(timer);
+    };
+  }, []);
+
+  if (!mounted || !llm || llm.state !== "errored") return null;
+  const { lastError, lastErrorAt, lastSuccessAt } = llm;
+
+  return (
+    <div className="flex items-start gap-3 px-4 py-2 text-xs font-mono border-b bg-amber-500/10 border-amber-500/30 text-amber-300">
+      <span className="font-bold shrink-0 pt-0.5">!! LLM ERROR</span>
+      <div className="flex-1 flex flex-wrap gap-x-4 gap-y-0.5">
+        <span className="text-amber-300">
+          Agent is alive but LLM calls are failing — upstream provider rejecting requests.
+        </span>
+        {lastError && (
+          <span className="text-amber-300/80 font-semibold truncate max-w-[50vw]">
+            {lastError}
+          </span>
+        )}
+        {lastErrorAt && (
+          <span className="text-amber-300/60 shrink-0">
+            last fail {formatRelative(lastErrorAt)}
+          </span>
+        )}
+        {lastSuccessAt && (
+          <span className="text-amber-300/60 shrink-0">
+            last ok {formatRelative(lastSuccessAt)}
+          </span>
+        )}
+      </div>
+    </div>
+  );
+}
diff --git a/dashboard/components/layout/model-budget-banner.tsx b/dashboard/components/layout/model-budget-banner.tsx
new file mode 100644
index 0000000..d33772c
--- /dev/null
+++ b/dashboard/components/layout/model-budget-banner.tsx
@@ -0,0 +1,71 @@
+"use client";
+
+import { useEffect, useState } from "react";
+
+interface ExhaustedModel {
+  model: string; // rendered as the entry label and used as its React key
+  used: number; // token count shown before the "/" (formatted by formatTokens)
+  cap: number; // token count shown after the "/" (formatted by formatTokens)
+}
+
+interface HealthCheckResponse {
+  modelBudgets?: { // optional: a response without it is treated as "nothing exhausted"
+    exhausted?: ExhaustedModel[]; // optional: a missing list is treated as empty
+  };
+}
+
+const POLL_INTERVAL_MS = 30_000;
+
+/** Compact token count ("2.50M", "1.5K", "12"); switches to "M" at 999,950 so rounding never prints "1000.0K". */
+function formatTokens(n: number): string {
+  if (n >= 999_950) return `${(n / 1_000_000).toFixed(2)}M`;
+  return n >= 1_000 ? `${(n / 1_000).toFixed(1)}K` : String(n);
+}
+
+/** Polls /api/agent/health-check every POLL_INTERVAL_MS and lists the models the response reports as exhausted. */
+export function ModelBudgetBanner() {
+  const [exhausted, setExhausted] = useState<ExhaustedModel[]>([]);
+  const [mounted, setMounted] = useState(false);
+
+  useEffect(() => {
+    let active = true;
+    setMounted(true);
+
+    const refresh = async () => {
+      try {
+        const response = await fetch("/api/agent/health-check", { cache: "no-store" });
+        if (!response.ok) return;
+        const body: HealthCheckResponse = await response.json();
+        if (active) setExhausted(body.modelBudgets?.exhausted ?? []);
+      } catch {
+        // Non-critical: a failed poll leaves the banner in its last known state.
+      }
+    };
+
+    refresh();
+    const timer = setInterval(refresh, POLL_INTERVAL_MS);
+    return () => {
+      active = false;
+      clearInterval(timer);
+    };
+  }, []);
+
+  // Render nothing until mounted and the exhausted list is non-empty.
+  if (!mounted || exhausted.length === 0) return null;
+
+  return (
+    <div className="flex items-start gap-3 px-4 py-2 text-xs font-mono border-b bg-red-500/10 border-red-500/30 text-red-300">
+      <span className="font-bold shrink-0 pt-0.5">!! MODEL BUDGET</span>
+      <div className="flex-1 flex flex-wrap gap-x-4 gap-y-0.5">
+        {exhausted.map(({ model, used, cap }) => (
+          <span key={model} className="text-red-300">
+            <span className="font-semibold">{model}</span>: {formatTokens(used)} / {formatTokens(cap)} tokens — STOPPED
+          </span>
+        ))}
+        <span className="text-red-300/70 shrink-0">
+          raise via PUT /api/settings or MODEL_TOKEN_BUDGETS env var
+        </span>
+      </div>
+    </div>
+  );
+}
diff --git a/dashboard/components/live/gateway-status.tsx b/dashboard/components/live/gateway-status.tsx
index 470d1d2..493f90c 100644
--- a/dashboard/components/live/gateway-status.tsx
+++ b/dashboard/components/live/gateway-status.tsx
@@ -100,7 +100,7 @@ export function GatewayStatus({
         </div>
         <div className="flex justify-between">
           <span className="stat-label">Model</span>
-          <span className="text-foreground/60">minimax/MiniMax-M2.7</span>
+          <span className="text-foreground/60">{process.env.NEXT_PUBLIC_LLM_PROVIDER || "anthropic"}/{process.env.NEXT_PUBLIC_LLM_MODEL_COMPLEX || "claude-opus-4-6"}</span>
         </div>
         <div className="flex justify-between">
           <span className="stat-label">Auth</span>
diff --git a/dashboard/components/overview/metric-cards.tsx b/dashboard/components/overview/metric-cards.tsx
index bda83b5..9420b10 100644
--- a/dashboard/components/overview/metric-cards.tsx
+++ b/dashboard/components/overview/metric-cards.tsx
@@ -137,7 +137,7 @@ export function MetricCards({
     {
       label: "Cost/24h",
       value: formatCost(costToday),
-      sub: costToday > 0 ? "kimi k2.5" : null,
+      sub: costToday > 0 ? (process.env.NEXT_PUBLIC_LLM_MODEL_SIMPLE || "sonnet") : null,
       bar: { value: costToday, max: 5 },
     },
     {
diff --git a/dashboard/lib/cost-models.ts b/dashboard/lib/cost-models.ts
index 1184259..a252fc8 100644
--- a/dashboard/lib/cost-models.ts
+++ b/dashboard/lib/cost-models.ts
@@ -1,81 +1,465 @@
 /**
  * Cost models for different LLM providers.
- * Prices are in USD per token.
+ * Prices are quoted in USD per million tokens and stored here as USD per token for computation.
+ *
+ * This is a reference registry — actual pricing used by the agent is driven by
+ * INPUT_COST_PER_M_COMPLEX / OUTPUT_COST_PER_M_COMPLEX (and _SIMPLE) env vars.
+ * Add entries here so the dashboard can display correct costs when the model ID
+ * is known from telemetry.
+ *
+ * Prices last verified: April 2026. Always confirm at the provider's pricing page
+ * before committing to a budget.
  */
 export interface CostModel {
   name: string;
   provider: string;
+  /** USD per token (divide the per-million price by 1_000_000) */
   inputCostPerToken: number;
   outputCostPerToken: number;
+  /** Cache-hit input price, if provider supports prompt caching */
+  cacheHitCostPerToken?: number;
+  contextWindow?: number;
+  notes?: string;
 }
 
 export const COST_MODELS: Record<string, CostModel> = {
-  "kimi-coding/k2p5": {
-    name: "Kimi K2.5 (Kimi Code)",
-    provider: "kimi-code",
-    inputCostPerToken: 0.6 / 1_000_000,
-    outputCostPerToken: 3.0 / 1_000_000,
+
+  // ─── Anthropic Claude ──────────────────────────────────────────────────────
+  // https://platform.claude.com/docs/en/about-claude/pricing
+  // Opus 4.6/4.5 dropped to $5/$25 (from $15/$75). 1M context at standard rate.
+  "anthropic/claude-opus-4-6": {
+    name: "Claude Opus 4.6",
+    provider: "anthropic",
+    inputCostPerToken:  5.0 / 1_000_000,
+    outputCostPerToken: 25.0 / 1_000_000,
+    cacheHitCostPerToken: 0.50 / 1_000_000,  // 0.1x base input
+    contextWindow: 1_000_000,
   },
-  "z-ai/glm-5": {
-    name: "GLM-5",
-    provider: "openrouter",
-    inputCostPerToken: 0.72 / 1_000_000,
-    outputCostPerToken: 2.3 / 1_000_000,
+  "anthropic/claude-sonnet-4-6": {
+    name: "Claude Sonnet 4.6",
+    provider: "anthropic",
+    inputCostPerToken:  3.0 / 1_000_000,
+    outputCostPerToken: 15.0 / 1_000_000,
+    cacheHitCostPerToken: 0.30 / 1_000_000,
+    contextWindow: 1_000_000,
   },
-  "moonshotai/kimi-k2.5": {
-    name: "Kimi K2.5 (OpenRouter)",
-    provider: "openrouter",
-    inputCostPerToken: 0.45 / 1_000_000,
-    outputCostPerToken: 2.2 / 1_000_000,
+  "anthropic/claude-opus-4-5": {
+    name: "Claude Opus 4.5",
+    provider: "anthropic",
+    inputCostPerToken:  5.0 / 1_000_000,
+    outputCostPerToken: 25.0 / 1_000_000,
+    cacheHitCostPerToken: 0.50 / 1_000_000,
+    contextWindow: 1_000_000,
+  },
+  "anthropic/claude-sonnet-4-5": {
+    name: "Claude Sonnet 4.5",
+    provider: "anthropic",
+    inputCostPerToken:  3.0 / 1_000_000,
+    outputCostPerToken: 15.0 / 1_000_000,
+    cacheHitCostPerToken: 0.30 / 1_000_000,
+    contextWindow: 1_000_000,
+  },
+  "anthropic/claude-haiku-4-5": {
+    name: "Claude Haiku 4.5",
+    provider: "anthropic",
+    inputCostPerToken:  1.0 / 1_000_000,
+    outputCostPerToken: 5.0 / 1_000_000,
+    cacheHitCostPerToken: 0.10 / 1_000_000,
+    contextWindow: 200_000,
+  },
+  "anthropic/claude-haiku-3-5": {
+    name: "Claude Haiku 3.5",
+    provider: "anthropic",
+    inputCostPerToken:  0.80 / 1_000_000,
+    outputCostPerToken: 4.0 / 1_000_000,
+    cacheHitCostPerToken: 0.08 / 1_000_000,
+    contextWindow: 200_000,
+  },
+
+  // ─── OpenAI ────────────────────────────────────────────────────────────────
+  // https://openai.com/api/pricing
+  "openai/gpt-4o": {
+    name: "GPT-4o",
+    provider: "openai",
+    inputCostPerToken:  2.5  / 1_000_000,
+    outputCostPerToken: 10.0 / 1_000_000,
+    cacheHitCostPerToken: 1.25 / 1_000_000,
+    contextWindow: 128_000,
+  },
+  "openai/gpt-4o-mini": {
+    name: "GPT-4o Mini",
+    provider: "openai",
+    inputCostPerToken:  0.15 / 1_000_000,
+    outputCostPerToken: 0.6  / 1_000_000,
+    cacheHitCostPerToken: 0.075 / 1_000_000,
+    contextWindow: 128_000,
+  },
+  "openai/o3": {
+    name: "OpenAI o3",
+    provider: "openai",
+    inputCostPerToken:  10.0 / 1_000_000,
+    outputCostPerToken: 40.0 / 1_000_000,
+    contextWindow: 200_000,
+  },
+  "openai/o4-mini": {
+    name: "OpenAI o4-mini",
+    provider: "openai",
+    inputCostPerToken:  1.1 / 1_000_000,
+    outputCostPerToken: 4.4 / 1_000_000,
+    contextWindow: 200_000,
+  },
+
+  // ─── DeepSeek ──────────────────────────────────────────────────────────────
+  // https://api-docs.deepseek.com/quick_start/pricing
+  // Both deepseek-chat and deepseek-reasoner are now DeepSeek-V3.2
+  "deepseek/deepseek-chat": {
+    name: "DeepSeek Chat (V3.2)",
+    provider: "deepseek",
+    inputCostPerToken:  0.28  / 1_000_000,  // cache miss
+    outputCostPerToken: 0.42  / 1_000_000,
+    cacheHitCostPerToken: 0.028 / 1_000_000, // cache hit: 90% cheaper
+    contextWindow: 128_000,
+    notes: "Non-thinking mode. Cache miss $0.28/M, cache hit $0.028/M.",
+  },
+  "deepseek/deepseek-reasoner": {
+    name: "DeepSeek Reasoner (V3.2 Thinking)",
+    provider: "deepseek",
+    inputCostPerToken:  0.28  / 1_000_000,
+    outputCostPerToken: 0.42  / 1_000_000,
+    cacheHitCostPerToken: 0.028 / 1_000_000,
+    contextWindow: 128_000,
+    notes: "Thinking mode. Max 32K output tokens (vs 8K for chat mode).",
   },
+
+  // ─── MiniMax ───────────────────────────────────────────────────────────────
+  // https://platform.minimax.io/docs/guides/pricing-paygo
   "minimax/MiniMax-M2.7": {
     name: "MiniMax M2.7",
     provider: "minimax",
-    inputCostPerToken: 0.3 / 1_000_000,
-    outputCostPerToken: 1.2 / 1_000_000,
+    inputCostPerToken:  0.30 / 1_000_000,
+    outputCostPerToken: 1.20 / 1_000_000,
+    contextWindow: 204_800,
   },
-  "minimax/MiniMax-M1-80k": {
-    name: "MiniMax M2.5 (legacy)",
-    provider: "openrouter",
-    inputCostPerToken: 0.25 / 1_000_000,
-    outputCostPerToken: 1.2 / 1_000_000,
+  "minimax/MiniMax-M2.7-highspeed": {
+    name: "MiniMax M2.7 (High Speed)",
+    provider: "minimax",
+    inputCostPerToken:  0.60 / 1_000_000,
+    outputCostPerToken: 2.40 / 1_000_000,
+    contextWindow: 204_800,
+    notes: "Faster inference at 2× the price.",
+  },
+  "minimax/MiniMax-M2.5": {
+    name: "MiniMax M2.5",
+    provider: "minimax",
+    inputCostPerToken:  0.30 / 1_000_000,
+    outputCostPerToken: 1.20 / 1_000_000,
+    contextWindow: 204_800,
+  },
+  "minimax/MiniMax-M2.5-highspeed": {
+    name: "MiniMax M2.5 (High Speed)",
+    provider: "minimax",
+    inputCostPerToken:  0.60 / 1_000_000,
+    outputCostPerToken: 2.40 / 1_000_000,
+    contextWindow: 204_800,
+  },
+  "minimax/MiniMax-M2": {
+    name: "MiniMax M2",
+    provider: "minimax",
+    inputCostPerToken:  0.30 / 1_000_000,
+    outputCostPerToken: 1.20 / 1_000_000,
+    contextWindow: 204_800,
   },
   "minimax/MiniMax-M1": {
     name: "MiniMax M1",
-    provider: "openrouter",
-    inputCostPerToken: 0.25 / 1_000_000,
-    outputCostPerToken: 1.2 / 1_000_000,
+    provider: "minimax",
+    inputCostPerToken:  0.25 / 1_000_000,
+    outputCostPerToken: 1.20 / 1_000_000,
   },
-  "anthropic/claude-sonnet-4-20250514": {
-    name: "Claude Sonnet 4",
-    provider: "openrouter",
-    inputCostPerToken: 3.0 / 1_000_000,
-    outputCostPerToken: 15.0 / 1_000_000,
+  "minimax/MiniMax-M1-80k": {
+    name: "MiniMax M1 (80k)",
+    provider: "minimax",
+    inputCostPerToken:  0.25 / 1_000_000,
+    outputCostPerToken: 1.20 / 1_000_000,
+    contextWindow: 80_000,
   },
-  "openai/gpt-4o": {
-    name: "GPT-4o",
+
+  // ─── Kimi / Moonshot AI ────────────────────────────────────────────────────
+  // Direct API: https://platform.kimi.ai  (endpoint: api.moonshot.cn/v1)
+  // OpenRouter: moonshotai/*
+  "moonshot/kimi-k2.5": {
+    name: "Kimi K2.5",
+    provider: "moonshot",
+    inputCostPerToken:  0.60  / 1_000_000,  // cache miss
+    outputCostPerToken: 3.00  / 1_000_000,
+    cacheHitCostPerToken: 0.10 / 1_000_000, // cache hit
+    contextWindow: 131_072,
+    notes: "Latest Kimi coding model. Cache miss $0.60/M, cache hit $0.10/M.",
+  },
+  "moonshot/kimi-k2": {
+    name: "Kimi K2",
+    provider: "moonshot",
+    inputCostPerToken:  0.55 / 1_000_000,
+    outputCostPerToken: 2.20 / 1_000_000,
+    contextWindow: 131_072,
+  },
+  "moonshot/moonshot-v1-8k": {
+    name: "Moonshot V1 (8k)",
+    provider: "moonshot",
+    inputCostPerToken:  1.65 / 1_000_000,  // ≈ ¥12/M at 7.3 CNY/USD
+    outputCostPerToken: 1.65 / 1_000_000,
+    contextWindow: 8_000,
+    notes: "Legacy general model. Uniform input/output pricing.",
+  },
+  "moonshot/moonshot-v1-32k": {
+    name: "Moonshot V1 (32k)",
+    provider: "moonshot",
+    inputCostPerToken:  3.29 / 1_000_000,  // ≈ ¥24/M
+    outputCostPerToken: 3.29 / 1_000_000,
+    contextWindow: 32_000,
+    notes: "Legacy general model. Uniform input/output pricing.",
+  },
+  "moonshot/moonshot-v1-128k": {
+    name: "Moonshot V1 (128k)",
+    provider: "moonshot",
+    inputCostPerToken:  8.22 / 1_000_000,  // ≈ ¥60/M
+    outputCostPerToken: 8.22 / 1_000_000,
+    contextWindow: 128_000,
+    notes: "Legacy general model. Uniform input/output pricing.",
+  },
+  // OpenRouter aliases
+  "moonshotai/kimi-k2.5": {
+    name: "Kimi K2.5 (OpenRouter)",
     provider: "openrouter",
-    inputCostPerToken: 2.5 / 1_000_000,
-    outputCostPerToken: 10.0 / 1_000_000,
+    inputCostPerToken:  0.60 / 1_000_000,
+    outputCostPerToken: 3.00 / 1_000_000,
+  },
+  "kimi-coding/k2p5": {
+    name: "Kimi K2.5 (direct)",
+    provider: "kimi-code",
+    inputCostPerToken:  0.60 / 1_000_000,
+    outputCostPerToken: 3.00 / 1_000_000,
+  },
+
+  // ─── GLM / Zhipu AI (Z.AI) ─────────────────────────────────────────────────
+  // International API: https://api.z.ai/v1
+  // China API: https://open.bigmodel.cn/api/paas/v4
+  // https://docs.z.ai/guides/overview/pricing
+  "z-ai/glm-5.1": {
+    name: "GLM-5.1",
+    provider: "z-ai",
+    inputCostPerToken:  1.40 / 1_000_000,
+    outputCostPerToken: 4.40 / 1_000_000,
+  },
+  "z-ai/glm-5": {
+    name: "GLM-5",
+    provider: "z-ai",
+    inputCostPerToken:  1.00 / 1_000_000,
+    outputCostPerToken: 3.20 / 1_000_000,
+    notes: "China's first public AI company frontier model.",
+  },
+  "z-ai/glm-5-turbo": {
+    name: "GLM-5 Turbo",
+    provider: "z-ai",
+    inputCostPerToken:  1.20 / 1_000_000,
+    outputCostPerToken: 4.00 / 1_000_000,
+  },
+  "z-ai/glm-4.7": {
+    name: "GLM-4.7",
+    provider: "z-ai",
+    inputCostPerToken:  0.60 / 1_000_000,
+    outputCostPerToken: 2.20 / 1_000_000,
+  },
+  "z-ai/glm-4.7-flashx": {
+    name: "GLM-4.7 FlashX",
+    provider: "z-ai",
+    inputCostPerToken:  0.07 / 1_000_000,
+    outputCostPerToken: 0.40 / 1_000_000,
+    notes: "Fast, cheap. Good for simple orchestration tasks.",
+  },
+  "z-ai/glm-4.7-flash": {
+    name: "GLM-4.7 Flash",
+    provider: "z-ai",
+    inputCostPerToken:  0.0,
+    outputCostPerToken: 0.0,
+    notes: "Free tier.",
+  },
+  "z-ai/glm-4.6": {
+    name: "GLM-4.6",
+    provider: "z-ai",
+    inputCostPerToken:  0.60 / 1_000_000,
+    outputCostPerToken: 2.20 / 1_000_000,
+  },
+  "z-ai/glm-4.5": {
+    name: "GLM-4.5",
+    provider: "z-ai",
+    inputCostPerToken:  0.60 / 1_000_000,
+    outputCostPerToken: 2.20 / 1_000_000,
+  },
+  "z-ai/glm-4.5-x": {
+    name: "GLM-4.5-X (32B MoE)",
+    provider: "z-ai",
+    inputCostPerToken:  2.20 / 1_000_000,
+    outputCostPerToken: 8.90 / 1_000_000,
+  },
+  "z-ai/glm-4.5-air": {
+    name: "GLM-4.5 Air",
+    provider: "z-ai",
+    inputCostPerToken:  0.20 / 1_000_000,
+    outputCostPerToken: 1.10 / 1_000_000,
+    notes: "Lightweight, good for orchestrator/simple tasks.",
+  },
+  "z-ai/glm-4.5-airx": {
+    name: "GLM-4.5 AirX",
+    provider: "z-ai",
+    inputCostPerToken:  1.10 / 1_000_000,
+    outputCostPerToken: 4.50 / 1_000_000,
+  },
+  "z-ai/glm-4.5-flash": {
+    name: "GLM-4.5 Flash",
+    provider: "z-ai",
+    inputCostPerToken:  0.0,
+    outputCostPerToken: 0.0,
+    notes: "Free tier.",
+  },
+  "z-ai/glm-4-32b-0414-128k": {
+    name: "GLM-4 32B (128k)",
+    provider: "z-ai",
+    inputCostPerToken:  0.10 / 1_000_000,
+    outputCostPerToken: 0.10 / 1_000_000,
+  },
+
+  // ─── Google Gemini ────────────────────────────────────────────────────────
+  // https://ai.google.dev/gemini-api/docs/pricing
+  "google/gemini-2.5-pro": {
+    name: "Gemini 2.5 Pro",
+    provider: "google",
+    inputCostPerToken:  1.25 / 1_000_000,  // ≤200k; >200k doubles to $2.50
+    outputCostPerToken: 10.0 / 1_000_000,  // ≤200k; >200k $15.00
+    cacheHitCostPerToken: 0.125 / 1_000_000,
+    contextWindow: 1_000_000,
+    notes: "Tiered: >200k context doubles input/output price.",
+  },
+  "google/gemini-2.5-flash": {
+    name: "Gemini 2.5 Flash",
+    provider: "google",
+    inputCostPerToken:  0.30 / 1_000_000,
+    outputCostPerToken: 2.50 / 1_000_000,
+    cacheHitCostPerToken: 0.03 / 1_000_000,
+    contextWindow: 1_000_000,
+  },
+  "google/gemini-2.5-flash-lite": {
+    name: "Gemini 2.5 Flash-Lite",
+    provider: "google",
+    inputCostPerToken:  0.10 / 1_000_000,
+    outputCostPerToken: 0.40 / 1_000_000,
+    cacheHitCostPerToken: 0.01 / 1_000_000,
+    contextWindow: 1_000_000,
+    notes: "Cheapest Gemini model.",
+  },
+  "google/gemini-3-flash": {
+    name: "Gemini 3 Flash (Preview)",
+    provider: "google",
+    inputCostPerToken:  0.50 / 1_000_000,
+    outputCostPerToken: 3.00 / 1_000_000,
+    cacheHitCostPerToken: 0.05 / 1_000_000,
+    contextWindow: 1_000_000,
+  },
+  "google/gemini-3.1-pro": {
+    name: "Gemini 3.1 Pro (Preview)",
+    provider: "google",
+    inputCostPerToken:  2.00 / 1_000_000,  // ≤200k; >200k doubles
+    outputCostPerToken: 12.00 / 1_000_000,
+    contextWindow: 1_000_000,
+    notes: "Preview. Tiered: >200k context doubles price.",
+  },
+
+  // ─── Mistral AI ───────────────────────────────────────────────────────────
+  // https://mistral.ai/pricing
+  "mistral/mistral-large-3": {
+    name: "Mistral Large 3",
+    provider: "mistral",
+    inputCostPerToken:  2.0 / 1_000_000,
+    outputCostPerToken: 6.0 / 1_000_000,
+    contextWindow: 128_000,
+  },
+  "mistral/mistral-medium-3": {
+    name: "Mistral Medium 3",
+    provider: "mistral",
+    inputCostPerToken:  1.0 / 1_000_000,
+    outputCostPerToken: 3.0 / 1_000_000,
+    contextWindow: 128_000,
+  },
+  "mistral/mistral-small-3.1": {
+    name: "Mistral Small 3.1",
+    provider: "mistral",
+    inputCostPerToken:  0.20 / 1_000_000,
+    outputCostPerToken: 0.60 / 1_000_000,
+    contextWindow: 128_000,
+  },
+  "mistral/mistral-nemo": {
+    name: "Mistral Nemo",
+    provider: "mistral",
+    inputCostPerToken:  0.02 / 1_000_000,
+    outputCostPerToken: 0.04 / 1_000_000,
+    contextWindow: 128_000,
+    notes: "Cheapest Mistral model.",
   },
 };
 
-// Default model for the ClawOSS agent (switched to MiniMax M2.7 direct API)
-export const DEFAULT_MODEL = "minimax/MiniMax-M2.7";
-export const DEFAULT_COST_MODEL = COST_MODELS[DEFAULT_MODEL];
+/**
+ * Fallback cost model for models missing from the registry. Per-million prices come from *_COST_PER_M_COMPLEX, then *_COST_PER_M, then $3/$15; unparseable or negative values use the default.
+ */
+function envFallbackCostModel(): CostModel {
+  const provider = process.env.LLM_PROVIDER || "unknown";
+  const perM = (raw: string | undefined, dflt: number) => { const n = parseFloat(raw ?? ""); return Number.isFinite(n) && n >= 0 ? n : dflt; };
+  return {
+    name: `${provider}/${process.env.LLM_MODEL_COMPLEX || "unknown"}`,
+    provider,
+    inputCostPerToken:  perM(process.env.INPUT_COST_PER_M_COMPLEX  || process.env.INPUT_COST_PER_M,  3.0)  / 1_000_000,
+    outputCostPerToken: perM(process.env.OUTPUT_COST_PER_M_COMPLEX || process.env.OUTPUT_COST_PER_M, 15.0) / 1_000_000,
+  };
+}
+
+/**
+ * Returns the complex-tier model ID in the form "{LLM_PROVIDER}/{LLM_MODEL_COMPLEX}".
+ * Both env vars are read on each call; unset or empty values default to "anthropic" and "claude-opus-4-6".
+ */
+export function getActiveModel(): string {
+  const providerId = process.env.LLM_PROVIDER || "anthropic";
+  const modelId = process.env.LLM_MODEL_COMPLEX || "claude-opus-4-6";
+  return [providerId, modelId].join("/");
+}
 
 /**
  * Compute the cost for a given token usage.
- * Falls back to the default Kimi Code pricing if model is unknown.
+ * Looks up the model in the registry; falls back to env-configured pricing.
  */
 export function computeTokenCost(
   inputTokens: number,
   outputTokens: number,
   model?: string
 ): number {
-  const costModel = (model && COST_MODELS[model]) || DEFAULT_COST_MODEL;
+  const activeModel = model || getActiveModel();
+  const costModel = COST_MODELS[activeModel] || envFallbackCostModel();
   return (
-    inputTokens * costModel.inputCostPerToken +
+    inputTokens  * costModel.inputCostPerToken +
     outputTokens * costModel.outputCostPerToken
   );
 }
+
+/** @deprecated Use getActiveModel() instead — this constant is evaluated once, when the module is first loaded. */
+export const DEFAULT_MODEL = getActiveModel();
+export const DEFAULT_COST_MODEL = COST_MODELS[DEFAULT_MODEL] || envFallbackCostModel();
+
+/**
+ * Reduce a model identifier to its bare, lowercase model name so the same model
+ * matches across providers: `z-ai/glm-4.6`, `openrouter/glm-4.6`, and `glm-4.6`
+ * all become `glm-4.6`. Only the text after the final `/` is kept, so nested
+ * prefixes such as `openrouter/anthropic/claude-opus-4-6` are handled too.
+ * An empty input yields an empty string.
+ */
+export function bareModelName(model: string): string {
+  if (!model) return "";
+  const segments = model.split("/");
+  return segments[segments.length - 1].toLowerCase().trim();
+}
diff --git a/dashboard/lib/github.ts b/dashboard/lib/github.ts
index 842cb77..191fc17 100644
--- a/dashboard/lib/github.ts
+++ b/dashboard/lib/github.ts
@@ -16,7 +16,7 @@ export async function syncPRsFromGitHub(): Promise<{
 }> {
   await ensureDb();
   const octokit = getOctokit();
-  const agentUsername = process.env.CLAW_AGENT_USERNAME || "BillionClaw";
+  const agentUsername = process.env.CLAW_AGENT_USERNAME || process.env.GITHUB_USERNAME || "BillionClaw";
 
   // Dynamic discovery: search for ALL PRs by the agent across GitHub
   // Use raw fetch to avoid Octokit query encoding issues
diff --git a/dashboard/lib/types.ts b/dashboard/lib/types.ts
index f7f82ac..7ad88ff 100644
--- a/dashboard/lib/types.ts
+++ b/dashboard/lib/types.ts
@@ -131,6 +131,18 @@ export interface DashboardSettings {
     onAgentOffline: boolean;
   };
   dailyBudgetUsd: number;
+  /** Cumulative total spend cap in USD. 0 = unlimited. Enforced by health-check. */
+  totalBudgetUsd: number;
+  /**
+   * Per-model cumulative token caps. Keyed by bare model name (e.g. "glm-4.6",
+   * "deepseek-chat"), matched across all providers. Value is total tokens
+   * (input + output). Missing or ≤ 0 = unlimited. Enforced by health-check.
+   */
+  modelTokenBudgets: Record<string, number>;
+  /** Display-only: complex model in use (set via LLM_MODEL_COMPLEX env var) */
+  modelComplex: string;
+  /** Display-only: simple model in use (set via LLM_MODEL_SIMPLE env var) */
+  modelSimple: string;
 }
 
 export interface ConversationMessage {
diff --git a/deploy/docker/Dockerfile b/deploy/docker/Dockerfile
new file mode 100644
index 0000000..dd72605
--- /dev/null
+++ b/deploy/docker/Dockerfile
@@ -0,0 +1,60 @@
+# ClawOSS agent — Linux container image.
+#
+# This image runs the OpenClaw gateway + the clawoss agent configuration
+# in a single container. It's the Linux-native counterpart to
+# scripts/restart.sh's launchd flow on macOS.
+#
+# Build:
+#   docker build -f deploy/docker/Dockerfile -t clawoss-agent .
+#
+# Run (see deploy/docker/docker-compose.yml for the real invocation):
+#   docker run --env-file .env -v clawoss_state:/home/clawoss/.openclaw clawoss-agent
+
+FROM node:22-bookworm-slim
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# System deps used by scripts + subagents: git, gh CLI, jq, python3, curl, ca-certs.
+# The git repos worked on by the agent get cloned inside /tmp at runtime, so
+# git itself must be present. gh is used for every PR operation.
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+        ca-certificates curl git gnupg jq python3 python3-pip tini \
+ && install -d -m 0755 /etc/apt/keyrings \
+ && curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+        | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
+ && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
+ && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+        > /etc/apt/sources.list.d/github-cli.list \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends gh \
+ && rm -rf /var/lib/apt/lists/*
+
+# Install openclaw CLI globally. Defaults to the "latest" dist-tag (unpinned);
+# pin a known-good release with --build-arg OPENCLAW_VERSION=x.y.z.
+ARG OPENCLAW_VERSION=latest
+RUN npm install -g "openclaw@${OPENCLAW_VERSION}" \
+ && openclaw --version
+
+# Non-root user so the agent doesn't run as root inside the container.
+RUN useradd --create-home --shell /bin/bash --uid 1000 clawoss
+WORKDIR /app
+COPY --chown=clawoss:clawoss . /app
+
+# Install project deps (workspaces include the dashboard — skip the heavy
+# dashboard install here; run the agent and the dashboard in separate images
+# if both are needed).
+RUN npm install --omit=dev --ignore-scripts --workspaces=false \
+ && chown -R clawoss:clawoss /app
+
+USER clawoss
+ENV HOME=/home/clawoss
+ENV PATH=/home/clawoss/.local/bin:/usr/local/lib/node_modules/.bin:$PATH
+
+# Entrypoint handles env validation, config deploy, gateway start, and then
+# execs openclaw gateway run under tini, which runs as PID 1.
+COPY --chown=clawoss:clawoss deploy/docker/entrypoint.sh /usr/local/bin/clawoss-entrypoint
+RUN chmod +x /usr/local/bin/clawoss-entrypoint
+
+ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/clawoss-entrypoint"]
+CMD []
diff --git a/deploy/docker/README.md b/deploy/docker/README.md
new file mode 100644
index 0000000..9e7ad39
--- /dev/null
+++ b/deploy/docker/README.md
@@ -0,0 +1,66 @@
+# ClawOSS — Linux Docker deployment
+
+Phase-1 demo deployment: one container, one long-running process
+(`openclaw gateway run`), `.env`-driven LLM routing, token-budget aware.
+
+## Quickstart
+
+```bash
+cp .env.example .env
+$EDITOR .env                 # fill LLM_* and GITHUB_TOKEN at minimum
+docker compose -f deploy/docker/docker-compose.yml up --build
+```
+
+The container fails fast and prints the missing env var if required
+settings are absent. Silent misconfiguration that wastes tokens is the
+thing we're explicitly trying to avoid.
+
+## What goes in `.env`
+
+Minimum for the container to boot:
+
+| Variable | Purpose |
+|---|---|
+| `GITHUB_TOKEN` | Classic PAT (`ghp_*`) with `public_repo` scope. |
+| `LLM_PROVIDER` | e.g. `anthropic`, `deepseek`, `z-ai`, `minimax`. |
+| `LLM_BASE_URL` | OpenAI-compatible endpoint for the provider. |
+| `LLM_API_KEY`  | Key for that provider. |
+| `LLM_MODEL_COMPLEX` | Opus-tier model for subagents. |
+| `LLM_MODEL_SIMPLE`  | Sonnet-tier model for the orchestrator. |
+
+Strongly recommended (container warns if missing):
+
+- `BUDGET_USD_TOTAL` — hard cap in USD, agent pauses when reached.
+- `CLAW_API_KEY` + `DASHBOARD_URL` — telemetry into the Vercel dashboard.
+- `MODEL_TOKEN_BUDGETS` — per-model token caps (see `.env.example`); optional, no warning if unset.
+
+## State persistence
+
+`clawoss_state` (named volume) holds `~/.openclaw/` — the agent registry,
+session jsonl files, and OpenClaw extensions. Delete the volume to get a
+clean-room restart:
+
+```bash
+docker compose -f deploy/docker/docker-compose.yml down -v
+```
+
+Workspace memory (`workspace/memory/*.md`) is bind-mounted to the host so
+you can watch the pipeline state live from outside the container.
+
+## Relationship to the other docker setups
+
+| Path | Purpose |
+|---|---|
+| `docker/` + root `docker-compose.yml` | Alpha autonomy backend — API + worker + reflection services that read/write the dashboard DB. |
+| `deploy/docker/` (this dir) | The OpenClaw agent itself. This is what you run on a Linux host for the Phase-1 demo. |
+| `scripts/restart.sh` | macOS-native launchd deployment. On Linux it detects systemd and degrades gracefully; this image is the cleaner option for Linux. |
+
+## Not included in this image
+
+- The Vercel dashboard (keep it on Vercel — running it locally doesn't
+  help the Phase-1 demo). Set `DASHBOARD_URL` + `CLAW_API_KEY` to connect.
+- The `openclaw` CLI is not vendored in this repo; it is installed from npm
+  at build time. Operators behind a proxy should configure their npm
+  registry and pin it with `--build-arg OPENCLAW_VERSION=<pinned>`.
+- No automated backup of `clawoss_state`. If you care about queue
+  survival across host rebuilds, back up the volume yourself.
diff --git a/deploy/docker/docker-compose.yml b/deploy/docker/docker-compose.yml
new file mode 100644
index 0000000..c6e87ce
--- /dev/null
+++ b/deploy/docker/docker-compose.yml
@@ -0,0 +1,47 @@
+# ClawOSS agent — Linux Docker Compose deployment.
+#
+# This is additive to the root-level docker-compose.yml (which runs the
+# autonomy backend API + worker + reflection services). This file runs the
+# actual OpenClaw agent container for Phase-1 demo deployments on Linux hosts.
+#
+# Usage:
+#   cp .env.example .env && $EDITOR .env
+#   docker compose -f deploy/docker/docker-compose.yml up --build
+#
+# Stop and preserve state:
+#   docker compose -f deploy/docker/docker-compose.yml down
+#
+# Full reset (blows away openclaw state — required after config changes):
+#   docker compose -f deploy/docker/docker-compose.yml down -v
+#
+# The single-service design matches the macOS launchd flow: one long-lived
+# process (`openclaw gateway run`) supervises its own heartbeat + subagents.
+
+services:
+  agent:
+    build:
+      context: ../..
+      dockerfile: deploy/docker/Dockerfile
+    image: clawoss-agent:local
+    restart: unless-stopped
+    env_file:
+      - ../../.env
+    volumes:
+      # Named volume for openclaw state (agent registry, session jsonl, extensions).
+      # Without this, every restart drops queued work and pending subagents.
+      - clawoss_state:/home/clawoss/.openclaw
+      # Bind-mount workspace memory so operators can tail state files on the host.
+      - ../../workspace/memory:/app/workspace/memory
+    # Gateway port 18789, published on the host loopback only so the gateway is
+    # not reachable from other machines and cannot become an open LLM proxy.
+    ports:
+      - "127.0.0.1:18789:18789"
+    healthcheck:
+      test: ["CMD", "sh", "-c", "openclaw gateway status 2>/dev/null | grep -qi 'running\\|reachable\\|ok'"]
+      interval: 30s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+
+volumes:
+  clawoss_state:
diff --git a/deploy/docker/entrypoint.sh b/deploy/docker/entrypoint.sh
new file mode 100755
index 0000000..4da1132
--- /dev/null
+++ b/deploy/docker/entrypoint.sh
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+# ClawOSS Linux container entrypoint.
+#
+# Responsibilities:
+#   1. Validate required env vars (fail fast and loudly — the whole point of
+#      Task #5 was that silent failures waste tokens).
+#   2. Link the workspace into $HOME/.openclaw/ the same way setup.sh does on
+#      the host.
+#   3. Resolve the placeholders in config/openclaw.json (same substitutions
+#      as scripts/restart.sh) and deploy the result, plus an env block, to
+#      $HOME/.openclaw/openclaw.json.
+#   4. Verify GitHub CLI auth and set the git identity.
+#   5. Register the agent if missing, then exec `openclaw gateway run` so the
+#      gateway replaces this shell (the image runs the entrypoint under tini).
+
+set -euo pipefail
+
+log() { printf '[clawoss-docker] %s\n' "$*"; }
+fail() { printf '[clawoss-docker][FAIL] %s\n' "$*" >&2; exit 1; }
+
+# Escape a value for the replacement side of sed's s|pattern|VALUE|g:
+# backslash, ampersand and the | delimiter are special there and would
+# otherwise corrupt the substitution (e.g. a base URL containing "&").
+sed_escape() { printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'; }
+
+# ── 0. Required env vars ──────────────────────────────────────────────
+REQUIRED=(GITHUB_TOKEN LLM_API_KEY LLM_PROVIDER LLM_BASE_URL LLM_MODEL_COMPLEX LLM_MODEL_SIMPLE)
+MISSING=()
+for v in "${REQUIRED[@]}"; do
+  if [ -z "${!v:-}" ]; then
+    MISSING+=("$v")
+  fi
+done
+if [ ${#MISSING[@]} -gt 0 ]; then
+  fail "missing required env: ${MISSING[*]} (see .env.example)"
+fi
+
+# Optional but strongly recommended — warn, don't fail.
+for v in BUDGET_USD_TOTAL CLAW_API_KEY DASHBOARD_URL; do
+  if [ -z "${!v:-}" ]; then
+    log "[WARN] $v not set"
+  fi
+done
+
+# ── 1. Link workspace ─────────────────────────────────────────────────
+PROJECT_DIR="/app"
+WORKSPACE_DIR="$PROJECT_DIR/workspace"
+OC_DIR="$HOME/.openclaw"
+mkdir -p "$OC_DIR/logs" "$OC_DIR/agents"
+
+if [ ! -L "$OC_DIR/workspace" ]; then
+  ln -sfn "$WORKSPACE_DIR" "$OC_DIR/workspace"
+  log "linked workspace: $OC_DIR/workspace -> $WORKSPACE_DIR"
+fi
+
+# ── 2. Deploy resolved openclaw.json ──────────────────────────────────
+# Mirrors the sed substitution in scripts/restart.sh. Kept in-entrypoint so
+# the container can come up without invoking the full restart.sh (which also
+# does macOS-specific work like launchd).
+SED_ARGS=()
+add_sub() { SED_ARGS+=(-e "s|__${1}__|$(sed_escape "$2")|g"); }
+
+add_sub WORKSPACE_PATH "$WORKSPACE_DIR"
+add_sub PROJECT_DIR "$PROJECT_DIR"
+add_sub HOME_DIR "$HOME"
+add_sub LLM_PROVIDER "$LLM_PROVIDER"
+add_sub LLM_BASE_URL "$LLM_BASE_URL"
+add_sub LLM_MODEL_COMPLEX "$LLM_MODEL_COMPLEX"
+add_sub LLM_MODEL_SIMPLE "$LLM_MODEL_SIMPLE"
+# Per-model prices fall back to the generic price, then to built-in defaults.
+add_sub INPUT_COST_PER_M_COMPLEX "${INPUT_COST_PER_M_COMPLEX:-${INPUT_COST_PER_M:-3.0}}"
+add_sub OUTPUT_COST_PER_M_COMPLEX "${OUTPUT_COST_PER_M_COMPLEX:-${OUTPUT_COST_PER_M:-15.0}}"
+add_sub INPUT_COST_PER_M_SIMPLE "${INPUT_COST_PER_M_SIMPLE:-${INPUT_COST_PER_M:-3.0}}"
+add_sub OUTPUT_COST_PER_M_SIMPLE "${OUTPUT_COST_PER_M_SIMPLE:-${OUTPUT_COST_PER_M:-15.0}}"
+# restart.sh also resolves the generic price placeholders; keep them here so
+# the template is never deployed with a literal __INPUT_COST_PER_M__ in it.
+add_sub INPUT_COST_PER_M "${INPUT_COST_PER_M:-3.0}"
+add_sub OUTPUT_COST_PER_M "${OUTPUT_COST_PER_M:-15.0}"
+add_sub LLM_CONTEXT_WINDOW "${LLM_CONTEXT_WINDOW:-200000}"
+add_sub LLM_MAX_TOKENS "${LLM_MAX_TOKENS:-32000}"
+
+RESOLVED_CONFIG=$(sed "${SED_ARGS[@]}" "$PROJECT_DIR/config/openclaw.json")
+
+# Inject env block (API key + token + budget + pricing) so openclaw has
+# everything it needs to authenticate. The target path is handed to Python
+# through the environment rather than spliced into its source, and the file
+# is created 0600 because it holds the API key and the GitHub token.
+OC_CONFIG_PATH="$OC_DIR/openclaw.json" python3 -c "
+import json, os, sys
+merged = json.load(sys.stdin)
+env = merged.setdefault('env', {})
+keys = [
+  'LLM_API_KEY','LLM_BASE_URL','LLM_PROVIDER',
+  'LLM_MODEL_COMPLEX','LLM_MODEL_SIMPLE',
+  'GITHUB_TOKEN','GITHUB_USERNAME','GITHUB_EMAIL',
+  'CLAW_API_KEY','DASHBOARD_URL',
+  'BUDGET_USD_TOTAL','MODEL_TOKEN_BUDGETS',
+  'INPUT_COST_PER_M','OUTPUT_COST_PER_M',
+  'INPUT_COST_PER_M_COMPLEX','OUTPUT_COST_PER_M_COMPLEX',
+  'INPUT_COST_PER_M_SIMPLE','OUTPUT_COST_PER_M_SIMPLE',
+]
+for k in keys:
+  v = os.environ.get(k)
+  if v:
+    env[k] = v
+path = os.environ['OC_CONFIG_PATH']
+fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+with os.fdopen(fd, 'w') as fh:
+  json.dump(merged, fh, indent=2)
+os.chmod(path, 0o600)
+" <<<"$RESOLVED_CONFIG"
+
+log "deployed $OC_DIR/openclaw.json"
+
+# ── 3. GitHub CLI auth (token comes from the environment) ─────────────
+# gh picks up GITHUB_TOKEN from the environment by itself, and
+# `gh auth login --with-token` refuses to store credentials while that
+# variable is set, so there is no login step. Only check that the token is
+# accepted; warn, don't fail.
+if ! gh auth status >/dev/null 2>&1; then
+  log "[WARN] gh auth status failed for GITHUB_TOKEN; gh commands may 401"
+fi
+
+# Git identity — PRs need author info.
+git config --global user.name  "${GITHUB_USERNAME:-clawoss-bot}"
+git config --global user.email "${GITHUB_EMAIL:-${GITHUB_USERNAME:-clawoss-bot}@users.noreply.github.com}"
+
+# ── 4. Register agent + hand off to gateway ───────────────────────────
+AGENT_MODEL="${LLM_PROVIDER}/${LLM_MODEL_SIMPLE}"
+# Capture the listing before matching: under pipefail, piping straight into
+# grep -q can report failure when grep exits early and the producer is
+# killed by SIGPIPE, which would trigger a duplicate agents add.
+AGENTS_LIST="$(openclaw agents list 2>/dev/null || true)"
+if ! grep -q "^- clawoss " <<<"$AGENTS_LIST"; then
+  openclaw agents add clawoss \
+    --workspace "$WORKSPACE_DIR" \
+    --model "$AGENT_MODEL" \
+    --non-interactive
+  log "registered agent clawoss (model=$AGENT_MODEL)"
+fi
+
+log "starting openclaw gateway (foreground)"
+exec openclaw gateway run
diff --git a/docs/images/clawoss-review.png b/docs/images/clawoss-review.png
new file mode 100644
index 0000000..979b0c5
Binary files /dev/null and b/docs/images/clawoss-review.png differ
diff --git a/docs/model-routing.md b/docs/model-routing.md
new file mode 100644
index 0000000..8b2a019
--- /dev/null
+++ b/docs/model-routing.md
@@ -0,0 +1,546 @@
+# 模型路由
+
+## 目标
+
+1. **任意主流模型**：通过环境变量切换供应商和模型，无需改代码
+2. **双轨路由**：复杂任务用 Opus 级模型，简单任务用 Sonnet 级模型
+3. **总预算熔断**：累计花费达到上限时服务自动暂停
+4. **Dashboard 可视**：实时显示模型配置、累计花费、预算进度
+
+## 路由规则
+
+| 角色 | 使用模型 | 原因 |
+|------|---------|------|
+| Orchestrator（heartbeat 主循环） | `LLM_MODEL_SIMPLE` | 只做文件读写、状态路由 |
+| 主 Agent session | `LLM_MODEL_SIMPLE` | 同上 |
+| 所有 Sub-agents（实现、跟进、监控） | `LLM_MODEL_COMPLEX` | 需深度理解代码、写 patch、分析 review |
+
+Fallback：complex 失败时回退 simple。
+
+## 配置注入机制
+
+```
+.env
+  ↓ restart.sh 读取
+config/openclaw.json（含 __LLM_*__ 占位符）
+  ↓ sed 替换占位符
+~/.openclaw/openclaw.json（已注入实际值）
+  ↓ OpenClaw gateway 启动时读取
+Agent 运行（使用正确模型）
+```
+
+改了 `.env` 后必须 `bash scripts/restart.sh` 重启才能生效。
+
+---
+
+## 环境变量
+
+所有变量在 `.env` 中配置，`restart.sh` 读取后注入到 OpenClaw config 和 gateway plist。
+
+### 必填
+
+| 变量 | 说明 | 示例 |
+|------|------|------|
+| `LLM_PROVIDER` | 供应商 key，作为 OpenClaw provider 块名和模型 ID 前缀 | `anthropic` |
+| `LLM_BASE_URL` | OpenAI 兼容 API 端点 | `https://api.anthropic.com/v1` |
+| `LLM_API_KEY` | 供应商 API 密钥 | `sk-ant-...` |
+| `LLM_MODEL_COMPLEX` | 复杂任务模型 ID（sub-agents 使用） | `claude-opus-4-6` |
+| `LLM_MODEL_SIMPLE` | 简单任务模型 ID（orchestrator 使用） | `claude-sonnet-4-6` |
+| `GITHUB_TOKEN` | GitHub PAT，需 `public_repo` 权限 | `ghp_...` |
+
+### 计价
+
+| 变量 | 说明 | 默认值 |
+|------|------|--------|
+| `INPUT_COST_PER_M_COMPLEX` | Complex 模型输入价（$/M token） | 读 `INPUT_COST_PER_M` |
+| `OUTPUT_COST_PER_M_COMPLEX` | Complex 模型输出价（$/M token） | 读 `OUTPUT_COST_PER_M` |
+| `INPUT_COST_PER_M_SIMPLE` | Simple 模型输入价（$/M token） | 读 `INPUT_COST_PER_M` |
+| `OUTPUT_COST_PER_M_SIMPLE` | Simple 模型输出价（$/M token） | 读 `OUTPUT_COST_PER_M` |
+| `INPUT_COST_PER_M` | 通用 fallback 输入价 | `3.0` |
+| `OUTPUT_COST_PER_M` | 通用 fallback 输出价 | `15.0` |
+
+### 模型参数
+
+| 变量 | 说明 | 默认值 |
+|------|------|--------|
+| `LLM_CONTEXT_WINDOW` | 上下文窗口（tokens） | `200000` |
+| `LLM_MAX_TOKENS` | 最大输出（tokens） | `32000` |
+
+### 预算
+
+| 变量 | 说明 | 默认值 |
+|------|------|--------|
+| `BUDGET_USD_TOTAL` | 累计总预算（美元），`0` = 不限制 | `0` |
+| `MODEL_TOKEN_BUDGETS` | 每模型 token 上限的 JSON 映射，`0` 或缺省 = 不限制 | `{}` |
+
+`MODEL_TOKEN_BUDGETS` 示例：
+
+```bash
+MODEL_TOKEN_BUDGETS='{"glm-4.6":20000000,"deepseek-chat":50000000,"claude-opus-4-6":10000000}'
+```
+
+**关键语义**：
+
+- **key 是 bare model name**（与供应商前缀无关）。系统按 model name 的最后一段做匹配，全部小写化。`z-ai/glm-4.6`、`openrouter/glm-4.6`、`zhipu/glm-4.6` 都会被合并到同一个 `glm-4.6` 计数器，跨供应商累加。
+- value 是**累计 token 上限**（input + output 之和）。
+- value `0` 或缺省 = 不限制。
+- 触发后行为：health-check 在 directives 顶部插入 `MODEL TOKEN BUDGET EXHAUSTED: <model> ...`，agent 停止派发使用该模型的 sub-agent。Dashboard 顶部出现红色横幅。
+- **不能用 `LLM_BASE_URL` 或 provider 字段判定模型**——同一个模型可能从多个供应商接入，必须用 model name 匹配。
+
+### Dashboard
+
+| 变量 | 说明 |
+|------|------|
+| `DASHBOARD_URL` | Dashboard URL |
+| `CLAW_API_KEY` | Dashboard API 共享密钥 |
+| `NEXT_PUBLIC_LLM_PROVIDER` | 浏览器端显示用（镜像 `LLM_PROVIDER`） |
+| `NEXT_PUBLIC_LLM_MODEL_COMPLEX` | 浏览器端显示用 |
+| `NEXT_PUBLIC_LLM_MODEL_SIMPLE` | 浏览器端显示用 |
+
+---
+
+## 供应商配置 & 定价
+
+> 价格：2026 年 4 月核实。使用前请在供应商文档确认最新价格。
+
+### 价格对照表
+
+| 供应商 | Complex 模型 | Simple 模型 | Complex 输入/输出 $/M | Simple 输入/输出 $/M |
+|--------|-------------|------------|----------------------|---------------------|
+| Anthropic | claude-opus-4-6 | claude-sonnet-4-6 | $5 / $25 | $3 / $15 |
+| OpenAI | gpt-4o | gpt-4o-mini | $2.5 / $10 | $0.15 / $0.6 |
+| Google | gemini-2.5-pro | gemini-2.5-flash | $1.25 / $10 | $0.30 / $2.50 |
+| Mistral | mistral-large-3 | mistral-small-3.1 | $2 / $6 | $0.20 / $0.60 |
+| DeepSeek | deepseek-reasoner | deepseek-chat | $0.28 / $0.42 | $0.28 / $0.42 |
+| MiniMax | MiniMax-M2.7 | MiniMax-M2.5 | $0.30 / $1.20 | $0.30 / $1.20 |
+| Kimi | kimi-k2.5 | moonshot-v1-32k | $0.60 / $3.00 | $3.29 / $3.29 |
+| GLM | glm-4.7 | glm-4.5-air | $0.60 / $2.20 | $0.20 / $1.10 |
+
+### Anthropic Claude
+
+文档：https://platform.claude.com/docs/en/about-claude/pricing
+
+Opus 4.6/4.5 已降价至 $5/$25（原 $15/$75）。4.6 系列支持 1M context window，标准费率。
+
+| 模型 | 输入 $/M | 输出 $/M | Cache hit $/M | 上下文 |
+|------|---------|---------|--------------|--------|
+| claude-opus-4-6 | $5.0 | $25.0 | $0.50 | 1M |
+| claude-sonnet-4-6 | $3.0 | $15.0 | $0.30 | 1M |
+| claude-opus-4-5 | $5.0 | $25.0 | $0.50 | 1M |
+| claude-sonnet-4-5 | $3.0 | $15.0 | $0.30 | 1M |
+| claude-haiku-4-5 | $1.0 | $5.0 | $0.10 | 200k |
+
+```bash
+LLM_PROVIDER=anthropic
+LLM_BASE_URL=https://api.anthropic.com/v1
+LLM_API_KEY=sk-ant-...
+LLM_MODEL_COMPLEX=claude-opus-4-6
+LLM_MODEL_SIMPLE=claude-sonnet-4-6
+INPUT_COST_PER_M_COMPLEX=5.0
+OUTPUT_COST_PER_M_COMPLEX=25.0
+INPUT_COST_PER_M_SIMPLE=3.0
+OUTPUT_COST_PER_M_SIMPLE=15.0
+LLM_CONTEXT_WINDOW=1000000
+LLM_MAX_TOKENS=32000
+NEXT_PUBLIC_LLM_PROVIDER=anthropic
+NEXT_PUBLIC_LLM_MODEL_COMPLEX=claude-opus-4-6
+NEXT_PUBLIC_LLM_MODEL_SIMPLE=claude-sonnet-4-6
+```
+
+### OpenAI
+
+文档：https://openai.com/api/pricing
+
+| 模型 | 输入 $/M | 输出 $/M | Cache hit $/M | 上下文 |
+|------|---------|---------|--------------|--------|
+| gpt-4o | $2.5 | $10.0 | $1.25 | 128k |
+| gpt-4o-mini | $0.15 | $0.6 | $0.075 | 128k |
+| o3 | $10.0 | $40.0 | -- | 200k |
+| o4-mini | $1.1 | $4.4 | -- | 200k |
+
+```bash
+LLM_PROVIDER=openai
+LLM_BASE_URL=https://api.openai.com/v1
+LLM_API_KEY=sk-...
+LLM_MODEL_COMPLEX=gpt-4o
+LLM_MODEL_SIMPLE=gpt-4o-mini
+INPUT_COST_PER_M_COMPLEX=2.5
+OUTPUT_COST_PER_M_COMPLEX=10.0
+INPUT_COST_PER_M_SIMPLE=0.15
+OUTPUT_COST_PER_M_SIMPLE=0.6
+LLM_CONTEXT_WINDOW=128000
+LLM_MAX_TOKENS=16000
+NEXT_PUBLIC_LLM_PROVIDER=openai
+NEXT_PUBLIC_LLM_MODEL_COMPLEX=gpt-4o
+NEXT_PUBLIC_LLM_MODEL_SIMPLE=gpt-4o-mini
+```
+
+### DeepSeek
+
+文档：https://api-docs.deepseek.com/quick_start/pricing
+
+`deepseek-chat` 和 `deepseek-reasoner` 现均为 DeepSeek-V3.2，价格相同。
+区别：reasoner 是 thinking 模式，最大输出 32K；chat 是非 thinking，最大输出 8K。
+Cache hit 价格比 cache miss 便宜 90%。
+
+| 模型 | 输入 $/M (miss) | 输入 $/M (hit) | 输出 $/M | 上下文 |
+|------|----------------|---------------|---------|--------|
+| deepseek-chat | $0.28 | $0.028 | $0.42 | 128k |
+| deepseek-reasoner | $0.28 | $0.028 | $0.42 | 128k |
+
+```bash
+LLM_PROVIDER=deepseek
+LLM_BASE_URL=https://api.deepseek.com/v1
+LLM_API_KEY=sk-...
+LLM_MODEL_COMPLEX=deepseek-reasoner
+LLM_MODEL_SIMPLE=deepseek-chat
+INPUT_COST_PER_M_COMPLEX=0.28
+OUTPUT_COST_PER_M_COMPLEX=0.42
+INPUT_COST_PER_M_SIMPLE=0.28
+OUTPUT_COST_PER_M_SIMPLE=0.42
+LLM_CONTEXT_WINDOW=128000
+LLM_MAX_TOKENS=32000
+NEXT_PUBLIC_LLM_PROVIDER=deepseek
+NEXT_PUBLIC_LLM_MODEL_COMPLEX=deepseek-reasoner
+NEXT_PUBLIC_LLM_MODEL_SIMPLE=deepseek-chat
+```
+
+### MiniMax
+
+文档：https://platform.minimax.io/docs/guides/pricing-paygo
+
+highspeed 变体延迟更低，价格翻倍。
+
+| 模型 | 输入 $/M | 输出 $/M | 上下文 |
+|------|---------|---------|--------|
+| MiniMax-M2.7 | $0.30 | $1.20 | 204k |
+| MiniMax-M2.7-highspeed | $0.60 | $2.40 | 204k |
+| MiniMax-M2.5 | $0.30 | $1.20 | 204k |
+| MiniMax-M2.5-highspeed | $0.60 | $2.40 | 204k |
+| MiniMax-M2 | $0.30 | $1.20 | 204k |
+
+```bash
+LLM_PROVIDER=minimax
+LLM_BASE_URL=https://api.minimaxi.com/v1
+LLM_API_KEY=...
+LLM_MODEL_COMPLEX=MiniMax-M2.7
+LLM_MODEL_SIMPLE=MiniMax-M2.5
+INPUT_COST_PER_M_COMPLEX=0.30
+OUTPUT_COST_PER_M_COMPLEX=1.20
+INPUT_COST_PER_M_SIMPLE=0.30
+OUTPUT_COST_PER_M_SIMPLE=1.20
+LLM_CONTEXT_WINDOW=204800
+LLM_MAX_TOKENS=131072
+NEXT_PUBLIC_LLM_PROVIDER=minimax
+NEXT_PUBLIC_LLM_MODEL_COMPLEX=MiniMax-M2.7
+NEXT_PUBLIC_LLM_MODEL_SIMPLE=MiniMax-M2.5
+```
+
+### Kimi / Moonshot
+
+文档：https://platform.kimi.ai/docs/pricing/chat
+
+kimi-k2.5 是最新编程模型，cache hit 价格比 cache miss 便宜 83%。
+moonshot-v1 系列是按 token 长度统一计价的旧款通用模型。
+
+| 模型 | 输入 $/M (miss) | 输入 $/M (hit) | 输出 $/M | 上下文 |
+|------|----------------|---------------|---------|--------|
+| kimi-k2.5 | $0.60 | $0.10 | $3.00 | 131k |
+| kimi-k2 | $0.55 | -- | $2.20 | 131k |
+| moonshot-v1-8k | $1.65 | -- | $1.65 | 8k |
+| moonshot-v1-32k | $3.29 | -- | $3.29 | 32k |
+| moonshot-v1-128k | $8.22 | -- | $8.22 | 128k |
+
+```bash
+LLM_PROVIDER=moonshot
+LLM_BASE_URL=https://api.moonshot.cn/v1
+LLM_API_KEY=sk-...
+LLM_MODEL_COMPLEX=kimi-k2.5
+LLM_MODEL_SIMPLE=moonshot-v1-32k
+INPUT_COST_PER_M_COMPLEX=0.60
+OUTPUT_COST_PER_M_COMPLEX=3.00
+INPUT_COST_PER_M_SIMPLE=3.29
+OUTPUT_COST_PER_M_SIMPLE=3.29
+LLM_CONTEXT_WINDOW=131072
+LLM_MAX_TOKENS=32000
+NEXT_PUBLIC_LLM_PROVIDER=moonshot
+NEXT_PUBLIC_LLM_MODEL_COMPLEX=kimi-k2.5
+NEXT_PUBLIC_LLM_MODEL_SIMPLE=moonshot-v1-32k
+```
+
+### GLM / Zhipu AI
+
+国际 API：https://api.z.ai/v1（文档：https://docs.z.ai/guides/overview/pricing）
+国内 API：https://open.bigmodel.cn/api/paas/v4
+
+`glm-4.7-flash` 和 `glm-4.5-flash` 完全免费，可用作 simple 模型把编排成本压到零。
+
+| 模型 | 输入 $/M | 输出 $/M | 备注 |
+|------|---------|---------|------|
+| glm-5.1 | $1.40 | $4.40 | |
+| glm-5 | $1.00 | $3.20 | |
+| glm-5-turbo | $1.20 | $4.00 | |
+| glm-4.7 | $0.60 | $2.20 | |
+| glm-4.7-flashx | $0.07 | $0.40 | 轻量快速 |
+| glm-4.7-flash | 免费 | 免费 | |
+| glm-4.5 | $0.60 | $2.20 | |
+| glm-4.5-x | $2.20 | $8.90 | 32B MoE |
+| glm-4.5-air | $0.20 | $1.10 | 适合 simple 模型 |
+| glm-4.5-airx | $1.10 | $4.50 | |
+| glm-4.5-flash | 免费 | 免费 | |
+| glm-4-32b-0414-128k | $0.10 | $0.10 | |
+
+```bash
+LLM_PROVIDER=z-ai
+LLM_BASE_URL=https://api.z.ai/v1
+LLM_API_KEY=...
+LLM_MODEL_COMPLEX=glm-4.7
+LLM_MODEL_SIMPLE=glm-4.5-air        # 或 glm-4.7-flash（免费）
+INPUT_COST_PER_M_COMPLEX=0.60
+OUTPUT_COST_PER_M_COMPLEX=2.20
+INPUT_COST_PER_M_SIMPLE=0.20        # glm-4.7-flash 填 0
+OUTPUT_COST_PER_M_SIMPLE=1.10       # glm-4.7-flash 填 0
+LLM_CONTEXT_WINDOW=128000
+LLM_MAX_TOKENS=32000
+NEXT_PUBLIC_LLM_PROVIDER=z-ai
+NEXT_PUBLIC_LLM_MODEL_COMPLEX=glm-4.7
+NEXT_PUBLIC_LLM_MODEL_SIMPLE=glm-4.5-air
+```
+
+### Google Gemini
+
+文档：https://ai.google.dev/gemini-api/docs/pricing
+
+2.5 Pro 按 prompt 长度分档：≤200k 标准价，>200k 翻倍。所有模型有免费额度。
+
+| 模型 | 输入 $/M | 输出 $/M | Cache hit $/M | 上下文 |
+|------|---------|---------|--------------|--------|
+| gemini-2.5-pro | $1.25 | $10.0 | $0.125 | 1M |
+| gemini-2.5-flash | $0.30 | $2.50 | $0.03 | 1M |
+| gemini-2.5-flash-lite | $0.10 | $0.40 | $0.01 | 1M |
+| gemini-3-flash (preview) | $0.50 | $3.00 | $0.05 | 1M |
+| gemini-3.1-pro (preview) | $2.00 | $12.00 | -- | 1M |
+
+```bash
+LLM_PROVIDER=google
+LLM_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
+LLM_API_KEY=...
+LLM_MODEL_COMPLEX=gemini-2.5-pro
+LLM_MODEL_SIMPLE=gemini-2.5-flash
+INPUT_COST_PER_M_COMPLEX=1.25
+OUTPUT_COST_PER_M_COMPLEX=10.0
+INPUT_COST_PER_M_SIMPLE=0.30
+OUTPUT_COST_PER_M_SIMPLE=2.50
+LLM_CONTEXT_WINDOW=1000000
+LLM_MAX_TOKENS=65536
+NEXT_PUBLIC_LLM_PROVIDER=google
+NEXT_PUBLIC_LLM_MODEL_COMPLEX=gemini-2.5-pro
+NEXT_PUBLIC_LLM_MODEL_SIMPLE=gemini-2.5-flash
+```
+
+### Mistral AI
+
+文档：https://mistral.ai/pricing
+
+| 模型 | 输入 $/M | 输出 $/M | 上下文 |
+|------|---------|---------|--------|
+| mistral-large-3 | $2.0 | $6.0 | 128k |
+| mistral-medium-3 | $1.0 | $3.0 | 128k |
+| mistral-small-3.1 | $0.20 | $0.60 | 128k |
+| mistral-nemo | $0.02 | $0.04 | 128k |
+
+```bash
+LLM_PROVIDER=mistral
+LLM_BASE_URL=https://api.mistral.ai/v1
+LLM_API_KEY=...
+LLM_MODEL_COMPLEX=mistral-large-3
+LLM_MODEL_SIMPLE=mistral-small-3.1
+INPUT_COST_PER_M_COMPLEX=2.0
+OUTPUT_COST_PER_M_COMPLEX=6.0
+INPUT_COST_PER_M_SIMPLE=0.20
+OUTPUT_COST_PER_M_SIMPLE=0.60
+LLM_CONTEXT_WINDOW=128000
+LLM_MAX_TOKENS=32000
+NEXT_PUBLIC_LLM_PROVIDER=mistral
+NEXT_PUBLIC_LLM_MODEL_COMPLEX=mistral-large-3
+NEXT_PUBLIC_LLM_MODEL_SIMPLE=mistral-small-3.1
+```
+
+---
+
+## 计费原理
+
+### Token 数：估算而非真实值
+
+OpenClaw hook 事件**不包含真实 token 数**（hook payload 中无 `usage` / `prompt_tokens` / `completion_tokens` 字段，这是平台限制）。当前系统用工具调用参数的字符长度粗估：
+
+```
+inputTokens  ≈ JSON.stringify(params).length / 4
+outputTokens ≈ JSON.stringify(params).length / 8
+```
+
+**实际误差可达 5-20 倍**，因为：
+- 只统计了工具调用参数的字符长度，完全忽略对话历史和 system prompt 的 token 消耗
+- output 固定按 input 的一半算，没有依据
+- 中文内容会低估（中文字符占更多 token）
+
+Dashboard 上显示的 token 数和费用是量级参考，不能用来跟供应商账单对账。
+
+### 双轨计价
+
+每次工具调用时，根据 `sessionId` 判断模型 tier，累积到对应计数器：
+
+```
+sessionId === "main"   →  simple  →  INPUT_COST_PER_M_SIMPLE / OUTPUT_COST_PER_M_SIMPLE
+sessionId !== "main"   →  complex →  INPUT_COST_PER_M_COMPLEX / OUTPUT_COST_PER_M_COMPLEX
+```
+
+`agent_end` 事件触发时，分两条 metrics 记录上报至 dashboard：
+
+| channel | model | 计价变量 |
+|---------|-------|---------|
+| `orchestrator` | `LLM_PROVIDER/LLM_MODEL_SIMPLE` | `*_SIMPLE` |
+| `subagent` | `LLM_PROVIDER/LLM_MODEL_COMPLEX` | `*_COMPLEX` |
+
+### 计价变量优先级
+
+```
+INPUT_COST_PER_M_COMPLEX    →  未设置时读 INPUT_COST_PER_M   →  未设置时用 3.0
+OUTPUT_COST_PER_M_COMPLEX   →  未设置时读 OUTPUT_COST_PER_M  →  未设置时用 15.0
+```
+
+Simple 模型同理。
+
+### 改进方向
+
+当前估算方案是 OpenClaw 平台限制下的权宜之计。已知可行的改进路径：
+
+1. **Session JSONL 解析**：OpenClaw 在 `~/.openclaw/agents/<id>/sessions/*.jsonl` 中存储对话记录，启用 `includeTranscriptUsage` 后每个 turn 包含真实 token 数。可在 heartbeat 结束后轮询解析。
+2. **供应商 Admin API**：Anthropic 提供 `/v1/organizations/usage_report/messages` 接口（需 admin key），可查真实用量。OpenAI 也有类似接口。DeepSeek/MiniMax/GLM 暂无。
+3. **HTTP 代理拦截**：在 gateway 和供应商之间加代理，从 response header/body 提取真实 usage。架构侵入较大。
+
+---
+
+## 预算熔断
+
+### 工作原理
+
+每次 heartbeat 的 step 0c 调用 `GET /api/agent/health-check`，该接口会：
+
+1. 查询 `metrics_tokens` 表的累计 `sum(cost_usd)`
+2. 与预算上限比较（优先读 dashboard settings，次之读 `BUDGET_USD_TOTAL` env var）
+3. 超限时在 `directives` 头部插入 `BUDGET EXHAUSTED` 指令
+
+Agent 读到该指令后停止新工作（不 spawn 新实现、不提交 PR）。
+
+### 配置
+
+**方式 A：env var（启动时固定）**
+
+```bash
+BUDGET_USD_TOTAL=20.0   # 0 = 不限制
+```
+
+**方式 B：Dashboard Settings（运行时可调）**
+
+Settings 页的 `totalBudgetUsd` 字段，修改后下一个 heartbeat 周期立即生效，无需重启。
+
+优先级：Dashboard settings > `BUDGET_USD_TOTAL` env var > 0（不限制）。
+
+### 恢复
+
+在 Dashboard Settings 页提高 `totalBudgetUsd`，下一次 heartbeat 会重新评估，自动恢复工作。
+
+### Health-check 响应格式
+
+```json
+{
+  "budget": {
+    "totalCostUsd": 18.42,
+    "totalBudgetUsd": 20.0,
+    "remainingUsd": 1.58,
+    "exhausted": false
+  },
+  "directives": []
+}
+```
+
+`exhausted: true` 时 directives 会包含：
+
+```
+BUDGET EXHAUSTED: Spent $20.01 of $20.00 total budget.
+STOP all new work immediately — do NOT spawn new implementations or submit PRs.
+To resume: raise totalBudgetUsd in dashboard Settings or increase BUDGET_USD_TOTAL env var and restart.
+```
+
+### 注意事项
+
+- 熔断基于估算成本，实际账单可能有偏差（见上方计费原理）
+- `totalBudgetUsd = 0` 表示不限制，不会触发熔断
+- `metrics_tokens` 表有 30 天数据保留策略，超期数据会被清理，清理后累计值会重置
+
+---
+
+## 每模型 Token 熔断
+
+与美元总预算并行的另一道闸门：按**模型**配置 token 上限，超限即停止使用该模型。
+
+### 工作原理
+
+同一个 health-check 接口还会聚合 `metrics_tokens` 表中每个模型的 `sum(input_tokens + output_tokens)`，并按 **bare model name**（model 路径的最后一段，小写）归并跨供应商的用量。任一模型的累计值 ≥ 配置上限即视为超支：
+
+1. `directives` 顶部追加 `MODEL TOKEN BUDGET EXHAUSTED: <model> used X/Y tokens. STOP using this model across ALL providers ...`
+2. 响应体新增 `modelBudgets` 字段（`exhausted` 数组、`usage` 映射、`caps` 映射）
+3. Dashboard 全局横幅（`ModelBudgetBanner`）轮询 health-check，检测到 `modelBudgets.exhausted` 非空即在所有页面顶部渲染红色提示
+
+### 配置
+
+**方式 A：env var（启动时固定）**
+
+```bash
+MODEL_TOKEN_BUDGETS='{"glm-4.6":20000000,"deepseek-chat":50000000}'
+```
+
+**方式 B：Dashboard Settings（运行时可调）**
+
+通过 `PUT /api/settings` 更新 `modelTokenBudgets` 字段：
+
+```bash
+curl -X PUT http://localhost:3000/api/settings \
+  -H 'Content-Type: application/json' \
+  -d '{"modelTokenBudgets":{"glm-4.6":20000000}}'
+```
+
+API 会自动把 key 归一化为 bare model name（小写、剥前缀）。
+
+优先级：Dashboard settings > `MODEL_TOKEN_BUDGETS` env var > `{}`（不限制）。
+
+### Bare-name 匹配规则
+
+| 写入的 model 字段 | 归一化后 |
+|------------------|---------|
+| `z-ai/glm-4.6` | `glm-4.6` |
+| `openrouter/glm-4.6` | `glm-4.6` |
+| `openrouter/anthropic/claude-opus-4-6` | `claude-opus-4-6` |
+| `GLM-4.6` | `glm-4.6` |
+| `glm-4.6` | `glm-4.6` |
+
+配置 key 同样会经过此归一化，因此大小写以及是否带供应商前缀都不影响匹配。
+
+### Health-check 响应格式（新增字段）
+
+```json
+{
+  "modelBudgets": {
+    "exhausted": [
+      { "model": "glm-4.6", "used": 20300000, "cap": 20000000 }
+    ],
+    "usage": { "glm-4.6": 20300000, "claude-opus-4-6": 1200000 },
+    "caps":  { "glm-4.6": 20000000 }
+  }
+}
+```
+
+### 恢复
+
+提高 `modelTokenBudgets["<model>"]` 的值（dashboard settings 或 env var），下一次 health-check 即可撤销 directive，banner 消失，agent 自动恢复使用该模型。
diff --git a/docs/quickstart.md b/docs/quickstart.md
new file mode 100644
index 0000000..b64d23f
--- /dev/null
+++ b/docs/quickstart.md
@@ -0,0 +1,139 @@
+# 快速上手
+
+## 前置要求
+
+- Node.js 22+
+- GitHub Classic Token（`ghp_*` 格式，至少需要 `public_repo` scope；涉及私有仓库时用 `repo`）
+  - Fine-grained token（`github_pat_*`）无法在别人的仓库创建 PR，不适用
+- 任意 OpenAI 兼容的 LLM API key
+
+## 1. 配置模型
+
+复制 `.env.example` 为 `.env`，填入 API 信息：
+
+```bash
+cp .env.example .env
+```
+
+最少需要填 6 个变量：
+
+```bash
+GITHUB_TOKEN=ghp_your-classic-token      # 必须是 classic token
+LLM_PROVIDER=deepseek                     # 供应商标识
+LLM_BASE_URL=https://api.deepseek.com/v1  # OpenAI 兼容端点
+LLM_API_KEY=sk-your-key                   # 供应商 API Key
+LLM_MODEL_COMPLEX=deepseek-reasoner       # 子 agent 用的模型（需要强推理）
+LLM_MODEL_SIMPLE=deepseek-chat            # 编排用的模型（轻量即可）
+```
+
+完整供应商配置参考 [model-routing.md](model-routing.md)，支持 Anthropic、OpenAI、DeepSeek、Google Gemini、Mistral、MiniMax、Kimi、GLM 等 8 家供应商。
+
+## 2. 切换模型
+
+修改 `.env` 中的模型相关变量（`LLM_*`）后重启：
+
+```bash
+bash scripts/restart.sh
+```
+
+`restart.sh` 会读取 `.env` → 替换 `config/openclaw.json` 中的占位符 → 部署到 `~/.openclaw/openclaw.json` → 重启 gateway。
+
+验证是否生效：
+
+```bash
+# 查看 gateway 日志中的模型信息
+openclaw logs 2>&1 | grep "agent model"
+# 应该输出: [gateway] agent model: deepseek/deepseek-chat
+```
+
+## 3. 预算控制
+
+### 在哪里看额度
+
+**Dashboard → Overview 页**：顶部 metric cards 展示 24h cost 和 total cost。
+
+**Dashboard → Health 页**：Cost Tracking Chart 展示花费趋势。
+
+**API**：
+
+```bash
+# 健康检查接口，返回预算信息
+curl http://localhost:3000/api/agent/health-check | jq '.budget'
+# {
+#   "totalCostUsd": 0.42,
+#   "totalBudgetUsd": 20.0,
+#   "remainingUsd": 19.58,
+#   "exhausted": false
+# }
+```
+
+### 设置预算上限
+
+**方式 A：环境变量（启动时固定）**
+
+```bash
+BUDGET_USD_TOTAL=20.0   # 美元，0 = 不限制
+```
+
+**方式 B：Dashboard Settings（运行时可调）**
+
+访问 Dashboard Settings 页，修改 `totalBudgetUsd` 字段。下一个 heartbeat 周期（5 分钟）立即生效，无需重启。
+
+当累计花费超过预算时，agent 自动停止新工作，dashboard 显示 `BUDGET EXHAUSTED` 指令。提高预算后自动恢复。
+
+### 计费精度
+
+当前 token 数是**估算值**（基于工具调用参数的字符长度），不是 LLM API 返回的真实 token 数。实际误差可达 5-20 倍。Dashboard 上的费用是量级参考，不能直接和供应商账单对账。详见 [model-routing.md § 计费原理](model-routing.md#计费原理)。
+
+## 4. 双轨定价
+
+系统区分两种模型，各自独立计价：
+
+| 角色 | 模型变量 | 定价变量 | 用途 |
+|------|---------|---------|------|
+| Orchestrator | `LLM_MODEL_SIMPLE` | `INPUT_COST_PER_M_SIMPLE` / `OUTPUT_COST_PER_M_SIMPLE` | heartbeat 循环、文件读写、状态路由 |
+| Sub-agents | `LLM_MODEL_COMPLEX` | `INPUT_COST_PER_M_COMPLEX` / `OUTPUT_COST_PER_M_COMPLEX` | 代码实现、bug 修复、PR review |
+
+如果不设 per-model 定价，回退到 `INPUT_COST_PER_M` / `OUTPUT_COST_PER_M`（默认 $3/$15）。
+
+dashboard 的 token metrics 表按 `model` 列区分两种模型的用量，Health 页的 Cost Tracking Chart 展示合计趋势。
+
+## 5. Dashboard
+
+### 启动
+
+```bash
+cd dashboard && npm run dev     # 开发模式，http://localhost:3000
+cd dashboard && npm run build && npm run start   # 生产模式
+```
+
+### 页面说明
+
+| 页面 | 看什么 |
+|------|--------|
+| **Overview** | agent 状态、子 agent 槽位、token/cost metrics、merge rate、PR 漏斗 |
+| **Live Feed** | agent 实时思考过程、工具调用、错误 |
+| **Pull Requests** | 所有提交的 PR，按状态/仓库/质量分筛选 |
+| **Repo Health** | 目标仓库的健康评分、merge 速率、推荐策略 |
+| **Health** | heartbeat、token 用量趋势、花费曲线 |
+| **Quality** | PR 质量分析、首次通过率、拒绝原因 |
+| **Logs** | 系统审计日志 |
+
+### 数据来源
+
+Dashboard 数据有三个来源：
+
+1. **GitHub Sync**（`/api/github/sync`）— 从 GitHub API 拉取 PR 数据，基于 `GITHUB_USERNAME` 搜索
+2. **Ingest API**（`/api/ingest/*`）— agent 的 dashboard-reporter hook 实时推送 heartbeat、metrics、conversation
+3. **本地 DB**（`dashboard/local.db`）— SQLite 存储所有数据
+
+如果 dashboard 显示 "Disconnected"，通常是 hook 推送不通（检查 `DASHBOARD_URL` 和 `CLAW_API_KEY` 环境变量）。
+
+## 6. GitHub Token 说明
+
+| Token 类型 | 格式 | 能否创建跨仓库 PR |
+|-----------|------|-----------------|
+| Classic token | `ghp_*` | 可以（有 `public_repo` 或 `repo` scope 即可） |
+| Fine-grained token | `github_pat_*` | 不能（只对指定仓库有写权限） |
+
+ClawOSS 需要对别人的仓库执行 fork → push → 创建 PR，必须使用 **Classic token**（至少 `public_repo` scope，涉及私有仓库时用 `repo`）。
diff --git a/scripts/dashboard-sync.sh b/scripts/dashboard-sync.sh
index cc9b2a2..51f2c6a 100755
--- a/scripts/dashboard-sync.sh
+++ b/scripts/dashboard-sync.sh
@@ -196,7 +196,7 @@ for line in sys.stdin:
     metrics.append({
         'inputTokens': inp,
         'outputTokens': out,
-        'model': model or 'minimax/MiniMax-M2.7',
+        'model': model or os.environ.get('CLAWOSS_PRIMARY_MODEL') or os.environ.get('CLAWOSS_DEFAULT_MODEL') or (os.environ.get('LLM_PROVIDER','anthropic') + '/' + os.environ.get('LLM_MODEL_COMPLEX','claude-opus-4-6')),
         'channel': sid
     })
 if metrics:
diff --git a/scripts/restart.sh b/scripts/restart.sh
index a2836b0..d15fca0 100755
--- a/scripts/restart.sh
+++ b/scripts/restart.sh
@@ -65,6 +65,17 @@ if [ ${#MISSING[@]} -gt 0 ]; then
     exit 1
 fi
 echo "[OK] All required tools found (python3, gh, jq, openclaw, node)"
+# Announce which service manager will supervise the gateway on this platform.
+if clawoss_is_macos; then
+    echo "[INFO] Platform: macOS — will use launchd plists for gateway and pr-ledger-sync"
+elif command -v systemctl >/dev/null 2>&1; then
+    echo "[INFO] Platform: $(uname -s) — will use systemd user units; launchd steps will be skipped"
+else
+    # Not macOS and no systemctl on PATH: nothing will supervise the gateway.
+    echo "[WARN] Platform: $(uname -s) — no launchd AND no systemctl detected."
+    echo "       Gateway will fall back to an unmanaged 'openclaw gateway run' background process."
+    echo "       It WILL NOT survive a reboot. Consider running in Docker (see deploy/docker/)."
+fi
 if [ "$SMOKE_MODE" -eq 1 ]; then
     echo "[INFO] Restart smoke mode enabled — skipping global cleanup and external side effects where possible"
 fi
@@ -138,6 +149,18 @@ REPO_CONFIG_RESOLVED=$(sed \
     -e "s|__WORKSPACE_PATH__|$WORKSPACE_DIR|g" \
     -e "s|__PROJECT_DIR__|$PROJECT_DIR|g" \
     -e "s|__HOME_DIR__|$HOME|g" \
+    -e "s|__LLM_PROVIDER__|${LLM_PROVIDER:-anthropic}|g" \
+    -e "s|__LLM_BASE_URL__|${LLM_BASE_URL:-https://api.anthropic.com/v1}|g" \
+    -e "s|__LLM_MODEL_COMPLEX__|${LLM_MODEL_COMPLEX:-claude-opus-4-6}|g" \
+    -e "s|__LLM_MODEL_SIMPLE__|${LLM_MODEL_SIMPLE:-claude-sonnet-4-6}|g" \
+    -e "s|__INPUT_COST_PER_M_COMPLEX__|${INPUT_COST_PER_M_COMPLEX:-${INPUT_COST_PER_M:-3.0}}|g" \
+    -e "s|__OUTPUT_COST_PER_M_COMPLEX__|${OUTPUT_COST_PER_M_COMPLEX:-${OUTPUT_COST_PER_M:-15.0}}|g" \
+    -e "s|__INPUT_COST_PER_M_SIMPLE__|${INPUT_COST_PER_M_SIMPLE:-${INPUT_COST_PER_M:-3.0}}|g" \
+    -e "s|__OUTPUT_COST_PER_M_SIMPLE__|${OUTPUT_COST_PER_M_SIMPLE:-${OUTPUT_COST_PER_M:-15.0}}|g" \
+    -e "s|__INPUT_COST_PER_M__|${INPUT_COST_PER_M:-3.0}|g" \
+    -e "s|__OUTPUT_COST_PER_M__|${OUTPUT_COST_PER_M:-15.0}|g" \
+    -e "s|__LLM_CONTEXT_WINDOW__|${LLM_CONTEXT_WINDOW:-200000}|g" \
+    -e "s|__LLM_MAX_TOKENS__|${LLM_MAX_TOKENS:-32000}|g" \
     "$PROJECT_DIR/config/openclaw.json")
 
 _REPO_CONFIG="$REPO_CONFIG_RESOLVED" \
@@ -157,6 +180,19 @@ _CLAW_KEY="${CLAW_API_KEY:-}" \
 _CLAWOSS_ROOT="${PROJECT_DIR}" \
 _RECORD_DECISIONS="${CLAWOSS_RECORD_DECISIONS:-1}" \
 _RECORD_OUTCOMES="${CLAWOSS_RECORD_OUTCOMES:-1}" \
+_LLM_KEY="${LLM_API_KEY:-}" \
+_LLM_BASE_URL="${LLM_BASE_URL:-}" \
+_LLM_PROVIDER="${LLM_PROVIDER:-}" \
+_LLM_MODEL_COMPLEX="${LLM_MODEL_COMPLEX:-}" \
+_LLM_MODEL_SIMPLE="${LLM_MODEL_SIMPLE:-}" \
+_INPUT_COST_PER_M="${INPUT_COST_PER_M:-}" \
+_OUTPUT_COST_PER_M="${OUTPUT_COST_PER_M:-}" \
+_INPUT_COST_PER_M_COMPLEX="${INPUT_COST_PER_M_COMPLEX:-}" \
+_OUTPUT_COST_PER_M_COMPLEX="${OUTPUT_COST_PER_M_COMPLEX:-}" \
+_INPUT_COST_PER_M_SIMPLE="${INPUT_COST_PER_M_SIMPLE:-}" \
+_OUTPUT_COST_PER_M_SIMPLE="${OUTPUT_COST_PER_M_SIMPLE:-}" \
+_BUDGET_USD_TOTAL="${BUDGET_USD_TOTAL:-}" \
+_MODEL_TOKEN_BUDGETS="${MODEL_TOKEN_BUDGETS:-}" \
 python3 -c "
 import json, os
 
@@ -198,6 +234,20 @@ env_map = {
     'CLAWOSS_ROOT': os.environ.get('_CLAWOSS_ROOT', ''),
     'CLAWOSS_RECORD_DECISIONS': os.environ.get('_RECORD_DECISIONS', ''),
     'CLAWOSS_RECORD_OUTCOMES': os.environ.get('_RECORD_OUTCOMES', ''),
+    # Generic LLM config — used by model routing system
+    'LLM_API_KEY': os.environ.get('_LLM_KEY', ''),
+    'LLM_BASE_URL': os.environ.get('_LLM_BASE_URL', ''),
+    'LLM_PROVIDER': os.environ.get('_LLM_PROVIDER', ''),
+    'LLM_MODEL_COMPLEX': os.environ.get('_LLM_MODEL_COMPLEX', ''),
+    'LLM_MODEL_SIMPLE': os.environ.get('_LLM_MODEL_SIMPLE', ''),
+    'INPUT_COST_PER_M': os.environ.get('_INPUT_COST_PER_M', ''),
+    'OUTPUT_COST_PER_M': os.environ.get('_OUTPUT_COST_PER_M', ''),
+    'INPUT_COST_PER_M_COMPLEX': os.environ.get('_INPUT_COST_PER_M_COMPLEX', ''),
+    'OUTPUT_COST_PER_M_COMPLEX': os.environ.get('_OUTPUT_COST_PER_M_COMPLEX', ''),
+    'INPUT_COST_PER_M_SIMPLE': os.environ.get('_INPUT_COST_PER_M_SIMPLE', ''),
+    'OUTPUT_COST_PER_M_SIMPLE': os.environ.get('_OUTPUT_COST_PER_M_SIMPLE', ''),
+    'BUDGET_USD_TOTAL': os.environ.get('_BUDGET_USD_TOTAL', ''),
+    'MODEL_TOKEN_BUDGETS': os.environ.get('_MODEL_TOKEN_BUDGETS', ''),
 }
 for k, v in env_map.items():
     if v:
@@ -290,8 +340,15 @@ if clawoss_is_macos && [ -f "$GATEWAY_PLIST" ]; then
     else
         echo "[OK] Gateway plist PATH already includes required dirs"
     fi
+elif clawoss_is_macos; then
+    echo "[INFO] macOS detected but no gateway plist at $GATEWAY_PLIST — 'openclaw gateway install' will create it"
 else
-    echo "[INFO] No gateway plist found at $GATEWAY_PLIST — gateway install will create it"
+    # Linux / non-macOS: launchd/PlistBuddy do not exist. OpenClaw manages the
+    # gateway via systemd user units (see step 13). PATH propagation on Linux
+    # is handled by the Environment= directives emitted into the systemd unit
+    # file, so there's nothing to do here — log it so operators can see this
+    # step was intentionally skipped rather than silently broken.
+    echo "[SKIP] Gateway plist path update — not applicable on $(uname -s) (Linux uses systemd, handled in step 13)"
 fi
 
 # ── 7. Flush context & clean sessions ─────────────────────────────────
@@ -572,7 +629,7 @@ runtime_status "running" "running" "${DASHBOARD_SYNC_STATE:-unknown}" "${RUN_CYC
 # ── Summary ───────────────────────────────────────────────────────────
 echo ""
 echo "=== ClawOSS V10 Running ==="
-echo "  Model: minimax/m2.7 (MiniMax M2.7, 204k context) + kimi-coding/k2p5 fallback"
+echo "  Model: ${LLM_PROVIDER:-anthropic}/${LLM_MODEL_COMPLEX:-claude-opus-4-6} (complex) + ${LLM_PROVIDER:-anthropic}/${LLM_MODEL_SIMPLE:-claude-sonnet-4-6} (simple/orchestrator)"
 echo "  Dashboard: https://clawoss-dashboard.vercel.app"
 echo "  Slots: 3 always-on (scout + PR monitor + PR analyst) + 10 impl/followup = 13"
 echo "  Heartbeat: 5m"
diff --git a/scripts/start.sh b/scripts/start.sh
index b8b9669..3eafe2b 100755
--- a/scripts/start.sh
+++ b/scripts/start.sh
@@ -10,7 +10,7 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 PROJECT_DIR="$(clawoss_resolve_project_dir "$0")"
 AGENT_ID="clawoss"
 WORKSPACE_DIR="$(clawoss_resolve_workspace_dir "$0")"
-AGENT_MODEL="${CLAWOSS_MODEL:-${CLAWOSS_AGENT_MODEL:-${CLAWOSS_PRIMARY_MODEL:-${CLAWOSS_DEFAULT_MODEL:-minimax/MiniMax-M2.7}}}}"
+AGENT_MODEL="${CLAWOSS_MODEL:-${CLAWOSS_AGENT_MODEL:-${CLAWOSS_PRIMARY_MODEL:-${CLAWOSS_DEFAULT_MODEL:-${LLM_PROVIDER:-anthropic}/${LLM_MODEL_SIMPLE:-claude-sonnet-4-6}}}}}"
 
 if [ -f "$PROJECT_DIR/.env" ]; then
     set -a
diff --git a/scripts/validate-config.mjs b/scripts/validate-config.mjs
index 4d09293..f49da29 100644
--- a/scripts/validate-config.mjs
+++ b/scripts/validate-config.mjs
@@ -41,8 +41,28 @@ console.log("\n=== Config Files ===");
 
 try {
   const raw = readFileSync(join(ROOT, "config/openclaw.json"), "utf8");
-  JSON.parse(raw);
-  pass("config/openclaw.json is valid JSON");
+  // config/openclaw.json is a template: scripts/restart.sh and deploy/docker/entrypoint.sh fill in
+  // its __PLACEHOLDER__ tokens at deploy time. Apply sample values so CI parses the substituted JSON.
+  const sampleSubstitutions = [
+    [/__WORKSPACE_PATH__/g, "/app/workspace"],
+    [/__PROJECT_DIR__/g, "/app"],
+    [/__HOME_DIR__/g, "/home/clawoss"],
+    [/__LLM_PROVIDER__/g, "anthropic"],
+    [/__LLM_BASE_URL__/g, "https://api.anthropic.com/v1"],
+    [/__LLM_MODEL_COMPLEX__/g, "claude-opus-4-6"],
+    [/__LLM_MODEL_SIMPLE__/g, "claude-sonnet-4-6"],
+    [/__INPUT_COST_PER_M_COMPLEX__/g, "5.0"],
+    [/__OUTPUT_COST_PER_M_COMPLEX__/g, "25.0"],
+    [/__INPUT_COST_PER_M_SIMPLE__/g, "3.0"],
+    [/__OUTPUT_COST_PER_M_SIMPLE__/g, "15.0"],
+    [/__INPUT_COST_PER_M__/g, "3.0"],
+    [/__OUTPUT_COST_PER_M__/g, "15.0"],
+    [/__LLM_CONTEXT_WINDOW__/g, "200000"],
+    [/__LLM_MAX_TOKENS__/g, "32000"],
+  ];
+  const substituted = sampleSubstitutions.reduce((text, [token, value]) => text.replace(token, value), raw);
+  JSON.parse(substituted);
+  pass("config/openclaw.json is valid JSON (post-template-substitution)");
 } catch (e) {
   fail(`config/openclaw.json: ${e.message}`);
 }
diff --git a/workspace/hooks/dashboard-reporter/handler.ts b/workspace/hooks/dashboard-reporter/handler.ts
index e5f4ce8..d35438d 100644
--- a/workspace/hooks/dashboard-reporter/handler.ts
+++ b/workspace/hooks/dashboard-reporter/handler.ts
@@ -1,15 +1,44 @@
 const DASHBOARD_URL = process.env.DASHBOARD_URL || "https://clawoss-dashboard.vercel.app";
 const AGENT_ID = "clawoss";
-const GITHUB_USERNAME = process.env.GITHUB_USERNAME || "unknown";
-const DEFAULT_MODEL =
-  process.env.CLAWOSS_PRIMARY_MODEL ||
-  process.env.CLAWOSS_DEFAULT_MODEL ||
-  "minimax/MiniMax-M2.7";
-const INPUT_COST_PER_TOKEN = 0.3 / 1_000_000;
-const OUTPUT_COST_PER_TOKEN = 1.2 / 1_000_000;
-
-let accumulatedInputTokens = 0;
-let accumulatedOutputTokens = 0;
+const GITHUB_USERNAME = process.env.GITHUB_USERNAME || "BillionClaw";
+
+// Model routing — driven by env vars. See docs/model-routing.md.
+const LLM_PROVIDER = process.env.LLM_PROVIDER || "anthropic";
+const LLM_MODEL_COMPLEX = process.env.LLM_MODEL_COMPLEX || "claude-opus-4-6";
+const LLM_MODEL_SIMPLE  = process.env.LLM_MODEL_SIMPLE  || "claude-sonnet-4-6";
+
+const MODEL_COMPLEX = `${LLM_PROVIDER}/${LLM_MODEL_COMPLEX}`;
+const MODEL_SIMPLE  = `${LLM_PROVIDER}/${LLM_MODEL_SIMPLE}`;
+
+// Per-model pricing (USD per million tokens); complex and simple tiers can differ.
+// Per-tier vars fall back to INPUT_COST_PER_M / OUTPUT_COST_PER_M, then to 3.0 / 15.0.
+const FALLBACK_IN  = process.env.INPUT_COST_PER_M  || "3.0";
+const FALLBACK_OUT = process.env.OUTPUT_COST_PER_M || "15.0";
+
+// Converts the first candidate that parses to a finite, non-negative USD-per-million rate
+// into a per-token price, so a malformed env var cannot turn costUsd into NaN.
+function perTokenCost(hardDefault: number, ...candidates: (string | undefined)[]): number {
+  const rate = candidates.map((c) => parseFloat(c || "")).find((v) => Number.isFinite(v) && v >= 0);
+  return (rate === undefined ? hardDefault : rate) / 1_000_000;
+}
+
+const PRICING = {
+  complex: { input: perTokenCost(3.0, process.env.INPUT_COST_PER_M_COMPLEX, FALLBACK_IN), output: perTokenCost(15.0, process.env.OUTPUT_COST_PER_M_COMPLEX, FALLBACK_OUT) },
+  simple:  { input: perTokenCost(3.0, process.env.INPUT_COST_PER_M_SIMPLE,  FALLBACK_IN), output: perTokenCost(15.0, process.env.OUTPUT_COST_PER_M_SIMPLE,  FALLBACK_OUT) },
+};
+
+// Maps a session key to the model tier its usage is attributed to.
+// Only the literal "main" session (the orchestrator) counts as the simple tier;
+// every other session key is treated as a sub-agent on the complex tier.
+function modelTierForSession(sessionId: string): "complex" | "simple" {
+  return sessionId !== "main" ? "complex" : "simple";
+}
+
+// Per-tier token accumulators — flushed to dashboard on agent_end
+const accumulated = {
+  complex: { inputTokens: 0, outputTokens: 0 },
+  simple:  { inputTokens: 0, outputTokens: 0 },
+};
 let accumulatedDurationMs = 0;
 let toolCallCount = 0;
 let startTime = Date.now();
@@ -171,7 +200,7 @@ async function postState(apiKey: string): Promise<void> {
         metadata: {
           agent_id: AGENT_ID,
           tool_calls: toolCallCount,
-          model: DEFAULT_MODEL,
+          model: MODEL_SIMPLE,  // postState reflects orchestrator (main session)
         },
       }),
       signal: controller.signal,
@@ -249,8 +278,9 @@ const handler = async (event: {
       const params = event.params || {};
       if (typeof params === "object") {
         const paramStr = JSON.stringify(params);
-        accumulatedInputTokens += Math.ceil(paramStr.length / 4);
-        accumulatedOutputTokens += Math.ceil(paramStr.length / 8);
+        const tier = modelTierForSession(sessionId);
+        accumulated[tier].inputTokens  += Math.ceil(paramStr.length / 4);
+        accumulated[tier].outputTokens += Math.ceil(paramStr.length / 8);
       }
 
       // Track repos from tool params
@@ -488,7 +518,7 @@ const handler = async (event: {
           role: "assistant",
           content: event.assistantMessage.slice(0, 5000),
           timestamp: ts,
-          tokenCount: accumulatedOutputTokens || null,
+          tokenCount: accumulated[modelTierForSession(sessionId)].outputTokens || null,
           metadata: {
             agent_id: AGENT_ID,
             event: "agent_end",
@@ -518,7 +548,7 @@ const handler = async (event: {
         role: "system",
         content: event.error
           ? `Run ended with error: ${event.error} (${toolCallCount} tool calls, ${uptimeSeconds}s)`
-          : `Run completed: ${toolCallCount} tool calls, ${uptimeSeconds}s, ~${accumulatedInputTokens + accumulatedOutputTokens} tokens`,
+          : `Run completed: ${toolCallCount} tool calls, ${uptimeSeconds}s, ~${accumulated.complex.inputTokens + accumulated.complex.outputTokens + accumulated.simple.inputTokens + accumulated.simple.outputTokens} tokens`,
         timestamp: ts,
         metadata: {
           agent_id: AGENT_ID,
@@ -550,7 +580,7 @@ const handler = async (event: {
           metadata: {
             session_key: sessionId,
             tool_calls: toolCallCount,
-            model: DEFAULT_MODEL,
+            model: modelTierForSession(sessionId) === "simple" ? MODEL_SIMPLE : MODEL_COMPLEX,
             repos: Array.from(reposUsed),
             skill: lastSkillName,
           },
@@ -558,38 +588,38 @@ const handler = async (event: {
         apiKey
       );
 
-      // Send accumulated metrics if any
-      if (accumulatedInputTokens > 0 || accumulatedOutputTokens > 0) {
-        const costUsd =
-          accumulatedInputTokens * INPUT_COST_PER_TOKEN +
-          accumulatedOutputTokens * OUTPUT_COST_PER_TOKEN;
+      // Send accumulated metrics — one entry per model tier that has usage
+      const metricsEntries: Record<string, unknown>[] = [];
+      const tiers = (["complex", "simple"] as const).filter(
+        (t) => accumulated[t].inputTokens > 0 || accumulated[t].outputTokens > 0
+      );
 
-        await postNonBlocking(
-          "/api/ingest/metrics",
-          {
-            metrics: [
-              {
-                channel: "agent",
-                provider: "minimax",
-                model: DEFAULT_MODEL,
-                inputTokens: accumulatedInputTokens,
-                outputTokens: accumulatedOutputTokens,
-                costUsd: Math.round(costUsd * 1_000_000) / 1_000_000,
-                runDurationMs: accumulatedDurationMs,
-                contextTokens: accumulatedInputTokens,
-              },
-            ],
-          },
-          apiKey
-        );
+      // One metrics row per tier with usage, then zero that tier's counters. The shared run duration
+      // goes on the first reported tier only: never double-counted, never dropped for simple-only runs.
+      for (const tier of tiers) {
+        const { inputTokens, outputTokens } = accumulated[tier];
+        const costUsd = inputTokens * PRICING[tier].input + outputTokens * PRICING[tier].output;
+        metricsEntries.push({
+          channel: tier === "complex" ? "subagent" : "orchestrator",
+          provider: `${LLM_PROVIDER}-direct`,
+          model: tier === "complex" ? MODEL_COMPLEX : MODEL_SIMPLE,
+          inputTokens,
+          outputTokens,
+          costUsd: Math.round(costUsd * 1_000_000) / 1_000_000,
+          runDurationMs: tier === tiers[0] ? accumulatedDurationMs : 0,
+          contextTokens: inputTokens,
+        });
+        accumulated[tier].inputTokens = accumulated[tier].outputTokens = 0;
+      }
 
-        // Reset accumulators
-        accumulatedInputTokens = 0;
-        accumulatedOutputTokens = 0;
-        accumulatedDurationMs = 0;
-        toolCallCount = 0;
+      if (metricsEntries.length > 0) {
+        await postNonBlocking("/api/ingest/metrics", { metrics: metricsEntries }, apiKey);
       }
 
+      // Reset shared accumulators
+      accumulatedDurationMs = 0;
+      toolCallCount = 0;
+
       // Log agent completion with enriched metadata
       await postNonBlocking(
         "/api/ingest/logs",
diff --git a/workspace/hooks/dashboard-reporter/post-tool.sh b/workspace/hooks/dashboard-reporter/post-tool.sh
index bdcee2c..4199065 100755
--- a/workspace/hooks/dashboard-reporter/post-tool.sh
+++ b/workspace/hooks/dashboard-reporter/post-tool.sh
@@ -6,7 +6,7 @@
 DASHBOARD_URL="${DASHBOARD_URL:-https://clawoss-dashboard.vercel.app}"
 API_KEY="${CLAW_API_KEY:?Set CLAW_API_KEY env var}"
 SESSION_ID="${CLAUDE_SESSION_ID:-agent-session}"
-DEFAULT_MODEL="${CLAWOSS_PRIMARY_MODEL:-${CLAWOSS_DEFAULT_MODEL:-minimax/MiniMax-M2.7}}"
+DEFAULT_MODEL="${CLAWOSS_PRIMARY_MODEL:-${CLAWOSS_DEFAULT_MODEL:-${LLM_PROVIDER:-anthropic}/${LLM_MODEL_COMPLEX:-claude-opus-4-6}}}"
 
 # Read the hook input from stdin
 INPUT=$(cat 2>/dev/null || echo '{}')
@@ -33,7 +33,7 @@ PAYLOAD=$(jq -n \
   --argjson durationMs "$DURATION" \
   --arg ts "$TIMESTAMP" \
   --arg resultContent "$TOOL_OUTPUT_RAW" \
-  --arg defaultModel "$DEFAULT_MODEL" \
+  --arg model "$DEFAULT_MODEL" \
   '{
     messages: [
       {
@@ -44,7 +44,7 @@ PAYLOAD=$(jq -n \
         toolCallId: $toolCallId,
         durationMs: $durationMs,
         timestamp: $ts,
-        metadata: { agent_id: "clawoss", model: $defaultModel }
+        metadata: { agent_id: "clawoss", model: $model }
       },
       {
         sessionId: $sid,