diff --git a/.env.example b/.env.example index b449c5a..6f84c32 100644 --- a/.env.example +++ b/.env.example @@ -4,32 +4,59 @@ # === Required === # GitHub Personal Access Token — needs public_repo scope at minimum +# IMPORTANT: must be a classic token (ghp_*). Fine-grained tokens (github_pat_*) +# cannot create PRs in other people's repos. See docs/quickstart.md. GITHUB_TOKEN=ghp_your-token-here -# Choose models in provider/model format. -# Examples: -# - openai/gpt-4.1 -# - openai/gpt-4.1-mini -# - openrouter/openai/gpt-4.1-mini -# - deepseek/deepseek-chat -# - minimax/MiniMax-M2.7 -# - kimi/moonshot-v1-32k -CLAWOSS_PRIMARY_MODEL=openai/gpt-4.1 -CLAWOSS_FALLBACK_MODEL= -CLAWOSS_SUBAGENT_MODEL=openai/gpt-4.1-mini -CLAWOSS_HEARTBEAT_MODEL=openai/gpt-4.1 -CLAWOSS_AGENT_MODEL=openai/gpt-4.1 - -# Set the API key(s) for whichever provider(s) your selected models use. -OPENAI_API_KEY=sk-openai-your-key-here -OPENROUTER_API_KEY= -DEEPSEEK_API_KEY= -MINIMAX_API_KEY= -KIMI_API_KEY= - -# Optional custom OpenAI-compatible endpoint. -CUSTOM_OPENAI_API_KEY= -CUSTOM_OPENAI_BASE_URL= +# === LLM Model Configuration === +# See docs/model-routing.md for full provider examples (OpenAI, DeepSeek, MiniMax, etc.) +# See the "Provider Quick Reference" section at the bottom of this file. + +# Provider name — becomes the OpenClaw provider block key and model ID prefix +LLM_PROVIDER=anthropic + +# OpenAI-compatible API endpoint for this provider +LLM_BASE_URL=https://api.anthropic.com/v1 + +# API key for the provider above +LLM_API_KEY=sk-ant-your-key-here + +# Complex model (Opus-tier) — used by implementation sub-agents (code writing, debugging) +LLM_MODEL_COMPLEX=claude-opus-4-6 + +# Simple model (Sonnet-tier) — used by orchestrator/heartbeat (routing, file reads) +LLM_MODEL_SIMPLE=claude-sonnet-4-6 + +# Cost per million tokens in USD (used for dashboard spend display). +# Per-model pricing — complex (Opus-tier) and simple (Sonnet-tier) can differ. 
+# If per-model vars are not set, INPUT_COST_PER_M / OUTPUT_COST_PER_M are used as fallback. +INPUT_COST_PER_M_COMPLEX=5.0 # Claude Opus 4.6 input ($5/M) +OUTPUT_COST_PER_M_COMPLEX=25.0 # Claude Opus 4.6 output ($25/M) +INPUT_COST_PER_M_SIMPLE=3.0 # Claude Sonnet 4.6 input ($3/M) +OUTPUT_COST_PER_M_SIMPLE=15.0 # Claude Sonnet 4.6 output ($15/M) +# Fallback if per-model vars are absent (set to your average expected price) +INPUT_COST_PER_M=3.0 +OUTPUT_COST_PER_M=15.0 + +# Context window and max output tokens (model-specific) +LLM_CONTEXT_WINDOW=1000000 +LLM_MAX_TOKENS=32000 + +# Public vars exposed to dashboard browser bundle (mirrors LLM_* above) +NEXT_PUBLIC_LLM_PROVIDER=anthropic +NEXT_PUBLIC_LLM_MODEL_COMPLEX=claude-opus-4-6 +NEXT_PUBLIC_LLM_MODEL_SIMPLE=claude-sonnet-4-6 + +# === Budget Control === +# Total cumulative spend cap in USD. Agent pauses when reached. 0 = unlimited. +BUDGET_USD_TOTAL=20.0 + +# Per-model token caps (JSON map). Keys are BARE model names — matched across +# all providers (e.g. `glm-4.6` covers `z-ai/glm-4.6`, `openrouter/glm-4.6`, etc). +# Value is total tokens (input + output). Missing key or value 0 = unlimited. +# When a model exceeds its cap, the agent stops using it AND a red banner shows +# at the top of the dashboard. See docs/model-routing.md. +# MODEL_TOKEN_BUDGETS={"glm-4.6":20000000,"deepseek-chat":50000000,"claude-opus-4-6":10000000} # === Optional === @@ -42,8 +69,146 @@ DASHBOARD_URL=https://clawoss-dashboard.vercel.app CLAW_API_KEY=your-shared-secret-here # Override only if your clone lives outside the default detected path -CLAWOSS_ROOT=/absolute/path/to/ClawOSS +# CLAWOSS_ROOT=/absolute/path/to/ClawOSS -# Structured event logging for decision/outcome/reflection pipelines +# Structured event logging for decision/outcome/reflection pipelines (alpha) CLAWOSS_RECORD_DECISIONS=1 CLAWOSS_RECORD_OUTCOMES=1 + +# === Legacy / Alpha compatibility (optional overrides) === +# If set, these take priority over LLM_* above. 
Useful during the alpha +# transition when you want to pin a specific full model ID. +# CLAWOSS_PRIMARY_MODEL=anthropic/claude-opus-4-6 +# CLAWOSS_SUBAGENT_MODEL=anthropic/claude-opus-4-6 +# CLAWOSS_HEARTBEAT_MODEL=anthropic/claude-sonnet-4-6 +# CLAWOSS_AGENT_MODEL=anthropic/claude-sonnet-4-6 +# CLAWOSS_FALLBACK_MODEL= + +# Legacy per-provider API keys (no longer required — use LLM_API_KEY above). +# Kept for historical alpha deployments. +# KIMI_API_KEY=sk-kimi-your-key-here +# MINIMAX_API_KEY=your-minimax-key-here + +# ============================================================================= +# Provider Quick Reference — copy the block you want and replace the defaults +# Prices: April 2026. Verify at provider docs before setting budget. +# ============================================================================= + +# ── Google Gemini ───────────────────────────────────────────────────────────── +# Docs: https://ai.google.dev/gemini-api/docs/pricing +# Uses OpenAI-compatible endpoint. 2.5 Pro >200k context doubles price. +# All models have free tier with limited requests. +# +# LLM_PROVIDER=google +# LLM_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai +# LLM_API_KEY=your-gemini-key +# LLM_MODEL_COMPLEX=gemini-2.5-pro +# LLM_MODEL_SIMPLE=gemini-2.5-flash +# INPUT_COST_PER_M_COMPLEX=1.25 +# OUTPUT_COST_PER_M_COMPLEX=10.0 +# INPUT_COST_PER_M_SIMPLE=0.30 +# OUTPUT_COST_PER_M_SIMPLE=2.50 +# LLM_CONTEXT_WINDOW=1000000 +# LLM_MAX_TOKENS=65536 +# NEXT_PUBLIC_LLM_PROVIDER=google +# NEXT_PUBLIC_LLM_MODEL_COMPLEX=gemini-2.5-pro +# NEXT_PUBLIC_LLM_MODEL_SIMPLE=gemini-2.5-flash + +# ── Mistral AI ─────────────────────────────────────────────────────────────── +# Docs: https://mistral.ai/pricing +# Large 3 for complex, Small 3.1 for orchestration. Nemo ($0.02/M) is cheapest. 
+# +# LLM_PROVIDER=mistral +# LLM_BASE_URL=https://api.mistral.ai/v1 +# LLM_API_KEY=your-mistral-key +# LLM_MODEL_COMPLEX=mistral-large-3 +# LLM_MODEL_SIMPLE=mistral-small-3.1 +# INPUT_COST_PER_M_COMPLEX=2.0 +# OUTPUT_COST_PER_M_COMPLEX=6.0 +# INPUT_COST_PER_M_SIMPLE=0.20 +# OUTPUT_COST_PER_M_SIMPLE=0.60 +# LLM_CONTEXT_WINDOW=128000 +# LLM_MAX_TOKENS=32000 +# NEXT_PUBLIC_LLM_PROVIDER=mistral +# NEXT_PUBLIC_LLM_MODEL_COMPLEX=mistral-large-3 +# NEXT_PUBLIC_LLM_MODEL_SIMPLE=mistral-small-3.1 + +# ── DeepSeek ────────────────────────────────────────────────────────────────── +# Docs: https://api-docs.deepseek.com/quick_start/pricing +# deepseek-chat = V3.2 non-thinking | deepseek-reasoner = V3.2 thinking mode +# Same price, reasoner supports 32K output vs 8K. +# Cache hit: $0.028/M input (90% off) +# +# LLM_PROVIDER=deepseek +# LLM_BASE_URL=https://api.deepseek.com/v1 +# LLM_API_KEY=sk-your-deepseek-key +# LLM_MODEL_COMPLEX=deepseek-reasoner # thinking mode for complex tasks +# LLM_MODEL_SIMPLE=deepseek-chat # non-thinking for orchestration +# INPUT_COST_PER_M_COMPLEX=0.28 +# OUTPUT_COST_PER_M_COMPLEX=0.42 +# INPUT_COST_PER_M_SIMPLE=0.28 +# OUTPUT_COST_PER_M_SIMPLE=0.42 +# LLM_CONTEXT_WINDOW=128000 +# LLM_MAX_TOKENS=32000 +# NEXT_PUBLIC_LLM_PROVIDER=deepseek +# NEXT_PUBLIC_LLM_MODEL_COMPLEX=deepseek-reasoner +# NEXT_PUBLIC_LLM_MODEL_SIMPLE=deepseek-chat + +# ── MiniMax ─────────────────────────────────────────────────────────────────── +# Docs: https://platform.minimax.io/docs/guides/pricing-paygo +# highspeed variants are 2× price but lower latency +# +# LLM_PROVIDER=minimax +# LLM_BASE_URL=https://api.minimaxi.com/v1 +# LLM_API_KEY=your-minimax-key +# LLM_MODEL_COMPLEX=MiniMax-M2.7 +# LLM_MODEL_SIMPLE=MiniMax-M2.5 +# INPUT_COST_PER_M_COMPLEX=0.30 +# OUTPUT_COST_PER_M_COMPLEX=1.20 +# INPUT_COST_PER_M_SIMPLE=0.30 +# OUTPUT_COST_PER_M_SIMPLE=1.20 +# LLM_CONTEXT_WINDOW=204800 +# LLM_MAX_TOKENS=131072 +# NEXT_PUBLIC_LLM_PROVIDER=minimax +# 
NEXT_PUBLIC_LLM_MODEL_COMPLEX=MiniMax-M2.7 +# NEXT_PUBLIC_LLM_MODEL_SIMPLE=MiniMax-M2.5 + +# ── Kimi / Moonshot ─────────────────────────────────────────────────────────── +# Docs: https://platform.kimi.ai/docs/pricing/chat +# kimi-k2.5 = latest coding model | moonshot-v1-32k = general purpose +# Cache hit: $0.10/M input (vs $0.60/M cache miss) +# +# LLM_PROVIDER=moonshot +# LLM_BASE_URL=https://api.moonshot.cn/v1 +# LLM_API_KEY=sk-your-moonshot-key +# LLM_MODEL_COMPLEX=kimi-k2.5 +# LLM_MODEL_SIMPLE=moonshot-v1-32k +# INPUT_COST_PER_M_COMPLEX=0.60 +# OUTPUT_COST_PER_M_COMPLEX=3.00 +# INPUT_COST_PER_M_SIMPLE=3.29 +# OUTPUT_COST_PER_M_SIMPLE=3.29 +# LLM_CONTEXT_WINDOW=131072 +# LLM_MAX_TOKENS=32000 +# NEXT_PUBLIC_LLM_PROVIDER=moonshot +# NEXT_PUBLIC_LLM_MODEL_COMPLEX=kimi-k2.5 +# NEXT_PUBLIC_LLM_MODEL_SIMPLE=moonshot-v1-32k + +# ── GLM / Zhipu AI (Z.AI) ───────────────────────────────────────────────────── +# Docs: https://docs.z.ai/guides/overview/pricing +# International endpoint: api.z.ai/v1 | China endpoint: open.bigmodel.cn/api/paas/v4 +# glm-4.7-flash is FREE — useful as the simple/orchestrator model +# +# LLM_PROVIDER=z-ai +# LLM_BASE_URL=https://api.z.ai/v1 +# LLM_API_KEY=your-zhipu-key +# LLM_MODEL_COMPLEX=glm-4.7 # $0.60/$2.20 per M +# LLM_MODEL_SIMPLE=glm-4.5-air # $0.20/$1.10 per M (or glm-4.7-flash for free) +# INPUT_COST_PER_M_COMPLEX=0.60 +# OUTPUT_COST_PER_M_COMPLEX=2.20 +# INPUT_COST_PER_M_SIMPLE=0.20 +# OUTPUT_COST_PER_M_SIMPLE=1.10 +# LLM_CONTEXT_WINDOW=128000 +# LLM_MAX_TOKENS=32000 +# NEXT_PUBLIC_LLM_PROVIDER=z-ai +# NEXT_PUBLIC_LLM_MODEL_COMPLEX=glm-4.7 +# NEXT_PUBLIC_LLM_MODEL_SIMPLE=glm-4.5-air diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml new file mode 100644 index 0000000..8820ddf --- /dev/null +++ b/.github/workflows/smoke.yml @@ -0,0 +1,87 @@ +name: Smoke + +# Minimal pre-merge gate for Phase-1 demo deployment. Runs on every PR to +# main or alpha/**. 
Kept intentionally small — if this goes red, the PR +# shouldn't merge. Heavier tests live in validate.yml. + +on: + pull_request: + branches: [main, "alpha/**"] + push: + branches: [main, "alpha/**"] + +jobs: + bash-scripts: + name: Bash script syntax + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Syntax-check every shell script + run: | + set -e + fail=0 + while IFS= read -r -d '' f; do + if ! bash -n "$f"; then + echo "::error file=$f::bash -n failed" + fail=1 + fi + done < <(find scripts deploy -type f -name '*.sh' -print0) + exit "$fail" + + env-example-parses: + name: .env.example is a valid env file + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Source .env.example + run: | + # .env.example ships provider snippets commented out — sourcing it + # should succeed and should set the required LLM_* vars. This catches + # accidental stray "$(...)" expansions or unbalanced quotes before + # they reach a user's .env. + set -a + # shellcheck disable=SC1091 + . ./.env.example + set +a + for v in LLM_PROVIDER LLM_BASE_URL LLM_MODEL_COMPLEX LLM_MODEL_SIMPLE; do + if [ -z "${!v:-}" ]; then + echo "::error::.env.example did not set $v" + exit 1 + fi + done + echo "LLM_PROVIDER=$LLM_PROVIDER LLM_MODEL_COMPLEX=$LLM_MODEL_COMPLEX LLM_MODEL_SIMPLE=$LLM_MODEL_SIMPLE" + + openclaw-json-substitutes: + name: openclaw.json template substitution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Run validate-config.mjs + run: node scripts/validate-config.mjs || (echo "::error::validate-config.mjs failed" && exit 1) + + docker-build: + name: Agent Docker image builds + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Build agent image (no push) + # The image pulls openclaw from npm and a few apt packages. 
We only + # care that the build graph succeeds — we do not run the container + # (that requires real LLM / GitHub credentials). + run: | + docker build \ + -f deploy/docker/Dockerfile \ + -t clawoss-agent:ci \ + . + + - name: Docker compose config lint + run: | + docker compose -f deploy/docker/docker-compose.yml config > /dev/null diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index ff79afb..06ce2e5 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -2,9 +2,9 @@ name: Validate on: push: - branches: [main] + branches: [main, "alpha/**"] pull_request: - branches: [main] + branches: [main, "alpha/**"] jobs: validate-config: @@ -17,8 +17,12 @@ jobs: with: node-version: "20" - - name: Validate openclaw.json - run: node -e "JSON.parse(require('fs').readFileSync('config/openclaw.json', 'utf8'))" + # openclaw.json uses __PLACEHOLDER__ tokens that are substituted by + # restart.sh / deploy/docker/entrypoint.sh. validate-config.mjs below + # runs the full post-substitution parse. This inline check just sanity- + # asserts the file exists and isn't empty. + - name: Ensure openclaw.json is present + run: test -s config/openclaw.json - name: Validate cron-jobs.json run: node -e "JSON.parse(require('fs').readFileSync('config/cron-jobs.json', 'utf8'))" diff --git a/CLAUDE.md b/CLAUDE.md index 526fe26..2e29505 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -59,10 +59,11 @@ The quality of ClawOSS output is 100% determined by its prompts. 
When strategy c - Review prompts regularly for cross-file consistency ## Model -- MiniMax M2.7 via direct API (`https://api.minimaxi.com/v1`) -- 204k context window, 131k max output -- Fallback: Kimi Code k2p5 -- API key env var: `MINIMAX_API_KEY` +- Configured via env vars: `LLM_PROVIDER` / `LLM_MODEL_COMPLEX` / `LLM_MODEL_SIMPLE` +- Complex tasks (sub-agents): `LLM_MODEL_COMPLEX` (default: `claude-opus-4-6`, resolved as `anthropic/claude-opus-4-6`) +- Simple tasks (orchestrator/heartbeat): `LLM_MODEL_SIMPLE` (default: `claude-sonnet-4-6`, resolved as `anthropic/claude-sonnet-4-6`) +- API key: `LLM_API_KEY`; endpoint: `LLM_BASE_URL` +- See `docs/model-routing.md` for provider examples and budget config ## Common Commands ```bash diff --git a/config/openclaw.json b/config/openclaw.json index a589ebb..4cf365c 100644 --- a/config/openclaw.json +++ b/config/openclaw.json @@ -24,12 +24,12 @@ ] }, "model": { - "primary": "minimax/MiniMax-M2.7", - "fallbacks": ["kimi-coding/k2p5"] + "primary": "__LLM_PROVIDER__/__LLM_MODEL_SIMPLE__", + "fallbacks": ["__LLM_PROVIDER__/__LLM_MODEL_COMPLEX__"] }, "subagents": { - "model": "minimax/MiniMax-M2.7", - "maxConcurrent": 1, + "model": "__LLM_PROVIDER__/__LLM_MODEL_COMPLEX__", + "maxConcurrent": 14, "archiveAfterMinutes": 1440, "maxChildrenPerAgent": 15, "maxSpawnDepth": 2, @@ -42,7 +42,7 @@ "default": true, "name": "ClawOSS", "workspace": "__WORKSPACE_PATH__", - "model": "minimax/MiniMax-M2.7", + "model": "__LLM_PROVIDER__/__LLM_MODEL_SIMPLE__", "tools": { "profile": "coding" }, @@ -50,7 +50,8 @@ "mode": "off" }, "heartbeat": { - "model": "minimax/MiniMax-M2.7", + "every": "5m", + "model": "__LLM_PROVIDER__/__LLM_MODEL_SIMPLE__", "session": "main", "target": "none", "prompt": "External-controller mode. 
Read /home/ubuntu/projects/codex/ClawOSS/workspace/HEARTBEAT.md and follow the current prompt goal and output contract.", @@ -62,20 +63,29 @@ "models": { "mode": "merge", "providers": { - "minimax": { - "baseUrl": "https://api.minimaxi.com/v1", - "apiKey": "${MINIMAX_API_KEY}", + "__LLM_PROVIDER__": { + "baseUrl": "__LLM_BASE_URL__", + "apiKey": "${LLM_API_KEY}", "api": "openai-completions", "authHeader": true, "models": [ { - "id": "MiniMax-M2.7", - "name": "MiniMax M2.7", + "id": "__LLM_MODEL_COMPLEX__", + "name": "Complex Model (Opus-tier)", "reasoning": true, "input": ["text"], - "cost": { "input": 0.5, "output": 1.5, "cacheRead": 0.125, "cacheWrite": 0.5 }, - "contextWindow": 204800, - "maxTokens": 131072 + "cost": { "input": __INPUT_COST_PER_M_COMPLEX__, "output": __OUTPUT_COST_PER_M_COMPLEX__ }, + "contextWindow": __LLM_CONTEXT_WINDOW__, + "maxTokens": __LLM_MAX_TOKENS__ + }, + { + "id": "__LLM_MODEL_SIMPLE__", + "name": "Simple Model (Sonnet-tier)", + "reasoning": false, + "input": ["text"], + "cost": { "input": __INPUT_COST_PER_M_SIMPLE__, "output": __OUTPUT_COST_PER_M_SIMPLE__ }, + "contextWindow": __LLM_CONTEXT_WINDOW__, + "maxTokens": __LLM_MAX_TOKENS__ } ] } diff --git a/dashboard/app/api/agent/health-check/route.ts b/dashboard/app/api/agent/health-check/route.ts index 1618de5..fdcf10c 100644 --- a/dashboard/app/api/agent/health-check/route.ts +++ b/dashboard/app/api/agent/health-check/route.ts @@ -2,8 +2,31 @@ export const dynamic = "force-dynamic"; import { NextResponse } from "next/server"; import { db, ensureDb } from "@/lib/db"; -import { pullRequests, prReviews, agentLogs } from "@/lib/schema"; +import { pullRequests, prReviews, agentLogs, metricsTokens, settings } from "@/lib/schema"; import { eq, sql, gte } from "drizzle-orm"; +import { bareModelName } from "@/lib/cost-models"; + +/** Safe JSON.parse — returns undefined on failure, never throws. 
*/ +function safeParseJson(raw: string | undefined | null): T | undefined { + if (!raw) return undefined; + try { return JSON.parse(raw) as T; } catch { return undefined; } +} + +/** Coerce raw budget config into a bare-name-keyed record of positive caps. */ +function normalizeModelBudgets( + raw: unknown +): Record { + if (!raw || typeof raw !== "object") return {}; + const out: Record = {}; + for (const [k, v] of Object.entries(raw as Record)) { + const cap = typeof v === "number" ? v : Number(v); + if (Number.isFinite(cap) && cap > 0) { + const bare = bareModelName(k); + if (bare) out[bare] = cap; + } + } + return out; +} /** * Hard blocklist — repos where submitting PRs risks bans or reputation damage. @@ -59,6 +82,64 @@ export async function GET() { const merged = mergedResult[0]?.count || 0; const open = openResult[0]?.count || 0; + // Budget check — cumulative spend vs totalBudgetUsd setting + let budgetExhausted = false; + let totalCostUsd = 0; + let totalBudgetUsd = 0; + // Per-model token budgets (bare-name keyed; matches across providers). + const modelUsage: Record = {}; + let modelCaps: Record = {}; + const exhaustedModels: { model: string; used: number; cap: number }[] = []; + try { + const costResult = await db + .select({ total: sql`coalesce(sum(${metricsTokens.costUsd}), 0)` }) + .from(metricsTokens); + totalCostUsd = costResult[0]?.total || 0; + + // Budget from settings table (dashboard-editable), fallback to env var + const settingsRow = await db.query.settings.findFirst({ + where: eq(settings.key, "dashboard_settings"), + }); + const settingsVal = settingsRow?.value as { + totalBudgetUsd?: number; + modelTokenBudgets?: Record; + } | null; + totalBudgetUsd = + settingsVal?.totalBudgetUsd ?? + parseFloat(process.env.BUDGET_USD_TOTAL || "0"); + + budgetExhausted = totalBudgetUsd > 0 && totalCostUsd > totalBudgetUsd; + + // Per-model token aggregation — group by bare model name so the same + // model served by different providers is merged (e.g. 
z-ai/glm-4.6 + // and openrouter/glm-4.6 both accumulate into "glm-4.6"). + const rawPerModel = await db + .select({ + model: metricsTokens.model, + tokens: sql`coalesce(sum(${metricsTokens.inputTokens} + ${metricsTokens.outputTokens}), 0)`, + }) + .from(metricsTokens) + .groupBy(metricsTokens.model); + + for (const row of rawPerModel) { + const bare = bareModelName(row.model ?? ""); + if (!bare) continue; + modelUsage[bare] = (modelUsage[bare] ?? 0) + Number(row.tokens ?? 0); + } + + // Resolve caps: settings table first, env var fallback. + modelCaps = normalizeModelBudgets( + settingsVal?.modelTokenBudgets ?? safeParseJson(process.env.MODEL_TOKEN_BUDGETS) + ); + + for (const [bare, cap] of Object.entries(modelCaps)) { + const used = modelUsage[bare] ?? 0; + if (used >= cap) exhaustedModels.push({ model: bare, used, cap }); + } + } catch { + // non-critical — don't block health check + } + // Today's PRs const todayResult = await db .select({ count: sql`count(*)` }) @@ -166,6 +247,23 @@ export async function GET() { // Quick directives const directives: string[] = []; + if (budgetExhausted) { + directives.unshift( + `BUDGET EXHAUSTED: Spent $${totalCostUsd.toFixed(2)} of $${totalBudgetUsd.toFixed(2)} total budget. ` + + `STOP all new work immediately — do NOT spawn new implementations or submit PRs. ` + + `To resume: raise totalBudgetUsd in dashboard Settings or increase BUDGET_USD_TOTAL env var and restart.` + ); + } + + for (const m of exhaustedModels) { + directives.unshift( + `MODEL TOKEN BUDGET EXHAUSTED: ${m.model} used ${m.used.toLocaleString()}/${m.cap.toLocaleString()} tokens. ` + + `STOP using this model across ALL providers (matched by bare model name). ` + + `Do NOT spawn sub-agents that route to it. 
` + + `To resume: raise modelTokenBudgets["${m.model}"] in dashboard Settings or MODEL_TOKEN_BUDGETS env var.` + ); + } + if (approvedPRs.length > 0) { directives.unshift("MERGE NOW: " + approvedPRs.length + " approved PR(s) ready to merge: " + approvedPRs.map((pr) => pr.repo + "#" + pr.number).join(", ") + ". Run `gh pr merge --squash` if CI passes, or comment asking maintainer to trigger CI."); } @@ -204,6 +302,17 @@ export async function GET() { return NextResponse.json({ healthy: directives.length === 0, + budget: { + totalCostUsd: Math.round(totalCostUsd * 10000) / 10000, + totalBudgetUsd, + remainingUsd: totalBudgetUsd > 0 ? Math.max(0, totalBudgetUsd - totalCostUsd) : null, + exhausted: budgetExhausted, + }, + modelBudgets: { + exhausted: exhaustedModels, + usage: modelUsage, + caps: modelCaps, + }, stats: { total, merged, diff --git a/dashboard/app/api/agent/llm-health/route.ts b/dashboard/app/api/agent/llm-health/route.ts new file mode 100644 index 0000000..fe8589e --- /dev/null +++ b/dashboard/app/api/agent/llm-health/route.ts @@ -0,0 +1,128 @@ +export const dynamic = "force-dynamic"; + +import { NextResponse } from "next/server"; +import { promises as fs } from "node:fs"; +import path from "node:path"; +import os from "node:os"; + +/** + * Scans the latest openclaw session jsonl for the most recent LLM call + * outcome. Separates "agent alive but LLM is erroring" (e.g. upstream 401 / + * quota exhausted) from the existing heartbeat-based connection state, which + * only reflects whether the dashboard-reporter hook has fired — and the hook + * only fires on `agent_end`, which never happens if the very first LLM call + * fails. + * + * Returns state "ok" only when the latest LLM call in the session succeeded. 
+ */ +export async function GET() { + const sessionsRoot = + process.env.OPENCLAW_SESSIONS_DIR || + path.join(os.homedir(), ".openclaw", "agents", "clawoss", "sessions"); + + try { + const entries = await fs.readdir(sessionsRoot, { withFileTypes: true }); + const jsonlFiles = entries + .filter((e) => e.isFile() && e.name.endsWith(".jsonl")) + .map((e) => path.join(sessionsRoot, e.name)); + + if (jsonlFiles.length === 0) { + return NextResponse.json({ + state: "unknown", + message: "No session file found", + lastCallAt: null, + lastError: null, + lastErrorAt: null, + }); + } + + // Pick the most recently modified session file + const stats = await Promise.all( + jsonlFiles.map(async (f) => ({ f, mtime: (await fs.stat(f)).mtimeMs })) + ); + stats.sort((a, b) => b.mtime - a.mtime); + const latest = stats[0].f; + + const content = await fs.readFile(latest, "utf8"); + const lines = content.split("\n").filter(Boolean); + + // Walk backwards — the first assistant message we hit decides state. + let lastCallAt: string | null = null; + let lastError: string | null = null; + let lastErrorAt: string | null = null; + let lastSuccessAt: string | null = null; + let state: "ok" | "errored" | "unknown" = "unknown"; + + for (let i = lines.length - 1; i >= 0; i--) { + let evt: Record; + try { + evt = JSON.parse(lines[i]); + } catch { + continue; + } + const msg = (evt as { message?: { role?: string; errorMessage?: string; usage?: { totalTokens?: number } } }).message; + if (!msg || msg.role !== "assistant") continue; + + const ts = (evt as { timestamp?: string }).timestamp ?? null; + + if (msg.errorMessage) { + if (!lastError) { + lastError = msg.errorMessage; + lastErrorAt = ts; + } + // Keep walking — maybe an earlier successful call exists + continue; + } + + if ((msg.usage?.totalTokens ?? 
0) > 0) { + lastSuccessAt = ts; + break; + } + } + + // Decide state from the TAIL of the file (most recent assistant event) + // re-walk once more from the end until the first assistant we find. + for (let i = lines.length - 1; i >= 0; i--) { + let evt: Record; + try { evt = JSON.parse(lines[i]); } catch { continue; } + const msg = (evt as { message?: { role?: string; errorMessage?: string; usage?: { totalTokens?: number } } }).message; + if (!msg || msg.role !== "assistant") continue; + const ts = (evt as { timestamp?: string }).timestamp ?? null; + lastCallAt = ts; + if (msg.errorMessage) { + state = "errored"; + } else if ((msg.usage?.totalTokens ?? 0) > 0) { + state = "ok"; + } else { + // assistant with no error and no usage (e.g. toolUse-only) — treat as ok + state = "ok"; + } + break; + } + + return NextResponse.json({ + state, + session: path.basename(latest), + lastCallAt, + lastSuccessAt, + lastError, + lastErrorAt, + message: + state === "errored" + ? `LLM call failing: ${lastError}` + : state === "ok" + ? "LLM calls succeeding" + : "No LLM calls recorded yet", + }); + } catch (error) { + // Sessions dir missing (dashboard running outside openclaw container) — + // report unknown rather than 500 so the existing UI keeps working. + return NextResponse.json({ + state: "unknown", + message: `Sessions unavailable: ${String((error as Error).message || error)}`, + lastCallAt: null, + lastError: null, + lastErrorAt: null, + }); + } +} diff --git a/dashboard/app/api/connection-status/route.ts b/dashboard/app/api/connection-status/route.ts index db09ba2..acb5c0a 100644 --- a/dashboard/app/api/connection-status/route.ts +++ b/dashboard/app/api/connection-status/route.ts @@ -75,6 +75,41 @@ export async function GET() { ? 
lastMetric[0].timestamp.getTime() > oneHourAgo.getTime() : false; + // LLM health — probes the openclaw session jsonl directly so we can + // surface "agent alive but LLM is 4xx/5xx" even when no heartbeat has + // been ingested yet (the hook only fires on agent_end, which never + // happens if the very first LLM call fails). + let llm: { + state: "ok" | "errored" | "unknown"; + message: string; + lastError: string | null; + lastErrorAt: string | null; + lastSuccessAt: string | null; + } = { + state: "unknown", + message: "LLM health probe unavailable", + lastError: null, + lastErrorAt: null, + lastSuccessAt: null, + }; + try { + const origin = process.env.NEXT_PUBLIC_DASHBOARD_URL || + `http://127.0.0.1:${process.env.PORT || 3000}`; + const res = await fetch(`${origin}/api/agent/llm-health`, { cache: "no-store" }); + if (res.ok) { + const body = await res.json(); + llm = { + state: body.state, + message: body.message, + lastError: body.lastError ?? null, + lastErrorAt: body.lastErrorAt ?? null, + lastSuccessAt: body.lastSuccessAt ?? 
null, + }; + } + } catch { + // non-critical + } + const response = { connection: { state: connectionState, @@ -89,6 +124,7 @@ export async function GET() { errorsLastHour: recentErrors[0]?.count || 0, lastMetricAt: lastMetric[0]?.timestamp || null, }, + llm, hasAnyData: hasHeartbeats || hasMetrics, runtime, }; diff --git a/dashboard/app/api/metrics/overview/route.ts b/dashboard/app/api/metrics/overview/route.ts index 8f40138..0e70b7e 100644 --- a/dashboard/app/api/metrics/overview/route.ts +++ b/dashboard/app/api/metrics/overview/route.ts @@ -129,9 +129,16 @@ export async function GET() { // Estimate 70/30 input/output split for fallback inputTokensToday = Math.round(tokensUsedToday * 0.7); outputTokensToday = tokensUsedToday - inputTokensToday; - // Estimate cost using Kimi K2.5 average ($1.8/M tokens) + // Estimate cost using env-configured pricing (INPUT_COST_PER_M / OUTPUT_COST_PER_M) if (tokensUsedToday > 0 && costToday === 0) { - costToday = tokensUsedToday * (1.8 / 1_000_000); + const inputCostComplex = parseFloat(process.env.INPUT_COST_PER_M_COMPLEX || process.env.INPUT_COST_PER_M || "3.0"); + const outputCostComplex = parseFloat(process.env.OUTPUT_COST_PER_M_COMPLEX || process.env.OUTPUT_COST_PER_M || "15.0"); + const inputCostSimple = parseFloat(process.env.INPUT_COST_PER_M_SIMPLE || process.env.INPUT_COST_PER_M || "3.0"); + const outputCostSimple = parseFloat(process.env.OUTPUT_COST_PER_M_SIMPLE || process.env.OUTPUT_COST_PER_M || "15.0"); + // Weighted average: ~40% complex (sub-agents) + 60% simple (orchestrator) + const avgInputCostPerM = inputCostComplex * 0.4 + inputCostSimple * 0.6; + const avgOutputCostPerM = outputCostComplex * 0.4 + outputCostSimple * 0.6; + costToday = (inputTokensToday * avgInputCostPerM + outputTokensToday * avgOutputCostPerM) / 1_000_000; } } diff --git a/dashboard/app/api/settings/route.ts b/dashboard/app/api/settings/route.ts index a345dc9..949351e 100644 --- a/dashboard/app/api/settings/route.ts +++ 
b/dashboard/app/api/settings/route.ts @@ -5,6 +5,7 @@ import { db, ensureDb } from "@/lib/db"; import { settings } from "@/lib/schema"; import { eq } from "drizzle-orm"; import type { DashboardSettings } from "@/lib/types"; +import { bareModelName } from "@/lib/cost-models"; const DEFAULT_SETTINGS: DashboardSettings = { targetRepos: [], @@ -21,8 +22,25 @@ const DEFAULT_SETTINGS: DashboardSettings = { onAgentOffline: true, }, dailyBudgetUsd: 50, + totalBudgetUsd: 0, // 0 = unlimited; raise this in dashboard to cap spend + modelTokenBudgets: {}, // empty = no per-model caps; bare-name keys + modelComplex: process.env.LLM_MODEL_COMPLEX || "claude-opus-4-6", + modelSimple: process.env.LLM_MODEL_SIMPLE || "claude-sonnet-4-6", }; +/** Normalize incoming modelTokenBudgets: bare-name keys, numeric positive values. */ +function normalizeModelTokenBudgets(raw: unknown): Record { + if (!raw || typeof raw !== "object") return {}; + const out: Record = {}; + for (const [k, v] of Object.entries(raw as Record)) { + const bare = bareModelName(String(k)); + if (!bare) continue; + const n = typeof v === "number" ? 
v : Number(v); + if (Number.isFinite(n) && n >= 0) out[bare] = n; + } + return out; +} + export async function GET() { try { await ensureDb(); @@ -58,6 +76,9 @@ export async function PUT(request: Request) { : DEFAULT_SETTINGS; const updatedSettings = { ...currentSettings, ...body }; + if (body && Object.prototype.hasOwnProperty.call(body, "modelTokenBudgets")) { + updatedSettings.modelTokenBudgets = normalizeModelTokenBudgets(body.modelTokenBudgets); + } await db .insert(settings) diff --git a/dashboard/app/layout.tsx b/dashboard/app/layout.tsx index 542aea9..7165653 100644 --- a/dashboard/app/layout.tsx +++ b/dashboard/app/layout.tsx @@ -4,6 +4,8 @@ import { ThemeProvider } from "next-themes"; import { TooltipProvider } from "@/components/ui/tooltip"; import { SidebarProvider } from "@/components/ui/sidebar"; import { AppSidebar } from "@/components/layout/app-sidebar"; +import { ModelBudgetBanner } from "@/components/layout/model-budget-banner"; +import { LlmErrorBanner } from "@/components/layout/llm-error-banner"; import "./globals.css"; const spaceMono = Space_Mono({ @@ -48,7 +50,11 @@ export default function RootLayout({ -
{children}
+
+ + + {children} +
diff --git a/dashboard/app/page.tsx b/dashboard/app/page.tsx index 16e5a28..a2eabb9 100644 --- a/dashboard/app/page.tsx +++ b/dashboard/app/page.tsx @@ -126,7 +126,7 @@ export default function OverviewPage() {
- billionclaw-demo + {process.env.NEXT_PUBLIC_LLM_MODEL_COMPLEX || "billionclaw-demo"} | parallel-agents | @@ -221,7 +221,9 @@ export default function OverviewPage() { {connectionData.pipeline.errorsLastHour} | - source github/BillionClaw + source github/{process.env.GITHUB_USERNAME || "BillionClaw"} + | + model {process.env.NEXT_PUBLIC_LLM_MODEL_COMPLEX || "claude-opus-4-6"} cost $0.60/$3.00/M | pii off diff --git a/dashboard/components/layout/llm-error-banner.tsx b/dashboard/components/layout/llm-error-banner.tsx new file mode 100644 index 0000000..b878c55 --- /dev/null +++ b/dashboard/components/layout/llm-error-banner.tsx @@ -0,0 +1,79 @@ +"use client"; + +import { useEffect, useState } from "react"; + +interface LlmState { + state: "ok" | "errored" | "unknown"; + message?: string; + lastError?: string | null; + lastErrorAt?: string | null; + lastSuccessAt?: string | null; +} + +const POLL_INTERVAL_MS = 30_000; + +function formatRelative(iso: string | null | undefined): string { + if (!iso) return ""; + const diffMs = Date.now() - new Date(iso).getTime(); + if (diffMs < 60_000) return `${Math.floor(diffMs / 1000)}s ago`; + if (diffMs < 3600_000) return `${Math.floor(diffMs / 60_000)}m ago`; + return `${Math.floor(diffMs / 3600_000)}h ago`; +} + +export function LlmErrorBanner() { + const [llm, setLlm] = useState(null); + const [mounted, setMounted] = useState(false); + + useEffect(() => { + setMounted(true); + let cancelled = false; + + async function poll() { + try { + const res = await fetch("/api/connection-status", { cache: "no-store" }); + if (!res.ok) return; + const data = await res.json(); + if (cancelled) return; + setLlm(data.llm ?? null); + } catch { + // silent — banner stays in last known state + } + } + + poll(); + const interval = setInterval(poll, POLL_INTERVAL_MS); + return () => { + cancelled = true; + clearInterval(interval); + }; + }, []); + + if (!mounted) return null; + if (!llm || llm.state !== "errored") return null; + + return ( +
+ !! LLM ERROR +
+ + Agent is alive but LLM calls are failing — upstream provider rejecting requests. + + {llm.lastError && ( + + {llm.lastError} + + )} + {llm.lastErrorAt && ( + + last fail {formatRelative(llm.lastErrorAt)} + + )} + {llm.lastSuccessAt && ( + + last ok {formatRelative(llm.lastSuccessAt)} + + )} +
+
+  );
+}
diff --git a/dashboard/components/layout/model-budget-banner.tsx b/dashboard/components/layout/model-budget-banner.tsx
new file mode 100644
index 0000000..d33772c
--- /dev/null
+++ b/dashboard/components/layout/model-budget-banner.tsx
@@ -0,0 +1,79 @@
+"use client";
+
+import { useEffect, useState } from "react";
+
+interface ExhaustedModel {
+  model: string;
+  used: number;
+  cap: number;
+}
+
+interface HealthCheckResponse {
+  modelBudgets?: {
+    exhausted?: ExhaustedModel[];
+  };
+}
+
+const POLL_INTERVAL_MS = 30_000;
+
+/**
+ * Format a token count compactly: "950", "12.3K", "4.50M".
+ */
+function formatTokens(n: number): string {
+  // Promote to "M" from 999_950 up: those counts would otherwise round to "1000.0K".
+  if (n >= 999_950) return `${(n / 1_000_000).toFixed(2)}M`;
+  if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
+  return String(n);
+}
+
+/**
+ * Banner listing the models that /api/agent/health-check reports under
+ * `modelBudgets.exhausted`. Polls every POLL_INTERVAL_MS; a failed poll keeps the last state.
+ */
+export function ModelBudgetBanner() {
+  const [exhausted, setExhausted] = useState<ExhaustedModel[]>([]);
+  const [mounted, setMounted] = useState(false);
+
+  useEffect(() => {
+    setMounted(true);
+    let cancelled = false;
+
+    async function poll() {
+      try {
+        const res = await fetch("/api/agent/health-check", { cache: "no-store" });
+        if (!res.ok) return;
+        const data: HealthCheckResponse = await res.json();
+        if (cancelled) return;
+        setExhausted(data.modelBudgets?.exhausted ?? []);
+      } catch {
+        // non-critical — banner stays in last known state
+      }
+    }
+
+    poll();
+    const interval = setInterval(poll, POLL_INTERVAL_MS);
+    return () => {
+      cancelled = true;
+      clearInterval(interval);
+    };
+  }, []);
+
+  if (!mounted) return null;
+  if (exhausted.length === 0) return null;
+
+  return (
+
+ !! MODEL BUDGET +
+ {exhausted.map((m) => ( + + {m.model}: {formatTokens(m.used)} / {formatTokens(m.cap)} tokens — STOPPED + + ))} + + raise via PUT /api/settings or MODEL_TOKEN_BUDGETS env var + +
+
+ ); +} diff --git a/dashboard/components/live/gateway-status.tsx b/dashboard/components/live/gateway-status.tsx index 470d1d2..493f90c 100644 --- a/dashboard/components/live/gateway-status.tsx +++ b/dashboard/components/live/gateway-status.tsx @@ -100,7 +100,7 @@ export function GatewayStatus({
Model - minimax/MiniMax-M2.7 + {process.env.NEXT_PUBLIC_LLM_PROVIDER || "anthropic"}/{process.env.NEXT_PUBLIC_LLM_MODEL_COMPLEX || "claude-opus-4-6"}
Auth diff --git a/dashboard/components/overview/metric-cards.tsx b/dashboard/components/overview/metric-cards.tsx index bda83b5..9420b10 100644 --- a/dashboard/components/overview/metric-cards.tsx +++ b/dashboard/components/overview/metric-cards.tsx @@ -137,7 +137,7 @@ export function MetricCards({ { label: "Cost/24h", value: formatCost(costToday), - sub: costToday > 0 ? "kimi k2.5" : null, + sub: costToday > 0 ? (process.env.NEXT_PUBLIC_LLM_MODEL_SIMPLE || "sonnet") : null, bar: { value: costToday, max: 5 }, }, { diff --git a/dashboard/lib/cost-models.ts b/dashboard/lib/cost-models.ts index 1184259..a252fc8 100644 --- a/dashboard/lib/cost-models.ts +++ b/dashboard/lib/cost-models.ts @@ -1,81 +1,465 @@ /** * Cost models for different LLM providers. - * Prices are in USD per token. + * Prices are in USD per million tokens (stored here as per-token for computation). + * + * This is a reference registry — actual pricing used by the agent is driven by + * INPUT_COST_PER_M_COMPLEX / OUTPUT_COST_PER_M_COMPLEX (and _SIMPLE) env vars. + * Add entries here so the dashboard can display correct costs when the model ID + * is known from telemetry. + * + * Prices last verified: April 2026. Always confirm at the provider's pricing page + * before committing to a budget. 
*/ export interface CostModel { name: string; provider: string; + /** USD per token (divide the per-million price by 1_000_000) */ inputCostPerToken: number; outputCostPerToken: number; + /** Cache-hit input price, if provider supports prompt caching */ + cacheHitCostPerToken?: number; + contextWindow?: number; + notes?: string; } export const COST_MODELS: Record<string, CostModel> = { - "kimi-coding/k2p5": { - name: "Kimi K2.5 (Kimi Code)", - provider: "kimi-code", - inputCostPerToken: 0.6 / 1_000_000, - outputCostPerToken: 3.0 / 1_000_000, + + // ─── Anthropic Claude ────────────────────────────────────────────────────── + // https://platform.claude.com/docs/en/about-claude/pricing + // Opus 4.6/4.5 dropped to $5/$25 (from $15/$75). 1M context at standard rate. + "anthropic/claude-opus-4-6": { + name: "Claude Opus 4.6", + provider: "anthropic", + inputCostPerToken: 5.0 / 1_000_000, + outputCostPerToken: 25.0 / 1_000_000, + cacheHitCostPerToken: 0.50 / 1_000_000, // 0.1x base input + contextWindow: 1_000_000, }, - "z-ai/glm-5": { - name: "GLM-5", - provider: "openrouter", - inputCostPerToken: 0.72 / 1_000_000, - outputCostPerToken: 2.3 / 1_000_000, + "anthropic/claude-sonnet-4-6": { + name: "Claude Sonnet 4.6", + provider: "anthropic", + inputCostPerToken: 3.0 / 1_000_000, + outputCostPerToken: 15.0 / 1_000_000, + cacheHitCostPerToken: 0.30 / 1_000_000, + contextWindow: 1_000_000, }, - "moonshotai/kimi-k2.5": { - name: "Kimi K2.5 (OpenRouter)", - provider: "openrouter", - inputCostPerToken: 0.45 / 1_000_000, - outputCostPerToken: 2.2 / 1_000_000, + "anthropic/claude-opus-4-5": { + name: "Claude Opus 4.5", + provider: "anthropic", + inputCostPerToken: 5.0 / 1_000_000, + outputCostPerToken: 25.0 / 1_000_000, + cacheHitCostPerToken: 0.50 / 1_000_000, + contextWindow: 1_000_000, + }, + "anthropic/claude-sonnet-4-5": { + name: "Claude Sonnet 4.5", + provider: "anthropic", + inputCostPerToken: 3.0 / 1_000_000, + outputCostPerToken: 15.0 / 1_000_000, + cacheHitCostPerToken: 0.30 /
1_000_000, + contextWindow: 1_000_000, + }, + "anthropic/claude-haiku-4-5": { + name: "Claude Haiku 4.5", + provider: "anthropic", + inputCostPerToken: 1.0 / 1_000_000, + outputCostPerToken: 5.0 / 1_000_000, + cacheHitCostPerToken: 0.10 / 1_000_000, + contextWindow: 200_000, + }, + "anthropic/claude-haiku-3-5": { + name: "Claude Haiku 3.5", + provider: "anthropic", + inputCostPerToken: 0.80 / 1_000_000, + outputCostPerToken: 4.0 / 1_000_000, + cacheHitCostPerToken: 0.08 / 1_000_000, + contextWindow: 200_000, + }, + + // ─── OpenAI ──────────────────────────────────────────────────────────────── + // https://openai.com/api/pricing + "openai/gpt-4o": { + name: "GPT-4o", + provider: "openai", + inputCostPerToken: 2.5 / 1_000_000, + outputCostPerToken: 10.0 / 1_000_000, + cacheHitCostPerToken: 1.25 / 1_000_000, + contextWindow: 128_000, + }, + "openai/gpt-4o-mini": { + name: "GPT-4o Mini", + provider: "openai", + inputCostPerToken: 0.15 / 1_000_000, + outputCostPerToken: 0.6 / 1_000_000, + cacheHitCostPerToken: 0.075 / 1_000_000, + contextWindow: 128_000, + }, + "openai/o3": { + name: "OpenAI o3", + provider: "openai", + inputCostPerToken: 10.0 / 1_000_000, + outputCostPerToken: 40.0 / 1_000_000, + contextWindow: 200_000, + }, + "openai/o4-mini": { + name: "OpenAI o4-mini", + provider: "openai", + inputCostPerToken: 1.1 / 1_000_000, + outputCostPerToken: 4.4 / 1_000_000, + contextWindow: 200_000, + }, + + // ─── DeepSeek ────────────────────────────────────────────────────────────── + // https://api-docs.deepseek.com/quick_start/pricing + // Both deepseek-chat and deepseek-reasoner are now DeepSeek-V3.2 + "deepseek/deepseek-chat": { + name: "DeepSeek Chat (V3.2)", + provider: "deepseek", + inputCostPerToken: 0.28 / 1_000_000, // cache miss + outputCostPerToken: 0.42 / 1_000_000, + cacheHitCostPerToken: 0.028 / 1_000_000, // cache hit: 90% cheaper + contextWindow: 128_000, + notes: "Non-thinking mode. 
Cache miss $0.28/M, cache hit $0.028/M.", + }, + "deepseek/deepseek-reasoner": { + name: "DeepSeek Reasoner (V3.2 Thinking)", + provider: "deepseek", + inputCostPerToken: 0.28 / 1_000_000, + outputCostPerToken: 0.42 / 1_000_000, + cacheHitCostPerToken: 0.028 / 1_000_000, + contextWindow: 128_000, + notes: "Thinking mode. Max 32K output tokens (vs 8K for chat mode).", }, + + // ─── MiniMax ─────────────────────────────────────────────────────────────── + // https://platform.minimax.io/docs/guides/pricing-paygo "minimax/MiniMax-M2.7": { name: "MiniMax M2.7", provider: "minimax", - inputCostPerToken: 0.3 / 1_000_000, - outputCostPerToken: 1.2 / 1_000_000, + inputCostPerToken: 0.30 / 1_000_000, + outputCostPerToken: 1.20 / 1_000_000, + contextWindow: 204_800, }, - "minimax/MiniMax-M1-80k": { - name: "MiniMax M2.5 (legacy)", - provider: "openrouter", - inputCostPerToken: 0.25 / 1_000_000, - outputCostPerToken: 1.2 / 1_000_000, + "minimax/MiniMax-M2.7-highspeed": { + name: "MiniMax M2.7 (High Speed)", + provider: "minimax", + inputCostPerToken: 0.60 / 1_000_000, + outputCostPerToken: 2.40 / 1_000_000, + contextWindow: 204_800, + notes: "Faster inference at 2× the price.", + }, + "minimax/MiniMax-M2.5": { + name: "MiniMax M2.5", + provider: "minimax", + inputCostPerToken: 0.30 / 1_000_000, + outputCostPerToken: 1.20 / 1_000_000, + contextWindow: 204_800, + }, + "minimax/MiniMax-M2.5-highspeed": { + name: "MiniMax M2.5 (High Speed)", + provider: "minimax", + inputCostPerToken: 0.60 / 1_000_000, + outputCostPerToken: 2.40 / 1_000_000, + contextWindow: 204_800, + }, + "minimax/MiniMax-M2": { + name: "MiniMax M2", + provider: "minimax", + inputCostPerToken: 0.30 / 1_000_000, + outputCostPerToken: 1.20 / 1_000_000, + contextWindow: 204_800, }, "minimax/MiniMax-M1": { name: "MiniMax M1", - provider: "openrouter", - inputCostPerToken: 0.25 / 1_000_000, - outputCostPerToken: 1.2 / 1_000_000, + provider: "minimax", + inputCostPerToken: 0.25 / 1_000_000, + outputCostPerToken: 1.20 
/ 1_000_000, }, - "anthropic/claude-sonnet-4-20250514": { - name: "Claude Sonnet 4", - provider: "openrouter", - inputCostPerToken: 3.0 / 1_000_000, - outputCostPerToken: 15.0 / 1_000_000, + "minimax/MiniMax-M1-80k": { + name: "MiniMax M1 (80k)", + provider: "minimax", + inputCostPerToken: 0.25 / 1_000_000, + outputCostPerToken: 1.20 / 1_000_000, + contextWindow: 80_000, }, - "openai/gpt-4o": { - name: "GPT-4o", + + // ─── Kimi / Moonshot AI ──────────────────────────────────────────────────── + // Direct API: https://platform.kimi.ai (endpoint: api.moonshot.cn/v1) + // OpenRouter: moonshotai/* + "moonshot/kimi-k2.5": { + name: "Kimi K2.5", + provider: "moonshot", + inputCostPerToken: 0.60 / 1_000_000, // cache miss + outputCostPerToken: 3.00 / 1_000_000, + cacheHitCostPerToken: 0.10 / 1_000_000, // cache hit + contextWindow: 131_072, + notes: "Latest Kimi coding model. Cache miss $0.60/M, cache hit $0.10/M.", + }, + "moonshot/kimi-k2": { + name: "Kimi K2", + provider: "moonshot", + inputCostPerToken: 0.55 / 1_000_000, + outputCostPerToken: 2.20 / 1_000_000, + contextWindow: 131_072, + }, + "moonshot/moonshot-v1-8k": { + name: "Moonshot V1 (8k)", + provider: "moonshot", + inputCostPerToken: 1.65 / 1_000_000, // ≈ ¥12/M at 7.3 CNY/USD + outputCostPerToken: 1.65 / 1_000_000, + contextWindow: 8_000, + notes: "Legacy general model. Uniform input/output pricing.", + }, + "moonshot/moonshot-v1-32k": { + name: "Moonshot V1 (32k)", + provider: "moonshot", + inputCostPerToken: 3.29 / 1_000_000, // ≈ ¥24/M + outputCostPerToken: 3.29 / 1_000_000, + contextWindow: 32_000, + notes: "Legacy general model. Uniform input/output pricing.", + }, + "moonshot/moonshot-v1-128k": { + name: "Moonshot V1 (128k)", + provider: "moonshot", + inputCostPerToken: 8.22 / 1_000_000, // ≈ ¥60/M + outputCostPerToken: 8.22 / 1_000_000, + contextWindow: 128_000, + notes: "Legacy general model. 
Uniform input/output pricing.", + }, + // OpenRouter aliases + "moonshotai/kimi-k2.5": { + name: "Kimi K2.5 (OpenRouter)", provider: "openrouter", - inputCostPerToken: 2.5 / 1_000_000, - outputCostPerToken: 10.0 / 1_000_000, + inputCostPerToken: 0.60 / 1_000_000, + outputCostPerToken: 3.00 / 1_000_000, + }, + "kimi-coding/k2p5": { + name: "Kimi K2.5 (direct)", + provider: "kimi-code", + inputCostPerToken: 0.60 / 1_000_000, + outputCostPerToken: 3.00 / 1_000_000, + }, + + // ─── GLM / Zhipu AI (Z.AI) ───────────────────────────────────────────────── + // International API: https://api.z.ai/v1 + // China API: https://open.bigmodel.cn/api/paas/v4 + // https://docs.z.ai/guides/overview/pricing + "z-ai/glm-5.1": { + name: "GLM-5.1", + provider: "z-ai", + inputCostPerToken: 1.40 / 1_000_000, + outputCostPerToken: 4.40 / 1_000_000, + }, + "z-ai/glm-5": { + name: "GLM-5", + provider: "z-ai", + inputCostPerToken: 1.00 / 1_000_000, + outputCostPerToken: 3.20 / 1_000_000, + notes: "China's first public AI company frontier model.", + }, + "z-ai/glm-5-turbo": { + name: "GLM-5 Turbo", + provider: "z-ai", + inputCostPerToken: 1.20 / 1_000_000, + outputCostPerToken: 4.00 / 1_000_000, + }, + "z-ai/glm-4.7": { + name: "GLM-4.7", + provider: "z-ai", + inputCostPerToken: 0.60 / 1_000_000, + outputCostPerToken: 2.20 / 1_000_000, + }, + "z-ai/glm-4.7-flashx": { + name: "GLM-4.7 FlashX", + provider: "z-ai", + inputCostPerToken: 0.07 / 1_000_000, + outputCostPerToken: 0.40 / 1_000_000, + notes: "Fast, cheap. 
Good for simple orchestration tasks.", + }, + "z-ai/glm-4.7-flash": { + name: "GLM-4.7 Flash", + provider: "z-ai", + inputCostPerToken: 0.0, + outputCostPerToken: 0.0, + notes: "Free tier.", + }, + "z-ai/glm-4.6": { + name: "GLM-4.6", + provider: "z-ai", + inputCostPerToken: 0.60 / 1_000_000, + outputCostPerToken: 2.20 / 1_000_000, + }, + "z-ai/glm-4.5": { + name: "GLM-4.5", + provider: "z-ai", + inputCostPerToken: 0.60 / 1_000_000, + outputCostPerToken: 2.20 / 1_000_000, + }, + "z-ai/glm-4.5-x": { + name: "GLM-4.5-X (32B MoE)", + provider: "z-ai", + inputCostPerToken: 2.20 / 1_000_000, + outputCostPerToken: 8.90 / 1_000_000, + }, + "z-ai/glm-4.5-air": { + name: "GLM-4.5 Air", + provider: "z-ai", + inputCostPerToken: 0.20 / 1_000_000, + outputCostPerToken: 1.10 / 1_000_000, + notes: "Lightweight, good for orchestrator/simple tasks.", + }, + "z-ai/glm-4.5-airx": { + name: "GLM-4.5 AirX", + provider: "z-ai", + inputCostPerToken: 1.10 / 1_000_000, + outputCostPerToken: 4.50 / 1_000_000, + }, + "z-ai/glm-4.5-flash": { + name: "GLM-4.5 Flash", + provider: "z-ai", + inputCostPerToken: 0.0, + outputCostPerToken: 0.0, + notes: "Free tier.", + }, + "z-ai/glm-4-32b-0414-128k": { + name: "GLM-4 32B (128k)", + provider: "z-ai", + inputCostPerToken: 0.10 / 1_000_000, + outputCostPerToken: 0.10 / 1_000_000, + }, + + // ─── Google Gemini ──────────────────────────────────────────────────────── + // https://ai.google.dev/gemini-api/docs/pricing + "google/gemini-2.5-pro": { + name: "Gemini 2.5 Pro", + provider: "google", + inputCostPerToken: 1.25 / 1_000_000, // ≤200k; >200k doubles to $2.50 + outputCostPerToken: 10.0 / 1_000_000, // ≤200k; >200k $15.00 + cacheHitCostPerToken: 0.125 / 1_000_000, + contextWindow: 1_000_000, + notes: "Tiered: >200k context doubles input/output price.", + }, + "google/gemini-2.5-flash": { + name: "Gemini 2.5 Flash", + provider: "google", + inputCostPerToken: 0.30 / 1_000_000, + outputCostPerToken: 2.50 / 1_000_000, + cacheHitCostPerToken: 0.03 / 
1_000_000, + contextWindow: 1_000_000, + }, + "google/gemini-2.5-flash-lite": { + name: "Gemini 2.5 Flash-Lite", + provider: "google", + inputCostPerToken: 0.10 / 1_000_000, + outputCostPerToken: 0.40 / 1_000_000, + cacheHitCostPerToken: 0.01 / 1_000_000, + contextWindow: 1_000_000, + notes: "Cheapest Gemini model.", + }, + "google/gemini-3-flash": { + name: "Gemini 3 Flash (Preview)", + provider: "google", + inputCostPerToken: 0.50 / 1_000_000, + outputCostPerToken: 3.00 / 1_000_000, + cacheHitCostPerToken: 0.05 / 1_000_000, + contextWindow: 1_000_000, + }, + "google/gemini-3.1-pro": { + name: "Gemini 3.1 Pro (Preview)", + provider: "google", + inputCostPerToken: 2.00 / 1_000_000, // ≤200k; >200k doubles + outputCostPerToken: 12.00 / 1_000_000, + contextWindow: 1_000_000, + notes: "Preview. Tiered: >200k context doubles price.", + }, + + // ─── Mistral AI ─────────────────────────────────────────────────────────── + // https://mistral.ai/pricing + "mistral/mistral-large-3": { + name: "Mistral Large 3", + provider: "mistral", + inputCostPerToken: 2.0 / 1_000_000, + outputCostPerToken: 6.0 / 1_000_000, + contextWindow: 128_000, + }, + "mistral/mistral-medium-3": { + name: "Mistral Medium 3", + provider: "mistral", + inputCostPerToken: 1.0 / 1_000_000, + outputCostPerToken: 3.0 / 1_000_000, + contextWindow: 128_000, + }, + "mistral/mistral-small-3.1": { + name: "Mistral Small 3.1", + provider: "mistral", + inputCostPerToken: 0.20 / 1_000_000, + outputCostPerToken: 0.60 / 1_000_000, + contextWindow: 128_000, + }, + "mistral/mistral-nemo": { + name: "Mistral Nemo", + provider: "mistral", + inputCostPerToken: 0.02 / 1_000_000, + outputCostPerToken: 0.04 / 1_000_000, + contextWindow: 128_000, + notes: "Cheapest Mistral model.", }, }; -// Default model for the ClawOSS agent (switched to MiniMax M2.7 direct API) -export const DEFAULT_MODEL = "minimax/MiniMax-M2.7"; -export const DEFAULT_COST_MODEL = COST_MODELS[DEFAULT_MODEL]; +/** + * Build a fallback cost model from env 
vars when the active model is not in the registry.
+ *
+ * Price resolution, in USD per million tokens:
+ *   1. INPUT_/OUTPUT_COST_PER_M_COMPLEX or _SIMPLE, when `modelId` matches
+ *      LLM_MODEL_COMPLEX / LLM_MODEL_SIMPLE by bare model name;
+ *   2. INPUT_COST_PER_M / OUTPUT_COST_PER_M;
+ *   3. the built-in 3.0 / 15.0 defaults.
+ * Unparseable or negative values are skipped instead of producing NaN costs.
+ *
+ * @param modelId Model being priced; defaults to LLM_MODEL_COMPLEX.
+ */
+function envFallbackCostModel(modelId?: string): CostModel {
+  const provider = process.env.LLM_PROVIDER || "unknown";
+  const model = process.env.LLM_MODEL_COMPLEX || "unknown";
+  const tier = envPricingTier(modelId || process.env.LLM_MODEL_COMPLEX);
+  let tierInput: string | undefined;
+  let tierOutput: string | undefined;
+  if (tier === "COMPLEX") {
+    tierInput = process.env.INPUT_COST_PER_M_COMPLEX;
+    tierOutput = process.env.OUTPUT_COST_PER_M_COMPLEX;
+  } else if (tier === "SIMPLE") {
+    tierInput = process.env.INPUT_COST_PER_M_SIMPLE;
+    tierOutput = process.env.OUTPUT_COST_PER_M_SIMPLE;
+  }
+  return {
+    name: `${provider}/${model}`,
+    provider,
+    inputCostPerToken: envPricePerToken([tierInput, process.env.INPUT_COST_PER_M], 3.0),
+    outputCostPerToken: envPricePerToken([tierOutput, process.env.OUTPUT_COST_PER_M], 15.0),
+  };
+}
+
+/**
+ * Decide which per-model price vars apply to `modelId`, comparing bare model
+ * names so provider prefixes do not matter. Returns undefined when the model
+ * is neither the configured complex nor the configured simple model.
+ */
+function envPricingTier(modelId: string | undefined): "COMPLEX" | "SIMPLE" | undefined {
+  const bare = bareModelName(modelId || "");
+  if (!bare) return undefined;
+  if (bare === bareModelName(process.env.LLM_MODEL_COMPLEX || "")) return "COMPLEX";
+  if (bare === bareModelName(process.env.LLM_MODEL_SIMPLE || "")) return "SIMPLE";
+  return undefined;
+}
+
+/**
+ * Convert the first usable per-million price among `candidates` to a per-token
+ * price; falls back to `defaultPerM` when none is a finite, non-negative number.
+ */
+function envPricePerToken(candidates: Array<string | undefined>, defaultPerM: number): number {
+  for (const raw of candidates) {
+    // parseFloat rather than Number: .env.example puts inline "# ..." comments after
+    // these values, and parseFloat still reads the leading number if they leak through.
+    const perM = Number.parseFloat(raw ?? "");
+    if (Number.isFinite(perM) && perM >= 0) return perM / 1_000_000;
+  }
+  return defaultPerM / 1_000_000;
+}
+
+/**
+ * Registry lookup restricted to own properties, so an id such as "constructor"
+ * or "toString" cannot resolve to an inherited Object.prototype member.
+ */
+function lookupCostModel(modelId: string): CostModel | undefined {
+  return Object.prototype.hasOwnProperty.call(COST_MODELS, modelId)
+    ? COST_MODELS[modelId]
+    : undefined;
+}
+
+/**
+ * The active complex model ID, resolved from env vars at runtime.
+ * Format: "{LLM_PROVIDER}/{LLM_MODEL_COMPLEX}"
+ */
+export function getActiveModel(): string {
+  const provider = process.env.LLM_PROVIDER || "anthropic";
+  const model = process.env.LLM_MODEL_COMPLEX || "claude-opus-4-6";
+  return `${provider}/${model}`;
+}

 /**
  * Compute the cost for a given token usage.
- * Falls back to the default Kimi Code pricing if model is unknown.
+ * Looks up the model in the registry; falls back to env-configured pricing.
+ *
+ * @param inputTokens Number of input tokens.
+ * @param outputTokens Number of output tokens.
+ * @param model Registry key ("provider/model"); defaults to getActiveModel().
+ * @returns Cost in USD.
  */
 export function computeTokenCost(
   inputTokens: number,
   outputTokens: number,
   model?: string
 ): number {
-  const costModel = (model && COST_MODELS[model]) || DEFAULT_COST_MODEL;
+  const activeModel = model || getActiveModel();
+  const costModel = lookupCostModel(activeModel) ?? envFallbackCostModel(activeModel);
   return (
-    inputTokens * costModel.inputCostPerToken +
+    inputTokens * costModel.inputCostPerToken +
     outputTokens * costModel.outputCostPerToken
   );
 }
+
+/** @deprecated Use getActiveModel() instead */
+export const DEFAULT_MODEL = getActiveModel();
+/** Cost model for DEFAULT_MODEL, resolved once at module load. */
+export const DEFAULT_COST_MODEL = lookupCostModel(DEFAULT_MODEL) ?? envFallbackCostModel();
+
+/**
+ * Normalize a model identifier to its bare model name — used for cross-provider
+ * matching so that `z-ai/glm-4.6`, `openrouter/glm-4.6`, and `glm-4.6` all collapse
+ * to the same key. The last path segment wins (handles nested prefixes like
+ * `openrouter/anthropic/claude-opus-4-6`).
+ */ +export function bareModelName(model: string): string { + if (!model) return ""; + const lastSlash = model.lastIndexOf("/"); + const tail = lastSlash >= 0 ? model.slice(lastSlash + 1) : model; + return tail.toLowerCase().trim(); +} diff --git a/dashboard/lib/github.ts b/dashboard/lib/github.ts index 842cb77..191fc17 100644 --- a/dashboard/lib/github.ts +++ b/dashboard/lib/github.ts @@ -16,7 +16,7 @@ export async function syncPRsFromGitHub(): Promise<{ }> { await ensureDb(); const octokit = getOctokit(); - const agentUsername = process.env.CLAW_AGENT_USERNAME || "BillionClaw"; + const agentUsername = process.env.CLAW_AGENT_USERNAME || process.env.GITHUB_USERNAME || "BillionClaw"; // Dynamic discovery: search for ALL PRs by the agent across GitHub // Use raw fetch to avoid Octokit query encoding issues diff --git a/dashboard/lib/types.ts b/dashboard/lib/types.ts index f7f82ac..7ad88ff 100644 --- a/dashboard/lib/types.ts +++ b/dashboard/lib/types.ts @@ -131,6 +131,18 @@ export interface DashboardSettings { onAgentOffline: boolean; }; dailyBudgetUsd: number; + /** Cumulative total spend cap in USD. 0 = unlimited. Enforced by health-check. */ + totalBudgetUsd: number; + /** + * Per-model cumulative token caps. Keyed by bare model name (e.g. "glm-4.6", + * "deepseek-chat"), matched across all providers. Value is total tokens + * (input + output). Missing or ≤ 0 = unlimited. Enforced by health-check. + */ + modelTokenBudgets: Record<string, number>; + /** Display-only: complex model in use (set via LLM_MODEL_COMPLEX env var) */ + modelComplex: string; + /** Display-only: simple model in use (set via LLM_MODEL_SIMPLE env var) */ + modelSimple: string; } export interface ConversationMessage { diff --git a/deploy/docker/Dockerfile b/deploy/docker/Dockerfile new file mode 100644 index 0000000..dd72605 --- /dev/null +++ b/deploy/docker/Dockerfile @@ -0,0 +1,60 @@ +# ClawOSS agent — Linux container image.
+#
+# This image runs the OpenClaw gateway + the clawoss agent configuration
+# in a single container. It's the Linux-native counterpart to
+# scripts/restart.sh's launchd flow on macOS.
+#
+# Build:
+#   docker build -f deploy/docker/Dockerfile -t clawoss-agent .
+#
+# Run (see deploy/docker/docker-compose.yml for the real invocation):
+#   docker run --env-file .env -v clawoss_state:/home/clawoss/.openclaw clawoss-agent
+
+FROM node:22-bookworm-slim
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# System deps used by scripts + subagents: git, gh CLI, jq, python3, curl, ca-certs.
+# The git repos worked on by the agent get cloned inside /tmp at runtime, so
+# git itself must be present. gh is used for every PR operation.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates curl git gnupg jq python3 python3-pip tini \
+    && install -d -m 0755 /etc/apt/keyrings \
+    && curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+        | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
+    && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
+    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+        > /etc/apt/sources.list.d/github-cli.list \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends gh \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install openclaw CLI globally. Defaults to the latest published release, so
+# builds are NOT reproducible unless a version is pinned:
+#   --build-arg OPENCLAW_VERSION=x.y.z
+ARG OPENCLAW_VERSION=latest
+RUN npm install -g "openclaw@${OPENCLAW_VERSION}" \
+    && openclaw --version
+
+# Non-root user so the agent doesn't run as root inside the container.
+# The official node base image already ships a `node` user with uid 1000, and
+# useradd rejects a duplicate uid — remove that user before claiming uid 1000.
+RUN userdel --remove node \
+    && useradd --create-home --shell /bin/bash --uid 1000 clawoss
+WORKDIR /app
+COPY --chown=clawoss:clawoss .
/app + +# Install project deps (workspaces include the dashboard — skip the heavy +# dashboard install here; run the agent and the dashboard in separate images +# if both are needed). +RUN npm install --omit=dev --ignore-scripts --workspaces=false \ + && chown -R clawoss:clawoss /app + +USER clawoss +ENV HOME=/home/clawoss +ENV PATH=/home/clawoss/.local/bin:/usr/local/lib/node_modules/.bin:$PATH + +# Entrypoint handles env validation, config deploy, gateway start, and then +# execs openclaw gateway run as PID 1 (via tini). +COPY --chown=clawoss:clawoss deploy/docker/entrypoint.sh /usr/local/bin/clawoss-entrypoint +RUN chmod +x /usr/local/bin/clawoss-entrypoint + +ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/clawoss-entrypoint"] +CMD [] diff --git a/deploy/docker/README.md b/deploy/docker/README.md new file mode 100644 index 0000000..9e7ad39 --- /dev/null +++ b/deploy/docker/README.md @@ -0,0 +1,66 @@ +# ClawOSS — Linux Docker deployment + +Phase-1 demo deployment: one container, one long-running process +(`openclaw gateway run`), `.env`-driven LLM routing, token-budget aware. + +## Quickstart + +```bash +cp .env.example .env +$EDITOR .env # fill LLM_* and GITHUB_TOKEN at minimum +docker compose -f deploy/docker/docker-compose.yml up --build +``` + +The container fails fast and prints the missing env var if required +settings are absent. Silent misconfiguration that wastes tokens is the +thing we're explicitly trying to avoid. + +## What goes in `.env` + +Minimum for the container to boot: + +| Variable | Purpose | +|---|---| +| `GITHUB_TOKEN` | Classic PAT (`ghp_*`) with `public_repo` scope. | +| `LLM_PROVIDER` | e.g. `anthropic`, `deepseek`, `z-ai`, `minimax`. | +| `LLM_BASE_URL` | OpenAI-compatible endpoint for the provider. | +| `LLM_API_KEY` | Key for that provider. | +| `LLM_MODEL_COMPLEX` | Opus-tier model for subagents. | +| `LLM_MODEL_SIMPLE` | Sonnet-tier model for the orchestrator. 
| + +Strongly recommended (container warns if missing): + +- `BUDGET_USD_TOTAL` — hard cap in USD, agent pauses when reached. +- `CLAW_API_KEY` + `DASHBOARD_URL` — telemetry into the Vercel dashboard. +- `MODEL_TOKEN_BUDGETS` — per-model token caps (see `.env.example`). Optional: no warning is printed when it is unset. + +## State persistence + +`clawoss_state` (named volume) holds `~/.openclaw/` — the agent registry, +session jsonl files, and OpenClaw extensions. Delete the volume to get a +clean-room restart: + +```bash +docker compose -f deploy/docker/docker-compose.yml down -v +``` + +Workspace memory (`workspace/memory/*.md`) is bind-mounted to the host so +you can watch the pipeline state live from outside the container. + +## Relationship to the other docker setups + +| Path | Purpose | +|---|---| +| `docker/` + root `docker-compose.yml` | Alpha autonomy backend — API + worker + reflection services that read/write the dashboard DB. | +| `deploy/docker/` (this dir) | The OpenClaw agent itself. This is what you run on a Linux host for the Phase-1 demo. | +| `scripts/restart.sh` | macOS-native launchd deployment. On Linux it detects systemd and degrades gracefully; this image is the cleaner option for Linux. | + +## Not included in this image + +- The Vercel dashboard (keep it on Vercel — running it locally doesn't + help the Phase-1 demo). Set `DASHBOARD_URL` + `CLAW_API_KEY` to connect. +- The `openclaw` CLI binary is pulled from npm at build time. Operators + behind a proxy should set `--build-arg OPENCLAW_VERSION=<x.y.z>` and + configure their npm registry. +- No automated backup of `clawoss_state`. If you care about queue + survival across host rebuilds, back up the volume yourself. diff --git a/deploy/docker/docker-compose.yml b/deploy/docker/docker-compose.yml new file mode 100644 index 0000000..c6e87ce --- /dev/null +++ b/deploy/docker/docker-compose.yml @@ -0,0 +1,47 @@ +# ClawOSS agent — Linux Docker Compose deployment.
+# +# This is additive to the root-level docker-compose.yml (which runs the +# autonomy backend API + worker + reflection services). This file runs the +# actual OpenClaw agent container for Phase-1 demo deployments on Linux hosts. +# +# Usage: +# cp .env.example .env && $EDITOR .env +# docker compose -f deploy/docker/docker-compose.yml up --build +# +# Stop and preserve state: +# docker compose -f deploy/docker/docker-compose.yml down +# +# Full reset (blows away openclaw state — required after config changes): +# docker compose -f deploy/docker/docker-compose.yml down -v +# +# The single-service design matches the macOS launchd flow: one long-lived +# process (`openclaw gateway run`) supervises its own heartbeat + subagents. + +services: + agent: + build: + context: ../.. + dockerfile: deploy/docker/Dockerfile + image: clawoss-agent:local + restart: unless-stopped + env_file: + - ../../.env + volumes: + # Persist openclaw state (agent registry, session jsonl, extensions). + # Without this, every restart drops queued work and pending subagents. + - clawoss_state:/home/clawoss/.openclaw + # Expose workspace memory so operators can tail state files on the host. + - ../../workspace/memory:/app/workspace/memory + # Gateway default port; expose only on localhost so an open .env doesn't + # turn into an open LLM proxy. + ports: + - "127.0.0.1:18789:18789" + healthcheck: + test: ["CMD", "sh", "-c", "openclaw gateway status 2>/dev/null | grep -qi 'running\\|reachable\\|ok'"] + interval: 30s + timeout: 5s + retries: 5 + start_period: 30s + +volumes: + clawoss_state: diff --git a/deploy/docker/entrypoint.sh b/deploy/docker/entrypoint.sh new file mode 100755 index 0000000..4da1132 --- /dev/null +++ b/deploy/docker/entrypoint.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +# ClawOSS Linux container entrypoint. +# +# Responsibilities: +# 1. Validate required env vars (fail fast and loudly — the whole point of +# Task #5 was that silent failures waste tokens). +# 2. 
Link the workspace into $HOME/.openclaw/ the same way setup.sh does on +# the host. +# 3. Run scripts/restart.sh in a Linux-aware path so config gets deployed +# into $HOME/.openclaw/openclaw.json. +# 4. Exec `openclaw gateway run` as PID 1 so Docker can supervise it. + +set -euo pipefail + +log() { printf '[clawoss-docker] %s\n' "$*"; } +fail() { printf '[clawoss-docker][FAIL] %s\n' "$*" >&2; exit 1; } + +# ── 0. Required env vars ────────────────────────────────────────────── +REQUIRED=(GITHUB_TOKEN LLM_API_KEY LLM_PROVIDER LLM_BASE_URL LLM_MODEL_COMPLEX LLM_MODEL_SIMPLE) +MISSING=() +for v in "${REQUIRED[@]}"; do + if [ -z "${!v:-}" ]; then + MISSING+=("$v") + fi +done +if [ ${#MISSING[@]} -gt 0 ]; then + fail "missing required env: ${MISSING[*]} (see .env.example)" +fi + +# Optional but strongly recommended — warn, don't fail. +for v in BUDGET_USD_TOTAL CLAW_API_KEY DASHBOARD_URL; do + if [ -z "${!v:-}" ]; then + log "[WARN] $v not set" + fi +done + +# ── 1. Link workspace ───────────────────────────────────────────────── +PROJECT_DIR="/app" +WORKSPACE_DIR="$PROJECT_DIR/workspace" +OC_DIR="$HOME/.openclaw" +mkdir -p "$OC_DIR/logs" "$OC_DIR/agents" + +if [ ! -L "$OC_DIR/workspace" ]; then + ln -sfn "$WORKSPACE_DIR" "$OC_DIR/workspace" + log "linked workspace: $OC_DIR/workspace -> $WORKSPACE_DIR" +fi + +# ── 2. Deploy resolved openclaw.json ────────────────────────────────── +# Mirrors the sed substitution in scripts/restart.sh. Kept in-entrypoint so +# the container can come up without invoking the full restart.sh (which also +# does macOS-specific work like launchd). 
+
+# Escape a value for the replacement side of a sed 's|…|…|' expression.
+# Backslash, '&' (whole-match reference) and the '|' delimiter are special
+# there; an unescaped '&' in e.g. LLM_BASE_URL would silently corrupt the config.
+sed_repl() { printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'; }
+
+RESOLVED_CONFIG=$(sed \
+  -e "s|__WORKSPACE_PATH__|$(sed_repl "$WORKSPACE_DIR")|g" \
+  -e "s|__PROJECT_DIR__|$(sed_repl "$PROJECT_DIR")|g" \
+  -e "s|__HOME_DIR__|$(sed_repl "$HOME")|g" \
+  -e "s|__LLM_PROVIDER__|$(sed_repl "${LLM_PROVIDER}")|g" \
+  -e "s|__LLM_BASE_URL__|$(sed_repl "${LLM_BASE_URL}")|g" \
+  -e "s|__LLM_MODEL_COMPLEX__|$(sed_repl "${LLM_MODEL_COMPLEX}")|g" \
+  -e "s|__LLM_MODEL_SIMPLE__|$(sed_repl "${LLM_MODEL_SIMPLE}")|g" \
+  -e "s|__INPUT_COST_PER_M_COMPLEX__|$(sed_repl "${INPUT_COST_PER_M_COMPLEX:-${INPUT_COST_PER_M:-3.0}}")|g" \
+  -e "s|__OUTPUT_COST_PER_M_COMPLEX__|$(sed_repl "${OUTPUT_COST_PER_M_COMPLEX:-${OUTPUT_COST_PER_M:-15.0}}")|g" \
+  -e "s|__INPUT_COST_PER_M_SIMPLE__|$(sed_repl "${INPUT_COST_PER_M_SIMPLE:-${INPUT_COST_PER_M:-3.0}}")|g" \
+  -e "s|__OUTPUT_COST_PER_M_SIMPLE__|$(sed_repl "${OUTPUT_COST_PER_M_SIMPLE:-${OUTPUT_COST_PER_M:-15.0}}")|g" \
+  -e "s|__LLM_CONTEXT_WINDOW__|$(sed_repl "${LLM_CONTEXT_WINDOW:-200000}")|g" \
+  -e "s|__LLM_MAX_TOKENS__|$(sed_repl "${LLM_MAX_TOKENS:-32000}")|g" \
+  "$PROJECT_DIR/config/openclaw.json")
+
+# Inject env block (API key + token + budget + pricing) so openclaw has
+# everything it needs to authenticate.
+# NOTE: the Python program sits inside a double-quoted shell string — keep it
+# free of double quotes, dollar signs, backticks and backslashes.
+echo "$RESOLVED_CONFIG" | python3 -c "
+import json, os, sys
+merged = json.load(sys.stdin)
+env = merged.setdefault('env', {})
+keys = [
+    'LLM_API_KEY','LLM_BASE_URL','LLM_PROVIDER',
+    'LLM_MODEL_COMPLEX','LLM_MODEL_SIMPLE',
+    'GITHUB_TOKEN','GITHUB_USERNAME','GITHUB_EMAIL',
+    'CLAW_API_KEY','DASHBOARD_URL',
+    'BUDGET_USD_TOTAL','MODEL_TOKEN_BUDGETS',
+    'INPUT_COST_PER_M','OUTPUT_COST_PER_M',
+    'INPUT_COST_PER_M_COMPLEX','OUTPUT_COST_PER_M_COMPLEX',
+    'INPUT_COST_PER_M_SIMPLE','OUTPUT_COST_PER_M_SIMPLE',
+]
+for k in keys:
+    v = os.environ.get(k)
+    if v:
+        env[k] = v
+merged['env'] = env
+# Same location as OC_DIR above, taken from the environment instead of being
+# spliced into the Python source by the shell.
+target = os.path.join(os.environ['HOME'], '.openclaw', 'openclaw.json')
+# The file holds LLM_API_KEY and GITHUB_TOKEN: keep it owner-only, including
+# when an older, wider-mode copy already exists in the state volume.
+fd = os.open(target, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+os.fchmod(fd, 0o600)
+with os.fdopen(fd, 'w') as fh:
+    json.dump(merged, fh, indent=2)
+"
+
+log "deployed $OC_DIR/openclaw.json"
+
+# ── 3.
GitHub CLI auth (non-interactive token login) ────────────────── +if [ -n "${GITHUB_TOKEN:-}" ]; then + echo "$GITHUB_TOKEN" | gh auth login --with-token >/dev/null 2>&1 || \ + log "[WARN] gh auth login --with-token failed; gh commands may 401" +fi + +# Git identity — PRs need author info. +git config --global user.name "${GITHUB_USERNAME:-clawoss-bot}" +git config --global user.email "${GITHUB_EMAIL:-${GITHUB_USERNAME:-clawoss-bot}@users.noreply.github.com}" + +# ── 4. Register agent + hand off to gateway ─────────────────────────── +AGENT_MODEL="${LLM_PROVIDER}/${LLM_MODEL_SIMPLE}" +if ! openclaw agents list 2>/dev/null | grep -q "^- clawoss "; then + openclaw agents add clawoss \ + --workspace "$WORKSPACE_DIR" \ + --model "$AGENT_MODEL" \ + --non-interactive + log "registered agent clawoss (model=$AGENT_MODEL)" +fi + +log "starting openclaw gateway (foreground)" +exec openclaw gateway run diff --git a/docs/images/clawoss-review.png b/docs/images/clawoss-review.png new file mode 100644 index 0000000..979b0c5 Binary files /dev/null and b/docs/images/clawoss-review.png differ diff --git a/docs/model-routing.md b/docs/model-routing.md new file mode 100644 index 0000000..8b2a019 --- /dev/null +++ b/docs/model-routing.md @@ -0,0 +1,546 @@ +# 模型路由 + +## 目标 + +1. **任意主流模型**:通过环境变量切换供应商和模型,无需改代码 +2. **双轨路由**:复杂任务用 Opus 级模型,简单任务用 Sonnet 级模型 +3. **总预算熔断**:累计花费达到上限时服务自动暂停 +4. 
**Dashboard 可视**:实时显示模型配置、累计花费、预算进度 + +## 路由规则 + +| 角色 | 使用模型 | 原因 | +|------|---------|------| +| Orchestrator(heartbeat 主循环) | `LLM_MODEL_SIMPLE` | 只做文件读写、状态路由 | +| 主 Agent session | `LLM_MODEL_SIMPLE` | 同上 | +| 所有 Sub-agents(实现、跟进、监控) | `LLM_MODEL_COMPLEX` | 需深度理解代码、写 patch、分析 review | + +Fallback:complex 失败时回退 simple。 + +## 配置注入机制 + +``` +.env + ↓ restart.sh 读取 +config/openclaw.json(含 __LLM_*__ 占位符) + ↓ sed 替换占位符 +~/.openclaw/openclaw.json(已注入实际值) + ↓ OpenClaw gateway 启动时读取 +Agent 运行(使用正确模型) +``` + +改了 `.env` 后必须 `bash scripts/restart.sh` 重启才能生效。 + +--- + +## 环境变量 + +所有变量在 `.env` 中配置,`restart.sh` 读取后注入到 OpenClaw config 和 gateway plist。 + +### 必填 + +| 变量 | 说明 | 示例 | +|------|------|------| +| `LLM_PROVIDER` | 供应商 key,作为 OpenClaw provider 块名和模型 ID 前缀 | `anthropic` | +| `LLM_BASE_URL` | OpenAI 兼容 API 端点 | `https://api.anthropic.com/v1` | +| `LLM_API_KEY` | 供应商 API 密钥 | `sk-ant-...` | +| `LLM_MODEL_COMPLEX` | 复杂任务模型 ID(sub-agents 使用) | `claude-opus-4-6` | +| `LLM_MODEL_SIMPLE` | 简单任务模型 ID(orchestrator 使用) | `claude-sonnet-4-6` | +| `GITHUB_TOKEN` | GitHub PAT,需 `public_repo` 权限 | `ghp_...` | + +### 计价 + +| 变量 | 说明 | 默认值 | +|------|------|--------| +| `INPUT_COST_PER_M_COMPLEX` | Complex 模型输入价($/M token) | 读 `INPUT_COST_PER_M` | +| `OUTPUT_COST_PER_M_COMPLEX` | Complex 模型输出价($/M token) | 读 `OUTPUT_COST_PER_M` | +| `INPUT_COST_PER_M_SIMPLE` | Simple 模型输入价($/M token) | 读 `INPUT_COST_PER_M` | +| `OUTPUT_COST_PER_M_SIMPLE` | Simple 模型输出价($/M token) | 读 `OUTPUT_COST_PER_M` | +| `INPUT_COST_PER_M` | 通用 fallback 输入价 | `3.0` | +| `OUTPUT_COST_PER_M` | 通用 fallback 输出价 | `15.0` | + +### 模型参数 + +| 变量 | 说明 | 默认值 | +|------|------|--------| +| `LLM_CONTEXT_WINDOW` | 上下文窗口(tokens) | `200000` | +| `LLM_MAX_TOKENS` | 最大输出(tokens) | `32000` | + +### 预算 + +| 变量 | 说明 | 默认值 | +|------|------|--------| +| `BUDGET_USD_TOTAL` | 累计总预算(美元),`0` = 不限制 | `0` | +| `MODEL_TOKEN_BUDGETS` | 每模型 token 上限的 JSON 映射,`0` 或缺省 = 不限制 | `{}` | + +`MODEL_TOKEN_BUDGETS` 示例: + +```bash 
+MODEL_TOKEN_BUDGETS='{"glm-4.6":20000000,"deepseek-chat":50000000,"claude-opus-4-6":10000000}' +``` + +**关键语义**: + +- **key 是 bare model name**(与供应商前缀无关)。系统按 model name 的最后一段做匹配,全部小写化。`z-ai/glm-4.6`、`openrouter/glm-4.6`、`zhipu/glm-4.6` 都会被合并到同一个 `glm-4.6` 计数器,跨供应商累加。 +- value 是**累计 token 上限**(input + output 之和)。 +- value `0` 或缺省 = 不限制。 +- 触发后行为:health-check 在 directives 顶部插入 `MODEL TOKEN BUDGET EXHAUSTED: ...`,agent 停止派发使用该模型的 sub-agent。Dashboard 顶部出现红色横幅。 +- **不能用 `LLM_BASE_URL` 或 provider 字段判定模型**——同一个模型可能从多个供应商接入,必须用 model name 匹配。 + +### Dashboard + +| 变量 | 说明 | +|------|------| +| `DASHBOARD_URL` | Dashboard URL | +| `CLAW_API_KEY` | Dashboard API 共享密钥 | +| `NEXT_PUBLIC_LLM_PROVIDER` | 浏览器端显示用(镜像 `LLM_PROVIDER`) | +| `NEXT_PUBLIC_LLM_MODEL_COMPLEX` | 浏览器端显示用 | +| `NEXT_PUBLIC_LLM_MODEL_SIMPLE` | 浏览器端显示用 | + +--- + +## 供应商配置 & 定价 + +> 价格:2026 年 4 月核实。使用前请在供应商文档确认最新价格。 + +### 价格对照表 + +| 供应商 | Complex 模型 | Simple 模型 | Complex 输入/输出 $/M | Simple 输入/输出 $/M | +|--------|-------------|------------|----------------------|---------------------| +| Anthropic | claude-opus-4-6 | claude-sonnet-4-6 | $5 / $25 | $3 / $15 | +| OpenAI | gpt-4o | gpt-4o-mini | $2.5 / $10 | $0.15 / $0.6 | +| Google | gemini-2.5-pro | gemini-2.5-flash | $1.25 / $10 | $0.30 / $2.50 | +| Mistral | mistral-large-3 | mistral-small-3.1 | $2 / $6 | $0.20 / $0.60 | +| DeepSeek | deepseek-reasoner | deepseek-chat | $0.28 / $0.42 | $0.28 / $0.42 | +| MiniMax | MiniMax-M2.7 | MiniMax-M2.5 | $0.30 / $1.20 | $0.30 / $1.20 | +| Kimi | kimi-k2.5 | moonshot-v1-32k | $0.60 / $3.00 | $3.29 / $3.29 | +| GLM | glm-4.7 | glm-4.5-air | $0.60 / $2.20 | $0.20 / $1.10 | + +### Anthropic Claude + +文档:https://platform.claude.com/docs/en/about-claude/pricing + +Opus 4.6/4.5 已降价至 $5/$25(原 $15/$75)。4.6 系列支持 1M context window,标准费率。 + +| 模型 | 输入 $/M | 输出 $/M | Cache hit $/M | 上下文 | +|------|---------|---------|--------------|--------| +| claude-opus-4-6 | $5.0 | $25.0 | $0.50 | 1M | +| claude-sonnet-4-6 | $3.0 | $15.0 | 
$0.30 | 1M | +| claude-opus-4-5 | $5.0 | $25.0 | $0.50 | 1M | +| claude-sonnet-4-5 | $3.0 | $15.0 | $0.30 | 1M | +| claude-haiku-4-5 | $1.0 | $5.0 | $0.10 | 200k | + +```bash +LLM_PROVIDER=anthropic +LLM_BASE_URL=https://api.anthropic.com/v1 +LLM_API_KEY=sk-ant-... +LLM_MODEL_COMPLEX=claude-opus-4-6 +LLM_MODEL_SIMPLE=claude-sonnet-4-6 +INPUT_COST_PER_M_COMPLEX=5.0 +OUTPUT_COST_PER_M_COMPLEX=25.0 +INPUT_COST_PER_M_SIMPLE=3.0 +OUTPUT_COST_PER_M_SIMPLE=15.0 +LLM_CONTEXT_WINDOW=1000000 +LLM_MAX_TOKENS=32000 +NEXT_PUBLIC_LLM_PROVIDER=anthropic +NEXT_PUBLIC_LLM_MODEL_COMPLEX=claude-opus-4-6 +NEXT_PUBLIC_LLM_MODEL_SIMPLE=claude-sonnet-4-6 +``` + +### OpenAI + +文档:https://openai.com/api/pricing + +| 模型 | 输入 $/M | 输出 $/M | Cache hit $/M | 上下文 | +|------|---------|---------|--------------|--------| +| gpt-4o | $2.5 | $10.0 | $1.25 | 128k | +| gpt-4o-mini | $0.15 | $0.6 | $0.075 | 128k | +| o3 | $10.0 | $40.0 | -- | 200k | +| o4-mini | $1.1 | $4.4 | -- | 200k | + +```bash +LLM_PROVIDER=openai +LLM_BASE_URL=https://api.openai.com/v1 +LLM_API_KEY=sk-... +LLM_MODEL_COMPLEX=gpt-4o +LLM_MODEL_SIMPLE=gpt-4o-mini +INPUT_COST_PER_M_COMPLEX=2.5 +OUTPUT_COST_PER_M_COMPLEX=10.0 +INPUT_COST_PER_M_SIMPLE=0.15 +OUTPUT_COST_PER_M_SIMPLE=0.6 +LLM_CONTEXT_WINDOW=128000 +LLM_MAX_TOKENS=16000 +NEXT_PUBLIC_LLM_PROVIDER=openai +NEXT_PUBLIC_LLM_MODEL_COMPLEX=gpt-4o +NEXT_PUBLIC_LLM_MODEL_SIMPLE=gpt-4o-mini +``` + +### DeepSeek + +文档:https://api-docs.deepseek.com/quick_start/pricing + +`deepseek-chat` 和 `deepseek-reasoner` 现均为 DeepSeek-V3.2,价格相同。 +区别:reasoner 是 thinking 模式,最大输出 32K;chat 是非 thinking,最大输出 8K。 +Cache hit 价格比 cache miss 便宜 90%。 + +| 模型 | 输入 $/M (miss) | 输入 $/M (hit) | 输出 $/M | 上下文 | +|------|----------------|---------------|---------|--------| +| deepseek-chat | $0.28 | $0.028 | $0.42 | 128k | +| deepseek-reasoner | $0.28 | $0.028 | $0.42 | 128k | + +```bash +LLM_PROVIDER=deepseek +LLM_BASE_URL=https://api.deepseek.com/v1 +LLM_API_KEY=sk-... 
+LLM_MODEL_COMPLEX=deepseek-reasoner +LLM_MODEL_SIMPLE=deepseek-chat +INPUT_COST_PER_M_COMPLEX=0.28 +OUTPUT_COST_PER_M_COMPLEX=0.42 +INPUT_COST_PER_M_SIMPLE=0.28 +OUTPUT_COST_PER_M_SIMPLE=0.42 +LLM_CONTEXT_WINDOW=128000 +LLM_MAX_TOKENS=32000 +NEXT_PUBLIC_LLM_PROVIDER=deepseek +NEXT_PUBLIC_LLM_MODEL_COMPLEX=deepseek-reasoner +NEXT_PUBLIC_LLM_MODEL_SIMPLE=deepseek-chat +``` + +### MiniMax + +文档:https://platform.minimax.io/docs/guides/pricing-paygo + +highspeed 变体延迟更低,价格翻倍。 + +| 模型 | 输入 $/M | 输出 $/M | 上下文 | +|------|---------|---------|--------| +| MiniMax-M2.7 | $0.30 | $1.20 | 204k | +| MiniMax-M2.7-highspeed | $0.60 | $2.40 | 204k | +| MiniMax-M2.5 | $0.30 | $1.20 | 204k | +| MiniMax-M2.5-highspeed | $0.60 | $2.40 | 204k | +| MiniMax-M2 | $0.30 | $1.20 | 204k | + +```bash +LLM_PROVIDER=minimax +LLM_BASE_URL=https://api.minimaxi.com/v1 +LLM_API_KEY=... +LLM_MODEL_COMPLEX=MiniMax-M2.7 +LLM_MODEL_SIMPLE=MiniMax-M2.5 +INPUT_COST_PER_M_COMPLEX=0.30 +OUTPUT_COST_PER_M_COMPLEX=1.20 +INPUT_COST_PER_M_SIMPLE=0.30 +OUTPUT_COST_PER_M_SIMPLE=1.20 +LLM_CONTEXT_WINDOW=204800 +LLM_MAX_TOKENS=131072 +NEXT_PUBLIC_LLM_PROVIDER=minimax +NEXT_PUBLIC_LLM_MODEL_COMPLEX=MiniMax-M2.7 +NEXT_PUBLIC_LLM_MODEL_SIMPLE=MiniMax-M2.5 +``` + +### Kimi / Moonshot + +文档:https://platform.kimi.ai/docs/pricing/chat + +kimi-k2.5 是最新编程模型,cache hit 价格比 cache miss 便宜 83%。 +moonshot-v1 系列是按 token 长度统一计价的旧款通用模型。 + +| 模型 | 输入 $/M (miss) | 输入 $/M (hit) | 输出 $/M | 上下文 | +|------|----------------|---------------|---------|--------| +| kimi-k2.5 | $0.60 | $0.10 | $3.00 | 131k | +| kimi-k2 | $0.55 | -- | $2.20 | 131k | +| moonshot-v1-8k | $1.65 | -- | $1.65 | 8k | +| moonshot-v1-32k | $3.29 | -- | $3.29 | 32k | +| moonshot-v1-128k | $8.22 | -- | $8.22 | 128k | + +```bash +LLM_PROVIDER=moonshot +LLM_BASE_URL=https://api.moonshot.cn/v1 +LLM_API_KEY=sk-... 
+LLM_MODEL_COMPLEX=kimi-k2.5 +LLM_MODEL_SIMPLE=moonshot-v1-32k +INPUT_COST_PER_M_COMPLEX=0.60 +OUTPUT_COST_PER_M_COMPLEX=3.00 +INPUT_COST_PER_M_SIMPLE=3.29 +OUTPUT_COST_PER_M_SIMPLE=3.29 +LLM_CONTEXT_WINDOW=131072 +LLM_MAX_TOKENS=32000 +NEXT_PUBLIC_LLM_PROVIDER=moonshot +NEXT_PUBLIC_LLM_MODEL_COMPLEX=kimi-k2.5 +NEXT_PUBLIC_LLM_MODEL_SIMPLE=moonshot-v1-32k +``` + +### GLM / Zhipu AI + +国际 API:https://api.z.ai/v1(文档:https://docs.z.ai/guides/overview/pricing) +国内 API:https://open.bigmodel.cn/api/paas/v4 + +`glm-4.7-flash` 和 `glm-4.5-flash` 完全免费,可用作 simple 模型把编排成本压到零。 + +| 模型 | 输入 $/M | 输出 $/M | 备注 | +|------|---------|---------|------| +| glm-5.1 | $1.40 | $4.40 | | +| glm-5 | $1.00 | $3.20 | | +| glm-5-turbo | $1.20 | $4.00 | | +| glm-4.7 | $0.60 | $2.20 | | +| glm-4.7-flashx | $0.07 | $0.40 | 轻量快速 | +| glm-4.7-flash | 免费 | 免费 | | +| glm-4.5 | $0.60 | $2.20 | | +| glm-4.5-x | $2.20 | $8.90 | 32B MoE | +| glm-4.5-air | $0.20 | $1.10 | 适合 simple 模型 | +| glm-4.5-airx | $1.10 | $4.50 | | +| glm-4.5-flash | 免费 | 免费 | | +| glm-4-32b-0414-128k | $0.10 | $0.10 | | + +```bash +LLM_PROVIDER=z-ai +LLM_BASE_URL=https://api.z.ai/v1 +LLM_API_KEY=... 
+LLM_MODEL_COMPLEX=glm-4.7 +LLM_MODEL_SIMPLE=glm-4.5-air # 或 glm-4.7-flash(免费) +INPUT_COST_PER_M_COMPLEX=0.60 +OUTPUT_COST_PER_M_COMPLEX=2.20 +INPUT_COST_PER_M_SIMPLE=0.20 # glm-4.7-flash 填 0 +OUTPUT_COST_PER_M_SIMPLE=1.10 # glm-4.7-flash 填 0 +LLM_CONTEXT_WINDOW=128000 +LLM_MAX_TOKENS=32000 +NEXT_PUBLIC_LLM_PROVIDER=z-ai +NEXT_PUBLIC_LLM_MODEL_COMPLEX=glm-4.7 +NEXT_PUBLIC_LLM_MODEL_SIMPLE=glm-4.5-air +``` + +### Google Gemini + +文档:https://ai.google.dev/gemini-api/docs/pricing + +2.5 Pro 按 prompt 长度分档:≤200k 标准价,>200k 翻倍。所有模型有免费额度。 + +| 模型 | 输入 $/M | 输出 $/M | Cache hit $/M | 上下文 | +|------|---------|---------|--------------|--------| +| gemini-2.5-pro | $1.25 | $10.0 | $0.125 | 1M | +| gemini-2.5-flash | $0.30 | $2.50 | $0.03 | 1M | +| gemini-2.5-flash-lite | $0.10 | $0.40 | $0.01 | 1M | +| gemini-3-flash (preview) | $0.50 | $3.00 | $0.05 | 1M | +| gemini-3.1-pro (preview) | $2.00 | $12.00 | -- | 1M | + +```bash +LLM_PROVIDER=google +LLM_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai +LLM_API_KEY=... +LLM_MODEL_COMPLEX=gemini-2.5-pro +LLM_MODEL_SIMPLE=gemini-2.5-flash +INPUT_COST_PER_M_COMPLEX=1.25 +OUTPUT_COST_PER_M_COMPLEX=10.0 +INPUT_COST_PER_M_SIMPLE=0.30 +OUTPUT_COST_PER_M_SIMPLE=2.50 +LLM_CONTEXT_WINDOW=1000000 +LLM_MAX_TOKENS=65536 +NEXT_PUBLIC_LLM_PROVIDER=google +NEXT_PUBLIC_LLM_MODEL_COMPLEX=gemini-2.5-pro +NEXT_PUBLIC_LLM_MODEL_SIMPLE=gemini-2.5-flash +``` + +### Mistral AI + +文档:https://mistral.ai/pricing + +| 模型 | 输入 $/M | 输出 $/M | 上下文 | +|------|---------|---------|--------| +| mistral-large-3 | $2.0 | $6.0 | 128k | +| mistral-medium-3 | $1.0 | $3.0 | 128k | +| mistral-small-3.1 | $0.20 | $0.60 | 128k | +| mistral-nemo | $0.02 | $0.04 | 128k | + +```bash +LLM_PROVIDER=mistral +LLM_BASE_URL=https://api.mistral.ai/v1 +LLM_API_KEY=... 
+LLM_MODEL_COMPLEX=mistral-large-3 +LLM_MODEL_SIMPLE=mistral-small-3.1 +INPUT_COST_PER_M_COMPLEX=2.0 +OUTPUT_COST_PER_M_COMPLEX=6.0 +INPUT_COST_PER_M_SIMPLE=0.20 +OUTPUT_COST_PER_M_SIMPLE=0.60 +LLM_CONTEXT_WINDOW=128000 +LLM_MAX_TOKENS=32000 +NEXT_PUBLIC_LLM_PROVIDER=mistral +NEXT_PUBLIC_LLM_MODEL_COMPLEX=mistral-large-3 +NEXT_PUBLIC_LLM_MODEL_SIMPLE=mistral-small-3.1 +``` + +--- + +## 计费原理 + +### Token 数:估算而非真实值 + +OpenClaw hook 事件**不包含真实 token 数**(hook payload 中无 `usage` / `prompt_tokens` / `completion_tokens` 字段,这是平台限制)。当前系统用工具调用参数的字符长度粗估: + +``` +inputTokens ≈ JSON.stringify(params).length / 4 +outputTokens ≈ JSON.stringify(params).length / 8 +``` + +**实际误差可达 5-20 倍**,因为: +- 只统计了工具调用参数的字符长度,完全忽略对话历史和 system prompt 的 token 消耗 +- output 固定按 input 的一半算,没有依据 +- 中文内容会低估(中文字符占更多 token) + +Dashboard 上显示的 token 数和费用是量级参考,不能用来跟供应商账单对账。 + +### 双轨计价 + +每次工具调用时,根据 `sessionId` 判断模型 tier,累积到对应计数器: + +``` +sessionId === "main" → simple → INPUT_COST_PER_M_SIMPLE / OUTPUT_COST_PER_M_SIMPLE +sessionId !== "main" → complex → INPUT_COST_PER_M_COMPLEX / OUTPUT_COST_PER_M_COMPLEX +``` + +`agent_end` 事件触发时,分两条 metrics 记录上报至 dashboard: + +| channel | model | 计价变量 | +|---------|-------|---------| +| `orchestrator` | `LLM_PROVIDER/LLM_MODEL_SIMPLE` | `*_SIMPLE` | +| `subagent` | `LLM_PROVIDER/LLM_MODEL_COMPLEX` | `*_COMPLEX` | + +### 计价变量优先级 + +``` +INPUT_COST_PER_M_COMPLEX → 未设置时读 INPUT_COST_PER_M → 未设置时用 3.0 +OUTPUT_COST_PER_M_COMPLEX → 未设置时读 OUTPUT_COST_PER_M → 未设置时用 15.0 +``` + +Simple 模型同理。 + +### 改进方向 + +当前估算方案是 OpenClaw 平台限制下的权宜之计。已知可行的改进路径: + +1. **Session JSONL 解析**:OpenClaw 在 `~/.openclaw/agents/<agentId>/sessions/*.jsonl` 中存储对话记录,启用 `includeTranscriptUsage` 后每个 turn 包含真实 token 数。可在 heartbeat 结束后轮询解析。 +2. **供应商 Admin API**:Anthropic 提供 `/v1/organizations/usage_report/messages` 接口(需 admin key),可查真实用量。OpenAI 也有类似接口。DeepSeek/MiniMax/GLM 暂无。 +3.
**HTTP 代理拦截**:在 gateway 和供应商之间加代理,从 response header/body 提取真实 usage。架构侵入较大。 + +--- + +## 预算熔断 + +### 工作原理 + +每次 heartbeat 的 step 0c 调用 `GET /api/agent/health-check`,该接口会: + +1. 查询 `metrics_tokens` 表的累计 `sum(cost_usd)` +2. 与预算上限比较(优先读 dashboard settings,次之读 `BUDGET_USD_TOTAL` env var) +3. 超限时在 `directives` 头部插入 `BUDGET EXHAUSTED` 指令 + +Agent 读到该指令后停止新工作(不 spawn 新实现、不提交 PR)。 + +### 配置 + +**方式 A:env var(启动时固定)** + +```bash +BUDGET_USD_TOTAL=20.0 # 0 = 不限制 +``` + +**方式 B:Dashboard Settings(运行时可调)** + +Settings 页的 `totalBudgetUsd` 字段,修改后下一个 heartbeat 周期立即生效,无需重启。 + +优先级:Dashboard settings > `BUDGET_USD_TOTAL` env var > 0(不限制)。 + +### 恢复 + +在 Dashboard Settings 页提高 `totalBudgetUsd`,下一次 heartbeat 会重新评估,自动恢复工作。 + +### Health-check 响应格式 + +```json +{ + "budget": { + "totalCostUsd": 18.42, + "totalBudgetUsd": 20.0, + "remainingUsd": 1.58, + "exhausted": false + }, + "directives": [] +} +``` + +`exhausted: true` 时 directives 会包含: + +``` +BUDGET EXHAUSTED: Spent $20.01 of $20.00 total budget. +STOP all new work immediately — do NOT spawn new implementations or submit PRs. +To resume: raise totalBudgetUsd in dashboard Settings or increase BUDGET_USD_TOTAL env var and restart. +``` + +### 注意事项 + +- 熔断基于估算成本,实际账单可能有偏差(见上方计费原理) +- `totalBudgetUsd = 0` 表示不限制,不会触发熔断 +- `metrics_tokens` 表有 30 天数据保留策略,超期数据会被清理,清理后累计值会重置 + +--- + +## 每模型 Token 熔断 + +与美元总预算并行的另一道闸门:按**模型**配置 token 上限,超限即停止使用该模型。 + +### 工作原理 + +健康检查同一接口聚合 `metrics_tokens` 表中每个模型的 `sum(input_tokens + output_tokens)`,按 **bare model name**(model 路径的最后一段,小写)归并跨供应商的用量。任何 model 的累计值 ≥ 配置上限即视为超支: + +1. `directives` 顶部追加 `MODEL TOKEN BUDGET EXHAUSTED: used X/Y tokens. STOP using this model across ALL providers ...` +2. 响应体新增 `modelBudgets` 字段(`exhausted` 数组、`usage` 映射、`caps` 映射) +3. 
Dashboard 全局横幅(`ModelBudgetBanner`)轮询 health-check,检测到 `modelBudgets.exhausted` 非空即在所有页面顶部渲染红色提示 + +### 配置 + +**方式 A:env var(启动时固定)** + +```bash +MODEL_TOKEN_BUDGETS='{"glm-4.6":20000000,"deepseek-chat":50000000}' +``` + +**方式 B:Dashboard Settings(运行时可调)** + +通过 `PUT /api/settings` 更新 `modelTokenBudgets` 字段: + +```bash +curl -X PUT http://localhost:3000/api/settings \ + -H 'Content-Type: application/json' \ + -d '{"modelTokenBudgets":{"glm-4.6":20000000}}' +``` + +API 会自动把 key 归一化为 bare model name(小写、剥前缀)。 + +优先级:Dashboard settings > `MODEL_TOKEN_BUDGETS` env var > `{}`(不限制)。 + +### Bare-name 匹配规则 + +| 写入的 model 字段 | 归一化后 | +|------------------|---------| +| `z-ai/glm-4.6` | `glm-4.6` | +| `openrouter/glm-4.6` | `glm-4.6` | +| `openrouter/anthropic/claude-opus-4-6` | `claude-opus-4-6` | +| `GLM-4.6` | `glm-4.6` | +| `glm-4.6` | `glm-4.6` | + +配置 key 同样会经过此归一化,因此用户可以随便写大小写或带不带前缀。 + +### Health-check 响应格式(新增字段) + +```json +{ + "modelBudgets": { + "exhausted": [ + { "model": "glm-4.6", "used": 20300000, "cap": 20000000 } + ], + "usage": { "glm-4.6": 20300000, "claude-opus-4-6": 1200000 }, + "caps": { "glm-4.6": 20000000 } + } +} +``` + +### 恢复 + +提高 `modelTokenBudgets["<model>"]` 的值(dashboard settings 或 env var),下一次 health-check 即可撤销 directive,banner 消失,agent 自动恢复使用该模型。 diff --git a/docs/quickstart.md b/docs/quickstart.md new file mode 100644 index 0000000..b64d23f --- /dev/null +++ b/docs/quickstart.md @@ -0,0 +1,139 @@ +# 快速上手 + +## 前置要求 + +- Node.js 22+ +- GitHub Classic Token(`ghp_*` 格式,需要 `repo` scope) + - Fine-grained token(`github_pat_*`)无法在别人的仓库创建 PR,不适用 +- 任意 OpenAI 兼容的 LLM API key + +## 1.
配置模型 + +复制 `.env.example` 为 `.env`,填入 API 信息: + +```bash +cp .env.example .env +``` + +最少需要填 6 个变量: + +```bash +GITHUB_TOKEN=ghp_your-classic-token # 必须是 classic token +LLM_PROVIDER=deepseek # 供应商标识 +LLM_BASE_URL=https://api.deepseek.com/v1 # OpenAI 兼容端点 +LLM_API_KEY=sk-your-key # 供应商 API Key +LLM_MODEL_COMPLEX=deepseek-reasoner # 子 agent 用的模型(需要强推理) +LLM_MODEL_SIMPLE=deepseek-chat # 编排用的模型(轻量即可) +``` + +完整供应商配置参考 [model-routing.md](model-routing.md),支持 Anthropic、OpenAI、DeepSeek、Google Gemini、Mistral、MiniMax、Kimi、GLM 等 8 家供应商。 + +## 2. 切换模型 + +改 `.env` 中的 6 个变量后重启: + +```bash +bash scripts/restart.sh +``` + +`restart.sh` 会读取 `.env` → 替换 `config/openclaw.json` 中的占位符 → 部署到 `~/.openclaw/openclaw.json` → 重启 gateway。 + +验证是否生效: + +```bash +# 查看 gateway 日志中的模型信息 +openclaw logs 2>&1 | grep "agent model" +# 应该输出: [gateway] agent model: deepseek/deepseek-chat +``` + +## 3. 预算控制 + +### 在哪里看额度 + +**Dashboard → Overview 页**:顶部 metric cards 展示 24h cost 和 total cost。 + +**Dashboard → Health 页**:Cost Tracking Chart 展示花费趋势。 + +**API**: + +```bash +# 健康检查接口,返回预算信息 +curl http://localhost:3000/api/agent/health-check | jq '.budget' +# { +# "totalCostUsd": 0.42, +# "totalBudgetUsd": 20.0, +# "remainingUsd": 19.58, +# "exhausted": false +# } +``` + +### 设置预算上限 + +**方式 A:环境变量(启动时固定)** + +```bash +BUDGET_USD_TOTAL=20.0 # 美元,0 = 不限制 +``` + +**方式 B:Dashboard Settings(运行时可调)** + +访问 Dashboard Settings 页,修改 `totalBudgetUsd` 字段。下一个 heartbeat 周期(5 分钟)立即生效,无需重启。 + +当累计花费超过预算时,agent 自动停止新工作,dashboard 显示 `BUDGET EXHAUSTED` 指令。提高预算后自动恢复。 + +### 计费精度 + +当前 token 数是**估算值**(基于工具调用参数的字符长度),不是 LLM API 返回的真实 token 数。实际误差可达 5-20 倍。Dashboard 上的费用是量级参考,不能直接和供应商账单对账。详见 [model-routing.md § 计费原理](model-routing.md#计费原理)。 + +## 4. 
双轨定价 + +系统区分两种模型,各自独立计价: + +| 角色 | 模型变量 | 定价变量 | 用途 | +|------|---------|---------|------| +| Orchestrator | `LLM_MODEL_SIMPLE` | `INPUT_COST_PER_M_SIMPLE` / `OUTPUT_COST_PER_M_SIMPLE` | heartbeat 循环、文件读写、状态路由 | +| Sub-agents | `LLM_MODEL_COMPLEX` | `INPUT_COST_PER_M_COMPLEX` / `OUTPUT_COST_PER_M_COMPLEX` | 代码实现、bug 修复、PR review | + +如果不设 per-model 定价,回退到 `INPUT_COST_PER_M` / `OUTPUT_COST_PER_M`(默认 $3/$15)。 + +dashboard 的 token metrics 表按 `model` 列区分两种模型的用量,Health 页的 Cost Tracking Chart 展示合计趋势。 + +## 5. Dashboard + +### 启动 + +```bash +cd dashboard && npm run dev # 开发模式,http://localhost:3000 +cd dashboard && npm run build && npm run start # 生产模式 +``` + +### 页面说明 + +| 页面 | 看什么 | +|------|--------| +| **Overview** | agent 状态、子 agent 槽位、token/cost metrics、merge rate、PR 漏斗 | +| **Live Feed** | agent 实时思考过程、工具调用、错误 | +| **Pull Requests** | 所有提交的 PR,按状态/仓库/质量分筛选 | +| **Repo Health** | 目标仓库的健康评分、merge 速率、推荐策略 | +| **Health** | heartbeat、token 用量趋势、花费曲线 | +| **Quality** | PR 质量分析、首次通过率、拒绝原因 | +| **Logs** | 系统审计日志 | + +### 数据来源 + +Dashboard 数据有三个来源: + +1. **GitHub Sync**(`/api/github/sync`)— 从 GitHub API 拉取 PR 数据,基于 `GITHUB_USERNAME` 搜索 +2. **Ingest API**(`/api/ingest/*`)— agent 的 dashboard-reporter hook 实时推送 heartbeat、metrics、conversation +3. **本地 DB**(`dashboard/local.db`)— SQLite 存储所有数据 + +如果 dashboard 显示 "Disconnected",通常是 hook 推送不通(检查 `DASHBOARD_URL` 和 `CLAW_API_KEY` 环境变量)。 + +## 6. 
GitHub Token 说明 + +| Token 类型 | 格式 | 能否创建跨仓库 PR | +|-----------|------|-----------------| +| Classic token | `ghp_*` | 有 `repo` scope 即可 | +| Fine-grained token | `github_pat_*` | 不能(只对指定仓库有写权限) | + +ClawOSS 需要在别人的仓库 fork → push → 创建 PR,必须使用 **Classic token + `repo` scope**。 diff --git a/scripts/dashboard-sync.sh b/scripts/dashboard-sync.sh index cc9b2a2..51f2c6a 100755 --- a/scripts/dashboard-sync.sh +++ b/scripts/dashboard-sync.sh @@ -196,7 +196,7 @@ for line in sys.stdin: metrics.append({ 'inputTokens': inp, 'outputTokens': out, - 'model': model or 'minimax/MiniMax-M2.7', + 'model': model or os.environ.get('CLAWOSS_PRIMARY_MODEL') or os.environ.get('CLAWOSS_DEFAULT_MODEL') or (os.environ.get('LLM_PROVIDER','anthropic') + '/' + os.environ.get('LLM_MODEL_COMPLEX','claude-opus-4-6')), 'channel': sid }) if metrics: diff --git a/scripts/restart.sh b/scripts/restart.sh index a2836b0..d15fca0 100755 --- a/scripts/restart.sh +++ b/scripts/restart.sh @@ -65,6 +65,17 @@ if [ ${#MISSING[@]} -gt 0 ]; then exit 1 fi echo "[OK] All required tools found (python3, gh, jq, openclaw, node)" +if clawoss_is_macos; then + echo "[INFO] Platform: macOS — will use launchd plists for gateway and pr-ledger-sync" +else + if command -v systemctl >/dev/null 2>&1; then + echo "[INFO] Platform: $(uname -s) — will use systemd user units; launchd steps will be skipped" + else + echo "[WARN] Platform: $(uname -s) — no launchd AND no systemctl detected." + echo " Gateway will fall back to an unmanaged 'openclaw gateway run' background process." + echo " It WILL NOT survive a reboot. Consider running in Docker (see deploy/docker/)." 
+ fi +fi if [ "$SMOKE_MODE" -eq 1 ]; then echo "[INFO] Restart smoke mode enabled — skipping global cleanup and external side effects where possible" fi @@ -138,6 +149,18 @@ REPO_CONFIG_RESOLVED=$(sed \ -e "s|__WORKSPACE_PATH__|$WORKSPACE_DIR|g" \ -e "s|__PROJECT_DIR__|$PROJECT_DIR|g" \ -e "s|__HOME_DIR__|$HOME|g" \ + -e "s|__LLM_PROVIDER__|${LLM_PROVIDER:-anthropic}|g" \ + -e "s|__LLM_BASE_URL__|${LLM_BASE_URL:-https://api.anthropic.com/v1}|g" \ + -e "s|__LLM_MODEL_COMPLEX__|${LLM_MODEL_COMPLEX:-claude-opus-4-6}|g" \ + -e "s|__LLM_MODEL_SIMPLE__|${LLM_MODEL_SIMPLE:-claude-sonnet-4-6}|g" \ + -e "s|__INPUT_COST_PER_M_COMPLEX__|${INPUT_COST_PER_M_COMPLEX:-${INPUT_COST_PER_M:-3.0}}|g" \ + -e "s|__OUTPUT_COST_PER_M_COMPLEX__|${OUTPUT_COST_PER_M_COMPLEX:-${OUTPUT_COST_PER_M:-15.0}}|g" \ + -e "s|__INPUT_COST_PER_M_SIMPLE__|${INPUT_COST_PER_M_SIMPLE:-${INPUT_COST_PER_M:-3.0}}|g" \ + -e "s|__OUTPUT_COST_PER_M_SIMPLE__|${OUTPUT_COST_PER_M_SIMPLE:-${OUTPUT_COST_PER_M:-15.0}}|g" \ + -e "s|__INPUT_COST_PER_M__|${INPUT_COST_PER_M:-3.0}|g" \ + -e "s|__OUTPUT_COST_PER_M__|${OUTPUT_COST_PER_M:-15.0}|g" \ + -e "s|__LLM_CONTEXT_WINDOW__|${LLM_CONTEXT_WINDOW:-200000}|g" \ + -e "s|__LLM_MAX_TOKENS__|${LLM_MAX_TOKENS:-32000}|g" \ "$PROJECT_DIR/config/openclaw.json") _REPO_CONFIG="$REPO_CONFIG_RESOLVED" \ @@ -157,6 +180,19 @@ _CLAW_KEY="${CLAW_API_KEY:-}" \ _CLAWOSS_ROOT="${PROJECT_DIR}" \ _RECORD_DECISIONS="${CLAWOSS_RECORD_DECISIONS:-1}" \ _RECORD_OUTCOMES="${CLAWOSS_RECORD_OUTCOMES:-1}" \ +_LLM_KEY="${LLM_API_KEY:-}" \ +_LLM_BASE_URL="${LLM_BASE_URL:-}" \ +_LLM_PROVIDER="${LLM_PROVIDER:-}" \ +_LLM_MODEL_COMPLEX="${LLM_MODEL_COMPLEX:-}" \ +_LLM_MODEL_SIMPLE="${LLM_MODEL_SIMPLE:-}" \ +_INPUT_COST_PER_M="${INPUT_COST_PER_M:-}" \ +_OUTPUT_COST_PER_M="${OUTPUT_COST_PER_M:-}" \ +_INPUT_COST_PER_M_COMPLEX="${INPUT_COST_PER_M_COMPLEX:-}" \ +_OUTPUT_COST_PER_M_COMPLEX="${OUTPUT_COST_PER_M_COMPLEX:-}" \ +_INPUT_COST_PER_M_SIMPLE="${INPUT_COST_PER_M_SIMPLE:-}" \ 
+_OUTPUT_COST_PER_M_SIMPLE="${OUTPUT_COST_PER_M_SIMPLE:-}" \ +_BUDGET_USD_TOTAL="${BUDGET_USD_TOTAL:-}" \ +_MODEL_TOKEN_BUDGETS="${MODEL_TOKEN_BUDGETS:-}" \ python3 -c " import json, os @@ -198,6 +234,20 @@ env_map = { 'CLAWOSS_ROOT': os.environ.get('_CLAWOSS_ROOT', ''), 'CLAWOSS_RECORD_DECISIONS': os.environ.get('_RECORD_DECISIONS', ''), 'CLAWOSS_RECORD_OUTCOMES': os.environ.get('_RECORD_OUTCOMES', ''), + # Generic LLM config — used by model routing system + 'LLM_API_KEY': os.environ.get('_LLM_KEY', ''), + 'LLM_BASE_URL': os.environ.get('_LLM_BASE_URL', ''), + 'LLM_PROVIDER': os.environ.get('_LLM_PROVIDER', ''), + 'LLM_MODEL_COMPLEX': os.environ.get('_LLM_MODEL_COMPLEX', ''), + 'LLM_MODEL_SIMPLE': os.environ.get('_LLM_MODEL_SIMPLE', ''), + 'INPUT_COST_PER_M': os.environ.get('_INPUT_COST_PER_M', ''), + 'OUTPUT_COST_PER_M': os.environ.get('_OUTPUT_COST_PER_M', ''), + 'INPUT_COST_PER_M_COMPLEX': os.environ.get('_INPUT_COST_PER_M_COMPLEX', ''), + 'OUTPUT_COST_PER_M_COMPLEX': os.environ.get('_OUTPUT_COST_PER_M_COMPLEX', ''), + 'INPUT_COST_PER_M_SIMPLE': os.environ.get('_INPUT_COST_PER_M_SIMPLE', ''), + 'OUTPUT_COST_PER_M_SIMPLE': os.environ.get('_OUTPUT_COST_PER_M_SIMPLE', ''), + 'BUDGET_USD_TOTAL': os.environ.get('_BUDGET_USD_TOTAL', ''), + 'MODEL_TOKEN_BUDGETS': os.environ.get('_MODEL_TOKEN_BUDGETS', ''), } for k, v in env_map.items(): if v: @@ -290,8 +340,15 @@ if clawoss_is_macos && [ -f "$GATEWAY_PLIST" ]; then else echo "[OK] Gateway plist PATH already includes required dirs" fi +elif clawoss_is_macos; then + echo "[INFO] macOS detected but no gateway plist at $GATEWAY_PLIST — 'openclaw gateway install' will create it" else - echo "[INFO] No gateway plist found at $GATEWAY_PLIST — gateway install will create it" + # Linux / non-macOS: launchd/PlistBuddy do not exist. OpenClaw manages the + # gateway via systemd user units (see step 13). 
PATH propagation on Linux + # is handled by the Environment= directives emitted into the systemd unit + # file, so there's nothing to do here — log it so operators can see this + # step was intentionally skipped rather than silently broken. + echo "[SKIP] Gateway plist path update — not applicable on $(uname -s) (Linux uses systemd, handled in step 13)" fi # ── 7. Flush context & clean sessions ───────────────────────────────── @@ -572,7 +629,7 @@ runtime_status "running" "running" "${DASHBOARD_SYNC_STATE:-unknown}" "${RUN_CYC # ── Summary ─────────────────────────────────────────────────────────── echo "" echo "=== ClawOSS V10 Running ===" -echo " Model: minimax/m2.7 (MiniMax M2.7, 204k context) + kimi-coding/k2p5 fallback" +echo " Model: ${LLM_PROVIDER:-anthropic}/${LLM_MODEL_COMPLEX:-claude-opus-4-6} (complex) + ${LLM_PROVIDER:-anthropic}/${LLM_MODEL_SIMPLE:-claude-sonnet-4-6} (simple/orchestrator)" echo " Dashboard: https://clawoss-dashboard.vercel.app" echo " Slots: 3 always-on (scout + PR monitor + PR analyst) + 10 impl/followup = 13" echo " Heartbeat: 5m" diff --git a/scripts/start.sh b/scripts/start.sh index b8b9669..3eafe2b 100755 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -10,7 +10,7 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" PROJECT_DIR="$(clawoss_resolve_project_dir "$0")" AGENT_ID="clawoss" WORKSPACE_DIR="$(clawoss_resolve_workspace_dir "$0")" -AGENT_MODEL="${CLAWOSS_MODEL:-${CLAWOSS_AGENT_MODEL:-${CLAWOSS_PRIMARY_MODEL:-${CLAWOSS_DEFAULT_MODEL:-minimax/MiniMax-M2.7}}}}" +AGENT_MODEL="${CLAWOSS_MODEL:-${CLAWOSS_AGENT_MODEL:-${CLAWOSS_PRIMARY_MODEL:-${CLAWOSS_DEFAULT_MODEL:-${LLM_PROVIDER:-anthropic}/${LLM_MODEL_SIMPLE:-claude-sonnet-4-6}}}}}" if [ -f "$PROJECT_DIR/.env" ]; then set -a diff --git a/scripts/validate-config.mjs b/scripts/validate-config.mjs index 4d09293..f49da29 100644 --- a/scripts/validate-config.mjs +++ b/scripts/validate-config.mjs @@ -41,8 +41,28 @@ console.log("\n=== Config Files ==="); try { const raw = 
readFileSync(join(ROOT, "config/openclaw.json"), "utf8"); - JSON.parse(raw); - pass("config/openclaw.json is valid JSON"); + // openclaw.json contains __PLACEHOLDER__ tokens that scripts/restart.sh + + // deploy/docker/entrypoint.sh substitute at deploy time. Validate the + // post-substitution shape here so CI catches malformed templates without + // requiring operators to run the full deploy flow. + const substituted = raw + .replace(/__WORKSPACE_PATH__/g, "/app/workspace") + .replace(/__PROJECT_DIR__/g, "/app") + .replace(/__HOME_DIR__/g, "/home/clawoss") + .replace(/__LLM_PROVIDER__/g, "anthropic") + .replace(/__LLM_BASE_URL__/g, "https://api.anthropic.com/v1") + .replace(/__LLM_MODEL_COMPLEX__/g, "claude-opus-4-6") + .replace(/__LLM_MODEL_SIMPLE__/g, "claude-sonnet-4-6") + .replace(/__INPUT_COST_PER_M_COMPLEX__/g, "5.0") + .replace(/__OUTPUT_COST_PER_M_COMPLEX__/g, "25.0") + .replace(/__INPUT_COST_PER_M_SIMPLE__/g, "3.0") + .replace(/__OUTPUT_COST_PER_M_SIMPLE__/g, "15.0") + .replace(/__INPUT_COST_PER_M__/g, "3.0") + .replace(/__OUTPUT_COST_PER_M__/g, "15.0") + .replace(/__LLM_CONTEXT_WINDOW__/g, "200000") + .replace(/__LLM_MAX_TOKENS__/g, "32000"); + JSON.parse(substituted); + pass("config/openclaw.json is valid JSON (post-template-substitution)"); } catch (e) { fail(`config/openclaw.json: ${e.message}`); } diff --git a/workspace/hooks/dashboard-reporter/handler.ts b/workspace/hooks/dashboard-reporter/handler.ts index e5f4ce8..d35438d 100644 --- a/workspace/hooks/dashboard-reporter/handler.ts +++ b/workspace/hooks/dashboard-reporter/handler.ts @@ -1,15 +1,44 @@ const DASHBOARD_URL = process.env.DASHBOARD_URL || "https://clawoss-dashboard.vercel.app"; const AGENT_ID = "clawoss"; -const GITHUB_USERNAME = process.env.GITHUB_USERNAME || "unknown"; -const DEFAULT_MODEL = - process.env.CLAWOSS_PRIMARY_MODEL || - process.env.CLAWOSS_DEFAULT_MODEL || - "minimax/MiniMax-M2.7"; -const INPUT_COST_PER_TOKEN = 0.3 / 1_000_000; -const OUTPUT_COST_PER_TOKEN = 1.2 / 
1_000_000; - -let accumulatedInputTokens = 0; -let accumulatedOutputTokens = 0; +const GITHUB_USERNAME = process.env.GITHUB_USERNAME || "BillionClaw"; + +// Model routing — driven by env vars. See docs/model-routing.md. +const LLM_PROVIDER = process.env.LLM_PROVIDER || "anthropic"; +const LLM_MODEL_COMPLEX = process.env.LLM_MODEL_COMPLEX || "claude-opus-4-6"; +const LLM_MODEL_SIMPLE = process.env.LLM_MODEL_SIMPLE || "claude-sonnet-4-6"; + +const MODEL_COMPLEX = `${LLM_PROVIDER}/${LLM_MODEL_COMPLEX}`; +const MODEL_SIMPLE = `${LLM_PROVIDER}/${LLM_MODEL_SIMPLE}`; + +// Per-model pricing (USD per million tokens). +// Complex and simple models can have different prices. +// Falls back to INPUT_COST_PER_M / OUTPUT_COST_PER_M if per-model vars not set. +const FALLBACK_IN = process.env.INPUT_COST_PER_M || "3.0"; +const FALLBACK_OUT = process.env.OUTPUT_COST_PER_M || "15.0"; + +const PRICING = { + complex: { + input: parseFloat(process.env.INPUT_COST_PER_M_COMPLEX || FALLBACK_IN) / 1_000_000, + output: parseFloat(process.env.OUTPUT_COST_PER_M_COMPLEX || FALLBACK_OUT) / 1_000_000, + }, + simple: { + input: parseFloat(process.env.INPUT_COST_PER_M_SIMPLE || FALLBACK_IN) / 1_000_000, + output: parseFloat(process.env.OUTPUT_COST_PER_M_SIMPLE || FALLBACK_OUT) / 1_000_000, + }, +}; + +// Determine model tier from session key: +// main session → orchestrator → simple model +// anything else → sub-agent → complex model +function modelTierForSession(sessionId: string): "complex" | "simple" { + return sessionId === "main" ? 
"simple" : "complex"; +} + +// Per-tier token accumulators — flushed to dashboard on agent_end +const accumulated = { + complex: { inputTokens: 0, outputTokens: 0 }, + simple: { inputTokens: 0, outputTokens: 0 }, +}; let accumulatedDurationMs = 0; let toolCallCount = 0; let startTime = Date.now(); @@ -171,7 +200,7 @@ async function postState(apiKey: string): Promise { metadata: { agent_id: AGENT_ID, tool_calls: toolCallCount, - model: DEFAULT_MODEL, + model: MODEL_SIMPLE, // postState reflects orchestrator (main session) }, }), signal: controller.signal, @@ -249,8 +278,9 @@ const handler = async (event: { const params = event.params || {}; if (typeof params === "object") { const paramStr = JSON.stringify(params); - accumulatedInputTokens += Math.ceil(paramStr.length / 4); - accumulatedOutputTokens += Math.ceil(paramStr.length / 8); + const tier = modelTierForSession(sessionId); + accumulated[tier].inputTokens += Math.ceil(paramStr.length / 4); + accumulated[tier].outputTokens += Math.ceil(paramStr.length / 8); } // Track repos from tool params @@ -488,7 +518,7 @@ const handler = async (event: { role: "assistant", content: event.assistantMessage.slice(0, 5000), timestamp: ts, - tokenCount: accumulatedOutputTokens || null, + tokenCount: accumulated[modelTierForSession(sessionId)].outputTokens || null, metadata: { agent_id: AGENT_ID, event: "agent_end", @@ -518,7 +548,7 @@ const handler = async (event: { role: "system", content: event.error ? 
`Run ended with error: ${event.error} (${toolCallCount} tool calls, ${uptimeSeconds}s)` - : `Run completed: ${toolCallCount} tool calls, ${uptimeSeconds}s, ~${accumulatedInputTokens + accumulatedOutputTokens} tokens`, + : `Run completed: ${toolCallCount} tool calls, ${uptimeSeconds}s, ~${accumulated.complex.inputTokens + accumulated.complex.outputTokens + accumulated.simple.inputTokens + accumulated.simple.outputTokens} tokens`, timestamp: ts, metadata: { agent_id: AGENT_ID, @@ -550,7 +580,7 @@ const handler = async (event: { metadata: { session_key: sessionId, tool_calls: toolCallCount, - model: DEFAULT_MODEL, + model: modelTierForSession(sessionId) === "simple" ? MODEL_SIMPLE : MODEL_COMPLEX, repos: Array.from(reposUsed), skill: lastSkillName, }, @@ -558,38 +588,38 @@ const handler = async (event: { apiKey ); - // Send accumulated metrics if any - if (accumulatedInputTokens > 0 || accumulatedOutputTokens > 0) { - const costUsd = - accumulatedInputTokens * INPUT_COST_PER_TOKEN + - accumulatedOutputTokens * OUTPUT_COST_PER_TOKEN; + // Send accumulated metrics — one entry per model tier that has usage + const metricsEntries: Record[] = []; + const tiers = (["complex", "simple"] as const).filter( + (t) => accumulated[t].inputTokens > 0 || accumulated[t].outputTokens > 0 + ); - await postNonBlocking( - "/api/ingest/metrics", - { - metrics: [ - { - channel: "agent", - provider: "minimax", - model: DEFAULT_MODEL, - inputTokens: accumulatedInputTokens, - outputTokens: accumulatedOutputTokens, - costUsd: Math.round(costUsd * 1_000_000) / 1_000_000, - runDurationMs: accumulatedDurationMs, - contextTokens: accumulatedInputTokens, - }, - ], - }, - apiKey - ); + for (const tier of tiers) { + const { inputTokens, outputTokens } = accumulated[tier]; + const pricing = PRICING[tier]; + const costUsd = inputTokens * pricing.input + outputTokens * pricing.output; + metricsEntries.push({ + channel: tier === "complex" ? 
"subagent" : "orchestrator", + provider: `${LLM_PROVIDER}-direct`, + model: tier === "complex" ? MODEL_COMPLEX : MODEL_SIMPLE, + inputTokens, + outputTokens, + costUsd: Math.round(costUsd * 1_000_000) / 1_000_000, + runDurationMs: tier === "complex" ? accumulatedDurationMs : 0, + contextTokens: inputTokens, + }); + accumulated[tier].inputTokens = 0; + accumulated[tier].outputTokens = 0; + } - // Reset accumulators - accumulatedInputTokens = 0; - accumulatedOutputTokens = 0; - accumulatedDurationMs = 0; - toolCallCount = 0; + if (metricsEntries.length > 0) { + await postNonBlocking("/api/ingest/metrics", { metrics: metricsEntries }, apiKey); } + // Reset shared accumulators + accumulatedDurationMs = 0; + toolCallCount = 0; + // Log agent completion with enriched metadata await postNonBlocking( "/api/ingest/logs", diff --git a/workspace/hooks/dashboard-reporter/post-tool.sh b/workspace/hooks/dashboard-reporter/post-tool.sh index bdcee2c..4199065 100755 --- a/workspace/hooks/dashboard-reporter/post-tool.sh +++ b/workspace/hooks/dashboard-reporter/post-tool.sh @@ -6,7 +6,7 @@ DASHBOARD_URL="${DASHBOARD_URL:-https://clawoss-dashboard.vercel.app}" API_KEY="${CLAW_API_KEY:?Set CLAW_API_KEY env var}" SESSION_ID="${CLAUDE_SESSION_ID:-agent-session}" -DEFAULT_MODEL="${CLAWOSS_PRIMARY_MODEL:-${CLAWOSS_DEFAULT_MODEL:-minimax/MiniMax-M2.7}}" +DEFAULT_MODEL="${CLAWOSS_PRIMARY_MODEL:-${CLAWOSS_DEFAULT_MODEL:-${LLM_PROVIDER:-anthropic}/${LLM_MODEL_COMPLEX:-claude-opus-4-6}}}" # Read the hook input from stdin INPUT=$(cat 2>/dev/null || echo '{}') @@ -33,7 +33,7 @@ PAYLOAD=$(jq -n \ --argjson durationMs "$DURATION" \ --arg ts "$TIMESTAMP" \ --arg resultContent "$TOOL_OUTPUT_RAW" \ - --arg defaultModel "$DEFAULT_MODEL" \ + --arg model "$DEFAULT_MODEL" \ '{ messages: [ { @@ -44,7 +44,7 @@ PAYLOAD=$(jq -n \ toolCallId: $toolCallId, durationMs: $durationMs, timestamp: $ts, - metadata: { agent_id: "clawoss", model: $defaultModel } + metadata: { agent_id: "clawoss", model: $model } }, { 
sessionId: $sid,