diff --git a/.gitignore b/.gitignore index 933497d..5ddadf0 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,4 @@ package-lock.json *.png *.pem .docker-data/ +PR-FLOW.md diff --git a/examples/proxy-user-inject.js b/examples/proxy-user-inject.js new file mode 100644 index 0000000..ea9291f --- /dev/null +++ b/examples/proxy-user-inject.js @@ -0,0 +1,166 @@ +/** + * proxy-user-inject.js + * ==================== + * A lightweight HTTP proxy that injects a `user` field into every chat + * completion request body before forwarding it to the upstream WindsurfAPI + * instance. When paired with sticky sessions, this ensures that multiple + * end users sharing a single WindsurfAPI deployment each get bound to a + * different upstream Windsurf account — no cross-user quota mixing. + * + * ── Problem ──────────────────────────────────────────────────────────── + * Two developers share one WindsurfAPI on a VPS. Both access the same + * API_KEY via ccswitch / Cline / Claude Code, so from WindsurfAPI's + * perspective they look identical. Without a per-user signal: + * • STICKY_SESSION_ENABLED can't tell them apart + * • CASCADE_REUSE_BY_CALLER produces the same callerKey for both + * • They randomly compete for accounts in the pool + * + * ── Solution ─────────────────────────────────────────────────────────── + * Each user points their client at a different proxy port (e.g. :3004 + * for Alice, :3005 for Bob). The proxy injects `user: 'alice'` (or + * `user: 'bob'`) into the request body before forwarding to WindsurfAPI + * on 127.0.0.1:3003. + * + * WindsurfAPI's caller-key.js then builds the callerKey as: + * api::user: + * + * This produces two distinct, stable callerKeys → sticky sessions stay + * pinned to separate accounts → cascade reuse works independently. + * + * ── HTTP method handling ─────────────────────────────────────────────── + * GET /v1/models (and other read endpoints) are transparently forwarded + * as-is. The `user` field is only injected into POST / PUT / PATCH + * request bodies. + * + * ── Prerequisites ────────────────────────────────────────────────────── + * • WindsurfAPI running on 127.0.0.1:3003 with .env containing: + * STICKY_SESSION_ENABLED=1 + * CASCADE_REUSE_BY_CALLER=1 + * • (Recommended) Independent LS instances per user via tinyproxy: + * - tinyproxy on :8080 → LS on :42101 (Alice) + * - tinyproxy on :9090 → LS on :42102 (Bob) + * On the Dashboard, assign 127.0.0.1:8080 / 127.0.0.1:9090 as account + * proxy configs to separate LS pools. + * + * ── Usage ────────────────────────────────────────────────────────────── + * 1. Copy this file for each user and change the port + user value: + * $ cp proxy-user-inject.js proxy-alice.js → port 3004, user 'alice' + * $ cp proxy-user-inject.js proxy-bob.js → port 3005, user 'bob' + * + * 2. Start each proxy (directly via Node.js or as a systemd service): + * $ node proxy-alice.js & + * $ node proxy-bob.js & + * + * 3. Point each user's client to their dedicated port: + * Alice → http://:3004/v1 + * Bob → http://:3005/v1 + * + * ── systemd Service Example ──────────────────────────────────────────── + * Create /etc/systemd/system/windsurf-proxy-alice.service: + * + * [Unit] + * Description=WindsurfAPI user-inject proxy (Alice) + * After=network.target + * + * [Service] + * Type=simple + * ExecStart=/usr/bin/node /opt/windsurf/proxy-alice.js + * Restart=always + * RestartSec=5 + * User=nobody + * WorkingDirectory=/opt/windsurf + * + * [Install] + * WantedBy=multi-user.target + * + * Then: + * $ sudo systemctl daemon-reload + * $ sudo systemctl enable --now windsurf-proxy-alice + * + * ── Verification ─────────────────────────────────────────────────────── + * After setup, send two consecutive requests from the same proxy port: + * + * $ curl -s -X POST http://:3004/v1/chat/completions \ + * -H "Content-Type: application/json" \ + * -H "Authorization: Bearer YOUR_API_KEY" \ + * -d '{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"max_tokens":50}' + * + * Check server logs: both requests should hit the same upstream account + * with reuse=1 on the second request: + * + * account=same@email.com ls=42101 turns=N reuse=1 + * account=same@email.com ls=42101 turns=N+1 reuse=1 + * + * And the other proxy port should land on a different account, confirming + * isolation. + * + * ── Security Note ────────────────────────────────────────────────────── + * These proxies listen on 0.0.0.0 by default so remote clients can reach + * them. If all clients are on the same machine as the server, change the + * listen address to 127.0.0.1. If exposed to the internet, consider + * putting them behind nginx with TLS or using a firewall to restrict + * source IPs. + */ + +'use strict'; + +const http = require('http'); + +// ═══════════════ CONFIGURE THESE ═══════════════ +const LISTEN_PORT = 3004; // External port clients connect to +const LISTEN_ADDR = '0.0.0.0'; // '127.0.0.1' for local-only +const USER_ID = 'alice'; // Injected as body.user (per-user unique) +const UPSTREAM_HOST = '127.0.0.1'; // WindsurfAPI host +const UPSTREAM_PORT = 3003; // WindsurfAPI port +// ════════════════════════════════════════════════ + +http.createServer((req, res) => { + let body = ''; + req.on('data', chunk => body += chunk); + req.on('end', () => { + try { + const hasBody = (req.method === 'POST' || req.method === 'PUT' || req.method === 'PATCH'); + const json = hasBody ? JSON.parse(body || '{}') : {}; + let postData = null; + + const headers = { + 'Authorization': req.headers['authorization'] || '', + 'Host': `${UPSTREAM_HOST}:${UPSTREAM_PORT}`, + }; + + if (hasBody) { + json.user = USER_ID; + postData = JSON.stringify(json); + headers['Content-Type'] = 'application/json'; + headers['Content-Length'] = Buffer.byteLength(postData); + } + + const opts = { + hostname: UPSTREAM_HOST, + port: UPSTREAM_PORT, + path: req.url, + method: req.method, + headers, + }; + + const upstream = http.request(opts, (proxyRes) => { + res.writeHead(proxyRes.statusCode, proxyRes.headers); + proxyRes.on('data', chunk => res.write(chunk)); + proxyRes.on('end', () => res.end()); + }); + + upstream.on('error', (err) => { + res.writeHead(502, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: { message: 'Upstream error: ' + err.message } })); + }); + + if (postData) upstream.write(postData); + upstream.end(); + } catch (err) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: { message: 'Parse error: ' + err.message } })); + } + }); +}).listen(LISTEN_PORT, LISTEN_ADDR, () => { + console.log(`[proxy-user-inject] user="${USER_ID}" listening on ${LISTEN_ADDR}:${LISTEN_PORT} → ${UPSTREAM_HOST}:${UPSTREAM_PORT}`); +}); diff --git a/src/account/sticky-session.js b/src/account/sticky-session.js index d2285b5..3379b9f 100644 --- a/src/account/sticky-session.js +++ b/src/account/sticky-session.js @@ -34,6 +34,9 @@ * Related issues: #93, #133 (context loss mid-task) */ +import { isExperimentalEnabled } from '../runtime-config.js'; +import { log } from '../config.js'; + const ENABLED = process.env.STICKY_SESSION_ENABLED === '1'; const TTL_MS = (() => { @@ -59,6 +62,9 @@ const _stats = { * Using \0 delimiter (valid in Map keys but never appears in user input). */ function bindingKey(callerKey, modelKey) { + if (isExperimentalEnabled('stickyBindByUserOnly')) { + return callerKey + '\0' + '*'; + } return callerKey + '\0' + (modelKey || '*'); } @@ -97,13 +103,16 @@ export function isStickyEnabled() { * @returns {{ accountId: string, apiKey: string } | null} */ export function getStickyBinding(callerKey, modelKey = '') { - if (!ENABLED || !callerKey) return null; + log.info('[sticky] ENTER callerKey=%s model=%s enabled=%s', (callerKey || '(none)').slice(0, 50), modelKey, ENABLED); + if (!ENABLED) return null; + if (!callerKey) { log.info('[sticky] SKIP (no callerKey) model=%s', modelKey); return null; } ensureCleanupTimer(); const key = bindingKey(callerKey, modelKey); const binding = _bindings.get(key); if (!binding) { _stats.misses++; + log.info('[sticky] MISS key=%s model=%s', key, modelKey); return null; } @@ -116,6 +125,7 @@ export function getStickyBinding(callerKey, modelKey = '') { binding.lastAccess = now; _stats.hits++; + log.info('[sticky] HIT key=%s account=%s', key, binding.accountId); return { accountId: binding.accountId, apiKey: binding.apiKey }; } @@ -158,7 +168,10 @@ export function setStickyBinding(callerKey, modelKey, accountId, apiKey) { lastAccess: now, }); - if (!existing) _stats.creates++; + if (!existing) { + _stats.creates++; + log.info('[sticky] SET key=%s account=%s', key, accountId); + } } /** @@ -170,7 +183,9 @@ export function setStickyBinding(callerKey, modelKey, accountId, apiKey) { */ export function clearStickyBinding(callerKey, modelKey = '') { if (!ENABLED || !callerKey) return; - _bindings.delete(bindingKey(callerKey, modelKey)); + const key = bindingKey(callerKey, modelKey); + if (_bindings.has(key)) log.info('[sticky] CLEAR key=%s', key); + _bindings.delete(key); } /** diff --git a/src/auth.js b/src/auth.js index 8fdfeaa..8e2e5d1 100644 --- a/src/auth.js +++ b/src/auth.js @@ -12,6 +12,7 @@ import { createHash, randomUUID, timingSafeEqual } from 'crypto'; import { isStickyEnabled, getStickyBinding, setStickyBinding, clearStickyBinding } from './account/sticky-session.js'; +import { isExperimentalEnabled } from './runtime-config.js'; import { readFileSync, writeFileSync, existsSync, renameSync, unlinkSync, readdirSync } from 'fs'; import { config, log } from './config.js'; import { getEffectiveProxy } from './dashboard/proxy-config.js'; @@ -630,6 +631,7 @@ export function getApiKey(excludeKeys = [], modelKey = null, callerKey = null) { // account so the cascade_id from the previous turn is still valid. // Falls through to normal selection if the bound account is unavailable. if (callerKey && isStickyEnabled()) { + log.info('[sticky] CHECK callerKey=%s model=%s enabled=%s', (callerKey || '(none)').slice(0, 50), modelKey || '(none)', isStickyEnabled()); const bound = getStickyBinding(callerKey, modelKey); if (bound) { const acct = accounts.find(a => a.id === bound.accountId && a.status === 'active' && a.apiKey === bound.apiKey); @@ -653,10 +655,17 @@ export function getApiKey(excludeKeys = [], modelKey = null, callerKey = null) { } } } - // Bound account is no longer usable — clear it so the next call - // falls through to normal selection instead of looping. + // Bound account is no longer usable + if (isExperimentalEnabled('stickyNoFallback')) { + log.info('[sticky] NO-FALLBACK callerKey=%s model=%s — bound account unavailable, refusing to rotate', + (callerKey || '').slice(0, 50), modelKey || '(none)'); + return null; + } + // Clear it so the next call falls through to normal selection instead of looping. clearStickyBinding(callerKey, modelKey); } + } else { + log.info('[sticky] SKIP-CHECK callerKey=%s enabled=%s', (callerKey ? callerKey.slice(0, 30) : String(callerKey)), isStickyEnabled()); } const candidates = []; @@ -699,6 +708,49 @@ export function getApiKey(excludeKeys = [], modelKey = null, callerKey = null) { return (x.account.lastUsed || 0) - (y.account.lastUsed || 0); }); + // ── Tiebreaker: user-aware account sharding ───────────────────── + // + // Level 1 — strict pinning (stickyBindByUserOnly + stickyNoFallback): + // When both flags are on the user wants per-user account isolation + // with zero cross-contamination. Skip all health-metric comparison + // and deterministically pin each caller to a fixed account slot from + // the very first request, regardless of quota / RPM / tier + // differences. Once pinned, stickyNoFallback prevents the request + // from ever rotating to another account. + // + // Level 2 — soft sharding (the two flags are NOT both on): + // Only re-shard candidates when the top two are genuinely tied on + // every health metric. This avoids overriding legitimate + // load-balancing when one account is clearly healthier. + if (callerKey && candidates.length > 1) { + const strictPin = isExperimentalEnabled('stickyBindByUserOnly') && isExperimentalEnabled('stickyNoFallback'); + let doShard = false; + if (strictPin) { + doShard = true; + } else { + const first = candidates[0]; + const second = candidates[1]; + const ix0 = first.account._inflight || 0; + const iy0 = second.account._inflight || 0; + const qx0 = Math.floor(quotaScore(first.account) / 5); + const qy0 = Math.floor(quotaScore(second.account) / 5); + const rx0 = (first.limit - first.used) / first.limit || 0; + const ry0 = (second.limit - second.used) / second.limit || 0; + doShard = + ix0 === iy0 && qx0 === qy0 && rx0 === ry0 && + (first.account.lastUsed || 0) === (second.account.lastUsed || 0); + } + if (doShard) { + const hash = createHash('sha256').update(callerKey).digest(); + const bucket = hash.readUInt32BE(0) % candidates.length; + if (bucket > 0) { + const chosen = candidates[bucket]; + candidates[bucket] = candidates[0]; + candidates[0] = chosen; + } + } + } + const { account } = candidates[0]; const reservationTimestamp = nextReservationToken(now); account._rpmHistory.push(reservationTimestamp); diff --git a/src/caller-key.js b/src/caller-key.js index 61d3b32..0869e99 100644 --- a/src/caller-key.js +++ b/src/caller-key.js @@ -1,4 +1,5 @@ import { createHash } from 'crypto'; +import { log } from './config.js'; function sha256Hex(value) { return createHash('sha256').update(String(value || '')).digest('hex'); @@ -19,8 +20,8 @@ function sha256Hex(value) { // pinned to (apiKey, user/session). Returns '' when no usable signal. export function extractBodyCallerSubKey(body) { if (!body || typeof body !== 'object') return ''; + if (typeof body.user === 'string') return sha256Hex(body.user).slice(0, 16); const candidates = [ - typeof body.user === 'string' ? body.user : '', typeof body?.metadata?.conversation_id === 'string' ? body.metadata.conversation_id : '', typeof body.conversation === 'string' ? body.conversation : '', typeof body.previous_response_id === 'string' ? body.previous_response_id : '', @@ -67,6 +68,8 @@ function ipUaFingerprint(req) { export function callerKeyFromRequest(req, apiKey = '', body = null) { const bodySubKey = body ? extractBodyCallerSubKey(body) : ''; + const hasUserInBody = !!(body && typeof body.user === 'string'); + log.info('[caller-key] body.user=%s subKey=%s', hasUserInBody ? body.user : '(none)', bodySubKey || '(none)'); if (apiKey) { const base = `api:${sha256Hex(apiKey).slice(0, 32)}`; if (bodySubKey) return `${base}:user:${bodySubKey}`; diff --git a/src/dashboard/index.html b/src/dashboard/index.html index 0324077..99a44f4 100644 --- a/src/dashboard/index.html +++ b/src/dashboard/index.html @@ -2248,6 +2248,42 @@

实验性功能

+
+
+
粘性会话按用户绑定
+
+
+
开启后,同一 user 无论请求什么模型都会固定绑定到同一个上游账号。关闭时按 (user, model) 二元组分别绑定(不同模型可能分配不同账号,适合账号有模型限制的场景)。需启用 STICKY_SESSION_ENABLED=1 才生效。
+
+ +
+
忽略模型维度
+
默认关闭。开启后 user_a 下的 opus/haiku/gemini 全部共享同一个绑定。
+
+
+
+
+
+
+
粘性会话禁止回退
+
+
+
开启后,粘性会话绑定的账号如果遇到限流/错误/模型不可用,不会自动切换到账号池中的其他账号,而是直接将错误返回给客户端。这样可以确保每个 user 严格只消耗自己绑定账号的额度。需启用 STICKY_SESSION_ENABLED=1 才生效。
+
+ +
+
禁止账号回退
+
默认关闭。开启后绑定账号失败时不会切换到其他账号消耗额度。
+
+
+
+
@@ -4059,7 +4095,7 @@

控制台登录

// Use fetch + blob so we can pass the X-Dashboard-Password header // (raw can't set headers). const resp = await fetch(url, { - headers: { 'X-Dashboard-Password': sessionStorage.getItem('dashboard_password') || '' }, + headers: { 'X-Dashboard-Password': this.password || '' }, }); if (!resp.ok) { this.toast(I18n.t('toast.exportFailed') + ': HTTP ' + resp.status, 'error'); @@ -5367,6 +5403,10 @@

控制台登录

// v2.0.70 (#112 follow-up) — quiet-window auto-update toggle. const cbQw = document.getElementById('exp-quiet-window'); if (cbQw) cbQw.checked = !!d.flags?.autoUpdateQuietWindow; + const cbStickyByUser = document.getElementById('exp-sticky-by-user'); + if (cbStickyByUser) cbStickyByUser.checked = !!d.flags?.stickyBindByUserOnly; + const cbStickyNoFallback = document.getElementById('exp-sticky-no-fallback'); + if (cbStickyNoFallback) cbStickyNoFallback.checked = !!d.flags?.stickyNoFallback; this.loadQuietWindowStatus(); // Reflect current skin cookie in the dropdown so reload state is visible. const skinSel = document.getElementById('skin-select'); diff --git a/src/handlers/chat.js b/src/handlers/chat.js index a6a854f..e5fcae3 100644 --- a/src/handlers/chat.js +++ b/src/handlers/chat.js @@ -1147,6 +1147,10 @@ async function waitForAccount(tried, signal, maxWaitMs = QUEUE_MAX_WAIT_MS, mode while (!acct) { if (signal?.aborted) return null; if (Date.now() >= deadline) return null; + if (callerKey && isStickyEnabled() && isExperimentalEnabled('stickyNoFallback')) { + log.info('[sticky] NO-FALLBACK waitForAccount — bound account unavailable, failing immediately'); + return null; + } await new Promise(r => setTimeout(r, QUEUE_RETRY_MS)); acct = getApiKey(tried, modelKey, callerKey); } @@ -2023,11 +2027,19 @@ async function _handleChatCompletionsInner(body, context = {}) { }, }; } + if (acct?._sticky && isExperimentalEnabled('stickyNoFallback')) { + log.warn(`Account ${acct.email} (sticky-bound) rate-limited on ${displayModel}, stickyNoFallback enabled — not trying other accounts`); + break; + } log.warn(`Account ${acct.email} rate-limited on ${displayModel}, trying next account`); continue; } // Cascade transient 错误通常是上游或本地 LS 短暂抖动,先退避再切账号,避免连续打爆同一热窗口。 if (errType === 'upstream_internal_error' || errType === 'upstream_transient_error') { + if (acct?._sticky && isExperimentalEnabled('stickyNoFallback')) { + log.warn(`Chat[${reqId}]: ${acct.email} (sticky-bound) upstream transient error, stickyNoFallback enabled — not trying other accounts`); + break; + } internalCount++; const backoffMs = await internalErrorBackoff(internalCount - 1); log.warn(`Chat[${reqId}]: ${acct.email} upstream transient error, waited ${backoffMs}ms before next account`); @@ -2035,6 +2047,10 @@ async function _handleChatCompletionsInner(body, context = {}) { } // Model not available on this account (permission_denied, etc.) if (errType === 'model_not_available') { + if (acct?._sticky && isExperimentalEnabled('stickyNoFallback')) { + log.warn(`Account ${acct.email} (sticky-bound) cannot serve ${displayModel}, stickyNoFallback enabled — not trying other accounts`); + break; + } log.warn(`Account ${acct.email} cannot serve ${displayModel}, trying next account`); continue; } @@ -2923,7 +2939,7 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad } } if (!acct) { - acct = await waitForAccountFn(tried, abortController.signal, QUEUE_MAX_WAIT_MS, modelKey); + acct = await waitForAccountFn(tried, abortController.signal, QUEUE_MAX_WAIT_MS, modelKey, callerKey); if (!acct) { // Without an explicit lastErr here, the final retry-failed log // ends up printing an empty message and the SSE error event @@ -3304,6 +3320,11 @@ function streamResponse(id, created, model, modelKey, provider, messages, cascad } // Retry only if nothing has been streamed yet AND it's a retryable error if (!hadSuccess && (err.isModelError || isRateLimit)) { + if (acct?._sticky && isExperimentalEnabled('stickyNoFallback')) { + const tag = isRateLimit ? 'rate_limit' : isTransient ? 'upstream_transient' : 'model_error'; + log.warn(`Account ${acct.email} (sticky-bound) failed (${tag}) on ${model}, stickyNoFallback enabled — not trying other accounts`); + break; + } const tag = isRateLimit ? 'rate_limit' : isTransient ? 'upstream_transient' : 'model_error'; if (isTransient) { streamInternalCount++; diff --git a/src/handlers/messages.js b/src/handlers/messages.js index cea5ef8..acfd18a 100644 --- a/src/handlers/messages.js +++ b/src/handlers/messages.js @@ -674,7 +674,8 @@ export async function handleMessages(body, context = {}) { // sharing one API key. Bare API-key callers and other client SDKs that // do not send metadata.user_id keep the original callerKey unchanged. const subKey = extractCallerSubKey(body); - const effectiveContext = subKey + const alreadyUserScoped = context.callerKey && context.callerKey.includes(':user:'); + const effectiveContext = (subKey && !alreadyUserScoped) ? { ...context, callerKey: `${context.callerKey || ''}:user:${subKey}` } : context; diff --git a/src/runtime-config.js b/src/runtime-config.js index 010f843..7517e19 100644 --- a/src/runtime-config.js +++ b/src/runtime-config.js @@ -41,6 +41,18 @@ const DEFAULTS = { // the container). Default OFF — only useful for self-hosted boxes // that mount /var/run/docker.sock and run via docker-compose. autoUpdateQuietWindow: false, + // When enabled with STICKY_SESSION_ENABLED=1, the sticky session + // binding ignores the model dimension — a user gets the same + // upstream account regardless of which model they request. + // Default OFF to preserve per-model isolation (avoids routing + // requests through an account that may not entitle that model). + stickyBindByUserOnly: false, + // When enabled, a sticky-bound account that fails (rate_limit, + // upstream_error, model_not_available) does NOT trigger account + // rotation. The request fails back to the client immediately + // instead of burning through other accounts in the pool. + // Requires STICKY_SESSION_ENABLED=1. Default OFF. + stickyNoFallback: false, }, // v2.0.67 (#112) — Tunables for the quiet-window auto-updater. // Not under `experimental` because they're not boolean flags.