Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@ package-lock.json
*.png
*.pem
.docker-data/
PR-FLOW.md
166 changes: 166 additions & 0 deletions examples/proxy-user-inject.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/**
* proxy-user-inject.js
* ====================
* A lightweight HTTP proxy that injects a `user` field into every chat
* completion request body before forwarding it to the upstream WindsurfAPI
* instance. When paired with sticky sessions, this ensures that multiple
* end users sharing a single WindsurfAPI deployment each get bound to a
* different upstream Windsurf account — no cross-user quota mixing.
*
* ── Problem ────────────────────────────────────────────────────────────
* Two developers share one WindsurfAPI on a VPS. Both access the same
* API_KEY via ccswitch / Cline / Claude Code, so from WindsurfAPI's
* perspective they look identical. Without a per-user signal:
* • STICKY_SESSION_ENABLED can't tell them apart
* • CASCADE_REUSE_BY_CALLER produces the same callerKey for both
* • They randomly compete for accounts in the pool
*
* ── Solution ───────────────────────────────────────────────────────────
* Each user points their client at a different proxy port (e.g. :3004
* for Alice, :3005 for Bob). The proxy injects `user: 'alice'` (or
* `user: 'bob'`) into the request body before forwarding to WindsurfAPI
* on 127.0.0.1:3003.
*
* WindsurfAPI's caller-key.js then builds the callerKey as:
* api:<apiKeyHash>:user:<body.user hash>
*
* This produces two distinct, stable callerKeys → sticky sessions stay
* pinned to separate accounts → cascade reuse works independently.
*
* ── HTTP method handling ───────────────────────────────────────────────
* GET /v1/models (and other read endpoints) are transparently forwarded
* as-is. The `user` field is only injected into POST / PUT / PATCH
* request bodies.
*
* ── Prerequisites ──────────────────────────────────────────────────────
* • WindsurfAPI running on 127.0.0.1:3003 with .env containing:
* STICKY_SESSION_ENABLED=1
* CASCADE_REUSE_BY_CALLER=1
* • (Recommended) Independent LS instances per user via tinyproxy:
* - tinyproxy on :8080 → LS on :42101 (Alice)
* - tinyproxy on :9090 → LS on :42102 (Bob)
* On the Dashboard, assign 127.0.0.1:8080 / 127.0.0.1:9090 as account
* proxy configs to separate LS pools.
*
* ── Usage ──────────────────────────────────────────────────────────────
* 1. Copy this file for each user and change the port + user value:
* $ cp proxy-user-inject.js proxy-alice.js → port 3004, user 'alice'
* $ cp proxy-user-inject.js proxy-bob.js → port 3005, user 'bob'
*
* 2. Start each proxy (directly via Node.js or as a systemd service):
* $ node proxy-alice.js &
* $ node proxy-bob.js &
*
* 3. Point each user's client to their dedicated port:
* Alice → http://<SERVER_IP>:3004/v1
* Bob → http://<SERVER_IP>:3005/v1
*
* ── systemd Service Example ────────────────────────────────────────────
* Create /etc/systemd/system/windsurf-proxy-alice.service:
*
* [Unit]
* Description=WindsurfAPI user-inject proxy (Alice)
* After=network.target
*
* [Service]
* Type=simple
* ExecStart=/usr/bin/node /opt/windsurf/proxy-alice.js
* Restart=always
* RestartSec=5
* User=nobody
* WorkingDirectory=/opt/windsurf
*
* [Install]
* WantedBy=multi-user.target
*
* Then:
* $ sudo systemctl daemon-reload
* $ sudo systemctl enable --now windsurf-proxy-alice
*
* ── Verification ───────────────────────────────────────────────────────
* After setup, send two consecutive requests from the same proxy port:
*
* $ curl -s -X POST http://<SERVER>:3004/v1/chat/completions \
* -H "Content-Type: application/json" \
* -H "Authorization: Bearer YOUR_API_KEY" \
* -d '{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"max_tokens":50}'
*
* Check server logs: both requests should hit the same upstream account
* with reuse=1 on the second request:
*
* account=same@email.com ls=42101 turns=N reuse=1
* account=same@email.com ls=42101 turns=N+1 reuse=1
*
* And the other proxy port should land on a different account, confirming
* isolation.
*
* ── Security Note ──────────────────────────────────────────────────────
* These proxies listen on 0.0.0.0 by default so remote clients can reach
* them. If all clients are on the same machine as the server, change the
* listen address to 127.0.0.1. If exposed to the internet, consider
* putting them behind nginx with TLS or using a firewall to restrict
* source IPs.
*/

'use strict';

const http = require('http');

// ═══════════════ CONFIGURE THESE ═══════════════
const LISTEN_PORT = 3004; // External port clients connect to
const LISTEN_ADDR = '0.0.0.0'; // '127.0.0.1' for local-only
const USER_ID = 'alice'; // Injected as body.user (per-user unique)
const UPSTREAM_HOST = '127.0.0.1'; // WindsurfAPI host
const UPSTREAM_PORT = 3003; // WindsurfAPI port
// ════════════════════════════════════════════════

http.createServer((req, res) => {
let body = '';
req.on('data', chunk => body += chunk);
req.on('end', () => {
try {
const hasBody = (req.method === 'POST' || req.method === 'PUT' || req.method === 'PATCH');
const json = hasBody ? JSON.parse(body || '{}') : {};
let postData = null;

const headers = {
'Authorization': req.headers['authorization'] || '',
'Host': `${UPSTREAM_HOST}:${UPSTREAM_PORT}`,
};

if (hasBody) {
json.user = USER_ID;
postData = JSON.stringify(json);
headers['Content-Type'] = 'application/json';
headers['Content-Length'] = Buffer.byteLength(postData);
}

const opts = {
hostname: UPSTREAM_HOST,
port: UPSTREAM_PORT,
path: req.url,
method: req.method,
headers,
};

const upstream = http.request(opts, (proxyRes) => {
res.writeHead(proxyRes.statusCode, proxyRes.headers);
proxyRes.on('data', chunk => res.write(chunk));
proxyRes.on('end', () => res.end());
});

upstream.on('error', (err) => {
res.writeHead(502, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: { message: 'Upstream error: ' + err.message } }));
});

if (postData) upstream.write(postData);
upstream.end();
} catch (err) {
res.writeHead(400, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: { message: 'Parse error: ' + err.message } }));
}
});
}).listen(LISTEN_PORT, LISTEN_ADDR, () => {
console.log(`[proxy-user-inject] user="${USER_ID}" listening on ${LISTEN_ADDR}:${LISTEN_PORT} → ${UPSTREAM_HOST}:${UPSTREAM_PORT}`);
});
21 changes: 18 additions & 3 deletions src/account/sticky-session.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
* Related issues: #93, #133 (context loss mid-task)
*/

import { isExperimentalEnabled } from '../runtime-config.js';
import { log } from '../config.js';

const ENABLED = process.env.STICKY_SESSION_ENABLED === '1';

const TTL_MS = (() => {
Expand All @@ -59,6 +62,9 @@ const _stats = {
* Using \0 delimiter (valid in Map keys but never appears in user input).
*/
function bindingKey(callerKey, modelKey) {
if (isExperimentalEnabled('stickyBindByUserOnly')) {
return callerKey + '\0' + '*';
}
return callerKey + '\0' + (modelKey || '*');
}

Expand Down Expand Up @@ -97,13 +103,16 @@ export function isStickyEnabled() {
* @returns {{ accountId: string, apiKey: string } | null}
*/
export function getStickyBinding(callerKey, modelKey = '') {
if (!ENABLED || !callerKey) return null;
log.info('[sticky] ENTER callerKey=%s model=%s enabled=%s', (callerKey || '(none)').slice(0, 50), modelKey, ENABLED);
if (!ENABLED) return null;
if (!callerKey) { log.info('[sticky] SKIP (no callerKey) model=%s', modelKey); return null; }
ensureCleanupTimer();

const key = bindingKey(callerKey, modelKey);
const binding = _bindings.get(key);
if (!binding) {
_stats.misses++;
log.info('[sticky] MISS key=%s model=%s', key, modelKey);
return null;
}

Expand All @@ -116,6 +125,7 @@ export function getStickyBinding(callerKey, modelKey = '') {

binding.lastAccess = now;
_stats.hits++;
log.info('[sticky] HIT key=%s account=%s', key, binding.accountId);
return { accountId: binding.accountId, apiKey: binding.apiKey };
}

Expand Down Expand Up @@ -158,7 +168,10 @@ export function setStickyBinding(callerKey, modelKey, accountId, apiKey) {
lastAccess: now,
});

if (!existing) _stats.creates++;
if (!existing) {
_stats.creates++;
log.info('[sticky] SET key=%s account=%s', key, accountId);
}
}

/**
Expand All @@ -170,7 +183,9 @@ export function setStickyBinding(callerKey, modelKey, accountId, apiKey) {
*/
export function clearStickyBinding(callerKey, modelKey = '') {
if (!ENABLED || !callerKey) return;
_bindings.delete(bindingKey(callerKey, modelKey));
const key = bindingKey(callerKey, modelKey);
if (_bindings.has(key)) log.info('[sticky] CLEAR key=%s', key);
_bindings.delete(key);
}

/**
Expand Down
56 changes: 54 additions & 2 deletions src/auth.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import { createHash, randomUUID, timingSafeEqual } from 'crypto';
import { isStickyEnabled, getStickyBinding, setStickyBinding, clearStickyBinding } from './account/sticky-session.js';
import { isExperimentalEnabled } from './runtime-config.js';
import { readFileSync, writeFileSync, existsSync, renameSync, unlinkSync, readdirSync } from 'fs';
import { config, log } from './config.js';
import { getEffectiveProxy } from './dashboard/proxy-config.js';
Expand Down Expand Up @@ -630,6 +631,7 @@ export function getApiKey(excludeKeys = [], modelKey = null, callerKey = null) {
// account so the cascade_id from the previous turn is still valid.
// Falls through to normal selection if the bound account is unavailable.
if (callerKey && isStickyEnabled()) {
log.info('[sticky] CHECK callerKey=%s model=%s enabled=%s', (callerKey || '(none)').slice(0, 50), modelKey || '(none)', isStickyEnabled());
const bound = getStickyBinding(callerKey, modelKey);
if (bound) {
const acct = accounts.find(a => a.id === bound.accountId && a.status === 'active' && a.apiKey === bound.apiKey);
Expand All @@ -653,10 +655,17 @@ export function getApiKey(excludeKeys = [], modelKey = null, callerKey = null) {
}
}
}
// Bound account is no longer usable — clear it so the next call
// falls through to normal selection instead of looping.
// Bound account is no longer usable
if (isExperimentalEnabled('stickyNoFallback')) {
log.info('[sticky] NO-FALLBACK callerKey=%s model=%s — bound account unavailable, refusing to rotate',
(callerKey || '').slice(0, 50), modelKey || '(none)');
return null;
}
// Clear it so the next call falls through to normal selection instead of looping.
clearStickyBinding(callerKey, modelKey);
}
} else {
log.info('[sticky] SKIP-CHECK callerKey=%s enabled=%s', (callerKey ? callerKey.slice(0, 30) : String(callerKey)), isStickyEnabled());
}

const candidates = [];
Expand Down Expand Up @@ -699,6 +708,49 @@ export function getApiKey(excludeKeys = [], modelKey = null, callerKey = null) {
return (x.account.lastUsed || 0) - (y.account.lastUsed || 0);
});

// ── Tiebreaker: user-aware account sharding ─────────────────────
//
// Level 1 — strict pinning (stickyBindByUserOnly + stickyNoFallback):
// When both flags are on the user wants per-user account isolation
// with zero cross-contamination. Skip all health-metric comparison
// and deterministically pin each caller to a fixed account slot from
// the very first request, regardless of quota / RPM / tier
// differences. Once pinned, stickyNoFallback prevents the request
// from ever rotating to another account.
//
// Level 2 — soft sharding (the two flags are NOT both on):
// Only re-shard candidates when the top two are genuinely tied on
// every health metric. This avoids overriding legitimate
// load-balancing when one account is clearly healthier.
if (callerKey && candidates.length > 1) {
const strictPin = isExperimentalEnabled('stickyBindByUserOnly') && isExperimentalEnabled('stickyNoFallback');
let doShard = false;
if (strictPin) {
doShard = true;
} else {
const first = candidates[0];
const second = candidates[1];
const ix0 = first.account._inflight || 0;
const iy0 = second.account._inflight || 0;
const qx0 = Math.floor(quotaScore(first.account) / 5);
const qy0 = Math.floor(quotaScore(second.account) / 5);
const rx0 = (first.limit - first.used) / first.limit || 0;
const ry0 = (second.limit - second.used) / second.limit || 0;
doShard =
ix0 === iy0 && qx0 === qy0 && rx0 === ry0 &&
(first.account.lastUsed || 0) === (second.account.lastUsed || 0);
}
if (doShard) {
const hash = createHash('sha256').update(callerKey).digest();
const bucket = hash.readUInt32BE(0) % candidates.length;
if (bucket > 0) {
const chosen = candidates[bucket];
candidates[bucket] = candidates[0];
candidates[0] = chosen;
}
}
}

const { account } = candidates[0];
const reservationTimestamp = nextReservationToken(now);
account._rpmHistory.push(reservationTimestamp);
Expand Down
5 changes: 4 additions & 1 deletion src/caller-key.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { createHash } from 'crypto';
import { log } from './config.js';

function sha256Hex(value) {
return createHash('sha256').update(String(value || '')).digest('hex');
Expand All @@ -19,8 +20,8 @@ function sha256Hex(value) {
// pinned to (apiKey, user/session). Returns '' when no usable signal.
export function extractBodyCallerSubKey(body) {
if (!body || typeof body !== 'object') return '';
if (typeof body.user === 'string') return sha256Hex(body.user).slice(0, 16);
const candidates = [
typeof body.user === 'string' ? body.user : '',
typeof body?.metadata?.conversation_id === 'string' ? body.metadata.conversation_id : '',
typeof body.conversation === 'string' ? body.conversation : '',
typeof body.previous_response_id === 'string' ? body.previous_response_id : '',
Expand Down Expand Up @@ -67,6 +68,8 @@ function ipUaFingerprint(req) {

export function callerKeyFromRequest(req, apiKey = '', body = null) {
const bodySubKey = body ? extractBodyCallerSubKey(body) : '';
const hasUserInBody = !!(body && typeof body.user === 'string');
log.info('[caller-key] body.user=%s subKey=%s', hasUserInBody ? body.user : '(none)', bodySubKey || '(none)');
if (apiKey) {
const base = `api:${sha256Hex(apiKey).slice(0, 32)}`;
if (bodySubKey) return `${base}:user:${bodySubKey}`;
Expand Down
Loading