Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions packages/core/src/core/tokenLimits.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { describe, it, expect } from 'vitest';
import {
normalize,
tokenLimit,
knownTokenLimit,
DEFAULT_TOKEN_LIMIT,
DEFAULT_OUTPUT_TOKEN_LIMIT,
} from './tokenLimits.js';
Expand Down Expand Up @@ -234,6 +235,21 @@ describe('tokenLimit', () => {
});
});

// knownTokenLimit: these cases pin the limits reported for recognised model
// names and that an unrecognised name yields undefined.
describe('knownTokenLimit', () => {
  it('returns a limit for known input models', () => {
    const qwenInputLimit = knownTokenLimit('qwen3-max');
    const gptInputLimit = knownTokenLimit('gpt-5');

    expect(qwenInputLimit).toBe(262144);
    expect(gptInputLimit).toBe(272000);
  });

  it('returns a limit for known output models', () => {
    const qwenOutputLimit = knownTokenLimit('qwen3-max', 'output');

    expect(qwenOutputLimit).toBe(32768);
  });

  it('returns undefined for unknown models instead of the default fallback', () => {
    const unknownModelLimit = knownTokenLimit('unknown-model-v1.0');

    expect(unknownModelLimit).toBeUndefined();
  });
});

describe('tokenLimit with output type', () => {
describe('latest models output limits', () => {
it('should return correct output limits for GPT-5.x', () => {
Expand Down
40 changes: 27 additions & 13 deletions packages/core/src/core/tokenLimits.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,22 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
[/^kimi-k2\.5/, LIMITS['32k']],
];

/**
 * Find the first pattern-table entry whose regex matches the model name.
 *
 * The name is passed through `normalize` before matching. `OUTPUT_PATTERNS`
 * is searched when `type` is 'output'; otherwise `PATTERNS` is searched.
 *
 * @param model - Model identifier to look up.
 * @param type - Which table to search; defaults to 'input'.
 * @returns The limit paired with the first matching regex, or `undefined`
 *   when no regex matches.
 */
function findTokenLimit(
  model: Model,
  type: TokenLimitType = 'input',
): TokenCount | undefined {
  const normalizedName = normalize(model);
  const table = type === 'output' ? OUTPUT_PATTERNS : PATTERNS;

  // Entries are tested in table order, so the earliest match wins.
  const firstMatch = table.find(([pattern]) => pattern.test(normalizedName));
  return firstMatch?.[1];
}

/**
* Check if a model has an explicitly defined output token limit.
* This distinguishes between models with known limits in OUTPUT_PATTERNS
Expand All @@ -197,6 +213,13 @@ export function hasExplicitOutputLimit(model: Model): boolean {
return OUTPUT_PATTERNS.some(([regex]) => regex.test(norm));
}

/**
 * Look up the token limit for a model without applying any default value.
 *
 * @param model - Model identifier to look up.
 * @param type - Which limit to look up; defaults to 'input'.
 * @returns Whatever `findTokenLimit` yields for the same arguments: a limit,
 *   or `undefined`.
 */
export function knownTokenLimit(
  model: Model,
  type: TokenLimitType = 'input',
): TokenCount | undefined {
  const matchedLimit = findTokenLimit(model, type);
  return matchedLimit;
}

/**
* Return the token limit for a model string based on the specified type.
*
Expand All @@ -216,17 +239,8 @@ export function tokenLimit(
model: Model,
type: TokenLimitType = 'input',
): TokenCount {
const norm = normalize(model);

// Choose the appropriate patterns based on token type
const patterns = type === 'output' ? OUTPUT_PATTERNS : PATTERNS;

for (const [regex, limit] of patterns) {
if (regex.test(norm)) {
return limit;
}
}

// Return appropriate default based on token type
return type === 'output' ? DEFAULT_OUTPUT_TOKEN_LIMIT : DEFAULT_TOKEN_LIMIT;
return (
knownTokenLimit(model, type) ??
(type === 'output' ? DEFAULT_OUTPUT_TOKEN_LIMIT : DEFAULT_TOKEN_LIMIT)
);
}
52 changes: 52 additions & 0 deletions packages/vscode-ide-companion/src/utils/acpModelInfo.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,26 @@ describe('extractSessionModelState', () => {
// The function should still return a state with empty availableModels
expect(result?.availableModels).toHaveLength(0);
});

it('derives contextLimit for known models when the ACP payload omits it', () => {
  // The model entry on the wire carries no `_meta` at all.
  const sessionState = extractSessionModelState({
    models: {
      currentModelId: 'qwen3-max',
      availableModels: [{ modelId: 'qwen3-max', name: 'Qwen3 Max' }],
    },
  });

  // The normalized entry is expected to gain a `_meta.contextLimit`.
  const expectedModel = {
    modelId: 'qwen3-max',
    name: 'Qwen3 Max',
    _meta: { contextLimit: 262144 },
  };

  expect(sessionState).toEqual({
    currentModelId: 'qwen3-max',
    availableModels: [expectedModel],
  });
});
});

describe('extractModelInfoFromNewSessionResult', () => {
Expand Down Expand Up @@ -205,4 +225,36 @@ describe('extractModelInfoFromNewSessionResult', () => {
expect(extractModelInfoFromNewSessionResult({})).toBeNull();
expect(extractModelInfoFromNewSessionResult(null)).toBeNull();
});

it('derives contextLimit for known models when the payload has null metadata', () => {
  // `_meta` is present on the wire but explicitly null.
  const modelInfo = extractModelInfoFromNewSessionResult({
    model: {
      name: 'Qwen3 Max',
      modelId: 'qwen3-max',
      _meta: null,
    },
  });

  expect(modelInfo).toEqual({
    name: 'Qwen3 Max',
    modelId: 'qwen3-max',
    _meta: { contextLimit: 262144 },
  });
});

it('preserves null contextLimit for unknown models', () => {
  // The payload carries an explicit null limit for the model.
  const modelInfo = extractModelInfoFromNewSessionResult({
    model: {
      name: 'Unknown',
      modelId: 'unknown-model-v1.0',
      _meta: { contextLimit: null },
    },
  });

  // The explicit null must come back unchanged.
  expect(modelInfo).toEqual({
    name: 'Unknown',
    modelId: 'unknown-model-v1.0',
    _meta: { contextLimit: null },
  });
});
});
27 changes: 26 additions & 1 deletion packages/vscode-ide-companion/src/utils/acpModelInfo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/

import type { ModelInfo } from '@agentclientprotocol/sdk';
import { knownTokenLimit } from '@qwen-code/qwen-code-core/src/core/tokenLimits.js';
import type { ApprovalModeValue } from '../types/approvalModeValueTypes.js';

type AcpMeta = Record<string, unknown>;
Expand All @@ -19,6 +20,15 @@ const asMeta = (value: unknown): AcpMeta | null | undefined => {
return undefined;
};

/**
 * Read the `contextLimit` entry from an ACP metadata record.
 *
 * @param meta - Metadata record; may be `null` or `undefined`.
 * @returns The stored value when it is a number or `null`; `undefined` when
 *   the record is absent, the key is missing, or the value has any other type.
 */
const getContextLimitFromMeta = (
  meta: AcpMeta | null | undefined,
): number | null | undefined => {
  const candidate = meta?.['contextLimit'];

  // An explicit null is passed through as-is rather than collapsed to undefined.
  if (candidate === null) {
    return null;
  }
  if (typeof candidate === 'number') {
    return candidate;
  }
  return undefined;
};

const normalizeModelInfo = (value: unknown): ModelInfo | null => {
if (!value || typeof value !== 'object') {
return null;
Expand Down Expand Up @@ -48,10 +58,25 @@ const normalizeModelInfo = (value: unknown): ModelInfo | null => {

// Back-compat: older implementations used `contextLimit` at the top-level.
const legacyContextLimit = obj['contextLimit'];
const contextLimit =
const legacyLimit =
typeof legacyContextLimit === 'number' || legacyContextLimit === null
? legacyContextLimit
: undefined;
const metaLimit = getContextLimitFromMeta(metaFromWire);
const derivedLimit = knownTokenLimit(modelId || name);

// Priority: legacy numeric > meta numeric > derived from known model > explicit null > undefined.
// An explicit `null` from the server means "limit intentionally unknown"; `undefined` means "not provided".
const contextLimit =
typeof legacyLimit === 'number'
? legacyLimit
: typeof metaLimit === 'number'
? metaLimit
: typeof derivedLimit === 'number'
? derivedLimit
: legacyLimit === null || metaLimit === null
? null
: undefined;

let mergedMeta: AcpMeta | null | undefined = metaFromWire;
if (typeof contextLimit !== 'undefined') {
Expand Down
52 changes: 2 additions & 50 deletions packages/vscode-ide-companion/src/webview/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,8 @@ import type { ApprovalModeValue } from '../types/approvalModeValueTypes.js';
import type { PlanEntry, UsageStatsPayload } from '../types/chatTypes.js';
import type { ModelInfo, AvailableCommand } from '@agentclientprotocol/sdk';
import type { Question } from '../types/acpTypes.js';
import {
DEFAULT_TOKEN_LIMIT,
tokenLimit,
} from '@qwen-code/qwen-code-core/src/core/tokenLimits.js';
import { useImagePaste, type WebViewImageMessage } from './hooks/useImage.js';
import { computeContextUsage } from './utils/contextUsage.js';

export const App: React.FC = () => {
const vscode = useVSCode();
Expand Down Expand Up @@ -208,52 +205,7 @@ export const App: React.FC = () => {

const completion = useCompletionTrigger(inputFieldRef, getCompletionItems);

const contextUsage = useMemo(() => {
if (!usageStats && !modelInfo) {
return null;
}

const modelName =
modelInfo?.modelId && typeof modelInfo.modelId === 'string'
? modelInfo.modelId
: modelInfo?.name && typeof modelInfo.name === 'string'
? modelInfo.name
: undefined;

// Note: In the webview context, the contextWindowSize is already reflected in
// modelInfo._meta.contextLimit which is computed on the extension side with the proper config.
// We only use tokenLimit as a fallback if metaLimit is not available.
const derivedLimit =
modelName && modelName.length > 0
? tokenLimit(modelName, 'input')
: undefined;

const metaLimitRaw = modelInfo?._meta?.['contextLimit'];
const metaLimit =
typeof metaLimitRaw === 'number' || metaLimitRaw === null
? metaLimitRaw
: undefined;

const limit =
usageStats?.tokenLimit ??
metaLimit ??
derivedLimit ??
DEFAULT_TOKEN_LIMIT;

const used = usageStats?.usage?.promptTokens ?? 0;
if (typeof limit !== 'number' || limit <= 0 || used < 0) {
return null;
}
const percentLeft = Math.max(
0,
Math.min(100, Math.round(((limit - used) / limit) * 100)),
);
return {
percentLeft,
usedTokens: used,
tokenLimit: limit,
};
}, [usageStats, modelInfo]);
const contextUsage = useMemo(() => computeContextUsage(usageStats, modelInfo), [usageStats, modelInfo]);

// Track a lightweight signature of workspace files to detect content changes even when length is unchanged
const workspaceFilesSignature = useMemo(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/**
* @license
* Copyright 2025 Qwen Team
* SPDX-License-Identifier: Apache-2.0
*/

import { describe, expect, it } from 'vitest';
import { computeContextUsage } from './contextUsage.js';

// computeContextUsage: each case feeds a usage-stats object and a model-info
// object (or null) and checks the returned usage summary.
describe('computeContextUsage', () => {
  it('returns null when there is no trusted token limit', () => {
    // Neither argument carries a limit.
    const usage = computeContextUsage(
      { usage: { promptTokens: 1234 } },
      { modelId: 'unknown-model', name: 'Unknown Model' },
    );

    expect(usage).toBeNull();
  });

  it('prefers usageStats.tokenLimit over model metadata', () => {
    // Both sources carry a limit; the expected result reports the usageStats one.
    const usage = computeContextUsage(
      {
        usage: { promptTokens: 1000 },
        tokenLimit: 4000,
      },
      {
        modelId: 'qwen3-max',
        name: 'Qwen3 Max',
        _meta: { contextLimit: 8000 },
      },
    );

    expect(usage).toEqual({
      percentLeft: 75,
      usedTokens: 1000,
      tokenLimit: 4000,
    });
  });

  it('falls back to model metadata when usageStats does not include a limit', () => {
    // Only the model metadata carries a limit here.
    const usage = computeContextUsage(
      { usage: { promptTokens: 2000 } },
      {
        modelId: 'qwen3-max',
        name: 'Qwen3 Max',
        _meta: { contextLimit: 8000 },
      },
    );

    expect(usage).toEqual({
      percentLeft: 75,
      usedTokens: 2000,
      tokenLimit: 8000,
    });
  });

  it('uses inputTokens when promptTokens is unavailable', () => {
    // No model info at all; usage is reported under `inputTokens`.
    const usage = computeContextUsage(
      {
        usage: { inputTokens: 3000 },
        tokenLimit: 12000,
      },
      null,
    );

    expect(usage).toEqual({
      percentLeft: 75,
      usedTokens: 3000,
      tokenLimit: 12000,
    });
  });
});
Loading