Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@ import {
import { OpenAILogger } from '../../utils/openaiLogger.js';
import type OpenAI from 'openai';

vi.mock('../../telemetry/loggers.js', () => ({
logApiRequest: vi.fn(),
logApiResponse: vi.fn(),
logApiError: vi.fn(),
}));
// Partially mock the telemetry loggers: keep every real export (so other
// telemetry helpers behave normally) but replace the three API-logging entry
// points with spies, letting tests assert exactly when each is (not) called.
vi.mock('../../telemetry/loggers.js', async (importOriginal) => {
  const actual =
    await importOriginal<typeof import('../../telemetry/loggers.js')>();
  return {
    ...actual,
    logApiRequest: vi.fn(),
    logApiResponse: vi.fn(),
    logApiError: vi.fn(),
  };
});

vi.mock('../../utils/openaiLogger.js', () => ({
OpenAILogger: vi.fn().mockImplementation(() => ({
Expand Down Expand Up @@ -474,4 +479,45 @@ describe('LoggingContentGenerator', () => {
},
]);
});

// Internal prompt IDs must bypass user-facing request/interaction logging
// while still reporting usage metadata so /stats token tracking keeps working.
it.each(['prompt_suggestion', 'forked_query'])(
  'skips logApiRequest and OpenAI logging for internal promptId %s (generateContent)',
  async (promptId) => {
    const response = {
      responseId: 'internal-resp',
      modelVersion: 'test-model',
      candidates: [{ content: { parts: [{ text: 'suggestion' }] } }],
      usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 },
    } as unknown as GenerateContentResponse;

    const wrapped = {
      generateContent: vi.fn().mockResolvedValue(response),
      generateContentStream: vi.fn(),
    } as unknown as ContentGenerator;

    const generator = new LoggingContentGenerator(wrapped, createConfig(), {
      model: 'test-model',
      enableOpenAILogging: true,
      openAILoggingDir: '/tmp/test-logs',
    });

    const params = {
      model: 'test-model',
      contents: [{ role: 'user', parts: [{ text: 'test' }] }],
    } as unknown as GenerateContentParameters;

    await generator.generateContent(params, promptId);

    // No user-facing request telemetry for internal prompts.
    expect(logApiRequest).not.toHaveBeenCalled();
    // Usage telemetry still flows through (feeds /stats token tracking).
    expect(logApiResponse).toHaveBeenCalled();
    // The OpenAI logger is still constructed, but records no interaction.
    expect(OpenAILogger).toHaveBeenCalled();
    const openAiLoggerMock = OpenAILogger as unknown as ReturnType<
      typeof vi.fn
    >;
    const instance = openAiLoggerMock.mock.results[0]?.value;
    expect(instance.logInteraction).not.toHaveBeenCalled();
  },
);
});
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
logApiRequest,
logApiResponse,
} from '../../telemetry/loggers.js';
import { isInternalPromptId } from '../../utils/internalPromptIds.js';
import type {
ContentGenerator,
ContentGeneratorConfig,
Expand Down Expand Up @@ -143,8 +144,17 @@ export class LoggingContentGenerator implements ContentGenerator {
userPromptId: string,
): Promise<GenerateContentResponse> {
const startTime = Date.now();
this.logApiRequest(this.toContents(req.contents), req.model, userPromptId);
const openaiRequest = await this.buildOpenAIRequestForLogging(req);
const isInternal = isInternalPromptId(userPromptId);
if (!isInternal) {
this.logApiRequest(
this.toContents(req.contents),
req.model,
userPromptId,
);
}
const openaiRequest = isInternal
? undefined
: await this.buildOpenAIRequestForLogging(req);
try {
const response = await this.wrapped.generateContent(req, userPromptId);
const durationMs = Date.now() - startTime;
Expand All @@ -155,12 +165,16 @@ export class LoggingContentGenerator implements ContentGenerator {
userPromptId,
response.usageMetadata,
);
await this.logOpenAIInteraction(openaiRequest, response);
if (!isInternal) {
await this.logOpenAIInteraction(openaiRequest, response);
}
return response;
} catch (error) {
const durationMs = Date.now() - startTime;
this._logApiError('', durationMs, error, req.model, userPromptId);
await this.logOpenAIInteraction(openaiRequest, undefined, error);
if (!isInternal) {
await this.logOpenAIInteraction(openaiRequest, undefined, error);
}
throw error;
}
}
Expand All @@ -170,16 +184,27 @@ export class LoggingContentGenerator implements ContentGenerator {
userPromptId: string,
): Promise<AsyncGenerator<GenerateContentResponse>> {
const startTime = Date.now();
this.logApiRequest(this.toContents(req.contents), req.model, userPromptId);
const openaiRequest = await this.buildOpenAIRequestForLogging(req);
const isInternal = isInternalPromptId(userPromptId);
if (!isInternal) {
this.logApiRequest(
this.toContents(req.contents),
req.model,
userPromptId,
);
}
const openaiRequest = isInternal
? undefined
: await this.buildOpenAIRequestForLogging(req);

let stream: AsyncGenerator<GenerateContentResponse>;
try {
stream = await this.wrapped.generateContentStream(req, userPromptId);
} catch (error) {
const durationMs = Date.now() - startTime;
this._logApiError('', durationMs, error, req.model, userPromptId);
await this.logOpenAIInteraction(openaiRequest, undefined, error);
if (!isInternal) {
await this.logOpenAIInteraction(openaiRequest, undefined, error);
}
throw error;
}

Expand All @@ -199,12 +224,27 @@ export class LoggingContentGenerator implements ContentGenerator {
model: string,
openaiRequest?: OpenAI.Chat.ChatCompletionCreateParams,
): AsyncGenerator<GenerateContentResponse> {
const isInternal = isInternalPromptId(userPromptId);
// For internal prompts we only need the last usage metadata (for /stats);
// skip collecting full responses to avoid unnecessary memory overhead.
const responses: GenerateContentResponse[] = [];

// Track first-seen IDs so _logApiResponse/_logApiError have accurate
// values even when we skip collecting full responses for internal prompts.
let firstResponseId = '';
let firstModelVersion = '';
let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined;
try {
for await (const response of stream) {
responses.push(response);
if (!firstResponseId && response.responseId) {
firstResponseId = response.responseId;
}
if (!firstModelVersion && response.modelVersion) {
firstModelVersion = response.modelVersion;
}
if (!isInternal) {
responses.push(response);
}
if (response.usageMetadata) {
lastUsageMetadata = response.usageMetadata;
}
Expand All @@ -213,25 +253,29 @@ export class LoggingContentGenerator implements ContentGenerator {
// Only log successful API response if no error occurred
const durationMs = Date.now() - startTime;
this._logApiResponse(
responses[0]?.responseId ?? '',
firstResponseId,
durationMs,
responses[0]?.modelVersion || model,
firstModelVersion || model,
userPromptId,
lastUsageMetadata,
);
const consolidatedResponse =
this.consolidateGeminiResponsesForLogging(responses);
await this.logOpenAIInteraction(openaiRequest, consolidatedResponse);
if (!isInternal) {
const consolidatedResponse =
this.consolidateGeminiResponsesForLogging(responses);
await this.logOpenAIInteraction(openaiRequest, consolidatedResponse);
}
} catch (error) {
const durationMs = Date.now() - startTime;
this._logApiError(
responses[0]?.responseId ?? '',
firstResponseId,
durationMs,
error,
responses[0]?.modelVersion || model,
firstModelVersion || model,
userPromptId,
);
await this.logOpenAIInteraction(openaiRequest, undefined, error);
if (!isInternal) {
await this.logOpenAIInteraction(openaiRequest, undefined, error);
}
throw error;
}
}
Expand Down
27 changes: 20 additions & 7 deletions packages/core/src/followup/forkedQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@
* Forked Query Infrastructure
*
* Enables cache-aware secondary LLM calls that share the main conversation's
* prompt prefix (systemInstruction + tools + history) for cache hits.
* prompt prefix (systemInstruction + history) for cache hits.
*
* DashScope already enables cache_control via X-DashScope-CacheControl header.
* By constructing the forked GeminiChat with identical generationConfig and
* history prefix, the fork automatically benefits from prefix caching.
*
* Note: `runForkedQuery` overrides `tools: []` at the per-request level so the
* model cannot produce function calls. `createForkedChat` retains the full
* generationConfig (including tools) for callers like speculation that need them.
*/

import type {
Expand All @@ -21,6 +25,9 @@ import type {
import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
import type { Config } from '../config/config.js';

/**
 * Per-request config override that strips tools so the model never produces
 * function calls. Spread into the request config by `runForkedQuery`, whose
 * callers expect pure-text completions — see the module header note on
 * per-request tool stripping.
 */
const NO_TOOLS: Readonly<Pick<GenerateContentConfig, 'tools'>> = { tools: [] };

/**
* Snapshot of the main conversation's cache-critical parameters.
* Captured after each successful main turn so forked queries share the same prefix.
Expand Down Expand Up @@ -111,9 +118,13 @@ export function clearCacheSafeParams(): void {
// ---------------------------------------------------------------------------

/**
* Create an isolated GeminiChat that shares the same cache prefix as the main
* conversation. The fork uses identical generationConfig (systemInstruction +
* tools) and history, so DashScope's cache_control mechanism produces cache hits.
* Create an isolated GeminiChat that shares the main conversation's
* generationConfig (including systemInstruction, tools, and history).
*
* The full config is retained so that callers like `runSpeculativeLoop`
* can execute tool calls during speculation. For pure-text callers like
* `runForkedQuery`, tools are stripped at the per-request level via
* `NO_TOOLS` — see {@link runForkedQuery}.
*
* The fork does NOT have chatRecordingService or telemetryService to avoid
* polluting the main session's recordings and token counts.
Expand Down Expand Up @@ -165,7 +176,7 @@ function extractUsage(

/**
* Run a forked query using a GeminiChat that shares the main conversation's
* cache prefix. This is a single-turn request (no tool execution loop).
* cache prefix. This is a single-turn, tool-free request (no function calls).
*
* @param config - App config
* @param userMessage - The user message to send (e.g., SUGGESTION_PROMPT)
Expand All @@ -191,8 +202,10 @@ export async function runForkedQuery(
const model = options?.model ?? params.model;
const chat = createForkedChat(config, params);

// Build per-request config overrides for JSON schema if needed
const requestConfig: GenerateContentConfig = {};
// Build per-request config overrides.
// NO_TOOLS prevents the model from producing function calls — forked
// queries are pure text completion and must not appear in tool-call UI.
const requestConfig: GenerateContentConfig = { ...NO_TOOLS };
if (options?.abortSignal) {
requestConfig.abortSignal = options.abortSignal;
}
Expand Down
97 changes: 97 additions & 0 deletions packages/core/src/telemetry/loggers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ import {
logExtensionInstallEvent,
logExtensionUninstall,
logHookCall,
logApiError,
} from './loggers.js';
import * as metrics from './metrics.js';
import { QwenLogger } from './qwen-logger/qwen-logger.js';
Expand All @@ -77,6 +78,7 @@ import {
ExtensionInstallEvent,
ExtensionUninstallEvent,
HookCallEvent,
ApiErrorEvent,
} from './types.js';
import { FileOperation } from './metrics.js';
import type {
Expand Down Expand Up @@ -359,6 +361,101 @@ describe('loggers', () => {
});
});

describe('logApiResponse skips chatRecordingService for internal prompt IDs', () => {
  /**
   * Builds a Config stub whose chatRecordingService records UI telemetry
   * into the supplied spy. Extracted because both tests below previously
   * duplicated this setup verbatim.
   */
  const makeConfigWithRecording = (
    recordUiTelemetryEvent: ReturnType<typeof vi.fn>,
  ) =>
    ({
      getSessionId: () => 'test-session-id',
      getUsageStatisticsEnabled: () => false,
      getChatRecordingService: () => ({ recordUiTelemetryEvent }),
    }) as unknown as Config;

  it.each(['prompt_suggestion', 'forked_query'])(
    'should not record to chatRecordingService when prompt_id is %s',
    (promptId) => {
      const recordSpy = vi.fn();
      const event = new ApiResponseEvent(
        'resp-id',
        'test-model',
        50,
        promptId,
      );
      logApiResponse(makeConfigWithRecording(recordSpy), event);

      // Internal prompts must not pollute the session recording…
      expect(recordSpy).not.toHaveBeenCalled();
      // …but the UI telemetry event is still emitted.
      expect(mockUiEvent.addEvent).toHaveBeenCalled();
    },
  );

  it('should record to chatRecordingService for normal prompt IDs', () => {
    const recordSpy = vi.fn();
    const event = new ApiResponseEvent(
      'resp-id',
      'test-model',
      50,
      'user_query',
    );
    logApiResponse(makeConfigWithRecording(recordSpy), event);

    expect(recordSpy).toHaveBeenCalled();
  });
});

describe('logApiError skips chatRecordingService for internal prompt IDs', () => {
  /**
   * Builds a Config stub whose chatRecordingService records UI telemetry
   * into the supplied spy. Extracted because both tests below previously
   * duplicated this setup verbatim.
   */
  const makeConfigWithRecording = (
    recordUiTelemetryEvent: ReturnType<typeof vi.fn>,
  ) =>
    ({
      getSessionId: () => 'test-session-id',
      getUsageStatisticsEnabled: () => false,
      getChatRecordingService: () => ({ recordUiTelemetryEvent }),
    }) as unknown as Config;

  it.each(['prompt_suggestion', 'forked_query'])(
    'should not record to chatRecordingService when prompt_id is %s',
    (promptId) => {
      const recordSpy = vi.fn();
      const event = new ApiErrorEvent({
        model: 'test-model',
        durationMs: 100,
        promptId,
        errorMessage: 'test error',
      });
      logApiError(makeConfigWithRecording(recordSpy), event);

      // Internal prompts must not pollute the session recording.
      expect(recordSpy).not.toHaveBeenCalled();
    },
  );

  it('should record to chatRecordingService for normal prompt IDs', () => {
    const recordSpy = vi.fn();
    const event = new ApiErrorEvent({
      model: 'test-model',
      durationMs: 100,
      promptId: 'user_query',
      errorMessage: 'test error',
    });
    logApiError(makeConfigWithRecording(recordSpy), event);

    expect(recordSpy).toHaveBeenCalled();
  });
});

describe('logApiRequest', () => {
const mockConfig = {
getSessionId: () => 'test-session-id',
Expand Down
Loading
Loading