Skip to content

Commit 1f56796

Browse files
committed
Feature: Implement V0 Episodic Context Manager
- Re-wrote the monolithic string-based context manipulation logic into an elegant, immutable Episodic IR pipeline.
- Implemented four non-destructive degradation processors: `HistorySquashingProcessor`, `ToolMaskingProcessor`, `BlobDegradationProcessor`, and `SemanticCompressionProcessor`.
- Added dynamic configuration knobs (`budget` and `strategies`) to precisely tune the retention algorithms.
- Implemented a power-user `incrementalGc` flag for maximum context preservation beneath the ceiling.
- Enforced strict typing across the new pipeline, replacing unsafe casts with robust mapping interfaces.
- Added `powerUserProfile` to support features for those wanting a bit more quality at the cost of tokens.
1 parent 06173c0 commit 1f56796

39 files changed

+2612
-2315
lines changed

docs/cli/settings.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ they appear in the UI.
165165
| Model Steering | `experimental.modelSteering` | Enable model steering (user hints) to guide the model during tool execution. | `false` |
166166
| Direct Web Fetch | `experimental.directWebFetch` | Enable web fetch behavior that bypasses LLM summarization. | `false` |
167167
| Memory Manager Agent | `experimental.memoryManager` | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories. | `false` |
168+
| Use the power user profile for massive contexts. | `experimental.powerUserProfile` | Enables continuous minimal GC near the max tokens limit instead of a blocked backbuffer. | `false` |
168169
| Use the generalist profile to manage agent contexts. | `experimental.generalistProfile` | Suitable for general coding and software development tasks. | `false` |
169170
| Enable Context Management | `experimental.contextManagement` | Enable logic for context management. | `false` |
170171
| Topic & Update Narration | `experimental.topicUpdateNarration` | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting. | `false` |

docs/reference/configuration.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,6 +1693,12 @@ their corresponding top-level category object in your `settings.json` file.
16931693
- **Default:** `false`
16941694
- **Requires restart:** Yes
16951695

1696+
- **`experimental.powerUserProfile`** (boolean):
1697+
- **Description:** Enables continuous minimal GC near the max tokens limit
1698+
instead of a blocked backbuffer.
1699+
- **Default:** `false`
1700+
- **Requires restart:** Yes
1701+
16961702
- **`experimental.generalistProfile`** (boolean):
16971703
- **Description:** Suitable for general coding and software development tasks.
16981704
- **Default:** `false`

packages/cli/src/config/config.test.ts

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import {
2121
type MCPServerConfig,
2222
type GeminiCLIExtension,
2323
Storage,
24-
generalistProfile,
24+
GENERALIST_PROFILE,
2525
type ContextManagementConfig,
2626
} from '@google/gemini-cli-core';
2727
import { loadCliConfig, parseArguments, type CliArgs } from './config.js';
@@ -2211,7 +2211,7 @@ describe('loadCliConfig context management', () => {
22112211
});
22122212
const config = await loadCliConfig(settings, 'test-session', argv);
22132213
expect(config.getContextManagementConfig()).toStrictEqual(
2214-
generalistProfile,
2214+
GENERALIST_PROFILE,
22152215
);
22162216
expect(config.isContextManagementEnabled()).toBe(true);
22172217
});
@@ -2220,24 +2220,19 @@ describe('loadCliConfig context management', () => {
22202220
process.argv = ['node', 'script.js'];
22212221
const argv = await parseArguments(createTestMergedSettings());
22222222
const contextManagementConfig: Partial<ContextManagementConfig> = {
2223-
historyWindow: {
2223+
budget: {
2224+
incrementalGc: false,
22242225
maxTokens: 100_000,
22252226
retainedTokens: 50_000,
2226-
},
2227-
messageLimits: {
2228-
normalMaxTokens: 1000,
2229-
retainedMaxTokens: 10_000,
2230-
normalizationHeadRatio: 0.25,
2231-
},
2232-
tools: {
2233-
distillation: {
2234-
maxOutputTokens: 10_000,
2235-
summarizationThresholdTokens: 15_000,
2236-
},
2237-
outputMasking: {
2238-
protectionThresholdTokens: 30_000,
2239-
minPrunableThresholdTokens: 10_000,
2240-
protectLatestTurn: false,
2227+
protectedEpisodes: 1,
2228+
protectSystemEpisode: true,
2229+
},
2230+
strategies: {
2231+
historySquashing: { maxTokensPerNode: 12000 },
2232+
toolMasking: { stringLengthThresholdTokens: 10000 },
2233+
semanticCompression: {
2234+
nodeThresholdTokens: 5000,
2235+
compressionModel: 'chat-compression-2.5-flash-lite',
22412236
},
22422237
},
22432238
};

packages/cli/src/config/config.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ import {
4646
type HookEventName,
4747
type OutputFormat,
4848
detectIdeFromEnv,
49-
generalistProfile,
49+
GENERALIST_PROFILE,
50+
POWER_USER_PROFILE,
5051
} from '@google/gemini-cli-core';
5152
import {
5253
type Settings,
@@ -886,12 +887,15 @@ export async function loadCliConfig(
886887

887888
const useGeneralistProfile =
888889
settings.experimental?.generalistProfile ?? false;
890+
const usePowerUserProfile =
891+
settings.experimental?.powerUserProfile ?? false;
889892
const useContextManagement =
890893
settings.experimental?.contextManagement ?? false;
891894
const contextManagement = {
892-
...(useGeneralistProfile ? generalistProfile : {}),
895+
...(useGeneralistProfile ? GENERALIST_PROFILE : {}),
896+
...(usePowerUserProfile ? POWER_USER_PROFILE : {}),
893897
...(useContextManagement ? settings?.contextManagement : {}),
894-
enabled: useContextManagement || useGeneralistProfile,
898+
enabled: useContextManagement || useGeneralistProfile || usePowerUserProfile,
895899
};
896900

897901
return new Config({

packages/cli/src/config/settingsSchema.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2149,6 +2149,15 @@ const SETTINGS_SCHEMA = {
21492149
'Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.',
21502150
showInDialog: true,
21512151
},
2152+
powerUserProfile: {
2153+
type: 'boolean',
2154+
label: 'Use the power user profile for massive contexts.',
2155+
category: 'Experimental',
2156+
requiresRestart: true,
2157+
default: false,
2158+
description: 'Enables continuous minimal GC near the max tokens limit instead of a blocked backbuffer.',
2159+
showInDialog: true,
2160+
},
21522161
generalistProfile: {
21532162
type: 'boolean',
21542163
label: 'Use the generalist profile to manage agent contexts.',

packages/core/src/config/config.ts

Lines changed: 26 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,7 @@ import { inspect } from 'node:util';
1111
import process from 'node:process';
1212
import { z } from 'zod';
1313
import type { ConversationRecord } from '../services/chatRecordingService.js';
14-
import type {
15-
AgentHistoryProviderConfig,
16-
ContextManagementConfig,
17-
ToolOutputMaskingConfig,
18-
} from '../context/types.js';
14+
import type { ContextManagementConfig } from '../context/types.js';
1915
export type { ConversationRecord };
2016
import {
2117
AuthType,
@@ -436,12 +432,6 @@ import {
436432
DEFAULT_MEMORY_FILE_FILTERING_OPTIONS,
437433
type FileFilteringOptions,
438434
} from './constants.js';
439-
import {
440-
DEFAULT_TOOL_PROTECTION_THRESHOLD,
441-
DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
442-
DEFAULT_PROTECT_LATEST_TURN,
443-
} from '../context/toolOutputMaskingService.js';
444-
445435
import {
446436
type ExtensionLoader,
447437
SimpleExtensionLoader,
@@ -1139,40 +1129,34 @@ export class Config implements McpContext, AgentLoopContext {
11391129
this.memoryBoundaryMarkers = params.memoryBoundaryMarkers ?? ['.git'];
11401130
this.contextManagement = {
11411131
enabled: params.contextManagement?.enabled ?? false,
1142-
historyWindow: {
1143-
maxTokens: params.contextManagement?.historyWindow?.maxTokens ?? 150000,
1132+
budget: {
1133+
maxTokens: params.contextManagement?.budget?.maxTokens ?? 150000,
11441134
retainedTokens:
1145-
params.contextManagement?.historyWindow?.retainedTokens ?? 40000,
1135+
params.contextManagement?.budget?.retainedTokens ?? 40000,
1136+
protectedEpisodes:
1137+
params.contextManagement?.budget?.protectedEpisodes ?? 1,
1138+
protectSystemEpisode:
1139+
params.contextManagement?.budget?.protectSystemEpisode ?? true,
1140+
incrementalGc: params.contextManagement?.budget?.incrementalGc ?? false,
11461141
},
1147-
messageLimits: {
1148-
normalMaxTokens:
1149-
params.contextManagement?.messageLimits?.normalMaxTokens ?? 2500,
1150-
retainedMaxTokens:
1151-
params.contextManagement?.messageLimits?.retainedMaxTokens ?? 12000,
1152-
normalizationHeadRatio:
1153-
params.contextManagement?.messageLimits?.normalizationHeadRatio ??
1154-
0.25,
1155-
},
1156-
tools: {
1157-
distillation: {
1158-
maxOutputTokens:
1159-
params.contextManagement?.tools?.distillation?.maxOutputTokens ??
1160-
10000,
1161-
summarizationThresholdTokens:
1162-
params.contextManagement?.tools?.distillation
1163-
?.summarizationThresholdTokens ?? 20000,
1142+
strategies: {
1143+
historySquashing: {
1144+
maxTokensPerNode:
1145+
params.contextManagement?.strategies?.historySquashing
1146+
?.maxTokensPerNode ?? 3000,
1147+
},
1148+
toolMasking: {
1149+
stringLengthThresholdTokens:
1150+
params.contextManagement?.strategies?.toolMasking
1151+
?.stringLengthThresholdTokens ?? 10000,
11641152
},
1165-
outputMasking: {
1166-
protectionThresholdTokens:
1167-
params.contextManagement?.tools?.outputMasking
1168-
?.protectionThresholdTokens ?? DEFAULT_TOOL_PROTECTION_THRESHOLD,
1169-
minPrunableThresholdTokens:
1170-
params.contextManagement?.tools?.outputMasking
1171-
?.minPrunableThresholdTokens ??
1172-
DEFAULT_MIN_PRUNABLE_TOKENS_THRESHOLD,
1173-
protectLatestTurn:
1174-
params.contextManagement?.tools?.outputMasking?.protectLatestTurn ??
1175-
DEFAULT_PROTECT_LATEST_TURN,
1153+
semanticCompression: {
1154+
nodeThresholdTokens:
1155+
params.contextManagement?.strategies?.semanticCompression
1156+
?.nodeThresholdTokens ?? 5000,
1157+
compressionModel:
1158+
params.contextManagement?.strategies?.semanticCompression
1159+
?.compressionModel ?? 'chat-compression-2.5-flash-lite',
11761160
},
11771161
},
11781162
};
@@ -2391,18 +2375,6 @@ export class Config implements McpContext, AgentLoopContext {
23912375
return this.contextManagement;
23922376
}
23932377

2394-
get agentHistoryProviderConfig(): AgentHistoryProviderConfig {
2395-
return {
2396-
maxTokens: this.contextManagement.historyWindow.maxTokens,
2397-
retainedTokens: this.contextManagement.historyWindow.retainedTokens,
2398-
normalMessageTokens: this.contextManagement.messageLimits.normalMaxTokens,
2399-
maximumMessageTokens:
2400-
this.contextManagement.messageLimits.retainedMaxTokens,
2401-
normalizationHeadRatio:
2402-
this.contextManagement.messageLimits.normalizationHeadRatio,
2403-
};
2404-
}
2405-
24062378
isTopicUpdateNarrationEnabled(): boolean {
24072379
return this.topicUpdateNarration;
24082380
}
@@ -2411,43 +2383,6 @@ export class Config implements McpContext, AgentLoopContext {
24112383
return this.modelSteering;
24122384
}
24132385

2414-
async getToolOutputMaskingConfig(): Promise<ToolOutputMaskingConfig> {
2415-
await this.ensureExperimentsLoaded();
2416-
2417-
const remoteProtection =
2418-
this.experiments?.flags[ExperimentFlags.MASKING_PROTECTION_THRESHOLD]
2419-
?.intValue;
2420-
const remotePrunable =
2421-
this.experiments?.flags[ExperimentFlags.MASKING_PRUNABLE_THRESHOLD]
2422-
?.intValue;
2423-
const remoteProtectLatest =
2424-
this.experiments?.flags[ExperimentFlags.MASKING_PROTECT_LATEST_TURN]
2425-
?.boolValue;
2426-
2427-
const parsedProtection = remoteProtection
2428-
? parseInt(remoteProtection, 10)
2429-
: undefined;
2430-
const parsedPrunable = remotePrunable
2431-
? parseInt(remotePrunable, 10)
2432-
: undefined;
2433-
2434-
return {
2435-
protectionThresholdTokens:
2436-
parsedProtection !== undefined && !isNaN(parsedProtection)
2437-
? parsedProtection
2438-
: this.contextManagement.tools.outputMasking
2439-
.protectionThresholdTokens,
2440-
minPrunableThresholdTokens:
2441-
parsedPrunable !== undefined && !isNaN(parsedPrunable)
2442-
? parsedPrunable
2443-
: this.contextManagement.tools.outputMasking
2444-
.minPrunableThresholdTokens,
2445-
protectLatestTurn:
2446-
remoteProtectLatest ??
2447-
this.contextManagement.tools.outputMasking.protectLatestTurn,
2448-
};
2449-
}
2450-
24512386
getGeminiMdFileCount(): number {
24522387
if (this.experimentalJitContext && this.memoryContextManager) {
24532388
return this.memoryContextManager.getLoadedPaths().size;
@@ -3298,15 +3233,6 @@ export class Config implements McpContext, AgentLoopContext {
32983233
);
32993234
}
33003235

3301-
getToolMaxOutputTokens(): number {
3302-
return this.contextManagement.tools.distillation.maxOutputTokens;
3303-
}
3304-
3305-
getToolSummarizationThresholdTokens(): number {
3306-
return this.contextManagement.tools.distillation
3307-
.summarizationThresholdTokens;
3308-
}
3309-
33103236
getNextCompressionTruncationId(): number {
33113237
return ++this.compressionTruncationCounter;
33123238
}

0 commit comments

Comments
 (0)