Skip to content

Commit 6a55a9a

Browse files
wenshao and claude committed
feat(config): make thinking idle threshold configurable and lower default to 5min
Align with the observed provider prompt-cache TTL (~5 min). Add a `context.gapThresholdMinutes` setting so users can tune the threshold for providers with different cache TTLs.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4bbdfee commit 6a55a9a

8 files changed

Lines changed: 49 additions & 27 deletions

File tree

docs/users/configuration/settings.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ The `extra_body` field allows you to add custom parameters to the request body s
206206
| `context.fileFiltering.respectQwenIgnore` | boolean | Respect .qwenignore files when searching. | `true` |
207207
| `context.fileFiltering.enableRecursiveFileSearch` | boolean | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt. | `true` |
208208
| `context.fileFiltering.enableFuzzySearch` | boolean | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files. | `true` |
209+
| `context.gapThresholdMinutes` | number | Minutes of inactivity (time since the last API call completed) after which thinking blocks retained from earlier turns are cleared to free context tokens; the most recent turn's thinking is kept. The default aligns with the typical provider prompt-cache TTL. Set it higher if your provider has a longer cache TTL. | `5` |
209210

210211
#### Troubleshooting File Search Performance
211212

packages/cli/src/config/config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,7 @@ export async function loadCliConfig(
10691069
telemetry: telemetrySettings,
10701070
usageStatisticsEnabled: settings.privacy?.usageStatisticsEnabled ?? true,
10711071
fileFiltering: settings.context?.fileFiltering,
1072+
thinkingIdleThresholdMinutes: settings.context?.gapThresholdMinutes,
10721073
checkpointing:
10731074
argv.checkpointing || settings.general?.checkpointing?.enabled,
10741075
proxy:

packages/cli/src/config/settingsSchema.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,16 @@ const SETTINGS_SCHEMA = {
914914
},
915915
},
916916
},
917+
gapThresholdMinutes: {
918+
type: 'number',
919+
label: 'Thinking Block Idle Threshold (minutes)',
920+
category: 'Context',
921+
requiresRestart: false,
922+
default: 5,
923+
description:
924+
'Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.',
925+
showInDialog: false,
926+
},
917927
},
918928
},
919929

packages/core/src/config/config.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,8 @@ export interface ConfigParameters {
370370
model?: string;
371371
outputLanguageFilePath?: string;
372372
maxSessionTurns?: number;
373+
/** Minutes of inactivity before clearing retained thinking blocks. */
374+
thinkingIdleThresholdMinutes?: number;
373375
sessionTokenLimit?: number;
374376
experimentalZedIntegration?: boolean;
375377
cronEnabled?: boolean;
@@ -557,6 +559,7 @@ export class Config {
557559
private ideMode: boolean;
558560

559561
private readonly maxSessionTurns: number;
562+
private readonly thinkingIdleThresholdMs: number;
560563
private readonly sessionTokenLimit: number;
561564
private readonly listExtensions: boolean;
562565
private readonly overrideExtensions?: string[];
@@ -683,6 +686,8 @@ export class Config {
683686
this.fileDiscoveryService = params.fileDiscoveryService ?? null;
684687
this.bugCommand = params.bugCommand;
685688
this.maxSessionTurns = params.maxSessionTurns ?? -1;
689+
this.thinkingIdleThresholdMs =
690+
(params.thinkingIdleThresholdMinutes ?? 5) * 60 * 1000;
686691
this.sessionTokenLimit = params.sessionTokenLimit ?? -1;
687692
this.experimentalZedIntegration =
688693
params.experimentalZedIntegration ?? false;
@@ -1329,6 +1334,10 @@ export class Config {
13291334
return this.maxSessionTurns;
13301335
}
13311336

1337+
getThinkingIdleThresholdMs(): number {
1338+
return this.thinkingIdleThresholdMs;
1339+
}
1340+
13321341
getSessionTokenLimit(): number {
13331342
return this.sessionTokenLimit;
13341343
}

packages/core/src/core/client.test.ts

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ describe('Gemini Client (client.ts)', () => {
323323
getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
324324
getFileService: vi.fn().mockReturnValue(fileService),
325325
getMaxSessionTurns: vi.fn().mockReturnValue(0),
326+
getThinkingIdleThresholdMs: vi.fn().mockReturnValue(5 * 60 * 1000),
326327
getSessionTokenLimit: vi.fn().mockReturnValue(32000),
327328
getNoBrowser: vi.fn().mockReturnValue(false),
328329
getUsageStatisticsEnabled: vi.fn().mockReturnValue(true),
@@ -448,9 +449,9 @@ describe('Gemini Client (client.ts)', () => {
448449
client['chat'] = mockChat as GeminiChat;
449450
});
450451

451-
it('should not strip thoughts on active session (< 1h idle)', async () => {
452-
// Simulate a recent API completion (5 minutes ago)
453-
client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000;
452+
it('should not strip thoughts on active session (< 5min idle)', async () => {
453+
// Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
454+
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
454455
client['thinkingClearLatched'] = false;
455456

456457
const gen = client.sendMessageStream(
@@ -468,9 +469,9 @@ describe('Gemini Client (client.ts)', () => {
468469
).not.toHaveBeenCalled();
469470
});
470471

471-
it('should latch and strip thoughts after > 1h idle', async () => {
472-
// Simulate an old API completion (2 hours ago)
473-
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 60 * 1000;
472+
it('should latch and strip thoughts after > 5min idle', async () => {
473+
// Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
474+
client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
474475
client['thinkingClearLatched'] = false;
475476

476477
const gen = client.sendMessageStream(
@@ -489,9 +490,9 @@ describe('Gemini Client (client.ts)', () => {
489490
);
490491
});
491492

492-
it('should keep stripping once latched even if idle < 1h', async () => {
493-
// Pre-set latch with a recent timestamp
494-
client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000;
493+
it('should keep stripping once latched even if idle < 5min', async () => {
494+
// Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
495+
client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
495496
client['thinkingClearLatched'] = true;
496497

497498
const gen = client.sendMessageStream(

packages/core/src/core/client.ts

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,6 @@ export interface SendMessageOptions {
111111
};
112112
}
113113

114-
/**
115-
* Idle threshold for thinking block cleanup. After this period without any
116-
* API call the old thinking blocks are unlikely to aid reasoning coherence
117-
* and only waste context tokens.
118-
*/
119-
const THINKING_IDLE_THRESHOLD_MS = 60 * 60 * 1000; // 1 hour
120-
121114
export class GeminiClient {
122115
private chat?: GeminiChat;
123116
private sessionTurnCount = 0;
@@ -143,11 +136,11 @@ export class GeminiClient {
143136

144137
/**
145138
* Sticky-on latch for clearing thinking blocks from prior turns.
146-
* Triggered when >1h since last API call — old thinking is no longer
147-
* useful for reasoning coherence. Once latched, stays true to prevent
148-
* oscillation: without it, thinking would accumulate → get stripped →
149-
* accumulate again, causing the message prefix to change repeatedly
150-
* (bad for any provider-side prompt caching and wastes context).
139+
* Triggered when idle exceeds the configured threshold (default 5 min,
140+
* aligned with provider prompt-cache TTL). Once latched, stays true to
141+
* prevent oscillation: without it, thinking would accumulate → get
142+
* stripped → accumulate again, causing the message prefix to change
143+
* repeatedly (bad for provider-side prompt caching and wastes context).
151144
* Reset on /clear (resetChat).
152145
*/
153146
private thinkingClearLatched = false;
@@ -567,18 +560,19 @@ export class GeminiClient {
567560
this.config.getChatRecordingService()?.recordUserMessage(request);
568561

569562
// Thinking block cross-turn retention with idle cleanup:
570-
// - Active session (< 1h idle): keep thinking blocks for reasoning coherence
571-
// - Idle > 1h: clear old thinking, keep only last 1 turn to free context
563+
// - Active session (< threshold idle): keep thinking blocks for reasoning coherence
564+
// - Idle > threshold: clear old thinking, keep only last 1 turn to free context
572565
// - Latch: once triggered, never revert — prevents oscillation
573566
if (
574567
!this.thinkingClearLatched &&
575568
this.lastApiCompletionTimestamp !== null
576569
) {
570+
const thresholdMs = this.config.getThinkingIdleThresholdMs();
577571
const idleMs = Date.now() - this.lastApiCompletionTimestamp;
578-
if (idleMs > THINKING_IDLE_THRESHOLD_MS) {
572+
if (idleMs > thresholdMs) {
579573
this.thinkingClearLatched = true;
580574
debugLogger.debug(
581-
`Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${THINKING_IDLE_THRESHOLD_MS / 1000}s`,
575+
`Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
582576
);
583577
}
584578
}

packages/core/src/core/geminiChat.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -584,8 +584,9 @@ export class GeminiChat {
584584
* model turns) so the most recent reasoning chain is always preserved
585585
* even if later model turns happen to have no thinking.
586586
*
587-
* Used for idle cleanup: after >1h idle the old thinking blocks are no
588-
* longer useful for reasoning coherence but still consume context tokens.
587+
* Used for idle cleanup: after exceeding the configured idle threshold
588+
* the old thinking blocks are no longer useful for reasoning coherence
589+
* but still consume context tokens.
589590
*/
590591
stripThoughtsFromHistoryKeepRecent(keepTurns: number): void {
591592
keepTurns = Number.isFinite(keepTurns)

packages/vscode-ide-companion/schemas/settings.schema.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,11 @@
383383
"default": true
384384
}
385385
}
386+
},
387+
"gapThresholdMinutes": {
388+
"description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.",
389+
"type": "number",
390+
"default": 5
386391
}
387392
}
388393
},

0 commit comments

Comments
 (0)