feat(config): make thinking idle threshold configurable and lower default to 5min

wenshao · claude · wenshao · commit 6a55a9aeea1e · 2026-04-08T14:21:06.000+08:00
Align with observed provider prompt-cache TTL (~5 min). Add
`context.gapThresholdMinutes` setting so users can tune the threshold
for providers with different cache TTLs.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md
@@ -206,6 +206,7 @@ The `extra_body` field allows you to add custom parameters to the request body s
 | `context.fileFiltering.respectQwenIgnore`         | boolean                    | Respect .qwenignore files when searching.                                                                                                                                                                                                                                                                                                                             | `true`      |
 | `context.fileFiltering.enableRecursiveFileSearch` | boolean                    | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt.                                                                                                                                                                                                                                              | `true`      |
 | `context.fileFiltering.enableFuzzySearch`         | boolean                    | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files.                                                                                                                                                                                                              | `true`      |
+| `context.gapThresholdMinutes`                     | number                     | Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with typical provider prompt-cache TTL. Set higher if your provider has a longer cache TTL.                                                                                                                                                                     | `5`         |
 
 #### Troubleshooting File Search Performance
 
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
@@ -1069,6 +1069,7 @@ export async function loadCliConfig(
     telemetry: telemetrySettings,
     usageStatisticsEnabled: settings.privacy?.usageStatisticsEnabled ?? true,
     fileFiltering: settings.context?.fileFiltering,
+    thinkingIdleThresholdMinutes: settings.context?.gapThresholdMinutes,
     checkpointing:
       argv.checkpointing || settings.general?.checkpointing?.enabled,
     proxy:
diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
@@ -914,6 +914,16 @@ const SETTINGS_SCHEMA = {
           },
         },
       },
+      gapThresholdMinutes: {
+        type: 'number',
+        label: 'Thinking Block Idle Threshold (minutes)',
+        category: 'Context',
+        requiresRestart: false,
+        default: 5,
+        description:
+          'Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.',
+        showInDialog: false,
+      },
     },
   },
 
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
@@ -370,6 +370,8 @@ export interface ConfigParameters {
   model?: string;
   outputLanguageFilePath?: string;
   maxSessionTurns?: number;
+  /** Minutes of inactivity before clearing retained thinking blocks. */
+  thinkingIdleThresholdMinutes?: number;
   sessionTokenLimit?: number;
   experimentalZedIntegration?: boolean;
   cronEnabled?: boolean;
@@ -557,6 +559,7 @@ export class Config {
   private ideMode: boolean;
 
   private readonly maxSessionTurns: number;
+  private readonly thinkingIdleThresholdMs: number;
   private readonly sessionTokenLimit: number;
   private readonly listExtensions: boolean;
   private readonly overrideExtensions?: string[];
@@ -683,6 +686,8 @@ export class Config {
     this.fileDiscoveryService = params.fileDiscoveryService ?? null;
     this.bugCommand = params.bugCommand;
     this.maxSessionTurns = params.maxSessionTurns ?? -1;
+    this.thinkingIdleThresholdMs =
+      (params.thinkingIdleThresholdMinutes ?? 5) * 60 * 1000;
     this.sessionTokenLimit = params.sessionTokenLimit ?? -1;
     this.experimentalZedIntegration =
       params.experimentalZedIntegration ?? false;
@@ -1329,6 +1334,10 @@ export class Config {
     return this.maxSessionTurns;
   }
 
+  getThinkingIdleThresholdMs(): number {
+    return this.thinkingIdleThresholdMs;
+  }
+
   getSessionTokenLimit(): number {
     return this.sessionTokenLimit;
   }
diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
@@ -323,6 +323,7 @@ describe('Gemini Client (client.ts)', () => {
       getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
       getFileService: vi.fn().mockReturnValue(fileService),
       getMaxSessionTurns: vi.fn().mockReturnValue(0),
+      getThinkingIdleThresholdMs: vi.fn().mockReturnValue(5 * 60 * 1000),
       getSessionTokenLimit: vi.fn().mockReturnValue(32000),
       getNoBrowser: vi.fn().mockReturnValue(false),
       getUsageStatisticsEnabled: vi.fn().mockReturnValue(true),
@@ -448,9 +449,9 @@ describe('Gemini Client (client.ts)', () => {
       client['chat'] = mockChat as GeminiChat;
     });
 
-    it('should not strip thoughts on active session (< 1h idle)', async () => {
-      // Simulate a recent API completion (5 minutes ago)
-      client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000;
+    it('should not strip thoughts on active session (< 5min idle)', async () => {
+      // Simulate a recent API completion (2 minutes ago — within default 5 min threshold)
+      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
       client['thinkingClearLatched'] = false;
 
       const gen = client.sendMessageStream(
@@ -468,9 +469,9 @@ describe('Gemini Client (client.ts)', () => {
       ).not.toHaveBeenCalled();
     });
 
-    it('should latch and strip thoughts after > 1h idle', async () => {
-      // Simulate an old API completion (2 hours ago)
-      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 60 * 1000;
+    it('should latch and strip thoughts after > 5min idle', async () => {
+      // Simulate an old API completion (10 minutes ago — exceeds default 5 min threshold)
+      client['lastApiCompletionTimestamp'] = Date.now() - 10 * 60 * 1000;
       client['thinkingClearLatched'] = false;
 
       const gen = client.sendMessageStream(
@@ -489,9 +490,9 @@ describe('Gemini Client (client.ts)', () => {
       );
     });
 
-    it('should keep stripping once latched even if idle < 1h', async () => {
-      // Pre-set latch with a recent timestamp
-      client['lastApiCompletionTimestamp'] = Date.now() - 5 * 60 * 1000;
+    it('should keep stripping once latched even if idle < 5min', async () => {
+      // Pre-set latch with a recent timestamp (2 minutes ago — within threshold)
+      client['lastApiCompletionTimestamp'] = Date.now() - 2 * 60 * 1000;
       client['thinkingClearLatched'] = true;
 
       const gen = client.sendMessageStream(
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
@@ -111,13 +111,6 @@ export interface SendMessageOptions {
   };
 }
 
-/**
- * Idle threshold for thinking block cleanup. After this period without any
- * API call the old thinking blocks are unlikely to aid reasoning coherence
- * and only waste context tokens.
- */
-const THINKING_IDLE_THRESHOLD_MS = 60 * 60 * 1000; // 1 hour
-
 export class GeminiClient {
   private chat?: GeminiChat;
   private sessionTurnCount = 0;
@@ -143,11 +136,11 @@ export class GeminiClient {
 
   /**
    * Sticky-on latch for clearing thinking blocks from prior turns.
-   * Triggered when >1h since last API call — old thinking is no longer
-   * useful for reasoning coherence. Once latched, stays true to prevent
-   * oscillation: without it, thinking would accumulate → get stripped →
-   * accumulate again, causing the message prefix to change repeatedly
-   * (bad for any provider-side prompt caching and wastes context).
+   * Triggered when idle exceeds the configured threshold (default 5 min,
+   * aligned with provider prompt-cache TTL). Once latched, stays true to
+   * prevent oscillation: without it, thinking would accumulate → get
+   * stripped → accumulate again, causing the message prefix to change
+   * repeatedly (bad for provider-side prompt caching and wastes context).
    * Reset on /clear (resetChat).
    */
   private thinkingClearLatched = false;
@@ -567,18 +560,19 @@ export class GeminiClient {
       this.config.getChatRecordingService()?.recordUserMessage(request);
 
       // Thinking block cross-turn retention with idle cleanup:
-      // - Active session (< 1h idle): keep thinking blocks for reasoning coherence
-      // - Idle > 1h: clear old thinking, keep only last 1 turn to free context
+      // - Active session (< threshold idle): keep thinking blocks for reasoning coherence
+      // - Idle > threshold: clear old thinking, keep only last 1 turn to free context
       // - Latch: once triggered, never revert — prevents oscillation
       if (
         !this.thinkingClearLatched &&
         this.lastApiCompletionTimestamp !== null
       ) {
+        const thresholdMs = this.config.getThinkingIdleThresholdMs();
         const idleMs = Date.now() - this.lastApiCompletionTimestamp;
-        if (idleMs > THINKING_IDLE_THRESHOLD_MS) {
+        if (idleMs > thresholdMs) {
           this.thinkingClearLatched = true;
           debugLogger.debug(
-            `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${THINKING_IDLE_THRESHOLD_MS / 1000}s`,
+            `Thinking clear latched: idle ${Math.round(idleMs / 1000)}s > threshold ${thresholdMs / 1000}s`,
           );
         }
       }
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
@@ -584,8 +584,9 @@ export class GeminiChat {
    * model turns) so the most recent reasoning chain is always preserved
    * even if later model turns happen to have no thinking.
    *
-   * Used for idle cleanup: after >1h idle the old thinking blocks are no
-   * longer useful for reasoning coherence but still consume context tokens.
+   * Used for idle cleanup: once the session has been idle longer than the
+   * configured threshold, old thinking blocks no longer help reasoning
+   * coherence but still consume context tokens.
    */
   stripThoughtsFromHistoryKeepRecent(keepTurns: number): void {
     keepTurns = Number.isFinite(keepTurns)
diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json
@@ -383,6 +383,11 @@
               "default": true
             }
           }
+        },
+        "gapThresholdMinutes": {
+          "description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.",
+          "type": "number",
+          "default": 5
         }
       }
     },

Original file line number	Diff line number	Diff line change
`@@ -383,6 +383,11 @@`
`383`	`383`	`"default": true`
`384`	`384`	`}`
`385`	`385`	`}`
	`386`	`+ },`
	`387`	`+ "gapThresholdMinutes": {`
	`388`	`+ "description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.",`
	`389`	`+ "type": "number",`
	`390`	`+ "default": 5`
`386`	`391`	`}`
`387`	`392`	`}`
`388`	`393`	`},`