Merge pull request #2420 from huww98/fix/ctrl-y-retry-rate-limit

tanzhenxin · web-flow · commit 0776627e0fc7 · 2026-04-05T14:47:59.000+08:00
feat: allow Ctrl+Y to skip rate-limit retry delay immediately
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -9,6 +9,7 @@ import type {
   Config,
   EditorType,
   GeminiClient,
+  RetryInfo,
   ServerGeminiChatCompressedEvent,
   ServerGeminiContentEvent as ContentEvent,
   ServerGeminiFinishedEvent,
@@ -272,6 +273,7 @@ export const useGeminiStream = (
    */
   const clearRetryCountdown = useCallback(() => {
     stopRetryCountdownTimer();
+    skipRetryDelayRef.current = null;
     setPendingRetryErrorItem(null);
     setPendingRetryCountdownItem(null);
   }, [
@@ -280,14 +282,14 @@ export const useGeminiStream = (
     stopRetryCountdownTimer,
   ]);
 
+  // Holds the skipDelay callback from the current rate-limit RetryInfo.
+  // Managed symmetrically: set in startRetryCountdown, cleared in clearRetryCountdown.
+  const skipRetryDelayRef = useRef<(() => void) | null>(null);
+
   const startRetryCountdown = useCallback(
-    (retryInfo: {
-      message?: string;
-      attempt: number;
-      maxRetries: number;
-      delayMs: number;
-    }) => {
+    (retryInfo: RetryInfo) => {
       stopRetryCountdownTimer();
+      skipRetryDelayRef.current = retryInfo.skipDelay;
       const startTime = Date.now();
       const { message, attempt, maxRetries, delayMs } = retryInfo;
       const retryReasonText =
@@ -1391,6 +1393,15 @@ export const useGeminiStream = (
    * when the user presses Ctrl+Y (bound to Command.RETRY_LAST in keyBindings.ts).
    */
   const retryLastPrompt = useCallback(async () => {
+    // During a rate-limit retry countdown, skip the delay so the generator
+    // retries immediately — no abort/re-submit needed.
+    if (skipRetryDelayRef.current) {
+      skipRetryDelayRef.current();
+      skipRetryDelayRef.current = null;
+      clearRetryCountdown();
+      return;
+    }
+
     if (
       streamingState === StreamingState.Responding ||
       streamingState === StreamingState.WaitingForConfirmation
diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts
@@ -1146,6 +1146,162 @@ describe('GeminiChat', async () => {
       }
     });
 
+    it('should retry immediately when skipDelay is called during rate-limit wait', async () => {
+      vi.useFakeTimers();
+
+      try {
+        const tpmError = new StreamContentError(
+          '{"error":{"code":"429","message":"Throttling: TPM(1/1)"}}',
+        );
+        const successStream = (async function* () {
+          yield {
+            candidates: [
+              {
+                content: { parts: [{ text: 'Success after skip' }] },
+                finishReason: 'STOP',
+              },
+            ],
+          } as unknown as GenerateContentResponse;
+        })();
+
+        vi.mocked(mockContentGenerator.generateContentStream)
+          .mockResolvedValueOnce(
+            (async function* () {
+              throw tpmError;
+
+              yield {} as GenerateContentResponse;
+            })(),
+          )
+          .mockResolvedValueOnce(successStream);
+
+        const stream = await chat.sendMessageStream(
+          'test-model',
+          { message: 'test' },
+          'prompt-id-skip-delay',
+        );
+
+        const iterator = stream[Symbol.asyncIterator]();
+        // First event: RETRY with retryInfo containing skipDelay
+        const first = await iterator.next();
+        expect(first.value.type).toBe(StreamEventType.RETRY);
+        const skipDelay = first.value.retryInfo!.skipDelay!;
+
+        // Resume generator — it's now awaiting the 60s delay.
+        // Call skipDelay() to resolve it immediately instead of advancing timers.
+        const secondPromise = iterator.next();
+        skipDelay();
+        const second = await secondPromise;
+
+        // The generator should have continued to the next attempt immediately
+        expect(second.done).toBe(false);
+        expect(second.value.type).toBe(StreamEventType.RETRY); // retry-start marker
+
+        // Consume remaining events
+        const events: StreamEvent[] = [first.value, second.value];
+        for (;;) {
+          const next = await iterator.next();
+          if (next.done) break;
+          events.push(next.value);
+        }
+
+        expect(
+          mockContentGenerator.generateContentStream,
+        ).toHaveBeenCalledTimes(2);
+        expect(
+          events.some(
+            (e) =>
+              e.type === StreamEventType.CHUNK &&
+              e.value.candidates?.[0]?.content?.parts?.[0]?.text ===
+                'Success after skip',
+          ),
+        ).toBe(true);
+      } finally {
+        vi.useRealTimers();
+      }
+    });
+
+    it('should exit retry loop when aborted during rate-limit delay', async () => {
+      vi.useFakeTimers();
+
+      try {
+        const tpmError = new StreamContentError(
+          '{"error":{"code":"429","message":"Throttling: TPM(1/1)"}}',
+        );
+        async function* failingStreamGenerator() {
+          throw tpmError;
+
+          yield {} as GenerateContentResponse;
+        }
+
+        const abortController = new AbortController();
+
+        vi.mocked(mockContentGenerator.generateContentStream)
+          .mockResolvedValueOnce(failingStreamGenerator())
+          // Should never be called — abort should prevent the second attempt
+          .mockResolvedValueOnce(failingStreamGenerator());
+
+        const stream = await chat.sendMessageStream(
+          'test-model',
+          { message: 'test', config: { abortSignal: abortController.signal } },
+          'prompt-id-abort-delay',
+        );
+
+        const iterator = stream[Symbol.asyncIterator]();
+        // First event: RETRY with retryInfo
+        const first = await iterator.next();
+        expect(first.value.type).toBe(StreamEventType.RETRY);
+
+        // Abort while the generator is awaiting the 60s delay
+        const nextPromise = iterator.next();
+        abortController.abort();
+
+        // The generator should throw the abort error
+        await expect(nextPromise).rejects.toThrow();
+
+        // Only one API call should have been made (no retry after abort)
+        expect(
+          mockContentGenerator.generateContentStream,
+        ).toHaveBeenCalledTimes(1);
+
+        // Verify the next sendMessageStream is not blocked by the old delay.
+        // If sendPromise were still pending, this would hang until the 60s
+        // timer fires — which never happens under fake timers, causing a timeout.
+        const nextStream = (async function* () {
+          yield {
+            candidates: [
+              {
+                content: { parts: [{ text: 'Next request OK' }] },
+                finishReason: 'STOP',
+              },
+            ],
+          } as unknown as GenerateContentResponse;
+        })();
+        vi.mocked(mockContentGenerator.generateContentStream)
+          .mockReset()
+          .mockResolvedValueOnce(nextStream);
+
+        const stream2 = await chat.sendMessageStream(
+          'test-model',
+          { message: 'follow-up' },
+          'prompt-id-after-abort',
+        );
+        const events: StreamEvent[] = [];
+        for await (const e of stream2) {
+          events.push(e);
+        }
+        expect(
+          events.some(
+            (e) =>
+              e.type === StreamEventType.CHUNK &&
+              e.value.candidates?.[0]?.content?.parts?.[0]?.text ===
+                'Next request OK',
+          ),
+        ).toBe(true);
+      } finally {
+        vi.useRealTimers();
+      }
+    });
+
     it('should retry on GLM rate limit StreamContentError with backoff delay', async () => {
       vi.useFakeTimers();
 
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
@@ -85,6 +85,53 @@ const RATE_LIMIT_RETRY_OPTIONS = {
   delayMs: 60000,
 };
 
+/**
+ * Creates a promise that resolves after the specified delay, but can be
+ * resolved early by calling the returned `skip` function.
+ *
+ * If an `AbortSignal` is provided and it fires before the delay completes,
+ * the promise rejects so the caller's `await` throws and normal error
+ * propagation takes over (e.g. the retry loop breaks and the generator exits).
+ */
+function delay(
+  delayMs: number,
+  signal?: AbortSignal,
+): {
+  promise: Promise<void>;
+  skip: () => void;
+} {
+  let resolveRef: () => void;
+  let timeoutId: ReturnType<typeof setTimeout>;
+
+  const promise = new Promise<void>((resolve, reject) => {
+    resolveRef = resolve;
+
+    if (signal?.aborted) {
+      reject(signal.reason);
+      return;
+    }
+
+    timeoutId = setTimeout(resolve, delayMs);
+
+    signal?.addEventListener(
+      'abort',
+      () => {
+        clearTimeout(timeoutId);
+        reject(signal.reason);
+      },
+      { once: true },
+    );
+  });
+
+  return {
+    promise,
+    skip: () => {
+      clearTimeout(timeoutId);
+      resolveRef();
+    },
+  };
+}
+
 /**
  * Returns true if the response is valid, false otherwise.
  *
@@ -353,18 +400,23 @@ export class GeminiChat {
                 `Rate limit throttling detected (retry ${rateLimitRetryCount}/${maxRateLimitRetries}). ` +
                   `Waiting ${delayMs / 1000}s before retrying...`,
               );
+              const { promise: delayPromise, skip } = delay(
+                delayMs,
+                params.config?.abortSignal,
+              );
               yield {
                 type: StreamEventType.RETRY,
                 retryInfo: {
                   message,
                   attempt: rateLimitRetryCount,
                   maxRetries: maxRateLimitRetries,
                   delayMs,
+                  skipDelay: skip,
                 },
               };
               // Don't count rate-limit retries against the content retry limit
               attempt--;
-              await new Promise((res) => setTimeout(res, delayMs));
+              await delayPromise;
               continue;
             }
 
@@ -397,7 +449,7 @@ export class GeminiChat {
               yield { type: StreamEventType.RETRY };
               // Don't count transient retries against content retry limit.
               attempt--;
-              await new Promise((res) => setTimeout(res, delayMs));
+              await delay(delayMs, params.config?.abortSignal).promise;
               continue;
             }
             // Transient budget exhausted — stop immediately.
@@ -418,13 +470,10 @@ export class GeminiChat {
                     model,
                   ),
                 );
-                await new Promise((res) =>
-                  setTimeout(
-                    res,
-                    INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs *
-                      (attempt + 1),
-                  ),
-                );
+                await delay(
+                  INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs * (attempt + 1),
+                  params.config?.abortSignal,
+                ).promise;
                 continue;
               }
             }
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
@@ -233,6 +233,7 @@ export * from './utils/projectSummary.js';
 export * from './utils/promptIdContext.js';
 export * from './utils/proxyUtils.js';
 export * from './utils/quotaErrorDetection.js';
+export * from './utils/rateLimit.js';
 export * from './utils/readManyFiles.js';
 export * from './utils/request-tokenizer/supportedImageFormats.js';
 export { TextTokenizer } from './utils/request-tokenizer/textTokenizer.js';
diff --git a/packages/core/src/utils/rateLimit.ts b/packages/core/src/utils/rateLimit.ts
@@ -22,6 +22,8 @@ export interface RetryInfo {
   maxRetries: number;
   /** Delay in milliseconds before the retry happens. */
   delayMs: number;
+  /** When called, resolves the delay promise early so the retry happens immediately. */
+  skipDelay: () => void;
 }
 
 /**

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -22,6 +22,8 @@ export interface RetryInfo {` |
| `22` | `22` | `maxRetries: number;` |
| `23` | `23` | `/** Delay in milliseconds before the retry happens. */` |
| `24` | `24` | `delayMs: number;` |
| | `25` | `+ /** When called, resolves the delay promise early so the retry happens immediately. */` |
| | `26` | `+ skipDelay: () => void;` |
| `25` | `27` | `}` |
| `26` | `28` | |
| `27` | `29` | `/**` |