Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

### Fixes

- The MCP server no longer hangs on startup when the parser fails to load a language grammar. Previously a grammar-load crash could wedge the shared background daemon, so every editor session that connected to it hung waiting to initialize — on any project, large or small. The parser now falls back to in-process parsing instead of hanging, and a stuck startup reconcile can no longer block the first query indefinitely (tunable via `CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS`).
- Indexing a project that contains only config-style files (YAML, Twig, or `.properties`) no longer misleadingly reports "No files found to index" — these files are tracked at the file level and are now counted as indexed. Thanks @luojiyin1987 (#357).

## [0.9.7] - 2026-05-28
Expand Down
17 changes: 17 additions & 0 deletions __tests__/mcp-catchup-gate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,4 +119,21 @@ describe('MCP catch-up gate', () => {
expect(res.isError).toBeFalsy();
expect(res.content[0].text).toMatch(/survivor/);
});

it('a gate that never settles does not hang the first tool call (timeout fallback)', async () => {
// Defense in depth for the daemon init-hang: if catch-up never settles for
// any reason, the first call must still serve (best-effort) after the
// bounded wait instead of hanging forever. Tiny override keeps it fast.
process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS = '50';
try {
handler.setCatchUpGate(new Promise<void>(() => { /* never settles */ }));
const start = Date.now();
const res = await handler.execute('codegraph_search', { query: 'survivor' });
expect(Date.now() - start).toBeLessThan(5000);
expect(res.isError).toBeFalsy();
expect(res.content[0].text).toMatch(/survivor/);
} finally {
delete process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS;
}
});
});
80 changes: 80 additions & 0 deletions __tests__/parse-worker-grammar-load.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
* Regression test for the MCP daemon init-hang.
*
* `ensureWorker()` used to await a fresh parse worker's grammar-load with a bare
* `once('message')` that only resolved on `grammars-loaded`. If the worker DIED
* while loading grammars (a tree-sitter WASM abort), it never sent that message,
* so the await — and the in-process `indexMutex` it runs under — hung forever.
* In the shared daemon that wedged initialization for every connecting client.
*
* `awaitWorkerGrammarLoad` must settle (never hang) on every outcome: loaded,
* load-failed, worker error, worker exit, or timeout.
*/
import { EventEmitter } from 'node:events';
import { describe, it, expect } from 'vitest';
import { awaitWorkerGrammarLoad } from '../src/extraction/index';

class FakeWorker extends EventEmitter {
posted: unknown[] = [];
postMessage(msg: unknown): void {
this.posted.push(msg);
}
}

const noLeakedListeners = (w: FakeWorker) =>
w.listenerCount('message') === 0 &&
w.listenerCount('error') === 0 &&
w.listenerCount('exit') === 0;

describe('awaitWorkerGrammarLoad — daemon init-hang regression', () => {
it('posts the load-grammars request and resolves on grammars-loaded', async () => {
const w = new FakeWorker();
const p = awaitWorkerGrammarLoad(w as never, ['typescript'], 1000);
expect(w.posted).toEqual([{ type: 'load-grammars', languages: ['typescript'] }]);
w.emit('message', { type: 'grammars-loaded' });
await expect(p).resolves.toBeUndefined();
expect(noLeakedListeners(w)).toBe(true);
});

it('rejects (does not hang) on grammars-load-failed', async () => {
const w = new FakeWorker();
const p = awaitWorkerGrammarLoad(w as never, ['typescript'], 1000);
w.emit('message', { type: 'grammars-load-failed', error: 'bad wasm' });
await expect(p).rejects.toThrow(/bad wasm/);
expect(noLeakedListeners(w)).toBe(true);
});

it('rejects (does not hang) when the worker exits before grammars load — the WASM-abort case', async () => {
const w = new FakeWorker();
const p = awaitWorkerGrammarLoad(w as never, ['typescript'], 1000);
w.emit('exit', 1);
await expect(p).rejects.toThrow(/exited/);
expect(noLeakedListeners(w)).toBe(true);
});

it('rejects (does not hang) when the worker emits an error', async () => {
const w = new FakeWorker();
const p = awaitWorkerGrammarLoad(w as never, ['typescript'], 1000);
w.emit('error', new Error('worker died'));
await expect(p).rejects.toThrow(/worker died/);
expect(noLeakedListeners(w)).toBe(true);
});

it('rejects on timeout when the worker goes silent — never hangs forever', async () => {
const w = new FakeWorker();
const p = awaitWorkerGrammarLoad(w as never, ['typescript'], 50);
await expect(p).rejects.toThrow(/timed out/);
expect(noLeakedListeners(w)).toBe(true);
});

it('settles exactly once — a late worker exit after grammars-loaded is ignored', async () => {
const w = new FakeWorker();
const p = awaitWorkerGrammarLoad(w as never, ['typescript'], 1000);
w.emit('message', { type: 'grammars-loaded' });
await expect(p).resolves.toBeUndefined();
// The promise already resolved; a later crash must not double-settle or
// throw an unhandled 'error' (listeners are gone, so emitting is a no-op).
expect(() => w.emit('exit', 1)).not.toThrow();
expect(noLeakedListeners(w)).toBe(true);
});
});
138 changes: 116 additions & 22 deletions src/extraction/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ const FILE_IO_BATCH_SIZE = 10;
*/
const PARSE_TIMEOUT_MS = 10_000;

/**
* Backstop for the one-time grammar-load handshake with a fresh worker. The
* worker normally answers `grammars-loaded` in well under a second; this only
* fires if it goes silent without crashing. Worker crash/exit during load is
* caught immediately (no need to wait this out) — see `ensureWorker`.
*/
const GRAMMAR_LOAD_TIMEOUT_MS = 30_000;

/**
* Number of files to parse before recycling the worker thread.
* WASM linear memory can grow but NEVER shrink (WebAssembly spec limitation).
Expand All @@ -48,6 +56,60 @@ const PARSE_TIMEOUT_MS = 10_000;
*/
const WORKER_RECYCLE_INTERVAL = 250;

/**
* Minimal shape of a parse worker needed for the grammar-load handshake. The
* real `worker_threads.Worker` satisfies it; tests pass a fake EventEmitter.
*/
export interface GrammarLoadWorker {
on(event: string, cb: (...args: any[]) => void): void;
off(event: string, cb: (...args: any[]) => void): void;
once(event: string, cb: (...args: any[]) => void): void;
postMessage(msg: unknown): void;
}

/**
* Await a fresh worker's one-time grammar-load handshake, settling on the FIRST
* of: `grammars-loaded` (resolve) / `grammars-load-failed` / worker `error` /
* worker `exit` / timeout (all reject). Listeners are always removed and the
* timer cleared on settle.
*
* Regression guard (daemon-init-hang fix): the bare `once('message')` this
* replaced only resolved on `grammars-loaded` and never settled when the worker
* DIED loading grammars (a tree-sitter WASM abort). The caller's await — and the
* in-process `indexMutex` it runs under — then hung forever, which in a
* long-lived MCP daemon wedged initialization for every client that connected.
*/
export function awaitWorkerGrammarLoad(
worker: GrammarLoadWorker,
languages: Language[],
timeoutMs: number = GRAMMAR_LOAD_TIMEOUT_MS
): Promise<void> {
return new Promise<void>((resolve, reject) => {
let settled = false;
const finish = (err?: Error) => {
if (settled) return;
settled = true;
clearTimeout(timer);
worker.off('message', onMessage);
worker.off('error', onError);
worker.off('exit', onExit);
if (err) reject(err); else resolve();
};
const onMessage = (msg: { type: string; error?: string }) => {
if (msg.type === 'grammars-loaded') finish();
else if (msg.type === 'grammars-load-failed') finish(new Error(`worker failed to load grammars: ${msg.error ?? 'unknown error'}`));
};
const onError = (err: Error) => finish(err);
const onExit = (code: number) => finish(new Error(`worker exited (code ${code}) before grammars loaded`));
const timer = setTimeout(() => finish(new Error(`grammar load timed out after ${timeoutMs}ms`)), timeoutMs);
timer.unref?.();
worker.on('message', onMessage);
worker.once('error', onError);
worker.once('exit', onExit);
worker.postMessage({ type: 'load-grammars', languages });
});
}

/**
* Progress callback for indexing operations
*/
Expand Down Expand Up @@ -682,6 +744,22 @@ export class ExtractionOrchestrator {
await loadGrammarsForLanguages(neededLanguages);
}

// Set true if the worker becomes unusable mid-run (e.g. it crashed while
// loading grammars). From then on we parse in-process so a broken worker
// degrades to slower-but-correct parsing instead of hanging the run.
let workerDisabled = false;
// Grammars are already loaded on the main thread only when we never spawned
// a worker. The in-process fallback lazily loads them on first use.
let mainGrammarsLoaded = !useWorker;

async function parseInProcess(filePath: string, content: string): Promise<ExtractionResult> {
if (!mainGrammarsLoaded) {
await loadGrammarsForLanguages(neededLanguages); // idempotent — skips already-loaded
mainGrammarsLoaded = true;
}
return extractFromSource(filePath, content, detectLanguage(filePath, content), frameworkNames);
}

// --- Worker lifecycle management ---
// The worker can crash (OOM in WASM) or hang on pathological files.
// We track pending parse promises and handle both cases:
Expand Down Expand Up @@ -738,23 +816,33 @@ export class ExtractionOrchestrator {
async function ensureWorker(): Promise<import('worker_threads').Worker> {
if (parseWorker) return parseWorker;
log('Spawning new parse worker...');
parseWorker = new WorkerClass!(parseWorkerPath);
attachWorkerHandlers(parseWorker);

// Load grammars in the new worker
await new Promise<void>((resolve, reject) => {
parseWorker!.once('message', (msg: { type: string }) => {
if (msg.type === 'grammars-loaded') resolve();
else reject(new Error(`Unexpected message: ${msg.type}`));
});
parseWorker!.postMessage({ type: 'load-grammars', languages: neededLanguages });
});
const w = new WorkerClass!(parseWorkerPath);
parseWorker = w;
attachWorkerHandlers(w);

// Wait for grammars to load — bounded, and rejects (never hangs) if the
// worker dies mid-load. See `awaitWorkerGrammarLoad`.
try {
await awaitWorkerGrammarLoad(w, neededLanguages);
} catch (err) {
// Worker is unusable — tear it down and let the caller degrade.
if (parseWorker === w) parseWorker = null;
try { void w.terminate(); } catch { /* ignore */ }
throw err instanceof Error ? err : new Error(String(err));
}

return parseWorker;
return w;
}

if (WorkerClass) {
await ensureWorker();
try {
await ensureWorker();
} catch (err) {
workerDisabled = true;
logWarn('Parse worker failed to start — falling back to in-process parsing', {
error: err instanceof Error ? err.message : String(err),
});
}
}

/**
Expand All @@ -773,14 +861,8 @@ export class ExtractionOrchestrator {
}

async function requestParse(filePath: string, content: string): Promise<ExtractionResult> {
if (!WorkerClass) {
// In-process fallback
return extractFromSource(
filePath,
content,
detectLanguage(filePath, content),
frameworkNames
);
if (!WorkerClass || workerDisabled) {
return parseInProcess(filePath, content);
}

// Recycle the worker before the next parse if we've hit the threshold.
Expand All @@ -790,7 +872,19 @@ export class ExtractionOrchestrator {
await recycleWorker();
}

const worker = await ensureWorker();
let worker: import('worker_threads').Worker;
try {
worker = await ensureWorker();
} catch (err) {
// The worker died (e.g. crashed while loading grammars) — degrade to
// in-process parsing for the rest of this run rather than failing every
// file or hanging on a worker that will never answer.
workerDisabled = true;
logWarn('Parse worker unavailable — falling back to in-process parsing', {
error: err instanceof Error ? err.message : String(err),
});
return parseInProcess(filePath, content);
}
const id = nextId++;
workerParseCount++;

Expand Down
15 changes: 13 additions & 2 deletions src/extraction/parse-worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,19 @@ const parseCounts = new Map<Language, number>();

parentPort!.on('message', async (msg: { type: string; id?: number; filePath?: string; content?: string; languages?: Language[]; frameworkNames?: string[] }) => {
if (msg.type === 'load-grammars') {
await loadGrammarsForLanguages(msg.languages!);
parentPort!.postMessage({ type: 'grammars-loaded' });
try {
await loadGrammarsForLanguages(msg.languages!);
parentPort!.postMessage({ type: 'grammars-loaded' });
} catch (err) {
// Don't die silently: a JS-level grammar-load failure must be reported so
// the parent degrades to in-process parsing instead of waiting forever
// for a 'grammars-loaded' that will never come. (A hard WASM abort kills
// the worker outright — the parent's 'exit'/'error' guard covers that.)
parentPort!.postMessage({
type: 'grammars-load-failed',
error: err instanceof Error ? err.message : String(err),
});
}
} else if (msg.type === 'parse') {
const { id, filePath, content, frameworkNames } = msg;
try {
Expand Down
26 changes: 25 additions & 1 deletion src/mcp/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,19 @@ function adaptiveExploreEnabled(): boolean {
return process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== '0' && process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== 'false';
}

/**
* Upper bound (ms) on how long the first tool call waits for the catch-up gate
* before serving best-effort. Defense in depth: a catch-up sync that never
* settles must not hang the first call — and, in the shared daemon, every
* client. Override via `CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS`; default 120s (well
* above any real incremental reconcile).
*/
function catchUpGateTimeoutMs(): number {
const raw = process.env.CODEGRAPH_CATCHUP_GATE_TIMEOUT_MS;
const n = raw ? Number.parseInt(raw, 10) : NaN;
return Number.isFinite(n) && n > 0 ? n : 120_000;
}

/**
* Prefix each line of a source slice with its 1-based line number, matching
* the Read tool's `cat -n` convention (number + tab) so the agent treats it
Expand Down Expand Up @@ -1042,7 +1055,18 @@ export class ToolHandler {
if (this.catchUpGate) {
const gate = this.catchUpGate;
this.catchUpGate = null;
try { await gate; } catch { /* engine already logged */ }
// Bound the wait: a catch-up sync that never settles (for any reason)
// must not wedge the first call. On timeout we proceed best-effort over
// possibly-stale data — the same outcome the rejection-swallow accepts.
try {
await Promise.race([
gate,
new Promise<void>((resolve) => {
const t = setTimeout(resolve, catchUpGateTimeoutMs());
t.unref?.();
}),
]);
} catch { /* engine already logged */ }
}
// Honor the optional tool allowlist (CODEGRAPH_MCP_TOOLS): a trimmed
// surface rejects ablated tools defensively even if a client cached them.
Expand Down