Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/mcp-error-categorization.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@hyperdx/api': patch
---

Classify MCP tool errors as `user` (bad input, not-found) or `server` (infrastructure failure, timeout) so alerting rules can filter on `error_category=server` without noise from agent input mistakes. Adds `error_category` attribute to spans and the `hyperdx.mcp.tool.errors` metric counter. ClickHouse errors are auto-classified by inspecting the error type and walking the cause chain for TCP-level codes.
1 change: 1 addition & 0 deletions packages/api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"@ai-sdk/anthropic": "^3.0.58",
"@ai-sdk/openai": "^3.0.47",
"@braintree/sanitize-url": "^7.1.1",
"@clickhouse/client-common": "1.23.0-head.fae5998.1",
"@esm2cjs/p-queue": "^7.3.0",
"@hyperdx/common-utils": "^0.21.0",
"@hyperdx/node-opentelemetry": "^0.9.0",
Expand Down
306 changes: 306 additions & 0 deletions packages/api/src/mcp/__tests__/query.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,16 @@ jest.mock('@/utils/trimToolResponse', () => ({
trimToolResponse: (data: unknown) => ({ data, isTrimmed: false }),
}));

import { ClickHouseError } from '@clickhouse/client-common';

import {
annotateIncreaseTopNHint,
assertSourceKindMatchesSelect,
clickHouseErrorResult,
errorHint,
getClickHouseErrorType,
INCREASE_TOP_N_CAP,
isServerError,
mergeWhereIntoSelectItems,
parseTimeRange,
} from '@/mcp/tools/query/helpers';
Expand All @@ -20,6 +25,7 @@ import {
validateMetricSelectItems,
} from '@/mcp/tools/query/schemas';
import { resolveOrderBy } from '@/mcp/tools/query/table';
import { getErrorCategory } from '@/mcp/utils/errors';

describe('parseTimeRange', () => {
it('should return default range (last 15 minutes) when no arguments provided', () => {
Expand Down Expand Up @@ -261,6 +267,306 @@ describe('errorHint', () => {
});
});

// ─── getClickHouseErrorType ──────────────────────────────────────────────────

describe('getClickHouseErrorType', () => {
it('returns undefined for a non-Error value', () => {
expect(getClickHouseErrorType('NETWORK_ERROR')).toBeUndefined();
expect(getClickHouseErrorType(null)).toBeUndefined();
expect(getClickHouseErrorType(undefined)).toBeUndefined();
expect(getClickHouseErrorType({ type: 'NETWORK_ERROR' })).toBeUndefined();
});

it('returns the type from .cause when present (wrapped ClickHouseError)', () => {
const cause = new ClickHouseError({
message: 'timeout',
code: '159',
type: 'SOCKET_TIMEOUT',
});
const err = new Error('query failed', { cause });
expect(getClickHouseErrorType(err)).toBe('SOCKET_TIMEOUT');
});

it('returns the type from a direct ClickHouseError', () => {
const err = new ClickHouseError({
message: 'direct',
code: '62',
type: 'SYNTAX_ERROR',
});
expect(getClickHouseErrorType(err)).toBe('SYNTAX_ERROR');
});

it('returns .cause type when wrapper carries a ClickHouseError cause', () => {
const cause = new ClickHouseError({
message: 'inner',
code: '210',
type: 'NETWORK_ERROR',
});
const err = new Error('outer wrapper', { cause });
expect(getClickHouseErrorType(err)).toBe('NETWORK_ERROR');
});

it('returns undefined when neither .cause nor the error is a ClickHouseError', () => {
expect(getClickHouseErrorType(new Error('plain'))).toBeUndefined();
});

it('ignores non-ClickHouseError objects with a .type property', () => {
const err = Object.assign(new Error('not CH'), { type: 'NETWORK_ERROR' });
expect(getClickHouseErrorType(err)).toBeUndefined();
});

it('ignores .cause that is a plain object with a type (not ClickHouseError)', () => {
const err = new Error('no CH cause', {
cause: { type: 'SOCKET_TIMEOUT' },
});
expect(getClickHouseErrorType(err)).toBeUndefined();
});

it('detects a cross-package ClickHouseError via constructor-name fallback', () => {
// Simulate a ClickHouseError from a different copy of
// @clickhouse/client-common where instanceof would fail.
class ClickHouseError extends Error {
type: string;
code: string;
constructor(opts: { message: string; code: string; type: string }) {
super(opts.message);
this.name = 'ClickHouseError';
this.type = opts.type;
this.code = opts.code;
}
}
const cause = new ClickHouseError({
message: 'network',
code: '210',
type: 'NETWORK_ERROR',
});
const err = new Error('query failed', { cause });
expect(getClickHouseErrorType(err)).toBe('NETWORK_ERROR');
});
});

// ─── isServerError ───────────────────────────────────────────────────────────

describe('isServerError', () => {
it('returns true for ClickHouseError with a server-side type', () => {
const cause = new ClickHouseError({
message: 'network',
code: '210',
type: 'NETWORK_ERROR',
});
const err = new Error('query failed', { cause });
expect(isServerError(err)).toBe(true);
});

it('returns false for ClickHouseError with a query-level type', () => {
const cause = new ClickHouseError({
message: 'syntax',
code: '62',
type: 'SYNTAX_ERROR',
});
const err = new Error('query failed', { cause });
expect(isServerError(err)).toBe(false);
});

it('returns true for ECONNREFUSED (TCP connection failure)', () => {
const err = Object.assign(new Error('connect ECONNREFUSED 127.0.0.1:1'), {
code: 'ECONNREFUSED',
});
expect(isServerError(err)).toBe(true);
});

it('returns true for ENOTFOUND (DNS failure)', () => {
const err = Object.assign(
new Error('getaddrinfo ENOTFOUND bogus.invalid'),
{ code: 'ENOTFOUND' },
);
expect(isServerError(err)).toBe(true);
});

it('returns true for ECONNRESET', () => {
const err = Object.assign(new Error('socket hang up'), {
code: 'ECONNRESET',
});
expect(isServerError(err)).toBe(true);
});

it('returns true for ETIMEDOUT', () => {
const err = Object.assign(new Error('connect timed out'), {
code: 'ETIMEDOUT',
});
expect(isServerError(err)).toBe(true);
});

it('returns true when TCP error is wrapped in .cause', () => {
const cause = Object.assign(new Error('connect ECONNREFUSED 127.0.0.1:1'), {
code: 'ECONNREFUSED',
});
const err = new Error('request failed', { cause });
expect(isServerError(err)).toBe(true);
});

it('returns false for a plain Error with no code', () => {
expect(isServerError(new Error('something failed'))).toBe(false);
});

it('returns false for non-Error values', () => {
expect(isServerError('ECONNREFUSED')).toBe(false);
expect(isServerError(null)).toBe(false);
});

it('returns true for ClickHouseError nested at depth 2+', () => {
const chErr = new ClickHouseError({
message: 'network',
code: '210',
type: 'NETWORK_ERROR',
});
const mid = new Error('mid-level wrapper', { cause: chErr });
const outer = new Error('outer wrapper', { cause: mid });
expect(isServerError(outer)).toBe(true);
});

it('detects a cross-package ClickHouseError via constructor-name fallback', () => {
// Locally-declared class — instanceof against the imported
// ClickHouseError will fail, exercising the name-based fallback.
class ClickHouseError extends Error {
type: string;
code: string;
constructor(opts: { message: string; code: string; type: string }) {
super(opts.message);
this.name = 'ClickHouseError';
this.type = opts.type;
this.code = opts.code;
}
}
const cause = new ClickHouseError({
message: 'network',
code: '210',
type: 'NETWORK_ERROR',
});
const err = new Error('query failed', { cause });
expect(isServerError(err)).toBe(true);
});

it('returns true for AggregateError containing a TCP error', () => {
const tcpErr = Object.assign(
new Error('connect ECONNREFUSED 127.0.0.1:1'),
{ code: 'ECONNREFUSED' },
);
const aggErr = new AggregateError([tcpErr], 'all connections failed');
expect(isServerError(aggErr)).toBe(true);
});

it('handles circular .cause without infinite loop', () => {
const a = new Error('a');
const b = new Error('b', { cause: a });
// Create circular reference: a.cause -> b -> a -> ...
(a as any).cause = b;
// Should terminate and return false (no server error codes)
expect(isServerError(a)).toBe(false);
});
});

// ─── clickHouseErrorResult ───────────────────────────────────────────────────

describe('clickHouseErrorResult', () => {
it('classifies infrastructure error types as "server"', () => {
for (const type of [
'NETWORK_ERROR',
'SOCKET_TIMEOUT',
'POCO_EXCEPTION',
'ALL_CONNECTION_TRIES_FAILED',
]) {
const cause = new ClickHouseError({
message: 'infra down',
code: '210',
type,
});
const err = new Error('infra down', { cause });
const result = clickHouseErrorResult(err);
expect(result.isError).toBe(true);
expect(getErrorCategory(result)).toBe('server');
}
});

it('classifies query/syntax errors as "user"', () => {
const cause = new ClickHouseError({
message: 'bad query',
code: '62',
type: 'SYNTAX_ERROR',
});
const err = new Error('bad query', { cause });
const result = clickHouseErrorResult(err);
expect(getErrorCategory(result)).toBe('user');
});

it('defaults to "user" when no error type is available', () => {
const result = clickHouseErrorResult(new Error('mystery'));
expect(getErrorCategory(result)).toBe('user');
});

it('classifies a non-Error value as "user"', () => {
const result = clickHouseErrorResult('some string failure');
expect(getErrorCategory(result)).toBe('user');
expect(result.content[0].text).toContain('some string failure');
});

it('prepends a string prefix to the message', () => {
const result = clickHouseErrorResult(
new Error('boom'),
'Failed to sample rows',
);
expect(result.content[0].text).toBe('Failed to sample rows: boom');
});

it('supports prefix and suffix via the object form', () => {
const result = clickHouseErrorResult(new Error('boom'), {
prefix: 'Failed to compute breakdown',
suffix: 'The parentFilter must be valid ClickHouse SQL.',
});
expect(result.content[0].text).toBe(
'Failed to compute breakdown: boom The parentFilter must be valid ClickHouse SQL.',
);
});

it('preserves categorization regardless of message context', () => {
const cause = new ClickHouseError({
message: 'infra down',
code: '210',
type: 'NETWORK_ERROR',
});
const err = new Error('infra down', { cause });
const result = clickHouseErrorResult(err, 'Failed to sample rows');
expect(getErrorCategory(result)).toBe('server');
});

it('appends the DateTime64 hint and still classifies as "user"', () => {
const err = new Error("Cannot parse string '2024Z' as DateTime64(9)");
const result = clickHouseErrorResult(err);
expect(result.content[0].text).toContain('HINT:');
expect(getErrorCategory(result)).toBe('user');
});

it('classifies ECONNREFUSED as "server"', () => {
const err = Object.assign(
new Error('connect ECONNREFUSED 127.0.0.1:19999'),
{ code: 'ECONNREFUSED' },
);
const result = clickHouseErrorResult(err);
expect(result.isError).toBe(true);
expect(getErrorCategory(result)).toBe('server');
});

it('classifies ENOTFOUND as "server"', () => {
const err = Object.assign(
new Error('getaddrinfo ENOTFOUND bogus.invalid'),
{ code: 'ENOTFOUND' },
);
const result = clickHouseErrorResult(err);
expect(result.isError).toBe(true);
expect(getErrorCategory(result)).toBe('server');
});
});

// ─── resolveOrderBy ──────────────────────────────────────────────────────────

describe('resolveOrderBy', () => {
Expand Down
Loading
Loading