Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .changeset/ratio-group-by.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
"@hyperdx/common-utils": patch
---

fix: support Group By on ratio charts

A ratio chart (`seriesReturnType: 'ratio'`) with a Group By previously collapsed
to a single line. Two issues in the multi-series merge: (1) rows were keyed by
time bucket only, so groups at the same bucket overwrote each other, and (2) the
ratio computation dropped every non-value column, discarding the group
dimension. The merge now keys by (time bucket + group dimensions) and the ratio
result carries the group columns through, so a grouped ratio renders one series
per group.

Grouped ratios use share-of-total semantics: each group's denominator is the
total of the denominator column across all groups in the same time bucket, so
the grouped lines are each group's contribution to the overall ratio and sum to
the ungrouped value (e.g. each tenant's share of the overall error rate), rather
than each group's in-group rate. Ungrouped ratios are unchanged (one row per
bucket → the bucket total is that row's denominator). A group absent from the
filtered numerator (e.g. a tenant with zero errors) contributes 0%, not N/A.
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import { useState } from 'react';
import { useMemo, useState } from 'react';
import {
Control,
FieldArrayWithId,
FieldErrors,
UseFormClearErrors,
UseFormSetValue,
useWatch,
} from 'react-hook-form';
import { TableConnection } from '@hyperdx/common-utils/dist/core/metadata';
import {
TableConnection,
TableConnectionChoice,
} from '@hyperdx/common-utils/dist/core/metadata';
import {
HEATMAP_ALLOWED_SOURCE_KINDS,
isBuilderChartConfig,
Expand All @@ -32,6 +36,7 @@ import SourceSchemaPreview, {
import { SourceSelectControlled } from '@/components/SourceSelect';
import { SQLInlineEditorControlled } from '@/components/SQLEditor/SQLInlineEditor';
import { IS_LOCAL_MODE } from '@/config';
import { getMetricTableName } from '@/utils';
import { DEFAULT_TILE_ALERT } from '@/utils/alerts';

import { OnClickFormButton } from './OnClickForm/OnClickFormButton';
Expand Down Expand Up @@ -105,6 +110,42 @@ export function ChartEditorControls({
const [isSourceSchemaPreviewOpen, setIsSourceSchemaPreviewOpen] =
useState(false);

// Metric sources have no single `from.tableName` (they fan out to per-type
// metric tables), so the default tableConnection can't drive attribute
// autocomplete for the chart-level Group By. Build one connection per series'
// metric table + name so the editor unions `Attributes` keys across every
// series (a ratio can mix metric types), matching the per-series inputs.
const series = useWatch({ control, name: 'series' });
const groupByTableConnections = useMemo<TableConnection[] | undefined>(() => {
if (tableSource?.kind !== SourceKind.Metric || !Array.isArray(series)) {
return undefined;
}
const seen = new Set<string>();
const connections: TableConnection[] = [];
for (const s of series) {
if (!s?.metricType || !s?.metricName) continue;
const metricTable = getMetricTableName(tableSource, s.metricType);
if (!metricTable) continue;
const key = `${metricTable}::${s.metricName}`;
if (seen.has(key)) continue;
seen.add(key);
connections.push({
databaseName: tableSource.from.databaseName,
tableName: metricTable,
connectionId: tableSource.connection,
metricName: s.metricName,
});
}
return connections.length > 0 ? connections : undefined;
Comment thread
greptile-apps[bot] marked this conversation as resolved.
Outdated
}, [tableSource, series]);

// tableConnection / tableConnections are mutually exclusive (XOR union), so
// pick one: union of per-series metric tables when available, else the
// source's single connection.
const groupByConnectionProps: TableConnectionChoice = groupByTableConnections
? { tableConnections: groupByTableConnections }
: { tableConnection };

return (
<>
<Flex mb="md" align="center" justify="space-between">
Expand Down Expand Up @@ -211,7 +252,7 @@ export function ChartEditorControls({
</div>
<div>
<SQLInlineEditorControlled
tableConnection={tableConnection}
{...groupByConnectionProps}
control={control}
name={`groupBy`}
placeholder="SQL Columns"
Expand Down
89 changes: 89 additions & 0 deletions packages/common-utils/src/__tests__/clickhouse.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,95 @@ describe('computeResultSetRatio', () => {
/Unable to compute ratio/,
);
});

it('computes a grouped ratio as each group share of the per-bucket total (lines sum to the overall)', () => {
// Joined meta seeds the two value columns first, then the group + timestamp
// columns (mirrors queryChartConfig's merge output).
const mockResultSet: ResponseJSON<any> = {
meta: [
{ name: 'errors', type: 'UInt64' },
{ name: 'total', type: 'UInt64' },
{ name: 'tenant', type: 'String' },
{ name: 'timestamp', type: 'DateTime' },
],
data: [
{ errors: '20', total: '100', tenant: 'acme', timestamp: 't0' },
{ errors: '30', total: '100', tenant: 'globex', timestamp: 't0' },
{ errors: '10', total: '100', tenant: 'acme', timestamp: 't1' },
],
rows: 3,
statistics: { elapsed: 0.1, rows_read: 3, bytes_read: 100 },
};

const result = computeResultSetRatio(mockResultSet);

// ratio column + carried-through group + timestamp
expect(result.meta.map(m => m.name)).toEqual([
'errors/total',
'tenant',
'timestamp',
]);
// Denominator is the per-bucket total across all groups (t0: 200, t1: 100),
// so each group is its contribution to the overall rate.
expect(result.data).toEqual([
{ 'errors/total': 0.1, tenant: 'acme', timestamp: 't0' }, // 20/200
{ 'errors/total': 0.15, tenant: 'globex', timestamp: 't0' }, // 30/200
{ 'errors/total': 0.1, tenant: 'acme', timestamp: 't1' }, // 10/100
]);
// t0 contributions sum to the overall error rate for that bucket (50/200).
const t0 = result.data.filter(r => r.timestamp === 't0');
expect(t0.reduce((s, r) => s + r['errors/total'], 0)).toBeCloseTo(0.25);
});

it('renders a zero-error group as 0% (missing numerator), not N/A', () => {
const mockResultSet: ResponseJSON<any> = {
meta: [
{ name: 'errors', type: 'UInt64' },
{ name: 'total', type: 'UInt64' },
{ name: 'tenant', type: 'String' },
{ name: 'timestamp', type: 'DateTime' },
],
data: [
// has errors -> contributes 20/200
{ errors: '20', total: '100', tenant: 'acme', timestamp: 't0' },
// denominator only (no rows in the error-filtered numerator query) -> 0%
{ total: '100', tenant: 'globex', timestamp: 't0' },
],
rows: 2,
statistics: { elapsed: 0.1, rows_read: 2, bytes_read: 100 },
};

const result = computeResultSetRatio(mockResultSet);

expect(result.data[0]['errors/total']).toBe(0.1); // 20 / (100+100)
expect(result.data[1]['errors/total']).toBe(0); // 0 / 200
});

it('does not let a group missing the denominator poison the bucket total', () => {
const mockResultSet: ResponseJSON<any> = {
meta: [
{ name: 'errors', type: 'UInt64' },
{ name: 'total', type: 'UInt64' },
{ name: 'tenant', type: 'String' },
{ name: 'timestamp', type: 'DateTime' },
],
data: [
// present in both numerator and denominator
{ errors: '20', total: '100', tenant: 'acme', timestamp: 't0' },
// numerator only (no matching denominator row) -> total is undefined.
// Must not turn the bucket total into NaN for the other groups.
{ errors: '5', tenant: 'globex', timestamp: 't0' },
],
rows: 2,
statistics: { elapsed: 0.1, rows_read: 2, bytes_read: 100 },
};

const result = computeResultSetRatio(mockResultSet);

// Bucket total is 100 (only acme contributes a denominator), so acme's
// share is still well-defined rather than NaN.
expect(result.data[0]['errors/total']).toBe(0.2); // 20 / 100
});
});

describe('processClickhouseSettings - optimization settings', () => {
Expand Down
111 changes: 71 additions & 40 deletions packages/common-utils/src/clickhouse/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -387,47 +387,69 @@ export const computeRatio = (
};

export const computeResultSetRatio = (resultSet: ResponseJSON<any>) => {
const _meta = resultSet.meta;
const _meta = resultSet.meta ?? [];
const _data = resultSet.data;
const timestampColumn = inferTimestampColumn(_meta ?? []);
const _restColumns = _meta?.filter(m => m.name !== timestampColumn?.name);
const firstColumn = _restColumns?.[0];
const secondColumn = _restColumns?.[1];
if (!firstColumn || !secondColumn) {
// The numerator/denominator are the two value columns. The joined meta seeds
// them first (see queryChartConfig), so the first two numeric columns are the
// ratio operands; numeric group-by dimensions (if any) come after.
const numericColumns = inferNumericColumn(_meta);
const numerator = numericColumns?.[0];
const denominator = numericColumns?.[1];
if (!numerator || !denominator) {
throw new Error(
`Unable to compute ratio - meta information: ${JSON.stringify(_meta)}.`,
);
}
const ratioColumnName = `${firstColumn.name}/${secondColumn.name}`;
const result = {
const ratioColumnName = `${numerator.name}/${denominator.name}`;
// Carry through every non-operand column — the timestamp and any group-by
// dimensions — so a grouped ratio renders one series per group instead of
// collapsing into a single line.
const passthroughColumns = _meta.filter(
m => m.name !== numerator.name && m.name !== denominator.name,
);
const timestampColumn = inferTimestampColumn(_meta);

// Share-of-total semantics: each group's denominator is the total of the
// denominator column across ALL groups in the same time bucket. So a grouped
// ratio shows each group's contribution to the overall ratio (the lines sum
// to the ungrouped value) rather than each group's own in-group rate. With no
// grouping there's one row per bucket, so the bucket total equals that row's
// denominator and the result is unchanged.
const bucketKey = (row: Record<string, any>) =>
timestampColumn ? String(row[timestampColumn.name]) : '__all__';
const totalDenominatorByBucket = new Map<string, number>();
for (const row of _data) {
const denominatorValue = row[denominator.name];
// A group missing from the denominator split has an undefined value;
// castToNumber returns it as-is and Number.isNaN(undefined) is false, so
// guard explicitly or it would poison the whole bucket total with NaN.
const denom =
denominatorValue == null ? NaN : castToNumber(denominatorValue);
if (!Number.isNaN(denom)) {
const key = bucketKey(row);
totalDenominatorByBucket.set(
key,
(totalDenominatorByBucket.get(key) ?? 0) + denom,
);
}
}
Comment thread
greptile-apps[bot] marked this conversation as resolved.

return {
...resultSet,
data: _data.map(row => ({
[ratioColumnName]: computeRatio(
row[firstColumn.name],
row[secondColumn.name],
),
...(timestampColumn
? {
[timestampColumn.name]: row[timestampColumn.name],
}
: {}),
})),
meta: [
{
name: ratioColumnName,
type: 'Float64',
},
...(timestampColumn
? [
{
name: timestampColumn.name,
type: timestampColumn.type,
},
]
: []),
],
data: _data.map(row => {
// A group absent from the (filtered) numerator query contributes zero, not
// "no data" — so a zero-error group reads 0%, not N/A.
const numeratorValue = row[numerator.name] ?? 0;
const bucketTotal = totalDenominatorByBucket.get(bucketKey(row));
return {
[ratioColumnName]: computeRatio(numeratorValue, bucketTotal ?? NaN),
...Object.fromEntries(
passthroughColumns.map(c => [c.name, row[c.name]]),
),
};
}),
meta: [{ name: ratioColumnName, type: 'Float64' }, ...passthroughColumns],
};
return result;
};

export interface QueryInputs<Format extends DataFormat> {
Expand Down Expand Up @@ -716,20 +738,29 @@ export abstract class BaseClickhouseClient {
),
)
: { ...row };
const ts =
timestampColumn != null
// When the series are grouped, two rows at the same time bucket but
// different group values must stay distinct — key by (bucket + group
// dims) via the hash of the row minus its value column. Without a
// group dimension this collapses to the timestamp (or a fixed key),
// preserving the original behavior.
const hasGroupCols = Object.keys(_rowWithoutValue).some(
key => key !== timestampColumn?.name,
);
const mergeKey = hasGroupCols
? objectHash(_rowWithoutValue)
: timestampColumn != null
? row[timestampColumn.name]
: isTimeSeries
? objectHash(_rowWithoutValue)
: '__FIXED_TIMESTAMP__';
if (tsBucketMap.has(ts)) {
const existingRow = tsBucketMap.get(ts);
tsBucketMap.set(ts, {
if (tsBucketMap.has(mergeKey)) {
const existingRow = tsBucketMap.get(mergeKey);
tsBucketMap.set(mergeKey, {
...existingRow,
...row,
});
} else {
tsBucketMap.set(ts, row);
tsBucketMap.set(mergeKey, row);
}
}
}
Expand Down
Loading