From 2f5159b49081bb1bd32f150c78f372b42fc9fb5a Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Tue, 9 Jun 2026 16:19:39 -0500 Subject: [PATCH 01/21] [HDX-4347] feat(mcp): add metric fields to query/dashboard select schemas Extend the MCP select-item schemas with first-class metric fields so the clickstack_timeseries, clickstack_table, and dashboard builder tile tools can accept metric-source queries: - Add 'increase' to MCP_AGG_FN_OPTIONS (sum-only counter-increase aggFn) - Add optional metricType (gauge/sum/histogram), metricName, and isDelta fields to mcpSelectItemSchema and mcpTileSelectItemSchema - Restrict metricType to the three kinds the renderer supports today; summary and exponential histogram are excluded - Extract cross-field validation into getMetricSelectIssues so both schemas share the same metric rules: metricType<->metricName paired, 'increase' is sum-only, histogram only allows quantile (with level) or count, isDelta is gauge-only, valueExpression is required for non-count aggFns unless metricType is set (defaults to 'Value' for metric sources) - Apply validation imperatively in the timeseries/table handlers via validateMetricSelectItems (works around a Zod 3.x quirk where .superRefine widens optional-field inference and breaks the strict consumer types of mergeWhereIntoSelectItems and resolveOrderBy) - Update timeseries/table tool descriptions with a METRIC SOURCES subsection covering the discovery workflow chain and per-kind aggFn guidance, plus the 20-group top-N cap on sum+increase+groupBy - Exclude 'increase' from AGG_FN_NAMES so resolveOrderBy doesn't try to synthesize a non-existent SQL increase() function Adds 20 unit tests covering the validator branches and the resolveOrderBy handling of 'increase'. No new TypeScript errors above the pre-existing baseline on main. --- packages/api/src/mcp/__tests__/query.test.ts | 200 +++++++++++++++ .../api/src/mcp/tools/dashboards/schemas.ts | 79 ++++-- packages/api/src/mcp/tools/query/schemas.ts | 233 +++++++++++++++++- packages/api/src/mcp/tools/query/table.ts | 42 +++- .../api/src/mcp/tools/query/timeseries.ts | 34 ++- 5 files changed, 552 insertions(+), 36 deletions(-) diff --git a/packages/api/src/mcp/__tests__/query.test.ts b/packages/api/src/mcp/__tests__/query.test.ts index c523a67999..7525cbb848 100644 --- a/packages/api/src/mcp/__tests__/query.test.ts +++ b/packages/api/src/mcp/__tests__/query.test.ts @@ -11,6 +11,10 @@ import { mergeWhereIntoSelectItems, parseTimeRange, } from '../tools/query/helpers'; +import { + getMetricSelectIssues, + validateMetricSelectItems, +} from '../tools/query/schemas'; import { resolveOrderBy } from '../tools/query/table'; describe('parseTimeRange', () => { @@ -352,4 +356,200 @@ describe('resolveOrderBy', () => { ]), ).toBe('quantile'); }); + + it('should NOT synthesize for "increase" aggFn (metric-only marker)', () => { + // increase compiles to a multi-CTE sum(Rate) pipeline in the renderer, + // not a standalone SQL function. resolveOrderBy must leave bare + // "increase" alone so the renderer-assigned alias can take over. + expect( + resolveOrderBy('increase', [ + { aggFn: 'increase', valueExpression: 'Value' }, + ]), + ).toBe('increase'); + }); +}); + +// ─── getMetricSelectIssues ─────────────────────────────────────────────────── + +describe('getMetricSelectIssues', () => { + it('returns no issues for a plain non-metric count', () => { + expect(getMetricSelectIssues({ aggFn: 'count' })).toEqual([]); + }); + + it('returns no issues for a plain non-metric avg with valueExpression', () => { + expect( + getMetricSelectIssues({ aggFn: 'avg', valueExpression: 'Duration' }), + ).toEqual([]); + }); + + it('requires valueExpression for non-count non-metric aggregations', () => { + const issues = getMetricSelectIssues({ aggFn: 'avg' }); + expect(issues).toHaveLength(1); + expect(issues[0].path).toEqual(['valueExpression']); + expect(issues[0].message).toContain('required for non-count'); + }); + + it('does NOT require valueExpression when metricType is set', () => { + // valueExpression defaults to "Value" for metric sources + expect( + getMetricSelectIssues({ + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }), + ).toEqual([]); + }); + + it('rejects valueExpression on aggFn:"count"', () => { + const issues = getMetricSelectIssues({ + aggFn: 'count', + valueExpression: 'Duration', + }); + expect(issues).toHaveLength(1); + expect(issues[0].path).toEqual(['valueExpression']); + }); + + it('rejects metricType without metricName', () => { + const issues = getMetricSelectIssues({ + aggFn: 'avg', + metricType: 'gauge', + }); + expect(issues.find(i => i.path[0] === 'metricName')).toBeDefined(); + }); + + it('rejects metricName without metricType', () => { + const issues = getMetricSelectIssues({ + aggFn: 'avg', + metricName: 'system.cpu.utilization', + }); + expect(issues.find(i => i.path[0] === 'metricType')).toBeDefined(); + }); + + it('rejects aggFn:"increase" on a gauge metric', () => { + const issues = getMetricSelectIssues({ + aggFn: 'increase', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }); + expect(issues.find(i => i.path[0] === 'aggFn')).toBeDefined(); + }); + + it('accepts aggFn:"increase" on a sum metric', () => { + expect( + getMetricSelectIssues({ + aggFn: 'increase', + metricType: 'sum', + metricName: 'http.server.request.count', + }), + ).toEqual([]); + }); + + it('rejects aggFn:"avg" on a histogram metric', () => { + const issues = getMetricSelectIssues({ + aggFn: 'avg', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }); + expect(issues.find(i => i.path[0] === 'aggFn')).toBeDefined(); + }); + + it('accepts aggFn:"count" on a histogram metric (no level required)', () => { + expect( + getMetricSelectIssues({ + aggFn: 'count', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }), + ).toEqual([]); + }); + + it('requires level for aggFn:"quantile" on a histogram metric', () => { + const issues = getMetricSelectIssues({ + aggFn: 'quantile', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }); + expect(issues.find(i => i.path[0] === 'level')).toBeDefined(); + }); + + it('accepts aggFn:"quantile" with level on a histogram metric', () => { + expect( + getMetricSelectIssues({ + aggFn: 'quantile', + level: 0.95, + metricType: 'histogram', + metricName: 'http.server.request.duration', + }), + ).toEqual([]); + }); + + it('rejects isDelta on a non-gauge metric', () => { + const issues = getMetricSelectIssues({ + aggFn: 'sum', + metricType: 'sum', + metricName: 'http.request.count', + isDelta: true, + }); + expect(issues.find(i => i.path[0] === 'isDelta')).toBeDefined(); + }); + + it('accepts isDelta on a gauge metric', () => { + expect( + getMetricSelectIssues({ + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + isDelta: true, + }), + ).toEqual([]); + }); + + it('rejects level when aggFn is not quantile', () => { + const issues = getMetricSelectIssues({ + aggFn: 'avg', + valueExpression: 'Duration', + level: 0.95, + }); + expect(issues.find(i => i.path[0] === 'level')).toBeDefined(); + }); +}); + +// ─── validateMetricSelectItems ─────────────────────────────────────────────── + +describe('validateMetricSelectItems', () => { + it('returns null for a valid items array', () => { + expect( + validateMetricSelectItems([ + { aggFn: 'count' }, + { aggFn: 'avg', valueExpression: 'Duration' }, + ]), + ).toBeNull(); + }); + + it('returns an error envelope and labels each item by index', () => { + const result = validateMetricSelectItems([ + { aggFn: 'avg' }, // missing valueExpression + { aggFn: 'increase', metricType: 'gauge', metricName: 'x' }, // increase requires sum + ]); + expect(result).not.toBeNull(); + if (!result) return; + expect(result.isError).toBe(true); + expect(result.content[0].type).toBe('text'); + const text = result.content[0].text; + expect(text).toContain('select[0].valueExpression'); + expect(text).toContain('select[1].aggFn'); + }); + + it('returns an error envelope for a single bad item', () => { + const result = validateMetricSelectItems([ + { + aggFn: 'quantile', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }, // missing level + ]); + expect(result).not.toBeNull(); + if (!result) return; + expect(result.content[0].text).toContain('select[0].level'); + }); }); diff --git a/packages/api/src/mcp/tools/dashboards/schemas.ts b/packages/api/src/mcp/tools/dashboards/schemas.ts index e1dbad9674..6328d84716 100644 --- a/packages/api/src/mcp/tools/dashboards/schemas.ts +++ b/packages/api/src/mcp/tools/dashboards/schemas.ts @@ -15,6 +15,20 @@ import { z } from 'zod'; import { externalQuantileLevelSchema, objectIdSchema } from '@/utils/zod'; +import { getMetricSelectIssues } from '../query/schemas'; + +/** + * Metric type values exposed on dashboard tile select items. Restricted to + * the three kinds the query renderer can translate today; summary and + * exponential histogram are intentionally excluded. Mirrors + * MCP_METRIC_TYPE_OPTIONS in `tools/query/schemas.ts`. + */ +const mcpTileMetricTypeSchema = z.enum([ + MetricsDataType.Gauge, + MetricsDataType.Sum, + MetricsDataType.Histogram, +]); + // ─── Shared tile schemas for MCP dashboard tools ───────────────────────────── const seriesLevelNumberFormatDescription = @@ -87,7 +101,10 @@ const mcpNumberFormatSchema = z.object({ const mcpTileSelectItemSchema = z .object({ aggFn: AggregateFunctionSchema.describe( - 'Aggregation function. "count" requires no valueExpression; all others do.', + 'Aggregation function. "count" requires no valueExpression; all others do. ' + + 'METRIC SOURCES: "increase" computes the per-bucket counter increase for Sum metrics ' + + '(reset-aware). For Gauges use last_value/avg/min/max. For Histograms use "quantile" ' + + 'with level or "count".', ), valueExpression: z .string() @@ -96,7 +113,9 @@ const mcpTileSelectItemSchema = z 'Column or expression to aggregate. Required for all aggFn except "count". ' + 'Use PascalCase for top-level columns (e.g. "Duration", "StatusCode"). ' + "For span attributes use: SpanAttributes['key'] (e.g. SpanAttributes['http.method']). " + - "For resource attributes use: ResourceAttributes['key'] (e.g. ResourceAttributes['service.name']).", + "For resource attributes use: ResourceAttributes['key'] (e.g. ResourceAttributes['service.name']).\n\n" + + 'METRIC SOURCES: optional — defaults to "Value" (the metric value column) when ' + + 'metricType/metricName are set.', ), where: z .string() @@ -115,29 +134,53 @@ const mcpTileSelectItemSchema = z ), level: externalQuantileLevelSchema .optional() - .describe('Percentile level for aggFn="quantile"'), + .describe( + 'Percentile level for aggFn="quantile". REQUIRED for histogram metrics with aggFn:"quantile".', + ), numberFormat: mcpNumberFormatSchema .optional() .describe(seriesLevelNumberFormatDescription), + metricType: mcpTileMetricTypeSchema + .optional() + .describe( + 'METRIC SOURCES ONLY. OTel metric kind: gauge, sum, or histogram. ' + + 'Required (with metricName) when the tile sourceId is a metric source. ' + + 'summary and exponential histogram are not supported by the renderer yet.', + ), + metricName: z + .string() + .optional() + .describe( + 'METRIC SOURCES ONLY. OTel metric name (e.g. "system.cpu.utilization"). ' + + 'Required when metricType is set.', + ), + isDelta: z + .boolean() + .optional() + .describe( + 'METRIC SOURCES ONLY (gauge metrics). When true, computes the Prometheus-style ' + + 'delta over each bucket. Default false.', + ), }) .superRefine((data, ctx) => { - if (data.level && data.aggFn !== 'quantile') { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - message: 'Level can only be used with quantile aggregation function', - }); - } - if (data.valueExpression && data.aggFn === 'count') { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - message: - 'Value expression cannot be used with count aggregation function', - }); - } else if (!data.valueExpression && data.aggFn !== 'count') { + const narrow = { + aggFn: typeof data.aggFn === 'string' ? data.aggFn : undefined, + metricType: + typeof data.metricType === 'string' ? data.metricType : undefined, + metricName: + typeof data.metricName === 'string' ? data.metricName : undefined, + isDelta: typeof data.isDelta === 'boolean' ? data.isDelta : undefined, + level: typeof data.level === 'number' ? data.level : undefined, + valueExpression: + typeof data.valueExpression === 'string' + ? data.valueExpression + : undefined, + }; + for (const issue of getMetricSelectIssues(narrow)) { ctx.addIssue({ code: z.ZodIssueCode.custom, - message: - 'Value expression is required for non-count aggregation functions', + path: issue.path, + message: issue.message, }); } }); diff --git a/packages/api/src/mcp/tools/query/schemas.ts b/packages/api/src/mcp/tools/query/schemas.ts index 6bcdd91ab4..78809b0fa3 100644 --- a/packages/api/src/mcp/tools/query/schemas.ts +++ b/packages/api/src/mcp/tools/query/schemas.ts @@ -1,3 +1,4 @@ +import { MetricsDataType } from '@hyperdx/common-utils/dist/types'; import { z } from 'zod'; // ─── Shared description fragments ──────────────────────────────────────────── @@ -48,6 +49,10 @@ export const MCP_AGG_FN_OPTIONS = [ 'quantile', 'sum', 'none', + // 'increase' is only valid for Sum (counter) metrics. The renderer + // computes the per-bucket counter increase with reset-handling; the + // bare aggFn string maps to that behavior, not a SQL function. + 'increase', ] as const; const mcpAggFnSchema = z @@ -59,9 +64,151 @@ const mcpAggFnSchema = z ' count_distinct – unique value count (valueExpression required)\n' + ' quantile – percentile; also set level (valueExpression required)\n' + ' last_value – most recent value of a column\n' + - ' none – pass a raw expression through unchanged', + ' none – pass a raw expression through unchanged\n' + + ' increase – METRIC-ONLY (Sum/counter): per-bucket counter increase, ' + + 'reset-aware. Requires metricType:"sum" and metricName.', ); +/** + * Metric type values exposed to MCP tool callers. Restricted to the three + * kinds the renderer can translate today; summary and exponential histogram + * are intentionally excluded. + */ +export const MCP_METRIC_TYPE_OPTIONS = [ + MetricsDataType.Gauge, + MetricsDataType.Sum, + MetricsDataType.Histogram, +] as const; + +const mcpMetricTypeSchema = z + .enum(MCP_METRIC_TYPE_OPTIONS) + .describe( + 'METRIC SOURCES ONLY. OTel metric kind. Required (along with metricName) ' + + 'when querying a metric source — discover via clickstack_describe_source ' + + 'or clickstack_describe_metric.\n' + + ' gauge – instantaneous values (CPU, memory, queue depth). Use last_value/avg/min/max; set isDelta:true for Prometheus-style delta over the bucket.\n' + + ' sum – cumulative or delta counters (request counts, bytes processed). Use aggFn:"increase" for counter increase, or sum/avg on the computed rate.\n' + + ' histogram – bucketed distributions (request duration). Use aggFn:"quantile" with level for percentiles, or aggFn:"count" for total bucket count.\n' + + 'NOTE: summary and exponential histogram are not supported by the query renderer yet.', + ); + +/** + * Shared cross-field validation issues for MCP select items. Returns the + * list of Zod issues a `.superRefine` callback should emit. Kept as a pure + * function so both `mcpSelectItemSchema` (query tools) and + * `mcpTileSelectItemSchema` (dashboard tile tools) can call it inline + * without widening Zod's output type inference. + * + * Constraints enforced: + * - metricType + metricName must be set together + * - aggFn:"increase" is Sum-only + * - histogram metrics only support quantile (with level) or count + * - isDelta is Gauge-only + * - level still requires aggFn:"quantile" + * - valueExpression is required for non-count aggFns UNLESS metricType is + * set (defaults to "Value" for metric sources) + * + * Note: summary and exponential histogram metric kinds are not in the input + * enum so they cannot reach this function — they are rejected by the field + * schema itself. + */ +export function getMetricSelectIssues(data: { + aggFn?: string; + metricType?: string; + metricName?: string; + isDelta?: boolean; + level?: number; + valueExpression?: string; +}): { path: (string | number)[]; message: string }[] { + const issues: { path: (string | number)[]; message: string }[] = []; + + // metricType ↔ metricName must be set together + if (data.metricType && !data.metricName) { + issues.push({ + path: ['metricName'], + message: + 'metricName is required when metricType is set. Discover metric names ' + + 'via clickstack_list_metrics or clickstack_describe_source.', + }); + } + if (data.metricName && !data.metricType) { + issues.push({ + path: ['metricType'], + message: + 'metricType is required when metricName is set. Use one of: gauge, sum, histogram.', + }); + } + + // increase is Sum-only + if (data.aggFn === 'increase' && data.metricType !== 'sum') { + issues.push({ + path: ['aggFn'], + message: + 'aggFn "increase" is only valid for sum (counter) metrics. ' + + 'Set metricType:"sum" and metricName, or pick a different aggFn.', + }); + } + + // Histogram supports only quantile (+ level) or count today + if (data.metricType === 'histogram') { + if (data.aggFn !== 'quantile' && data.aggFn !== 'count') { + issues.push({ + path: ['aggFn'], + message: + 'Histogram metrics only support aggFn "quantile" (with level) or "count" today.', + }); + } + if (data.aggFn === 'quantile' && data.level == null) { + issues.push({ + path: ['level'], + message: + 'level is required when aggFn is "quantile" on a histogram metric. ' + + 'Use 0.5, 0.9, 0.95, or 0.99.', + }); + } + } + + // isDelta is Gauge-only + if (data.isDelta && data.metricType !== 'gauge') { + issues.push({ + path: ['isDelta'], + message: 'isDelta is only valid for gauge metrics (metricType:"gauge").', + }); + } + + // level requires aggFn:"quantile" + if (data.level != null && data.aggFn !== 'quantile') { + issues.push({ + path: ['level'], + message: 'level is only valid with aggFn:"quantile".', + }); + } + + // valueExpression rules: + // - "count" never takes a valueExpression (existing rule) + // - non-count aggFns require valueExpression UNLESS metricType is set + // (metric sources default valueExpression to "Value" in the helper) + if (data.valueExpression && data.aggFn === 'count') { + issues.push({ + path: ['valueExpression'], + message: 'valueExpression cannot be used with aggFn:"count".', + }); + } else if ( + !data.valueExpression && + data.aggFn !== 'count' && + !data.metricType + ) { + issues.push({ + path: ['valueExpression'], + message: + 'valueExpression is required for non-count aggregation functions ' + + '(or set metricType to query a metric source, which defaults valueExpression to "Value").', + }); + } + + return issues; +} + export const mcpSelectItemSchema = z.object({ aggFn: mcpAggFnSchema, valueExpression: z @@ -74,7 +221,10 @@ export const mcpSelectItemSchema = z.object({ 'Any ClickHouse expression is allowed — common useful forms: ' + '"Duration / 1e6" (ns→ms), ' + '"toFloat64OrZero(SpanAttributes[\'response.size_bytes\'])" (cast attribute), ' + - '"if(StatusCode = \'STATUS_CODE_ERROR\', 1, 0)" (boolean→numeric for ratios).', + '"if(StatusCode = \'STATUS_CODE_ERROR\', 1, 0)" (boolean→numeric for ratios).\n\n' + + 'METRIC SOURCES: optional — defaults to "Value" (the metric value column) when ' + + 'metricType/metricName are set. Set explicitly only if you want to transform the ' + + 'metric value (e.g. "Value / 1e6").', ), where: z .string() @@ -113,10 +263,87 @@ export const mcpSelectItemSchema = z.object({ .union([z.literal(0.5), z.literal(0.9), z.literal(0.95), z.literal(0.99)]) .optional() .describe( - 'Percentile level. Only applicable when aggFn is "quantile". ' + + 'Percentile level. Required when aggFn is "quantile" on a histogram metric, ' + + 'optional otherwise. ' + 'Allowed values: 0.5, 0.9, 0.95, 0.99', ), + metricType: mcpMetricTypeSchema + .optional() + .describe( + 'METRIC SOURCES ONLY. OTel metric kind: gauge, sum, or histogram. ' + + 'Required (with metricName) when sourceId is a metric source. ' + + 'Discover via clickstack_describe_source (sample) or clickstack_describe_metric.', + ), + metricName: z + .string() + .optional() + .describe( + 'METRIC SOURCES ONLY. OTel metric name (e.g. "system.cpu.utilization", ' + + '"http.server.request.duration"). Required when metricType is set. ' + + 'Discover via clickstack_list_metrics or clickstack_describe_source.', + ), + isDelta: z + .boolean() + .optional() + .describe( + 'METRIC SOURCES ONLY (gauge metrics). When true, computes the Prometheus-style ' + + 'delta over each bucket: (argMax(Value) - argMin(Value)) * bucketSecs / timeDiff. ' + + 'Use for cumulative gauges where you want to chart growth per bucket. Default false.', + ), }); +// NOTE: cross-field validation (metric rules + level + valueExpression) is +// applied imperatively in the timeseries / table tool handlers via +// `validateMetricSelectItems`. We intentionally skip `.superRefine` here +// because Zod 3.x widens optional-field types post-refine, which breaks +// strict-typed downstream consumers like `mergeWhereIntoSelectItems` and +// `resolveOrderBy`. The dashboard tile schema, whose consumers don't trip +// on the widening, keeps its own `.superRefine`. + +/** + * Concrete shape of a parsed MCP select item. Mirrors the runtime values + * produced by `mcpSelectItemSchema` — needed as an explicit type because + * Zod 3.x's structural inference of `z.object({...})` callbacks (e.g. the + * MCP SDK's tool-handler input) can widen optional-field types into + * `unknown`. Cast `input.select` to `McpSelectItem[]` at tool boundaries. + */ +export type McpSelectItem = { + aggFn: string; + valueExpression?: string; + where?: string; + whereLanguage?: 'lucene' | 'sql'; + alias?: string; + level?: number; + metricType?: 'gauge' | 'sum' | 'histogram'; + metricName?: string; + isDelta?: boolean; +}; + +/** + * Apply `getMetricSelectIssues` to every select item in a tool input. + * Returns an error-shaped tool response when any issue is detected, or + * `null` when all items pass. Call this from a tool handler before + * passing items to `runConfigTile`. + */ +export function validateMetricSelectItems( + items: ReadonlyArray, +): { isError: true; content: [{ type: 'text'; text: string }] } | null { + const errors: string[] = []; + items.forEach((item, idx) => { + for (const issue of getMetricSelectIssues(item)) { + errors.push(`select[${idx}].${issue.path.join('.')}: ${issue.message}`); + } + }); + if (errors.length === 0) return null; + return { + isError: true as const, + content: [ + { + type: 'text' as const, + text: errors.join('\n'), + }, + ], + }; +} export const startTimeSchema = z .string() diff --git a/packages/api/src/mcp/tools/query/table.ts b/packages/api/src/mcp/tools/query/table.ts index 41b7e44f0b..4fd99d8c28 100644 --- a/packages/api/src/mcp/tools/query/table.ts +++ b/packages/api/src/mcp/tools/query/table.ts @@ -13,10 +13,12 @@ import { endTimeSchema, groupBySchema, MCP_AGG_FN_OPTIONS, + McpSelectItem, mcpSelectItemSchema, orderBySchema, sourceIdSchema, startTimeSchema, + validateMetricSelectItems, whereLanguageSchema, whereSchema, } from './schemas'; @@ -61,9 +63,13 @@ const tableSchema = z.object({ // ─── orderBy resolution ────────────────────────────────────────────────────── /** Aggregation function names that ClickHouse cannot resolve as bare identifiers in ORDER BY. - * 'none' is excluded — it passes a raw expression through unchanged and has no synthesizable form. */ + * Excluded: + * - 'none' passes a raw expression through unchanged and has no synthesizable form. + * - 'increase' is a metric-only renderer marker that compiles to a multi-CTE + * sum(Rate) pipeline — there is no standalone SQL function to synthesize. + * The renderer auto-aliases the resulting column; agents should orderBy by alias. */ const AGG_FN_NAMES: ReadonlySet = new Set( - MCP_AGG_FN_OPTIONS.filter(fn => fn !== 'none'), + MCP_AGG_FN_OPTIONS.filter(fn => fn !== 'none' && fn !== 'increase'), ); /** @@ -170,7 +176,18 @@ export function registerTable(server: McpServer, context: McpContext) { 'Map attributes work in groupBy and valueExpression, including ' + "toFloat64OrZero(SpanAttributes['key']).\n\n" + 'Shape auto-upgrade: if shape is "number" or "pie" but select has >1 item, ' + - 'it is transparently upgraded to "table".', + 'it is transparently upgraded to "table".\n\n' + + '── METRIC SOURCES ──\n' + + 'When sourceId is a metric source, each select item MUST set ' + + 'metricType ("gauge"|"sum"|"histogram") and metricName (the OTel metric name). ' + + 'valueExpression defaults to "Value" — set it explicitly only to transform the value.\n' + + 'Discovery: clickstack_describe_source returns a per-kind metric-name sample; ' + + 'clickstack_list_metrics paginates the full catalog; clickstack_describe_metric ' + + 'returns attribute keys + sampled values for a single metric.\n' + + 'Per kind: gauge uses last_value/avg/min/max; sum uses aggFn:"increase" for counter increase ' + + '(top-N capped at 20 groups when combined with groupBy), or sum/avg on the rate; ' + + 'histogram uses aggFn:"quantile" + level for percentiles, or aggFn:"count" for total bucket count.\n' + + 'summary and exponential histogram kinds are not supported by the query renderer yet.', inputSchema: tableSchema, }, withToolTracing('clickstack_table', context, async input => { @@ -183,12 +200,23 @@ export function registerTable(server: McpServer, context: McpContext) { } const { startDate, endDate } = timeRange; + // Cast to the concrete `McpSelectItem[]` because Zod 3.x widens + // optional-field inference at the MCP-SDK tool boundary; the + // runtime parser still produces the correct shape. + const select = input.select as McpSelectItem[]; + + // Validate cross-field constraints (metric rules, level/quantile, + // valueExpression presence) and surface friendly errors before we + // touch ClickHouse. + const validation = validateMetricSelectItems(select); + if (validation) return validation; + // Auto-upgrade shape when select has multiple items but shape is // single-value (number/pie). This is the #1 Zod error class from agents. let displayType: 'table' | 'number' | 'pie' = input.shape; if ( (displayType === 'number' || displayType === 'pie') && - input.select.length > 1 + select.length > 1 ) { displayType = 'table'; } @@ -197,11 +225,7 @@ export function registerTable(server: McpServer, context: McpContext) { // of the aggCondition for every metric. Table/line/number/pie display // types don't have a chart-level where — filtering is per-select-item. const { items: selectItems, warnings: mergeWarnings } = - mergeWhereIntoSelectItems( - input.select, - input.where, - input.whereLanguage, - ); + mergeWhereIntoSelectItems(select, input.where, input.whereLanguage); const tile = buildTile('MCP Table', 12, 4, { displayType, diff --git a/packages/api/src/mcp/tools/query/timeseries.ts b/packages/api/src/mcp/tools/query/timeseries.ts index 283effb48a..8af6b82071 100644 --- a/packages/api/src/mcp/tools/query/timeseries.ts +++ b/packages/api/src/mcp/tools/query/timeseries.ts @@ -12,10 +12,12 @@ import { import { endTimeSchema, groupBySchema, + McpSelectItem, mcpSelectItemSchema, orderBySchema, sourceIdSchema, startTimeSchema, + validateMetricSelectItems, whereLanguageSchema, whereSchema, } from './schemas'; @@ -79,7 +81,20 @@ export function registerTimeseries(server: McpServer, context: McpContext) { 'Requires sourceId — call clickstack_list_sources then clickstack_describe_source first. ' + 'Each select item defines one plotted series.\n\n' + 'Column naming: top-level columns are PascalCase (Duration, StatusCode). ' + - "Map attributes use bracket syntax: SpanAttributes['http.method'].", + "Map attributes use bracket syntax: SpanAttributes['http.method'].\n\n" + + '── METRIC SOURCES ──\n' + + 'When sourceId is a metric source, each select item MUST set ' + + 'metricType ("gauge"|"sum"|"histogram") and metricName (the OTel metric name). ' + + 'valueExpression defaults to "Value" — set it explicitly only to transform the value.\n' + + 'Discovery: clickstack_describe_source returns a per-kind metric-name sample; ' + + 'clickstack_list_metrics paginates the full catalog; clickstack_describe_metric ' + + 'returns attribute keys + sampled values for a single metric.\n' + + 'Per kind: gauge uses last_value/avg/min/max (or aggFn:any + isDelta:true for Prometheus-style delta); ' + + 'sum uses aggFn:"increase" for the counter increase, or sum/avg on the computed rate; ' + + 'histogram uses aggFn:"quantile" + level for percentiles, or aggFn:"count" for total bucket count.\n' + + 'TOP-N CAP: aggFn:"increase" + groupBy is capped at 20 groups by the renderer ' + + '(top by max bucket sum). Narrow with where/groupBy to see other groups.\n' + + 'summary and exponential histogram kinds are not supported by the query renderer yet.', inputSchema: timeseriesSchema, }, withToolTracing('clickstack_timeseries', context, async input => { @@ -92,14 +107,21 @@ export function registerTimeseries(server: McpServer, context: McpContext) { } const { startDate, endDate } = timeRange; + // Cast to the concrete `McpSelectItem[]` because Zod 3.x widens + // optional-field inference at the MCP-SDK tool boundary; the + // runtime parser still produces the correct shape. + const select = input.select as McpSelectItem[]; + + // Validate cross-field constraints (metric rules, level/quantile, + // valueExpression presence) and surface friendly errors before we + // touch ClickHouse. + const validation = validateMetricSelectItems(select); + if (validation) return validation; + // Inject top-level where into each select item (timeseries uses // per-select-item aggCondition, not chart-level where) const { items: selectItems, warnings: mergeWarnings } = - mergeWhereIntoSelectItems( - input.select, - input.where, - input.whereLanguage, - ); + mergeWhereIntoSelectItems(select, input.where, input.whereLanguage); const tile = buildTile('MCP Timeseries', 12, 4, { displayType: input.shape, From 309aa2561b377cbe6984c8c0bf95d23207fb285a Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Tue, 9 Jun 2026 16:24:02 -0500 Subject: [PATCH 02/21] [HDX-4347] feat(mcp): thread metricTables through runConfigTile builder path Make clickstack_timeseries and clickstack_table actually work against metric sources. Before this change the builder branch of runConfigTile fell back to the source's empty tableName and never threaded source.metricTables onto the chart config, so any metric query failed with 'Both table name and UUID are empty' at render time. For metric sources, the builder branch now: - Sets from.tableName = '' (renderer picks the per-kind ClickHouse table from metricTables at render time) - Adds metricTables: source.metricTables to the chart config (mirrors packages/api/src/routers/external-api/v2/charts.ts:261-267) - Defaults each select item's valueExpression to 'Value' when missing, matching the agent-friendly external-API behavior so metric queries can omit valueExpression and have it resolve correctly Also surfaces the renderer's increase+groupBy top-N cap (20 groups) to agents: - Adds annotateIncreaseTopNHint helper that attaches a neutral hint to successful, non-empty tool results when any select item uses aggFn:'increase' alongside a non-empty groupBy - Exposes INCREASE_TOP_N_CAP=20 as a named constant referencing the renderer's INCREASE_MAX_NUM_GROUPS in common-utils - Wires the hint into both clickstack_timeseries and clickstack_table Adds 9 unit tests covering the hint helper across the increase+groupBy condition, empty results, error results, missing groupBy, missing increase aggFn, and malformed/missing content envelopes. No new TypeScript errors above the pre-existing baseline on main. --- packages/api/src/mcp/__tests__/query.test.ts | 87 +++++++++++++++++++ packages/api/src/mcp/tools/query/helpers.ts | 69 ++++++++++++++- packages/api/src/mcp/tools/query/table.ts | 5 ++ .../api/src/mcp/tools/query/timeseries.ts | 5 ++ 4 files changed, 165 insertions(+), 1 deletion(-) diff --git a/packages/api/src/mcp/__tests__/query.test.ts b/packages/api/src/mcp/__tests__/query.test.ts index 7525cbb848..169f050543 100644 --- a/packages/api/src/mcp/__tests__/query.test.ts +++ b/packages/api/src/mcp/__tests__/query.test.ts @@ -7,7 +7,9 @@ jest.mock('@/utils/trimToolResponse', () => ({ })); import { + annotateIncreaseTopNHint, errorHint, + INCREASE_TOP_N_CAP, mergeWhereIntoSelectItems, parseTimeRange, } from '../tools/query/helpers'; @@ -553,3 +555,88 @@ describe('validateMetricSelectItems', () => { expect(result.content[0].text).toContain('select[0].level'); }); }); + +// ─── annotateIncreaseTopNHint ──────────────────────────────────────────────── + +function buildResult(data: unknown[]) { + return { + content: [ + { + type: 'text', + text: JSON.stringify({ result: { data } }), + }, + ], + isError: false, + }; +} + +describe('annotateIncreaseTopNHint', () => { + it('exposes the renderer cap as a constant', () => { + expect(INCREASE_TOP_N_CAP).toBe(20); + }); + + it('appends a hint when increase + groupBy is used and result is non-empty', () => { + const result = buildResult([{ x: 1 }, { x: 2 }]); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hint).toContain('top 20'); + expect(parsed.hint).toContain('aggFn:"increase"'); + }); + + it('does NOT annotate when increase is used WITHOUT a groupBy', () => { + const result = buildResult([{ x: 1 }]); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], undefined); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hint).toBeUndefined(); + }); + + it('does NOT annotate when groupBy is an empty string', () => { + const result = buildResult([{ x: 1 }]); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], ' '); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hint).toBeUndefined(); + }); + + it('does NOT annotate when no select item uses increase', () => { + const result = buildResult([{ x: 1 }]); + annotateIncreaseTopNHint( + result, + [{ aggFn: 'sum' }, { aggFn: 'avg' }], + 'ServiceName', + ); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hint).toBeUndefined(); + }); + + it('does NOT annotate empty results (already labelled by formatQueryResult)', () => { + const result = buildResult([]); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hint).toBeUndefined(); + }); + + it('does NOT annotate error results', () => { + const result = { + content: [{ type: 'text', text: 'an error message' }], + isError: true, + }; + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + expect(result.content[0].text).toBe('an error message'); + }); + + it('leaves unparseable text content unchanged', () => { + const result = { + content: [{ type: 'text', text: 'not json' }], + isError: false, + }; + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + expect(result.content[0].text).toBe('not json'); + }); + + it('is a no-op when content is missing', () => { + const result = {} as Parameters[0]; + expect(() => + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'), + ).not.toThrow(); + }); +}); diff --git a/packages/api/src/mcp/tools/query/helpers.ts b/packages/api/src/mcp/tools/query/helpers.ts index 87f59c362c..181e7e3fa1 100644 --- a/packages/api/src/mcp/tools/query/helpers.ts +++ b/packages/api/src/mcp/tools/query/helpers.ts @@ -78,6 +78,48 @@ const MCP_CLICKHOUSE_SETTINGS = { */ const MCP_REQUEST_TIMEOUT = 32_000; // 30s query limit + 2s buffer +// ─── Increase top-N cap hint ──────────────────────────────────────────────── + +/** + * Group limit applied by the metric renderer when `aggFn:"increase"` is + * combined with `groupBy`. Mirrors `INCREASE_MAX_NUM_GROUPS` in + * `packages/common-utils/src/core/renderChartConfig.ts`. Surfaced to MCP + * callers as a hint so they can reason about why high-cardinality groupBys + * may be truncated. + */ +export const INCREASE_TOP_N_CAP = 20; + +/** + * Mutates a successful tool result envelope in place to add a neutral + * informational hint when the renderer's increase+groupBy top-N cap may + * have applied. Safe to call unconditionally — does nothing for empty + * results, error results, or queries that don't combine `aggFn:"increase"` + * with a non-empty `groupBy`. + */ +export function annotateIncreaseTopNHint( + result: { content?: { type: string; text?: string }[]; isError?: boolean }, + selectItems: ReadonlyArray<{ aggFn?: string }>, + groupBy: string | undefined, +): void { + if (result.isError) return; + const first = result.content?.[0]; + if (first?.type !== 'text' || typeof first.text !== 'string') return; + if (!groupBy || groupBy.trim() === '') return; + if (!selectItems.some(s => s.aggFn === 'increase')) return; + try { + const parsed = JSON.parse(first.text); + const data = parsed?.result?.data; + if (!Array.isArray(data) || data.length === 0) return; + parsed.hint = + `aggFn:"increase" combined with groupBy is capped at the top ${INCREASE_TOP_N_CAP} ` + + `groups by max bucket sum at the renderer layer. ` + + `Results may not include every group present in the underlying data.`; + first.text = JSON.stringify(parsed, null, 2); + } catch { + // leave result unmodified on parse failure + } +} + // ─── Where merging ─────────────────────────────────────────────────────────── export interface MergeWhereResult { @@ -298,6 +340,7 @@ export async function runConfigTile( }); const isSearch = builderConfig.displayType === DisplayType.Search; + const isMetricSource = source.kind === SourceKind.Metric; const defaultTableSelect = 'defaultTableSelectExpression' in source ? source.defaultTableSelectExpression @@ -327,13 +370,37 @@ export async function runConfigTile( } : {}; + // Metric sources need three adjustments before the renderer can + // translate the query: + // 1. `from.tableName` is blank — the renderer picks the correct + // per-kind ClickHouse table from `metricTables` at render time. + // 2. `metricTables` must be threaded onto the chart config (mirrors + // packages/api/src/routers/external-api/v2/charts.ts:261-267). + // 3. Each select item's `valueExpression` defaults to "Value" (the + // metric value column) when missing — agents normally omit it. + const metricSelectOverrides = + isMetricSource && Array.isArray(builderConfig.select) + ? { + select: builderConfig.select.map(item => + typeof item === 'string' + ? item + : { + ...item, + valueExpression: item.valueExpression || 'Value', + }, + ), + } + : {}; + const chartConfig = { ...builderConfig, ...searchOverrides, + ...metricSelectOverrides, from: { databaseName: source.from.databaseName, - tableName: source.from.tableName, + tableName: isMetricSource ? '' : source.from.tableName, }, + ...(isMetricSource && { metricTables: source.metricTables }), connection: source.connection.toString(), timestampValueExpression: source.timestampValueExpression, implicitColumnExpression: implicitColumn, diff --git a/packages/api/src/mcp/tools/query/table.ts b/packages/api/src/mcp/tools/query/table.ts index 4fd99d8c28..ad01bcf687 100644 --- a/packages/api/src/mcp/tools/query/table.ts +++ b/packages/api/src/mcp/tools/query/table.ts @@ -4,6 +4,7 @@ import { z } from 'zod'; import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; import { + annotateIncreaseTopNHint, buildTile, mergeWhereIntoSelectItems, parseTimeRange, @@ -254,6 +255,10 @@ export function registerTable(server: McpServer, context: McpContext) { } } + // Surface the increase+groupBy top-N cap so the agent knows results + // may be truncated to 20 groups. + annotateIncreaseTopNHint(result, select, input.groupBy); + return result; }), ); diff --git a/packages/api/src/mcp/tools/query/timeseries.ts b/packages/api/src/mcp/tools/query/timeseries.ts index 8af6b82071..7e63784cce 100644 --- a/packages/api/src/mcp/tools/query/timeseries.ts +++ b/packages/api/src/mcp/tools/query/timeseries.ts @@ -4,6 +4,7 @@ import { z } from 'zod'; import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; import { + annotateIncreaseTopNHint, buildTile, mergeWhereIntoSelectItems, parseTimeRange, @@ -151,6 +152,10 @@ export function registerTimeseries(server: McpServer, context: McpContext) { } } + // Surface the increase+groupBy top-N cap so the agent knows results + // may be truncated to 20 groups. + annotateIncreaseTopNHint(result, select, input.groupBy); + // Detect single-bucket collapse: when a timeseries query returns only // 1 row, the data likely collapsed into a single time bucket. Add a // hint so the agent knows to adjust the time range. From 5e38c0e0455d85705fd98135898cfca0c7569e98 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Tue, 9 Jun 2026 16:32:26 -0500 Subject: [PATCH 03/21] [HDX-4347] feat(mcp): make clickstack_describe_source metric-aware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the early-return for metric sources at describeSource.ts:97-115 with a metric-aware discovery flow so agents can use the existing describe-then-query workflow against metric sources without falling back to clickstack_sql. For metric sources, describe_source now: - Picks a representative metric table (gauge -> sum -> histogram) via pickRepresentativeMetricTable, exposed in meta.discoveryMetricKind so agents know which kind's schema they're seeing - Reuses the existing stages 1-4 (columns, map keys, low-cardinality values, map-attribute values) against the representative table — the OTel Collector metric schemas share Attributes/ResourceAttributes/ ScopeAttributes shape, so one sample is representative enough for the starter view - Samples up to 20 distinct MetricName values per queryable kind in a new stage 5 (sampleMetricNamesForKind), reusing the discovery dateRange and respecting the existing wall-clock timeout / partial-result flag - Defensively enriches each sampled metric with MetricUnit / MetricDescription when those columns exist on the kind's table (getColumns presence check); falls back gracefully on schemas that omit them - Surfaces the new clickstack_list_metrics / clickstack_describe_metric discovery tools via nextSteps.discovery so agents can paginate or drill into per-metric attribute keys when the sample isn't enough Updates the existing locking test in sources.test.ts that asserted columns/lowCardinalityValues/etc. were undefined for metric sources; the test now asserts the rich discovery payload (columns present, discoveryMetricKind set to 'gauge', nextSteps mention the new discovery tools). No new TypeScript errors above the pre-existing baseline on main. --- .../api/src/mcp/__tests__/sources.test.ts | 27 +- .../src/mcp/tools/sources/describeSource.ts | 296 +++++++++++++++++- 2 files changed, 311 insertions(+), 12 deletions(-) diff --git a/packages/api/src/mcp/__tests__/sources.test.ts b/packages/api/src/mcp/__tests__/sources.test.ts index 008e4fe852..b4025ecc5c 100644 --- a/packages/api/src/mcp/__tests__/sources.test.ts +++ b/packages/api/src/mcp/__tests__/sources.test.ts @@ -249,7 +249,7 @@ describe('MCP Source Tools', () => { expect(output.source.keyColumns).toHaveProperty('spanId'); }); - it('should return metricTables for a metric source (no column schema)', async () => { + it('should return metricTables AND column schema for a metric source', async () => { const metricSource = await Source.create({ kind: SourceKind.Metric, team: team._id, @@ -280,13 +280,28 @@ describe('MCP Source Tools', () => { kind: SourceKind.Metric, }); - // Metric sources have metricTables but no column schema or value samples + // Metric sources surface metricTables AND run column / map-key + // discovery against the representative metric table (gauge picked + // first by pickRepresentativeMetricTable). expect(output.source.metricTables).toBeDefined(); expect(output.source.metricTables).toHaveProperty('gauge'); - expect(output.source.columns).toBeUndefined(); - expect(output.source.lowCardinalityValues).toBeUndefined(); - expect(output.source.mapAttributeKeys).toBeUndefined(); - expect(output.source.mapAttributeValues).toBeUndefined(); + expect(output.source.discoveryMetricKind).toBe('gauge'); + expect(output.source.columns).toBeDefined(); + expect(Array.isArray(output.source.columns)).toBe(true); + // The OTel Collector gauge schema includes MetricName + Value + // as native columns. + const columnNames = output.source.columns.map( + (c: { name: string }) => c.name, + ); + expect(columnNames).toContain('MetricName'); + expect(columnNames).toContain('Value'); + + // nextSteps points at the new metric discovery tools. + expect(output.nextSteps.query).toContain('metricType'); + expect(output.nextSteps.discovery).toContain('clickstack_list_metrics'); + expect(output.nextSteps.discovery).toContain( + 'clickstack_describe_metric', + ); }); it('should include usage guidance and nextSteps', async () => { diff --git a/packages/api/src/mcp/tools/sources/describeSource.ts b/packages/api/src/mcp/tools/sources/describeSource.ts index 5674c7e2cf..5b25cf8222 100644 --- a/packages/api/src/mcp/tools/sources/describeSource.ts +++ b/packages/api/src/mcp/tools/sources/describeSource.ts @@ -1,11 +1,18 @@ import { + chSql, + concatChSql, convertCHDataTypeToJSType, filterColumnMetaByType, JSDataType, + tableExpr, } from '@hyperdx/common-utils/dist/clickhouse'; import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata'; -import { SourceKind } from '@hyperdx/common-utils/dist/types'; +import { + MetricsDataType, + type MetricTable, + SourceKind, +} from '@hyperdx/common-utils/dist/types'; import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { z } from 'zod'; @@ -28,6 +35,205 @@ const MAX_LC_VALUES = 20; const MAX_MAP_KEY_VALUES = 5; const MAX_MAP_KEYS_TO_SAMPLE = 10; +// Max MetricName values returned per metric kind by the starter sample. +// clickstack_list_metrics provides paginated discovery beyond this cap. +const MAX_METRIC_NAMES_PER_KIND = 20; + +// Metric kinds the query renderer can translate today. Summary and +// ExponentialHistogram are intentionally omitted — they have schema but +// no SQL translator in the renderer yet. +const QUERYABLE_METRIC_KINDS = [ + MetricsDataType.Gauge, + MetricsDataType.Sum, + MetricsDataType.Histogram, +] as const; + +/** + * Pick the representative metric table to use as the starting point for + * schema/attribute discovery on a metric source. Prefers gauge → sum → + * histogram from the source's populated metricTables map. Returns the + * ClickHouse table name, or undefined when no queryable metric table is + * populated. + */ +function pickRepresentativeMetricTable( + metricTables: MetricTable, +): + | { kind: (typeof QUERYABLE_METRIC_KINDS)[number]; tableName: string } + | undefined { + for (const kind of QUERYABLE_METRIC_KINDS) { + const tableName = metricTables[kind]; + if (tableName) { + return { kind, tableName }; + } + } + return undefined; +} + +type MetricNameSample = { + name: string; + unit?: string; + description?: string; +}; + +/** + * Sample distinct MetricName values for a single metric kind. Optionally + * enriches each name with MetricUnit / MetricDescription when those + * columns are present on the table (the OTel Collector default schema + * includes them; custom schemas may not). + */ +async function sampleMetricNamesForKind({ + metadata, + clickhouseClient, + databaseName, + tableName, + connectionId, + dateRange, + signal, + cachedColumns, +}: { + metadata: ReturnType; + clickhouseClient: ClickhouseClient; + databaseName: string; + tableName: string; + connectionId: string; + dateRange: [Date, Date]; + signal: AbortSignal; + cachedColumns?: { name: string }[]; +}): Promise { + // Defensive column presence check for MetricUnit / MetricDescription. + const kindColumns = + cachedColumns ?? + (await metadata.getColumns({ databaseName, tableName, connectionId })); + const columnNames = new Set(kindColumns.map(c => c.name)); + const hasUnit = columnNames.has('MetricUnit'); + const hasDescription = columnNames.has('MetricDescription'); + + // First fetch the distinct metric names; this is the only step that + // strictly needs to succeed for the kind to appear in the response. + const nameResults = await metadata.getAllKeyValues({ + databaseName, + tableName, + keyExpressions: ['MetricName'], + maxValuesPerKey: MAX_METRIC_NAMES_PER_KIND, + connectionId, + dateRange, + signal, + }); + const names = nameResults[0]?.value ?? []; + if (names.length === 0) return []; + + // Best-effort enrichment with unit + description. One small query + // returns one row per metric name with the most-recent unit / desc. + let enrichments: Record = {}; + if ((hasUnit || hasDescription) && !signal.aborted) { + try { + enrichments = await fetchMetricNameEnrichments({ + clickhouseClient, + databaseName, + tableName, + connectionId, + names, + dateRange, + hasUnit, + hasDescription, + signal, + }); + } catch (e) { + logger.warn( + { databaseName, tableName, error: e }, + 'Failed to enrich metric names with unit/description', + ); + } + } + + return names.map(name => { + const enrichment = enrichments[name] ?? {}; + const sample: MetricNameSample = { name }; + if (enrichment.unit) sample.unit = enrichment.unit; + if (enrichment.description) sample.description = enrichment.description; + return sample; + }); +} + +/** + * Fetch MetricUnit and MetricDescription for a batch of metric names. + * Uses `anyLast` so the most-recent value wins when a metric has changed + * unit/description over time. + */ +async function fetchMetricNameEnrichments({ + clickhouseClient, + databaseName, + tableName, + connectionId, + names, + dateRange, + hasUnit, + hasDescription, + signal, +}: { + clickhouseClient: ClickhouseClient; + databaseName: string; + tableName: string; + connectionId: string; + names: string[]; + dateRange: [Date, Date]; + hasUnit: boolean; + hasDescription: boolean; + signal: AbortSignal; +}): Promise> { + // Build the projection fragments via the parameterised chSql DSL so + // identifiers are quoted and the unit/description columns only appear + // when present on the source table. + const projections = [ + chSql`MetricName`, + ...(hasUnit + ? [chSql`anyLast(${{ Identifier: 'MetricUnit' }}) AS MetricUnit`] + : []), + ...(hasDescription + ? [ + chSql`anyLast(${{ Identifier: 'MetricDescription' }}) AS MetricDescription`, + ] + : []), + ]; + const namePlaceholders = concatChSql( + ',', + names.map(name => chSql`${{ String: name }}`), + ); + const sql = chSql` + SELECT ${concatChSql(', ', projections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName IN (${namePlaceholders}) + AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: dateRange[0].getTime() }}) + AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: dateRange[1].getTime() }}) + GROUP BY MetricName + `; + + type EnrichmentRow = { + MetricName: string; + MetricUnit?: string; + MetricDescription?: string; + }; + + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + abort_signal: signal, + }); + const result = (await response.json()) as { data: EnrichmentRow[] }; + + const enrichments: Record = + {}; + for (const row of result.data) { + enrichments[row.MetricName] = { + ...(row.MetricUnit ? { unit: row.MetricUnit } : {}), + ...(row.MetricDescription ? { description: row.MetricDescription } : {}), + }; + } + return enrichments; +} + /** * Core schema-discovery logic. Extracted so the caller can wrap it in * Promise.race for wall-clock timeout enforcement. @@ -72,6 +278,9 @@ async function describeSourceSchema( } // Key columns by source kind + let representativeMetric: + | { kind: (typeof QUERYABLE_METRIC_KINDS)[number]; tableName: string } + | undefined; if (source.kind === SourceKind.Trace) { meta.keyColumns = { spanName: source.spanNameExpression, @@ -91,10 +300,22 @@ async function describeSourceSchema( }; } else if (source.kind === SourceKind.Metric) { meta.metricTables = source.metricTables; + representativeMetric = pickRepresentativeMetricTable(source.metricTables); + if (representativeMetric) { + meta.discoveryMetricKind = representativeMetric.kind; + } } - // For sources without a table (e.g. metric sources), return early - if (!source.from.tableName) { + // Resolve the table name we'll use for column / map-key / value + // discovery. For non-metric sources this is just source.from.tableName. + // For metric sources we use the representative metric table picked + // above (gauge → sum → histogram). + const discoveryTableName = + source.from.tableName || representativeMetric?.tableName || ''; + + // Only early-return when there is truly no table to discover schema + // against (e.g. a metric source with no populated metric tables). + if (!discoveryTableName) { return { content: [ { @@ -103,7 +324,7 @@ async function describeSourceSchema( { source: meta, nextSteps: { - query: `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}" and the metric tables above.`, + query: `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}".`, }, }, null, @@ -137,7 +358,8 @@ async function describeSourceSchema( password: connection.password, }); const metadata = getMetadata(clickhouseClient); - const { databaseName, tableName } = source.from; + const databaseName = source.from.databaseName; + const tableName = discoveryTableName; const connectionId = source.connection.toString(); // Track which sampling stages were skipped due to timeout @@ -287,6 +509,54 @@ async function describeSourceSchema( skippedStages.push('mapAttributeValues'); } + // ── 5. Metric name + unit + description sampling ────────────────────── + // For metric sources, sample distinct MetricName values per queryable + // kind so the agent has a starter list without needing a follow-up call + // to clickstack_list_metrics for the common case (<= 20 metrics/kind). + // Defensively check for MetricUnit / MetricDescription columns: they + // exist on the standard OTel Collector schema but a custom metric table + // may not declare them. + if (source.kind === SourceKind.Metric && !signal.aborted) { + const metricNames: Record = {}; + await Promise.all( + QUERYABLE_METRIC_KINDS.map(async kind => { + const kindTableName = source.metricTables[kind]; + if (!kindTableName) return; + try { + const samples = await sampleMetricNamesForKind({ + metadata, + clickhouseClient, + databaseName, + tableName: kindTableName, + connectionId, + dateRange, + signal, + // Reuse representative columns when the kind matches the + // representative table to avoid a second getColumns round-trip. + cachedColumns: + representativeMetric?.tableName === kindTableName + ? columns + : undefined, + }); + if (samples.length > 0) { + metricNames[kind] = samples; + } + } catch (e) { + logger.warn( + { sourceId, kind, error: e }, + 'Failed to sample metric names for kind', + ); + } + }), + ); + if (signal.aborted && Object.keys(metricNames).length === 0) { + skippedStages.push('metricNames'); + } + if (Object.keys(metricNames).length > 0) { + meta.metricNames = metricNames; + } + } + // Flag partial results so the LLM knows value samples may be incomplete if (skippedStages.length > 0) { meta.partial = true; @@ -300,6 +570,14 @@ async function describeSourceSchema( : 'These are the REAL values in your data — use them in filters instead of guessing. ' + 'Example: where: "SeverityText:error" (if \'error\' appears in the sampled values above).'; + const isMetricSource = source.kind === SourceKind.Metric; + const queryNextStep = isMetricSource + ? `Use clickstack_timeseries or clickstack_table with sourceId "${sourceId}" and metricType/metricName from above.` + : `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}" and the columns/attributes above.`; + const discoveryNextStep = isMetricSource + ? `For more metric names than the sample above, call clickstack_list_metrics with sourceId "${sourceId}". For per-metric attribute keys + sampled values, call clickstack_describe_metric with sourceId and metricName.` + : undefined; + const { data: output, isTrimmed } = trimToolResponse({ source: meta, usage: { @@ -308,11 +586,17 @@ async function describeSourceSchema( mapAttributes: "Use bracket syntax: SpanAttributes['http.method'], ResourceAttributes['service.name']", lowCardinalityValues: lcValuesHint, + ...(isMetricSource && { + metricNames: + 'Each entry maps a metric kind (gauge/sum/histogram) to a sample of metric names ' + + 'available on that table. Pass metricType + metricName on each select item.', + }), }, nextSteps: { - query: `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}" and the columns/attributes above.`, + query: queryNextStep, mapAttributeAccess: "Use bracket syntax for map columns: ResourceAttributes['service.name'], SpanAttributes['http.method']", + ...(discoveryNextStep && { discovery: discoveryNextStep }), }, }); From e88f7a8e1291f50d3c4feb2f9bc769da9c5dbedd Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Tue, 9 Jun 2026 16:55:47 -0500 Subject: [PATCH 04/21] [HDX-4347] feat(mcp): add clickstack_list_metrics and clickstack_describe_metric Add two new metric-specific discovery tools so agents can find and characterise metrics on a metric source without falling back to clickstack_sql or guessing OTel semantic-convention names. clickstack_list_metrics - Paginated metric-name catalog scoped to a metric source - Optional kind filter (gauge/sum/histogram); omit to scan every populated kind in order - Optional namePattern (ClickHouse ILIKE) and time window - Defaults to last 24h to surface only metrics with recent data - Opaque base64 cursor pagination over (kind, lastName) pairs so multi-kind scans resume cleanly mid-result - Default limit 50, max 500 - Defensive MetricUnit/MetricDescription enrichment using getColumns presence checks (matches the OTel Collector default schema; falls back gracefully on custom schemas) - Empty-result hint guides the agent to widen the filter or drop kind clickstack_describe_metric - Per-metric drill-down: kind(s), unit, description, attributeKeys per map column, sampled values (when sampleValues=true, default) - kind auto-detection across all populated tables when omitted, via parallel SELECT 1 probes per kind - Reuses metadata.getAllFields with metricName filter for attribute-key discovery scoped to the specific metric (existing helper at packages/common-utils/src/core/metadata.ts:736-816) - Per-kind usage guidance baked into the response (aggFn hints, level requirement for histogram quantile, 20-group cap for sum + increase) - Pre-built clickstack_timeseries call example in nextSteps.query with the right aggFn / level for the detected kind - 10s wall-clock timeout matches clickstack_describe_source Both tools reject non-Metric sources with a friendly hint pointing at clickstack_list_sources. Tool descriptions document the full discovery workflow chain: list_sources -> describe_source -> list_metrics -> describe_metric -> timeseries|table. Notable implementation detail: the queryable-kind tuple is declared as plain string literals ('gauge'|'sum'|'histogram') rather than MetricsDataType enum members because Zod 3.x's z.enum(...) infers an unknown-widened type when fed an enum-member tuple, which breaks the MCP SDK callback inference downstream. A compile-time assertion keeps the string literals in sync with the enum. No new TypeScript errors above the pre-existing baseline on main. --- .../src/mcp/tools/sources/describeMetric.ts | 664 ++++++++++++++++++ packages/api/src/mcp/tools/sources/index.ts | 4 + .../api/src/mcp/tools/sources/listMetrics.ts | 440 ++++++++++++ 3 files changed, 1108 insertions(+) create mode 100644 packages/api/src/mcp/tools/sources/describeMetric.ts create mode 100644 packages/api/src/mcp/tools/sources/listMetrics.ts diff --git a/packages/api/src/mcp/tools/sources/describeMetric.ts b/packages/api/src/mcp/tools/sources/describeMetric.ts new file mode 100644 index 0000000000..041f355cc4 --- /dev/null +++ b/packages/api/src/mcp/tools/sources/describeMetric.ts @@ -0,0 +1,664 @@ +import { + chSql, + concatChSql, + tableExpr, +} from '@hyperdx/common-utils/dist/clickhouse'; +import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; +import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata'; +import { MetricsDataType, SourceKind } from '@hyperdx/common-utils/dist/types'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { z } from 'zod'; + +import { getConnectionById } from '@/controllers/connection'; +import { getSource } from '@/controllers/sources'; +import logger from '@/utils/logger'; +import { trimToolResponse } from '@/utils/trimToolResponse'; + +import { withToolTracing } from '../../utils/tracing'; +import type { McpContext } from '../types'; + +// Metric kinds the query renderer can translate today. Mirrors +// QUERYABLE_METRIC_KINDS in describeSource.ts / listMetrics.ts. +// Declared as string literals (not MetricsDataType enum members) so +// `z.enum(...)` can narrow the inferred handler input type properly — +// referencing the enum here pessimises Zod's inference to `unknown` at +// the MCP SDK callback boundary. +const QUERYABLE_METRIC_KINDS = ['gauge', 'sum', 'histogram'] as const; +type QueryableKind = (typeof QUERYABLE_METRIC_KINDS)[number]; + +// Compile-time guarantee that the string literals still match the +// MetricsDataType enum (and so are valid keys on MetricTable). +const _assertKindsMatch: readonly QueryableKind[] = [ + MetricsDataType.Gauge, + MetricsDataType.Sum, + MetricsDataType.Histogram, +]; +void _assertKindsMatch; + +const DEFAULT_LOOKBACK_MS = 24 * 60 * 60 * 1000; +const DESCRIBE_TIMEOUT_MS = 10_000; + +// Max sampled values per attribute key (when sampleValues is true). +const MAX_ATTR_VALUES = 10; +// Cap on the number of attribute keys we sample values for per kind to +// avoid runaway fan-out on high-cardinality metrics. +const MAX_ATTR_KEYS_TO_SAMPLE = 12; + +// Per-kind aggregation guidance baked into the response so the agent can +// build a valid clickstack_timeseries / clickstack_table call without +// re-reading the schemas. +const KIND_USAGE: Record = { + gauge: + 'Gauge: use aggFn:"last_value"|"avg"|"min"|"max" on Value. Set isDelta:true for Prometheus-style delta over each bucket.', + sum: 'Sum (counter): use aggFn:"increase" for the per-bucket counter increase (reset-aware), or aggFn:"sum"/"avg" on the computed rate. increase+groupBy is capped at the top 20 groups.', + histogram: + 'Histogram: use aggFn:"quantile" with level ∈ {0.5, 0.9, 0.95, 0.99} for percentiles, or aggFn:"count" for the total bucket count.', +}; + +// ─── Schema ────────────────────────────────────────────────────────────────── + +const describeMetricSchema = z.object({ + sourceId: z + .string() + .describe( + 'Source ID. Must reference a metric source — get IDs from clickstack_list_sources.', + ), + metricName: z + .string() + .min(1) + .describe( + 'OTel metric name to describe (e.g. "system.cpu.utilization", "http.server.request.duration"). ' + + 'Discover via clickstack_list_metrics or clickstack_describe_source.', + ), + kind: z + .enum(QUERYABLE_METRIC_KINDS) + .optional() + .describe( + 'Optional metric kind filter. Omit to auto-detect across every populated metric table on the source ' + + '(unusual cases where the same metric name lives in multiple kinds will return one entry per kind).', + ), + startTime: z + .string() + .optional() + .describe( + 'Restrict discovery to data points after this ISO 8601 timestamp. Default: 24 hours before endTime (or now).', + ), + endTime: z + .string() + .optional() + .describe('End of the discovery window as ISO 8601. Default: now.'), + sampleValues: z + .boolean() + .optional() + .default(true) + .describe( + 'When true (default), sample up to ' + + `${MAX_ATTR_VALUES} distinct values per attribute key for the top ${MAX_ATTR_KEYS_TO_SAMPLE} attributes. ` + + 'Set false to skip value sampling for faster responses on high-cardinality metrics.', + ), +}); + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +type KindDetail = { + kind: QueryableKind; + tableName: string; + unit?: string; + description?: string; + attributeKeys: Record; + attributeValues?: Record; + usage: string; +}; + +function parseTimeRange( + startTime?: string, + endTime?: string, +): { error: string } | { startDate: Date; endDate: Date } { + const endDate = endTime ? new Date(endTime) : new Date(); + const startDate = startTime + ? new Date(startTime) + : new Date(endDate.getTime() - DEFAULT_LOOKBACK_MS); + if (isNaN(endDate.getTime()) || isNaN(startDate.getTime())) { + return { + error: 'Invalid startTime or endTime: must be valid ISO 8601 strings', + }; + } + if (startDate >= endDate) { + return { error: 'endTime must be greater than startTime' }; + } + return { startDate, endDate }; +} + +/** + * Probe each candidate kind's table for the given metric name. Returns + * only the kinds where at least one row matches. + */ +async function detectKindsForMetric({ + clickhouseClient, + databaseName, + metricTables, + connectionId, + metricName, + startDate, + endDate, + signal, +}: { + clickhouseClient: ClickhouseClient; + databaseName: string; + metricTables: Record; + connectionId: string; + metricName: string; + startDate: Date; + endDate: Date; + signal: AbortSignal; +}): Promise { + const probes = await Promise.all( + QUERYABLE_METRIC_KINDS.map(async kind => { + const tableName = metricTables[kind]; + if (!tableName) return null; + try { + const sql = chSql` + SELECT 1 + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName = ${{ String: metricName }} + AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }}) + AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }}) + LIMIT 1 + `; + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + abort_signal: signal, + }); + const result = (await response.json()) as { data: unknown[] }; + return result.data.length > 0 ? kind : null; + } catch (e) { + logger.warn( + { kind, error: e instanceof Error ? e.message : String(e) }, + 'detectKindsForMetric: probe failed', + ); + return null; + } + }), + ); + return probes.filter((k): k is QueryableKind => k !== null); +} + +/** + * Fetch unit and description for a metric name on a single kind table. + * Returns undefined values when the columns are absent from the schema. + */ +async function fetchUnitAndDescription({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName, + startDate, + endDate, + hasUnit, + hasDescription, + signal, +}: { + clickhouseClient: ClickhouseClient; + databaseName: string; + tableName: string; + connectionId: string; + metricName: string; + startDate: Date; + endDate: Date; + hasUnit: boolean; + hasDescription: boolean; + signal: AbortSignal; +}): Promise<{ unit?: string; description?: string }> { + if (!hasUnit && !hasDescription) return {}; + const projections = [ + ...(hasUnit + ? [chSql`anyLast(${{ Identifier: 'MetricUnit' }}) AS MetricUnit`] + : []), + ...(hasDescription + ? [ + chSql`anyLast(${{ Identifier: 'MetricDescription' }}) AS MetricDescription`, + ] + : []), + ]; + const sql = chSql` + SELECT ${concatChSql(', ', projections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName = ${{ String: metricName }} + AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }}) + AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }}) + `; + try { + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + abort_signal: signal, + }); + const result = (await response.json()) as { + data: Array<{ MetricUnit?: string; MetricDescription?: string }>; + }; + const row = result.data[0]; + if (!row) return {}; + return { + ...(row.MetricUnit ? { unit: row.MetricUnit } : {}), + ...(row.MetricDescription ? { description: row.MetricDescription } : {}), + }; + } catch (e) { + logger.warn( + { tableName, error: e instanceof Error ? e.message : String(e) }, + 'fetchUnitAndDescription failed', + ); + return {}; + } +} + +/** + * Sample distinct values per attribute key. Uses one composed + * groupArray-per-key query so all keys are fetched in a single round + * trip. + */ +async function sampleAttributeValues({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName, + attributeKeys, + startDate, + endDate, + signal, +}: { + clickhouseClient: ClickhouseClient; + databaseName: string; + tableName: string; + connectionId: string; + metricName: string; + // attributeKeys is shape { mapColumn: keyName[] } e.g. { Attributes: ['http.method', 'http.route'] } + attributeKeys: Record; + startDate: Date; + endDate: Date; + signal: AbortSignal; +}): Promise> { + const flatKeyExprs: { display: string; mapColumn: string; key: string }[] = + []; + for (const [mapColumn, keys] of Object.entries(attributeKeys)) { + for (const key of keys) { + flatKeyExprs.push({ + display: `${mapColumn}['${key}']`, + mapColumn, + key, + }); + if (flatKeyExprs.length >= MAX_ATTR_KEYS_TO_SAMPLE) break; + } + if (flatKeyExprs.length >= MAX_ATTR_KEYS_TO_SAMPLE) break; + } + if (flatKeyExprs.length === 0) return {}; + + const projections = flatKeyExprs.map( + ({ mapColumn, key }, idx) => chSql` + groupUniqArray(${{ Int32: MAX_ATTR_VALUES }})( + ${{ Identifier: mapColumn }}[${{ String: key }}] + ) AS param${{ UNSAFE_RAW_SQL: String(idx) }} + `, + ); + + const sql = chSql` + SELECT ${concatChSql(', ', projections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName = ${{ String: metricName }} + AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }}) + AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }}) + `; + + try { + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + abort_signal: signal, + }); + const result = (await response.json()) as { + data: Array>; + }; + const row = result.data[0]; + if (!row) return {}; + const values: Record = {}; + flatKeyExprs.forEach((meta, idx) => { + const sample = (row[`param${idx}`] ?? []).filter(v => v !== ''); + if (sample.length > 0) { + values[meta.display] = sample; + } + }); + return values; + } catch (e) { + logger.warn( + { tableName, error: e instanceof Error ? e.message : String(e) }, + 'sampleAttributeValues failed', + ); + return {}; + } +} + +async function describeMetricImpl( + teamId: string, + input: z.infer, + signal: AbortSignal, +) { + const source = await getSource(teamId, input.sourceId); + if (!source) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" not found. Call clickstack_list_sources to see available source IDs.`, + }, + ], + }; + } + if (source.kind !== SourceKind.Metric) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" is a "${source.kind}" source, not a metric source. clickstack_describe_metric only works on metric sources.`, + }, + ], + }; + } + + const timeRange = parseTimeRange(input.startTime, input.endTime); + if ('error' in timeRange) { + return { + isError: true, + content: [{ type: 'text' as const, text: timeRange.error }], + }; + } + const { startDate, endDate } = timeRange; + + const connection = await getConnectionById( + teamId, + source.connection.toString(), + true, + ); + if (!connection) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Connection not found for source "${input.sourceId}".`, + }, + ], + }; + } + + const clickhouseClient = new ClickhouseClient({ + host: connection.host, + username: connection.username, + password: connection.password, + }); + const metadata = getMetadata(clickhouseClient); + const databaseName = source.from.databaseName; + const connectionId = source.connection.toString(); + + // Resolve which kinds to describe. With an explicit `kind`, restrict to + // that one. Without, probe each populated kind so the agent gets one + // entry per match (rare but possible for shared metric names). + let candidateKinds: QueryableKind[]; + if (input.kind) { + if (!source.metricTables[input.kind]) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" has no "${input.kind}" metric table populated. Populated kinds: ${Object.keys(source.metricTables).join(', ')}.`, + }, + ], + }; + } + candidateKinds = [input.kind]; + } else { + candidateKinds = await detectKindsForMetric({ + clickhouseClient, + databaseName, + metricTables: source.metricTables, + connectionId, + metricName: input.metricName, + startDate, + endDate, + signal, + }); + if (candidateKinds.length === 0) { + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify( + { + metricName: input.metricName, + kinds: [], + hint: + `No data found for MetricName "${input.metricName}" in any populated metric table ` + + `between ${startDate.toISOString()} and ${endDate.toISOString()}. ` + + 'Try widening startTime/endTime, or call clickstack_list_metrics to confirm the metric name exists.', + }, + null, + 2, + ), + }, + ], + }; + } + } + + const kindDetails: KindDetail[] = []; + const skippedStages: string[] = []; + + for (const kind of candidateKinds) { + if (signal.aborted) { + skippedStages.push(`kind:${kind}`); + continue; + } + const tableName = source.metricTables[kind]; + if (!tableName) continue; + + // Defensive column-presence check before referencing MetricUnit / + // MetricDescription. + let columns: { name: string }[]; + try { + columns = await metadata.getColumns({ + databaseName, + tableName, + connectionId, + }); + } catch (e) { + logger.warn( + { kind, error: e instanceof Error ? e.message : String(e) }, + 'describeMetric: getColumns failed', + ); + continue; + } + const columnNames = new Set(columns.map(c => c.name)); + const hasUnit = columnNames.has('MetricUnit'); + const hasDescription = columnNames.has('MetricDescription'); + + const [meta, allFields] = await Promise.all([ + fetchUnitAndDescription({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName: input.metricName, + startDate, + endDate, + hasUnit, + hasDescription, + signal, + }), + metadata + .getAllFields({ + databaseName, + tableName, + connectionId, + metricName: input.metricName, + dateRange: [startDate, endDate], + }) + .catch(e => { + logger.warn( + { kind, error: e instanceof Error ? e.message : String(e) }, + 'describeMetric: getAllFields failed', + ); + return []; + }), + ]); + + // Group fields by map column (Attributes / ResourceAttributes / + // ScopeAttributes). Native columns are skipped — the agent already + // knows the metric value column is `Value`. + const attributeKeys: Record = {}; + for (const field of allFields) { + if (field.path.length < 2) continue; + const mapColumn = field.path[0]; + const keyName = field.path[1]; + if (!attributeKeys[mapColumn]) attributeKeys[mapColumn] = []; + attributeKeys[mapColumn].push(keyName); + } + + const detail: KindDetail = { + kind, + tableName, + ...(meta.unit ? { unit: meta.unit } : {}), + ...(meta.description ? { description: meta.description } : {}), + attributeKeys, + usage: KIND_USAGE[kind], + }; + + if (input.sampleValues && Object.keys(attributeKeys).length > 0) { + detail.attributeValues = await sampleAttributeValues({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName: input.metricName, + attributeKeys, + startDate, + endDate, + signal, + }); + } + + kindDetails.push(detail); + } + + const exampleKind = kindDetails[0]?.kind; + const queryExample = exampleKind + ? `clickstack_timeseries({ sourceId: "${input.sourceId}", select: [{ aggFn: ${ + exampleKind === 'sum' + ? '"increase"' + : exampleKind === 'histogram' + ? '"quantile", level: 0.95' + : '"avg"' + }, metricType: "${exampleKind}", metricName: "${input.metricName}" }] })` + : undefined; + + const responseObj: Record = { + metricName: input.metricName, + kinds: kindDetails, + ...(skippedStages.length > 0 && { partial: true, skippedStages }), + nextSteps: { + query: queryExample + ? `Example: ${queryExample}` + : `Use clickstack_timeseries / clickstack_table with sourceId "${input.sourceId}" plus the metricType + metricName + attribute keys above.`, + }, + }; + + const { data: trimmed, isTrimmed } = trimToolResponse(responseObj); + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify( + isTrimmed + ? { + ...trimmed, + note: 'Result was trimmed for context size. Set sampleValues:false or narrow startTime/endTime.', + } + : trimmed, + null, + 2, + ), + }, + ], + }; +} + +// ─── Tool registration ─────────────────────────────────────────────────────── + +export function registerDescribeMetric( + server: McpServer, + context: McpContext, +): void { + const { teamId } = context; + + server.registerTool( + 'clickstack_describe_metric', + { + title: 'Describe Metric', + description: + 'DRILL-DOWN: Use after clickstack_list_metrics (or after a clickstack_describe_source ' + + 'sample) to get attribute keys, sampled values, kind, unit, and description for a ' + + 'specific metric. Attribute keys vary per metric name — not per source — so always call ' + + "this before clickstack_timeseries / clickstack_table for any metric you've never queried.\n\n" + + 'Workflow: clickstack_list_sources → clickstack_list_metrics → ' + + 'clickstack_describe_metric → clickstack_timeseries|clickstack_table.', + inputSchema: describeMetricSchema, + }, + withToolTracing('clickstack_describe_metric', context, async rawInput => { + // Re-parse explicitly: the MCP SDK callback signature widens + // optional-field types into `unknown`, but the parser produces + // the typed shape we need for downstream calls. + const input = describeMetricSchema.parse(rawInput); + const controller = new AbortController(); + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => { + controller.abort(); + reject(new Error('DESCRIBE_METRIC_TIMEOUT')); + }, DESCRIBE_TIMEOUT_MS); + }); + try { + return await Promise.race([ + describeMetricImpl(teamId.toString(), input, controller.signal), + timeoutPromise, + ]); + } catch (e) { + if (e instanceof Error && e.message === 'DESCRIBE_METRIC_TIMEOUT') { + logger.warn( + { teamId, sourceId: input.sourceId, metricName: input.metricName }, + 'clickstack_describe_metric timed out', + ); + return { + isError: true as const, + content: [ + { + type: 'text' as const, + text: + 'Discovery timed out. The metric table may be under load or have very high cardinality. ' + + 'Try again, narrow startTime/endTime, or set sampleValues:false.', + }, + ], + }; + } + throw e; + } + }), + ); +} diff --git a/packages/api/src/mcp/tools/sources/index.ts b/packages/api/src/mcp/tools/sources/index.ts index 250e4abd91..5d43d1d455 100644 --- a/packages/api/src/mcp/tools/sources/index.ts +++ b/packages/api/src/mcp/tools/sources/index.ts @@ -1,7 +1,9 @@ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import type { McpContext, ToolDefinition } from '../types'; +import { registerDescribeMetric } from './describeMetric'; import { registerDescribeSource } from './describeSource'; +import { registerListMetrics } from './listMetrics'; import { registerListSources } from './listSources'; const sourcesTools: ToolDefinition = ( @@ -10,6 +12,8 @@ const sourcesTools: ToolDefinition = ( ) => { registerListSources(server, context); registerDescribeSource(server, context); + registerListMetrics(server, context); + registerDescribeMetric(server, context); }; export default sourcesTools; diff --git a/packages/api/src/mcp/tools/sources/listMetrics.ts b/packages/api/src/mcp/tools/sources/listMetrics.ts new file mode 100644 index 0000000000..f184fc9dff --- /dev/null +++ b/packages/api/src/mcp/tools/sources/listMetrics.ts @@ -0,0 +1,440 @@ +import { + chSql, + concatChSql, + tableExpr, +} from '@hyperdx/common-utils/dist/clickhouse'; +import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; +import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata'; +import { MetricsDataType, SourceKind } from '@hyperdx/common-utils/dist/types'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import ms from 'ms'; +import { z } from 'zod'; + +import { getConnectionById } from '@/controllers/connection'; +import { getSource } from '@/controllers/sources'; +import logger from '@/utils/logger'; + +import { withToolTracing } from '../../utils/tracing'; +import type { McpContext } from '../types'; + +// Metric kinds the query renderer can translate today. Mirrors +// QUERYABLE_METRIC_KINDS in describeSource.ts. Declared as string +// literals (not MetricsDataType enum members) so `z.enum(...)` can +// narrow the inferred handler input type properly — referencing the +// enum here pessimises Zod's inference to `unknown` at the MCP SDK +// callback boundary. +const QUERYABLE_METRIC_KINDS = ['gauge', 'sum', 'histogram'] as const; +type QueryableKind = (typeof QUERYABLE_METRIC_KINDS)[number]; + +// Compile-time guarantee that the string literals still match the +// MetricsDataType enum (and so are valid keys on MetricTable). +const _assertKindsMatch: readonly QueryableKind[] = [ + MetricsDataType.Gauge, + MetricsDataType.Sum, + MetricsDataType.Histogram, +]; +void _assertKindsMatch; + +const DEFAULT_LIMIT = 50; +const MAX_LIMIT = 500; +const DEFAULT_LOOKBACK_MS = 24 * 60 * 60 * 1000; + +// ─── Cursor ────────────────────────────────────────────────────────────────── + +type CursorPayload = { kind: QueryableKind; lastName: string }; + +function encodeCursor(payload: CursorPayload): string { + return Buffer.from(JSON.stringify(payload), 'utf8').toString('base64'); +} + +function decodeCursor(raw: string): CursorPayload | null { + try { + const decoded = Buffer.from(raw, 'base64').toString('utf8'); + const parsed = JSON.parse(decoded); + if ( + typeof parsed === 'object' && + parsed !== null && + typeof parsed.kind === 'string' && + typeof parsed.lastName === 'string' && + QUERYABLE_METRIC_KINDS.includes(parsed.kind as QueryableKind) + ) { + return { kind: parsed.kind as QueryableKind, lastName: parsed.lastName }; + } + } catch { + // fall through + } + return null; +} + +// ─── Schema ────────────────────────────────────────────────────────────────── + +const listMetricsSchema = z.object({ + sourceId: z + .string() + .describe( + 'Source ID. Must reference a metric source — get IDs from clickstack_list_sources.', + ), + kind: z + .enum(QUERYABLE_METRIC_KINDS) + .optional() + .describe( + 'Optional metric kind filter. Omit to scan every populated kind on the source ' + + '(gauge → sum → histogram). Set to narrow results to one kind.', + ), + namePattern: z + .string() + .optional() + .describe( + 'Optional ClickHouse ILIKE pattern applied to MetricName server-side. ' + + 'Use % as the wildcard. Examples: "system.cpu.%", "%duration%", "http.server.%".', + ), + startTime: z + .string() + .optional() + .describe( + 'Restrict to metrics with data points after this ISO 8601 timestamp. ' + + 'Default: 24 hours before endTime (or now).', + ), + endTime: z + .string() + .optional() + .describe('End of the time window as ISO 8601. Default: now.'), + limit: z + .number() + .int() + .min(1) + .max(MAX_LIMIT) + .optional() + .default(DEFAULT_LIMIT) + .describe( + `Max metrics returned per page. Default ${DEFAULT_LIMIT}, max ${MAX_LIMIT}.`, + ), + cursor: z + .string() + .optional() + .describe( + 'Opaque pagination cursor returned by a previous call as `nextCursor`. ' + + 'Pass it back unchanged to get the next page.', + ), +}); + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +type MetricEntry = { + name: string; + kind: QueryableKind; + unit?: string; + description?: string; +}; + +function parseTimeRange( + startTime?: string, + endTime?: string, +): { error: string } | { startDate: Date; endDate: Date } { + const endDate = endTime ? new Date(endTime) : new Date(); + const startDate = startTime + ? new Date(startTime) + : new Date(endDate.getTime() - DEFAULT_LOOKBACK_MS); + if (isNaN(endDate.getTime()) || isNaN(startDate.getTime())) { + return { + error: 'Invalid startTime or endTime: must be valid ISO 8601 strings', + }; + } + if (startDate >= endDate) { + return { error: 'endTime must be greater than startTime' }; + } + return { startDate, endDate }; +} + +async function fetchMetricsForKind({ + clickhouseClient, + metadata, + kind, + databaseName, + tableName, + connectionId, + startDate, + endDate, + namePattern, + afterName, + limit, +}: { + clickhouseClient: ClickhouseClient; + metadata: ReturnType; + kind: QueryableKind; + databaseName: string; + tableName: string; + connectionId: string; + startDate: Date; + endDate: Date; + namePattern: string | undefined; + afterName: string | undefined; + limit: number; +}): Promise { + // Defensive column-presence check so we don't reference MetricUnit / + // MetricDescription on non-OTel-default schemas. + const columns = await metadata.getColumns({ + databaseName, + tableName, + connectionId, + }); + const columnNames = new Set(columns.map(c => c.name)); + const hasUnit = columnNames.has('MetricUnit'); + const hasDescription = columnNames.has('MetricDescription'); + + const projections = [ + chSql`MetricName`, + ...(hasUnit + ? [chSql`anyLast(${{ Identifier: 'MetricUnit' }}) AS MetricUnit`] + : []), + ...(hasDescription + ? [ + chSql`anyLast(${{ Identifier: 'MetricDescription' }}) AS MetricDescription`, + ] + : []), + ]; + + const whereParts = [ + chSql`TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }})`, + chSql`TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }})`, + ...(afterName !== undefined + ? [chSql`MetricName > ${{ String: afterName }}`] + : []), + ...(namePattern + ? [chSql`MetricName ILIKE ${{ String: namePattern }}`] + : []), + ]; + + const sql = chSql` + SELECT ${concatChSql(', ', projections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE ${concatChSql(' AND ', whereParts)} + GROUP BY MetricName + ORDER BY MetricName ASC + LIMIT ${{ Int32: limit }} + `; + + type Row = { + MetricName: string; + MetricUnit?: string; + MetricDescription?: string; + }; + + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + }); + const result = (await response.json()) as { data: Row[] }; + return result.data.map(row => { + const entry: MetricEntry = { name: row.MetricName, kind }; + if (row.MetricUnit) entry.unit = row.MetricUnit; + if (row.MetricDescription) entry.description = row.MetricDescription; + return entry; + }); +} + +// ─── Tool registration ─────────────────────────────────────────────────────── + +export function registerListMetrics( + server: McpServer, + context: McpContext, +): void { + const { teamId } = context; + + server.registerTool( + 'clickstack_list_metrics', + { + title: 'List Metric Names', + description: + 'DISCOVERY: Use this after clickstack_describe_source when you need more metric ' + + 'names than the per-kind sample shows, or when you want to narrow by ' + + 'kind / name pattern / time window. ' + + 'Returns paginated metric names per kind (gauge/sum/histogram) ' + + 'with optional unit and description (when the OTel-default columns are present). ' + + 'Pass the returned `nextCursor` back unchanged to fetch the next page.\n\n' + + 'Workflow: clickstack_list_sources → clickstack_describe_source → ' + + 'clickstack_list_metrics → clickstack_describe_metric → ' + + 'clickstack_timeseries|clickstack_table.', + inputSchema: listMetricsSchema, + }, + withToolTracing('clickstack_list_metrics', context, async rawInput => { + // Re-parse explicitly: the MCP SDK callback signature widens + // optional-field types into `unknown`, but the parser produces + // the typed shape we need for downstream calls. + const input: z.infer = + listMetricsSchema.parse(rawInput); + const source = await getSource(teamId.toString(), input.sourceId); + if (!source) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" not found. Call clickstack_list_sources to see available source IDs.`, + }, + ], + }; + } + if (source.kind !== SourceKind.Metric) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" is a "${source.kind}" source, not a metric source. clickstack_list_metrics only works on metric sources — call clickstack_list_sources to find one whose kind is "metric".`, + }, + ], + }; + } + + const timeRange = parseTimeRange(input.startTime, input.endTime); + if ('error' in timeRange) { + return { + isError: true, + content: [{ type: 'text' as const, text: timeRange.error }], + }; + } + const { startDate, endDate } = timeRange; + + // Decode cursor; reject silently and start over if malformed so a + // truncated or tampered cursor does not surface internals. + const cursor = input.cursor ? decodeCursor(input.cursor) : null; + if (input.cursor && !cursor) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: 'Invalid cursor. Omit cursor to start over, or pass the exact `nextCursor` value returned by a previous call.', + }, + ], + }; + } + + // Resolve which kinds to scan, in order. When a cursor is set, + // skip kinds before the cursor's kind (already returned) and start + // the cursor's kind at the lastName-exclusive position. + const requestedKinds: QueryableKind[] = input.kind + ? [input.kind] + : QUERYABLE_METRIC_KINDS.filter(k => Boolean(source.metricTables[k])); + const startKindIdx = cursor ? requestedKinds.indexOf(cursor.kind) : 0; + if (startKindIdx < 0) { + // Cursor points at a kind that's not in scope for this call — + // safer to error than silently skip. + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Cursor references kind "${cursor!.kind}" but that kind is not in scope for this call. Drop the kind filter or pass a matching cursor.`, + }, + ], + }; + } + + const connection = await getConnectionById( + teamId.toString(), + source.connection.toString(), + true, + ); + if (!connection) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Connection not found for source "${input.sourceId}".`, + }, + ], + }; + } + + const clickhouseClient = new ClickhouseClient({ + host: connection.host, + username: connection.username, + password: connection.password, + }); + const metadata = getMetadata(clickhouseClient); + + const limit = input.limit ?? DEFAULT_LIMIT; + const databaseName = source.from.databaseName; + + const metrics: MetricEntry[] = []; + let nextCursor: string | undefined; + for (let i = startKindIdx; i < requestedKinds.length; i++) { + const kind = requestedKinds[i]; + const tableName = source.metricTables[kind]; + if (!tableName) continue; + const afterName = + cursor && cursor.kind === kind && i === startKindIdx + ? cursor.lastName + : undefined; + const remaining = limit - metrics.length; + if (remaining <= 0) break; + // Fetch one extra row so we can detect more-data-available. + let kindMetrics: MetricEntry[]; + try { + kindMetrics = await fetchMetricsForKind({ + clickhouseClient, + metadata, + kind, + databaseName, + tableName, + connectionId: source.connection.toString(), + startDate, + endDate, + namePattern: input.namePattern, + afterName, + limit: remaining + 1, + }); + } catch (e) { + logger.warn( + { + sourceId: input.sourceId, + kind, + error: e instanceof Error ? e.message : String(e), + }, + 'Failed to list metrics for kind', + ); + continue; + } + if (kindMetrics.length > remaining) { + // We hit the cap for this kind; emit cursor pointing at the + // last returned name and stop iterating further kinds. + const truncated = kindMetrics.slice(0, remaining); + metrics.push(...truncated); + nextCursor = encodeCursor({ + kind, + lastName: truncated[truncated.length - 1].name, + }); + break; + } + metrics.push(...kindMetrics); + } + + const responseObj: Record = { + metrics, + ...(nextCursor && { nextCursor }), + ...(metrics.length === 0 && { + hint: + 'No metrics matched. Try widening the time window (startTime/endTime), ' + + 'removing the namePattern filter, or omitting `kind` to scan every populated metric table.', + }), + usage: + 'Pass `metricType` + `metricName` from each entry to clickstack_timeseries / clickstack_table. ' + + 'For per-metric attribute keys and sampled values, call clickstack_describe_metric.', + }; + + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify(responseObj, null, 2), + }, + ], + }; + }), + ); +} From 52268e68f377a0d02a22472b86fcb1825acc3205 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Tue, 9 Jun 2026 16:59:33 -0500 Subject: [PATCH 05/21] [HDX-4347] feat(mcp): replace metric-tile workaround with positive guidance in prompt The clickstack_save_dashboard prompt previously told the model to fall back to raw SQL tiles for any metric source because the MCP select-item shape did not carry metricType / metricName and builder tiles failed at render time with 'Both table name and UUID are empty'. Stages 1-3b fixed both gaps. Update the prompt and its regression tests to teach the new discovery + builder workflow. prompts/dashboards/content.ts - Replace the 'NOTE: Authoring builder tiles on a metric source is not reliable today...' paragraph with a dedicated '== METRIC SOURCES ==' section that documents: * the required select-item fields (metricType, metricName) and the valueExpression='Value' default * per-kind aggregation guidance for gauge / sum / histogram including the histogram quantile + level requirement and the 20-group cap on sum + increase + groupBy * the five-step discovery workflow (list_sources -> describe_source -> list_metrics -> describe_metric -> timeseries|table) * one worked example per supported kind using real OTel metric names - Drop 'metric tiles with multiple metricTables' and 'builder tiles on metric sources' from the validation-known-gaps paragraphs at lines 127 and 1247 since both gaps are closed. __tests__/dashboards/prompts.test.ts - Rewrite the two regression tests that asserted the old workaround language. The new tests assert the positive guidance is present (metricType / metricName / isDelta fields are named, per-kind aggFn rules and 20-group cap are documented, the five-step workflow chain is enumerated, one worked example per kind appears) AND assert the old workaround language is gone (no 'Authoring builder tiles on a metric source is not reliable', no 'Both table name and UUID are empty'). --- .../mcp/__tests__/dashboards/prompts.test.ts | 78 +++++++++++++------ .../api/src/mcp/prompts/dashboards/content.ts | 28 ++++++- 2 files changed, 78 insertions(+), 28 deletions(-) diff --git a/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts b/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts index 5bb16e6113..b2a30d7439 100644 --- a/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts +++ b/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts @@ -50,25 +50,39 @@ describe('MCP Dashboard Prompts', () => { expect(numberFormat.toLowerCase()).toContain('per-series'); }); - it('declares the MCP metric-authoring gap rather than referencing fields that do not exist', () => { - // The MCP select-item schema does not carry metricName / metricType, - // so any prompt that tells the model to author a metric tile via - // the builder path is teaching it to ship JSON that gets silently - // stripped. Guard with an explicit assertion so a future diff that - // restores metric-tile guidance fails this test loudly. + it('documents the metric-source builder support with the discovery workflow', () => { + // Builder tiles on a metric source now work via the metricType + + // metricName + isDelta fields on each select item, with metricTables + // threaded through runConfigTile's builder branch. The prompt has to + // teach the model the discovery workflow (list_sources -> describe + // _source -> list_metrics -> describe_metric -> timeseries|table) + // and the per-kind aggFn rules so it doesn't fall back to raw SQL. const prompt = buildQueryGuidePrompt(); - expect(prompt).not.toMatch(/metricName \+ metricType/); - expect(prompt).not.toMatch(/exactly 1 select item with metricName/); - const constraintsIdx = prompt.indexOf('== PER-TILE TYPE CONSTRAINTS =='); - const constraintsBody = prompt.slice(constraintsIdx); - // The constraints section closes with an explicit note that builder - // tiles on a metric source are not reliable today, with a fallback - // recipe to raw SQL. Anchor on the phrase so a future diff that - // drops the gap-acknowledgement fails loudly. - expect(constraintsBody).toMatch( + const metricsIdx = prompt.indexOf('== METRIC SOURCES =='); + expect(metricsIdx).toBeGreaterThan(-1); + const metricsBody = prompt.slice( + metricsIdx, + prompt.indexOf('\n== ', metricsIdx + 1), + ); + // The four supported metric select fields are named. + expect(metricsBody).toContain('metricType'); + expect(metricsBody).toContain('metricName'); + expect(metricsBody).toContain('isDelta'); + // Per-kind aggregation guidance is present. + expect(metricsBody).toMatch(/gauge\s+Use aggFn:"last_value"/); + expect(metricsBody).toMatch(/sum\s+Use aggFn:"increase"/); + expect(metricsBody).toMatch(/histogram\s+Use aggFn:"quantile"/); + // The 20-group cap on increase + groupBy is documented. + expect(metricsBody).toMatch(/top 20 groups/); + // The four-tool discovery chain is documented in order. + expect(metricsBody).toContain('clickstack_describe_source'); + expect(metricsBody).toContain('clickstack_list_metrics'); + expect(metricsBody).toContain('clickstack_describe_metric'); + // The old "use raw SQL for metric tiles" workaround language is gone. + expect(prompt).not.toMatch( /Authoring builder tiles on a metric source is not reliable/, ); - expect(constraintsBody).toMatch(/MCP select-item shape does not carry/); + expect(prompt).not.toMatch(/Both table name and UUID are empty/); }); it('documents table-tile onClick linking features', () => { @@ -227,16 +241,30 @@ describe('MCP Dashboard Prompts', () => { ); }); - it('flags the metric source builder gap', () => { - // Builder tiles on metric sources currently save but render with - // "Both table name and UUID are empty". Claude went 100% raw SQL - // across 21 metric tiles for this reason; the prompt has to make - // the workaround obvious so the model doesn't try the builder - // first and fail silently. + it('walks the metric discovery workflow end-to-end with worked examples', () => { + // Metric source builder tiles now work — the prompt teaches the + // model how to find, characterise, and chart a metric without + // falling through to raw SQL. The examples cover one tile per + // supported metric kind so the pattern is unambiguous. const prompt = buildQueryGuidePrompt(); - expect(prompt).toMatch(/Both table name and UUID are empty/); - expect(prompt).toMatch(/use a raw SQL tile/); - expect(prompt).toMatch(/otel_metrics_gauge/); + const metricsIdx = prompt.indexOf('== METRIC SOURCES =='); + const metricsBody = prompt.slice( + metricsIdx, + prompt.indexOf('\n== ', metricsIdx + 1), + ); + // The five-step discovery workflow is enumerated. + expect(metricsBody).toMatch(/clickstack_list_sources/); + expect(metricsBody).toMatch(/clickstack_describe_source/); + expect(metricsBody).toMatch(/clickstack_list_metrics/); + expect(metricsBody).toMatch(/clickstack_describe_metric/); + expect(metricsBody).toMatch(/clickstack_timeseries/); + // One worked example per supported kind, each using a real OTel + // metric name so the agent has a concrete template. + expect(metricsBody).toContain('system.cpu.utilization'); + expect(metricsBody).toContain('http.server.request.count'); + expect(metricsBody).toContain('http.server.request.duration'); + // valueExpression default is documented. + expect(metricsBody).toMatch(/valueExpression defaults to "Value"/); }); it('contains no em-dashes or en-dashes used as em-dashes', () => { diff --git a/packages/api/src/mcp/prompts/dashboards/content.ts b/packages/api/src/mcp/prompts/dashboards/content.ts index a79fde71ab..3293c26611 100644 --- a/packages/api/src/mcp/prompts/dashboards/content.ts +++ b/packages/api/src/mcp/prompts/dashboards/content.ts @@ -124,7 +124,7 @@ Apply these before calling clickstack_save_dashboard. Each rule is enforced by t ] Use tabs when one container needs to show different views of the same data (Throughput / Latency / Errors over time, for instance); the tab bar appears when a container has two or more tabs declared. -12. VALIDATE EVERY TILE AFTER SAVE. After clickstack_save_dashboard, call clickstack_query_tile on EVERY tile (not just one). Save validates input shape; it does NOT validate query semantics. Some queries pass save and fail at render time (known gaps: Lucene comparison/wildcard on map attributes, metric tiles with multiple metricTables, malformed having clauses). If query_tile returns an error, fix the tile and re-save before declaring the dashboard ready. +12. VALIDATE EVERY TILE AFTER SAVE. After clickstack_save_dashboard, call clickstack_query_tile on EVERY tile (not just one). Save validates input shape; it does NOT validate query semantics. Some queries pass save and fail at render time (known gaps: Lucene comparison/wildcard on map attributes, malformed having clauses). If query_tile returns an error, fix the tile and re-save before declaring the dashboard ready. 13. NO TITLE-RECAP MARKDOWN TILE. The dashboard's name shows in the title bar. Adding a markdown tile with the dashboard name (or a "About this dashboard" header) doubles the title and eats a row of vertical space because markdown heading styles render at title-bar scale. Skip the markdown tile entirely on starter dashboards. @@ -958,7 +958,29 @@ For configType: "sql" tiles, write ClickHouse SQL with template macros: search No select items (select is a column list string). where is the filter. markdown No select items. Set markdown field with content. -NOTE: Authoring builder tiles on a metric source is not reliable today. The MCP select-item shape does not carry the metricName / metricType fields the metric query path needs, and a save with a metric sourceId may render in the UI as "Both table name and UUID are empty" even though the save itself succeeded. For metrics, use a raw SQL tile (configType: "sql") with explicit table reference. The standard tables that back a metric source are otel_metrics_gauge, otel_metrics_sum, and otel_metrics_histogram; clickstack_list_sources returns the metric source's metricTables map so you know which table holds which metric kind. Discovery: metric source schemas today do NOT publish mapAttributeKeys for ResourceAttributes / Attributes the way log and trace sources do, so attribute keys must be discovered by sampling (SELECT DISTINCT mapKeys(Attributes) FROM ...). +== METRIC SOURCES == + +Builder tiles work on metric sources. Each select item on a metric tile MUST set metricType ("gauge" | "sum" | "histogram") and metricName (the OTel metric name, e.g. "system.cpu.utilization"). valueExpression defaults to "Value" when omitted, so a typical metric series is { aggFn: "", metricType: "", metricName: "" }. summary and "exponential histogram" kinds are not yet supported by the renderer. + +Per-kind aggregation guidance: + gauge Use aggFn:"last_value" | "avg" | "min" | "max". Set isDelta:true for Prometheus-style delta over each bucket. + sum Use aggFn:"increase" for the per-bucket counter increase (reset-aware), or aggFn:"sum" | "avg" on the computed rate. increase + groupBy is capped at the top 20 groups by the renderer; pre-filter via where or pick a coarser groupBy when you need broader coverage. + histogram Use aggFn:"quantile" with level ∈ {0.5, 0.9, 0.95, 0.99} for percentiles, or aggFn:"count" for the total bucket count. quantile without level is rejected. + +Discovery workflow for metrics: + 1. clickstack_list_sources: find the metric source ID and its metricTables map (which kinds are populated). + 2. clickstack_describe_source(sourceId): returns columns, attribute keys, low-cardinality values, AND a per-kind metric-name sample (up to 20 names per kind). + 3. clickstack_list_metrics(sourceId, ...): paginate the full metric catalog with optional kind + namePattern (ILIKE) + time-window filters. Pass nextCursor unchanged for the next page. + 4. clickstack_describe_metric(sourceId, metricName): drill into a single metric: kind(s), unit, description, attribute keys per map column, and sampled values per attribute. Attribute keys vary per metric, not per source, so always call this before authoring tiles for a metric you've never queried. + 5. clickstack_timeseries | clickstack_table: author the chart. Set metricType + metricName on each select item; pass the discovered attribute keys via groupBy / where. + +Examples: + Gauge p95-like spread by service (use last_value or avg): + { aggFn: "avg", metricType: "gauge", metricName: "system.cpu.utilization" }, groupBy: "ServiceName" + Sum counter increase: + { aggFn: "increase", metricType: "sum", metricName: "http.server.request.count", alias: "Requests" }, groupBy: "ServiceName" + Histogram p95 latency: + { aggFn: "quantile", level: 0.95, metricType: "histogram", metricName: "http.server.request.duration", alias: "P95 Latency" }, groupBy: "ServiceName" == NUMBER FORMAT == @@ -1244,7 +1266,7 @@ Example: find top patterns for production services over the last 4 hours: The dashboard filter applies globally; tiles do not need the literal. On mixed-source dashboards where only some tiles carry the column, add appliesToSourceIds: ["", ...] to scope the filter to just those sources instead of breaking the unrelated tiles. 8. Forgetting to validate tiles after saving - Always call clickstack_query_tile on EVERY tile after clickstack_save_dashboard, not just one. Save validates input shape but not query semantics. Several known gaps (Lucene comparison/wildcard on map attributes, builder tiles on metric sources, malformed having) pass save and fail at render time. A dashboard with one bad tile renders the whole page in a degraded state; the user sees "Error loading chart" with no way to know which tile broke unless you validated. If query_tile returns an error, fix the where / SQL and re-save before declaring the dashboard ready. + Always call clickstack_query_tile on EVERY tile after clickstack_save_dashboard, not just one. Save validates input shape but not query semantics. Several known gaps (Lucene comparison/wildcard on map attributes, malformed having) pass save and fail at render time. A dashboard with one bad tile renders the whole page in a degraded state; the user sees "Error loading chart" with no way to know which tile broke unless you validated. If query_tile returns an error, fix the where / SQL and re-save before declaring the dashboard ready. 9. Using connectionId with builder tiles, or omitting connectionId or sourceId on a single-table SQL tile Builder tiles (line, table, etc.) use sourceId (no connectionId). From 2687af065e9757195fb358cfe5febbaf83257738 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Tue, 9 Jun 2026 17:04:44 -0500 Subject: [PATCH 06/21] [HDX-4347] test(mcp): add metric-source coverage across query, sources, cursor Adds unit and integration coverage for the metric pieces shipped in Stages 1-3b. listMetricsCursor.test.ts (NEW, unit) - 12 tests for encodeCursor / decodeCursor round-trips and rejection cases. Exercises malformed base64, malformed JSON, missing fields, wrong types, non-queryable kinds (summary, exponential histogram), empty input, and unicode metric names. Exports cursor helpers from listMetrics.ts as @internal so they can be tested without spinning up ClickHouse. sources.test.ts (integration; expanded) - clickstack_list_metrics: * rejects non-metric sources and unknown source IDs * returns metrics across every populated kind when kind is omitted * narrows to a single kind when kind is set * applies namePattern as a server-side ILIKE filter * paginates via opaque nextCursor and resumes cleanly * rejects malformed cursors with an actionable error * surfaces the empty-result hint - clickstack_describe_metric: * rejects non-metric sources * returns an actionable hint when the metric is unknown * auto-detects kind for a gauge metric, returns attribute keys + sampled values + matching usage example * respects sampleValues:false * returns the increase-shaped query example for sum kind * returns the quantile + level example for histogram kind * rejects an explicit kind when the source has no table for it queryTool.test.ts (integration; expanded) - clickstack_timeseries and clickstack_table: * gauge avg over time * sum + aggFn:'increase' table * histogram + aggFn:'quantile' + level:0.95 timeseries - Schema rejection paths: * aggFn:'increase' on a gauge -> friendly schema error * aggFn:'quantile' on a histogram without level * aggFn:'avg' on a histogram (not supported) * metricType set without metricName * isDelta on a non-gauge metric All unit tests pass (103 total: 63 query + 12 cursor + 28 prompts). Integration tests require make dev-int with the standard Docker stack to run end-to-end (they rely on bulkInsertMetricsGauge/Sum/Histogram seeding). No new TypeScript errors above the pre-existing baseline on main. --- .../mcp/__tests__/listMetricsCursor.test.ts | 104 +++++ .../api/src/mcp/__tests__/queryTool.test.ts | 228 ++++++++++- .../api/src/mcp/__tests__/sources.test.ts | 366 ++++++++++++++++++ .../api/src/mcp/tools/sources/listMetrics.ts | 13 +- 4 files changed, 706 insertions(+), 5 deletions(-) create mode 100644 packages/api/src/mcp/__tests__/listMetricsCursor.test.ts diff --git a/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts b/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts new file mode 100644 index 0000000000..77947a5dc1 --- /dev/null +++ b/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts @@ -0,0 +1,104 @@ +// Mock heavy dependencies that break in unit-test context (no ClickHouse/Mongo) +jest.mock('@/models/source', () => ({})); +jest.mock('@/controllers/sources', () => ({})); +jest.mock('@/controllers/connection', () => ({})); +jest.mock('@/utils/trimToolResponse', () => ({ + trimToolResponse: (data: unknown) => ({ data, isTrimmed: false }), +})); + +import { decodeCursor, encodeCursor } from '../tools/sources/listMetrics'; + +describe('listMetrics cursor', () => { + describe('encodeCursor / decodeCursor round-trip', () => { + it('round-trips a gauge cursor', () => { + const payload = { kind: 'gauge' as const, lastName: 'system.cpu.idle' }; + const encoded = encodeCursor(payload); + expect(encoded).toMatch(/^[A-Za-z0-9+/]+=*$/); // base64 + expect(decodeCursor(encoded)).toEqual(payload); + }); + + it('round-trips a sum cursor', () => { + const payload = { + kind: 'sum' as const, + lastName: 'http.server.request.count', + }; + expect(decodeCursor(encodeCursor(payload))).toEqual(payload); + }); + + it('round-trips a histogram cursor', () => { + const payload = { + kind: 'histogram' as const, + lastName: 'http.server.request.duration', + }; + expect(decodeCursor(encodeCursor(payload))).toEqual(payload); + }); + + it('round-trips metric names with dots, dashes, and unicode', () => { + const payload = { + kind: 'gauge' as const, + lastName: 'system.cpu.utilization-µ.naïve', + }; + expect(decodeCursor(encodeCursor(payload))).toEqual(payload); + }); + }); + + describe('decodeCursor rejection cases', () => { + it('returns null for non-base64 input', () => { + expect(decodeCursor('not-base64!')).toBeNull(); + }); + + it('returns null for base64 of invalid JSON', () => { + const garbage = Buffer.from('not json').toString('base64'); + expect(decodeCursor(garbage)).toBeNull(); + }); + + it('returns null when kind is missing', () => { + const malformed = Buffer.from(JSON.stringify({ lastName: 'x' })).toString( + 'base64', + ); + expect(decodeCursor(malformed)).toBeNull(); + }); + + it('returns null when lastName is missing', () => { + const malformed = Buffer.from(JSON.stringify({ kind: 'gauge' })).toString( + 'base64', + ); + expect(decodeCursor(malformed)).toBeNull(); + }); + + it('returns null when kind is not a queryable metric kind', () => { + const summaryCursor = Buffer.from( + JSON.stringify({ kind: 'summary', lastName: 'x' }), + ).toString('base64'); + expect(decodeCursor(summaryCursor)).toBeNull(); + + const expHistCursor = Buffer.from( + JSON.stringify({ kind: 'exponential histogram', lastName: 'x' }), + ).toString('base64'); + expect(decodeCursor(expHistCursor)).toBeNull(); + + const bogusCursor = Buffer.from( + JSON.stringify({ kind: 'bogus', lastName: 'x' }), + ).toString('base64'); + expect(decodeCursor(bogusCursor)).toBeNull(); + }); + + it('returns null when kind has the wrong type', () => { + const malformed = Buffer.from( + JSON.stringify({ kind: 1, lastName: 'x' }), + ).toString('base64'); + expect(decodeCursor(malformed)).toBeNull(); + }); + + it('returns null when lastName has the wrong type', () => { + const malformed = Buffer.from( + JSON.stringify({ kind: 'gauge', lastName: 42 }), + ).toString('base64'); + expect(decodeCursor(malformed)).toBeNull(); + }); + + it('returns null for empty string', () => { + expect(decodeCursor('')).toBeNull(); + }); + }); +}); diff --git a/packages/api/src/mcp/__tests__/queryTool.test.ts b/packages/api/src/mcp/__tests__/queryTool.test.ts index 725e55047c..175411e181 100644 --- a/packages/api/src/mcp/__tests__/queryTool.test.ts +++ b/packages/api/src/mcp/__tests__/queryTool.test.ts @@ -1,10 +1,13 @@ import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; -import { SourceKind } from '@hyperdx/common-utils/dist/types'; +import { MetricsDataType, SourceKind } from '@hyperdx/common-utils/dist/types'; import { Client } from '@modelcontextprotocol/sdk/client/index.js'; import * as config from '@/config'; import { bulkInsertLogs, + bulkInsertMetricsGauge, + bulkInsertMetricsHistogram, + bulkInsertMetricsSum, DEFAULT_DATABASE, DEFAULT_LOGS_TABLE, DEFAULT_TRACES_TABLE, @@ -802,4 +805,227 @@ describe('MCP Query Tools', () => { }); }); }); + + // ── Metric source query coverage ───────────────────────────────────────── + + describe('Metric sources (timeseries + table)', () => { + const createMetricSource = () => + Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge', + [MetricsDataType.Sum.toLowerCase()]: 'otel_metrics_sum', + [MetricsDataType.Histogram.toLowerCase()]: 'otel_metrics_histogram', + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'Metrics', + }); + + const seedGauge = async (now: Date) => { + await bulkInsertMetricsGauge([ + { + MetricName: 'system.cpu.utilization', + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: now, + Value: 0.42, + }, + { + MetricName: 'system.cpu.utilization', + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: new Date(now.getTime() + 1000), + Value: 0.61, + }, + ]); + }; + + const seedSum = async (now: Date) => { + await bulkInsertMetricsSum([ + { + MetricName: 'http.server.request.count', + AggregationTemporality: 1, // DELTA + IsMonotonic: true, + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: now, + Value: 100, + }, + { + MetricName: 'http.server.request.count', + AggregationTemporality: 1, + IsMonotonic: true, + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: new Date(now.getTime() + 1000), + Value: 25, + }, + ]); + }; + + const seedHistogram = async (now: Date) => { + await bulkInsertMetricsHistogram([ + { + MetricName: 'http.server.request.duration', + ResourceAttributes: { 'service.name': 'svc-a' }, + TimeUnix: now, + BucketCounts: [1, 2, 3, 4], + ExplicitBounds: [10, 100, 1000], + AggregationTemporality: 1, + }, + ]); + }; + + it('runs clickstack_timeseries against a gauge metric source', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await seedGauge(now); + + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + alias: 'cpu', + }, + ], + startTime: new Date(now.getTime() - 60_000).toISOString(), + endTime: new Date(now.getTime() + 60_000).toISOString(), + granularity: '1 minute', + }); + + expect(result.isError).toBeFalsy(); + // No "Both table name and UUID are empty" error from the renderer. + expect(getFirstText(result)).not.toMatch(/UUID are empty/); + }); + + it('runs clickstack_table aggFn:"increase" against a sum metric source', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await seedSum(now); + + const result = await callTool(client, 'clickstack_table', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'increase', + metricType: 'sum', + metricName: 'http.server.request.count', + alias: 'requests', + }, + ], + startTime: new Date(now.getTime() - 60_000).toISOString(), + endTime: new Date(now.getTime() + 60_000).toISOString(), + }); + + expect(result.isError).toBeFalsy(); + expect(getFirstText(result)).not.toMatch(/UUID are empty/); + }); + + it('runs clickstack_timeseries quantile against a histogram metric source', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await seedHistogram(now); + + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'quantile', + level: 0.95, + metricType: 'histogram', + metricName: 'http.server.request.duration', + alias: 'p95', + }, + ], + startTime: new Date(now.getTime() - 60_000).toISOString(), + endTime: new Date(now.getTime() + 60_000).toISOString(), + granularity: '1 minute', + }); + + expect(result.isError).toBeFalsy(); + }); + + it('rejects aggFn:"increase" on a gauge select item with a friendly schema error', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'increase', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }, + ], + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/"increase" is only valid for sum/); + }); + + it('rejects aggFn:"quantile" on a histogram without level', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'quantile', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }, + ], + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/level is required/); + }); + + it('rejects aggFn:"avg" on a histogram select item', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'avg', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }, + ], + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch( + /Histogram metrics only support aggFn/, + ); + }); + + it('rejects metricType without metricName', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [{ aggFn: 'avg', metricType: 'gauge' }], + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/metricName is required/); + }); + + it('rejects isDelta on a non-gauge metric', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'increase', + metricType: 'sum', + metricName: 'http.server.request.count', + isDelta: true, + }, + ], + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/isDelta is only valid for gauge/); + }); + }); }); diff --git a/packages/api/src/mcp/__tests__/sources.test.ts b/packages/api/src/mcp/__tests__/sources.test.ts index b4025ecc5c..4aff66c4a2 100644 --- a/packages/api/src/mcp/__tests__/sources.test.ts +++ b/packages/api/src/mcp/__tests__/sources.test.ts @@ -3,6 +3,9 @@ import { Client } from '@modelcontextprotocol/sdk/client/index.js'; import * as config from '@/config'; import { + bulkInsertMetricsGauge, + bulkInsertMetricsHistogram, + bulkInsertMetricsSum, DEFAULT_DATABASE, DEFAULT_LOGS_TABLE, DEFAULT_TRACES_TABLE, @@ -353,4 +356,367 @@ describe('MCP Source Tools', () => { expect(getFirstText(result)).toContain('not found'); }); }); + + // ── clickstack_list_metrics ────────────────────────────────────────────── + + describe('clickstack_list_metrics', () => { + const createMetricSource = () => + Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge', + [MetricsDataType.Sum.toLowerCase()]: 'otel_metrics_sum', + [MetricsDataType.Histogram.toLowerCase()]: 'otel_metrics_histogram', + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'Metrics', + }); + + const seedMetricNames = async () => { + const now = new Date(); + await bulkInsertMetricsGauge([ + { + MetricName: 'system.cpu.utilization', + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: now, + Value: 0.42, + }, + { + MetricName: 'system.memory.usage', + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: now, + Value: 12345, + }, + ]); + await bulkInsertMetricsSum([ + { + MetricName: 'http.server.request.count', + AggregationTemporality: 1, + IsMonotonic: true, + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: now, + Value: 100, + }, + ]); + await bulkInsertMetricsHistogram([ + { + MetricName: 'http.server.request.duration', + ResourceAttributes: { 'service.name': 'svc-a' }, + TimeUnix: now, + BucketCounts: [1, 2, 3], + ExplicitBounds: [10, 100, 1000], + AggregationTemporality: 1, + }, + ]); + }; + + it('rejects non-metric sources with a friendly error', async () => { + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: traceSource._id.toString(), + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toContain('not a metric source'); + }); + + it('returns 404 for unknown source IDs', async () => { + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: '000000000000000000000000', + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toContain('not found'); + }); + + it('returns metrics across all populated kinds when kind is omitted', async () => { + const metricSource = await createMetricSource(); + await seedMetricNames(); + + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + }); + + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + const namesByKind: Record = {}; + for (const entry of output.metrics ?? []) { + if (!namesByKind[entry.kind]) namesByKind[entry.kind] = []; + namesByKind[entry.kind].push(entry.name); + } + expect(namesByKind.gauge ?? []).toEqual( + expect.arrayContaining([ + 'system.cpu.utilization', + 'system.memory.usage', + ]), + ); + expect(namesByKind.sum ?? []).toEqual( + expect.arrayContaining(['http.server.request.count']), + ); + expect(namesByKind.histogram ?? []).toEqual( + expect.arrayContaining(['http.server.request.duration']), + ); + }); + + it('restricts results to a single kind when kind is set', async () => { + const metricSource = await createMetricSource(); + await seedMetricNames(); + + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + kind: 'gauge', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + for (const entry of output.metrics ?? []) { + expect(entry.kind).toBe('gauge'); + } + }); + + it('applies namePattern as a server-side ILIKE filter', async () => { + const metricSource = await createMetricSource(); + await seedMetricNames(); + + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + namePattern: 'system.cpu.%', + }); + const output = JSON.parse(getFirstText(result)); + for (const entry of output.metrics ?? []) { + expect(entry.name).toMatch(/^system\.cpu\./); + } + }); + + it('paginates via opaque nextCursor and resumes cleanly', async () => { + const metricSource = await createMetricSource(); + await seedMetricNames(); + + // First page: only ask for one entry so the cap is forced even with + // a small seed; sanity-check the cursor round-trip. + const first = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + kind: 'gauge', + limit: 1, + }); + const firstOutput = JSON.parse(getFirstText(first)); + expect(firstOutput.metrics.length).toBe(1); + expect(typeof firstOutput.nextCursor).toBe('string'); + + const second = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + kind: 'gauge', + limit: 5, + cursor: firstOutput.nextCursor, + }); + expect(second.isError).toBeFalsy(); + const secondOutput = JSON.parse(getFirstText(second)); + // The second page must not repeat the first page's last name. + const firstNames = new Set( + (firstOutput.metrics as { name: string }[]).map(m => m.name), + ); + for (const entry of secondOutput.metrics as { name: string }[]) { + expect(firstNames.has(entry.name)).toBe(false); + } + }); + + it('rejects a malformed cursor with an actionable error', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + cursor: 'not-base64!', + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/Invalid cursor/); + }); + + it('returns an empty-result hint when nothing matches', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + namePattern: 'this.name.does.not.exist.%', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.metrics).toEqual([]); + expect(output.hint).toMatch(/widening|removing|omitting/); + }); + }); + + // ── clickstack_describe_metric ─────────────────────────────────────────── + + describe('clickstack_describe_metric', () => { + const createMetricSource = () => + Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge', + [MetricsDataType.Sum.toLowerCase()]: 'otel_metrics_sum', + [MetricsDataType.Histogram.toLowerCase()]: 'otel_metrics_histogram', + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'Metrics', + }); + + it('rejects non-metric sources', async () => { + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: traceSource._id.toString(), + metricName: 'system.cpu.utilization', + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toContain('not a metric source'); + }); + + it('returns an actionable empty payload when the metric is unknown', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'definitely.not.a.metric', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.kinds).toEqual([]); + expect(output.hint).toMatch(/No data found/); + }); + + it('auto-detects the kind and returns attribute keys for a gauge metric', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await bulkInsertMetricsGauge([ + { + MetricName: 'system.cpu.utilization', + ResourceAttributes: { 'service.name': 'auto-detect-svc' }, + ServiceName: 'auto-detect-svc', + TimeUnix: now, + Value: 0.5, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'system.cpu.utilization', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.metricName).toBe('system.cpu.utilization'); + expect(output.kinds.length).toBe(1); + const detail = output.kinds[0]; + expect(detail.kind).toBe('gauge'); + expect(detail.usage).toContain('aggFn'); + expect(detail.attributeKeys).toBeDefined(); + // ResourceAttributes['service.name'] should land in the keys map + // under ResourceAttributes. + expect(detail.attributeKeys.ResourceAttributes ?? []).toEqual( + expect.arrayContaining(['service.name']), + ); + // Attribute values should be sampled by default. + expect(detail.attributeValues).toBeDefined(); + // nextSteps.query carries a worked example matching the detected kind. + expect(output.nextSteps.query).toContain('metricType: "gauge"'); + }); + + it('skips value sampling when sampleValues is false', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await bulkInsertMetricsGauge([ + { + MetricName: 'system.cpu.utilization', + ResourceAttributes: { 'service.name': 'no-sample-svc' }, + ServiceName: 'no-sample-svc', + TimeUnix: now, + Value: 0.5, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'system.cpu.utilization', + sampleValues: false, + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.kinds[0].attributeValues).toBeUndefined(); + }); + + it('returns the correct usage example for a sum metric', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await bulkInsertMetricsSum([ + { + MetricName: 'http.server.request.count', + AggregationTemporality: 1, + IsMonotonic: true, + ResourceAttributes: { 'service.name': 'sum-svc' }, + ServiceName: 'sum-svc', + TimeUnix: now, + Value: 42, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'http.server.request.count', + kind: 'sum', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.kinds[0].kind).toBe('sum'); + expect(output.kinds[0].usage).toContain('increase'); + expect(output.nextSteps.query).toContain('"increase"'); + }); + + it('returns the quantile + level example for a histogram metric', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await bulkInsertMetricsHistogram([ + { + MetricName: 'http.server.request.duration', + ResourceAttributes: { 'service.name': 'hist-svc' }, + TimeUnix: now, + BucketCounts: [1, 2, 3], + ExplicitBounds: [10, 100, 1000], + AggregationTemporality: 1, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'http.server.request.duration', + kind: 'histogram', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.kinds[0].kind).toBe('histogram'); + expect(output.kinds[0].usage).toContain('quantile'); + expect(output.nextSteps.query).toContain('"quantile"'); + expect(output.nextSteps.query).toContain('level: 0.95'); + }); + + it('rejects explicit kind when the source has no table for that kind', async () => { + // Source with only gauge populated. + const gaugeOnly = await Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge', + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'GaugeOnly', + }); + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: gaugeOnly._id.toString(), + metricName: 'whatever', + kind: 'sum', + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/no "sum" metric table/); + }); + }); }); diff --git a/packages/api/src/mcp/tools/sources/listMetrics.ts b/packages/api/src/mcp/tools/sources/listMetrics.ts index f184fc9dff..8aca793a28 100644 --- a/packages/api/src/mcp/tools/sources/listMetrics.ts +++ b/packages/api/src/mcp/tools/sources/listMetrics.ts @@ -41,13 +41,18 @@ const DEFAULT_LOOKBACK_MS = 24 * 60 * 60 * 1000; // ─── Cursor ────────────────────────────────────────────────────────────────── -type CursorPayload = { kind: QueryableKind; lastName: string }; +export type ListMetricsCursorPayload = { + kind: QueryableKind; + lastName: string; +}; -function encodeCursor(payload: CursorPayload): string { +/** @internal Exported for testing. */ +export function encodeCursor(payload: ListMetricsCursorPayload): string { return Buffer.from(JSON.stringify(payload), 'utf8').toString('base64'); } -function decodeCursor(raw: string): CursorPayload | null { +/** @internal Exported for testing. */ +export function decodeCursor(raw: string): ListMetricsCursorPayload | null { try { const decoded = Buffer.from(raw, 'base64').toString('utf8'); const parsed = JSON.parse(decoded); @@ -56,7 +61,7 @@ function decodeCursor(raw: string): CursorPayload | null { parsed !== null && typeof parsed.kind === 'string' && typeof parsed.lastName === 'string' && - QUERYABLE_METRIC_KINDS.includes(parsed.kind as QueryableKind) + (QUERYABLE_METRIC_KINDS as readonly string[]).includes(parsed.kind) ) { return { kind: parsed.kind as QueryableKind, lastName: parsed.lastName }; } From 2762ccfa0a9bdae70b964052e88aa3a15e9be7b8 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Tue, 9 Jun 2026 17:05:42 -0500 Subject: [PATCH 07/21] [HDX-4347] docs: document metric-source support in MCP.md Update the user-facing MCP tool reference to reflect the new metric discovery + querying capabilities shipped in Stages 1-3b. - Add clickstack_list_metrics and clickstack_describe_metric rows to the tool table - Mark clickstack_describe_source / clickstack_timeseries / clickstack_table as supporting metric sources alongside log/trace - Add a new 'Metric Sources' section covering: * the required metricType + metricName fields on each select item * the valueExpression='Value' default * per-kind aggregation guidance (gauge/sum/histogram, the increase top-N cap on sum + groupBy, the level requirement for histogram quantile) * the unsupported metric kinds (summary, exponential histogram) * the five-step discovery workflow chaining list_sources -> describe_source -> list_metrics -> describe_metric -> timeseries|table Closes the documentation acceptance criterion on HDX-4347. --- MCP.md | 48 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/MCP.md b/MCP.md index 3616e5c9a6..312276cb57 100644 --- a/MCP.md +++ b/MCP.md @@ -109,9 +109,11 @@ with: | Tool | Description | | ----------------------------- | -------------------------------------------------------------------------------------------- | | `clickstack_list_sources` | List all data sources and connections as a lightweight catalog (IDs, names, kinds) | -| `clickstack_describe_source` | Full column schema, attribute keys, and sampled low-cardinality values for a single source | -| `clickstack_timeseries` | Plot metrics over time as a line or stacked bar chart | -| `clickstack_table` | Compute aggregated metrics as a table, single number, or pie chart | +| `clickstack_describe_source` | Full column schema, attribute keys, and sampled low-cardinality values for a single source; for metric sources also returns a per-kind metric-name sample | +| `clickstack_list_metrics` | Paginated catalog of metric names on a metric source with optional kind / namePattern / time-window filters | +| `clickstack_describe_metric` | Per-metric drill-down: kind(s), unit, description, attribute keys per map column, and sampled values | +| `clickstack_timeseries` | Plot metrics over time as a line or stacked bar chart (works on log, trace, and metric sources) | +| `clickstack_table` | Compute aggregated metrics as a table, single number, or pie chart (works on log, trace, and metric sources) | | `clickstack_search` | Browse individual log, event, or trace rows | | `clickstack_event_patterns` | Discover the most common log messages and event patterns using Drain clustering | | `clickstack_event_deltas` | Compare two row groups and rank properties by how their value distributions differ | @@ -127,3 +129,43 @@ with: | `clickstack_trace_waterfall` | Fetch all spans in a single trace as a parent/child waterfall tree with optional correlated logs | | `clickstack_trace_top_time_consuming_operations` | Aggregate breakdown of child operations by cumulative time across matching parent traces | | `clickstack_get_webhook` | List available webhook destinations for use as alert notification channels | + +### Metric Sources + +`clickstack_timeseries`, `clickstack_table`, and the dashboard builder tile +tools accept metric sources transparently. Each `select` item on a metric +query must set `metricType` (`"gauge"`, `"sum"`, or `"histogram"`) and +`metricName` (the OTel metric name, e.g. `system.cpu.utilization`). +`valueExpression` defaults to `"Value"` when omitted, so a typical metric +series looks like: + +```jsonc +{ "aggFn": "avg", "metricType": "gauge", "metricName": "system.cpu.utilization" } +``` + +Per-kind aggregation guidance: + +- **Gauge**: `avg`, `last_value`, `min`, or `max`. Set `"isDelta": true` for + Prometheus-style delta over each bucket. +- **Sum (counter)**: `"increase"` returns the per-bucket counter increase + (reset-aware), or `sum` / `avg` on the computed rate. `increase` combined + with `groupBy` is capped at the top 20 groups by the renderer; the tool + emits a neutral hint when the cap may apply. +- **Histogram**: `"quantile"` with `level` ∈ {0.5, 0.9, 0.95, 0.99} for + percentiles, or `"count"` for the total bucket count. + +`summary` and `"exponential histogram"` metric kinds are not yet supported +by the query renderer. + +Discovery workflow for metrics: + +1. `clickstack_list_sources` to find the metric source ID and its + `metricTables` map. +2. `clickstack_describe_source` to see columns, attribute keys, and a + per-kind metric-name sample. +3. `clickstack_list_metrics` for paginated catalog access with optional + kind / namePattern / time-window filters; pass `nextCursor` unchanged + for the next page. +4. `clickstack_describe_metric` to drill into a specific metric and + discover its attribute keys + sampled values before authoring queries. +5. `clickstack_timeseries` / `clickstack_table` to chart the metric. From 666a44c0540ef709fe93858da8f2e88d9c4fd804 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Wed, 10 Jun 2026 13:15:02 -0500 Subject: [PATCH 08/21] [HDX-4347] chore: add changeset for metric MCP support --- .changeset/mcp-metric-source-support.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .changeset/mcp-metric-source-support.md diff --git a/.changeset/mcp-metric-source-support.md b/.changeset/mcp-metric-source-support.md new file mode 100644 index 0000000000..4dfd135051 --- /dev/null +++ b/.changeset/mcp-metric-source-support.md @@ -0,0 +1,11 @@ +--- +'@hyperdx/api': minor +--- + +feat(mcp): first-class metric source support + +- Two new tools: `clickstack_list_metrics` paginates the metric-name catalog with optional kind / namePattern (ILIKE) / time-window filters and opaque cursor pagination; `clickstack_describe_metric` returns per-metric kind(s), unit, description, attribute keys, and sampled values (with kind auto-detection). +- `clickstack_describe_source` is metric-aware: picks a representative metric table (gauge → sum → histogram), runs column / map-key / value-sampling against it, and adds a per-kind metric-name sample. +- `clickstack_timeseries` and `clickstack_table` accept `metricType` (gauge / sum / histogram), `metricName`, and `isDelta` on each select item, plus `aggFn:"increase"` for Sum counters. `valueExpression` defaults to `"Value"` for metric sources. Surfaces the renderer's 20-group top-N cap on `increase + groupBy` as a neutral hint. +- Dashboard prompt's "use raw SQL for metric tiles" workaround is replaced with positive discovery-workflow guidance and one worked example per supported kind. +- `summary` and `"exponential histogram"` kinds remain out of scope (no query renderer support yet). From 8717ea29b6b1e8b915eeacabe62e273e9f790dfd Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Wed, 10 Jun 2026 13:45:12 -0500 Subject: [PATCH 09/21] [HDX-4347] fix(mcp): address review feedback (greptile + knip) - Extract QUERYABLE_METRIC_KINDS to a shared packages/api/src/mcp/tools/sources/metricKinds.ts so describeMetric, listMetrics, describeSource, and the query / dashboard tile schemas all reference a single source of truth. Adding a new queryable kind (e.g. when the renderer learns 'summary') is now a one-file change (greptile). - Drop the unused 'ms' import in listMetrics.ts that would have failed CI lint (greptile). - Clear the describe_metric wall-clock timeout in 'finally' so a stale controller.abort() does not fire DESCRIBE_TIMEOUT_MS after every successful call, and the setTimeout closure does not stay pinned for the same duration (greptile). - Apply the same clearTimeout fix to describe_source as a drive-by: the bug pre-existed this PR but the pattern is now consistent across both tools (PR comment posted on the line). - Use ?? instead of || for the metric-source valueExpression default in runConfigTile so an explicit empty string is not silently swapped for 'Value' (greptile). - Remove the unused MCP_METRIC_TYPE_OPTIONS export; mcpMetricTypeSchema now builds directly off the shared QUERYABLE_METRIC_KINDS tuple (knip). The dashboard tile mcpTileMetricTypeSchema also switches to the shared constant for consistency. Verified: yarn tsc --noEmit baseline diff = 0, yarn lint clean, yarn knip clean, 105 unit tests pass. --- .../api/src/mcp/tools/dashboards/schemas.ts | 11 ++--- packages/api/src/mcp/tools/query/helpers.ts | 2 +- packages/api/src/mcp/tools/query/schemas.ts | 14 +++--- .../src/mcp/tools/sources/describeMetric.ts | 43 ++++++++----------- .../src/mcp/tools/sources/describeSource.ts | 36 +++++++--------- .../api/src/mcp/tools/sources/listMetrics.ts | 38 ++++++---------- .../api/src/mcp/tools/sources/metricKinds.ts | 34 +++++++++++++++ 7 files changed, 91 insertions(+), 87 deletions(-) create mode 100644 packages/api/src/mcp/tools/sources/metricKinds.ts diff --git a/packages/api/src/mcp/tools/dashboards/schemas.ts b/packages/api/src/mcp/tools/dashboards/schemas.ts index 6328d84716..0f543561ca 100644 --- a/packages/api/src/mcp/tools/dashboards/schemas.ts +++ b/packages/api/src/mcp/tools/dashboards/schemas.ts @@ -16,18 +16,15 @@ import { z } from 'zod'; import { externalQuantileLevelSchema, objectIdSchema } from '@/utils/zod'; import { getMetricSelectIssues } from '../query/schemas'; +import { QUERYABLE_METRIC_KINDS } from '../sources/metricKinds'; /** * Metric type values exposed on dashboard tile select items. Restricted to * the three kinds the query renderer can translate today; summary and - * exponential histogram are intentionally excluded. Mirrors - * MCP_METRIC_TYPE_OPTIONS in `tools/query/schemas.ts`. + * exponential histogram are intentionally excluded. Imports the shared + * `QUERYABLE_METRIC_KINDS` source-of-truth tuple from `../sources/metricKinds`. */ -const mcpTileMetricTypeSchema = z.enum([ - MetricsDataType.Gauge, - MetricsDataType.Sum, - MetricsDataType.Histogram, -]); +const mcpTileMetricTypeSchema = z.enum(QUERYABLE_METRIC_KINDS); // ─── Shared tile schemas for MCP dashboard tools ───────────────────────────── diff --git a/packages/api/src/mcp/tools/query/helpers.ts b/packages/api/src/mcp/tools/query/helpers.ts index 181e7e3fa1..fd93b82bae 100644 --- a/packages/api/src/mcp/tools/query/helpers.ts +++ b/packages/api/src/mcp/tools/query/helpers.ts @@ -386,7 +386,7 @@ export async function runConfigTile( ? item : { ...item, - valueExpression: item.valueExpression || 'Value', + valueExpression: item.valueExpression ?? 'Value', }, ), } diff --git a/packages/api/src/mcp/tools/query/schemas.ts b/packages/api/src/mcp/tools/query/schemas.ts index 78809b0fa3..bfa75ea579 100644 --- a/packages/api/src/mcp/tools/query/schemas.ts +++ b/packages/api/src/mcp/tools/query/schemas.ts @@ -1,6 +1,7 @@ -import { MetricsDataType } from '@hyperdx/common-utils/dist/types'; import { z } from 'zod'; +import { QUERYABLE_METRIC_KINDS } from '../sources/metricKinds'; + // ─── Shared description fragments ──────────────────────────────────────────── const WHERE_DESCRIPTION = @@ -72,16 +73,11 @@ const mcpAggFnSchema = z /** * Metric type values exposed to MCP tool callers. Restricted to the three * kinds the renderer can translate today; summary and exponential histogram - * are intentionally excluded. + * are intentionally excluded. See `../sources/metricKinds` for the shared + * source-of-truth constant used by every metric-aware tool. */ -export const MCP_METRIC_TYPE_OPTIONS = [ - MetricsDataType.Gauge, - MetricsDataType.Sum, - MetricsDataType.Histogram, -] as const; - const mcpMetricTypeSchema = z - .enum(MCP_METRIC_TYPE_OPTIONS) + .enum(QUERYABLE_METRIC_KINDS) .describe( 'METRIC SOURCES ONLY. OTel metric kind. Required (along with metricName) ' + 'when querying a metric source — discover via clickstack_describe_source ' + diff --git a/packages/api/src/mcp/tools/sources/describeMetric.ts b/packages/api/src/mcp/tools/sources/describeMetric.ts index 041f355cc4..abef8fcedf 100644 --- a/packages/api/src/mcp/tools/sources/describeMetric.ts +++ b/packages/api/src/mcp/tools/sources/describeMetric.ts @@ -5,7 +5,7 @@ import { } from '@hyperdx/common-utils/dist/clickhouse'; import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata'; -import { MetricsDataType, SourceKind } from '@hyperdx/common-utils/dist/types'; +import { SourceKind } from '@hyperdx/common-utils/dist/types'; import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { z } from 'zod'; @@ -16,24 +16,10 @@ import { trimToolResponse } from '@/utils/trimToolResponse'; import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; - -// Metric kinds the query renderer can translate today. Mirrors -// QUERYABLE_METRIC_KINDS in describeSource.ts / listMetrics.ts. -// Declared as string literals (not MetricsDataType enum members) so -// `z.enum(...)` can narrow the inferred handler input type properly — -// referencing the enum here pessimises Zod's inference to `unknown` at -// the MCP SDK callback boundary. -const QUERYABLE_METRIC_KINDS = ['gauge', 'sum', 'histogram'] as const; -type QueryableKind = (typeof QUERYABLE_METRIC_KINDS)[number]; - -// Compile-time guarantee that the string literals still match the -// MetricsDataType enum (and so are valid keys on MetricTable). -const _assertKindsMatch: readonly QueryableKind[] = [ - MetricsDataType.Gauge, - MetricsDataType.Sum, - MetricsDataType.Histogram, -]; -void _assertKindsMatch; +import { + QUERYABLE_METRIC_KINDS, + type QueryableMetricKind, +} from './metricKinds'; const DEFAULT_LOOKBACK_MS = 24 * 60 * 60 * 1000; const DESCRIBE_TIMEOUT_MS = 10_000; @@ -47,7 +33,7 @@ const MAX_ATTR_KEYS_TO_SAMPLE = 12; // Per-kind aggregation guidance baked into the response so the agent can // build a valid clickstack_timeseries / clickstack_table call without // re-reading the schemas. -const KIND_USAGE: Record = { +const KIND_USAGE: Record = { gauge: 'Gauge: use aggFn:"last_value"|"avg"|"min"|"max" on Value. Set isDelta:true for Prometheus-style delta over each bucket.', sum: 'Sum (counter): use aggFn:"increase" for the per-bucket counter increase (reset-aware), or aggFn:"sum"/"avg" on the computed rate. increase+groupBy is capped at the top 20 groups.', @@ -101,7 +87,7 @@ const describeMetricSchema = z.object({ // ─── Helpers ───────────────────────────────────────────────────────────────── type KindDetail = { - kind: QueryableKind; + kind: QueryableMetricKind; tableName: string; unit?: string; description?: string; @@ -151,7 +137,7 @@ async function detectKindsForMetric({ startDate: Date; endDate: Date; signal: AbortSignal; -}): Promise { +}): Promise { const probes = await Promise.all( QUERYABLE_METRIC_KINDS.map(async kind => { const tableName = metricTables[kind]; @@ -183,7 +169,7 @@ async function detectKindsForMetric({ } }), ); - return probes.filter((k): k is QueryableKind => k !== null); + return probes.filter((k): k is QueryableMetricKind => k !== null); } /** @@ -412,7 +398,7 @@ async function describeMetricImpl( // Resolve which kinds to describe. With an explicit `kind`, restrict to // that one. Without, probe each populated kind so the agent gets one // entry per match (rare but possible for shared metric names). - let candidateKinds: QueryableKind[]; + let candidateKinds: QueryableMetricKind[]; if (input.kind) { if (!source.metricTables[input.kind]) { return { @@ -628,8 +614,13 @@ export function registerDescribeMetric( // the typed shape we need for downstream calls. const input = describeMetricSchema.parse(rawInput); const controller = new AbortController(); + // Hoist the timer handle so the finally block can cancel it on the + // success path — otherwise a stale controller.abort() fires + // DESCRIBE_TIMEOUT_MS after every successful call and the + // setTimeout closure stays pinned for the same duration. + let timeoutId: ReturnType | undefined; const timeoutPromise = new Promise((_, reject) => { - setTimeout(() => { + timeoutId = setTimeout(() => { controller.abort(); reject(new Error('DESCRIBE_METRIC_TIMEOUT')); }, DESCRIBE_TIMEOUT_MS); @@ -658,6 +649,8 @@ export function registerDescribeMetric( }; } throw e; + } finally { + if (timeoutId !== undefined) clearTimeout(timeoutId); } }), ); diff --git a/packages/api/src/mcp/tools/sources/describeSource.ts b/packages/api/src/mcp/tools/sources/describeSource.ts index 5b25cf8222..806a4f8dcb 100644 --- a/packages/api/src/mcp/tools/sources/describeSource.ts +++ b/packages/api/src/mcp/tools/sources/describeSource.ts @@ -8,11 +8,7 @@ import { } from '@hyperdx/common-utils/dist/clickhouse'; import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata'; -import { - MetricsDataType, - type MetricTable, - SourceKind, -} from '@hyperdx/common-utils/dist/types'; +import { type MetricTable, SourceKind } from '@hyperdx/common-utils/dist/types'; import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { z } from 'zod'; @@ -23,6 +19,10 @@ import { trimToolResponse } from '@/utils/trimToolResponse'; import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; +import { + QUERYABLE_METRIC_KINDS, + type QueryableMetricKind, +} from './metricKinds'; // How far back to look when querying the rollup tables for value samples. const VALUE_SAMPLE_LOOKBACK_MS = 24 * 60 * 60 * 1000; // 24 hours @@ -39,15 +39,6 @@ const MAX_MAP_KEYS_TO_SAMPLE = 10; // clickstack_list_metrics provides paginated discovery beyond this cap. const MAX_METRIC_NAMES_PER_KIND = 20; -// Metric kinds the query renderer can translate today. Summary and -// ExponentialHistogram are intentionally omitted — they have schema but -// no SQL translator in the renderer yet. -const QUERYABLE_METRIC_KINDS = [ - MetricsDataType.Gauge, - MetricsDataType.Sum, - MetricsDataType.Histogram, -] as const; - /** * Pick the representative metric table to use as the starting point for * schema/attribute discovery on a metric source. Prefers gauge → sum → @@ -57,9 +48,7 @@ const QUERYABLE_METRIC_KINDS = [ */ function pickRepresentativeMetricTable( metricTables: MetricTable, -): - | { kind: (typeof QUERYABLE_METRIC_KINDS)[number]; tableName: string } - | undefined { +): { kind: QueryableMetricKind; tableName: string } | undefined { for (const kind of QUERYABLE_METRIC_KINDS) { const tableName = metricTables[kind]; if (tableName) { @@ -279,7 +268,7 @@ async function describeSourceSchema( // Key columns by source kind let representativeMetric: - | { kind: (typeof QUERYABLE_METRIC_KINDS)[number]; tableName: string } + | { kind: QueryableMetricKind; tableName: string } | undefined; if (source.kind === SourceKind.Trace) { meta.keyColumns = { @@ -656,9 +645,14 @@ export function registerDescribeSource( const controller = new AbortController(); // Promise.race enforces wall-clock timeout regardless of whether - // internal ClickHouse calls honour the AbortSignal. + // internal ClickHouse calls honour the AbortSignal. Hoist the + // timer handle so the finally block can cancel it on the success + // path — otherwise a stale controller.abort() fires + // DESCRIBE_TIMEOUT_MS after every successful call and the + // setTimeout closure stays pinned for the same duration. + let timeoutId: ReturnType | undefined; const timeoutPromise = new Promise((_, reject) => { - setTimeout(() => { + timeoutId = setTimeout(() => { controller.abort(); reject(new Error('DESCRIBE_TIMEOUT')); }, DESCRIBE_TIMEOUT_MS); @@ -696,6 +690,8 @@ export function registerDescribeSource( 'Failed to describe source schema', ); throw e; + } finally { + if (timeoutId !== undefined) clearTimeout(timeoutId); } }, ), diff --git a/packages/api/src/mcp/tools/sources/listMetrics.ts b/packages/api/src/mcp/tools/sources/listMetrics.ts index 8aca793a28..27884996f9 100644 --- a/packages/api/src/mcp/tools/sources/listMetrics.ts +++ b/packages/api/src/mcp/tools/sources/listMetrics.ts @@ -5,9 +5,8 @@ import { } from '@hyperdx/common-utils/dist/clickhouse'; import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata'; -import { MetricsDataType, SourceKind } from '@hyperdx/common-utils/dist/types'; +import { SourceKind } from '@hyperdx/common-utils/dist/types'; import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; -import ms from 'ms'; import { z } from 'zod'; import { getConnectionById } from '@/controllers/connection'; @@ -16,24 +15,10 @@ import logger from '@/utils/logger'; import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; - -// Metric kinds the query renderer can translate today. Mirrors -// QUERYABLE_METRIC_KINDS in describeSource.ts. Declared as string -// literals (not MetricsDataType enum members) so `z.enum(...)` can -// narrow the inferred handler input type properly — referencing the -// enum here pessimises Zod's inference to `unknown` at the MCP SDK -// callback boundary. -const QUERYABLE_METRIC_KINDS = ['gauge', 'sum', 'histogram'] as const; -type QueryableKind = (typeof QUERYABLE_METRIC_KINDS)[number]; - -// Compile-time guarantee that the string literals still match the -// MetricsDataType enum (and so are valid keys on MetricTable). -const _assertKindsMatch: readonly QueryableKind[] = [ - MetricsDataType.Gauge, - MetricsDataType.Sum, - MetricsDataType.Histogram, -]; -void _assertKindsMatch; +import { + QUERYABLE_METRIC_KINDS, + type QueryableMetricKind, +} from './metricKinds'; const DEFAULT_LIMIT = 50; const MAX_LIMIT = 500; @@ -42,7 +27,7 @@ const DEFAULT_LOOKBACK_MS = 24 * 60 * 60 * 1000; // ─── Cursor ────────────────────────────────────────────────────────────────── export type ListMetricsCursorPayload = { - kind: QueryableKind; + kind: QueryableMetricKind; lastName: string; }; @@ -63,7 +48,10 @@ export function decodeCursor(raw: string): ListMetricsCursorPayload | null { typeof parsed.lastName === 'string' && (QUERYABLE_METRIC_KINDS as readonly string[]).includes(parsed.kind) ) { - return { kind: parsed.kind as QueryableKind, lastName: parsed.lastName }; + return { + kind: parsed.kind as QueryableMetricKind, + lastName: parsed.lastName, + }; } } catch { // fall through @@ -127,7 +115,7 @@ const listMetricsSchema = z.object({ type MetricEntry = { name: string; - kind: QueryableKind; + kind: QueryableMetricKind; unit?: string; description?: string; }; @@ -166,7 +154,7 @@ async function fetchMetricsForKind({ }: { clickhouseClient: ClickhouseClient; metadata: ReturnType; - kind: QueryableKind; + kind: QueryableMetricKind; databaseName: string; tableName: string; connectionId: string; @@ -321,7 +309,7 @@ export function registerListMetrics( // Resolve which kinds to scan, in order. When a cursor is set, // skip kinds before the cursor's kind (already returned) and start // the cursor's kind at the lastName-exclusive position. - const requestedKinds: QueryableKind[] = input.kind + const requestedKinds: QueryableMetricKind[] = input.kind ? [input.kind] : QUERYABLE_METRIC_KINDS.filter(k => Boolean(source.metricTables[k])); const startKindIdx = cursor ? requestedKinds.indexOf(cursor.kind) : 0; diff --git a/packages/api/src/mcp/tools/sources/metricKinds.ts b/packages/api/src/mcp/tools/sources/metricKinds.ts new file mode 100644 index 0000000000..6898a1d5cf --- /dev/null +++ b/packages/api/src/mcp/tools/sources/metricKinds.ts @@ -0,0 +1,34 @@ +import { MetricsDataType } from '@hyperdx/common-utils/dist/types'; + +/** + * Metric kinds the query renderer can translate today. Mirrors the three + * MetricsDataType members that `translateMetricChartConfig` in + * common-utils branches over; `summary` and `"exponential histogram"` + * are intentionally excluded because the renderer throws on them. + * + * Declared as plain string literals (not MetricsDataType enum members) + * so `z.enum(...)` narrows correctly at the MCP SDK callback boundary — + * referencing the enum here pessimises Zod's inference to `unknown` on + * every optional field of the surrounding tool input schema, which then + * cascades into "not assignable to type string" errors at every handler + * call site. The compile-time assertion below guarantees the literals + * stay in sync with the enum so adding a new queryable kind cannot be + * done in only one place. + * + * Imported by every metric-aware MCP tool (clickstack_describe_source, + * clickstack_list_metrics, clickstack_describe_metric) and by the + * clickstack_timeseries / clickstack_table select-item schema. + */ +export const QUERYABLE_METRIC_KINDS = ['gauge', 'sum', 'histogram'] as const; + +export type QueryableMetricKind = (typeof QUERYABLE_METRIC_KINDS)[number]; + +// Compile-time assertion that the string literals above still match the +// MetricsDataType enum members. If a future MetricsDataType rename +// breaks this list, this line fails to type-check. +const _assertKindsMatchEnum: readonly QueryableMetricKind[] = [ + MetricsDataType.Gauge, + MetricsDataType.Sum, + MetricsDataType.Histogram, +]; +void _assertKindsMatchEnum; From 21ac365a2ef841ba081aeee9e5968393729039c2 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Wed, 10 Jun 2026 14:28:57 -0500 Subject: [PATCH 10/21] [HDX-4347] fix(mcp): unblock CI integration test failures Bug 1: timeseries / table metric queries failed with 'Value expression is required for non-count aggregation functions' The Stage 2 default of valueExpression='Value' for metric items ran inside runConfigTile, AFTER buildTile had already called externalDashboardTileSchemaWithId.parse(). That schema's superRefine rejects non-count aggregations with an empty valueExpression, so any metric query that omitted the field never reached the runtime default. Add applyMetricSelectDefaults() to tools/query/schemas.ts and call it in the timeseries / table handlers BEFORE buildTile, mirroring the agent-friendly default the external API v2 charts path applies on the REST surface. The runtime default in runConfigTile stays as defensive coverage for saved dashboards. Bug 2: clickstack_describe_metric returned an empty attributeKeys map for metric scopes that DID have data metadata.getAllFields({ metricName }) was returning empty arrays for metric-scoped Map-column scans in the CI environment. Replace the call with fetchAttributeKeys(), a direct query that issues 'arrayDistinct(arrayFlatten(groupArray(mapKeys(col))))' for each Map column on the kind's table. This matches the proven pattern used by the web app's useFetchMetricResourceAttrs hook and is more transparent than the metadata helper for this single-purpose case. Verified end-to-end against make dev-int: - queryTool.test.ts: 53/53 pass (was 50/53 before this commit) - sources.test.ts: 76/76 pass (was 75/76 before this commit) - query.test.ts unit suite: 69/69 (added 4 new tests for applyMetricSelectDefaults) No new TypeScript errors above the pre-existing baseline. --- packages/api/src/mcp/__tests__/query.test.ts | 44 +++++++ packages/api/src/mcp/tools/query/schemas.ts | 18 +++ packages/api/src/mcp/tools/query/table.ts | 12 +- .../api/src/mcp/tools/query/timeseries.ts | 12 +- .../src/mcp/tools/sources/describeMetric.ts | 119 +++++++++++++----- 5 files changed, 172 insertions(+), 33 deletions(-) diff --git a/packages/api/src/mcp/__tests__/query.test.ts b/packages/api/src/mcp/__tests__/query.test.ts index 169f050543..326394c3ce 100644 --- a/packages/api/src/mcp/__tests__/query.test.ts +++ b/packages/api/src/mcp/__tests__/query.test.ts @@ -14,6 +14,7 @@ import { parseTimeRange, } from '../tools/query/helpers'; import { + applyMetricSelectDefaults, getMetricSelectIssues, validateMetricSelectItems, } from '../tools/query/schemas'; @@ -556,6 +557,49 @@ describe('validateMetricSelectItems', () => { }); }); +// ─── applyMetricSelectDefaults ─────────────────────────────────────────────── + +describe('applyMetricSelectDefaults', () => { + it('defaults valueExpression to "Value" when metricType is set', () => { + const out = applyMetricSelectDefaults([ + { + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }, + ]); + expect(out[0].valueExpression).toBe('Value'); + }); + + it('preserves an explicit valueExpression on metric items', () => { + const out = applyMetricSelectDefaults([ + { + aggFn: 'avg', + metricType: 'gauge', + metricName: 'x', + valueExpression: 'Value * 100', + }, + ]); + expect(out[0].valueExpression).toBe('Value * 100'); + }); + + it('leaves non-metric items untouched', () => { + const out = applyMetricSelectDefaults([{ aggFn: 'count' }]); + expect(out[0]).toEqual({ aggFn: 'count' }); + expect(out[0].valueExpression).toBeUndefined(); + }); + + it('returns new objects only for the items it mutates', () => { + const input = [ + { aggFn: 'count' as const }, + { aggFn: 'avg', metricType: 'gauge' as const, metricName: 'x' }, + ]; + const out = applyMetricSelectDefaults(input); + expect(out[0]).toBe(input[0]); // unchanged item is the same reference + expect(out[1]).not.toBe(input[1]); // mutated item is a new object + }); +}); + // ─── annotateIncreaseTopNHint ──────────────────────────────────────────────── function buildResult(data: unknown[]) { diff --git a/packages/api/src/mcp/tools/query/schemas.ts b/packages/api/src/mcp/tools/query/schemas.ts index bfa75ea579..486bfbf2d1 100644 --- a/packages/api/src/mcp/tools/query/schemas.ts +++ b/packages/api/src/mcp/tools/query/schemas.ts @@ -314,6 +314,24 @@ export type McpSelectItem = { isDelta?: boolean; }; +/** + * Default `valueExpression` to `"Value"` for every metric-tagged select + * item that omits it. Must be called BEFORE `buildTile`, because the + * external dashboard tile schema's `superRefine` rejects non-count + * aggregations with an empty `valueExpression`. The runtime renderer + * looks for `Value` on metric tables, so defaulting it here matches what + * `external-api/v2/charts.ts:240-250` does on the REST path. + */ +export function applyMetricSelectDefaults( + items: ReadonlyArray, +): T[] { + return items.map(item => + item.metricType && !item.valueExpression + ? { ...item, valueExpression: 'Value' } + : item, + ); +} + /** * Apply `getMetricSelectIssues` to every select item in a tool input. * Returns an error-shaped tool response when any issue is detected, or diff --git a/packages/api/src/mcp/tools/query/table.ts b/packages/api/src/mcp/tools/query/table.ts index ad01bcf687..0f90f2c7dd 100644 --- a/packages/api/src/mcp/tools/query/table.ts +++ b/packages/api/src/mcp/tools/query/table.ts @@ -11,6 +11,7 @@ import { runConfigTile, } from './helpers'; import { + applyMetricSelectDefaults, endTimeSchema, groupBySchema, MCP_AGG_FN_OPTIONS, @@ -204,14 +205,21 @@ export function registerTable(server: McpServer, context: McpContext) { // Cast to the concrete `McpSelectItem[]` because Zod 3.x widens // optional-field inference at the MCP-SDK tool boundary; the // runtime parser still produces the correct shape. - const select = input.select as McpSelectItem[]; + const rawSelect = input.select as McpSelectItem[]; // Validate cross-field constraints (metric rules, level/quantile, // valueExpression presence) and surface friendly errors before we // touch ClickHouse. - const validation = validateMetricSelectItems(select); + const validation = validateMetricSelectItems(rawSelect); if (validation) return validation; + // Default valueExpression="Value" for metric items BEFORE we call + // buildTile, because the external dashboard tile schema's + // superRefine rejects non-count aggregations with empty + // valueExpression and agents normally omit the field on metric + // queries. + const select = applyMetricSelectDefaults(rawSelect); + // Auto-upgrade shape when select has multiple items but shape is // single-value (number/pie). This is the #1 Zod error class from agents. let displayType: 'table' | 'number' | 'pie' = input.shape; diff --git a/packages/api/src/mcp/tools/query/timeseries.ts b/packages/api/src/mcp/tools/query/timeseries.ts index 7e63784cce..fdc2f4b179 100644 --- a/packages/api/src/mcp/tools/query/timeseries.ts +++ b/packages/api/src/mcp/tools/query/timeseries.ts @@ -11,6 +11,7 @@ import { runConfigTile, } from './helpers'; import { + applyMetricSelectDefaults, endTimeSchema, groupBySchema, McpSelectItem, @@ -111,14 +112,21 @@ export function registerTimeseries(server: McpServer, context: McpContext) { // Cast to the concrete `McpSelectItem[]` because Zod 3.x widens // optional-field inference at the MCP-SDK tool boundary; the // runtime parser still produces the correct shape. - const select = input.select as McpSelectItem[]; + const rawSelect = input.select as McpSelectItem[]; // Validate cross-field constraints (metric rules, level/quantile, // valueExpression presence) and surface friendly errors before we // touch ClickHouse. - const validation = validateMetricSelectItems(select); + const validation = validateMetricSelectItems(rawSelect); if (validation) return validation; + // Default valueExpression="Value" for metric items BEFORE we call + // buildTile, because the external dashboard tile schema's + // superRefine rejects non-count aggregations with empty + // valueExpression and agents normally omit the field on metric + // queries. + const select = applyMetricSelectDefaults(rawSelect); + // Inject top-level where into each select item (timeseries uses // per-select-item aggCondition, not chart-level where) const { items: selectItems, warnings: mergeWarnings } = diff --git a/packages/api/src/mcp/tools/sources/describeMetric.ts b/packages/api/src/mcp/tools/sources/describeMetric.ts index abef8fcedf..26505d2455 100644 --- a/packages/api/src/mcp/tools/sources/describeMetric.ts +++ b/packages/api/src/mcp/tools/sources/describeMetric.ts @@ -1,6 +1,9 @@ import { chSql, concatChSql, + convertCHDataTypeToJSType, + filterColumnMetaByType, + JSDataType, tableExpr, } from '@hyperdx/common-utils/dist/clickhouse'; import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; @@ -243,6 +246,82 @@ async function fetchUnitAndDescription({ } } +/** + * Discover the distinct attribute keys present for a single metric + * name on a single kind's table, grouped by the Map column they live on + * (typically ResourceAttributes / Attributes / ScopeAttributes on the + * OTel Collector default schema). Issues one query per Map column with + * `mapKeys(col) AS keys` and aggregates the distinct keys server-side. + * + * Replaces an earlier `metadata.getAllFields({ metricName })` call which + * silently returned empty arrays in this CI environment for + * metric-scoped scans. A direct query is more transparent and matches + * the proven pattern used by the web app's `useFetchMetricResourceAttrs`. + */ +async function fetchAttributeKeys({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName, + columns, + signal, +}: { + clickhouseClient: ClickhouseClient; + databaseName: string; + tableName: string; + connectionId: string; + metricName: string; + columns: { name: string; type: string }[]; + signal: AbortSignal; +}): Promise> { + const mapColumns = + filterColumnMetaByType(columns, [JSDataType.Map])?.filter( + c => convertCHDataTypeToJSType(c.type) === JSDataType.Map, + ) ?? []; + if (mapColumns.length === 0) return {}; + + const projections = mapColumns.map( + col => + chSql`arrayDistinct(arrayFlatten(groupArray(mapKeys(${{ Identifier: col.name }})))) AS ${{ Identifier: col.name }}`, + ); + + const sql = chSql` + SELECT ${concatChSql(', ', projections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName = ${{ String: metricName }} + `; + + try { + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + abort_signal: signal, + }); + const result = (await response.json()) as { + data: Array>; + }; + const row = result.data[0]; + if (!row) return {}; + const out: Record = {}; + for (const col of mapColumns) { + const keys = row[col.name]; + if (Array.isArray(keys) && keys.length > 0) { + out[col.name] = keys.filter(k => typeof k === 'string' && k.length > 0); + } + } + return out; + } catch (e) { + logger.warn( + { tableName, error: e instanceof Error ? e.message : String(e) }, + 'fetchAttributeKeys failed', + ); + return {}; + } +} + /** * Sample distinct values per attribute key. Uses one composed * groupArray-per-key query so all keys are fetched in a single round @@ -459,7 +538,7 @@ async function describeMetricImpl( // Defensive column-presence check before referencing MetricUnit / // MetricDescription. - let columns: { name: string }[]; + let columns: { name: string; type: string }[]; try { columns = await metadata.getColumns({ databaseName, @@ -477,7 +556,7 @@ async function describeMetricImpl( const hasUnit = columnNames.has('MetricUnit'); const hasDescription = columnNames.has('MetricDescription'); - const [meta, allFields] = await Promise.all([ + const [meta, attributeKeys] = await Promise.all([ fetchUnitAndDescription({ clickhouseClient, databaseName, @@ -490,35 +569,17 @@ async function describeMetricImpl( hasDescription, signal, }), - metadata - .getAllFields({ - databaseName, - tableName, - connectionId, - metricName: input.metricName, - dateRange: [startDate, endDate], - }) - .catch(e => { - logger.warn( - { kind, error: e instanceof Error ? e.message : String(e) }, - 'describeMetric: getAllFields failed', - ); - return []; - }), + fetchAttributeKeys({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName: input.metricName, + columns, + signal, + }), ]); - // Group fields by map column (Attributes / ResourceAttributes / - // ScopeAttributes). Native columns are skipped — the agent already - // knows the metric value column is `Value`. - const attributeKeys: Record = {}; - for (const field of allFields) { - if (field.path.length < 2) continue; - const mapColumn = field.path[0]; - const keyName = field.path[1]; - if (!attributeKeys[mapColumn]) attributeKeys[mapColumn] = []; - attributeKeys[mapColumn].push(keyName); - } - const detail: KindDetail = { kind, tableName, From 22c1c276a73c466f3a211f66ac0bf13ad0b6110b Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Wed, 10 Jun 2026 14:39:36 -0500 Subject: [PATCH 11/21] [HDX-4347] fix(mcp): annotate applyMetricSelectDefaults test inputs The new applyMetricSelectDefaults tests inferred narrow object shapes from inline literals, so 'valueExpression' was absent from the inferred generic type and TS errored on accessing 'out[0].valueExpression'. Annotate each test input as 'SelectItem' so the optional field stays in the inferred type. Tests behave identically at runtime; this is a TypeScript-only fix. --- packages/api/src/mcp/__tests__/query.test.ts | 30 ++++++++++++++------ 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/packages/api/src/mcp/__tests__/query.test.ts b/packages/api/src/mcp/__tests__/query.test.ts index 326394c3ce..5db92f19ec 100644 --- a/packages/api/src/mcp/__tests__/query.test.ts +++ b/packages/api/src/mcp/__tests__/query.test.ts @@ -559,40 +559,54 @@ describe('validateMetricSelectItems', () => { // ─── applyMetricSelectDefaults ─────────────────────────────────────────────── +// Typed as McpSelectItem so the inferred generic preserves the +// optional valueExpression field. Otherwise structural inference +// narrows away `valueExpression` and the assertions below stop +// type-checking. +type SelectItem = { + aggFn: string; + metricType?: 'gauge' | 'sum' | 'histogram'; + metricName?: string; + valueExpression?: string; +}; + describe('applyMetricSelectDefaults', () => { it('defaults valueExpression to "Value" when metricType is set', () => { - const out = applyMetricSelectDefaults([ + const input: SelectItem[] = [ { aggFn: 'avg', metricType: 'gauge', metricName: 'system.cpu.utilization', }, - ]); + ]; + const out = applyMetricSelectDefaults(input); expect(out[0].valueExpression).toBe('Value'); }); it('preserves an explicit valueExpression on metric items', () => { - const out = applyMetricSelectDefaults([ + const input: SelectItem[] = [ { aggFn: 'avg', metricType: 'gauge', metricName: 'x', valueExpression: 'Value * 100', }, - ]); + ]; + const out = applyMetricSelectDefaults(input); expect(out[0].valueExpression).toBe('Value * 100'); }); it('leaves non-metric items untouched', () => { - const out = applyMetricSelectDefaults([{ aggFn: 'count' }]); + const input: SelectItem[] = [{ aggFn: 'count' }]; + const out = applyMetricSelectDefaults(input); expect(out[0]).toEqual({ aggFn: 'count' }); expect(out[0].valueExpression).toBeUndefined(); }); it('returns new objects only for the items it mutates', () => { - const input = [ - { aggFn: 'count' as const }, - { aggFn: 'avg', metricType: 'gauge' as const, metricName: 'x' }, + const input: SelectItem[] = [ + { aggFn: 'count' }, + { aggFn: 'avg', metricType: 'gauge', metricName: 'x' }, ]; const out = applyMetricSelectDefaults(input); expect(out[0]).toBe(input[0]); // unchanged item is the same reference From 05221ff4b1677bd1a5c2e425ee1272cfcdd53ec4 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 11 Jun 2026 12:31:54 -0500 Subject: [PATCH 12/21] [HDX-4347] fix(mcp): wrap getSource to handle invalid ObjectId MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling clickstack_describe_source / clickstack_list_metrics / clickstack_describe_metric with a non-ObjectId sourceId surfaced a raw Mongoose CastError to the MCP client: Cast to ObjectId failed for value "…" (type string) at path "_id" for model "Source" The well-formed-but-missing path was already handled by each tool's "Source X not found" branch, but malformed IDs threw before getSource returned. Pre-check via mongoose.Types.ObjectId.isValid and wrap the findOne in a defensive try/catch so getSource returns null on any malformed input and the existing not-found branch fires cleanly. Add a controller unit test asserting both the malformed and missing ObjectId paths return null. --- .../src/controllers/__tests__/sources.test.ts | 39 +++++++++++++++++++ packages/api/src/controllers/sources.ts | 18 ++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 packages/api/src/controllers/__tests__/sources.test.ts diff --git a/packages/api/src/controllers/__tests__/sources.test.ts b/packages/api/src/controllers/__tests__/sources.test.ts new file mode 100644 index 0000000000..71a7d73466 --- /dev/null +++ b/packages/api/src/controllers/__tests__/sources.test.ts @@ -0,0 +1,39 @@ +import mongoose from 'mongoose'; + +import { getSource } from '@/controllers/sources'; +import { clearDBCollections, closeDB, connectDB } from '@/fixtures'; + +describe('sources controller', () => { + beforeAll(async () => { + await connectDB(); + }); + + afterEach(async () => { + await clearDBCollections(); + }); + + afterAll(async () => { + await closeDB(); + }); + + describe('getSource', () => { + it('returns null when sourceId is not a valid ObjectId', async () => { + // Non-ObjectId strings used to bubble a Mongoose CastError up + // through MCP tools as "Cast to ObjectId failed for value ...". + // The wrapper now short-circuits before hitting MongoDB so the + // caller's not-found branch fires cleanly. + const team = new mongoose.Types.ObjectId().toString(); + + expect(await getSource(team, 'not-an-objectid')).toBeNull(); + expect(await getSource(team, '')).toBeNull(); + expect(await getSource(team, ' ')).toBeNull(); + }); + + it('returns null for a well-formed but missing ObjectId', async () => { + const team = new mongoose.Types.ObjectId().toString(); + const missingSourceId = new mongoose.Types.ObjectId().toString(); + + expect(await getSource(team, missingSourceId)).toBeNull(); + }); + }); +}); diff --git a/packages/api/src/controllers/sources.ts b/packages/api/src/controllers/sources.ts index a34f931dff..940d88cfe6 100644 --- a/packages/api/src/controllers/sources.ts +++ b/packages/api/src/controllers/sources.ts @@ -1,4 +1,5 @@ import { SourceKind, SourceSchema } from '@hyperdx/common-utils/dist/types'; +import mongoose from 'mongoose'; import { ISourceInput, @@ -35,8 +36,21 @@ export function getSources(team: string) { return Source.find({ team }); } -export function getSource(team: string, sourceId: string) { - return Source.findOne({ _id: sourceId, team }); +export async function getSource(team: string, sourceId: string) { + // Pre-check the sourceId shape so a non-ObjectId input from the MCP + // surface returns null (and the caller's "not found" branch) instead + // of bubbling a Mongoose CastError through to the agent. + if (!mongoose.Types.ObjectId.isValid(sourceId)) { + return null; + } + try { + return await Source.findOne({ _id: sourceId, team }); + } catch { + // Defense-in-depth: if Mongoose still throws (e.g. a future cast + // path), treat it as "not found" so the MCP tools surface a clean + // error message rather than a raw stack. + return null; + } } type DistributiveOmit = T extends T From df976b18b692a48dcb95047c45e6aa94665d1731 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 11 Jun 2026 12:35:15 -0500 Subject: [PATCH 13/21] [HDX-4347] feat(mcp): guard query tools against source-kind / metricType mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clickstack_timeseries and clickstack_table accepted metricType + metricName on any source. When the source was not a metric source, the renderer composed SQL that referenced the metric "Value" column and let ClickHouse fail with: Unknown expression or function identifier `Value` in scope SELECT AVG(toFloat64OrDefault(toString(Value))) … FROM default.otel_traces The reciprocal case (metric source with no metricType) reached the renderer in the same broken state. Add assertSourceKindMatchesSelect() in tools/query/helpers.ts and call it inside runConfigTile right after getSource(). Both clickstack_timeseries and clickstack_table inherit the guard, with agent-actionable error messages that mirror the wording on clickstack_list_metrics / clickstack_describe_metric. Add unit tests covering both mismatch directions plus the no-op paths (raw-string select, empty metricType, non-array select). --- packages/api/src/mcp/__tests__/query.test.ts | 71 +++++++++++++++++ packages/api/src/mcp/tools/query/helpers.ts | 82 ++++++++++++++++++++ 2 files changed, 153 insertions(+) diff --git a/packages/api/src/mcp/__tests__/query.test.ts b/packages/api/src/mcp/__tests__/query.test.ts index 5db92f19ec..3feaacc68e 100644 --- a/packages/api/src/mcp/__tests__/query.test.ts +++ b/packages/api/src/mcp/__tests__/query.test.ts @@ -8,6 +8,7 @@ jest.mock('@/utils/trimToolResponse', () => ({ import { annotateIncreaseTopNHint, + assertSourceKindMatchesSelect, errorHint, INCREASE_TOP_N_CAP, mergeWhereIntoSelectItems, @@ -698,3 +699,73 @@ describe('annotateIncreaseTopNHint', () => { ).not.toThrow(); }); }); + +describe('assertSourceKindMatchesSelect', () => { + it('returns null when metric select runs against a metric source', () => { + const source = { kind: 'metric' }; + const select = [ + { + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }, + ]; + expect(assertSourceKindMatchesSelect(source, select)).toBeNull(); + }); + + it('returns null when non-metric select runs against a trace source', () => { + const source = { kind: 'trace' }; + const select = [{ aggFn: 'count' }]; + expect(assertSourceKindMatchesSelect(source, select)).toBeNull(); + }); + + it('errors when metric params target a non-metric source', () => { + const source = { kind: 'trace' }; + const select = [ + { + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }, + ]; + const result = assertSourceKindMatchesSelect(source, select); + expect(result?.isError).toBe(true); + expect(result?.content[0].text).toMatch(/not metric/); + expect(result?.content[0].text).toMatch(/clickstack_list_sources/); + }); + + it('errors when a metric source receives select items without metricType', () => { + const source = { kind: 'metric' }; + const select = [{ aggFn: 'avg', valueExpression: 'Value' }]; + const result = assertSourceKindMatchesSelect(source, select); + expect(result?.isError).toBe(true); + expect(result?.content[0].text).toMatch(/metricType \+ metricName/); + expect(result?.content[0].text).toMatch(/clickstack_describe_source/); + }); + + it('counts only items whose metricType is a non-empty string', () => { + const source = { kind: 'trace' }; + // Items with empty/missing metricType should not trip the check + expect( + assertSourceKindMatchesSelect(source, [ + { aggFn: 'count' }, + { aggFn: 'avg', valueExpression: 'Duration', metricType: '' }, + ]), + ).toBeNull(); + }); + + it('returns null for a raw string select', () => { + // Raw-string selects bypass the metric-annotation check; the + // renderer handles them directly. + expect( + assertSourceKindMatchesSelect({ kind: 'metric' }, 'count()'), + ).toBeNull(); + }); + + it('returns null when select is not an array', () => { + expect( + assertSourceKindMatchesSelect({ kind: 'trace' }, undefined), + ).toBeNull(); + expect(assertSourceKindMatchesSelect({ kind: 'trace' }, null)).toBeNull(); + }); +}); diff --git a/packages/api/src/mcp/tools/query/helpers.ts b/packages/api/src/mcp/tools/query/helpers.ts index fd93b82bae..68bb53f651 100644 --- a/packages/api/src/mcp/tools/query/helpers.ts +++ b/packages/api/src/mcp/tools/query/helpers.ts @@ -264,6 +264,80 @@ function formatQueryResult(result: unknown) { }; } +// ─── Source-kind / select-shape guardrail ──────────────────────────────────── + +type SelectItemForKindCheck = { metricType?: unknown }; + +/** + * Reject builder-config queries where the select items' metric annotations + * don't match the source kind: + * - Non-metric source with any select item carrying `metricType` would + * fall through to SQL generation that references the metric `Value` + * column and fail with a cryptic ClickHouse error. + * - Metric source with no select item carrying `metricType` would also + * reach the renderer in a broken state. + * + * Catching both up-front gives the agent a clear next action that mirrors + * the wording on `clickstack_list_metrics` / `clickstack_describe_metric`. + * + * Returns an error envelope on mismatch, or `null` when the select shape + * is consistent with the source kind (or the select is a raw string that + * the caller already parsed by hand). + */ +export function assertSourceKindMatchesSelect( + source: { kind: string }, + select: unknown, +): { isError: true; content: [{ type: 'text'; text: string }] } | null { + // Raw-string select (rare on the builder path) — the renderer handles + // it; no metric annotations to inspect. + if (typeof select === 'string') return null; + if (!Array.isArray(select)) return null; + + const metricItemCount = (select as SelectItemForKindCheck[]).filter( + item => + typeof item === 'object' && + item !== null && + typeof item.metricType === 'string' && + item.metricType.length > 0, + ).length; + + const isMetricSource = source.kind === SourceKind.Metric; + + if (isMetricSource && metricItemCount === 0) { + return { + isError: true as const, + content: [ + { + type: 'text' as const, + text: + 'Source kind is "metric", but no select item specifies metricType + metricName. ' + + 'Each select item on a metric source must set metricType ("gauge" | "sum" | "histogram") ' + + 'and metricName (e.g. metricName:"system.cpu.utilization"). Call ' + + 'clickstack_describe_source or clickstack_list_metrics to discover available metric names.', + }, + ], + }; + } + + if (!isMetricSource && metricItemCount > 0) { + return { + isError: true as const, + content: [ + { + type: 'text' as const, + text: + `Source kind is "${source.kind}", not metric — but ${metricItemCount} select item(s) ` + + 'set metricType. metricType + metricName only work on metric sources. ' + + 'Drop the metric fields to query this source, or call clickstack_list_sources to find a ' + + 'source whose kind is "metric".', + }, + ], + }; + } + + return null; +} + // ─── Tile execution ────────────────────────────────────────────────────────── export async function runConfigTile( @@ -315,6 +389,14 @@ export async function runConfigTile( }; } + // Reject metric-style select against a non-metric source (and vice + // versa) before the renderer composes SQL against the wrong table. + const kindMismatch = assertSourceKindMatchesSelect( + source, + builderConfig.select, + ); + if (kindMismatch) return kindMismatch; + const connection = await getConnectionById( teamId, source.connection.toString(), From e2461be355da46c722c0323f41ac5a19e5d8f1d4 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 11 Jun 2026 12:41:20 -0500 Subject: [PATCH 14/21] [HDX-4347] fix(mcp): stop _id from leaking into metricTables responses clickstack_list_sources and clickstack_describe_source emitted the embedded metricTables subdoc with a stray Mongoose-generated _id key mixed in with the legitimate kind keys: "metricTables": { "gauge": "otel_metrics_gauge", "sum": "otel_metrics_sum", "_id": "6a2ad3b4da94be764e2deed8" } The metricTables field was declared as an inline `type: {...}` literal which Mongoose interprets as an embedded subdoc schema with an auto-generated _id field. Convert it to a real child Schema with { _id: false } so newly persisted documents stop carrying the stray id. Add a sanitizeMetricTables() helper as belt-and-braces defense for documents persisted before the schema fix, and use it at the MCP response boundary in both list_sources and describe_source. The helper looks up each valid MetricsDataType key via property access so it handles both plain objects and Mongoose subdoc instances. Tests: unit tests for sanitizeMetricTables covering the _id strip, non-string-value drop, unknown-key drop, and Mongoose-subdoc shape. Extend the existing describe_source integration test to assert the response payload only contains valid kind keys. --- .../api/src/mcp/__tests__/metricKinds.test.ts | 86 +++++++++++++++++++ .../api/src/mcp/__tests__/sources.test.ts | 6 ++ .../src/mcp/tools/sources/describeSource.ts | 9 +- .../api/src/mcp/tools/sources/listSources.ts | 9 +- .../api/src/mcp/tools/sources/metricKinds.ts | 41 +++++++++ packages/api/src/models/source.ts | 23 +++-- 6 files changed, 165 insertions(+), 9 deletions(-) create mode 100644 packages/api/src/mcp/__tests__/metricKinds.test.ts diff --git a/packages/api/src/mcp/__tests__/metricKinds.test.ts b/packages/api/src/mcp/__tests__/metricKinds.test.ts new file mode 100644 index 0000000000..a1d4d6aca0 --- /dev/null +++ b/packages/api/src/mcp/__tests__/metricKinds.test.ts @@ -0,0 +1,86 @@ +import { sanitizeMetricTables } from '../tools/sources/metricKinds'; + +describe('sanitizeMetricTables', () => { + it('returns undefined for null / undefined input', () => { + expect(sanitizeMetricTables(undefined)).toBeUndefined(); + expect(sanitizeMetricTables(null)).toBeUndefined(); + }); + + it('preserves valid queryable kind entries', () => { + expect( + sanitizeMetricTables({ + gauge: 'otel_metrics_gauge', + sum: 'otel_metrics_sum', + histogram: 'otel_metrics_histogram', + }), + ).toEqual({ + gauge: 'otel_metrics_gauge', + sum: 'otel_metrics_sum', + histogram: 'otel_metrics_histogram', + }); + }); + + it('preserves non-queryable kinds the schema still declares', () => { + // summary and "exponential histogram" are valid MetricsDataType + // members even though the query renderer cannot translate them. + expect( + sanitizeMetricTables({ + gauge: 'otel_metrics_gauge', + summary: 'otel_metrics_summary', + 'exponential histogram': 'otel_metrics_exponential_histogram', + }), + ).toEqual({ + gauge: 'otel_metrics_gauge', + summary: 'otel_metrics_summary', + 'exponential histogram': 'otel_metrics_exponential_histogram', + }); + }); + + it('strips Mongoose _id leaking from the metricTables subdoc', () => { + // Existing documents persisted before the schema fix may still + // carry a stray ObjectId in the embedded metricTables subdoc. + // The sanitizer keeps only valid kind keys. + const result = sanitizeMetricTables({ + gauge: 'otel_metrics_gauge', + sum: 'otel_metrics_sum', + _id: '6a2ad3b4da94be764e2deed8', + }); + expect(result).toEqual({ + gauge: 'otel_metrics_gauge', + sum: 'otel_metrics_sum', + }); + expect(result).not.toHaveProperty('_id'); + }); + + it('drops unknown keys', () => { + expect( + sanitizeMetricTables({ + gauge: 'otel_metrics_gauge', + not_a_real_kind: 'something', + }), + ).toEqual({ + gauge: 'otel_metrics_gauge', + }); + }); + + it('drops non-string values for valid keys', () => { + expect( + sanitizeMetricTables({ + gauge: 'otel_metrics_gauge', + sum: 12345, + histogram: null, + } as Record), + ).toEqual({ + gauge: 'otel_metrics_gauge', + }); + }); + + it('returns undefined when no valid entries remain', () => { + expect( + sanitizeMetricTables({ + _id: 'whatever', + not_a_real_kind: 'x', + }), + ).toBeUndefined(); + }); +}); diff --git a/packages/api/src/mcp/__tests__/sources.test.ts b/packages/api/src/mcp/__tests__/sources.test.ts index 4aff66c4a2..7d67a7365d 100644 --- a/packages/api/src/mcp/__tests__/sources.test.ts +++ b/packages/api/src/mcp/__tests__/sources.test.ts @@ -288,6 +288,12 @@ describe('MCP Source Tools', () => { // first by pickRepresentativeMetricTable). expect(output.source.metricTables).toBeDefined(); expect(output.source.metricTables).toHaveProperty('gauge'); + // metricTables should only contain valid kind keys — not a stray + // Mongoose `_id` from the embedded subdoc. + expect(output.source.metricTables).not.toHaveProperty('_id'); + expect(Object.keys(output.source.metricTables).sort()).toEqual( + ['gauge', 'histogram', 'sum'].sort(), + ); expect(output.source.discoveryMetricKind).toBe('gauge'); expect(output.source.columns).toBeDefined(); expect(Array.isArray(output.source.columns)).toBe(true); diff --git a/packages/api/src/mcp/tools/sources/describeSource.ts b/packages/api/src/mcp/tools/sources/describeSource.ts index 806a4f8dcb..53e36d93ce 100644 --- a/packages/api/src/mcp/tools/sources/describeSource.ts +++ b/packages/api/src/mcp/tools/sources/describeSource.ts @@ -22,6 +22,7 @@ import type { McpContext } from '../types'; import { QUERYABLE_METRIC_KINDS, type QueryableMetricKind, + sanitizeMetricTables, } from './metricKinds'; // How far back to look when querying the rollup tables for value samples. @@ -288,7 +289,13 @@ async function describeSourceSchema( traceId: source.traceIdExpression, }; } else if (source.kind === SourceKind.Metric) { - meta.metricTables = source.metricTables; + // Filter out implementation-detail keys (e.g. a stray Mongoose `_id` + // on the metricTables subdoc) so the agent only sees valid metric + // kinds. + const tables = sanitizeMetricTables( + source.metricTables as Record | undefined, + ); + if (tables) meta.metricTables = tables; representativeMetric = pickRepresentativeMetricTable(source.metricTables); if (representativeMetric) { meta.discoveryMetricKind = representativeMetric.kind; diff --git a/packages/api/src/mcp/tools/sources/listSources.ts b/packages/api/src/mcp/tools/sources/listSources.ts index 0c1e614689..a69d62ae87 100644 --- a/packages/api/src/mcp/tools/sources/listSources.ts +++ b/packages/api/src/mcp/tools/sources/listSources.ts @@ -7,6 +7,7 @@ import { getSources } from '@/controllers/sources'; import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; +import { sanitizeMetricTables } from './metricKinds'; export function registerListSources( server: McpServer, @@ -72,7 +73,13 @@ export function registerListSources( traceId: s.traceIdExpression, }; } else if (s.kind === SourceKind.Metric) { - meta.metricTables = s.metricTables; + // Filter out implementation-detail keys (e.g. a stray Mongoose + // `_id` on the metricTables subdoc) so the agent only sees + // valid metric kinds. + const tables = sanitizeMetricTables( + s.metricTables as Record | undefined, + ); + if (tables) meta.metricTables = tables; } return meta; diff --git a/packages/api/src/mcp/tools/sources/metricKinds.ts b/packages/api/src/mcp/tools/sources/metricKinds.ts index 6898a1d5cf..bdaac920ba 100644 --- a/packages/api/src/mcp/tools/sources/metricKinds.ts +++ b/packages/api/src/mcp/tools/sources/metricKinds.ts @@ -32,3 +32,44 @@ const _assertKindsMatchEnum: readonly QueryableMetricKind[] = [ MetricsDataType.Histogram, ]; void _assertKindsMatchEnum; + +/** + * Allowed kind keys on the `metricTables` map when serialising a metric + * source into an MCP response. Includes the non-queryable kinds + * (`summary`, `exponential histogram`) because the model schema declares + * tables for them even though the query renderer cannot yet translate + * them. + */ +const ALLOWED_METRIC_TABLE_KINDS: readonly string[] = + Object.values(MetricsDataType); + +/** + * Filter source.metricTables to the known kind keys before emitting it + * in a clickstack_list_sources / clickstack_describe_source response. + * + * Belt-and-braces defense: even after the model schema declares + * `_id: false` on the metricTables subdoc, existing documents persisted + * before the schema fix may still carry a stray `_id` value that + * Mongoose serialises alongside the legitimate kind keys. Filtering at + * the response boundary keeps the agent-facing payload free of + * implementation-detail keys. + * + * Accepts both plain objects and Mongoose subdocuments — we look up + * each allowed kind via property access rather than enumerating keys, + * because Mongoose subdoc instances expose field values through getters + * (not as own enumerable properties). + */ +export function sanitizeMetricTables( + metricTables: Record | undefined | null, +): Record | undefined { + if (!metricTables) return undefined; + const source = metricTables as Record; + const out: Record = {}; + for (const kind of ALLOWED_METRIC_TABLE_KINDS) { + const value = source[kind]; + if (typeof value === 'string' && value.length > 0) { + out[kind] = value; + } + } + return Object.keys(out).length > 0 ? out : undefined; +} diff --git a/packages/api/src/models/source.ts b/packages/api/src/models/source.ts index b7e7d55d33..e570617985 100644 --- a/packages/api/src/models/source.ts +++ b/packages/api/src/models/source.ts @@ -237,18 +237,27 @@ export const SessionSource = Source.discriminator( // -------------------------- // Metric discriminator // -------------------------- +// metricTables is declared as a nested Schema with `_id: false` so the +// embedded subdoc does not auto-generate an ObjectId. Without this opt-out +// Mongoose adds an `_id` field that leaks into MCP responses alongside +// the queryable kind keys (gauge/sum/histogram/...). +const MetricTablesSchema = new Schema( + { + [MetricsDataType.Gauge]: String, + [MetricsDataType.Histogram]: String, + [MetricsDataType.Sum]: String, + [MetricsDataType.Summary]: String, + [MetricsDataType.ExponentialHistogram]: String, + }, + { _id: false }, +); + type IMetricSource = Extract; export const MetricSource = Source.discriminator( SourceKind.Metric, new Schema>({ metricTables: { - type: { - [MetricsDataType.Gauge]: String, - [MetricsDataType.Histogram]: String, - [MetricsDataType.Sum]: String, - [MetricsDataType.Summary]: String, - [MetricsDataType.ExponentialHistogram]: String, - }, + type: MetricTablesSchema, default: undefined, }, resourceAttributesExpression: String, From e8e64e8ece4356e3c3d84d15e1b8b7bd2c6cef5f Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 11 Jun 2026 12:43:45 -0500 Subject: [PATCH 15/21] [HDX-4347] feat(mcp): require kind on clickstack_describe_metric MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clickstack_describe_metric historically auto-detected the metric kind when omitted, probing every populated metric table for the given MetricName and returning one entry per kind that matched. This path was the consistent source of 10s wall-clock timeouts on multi-kind metrics (e.g. container.cpu.usage which lives in both gauge and sum tables), even with sampleValues:false. The auto-detect path was also redundant: the upstream discovery tools both surface kind alongside name: - clickstack_list_metrics returns { name, kind, ... } - clickstack_describe_source returns metricNames grouped by kind By the time an agent reaches clickstack_describe_metric it already has the kind. Requiring it on the input drops the detectKindsForMetric probe, eliminates the multi-kind discovery path, and removes the only place the tool ran across more than one ClickHouse table. Also bound the per-kind attribute-keys discovery so the single-kind path stays fast on production-scale metric tables. The previous inline SQL ran an unbounded `WHERE MetricName = ?` scan with no time filter, which still exceeded the wall-clock timeout on tables with millions of rows per metric. Wrap the scan in a CTE that samples at most 100k matching rows (MetricName + time window) and aggregate map keys from that sample, plus an 8s server-side max_execution_time as a belt-and-braces safety net. Changes: - Make `kind` required in describeMetricSchema (was optional). - Remove detectKindsForMetric — auto-detect path no longer exists. - Collapse the if/else branch to the explicit-kind path. - Always return exactly one kindDetail; emit a top-level hint when the (metric, kind) pair has no signal so the agent can widen the window or verify the kind via clickstack_list_metrics. - Bound fetchAttributeKeys with a CTE-LIMIT sample + max_execution_time so the discovery query stays within the wall clock on hot metrics. - Update the tool description and the timeout hint accordingly. Tests: drop the auto-detect and multi-kind tests; existing tests that already pinned `kind` are unchanged. Update the unknown-metric test to reflect the new always-one-kind shape. Add a test asserting the tool rejects calls with missing kind. --- .../api/src/mcp/__tests__/sources.test.ts | 27 +- .../src/mcp/tools/sources/describeMetric.ts | 374 ++++++++---------- 2 files changed, 191 insertions(+), 210 deletions(-) diff --git a/packages/api/src/mcp/__tests__/sources.test.ts b/packages/api/src/mcp/__tests__/sources.test.ts index 7d67a7365d..da58d5ee68 100644 --- a/packages/api/src/mcp/__tests__/sources.test.ts +++ b/packages/api/src/mcp/__tests__/sources.test.ts @@ -573,24 +573,41 @@ describe('MCP Source Tools', () => { const result = await callTool(client, 'clickstack_describe_metric', { sourceId: traceSource._id.toString(), metricName: 'system.cpu.utilization', + kind: 'gauge', }); expect(result.isError).toBe(true); expect(getFirstText(result)).toContain('not a metric source'); }); - it('returns an actionable empty payload when the metric is unknown', async () => { + it('rejects calls that omit kind', async () => { + // `kind` is required. Calling without it returns an MCP error + // surfaced from the schema parser rather than reaching ClickHouse. + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'system.cpu.utilization', + }); + expect(result.isError).toBe(true); + }); + + it('returns an actionable hint when the metric has no data in the kind table', async () => { const metricSource = await createMetricSource(); const result = await callTool(client, 'clickstack_describe_metric', { sourceId: metricSource._id.toString(), metricName: 'definitely.not.a.metric', + kind: 'gauge', }); expect(result.isError).toBeFalsy(); const output = JSON.parse(getFirstText(result)); - expect(output.kinds).toEqual([]); + // We always return a single kindDetail for the requested kind — + // attribute keys are empty when the metric has no data. + expect(output.kinds).toHaveLength(1); + expect(output.kinds[0].kind).toBe('gauge'); + expect(output.kinds[0].attributeKeys).toEqual({}); expect(output.hint).toMatch(/No data found/); }); - it('auto-detects the kind and returns attribute keys for a gauge metric', async () => { + it('returns attribute keys for a gauge metric when kind is specified', async () => { const metricSource = await createMetricSource(); const now = new Date(); await bulkInsertMetricsGauge([ @@ -606,6 +623,7 @@ describe('MCP Source Tools', () => { const result = await callTool(client, 'clickstack_describe_metric', { sourceId: metricSource._id.toString(), metricName: 'system.cpu.utilization', + kind: 'gauge', }); expect(result.isError).toBeFalsy(); const output = JSON.parse(getFirstText(result)); @@ -622,7 +640,7 @@ describe('MCP Source Tools', () => { ); // Attribute values should be sampled by default. expect(detail.attributeValues).toBeDefined(); - // nextSteps.query carries a worked example matching the detected kind. + // nextSteps.query carries a worked example matching the requested kind. expect(output.nextSteps.query).toContain('metricType: "gauge"'); }); @@ -642,6 +660,7 @@ describe('MCP Source Tools', () => { const result = await callTool(client, 'clickstack_describe_metric', { sourceId: metricSource._id.toString(), metricName: 'system.cpu.utilization', + kind: 'gauge', sampleValues: false, }); expect(result.isError).toBeFalsy(); diff --git a/packages/api/src/mcp/tools/sources/describeMetric.ts b/packages/api/src/mcp/tools/sources/describeMetric.ts index 26505d2455..262341fa25 100644 --- a/packages/api/src/mcp/tools/sources/describeMetric.ts +++ b/packages/api/src/mcp/tools/sources/describeMetric.ts @@ -27,6 +27,15 @@ import { const DEFAULT_LOOKBACK_MS = 24 * 60 * 60 * 1000; const DESCRIBE_TIMEOUT_MS = 10_000; +// Server-side safety nets for the attribute-keys discovery query. +// Sample at most N rows that match (MetricName, time range), then +// aggregate from that sample. ClickHouse can stop scanning once the +// CTE has N matching rows so a hot metric does not starve the +// wall-clock budget. 100k rows is plenty to surface every unique map +// key on a healthy OTel metric. +const METRIC_ATTR_KEYS_SAMPLE_SIZE = 100_000; +const METRIC_ATTR_KEYS_MAX_EXEC_SECONDS = 8; + // Max sampled values per attribute key (when sampleValues is true). const MAX_ATTR_VALUES = 10; // Cap on the number of attribute keys we sample values for per kind to @@ -61,10 +70,13 @@ const describeMetricSchema = z.object({ ), kind: z .enum(QUERYABLE_METRIC_KINDS) - .optional() .describe( - 'Optional metric kind filter. Omit to auto-detect across every populated metric table on the source ' + - '(unusual cases where the same metric name lives in multiple kinds will return one entry per kind).', + 'Metric kind: "gauge" | "sum" | "histogram". Required. ' + + 'Discover via clickstack_list_metrics (which returns name + kind per entry) ' + + 'or clickstack_describe_source (which groups metric-name samples by kind). ' + + 'A metric name can legitimately live in more than one kind (e.g. ' + + '"container.cpu.usage" appears in both gauge and sum) — pick the kind ' + + 'matching the value you want to query.', ), startTime: z .string() @@ -118,63 +130,6 @@ function parseTimeRange( return { startDate, endDate }; } -/** - * Probe each candidate kind's table for the given metric name. Returns - * only the kinds where at least one row matches. - */ -async function detectKindsForMetric({ - clickhouseClient, - databaseName, - metricTables, - connectionId, - metricName, - startDate, - endDate, - signal, -}: { - clickhouseClient: ClickhouseClient; - databaseName: string; - metricTables: Record; - connectionId: string; - metricName: string; - startDate: Date; - endDate: Date; - signal: AbortSignal; -}): Promise { - const probes = await Promise.all( - QUERYABLE_METRIC_KINDS.map(async kind => { - const tableName = metricTables[kind]; - if (!tableName) return null; - try { - const sql = chSql` - SELECT 1 - FROM ${tableExpr({ database: databaseName, table: tableName })} - WHERE MetricName = ${{ String: metricName }} - AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }}) - AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }}) - LIMIT 1 - `; - const response = await clickhouseClient.query<'JSON'>({ - query: sql.sql, - query_params: sql.params, - format: 'JSON', - connectionId, - abort_signal: signal, - }); - const result = (await response.json()) as { data: unknown[] }; - return result.data.length > 0 ? kind : null; - } catch (e) { - logger.warn( - { kind, error: e instanceof Error ? e.message : String(e) }, - 'detectKindsForMetric: probe failed', - ); - return null; - } - }), - ); - return probes.filter((k): k is QueryableMetricKind => k !== null); -} - /** * Fetch unit and description for a metric name on a single kind table. * Returns undefined values when the columns are absent from the schema. @@ -253,10 +208,16 @@ async function fetchUnitAndDescription({ * OTel Collector default schema). Issues one query per Map column with * `mapKeys(col) AS keys` and aggregates the distinct keys server-side. * - * Replaces an earlier `metadata.getAllFields({ metricName })` call which - * silently returned empty arrays in this CI environment for - * metric-scoped scans. A direct query is more transparent and matches - * the proven pattern used by the web app's `useFetchMetricResourceAttrs`. + * Bounds the scan two ways so it stays fast on production-shaped metric + * tables (where a single MetricName can match millions of rows): + * - WHERE TimeUnix BETWEEN startDate AND endDate scopes to the + * discovery window the caller already passed. + * - max_rows_to_read caps the per-query scan server-side so a hot + * metric cannot starve the wall-clock budget on its own. + * + * The earlier inline SQL did neither — an unbounded + * `WHERE MetricName = ?` scan consistently exceeded the wall-clock + * timeout on production tables. */ async function fetchAttributeKeys({ clickhouseClient, @@ -265,6 +226,8 @@ async function fetchAttributeKeys({ connectionId, metricName, columns, + startDate, + endDate, signal, }: { clickhouseClient: ClickhouseClient; @@ -273,6 +236,8 @@ async function fetchAttributeKeys({ connectionId: string; metricName: string; columns: { name: string; type: string }[]; + startDate: Date; + endDate: Date; signal: AbortSignal; }): Promise> { const mapColumns = @@ -281,15 +246,31 @@ async function fetchAttributeKeys({ ) ?? []; if (mapColumns.length === 0) return {}; - const projections = mapColumns.map( + const sampleProjections = mapColumns.map( + col => chSql`${{ Identifier: col.name }}`, + ); + + const aggProjections = mapColumns.map( col => chSql`arrayDistinct(arrayFlatten(groupArray(mapKeys(${{ Identifier: col.name }})))) AS ${{ Identifier: col.name }}`, ); + // Aggregate from a bounded sample of matching rows. The inner LIMIT + // lets ClickHouse stop scanning once it has SAMPLE_SIZE rows that + // match (MetricName, time range), which keeps the query fast on hot + // metric tables. ResourceAttributes / Attributes / ScopeAttributes + // are rarely keyed independently per row, so 100k rows surfaces + // every realistic key set. const sql = chSql` - SELECT ${concatChSql(', ', projections)} - FROM ${tableExpr({ database: databaseName, table: tableName })} - WHERE MetricName = ${{ String: metricName }} + SELECT ${concatChSql(', ', aggProjections)} + FROM ( + SELECT ${concatChSql(', ', sampleProjections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName = ${{ String: metricName }} + AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }}) + AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }}) + LIMIT ${{ Int32: METRIC_ATTR_KEYS_SAMPLE_SIZE }} + ) `; try { @@ -298,6 +279,10 @@ async function fetchAttributeKeys({ query_params: sql.params, format: 'JSON', connectionId, + clickhouse_settings: { + max_execution_time: METRIC_ATTR_KEYS_MAX_EXEC_SECONDS, + timeout_overflow_mode: 'break', + }, abort_signal: signal, }); const result = (await response.json()) as { @@ -474,157 +459,128 @@ async function describeMetricImpl( const databaseName = source.from.databaseName; const connectionId = source.connection.toString(); - // Resolve which kinds to describe. With an explicit `kind`, restrict to - // that one. Without, probe each populated kind so the agent gets one - // entry per match (rare but possible for shared metric names). - let candidateKinds: QueryableMetricKind[]; - if (input.kind) { - if (!source.metricTables[input.kind]) { - return { - isError: true, - content: [ - { - type: 'text' as const, - text: `Source "${input.sourceId}" has no "${input.kind}" metric table populated. Populated kinds: ${Object.keys(source.metricTables).join(', ')}.`, - }, - ], - }; - } - candidateKinds = [input.kind]; - } else { - candidateKinds = await detectKindsForMetric({ + // Validate the requested kind has a populated table on this source. + // The schema requires `kind`, so we always have an exact target. + const kind = input.kind; + const tableName = source.metricTables[kind]; + if (!tableName) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" has no "${kind}" metric table populated. Populated kinds: ${Object.keys(source.metricTables).join(', ')}.`, + }, + ], + }; + } + + // Defensive column-presence check before referencing MetricUnit / + // MetricDescription on this kind's table. + let columns: { name: string; type: string }[]; + try { + columns = await metadata.getColumns({ + databaseName, + tableName, + connectionId, + }); + } catch (e) { + logger.warn( + { kind, error: e instanceof Error ? e.message : String(e) }, + 'describeMetric: getColumns failed', + ); + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Failed to load columns for "${tableName}". The metric table may be missing or unreachable.`, + }, + ], + }; + } + const columnNames = new Set(columns.map(c => c.name)); + const hasUnit = columnNames.has('MetricUnit'); + const hasDescription = columnNames.has('MetricDescription'); + + const [meta, attributeKeys] = await Promise.all([ + fetchUnitAndDescription({ clickhouseClient, databaseName, - metricTables: source.metricTables, + tableName, connectionId, metricName: input.metricName, startDate, endDate, + hasUnit, + hasDescription, signal, - }); - if (candidateKinds.length === 0) { - return { - content: [ - { - type: 'text' as const, - text: JSON.stringify( - { - metricName: input.metricName, - kinds: [], - hint: - `No data found for MetricName "${input.metricName}" in any populated metric table ` + - `between ${startDate.toISOString()} and ${endDate.toISOString()}. ` + - 'Try widening startTime/endTime, or call clickstack_list_metrics to confirm the metric name exists.', - }, - null, - 2, - ), - }, - ], - }; - } - } - - const kindDetails: KindDetail[] = []; - const skippedStages: string[] = []; + }), + fetchAttributeKeys({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName: input.metricName, + columns, + startDate, + endDate, + signal, + }), + ]); + + const kindDetail: KindDetail = { + kind, + tableName, + ...(meta.unit ? { unit: meta.unit } : {}), + ...(meta.description ? { description: meta.description } : {}), + attributeKeys, + usage: KIND_USAGE[kind], + }; - for (const kind of candidateKinds) { - if (signal.aborted) { - skippedStages.push(`kind:${kind}`); - continue; - } - const tableName = source.metricTables[kind]; - if (!tableName) continue; - - // Defensive column-presence check before referencing MetricUnit / - // MetricDescription. - let columns: { name: string; type: string }[]; - try { - columns = await metadata.getColumns({ - databaseName, - tableName, - connectionId, - }); - } catch (e) { - logger.warn( - { kind, error: e instanceof Error ? e.message : String(e) }, - 'describeMetric: getColumns failed', - ); - continue; - } - const columnNames = new Set(columns.map(c => c.name)); - const hasUnit = columnNames.has('MetricUnit'); - const hasDescription = columnNames.has('MetricDescription'); - - const [meta, attributeKeys] = await Promise.all([ - fetchUnitAndDescription({ - clickhouseClient, - databaseName, - tableName, - connectionId, - metricName: input.metricName, - startDate, - endDate, - hasUnit, - hasDescription, - signal, - }), - fetchAttributeKeys({ - clickhouseClient, - databaseName, - tableName, - connectionId, - metricName: input.metricName, - columns, - signal, - }), - ]); - - const detail: KindDetail = { - kind, + if (input.sampleValues && Object.keys(attributeKeys).length > 0) { + kindDetail.attributeValues = await sampleAttributeValues({ + clickhouseClient, + databaseName, tableName, - ...(meta.unit ? { unit: meta.unit } : {}), - ...(meta.description ? { description: meta.description } : {}), + connectionId, + metricName: input.metricName, attributeKeys, - usage: KIND_USAGE[kind], - }; + startDate, + endDate, + signal, + }); + } - if (input.sampleValues && Object.keys(attributeKeys).length > 0) { - detail.attributeValues = await sampleAttributeValues({ - clickhouseClient, - databaseName, - tableName, - connectionId, - metricName: input.metricName, - attributeKeys, - startDate, - endDate, - signal, - }); - } + const kindDetails: KindDetail[] = [kindDetail]; - kindDetails.push(detail); - } + // Heuristic "no data in this kind" hint: when neither attribute keys, + // unit, nor description came back, the (metric, kind) pair likely has + // no data in the requested window. The agent's recourse is to widen + // startTime/endTime or double-check the kind via clickstack_list_metrics. + const noSignal = + Object.keys(attributeKeys).length === 0 && !meta.unit && !meta.description; - const exampleKind = kindDetails[0]?.kind; - const queryExample = exampleKind - ? `clickstack_timeseries({ sourceId: "${input.sourceId}", select: [{ aggFn: ${ - exampleKind === 'sum' - ? '"increase"' - : exampleKind === 'histogram' - ? '"quantile", level: 0.95' - : '"avg"' - }, metricType: "${exampleKind}", metricName: "${input.metricName}" }] })` - : undefined; + const queryExample = `clickstack_timeseries({ sourceId: "${input.sourceId}", select: [{ aggFn: ${ + kind === 'sum' + ? '"increase"' + : kind === 'histogram' + ? '"quantile", level: 0.95' + : '"avg"' + }, metricType: "${kind}", metricName: "${input.metricName}" }] })`; const responseObj: Record = { metricName: input.metricName, kinds: kindDetails, - ...(skippedStages.length > 0 && { partial: true, skippedStages }), + ...(noSignal && { + hint: + `No data found for MetricName "${input.metricName}" with kind "${kind}" ` + + `between ${startDate.toISOString()} and ${endDate.toISOString()}. ` + + 'Try widening startTime/endTime, or call clickstack_list_metrics to ' + + 'confirm the metric name + kind combination exists.', + }), nextSteps: { - query: queryExample - ? `Example: ${queryExample}` - : `Use clickstack_timeseries / clickstack_table with sourceId "${input.sourceId}" plus the metricType + metricName + attribute keys above.`, + query: `Example: ${queryExample}`, }, }; @@ -662,9 +618,13 @@ export function registerDescribeMetric( title: 'Describe Metric', description: 'DRILL-DOWN: Use after clickstack_list_metrics (or after a clickstack_describe_source ' + - 'sample) to get attribute keys, sampled values, kind, unit, and description for a ' + - 'specific metric. Attribute keys vary per metric name — not per source — so always call ' + - "this before clickstack_timeseries / clickstack_table for any metric you've never queried.\n\n" + + 'sample) to get attribute keys, sampled values, unit, and description for a ' + + 'specific (metricName, kind) pair. Attribute keys vary per metric — not per source — ' + + "so always call this before clickstack_timeseries / clickstack_table for any metric you've never queried.\n\n" + + 'REQUIRES `kind` — pass the gauge/sum/histogram value emitted alongside the metric name by ' + + 'clickstack_list_metrics or clickstack_describe_source. A metric name can legitimately ' + + 'live in more than one kind (e.g. "container.cpu.usage" appears in both gauge and sum); ' + + 'call this tool once per kind you care about.\n\n' + 'Workflow: clickstack_list_sources → clickstack_list_metrics → ' + 'clickstack_describe_metric → clickstack_timeseries|clickstack_table.', inputSchema: describeMetricSchema, @@ -703,8 +663,10 @@ export function registerDescribeMetric( { type: 'text' as const, text: - 'Discovery timed out. The metric table may be under load or have very high cardinality. ' + - 'Try again, narrow startTime/endTime, or set sampleValues:false.', + 'Discovery timed out. The metric table may be under load or the ' + + 'attribute set may be very high-cardinality. Try narrowing ' + + 'startTime/endTime or setting sampleValues:false to skip the ' + + 'value-sampling stage.', }, ], }; From e27d68355edf0155553f415752b648a040cd8a57 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 11 Jun 2026 15:05:29 -0500 Subject: [PATCH 16/21] [HDX-4347] perf(mcp): thread timestampValueExpression through describe_source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clickstack_describe_source consistently timed out on the cold-cache first call against a populated metric source, then succeeded on retry. The bottleneck was the value-sampling stages (3 + 4) and the metric- name sampling stage (5), which all delegate to metadata.getAllKeyValues / metadata.getMapKeys. When the source has no metadataMaterializedViews configured (the case for every metric source today), those helpers fall back to scanning the raw metric table. The fallback path only applies a time filter when timestampValueExpression is provided alongside dateRange — and describe_source was passing dateRange but not timestampValueExpression. The result was an unbounded scan per key against the full metric table on cold cache, easily exceeding the 10s wall-clock timeout. Pass source.timestampValueExpression through to: - metadata.getMapKeys (stage 2) - metadata.getAllKeyValues × 2 (stages 3 + 4) - sampleMetricNamesForKind (stage 5) and its inner metadata.getAllKeyValues call For log / trace sources this is also a small win because the no-MVs fallback now scopes its scan, but the practical impact is on metric sources where the rollup MVs are not yet configured. No new tests: existing sources.test.ts coverage already exercises every stage on a metric source. The change is observably load-bearing on the live MCP server — the first call now returns the full payload in under the 10s budget instead of timing out. --- .../api/src/mcp/tools/sources/describeSource.ts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/packages/api/src/mcp/tools/sources/describeSource.ts b/packages/api/src/mcp/tools/sources/describeSource.ts index 53e36d93ce..757cccf6bd 100644 --- a/packages/api/src/mcp/tools/sources/describeSource.ts +++ b/packages/api/src/mcp/tools/sources/describeSource.ts @@ -78,6 +78,7 @@ async function sampleMetricNamesForKind({ tableName, connectionId, dateRange, + timestampValueExpression, signal, cachedColumns, }: { @@ -87,6 +88,7 @@ async function sampleMetricNamesForKind({ tableName: string; connectionId: string; dateRange: [Date, Date]; + timestampValueExpression: string; signal: AbortSignal; cachedColumns?: { name: string }[]; }): Promise { @@ -100,6 +102,9 @@ async function sampleMetricNamesForKind({ // First fetch the distinct metric names; this is the only step that // strictly needs to succeed for the kind to appear in the response. + // Pass timestampValueExpression so the no-rollup fallback path scopes + // its scan to dateRange instead of going unbounded against the raw + // metric table on cold cache. const nameResults = await metadata.getAllKeyValues({ databaseName, tableName, @@ -107,6 +112,7 @@ async function sampleMetricNamesForKind({ maxValuesPerKey: MAX_METRIC_NAMES_PER_KIND, connectionId, dateRange, + timestampValueExpression, signal, }); const names = nameResults[0]?.value ?? []; @@ -388,6 +394,12 @@ async function describeSourceSchema( })); // ── 2. Map attribute keys ───────────────────────────────────────────── + // timestampValueExpression is threaded into getMapKeys / getAllKeyValues / + // sampleMetricNamesForKind below so the no-rollup fallback path (i.e. + // metric sources, which don't have metadataMaterializedViews configured) + // can scope its scan to dateRange instead of going unbounded against + // the raw metric table on cold cache. + const timestampValueExpression = source.timestampValueExpression; const mapColumns = filterColumnMetaByType(columns, [JSDataType.Map]); const mapKeysResults: Record = {}; @@ -403,6 +415,7 @@ async function describeSourceSchema( connectionId, metadataMVs, dateRange, + timestampValueExpression, signal, }); mapKeysResults[col.name] = keys; @@ -444,6 +457,7 @@ async function describeSourceSchema( connectionId, metadataMVs, dateRange, + timestampValueExpression, signal, }); for (const { key, value } of results) { @@ -483,6 +497,7 @@ async function describeSourceSchema( connectionId, metadataMVs, dateRange, + timestampValueExpression, signal, }); for (const { key, value } of results) { @@ -526,6 +541,7 @@ async function describeSourceSchema( tableName: kindTableName, connectionId, dateRange, + timestampValueExpression, signal, // Reuse representative columns when the kind matches the // representative table to avoid a second getColumns round-trip. From 7f395b329064cfd454f4512602c0a21a2423529f Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 11 Jun 2026 16:11:00 -0500 Subject: [PATCH 17/21] [HDX-4347] perf(mcp): bound sampleAttributeValues scan to match fetchAttributeKeys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile pointed out that sampleAttributeValues hit the same (MetricName, time range) filter on the same metric table as fetchAttributeKeys but lacked any of the scan bounds the earlier commit added there: - no inner LIMIT subquery - no max_execution_time / timeout_overflow_mode On a hot metric with millions of matching rows in the default 24-hour window the query could run until the client-side AbortController fires the wall-clock timeout, after which the ClickHouse server may still be processing the work. Apply the same pattern used in fetchAttributeKeys: - Wrap the FROM/WHERE clause in a subquery that LIMITs at METRIC_ATTR_KEYS_SAMPLE_SIZE matching rows. The inner SELECT projects only the distinct set of map columns referenced by the sampled keys. - Add clickhouse_settings: { max_execution_time: METRIC_ATTR_KEYS_MAX_EXEC_SECONDS, timeout_overflow_mode: 'break' } as a server-side ceiling. Live retest against the metric source on staging confirms the attributeValues payload still comes back rich (same k8s.* diversity on container.cpu.usage) — the 100k-row sample is plenty for value discovery. Refs: https://github.com/hyperdxio/hyperdx/pull/2437#discussion_r3398993569 --- .../src/mcp/tools/sources/describeMetric.ts | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/packages/api/src/mcp/tools/sources/describeMetric.ts b/packages/api/src/mcp/tools/sources/describeMetric.ts index 262341fa25..a994b578c3 100644 --- a/packages/api/src/mcp/tools/sources/describeMetric.ts +++ b/packages/api/src/mcp/tools/sources/describeMetric.ts @@ -357,12 +357,31 @@ async function sampleAttributeValues({ `, ); + // Distinct set of map columns we need from each sampled row, so the + // inner subquery only projects what's required for the outer + // groupUniqArray aggregates. + const sampledMapColumns = Array.from( + new Set(flatKeyExprs.map(e => e.mapColumn)), + ); + const sampleProjections = sampledMapColumns.map( + col => chSql`${{ Identifier: col }}`, + ); + + // Aggregate from a bounded sample of matching rows. Mirrors + // fetchAttributeKeys: the inner LIMIT lets ClickHouse stop scanning + // once it has SAMPLE_SIZE rows that match (MetricName, time range), + // and the matching clickhouse_settings cap server-side execution so a + // hot metric cannot starve the wall-clock budget. const sql = chSql` SELECT ${concatChSql(', ', projections)} - FROM ${tableExpr({ database: databaseName, table: tableName })} - WHERE MetricName = ${{ String: metricName }} - AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }}) - AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }}) + FROM ( + SELECT ${concatChSql(', ', sampleProjections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName = ${{ String: metricName }} + AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }}) + AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }}) + LIMIT ${{ Int32: METRIC_ATTR_KEYS_SAMPLE_SIZE }} + ) `; try { @@ -371,6 +390,10 @@ async function sampleAttributeValues({ query_params: sql.params, format: 'JSON', connectionId, + clickhouse_settings: { + max_execution_time: METRIC_ATTR_KEYS_MAX_EXEC_SECONDS, + timeout_overflow_mode: 'break', + }, abort_signal: signal, }); const result = (await response.json()) as { From 0c2e0d9307bace9b0cef71c007fe432edcca627c Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Fri, 12 Jun 2026 15:15:09 -0500 Subject: [PATCH 18/21] [HDX-4347] docs(controllers): generalize getSource comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit getSource is called from REST routers, external-api v2, and MCP tools alike — the inline comments referenced only the MCP surface. Reword to caller-neutral language per review feedback. Refs: https://github.com/hyperdxio/hyperdx/pull/2437#discussion_r3405313539 --- packages/api/src/controllers/sources.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/api/src/controllers/sources.ts b/packages/api/src/controllers/sources.ts index 940d88cfe6..daa24cf810 100644 --- a/packages/api/src/controllers/sources.ts +++ b/packages/api/src/controllers/sources.ts @@ -37,9 +37,9 @@ export function getSources(team: string) { } export async function getSource(team: string, sourceId: string) { - // Pre-check the sourceId shape so a non-ObjectId input from the MCP - // surface returns null (and the caller's "not found" branch) instead - // of bubbling a Mongoose CastError through to the agent. + // Pre-check the sourceId shape so a non-ObjectId input returns null + // (the caller's "not found" branch) instead of bubbling a Mongoose + // CastError. if (!mongoose.Types.ObjectId.isValid(sourceId)) { return null; } @@ -47,8 +47,8 @@ export async function getSource(team: string, sourceId: string) { return await Source.findOne({ _id: sourceId, team }); } catch { // Defense-in-depth: if Mongoose still throws (e.g. a future cast - // path), treat it as "not found" so the MCP tools surface a clean - // error message rather than a raw stack. + // path), treat it as "not found" so the caller can surface a clean + // error. return null; } } From 4b4b40c897784519b50fb4a004688ad8e539fa3d Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Fri, 12 Jun 2026 15:19:00 -0500 Subject: [PATCH 19/21] [HDX-4347] fix(mcp): gate increase top-N hint on group count, migrate to hints[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two problems with the query-tool hint plumbing: 1. annotateIncreaseTopNHint emitted the 'results may be truncated' disclaimer whenever increase + groupBy returned any data — even 3 groups — making it a false alarm rather than a detection. 2. Both annotateIncreaseTopNHint and the single-bucket hint in timeseries.ts assigned to parsed.hint directly, so whichever ran second silently overwrote the other. Fixes: - Add countDistinctGroups(): resolves comma-separated groupBy segments against the result row keys (bracket-aware split) and counts distinct combinations. The truncation hint now only fires when the distinct group count reaches INCREASE_TOP_N_CAP. When no segment resolves to a row key (e.g. bracket-syntax map attrs render as arrayElement(...) aliases), skip the hint instead of crying wolf. - Replace the single hint: string with hints: string[] across the query-tool response shape (annotateIncreaseTopNHint, the single-bucket hint, and formatQueryResult's empty-result hint) via a shared appendHint() helper, so multiple advisories coexist. Tests: rewrite the annotateIncreaseTopNHint block — at/below cap, distinct-groups-vs-raw-rows, multi-column groupBy combinations, unresolvable groupBy, and append-not-overwrite coexistence. Refs: https://github.com/hyperdxio/hyperdx/pull/2437#discussion_r3405783414 --- packages/api/src/mcp/__tests__/query.test.ts | 91 +++++++++++++++++-- packages/api/src/mcp/tools/query/helpers.ts | 60 ++++++++++-- .../api/src/mcp/tools/query/timeseries.ts | 13 ++- 3 files changed, 141 insertions(+), 23 deletions(-) diff --git a/packages/api/src/mcp/__tests__/query.test.ts b/packages/api/src/mcp/__tests__/query.test.ts index 3feaacc68e..6033e6c712 100644 --- a/packages/api/src/mcp/__tests__/query.test.ts +++ b/packages/api/src/mcp/__tests__/query.test.ts @@ -629,49 +629,120 @@ function buildResult(data: unknown[]) { }; } +/** Rows with `count` distinct ServiceName group values. */ +function buildGroupedRows(count: number) { + return Array.from({ length: count }, (_, i) => ({ + ServiceName: `svc-${i}`, + Series: i, + })); +} + describe('annotateIncreaseTopNHint', () => { it('exposes the renderer cap as a constant', () => { expect(INCREASE_TOP_N_CAP).toBe(20); }); - it('appends a hint when increase + groupBy is used and result is non-empty', () => { + it('appends a hint when the distinct group count reaches the cap', () => { + const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP)); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toHaveLength(1); + expect(parsed.hints[0]).toContain('top 20'); + expect(parsed.hints[0]).toContain('aggFn:"increase"'); + }); + + it('does NOT annotate when the distinct group count is below the cap', () => { + const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP - 1)); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toBeUndefined(); + }); + + it('counts distinct groups across repeated time buckets, not raw rows', () => { + // 3 groups × 10 buckets = 30 rows but only 3 distinct groups. + const rows = Array.from({ length: 30 }, (_, i) => ({ + ServiceName: `svc-${i % 3}`, + bucket: i, + })); + const result = buildResult(rows); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toBeUndefined(); + }); + + it('does NOT annotate when the groupBy column cannot be resolved from rows', () => { const result = buildResult([{ x: 1 }, { x: 2 }]); annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); const parsed = JSON.parse(result.content[0].text); - expect(parsed.hint).toContain('top 20'); - expect(parsed.hint).toContain('aggFn:"increase"'); + expect(parsed.hints).toBeUndefined(); + }); + + it('supports multi-column groupBy by counting distinct combinations', () => { + // 4 ServiceName values × 5 SpanName values = 20 distinct combos. + const rows = Array.from({ length: 20 }, (_, i) => ({ + ServiceName: `svc-${i % 4}`, + SpanName: `op-${Math.floor(i / 4)}`, + })); + const result = buildResult(rows); + annotateIncreaseTopNHint( + result, + [{ aggFn: 'increase' }], + 'ServiceName, SpanName', + ); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toHaveLength(1); + }); + + it('appends alongside an existing hint instead of overwriting', () => { + const result = { + content: [ + { + type: 'text', + text: JSON.stringify({ + result: { data: buildGroupedRows(INCREASE_TOP_N_CAP) }, + hints: ['previous hint from another writer'], + }), + }, + ], + isError: false, + }; + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toHaveLength(2); + expect(parsed.hints[0]).toBe('previous hint from another writer'); + expect(parsed.hints[1]).toContain('top 20'); }); it('does NOT annotate when increase is used WITHOUT a groupBy', () => { - const result = buildResult([{ x: 1 }]); + const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP)); annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], undefined); const parsed = JSON.parse(result.content[0].text); - expect(parsed.hint).toBeUndefined(); + expect(parsed.hints).toBeUndefined(); }); it('does NOT annotate when groupBy is an empty string', () => { - const result = buildResult([{ x: 1 }]); + const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP)); annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], ' '); const parsed = JSON.parse(result.content[0].text); - expect(parsed.hint).toBeUndefined(); + expect(parsed.hints).toBeUndefined(); }); it('does NOT annotate when no select item uses increase', () => { - const result = buildResult([{ x: 1 }]); + const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP)); annotateIncreaseTopNHint( result, [{ aggFn: 'sum' }, { aggFn: 'avg' }], 'ServiceName', ); const parsed = JSON.parse(result.content[0].text); - expect(parsed.hint).toBeUndefined(); + expect(parsed.hints).toBeUndefined(); }); it('does NOT annotate empty results (already labelled by formatQueryResult)', () => { const result = buildResult([]); annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); const parsed = JSON.parse(result.content[0].text); - expect(parsed.hint).toBeUndefined(); + expect(parsed.hints).toBeUndefined(); }); it('does NOT annotate error results', () => { diff --git a/packages/api/src/mcp/tools/query/helpers.ts b/packages/api/src/mcp/tools/query/helpers.ts index 68bb53f651..2c466dc0da 100644 --- a/packages/api/src/mcp/tools/query/helpers.ts +++ b/packages/api/src/mcp/tools/query/helpers.ts @@ -89,12 +89,47 @@ const MCP_REQUEST_TIMEOUT = 32_000; // 30s query limit + 2s buffer */ export const INCREASE_TOP_N_CAP = 20; +/** + * Append a hint message onto a parsed tool response body. All hint + * writers share `hints: string[]` so multiple advisories can coexist + * (e.g. the increase top-N cap and the single-bucket collapse hint can + * legitimately both apply to the same result). + */ +export function appendHint(parsed: Record, hint: string) { + const existing = Array.isArray(parsed.hints) ? parsed.hints : []; + parsed.hints = [...existing, hint]; +} + +/** + * Count the distinct groupBy combinations present in a result set. + * Resolves each comma-separated groupBy segment against the row keys + * (the renderer aliases group columns by their literal expression). + * Returns null when no segment matches any row key — the caller should + * treat that as "count unknown". + */ +function countDistinctGroups( + data: Array>, + groupBy: string, +): number | null { + const segments = splitAndTrimWithBracket(groupBy); + const sample = data[0]; + if (!sample || typeof sample !== 'object') return null; + const resolvable = segments.filter(seg => seg in sample); + if (resolvable.length === 0) return null; + const seen = new Set(); + for (const row of data) { + seen.add(JSON.stringify(resolvable.map(seg => row[seg]))); + } + return seen.size; +} + /** * Mutates a successful tool result envelope in place to add a neutral - * informational hint when the renderer's increase+groupBy top-N cap may - * have applied. Safe to call unconditionally — does nothing for empty - * results, error results, or queries that don't combine `aggFn:"increase"` - * with a non-empty `groupBy`. + * informational hint when the renderer's increase+groupBy top-N cap + * likely applied. Safe to call unconditionally — does nothing for empty + * results, error results, queries that don't combine `aggFn:"increase"` + * with a non-empty `groupBy`, or results whose distinct group count is + * below the cap (no truncation possible). */ export function annotateIncreaseTopNHint( result: { content?: { type: string; text?: string }[]; isError?: boolean }, @@ -110,10 +145,17 @@ export function annotateIncreaseTopNHint( const parsed = JSON.parse(first.text); const data = parsed?.result?.data; if (!Array.isArray(data) || data.length === 0) return; - parsed.hint = + // Only warn when the result actually carries enough distinct groups + // to have hit the renderer cap. When the group column cannot be + // resolved from the rows, skip the hint rather than crying wolf. + const distinctGroups = countDistinctGroups(data, groupBy); + if (distinctGroups === null || distinctGroups < INCREASE_TOP_N_CAP) return; + appendHint( + parsed, `aggFn:"increase" combined with groupBy is capped at the top ${INCREASE_TOP_N_CAP} ` + - `groups by max bucket sum at the renderer layer. ` + - `Results may not include every group present in the underlying data.`; + `groups by max bucket sum at the renderer layer. ` + + `Results may not include every group present in the underlying data.`, + ); first.text = JSON.stringify(parsed, null, 2); } catch { // leave result unmodified on parse failure @@ -252,7 +294,9 @@ function formatQueryResult(result: unknown) { : {}), ...(empty ? { - hint: 'No data found in the queried time range. Try setting startTime to a wider window (e.g. 24 hours ago) or check that filters match existing data.', + hints: [ + 'No data found in the queried time range. Try setting startTime to a wider window (e.g. 24 hours ago) or check that filters match existing data.', + ], } : {}), }, diff --git a/packages/api/src/mcp/tools/query/timeseries.ts b/packages/api/src/mcp/tools/query/timeseries.ts index fdc2f4b179..0f80041da5 100644 --- a/packages/api/src/mcp/tools/query/timeseries.ts +++ b/packages/api/src/mcp/tools/query/timeseries.ts @@ -5,6 +5,7 @@ import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; import { annotateIncreaseTopNHint, + appendHint, buildTile, mergeWhereIntoSelectItems, parseTimeRange, @@ -175,12 +176,14 @@ export function registerTimeseries(server: McpServer, context: McpContext) { const parsed = JSON.parse(result.content[0].text); const data = parsed?.result?.data; if (Array.isArray(data) && data.length === 1 && input.granularity) { - parsed.hint = + appendHint( + parsed, `Timeseries returned only 1 time bucket. ` + - `All data may have collapsed into a single "${input.granularity}" bucket. ` + - `The queried range was ${startDate.toISOString()} to ${endDate.toISOString()}. ` + - `If this looks wrong, adjust startTime/endTime to match the actual data range, ` + - `or try a coarser granularity.`; + `All data may have collapsed into a single "${input.granularity}" bucket. ` + + `The queried range was ${startDate.toISOString()} to ${endDate.toISOString()}. ` + + `If this looks wrong, adjust startTime/endTime to match the actual data range, ` + + `or try a coarser granularity.`, + ); result.content[0].text = JSON.stringify(parsed, null, 2); } } catch { From b41d6230219a390f91c6e9d948c2668601bb5f47 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Fri, 12 Jun 2026 15:23:12 -0500 Subject: [PATCH 20/21] [HDX-4347] feat(mcp): surface partialFailure on describe_metric and list_metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Discovery sub-query failures were silently swallowed into empty returns (fetchAttributeKeys / sampleAttributeValues catch → {}; listMetrics per-kind catch → continue). Transient ClickHouse errors then surfaced as the 'No data found — try widening startTime/endTime' hint, so agents could not distinguish 'genuinely empty' from 'fetch failed' and followed incorrect recovery steps. - fetchAttributeKeys / sampleAttributeValues now return a typed { ok, data } | { ok: false, error } result. Errors are sanitised (single line, capped at 200 chars). - describe_metric collects failures into partialFailure[{stage,error}] on the response and suppresses the no-data hint when any stage failed — empty attributeKeys can't be trusted as 'no data' then. A retry-oriented hint is emitted instead. - list_metrics collects per-kind fetch failures into partialFailure[{kind,error}] and suppresses the 'No metrics matched' hint when failures explain the empty result. Tests: new cases pointing a kind's metricTables entry at the logs table (exists, but no MetricName/TimeUnix columns) to force the discovery query to throw; assert partialFailure is present and the misleading widen-the-window hints are absent. Happy-path test asserts partialFailure stays undefined. Refs: https://github.com/hyperdxio/hyperdx/pull/2437#discussion_r3405795712 --- .../api/src/mcp/__tests__/sources.test.ts | 61 +++++++++++++++ .../src/mcp/tools/sources/describeMetric.ts | 74 +++++++++++++++---- .../api/src/mcp/tools/sources/listMetrics.ts | 28 +++++-- 3 files changed, 142 insertions(+), 21 deletions(-) diff --git a/packages/api/src/mcp/__tests__/sources.test.ts b/packages/api/src/mcp/__tests__/sources.test.ts index da58d5ee68..d2804bc21c 100644 --- a/packages/api/src/mcp/__tests__/sources.test.ts +++ b/packages/api/src/mcp/__tests__/sources.test.ts @@ -549,6 +549,35 @@ describe('MCP Source Tools', () => { expect(output.metrics).toEqual([]); expect(output.hint).toMatch(/widening|removing|omitting/); }); + + it('surfaces partialFailure instead of the empty hint when a kind fetch fails', async () => { + // Point the gauge kind at the logs table: it exists (so source + // resolution succeeds) but has no MetricName/TimeUnix columns, so + // the per-kind listing query throws. + const brokenSource = await Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: DEFAULT_LOGS_TABLE, + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'BrokenMetrics', + }); + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: brokenSource._id.toString(), + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.metrics).toEqual([]); + expect(output.partialFailure).toHaveLength(1); + expect(output.partialFailure[0].kind).toBe('gauge'); + expect(output.partialFailure[0].error).toBeTruthy(); + // The misleading "No metrics matched … widen the window" hint must + // NOT appear — the agent should retry, not widen. + expect(output.hint).not.toMatch(/No metrics matched/); + }); }); // ── clickstack_describe_metric ─────────────────────────────────────────── @@ -605,6 +634,38 @@ describe('MCP Source Tools', () => { expect(output.kinds[0].kind).toBe('gauge'); expect(output.kinds[0].attributeKeys).toEqual({}); expect(output.hint).toMatch(/No data found/); + expect(output.partialFailure).toBeUndefined(); + }); + + it('surfaces partialFailure instead of the no-data hint when discovery fails', async () => { + // Point the gauge kind at the logs table: it exists (so getColumns + // succeeds) but lacks MetricName/TimeUnix columns, so the + // attribute-keys discovery query throws. + const brokenSource = await Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: DEFAULT_LOGS_TABLE, + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'BrokenMetrics', + }); + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: brokenSource._id.toString(), + metricName: 'whatever', + kind: 'gauge', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.partialFailure).toBeDefined(); + expect( + output.partialFailure.map((f: { stage: string }) => f.stage), + ).toContain('attributeKeys'); + // The misleading "No data found … widen startTime/endTime" hint + // must NOT appear — the fetch failed; widening would not help. + expect(output.hint).not.toMatch(/No data found/); }); it('returns attribute keys for a gauge metric when kind is specified', async () => { diff --git a/packages/api/src/mcp/tools/sources/describeMetric.ts b/packages/api/src/mcp/tools/sources/describeMetric.ts index a994b578c3..743d037150 100644 --- a/packages/api/src/mcp/tools/sources/describeMetric.ts +++ b/packages/api/src/mcp/tools/sources/describeMetric.ts @@ -111,6 +111,22 @@ type KindDetail = { usage: string; }; +/** + * Discriminated result for the discovery sub-queries so the caller can + * distinguish "fetch failed" from "genuinely empty" — the two cases + * need different agent guidance (retry/report vs. widen the window). + */ +type FetchResult = { ok: true; data: T } | { ok: false; error: string }; + +/** + * Compact an error for inclusion in a tool response: single line, + * capped length, no stack frames. + */ +function sanitizeFetchError(e: unknown): string { + const message = e instanceof Error ? e.message : String(e); + return message.replace(/\s+/g, ' ').trim().slice(0, 200); +} + function parseTimeRange( startTime?: string, endTime?: string, @@ -239,12 +255,12 @@ async function fetchAttributeKeys({ startDate: Date; endDate: Date; signal: AbortSignal; -}): Promise> { +}): Promise>> { const mapColumns = filterColumnMetaByType(columns, [JSDataType.Map])?.filter( c => convertCHDataTypeToJSType(c.type) === JSDataType.Map, ) ?? []; - if (mapColumns.length === 0) return {}; + if (mapColumns.length === 0) return { ok: true, data: {} }; const sampleProjections = mapColumns.map( col => chSql`${{ Identifier: col.name }}`, @@ -289,7 +305,7 @@ async function fetchAttributeKeys({ data: Array>; }; const row = result.data[0]; - if (!row) return {}; + if (!row) return { ok: true, data: {} }; const out: Record = {}; for (const col of mapColumns) { const keys = row[col.name]; @@ -297,13 +313,13 @@ async function fetchAttributeKeys({ out[col.name] = keys.filter(k => typeof k === 'string' && k.length > 0); } } - return out; + return { ok: true, data: out }; } catch (e) { logger.warn( { tableName, error: e instanceof Error ? e.message : String(e) }, 'fetchAttributeKeys failed', ); - return {}; + return { ok: false, error: sanitizeFetchError(e) }; } } @@ -333,7 +349,7 @@ async function sampleAttributeValues({ startDate: Date; endDate: Date; signal: AbortSignal; -}): Promise> { +}): Promise>> { const flatKeyExprs: { display: string; mapColumn: string; key: string }[] = []; for (const [mapColumn, keys] of Object.entries(attributeKeys)) { @@ -347,7 +363,7 @@ async function sampleAttributeValues({ } if (flatKeyExprs.length >= MAX_ATTR_KEYS_TO_SAMPLE) break; } - if (flatKeyExprs.length === 0) return {}; + if (flatKeyExprs.length === 0) return { ok: true, data: {} }; const projections = flatKeyExprs.map( ({ mapColumn, key }, idx) => chSql` @@ -400,7 +416,7 @@ async function sampleAttributeValues({ data: Array>; }; const row = result.data[0]; - if (!row) return {}; + if (!row) return { ok: true, data: {} }; const values: Record = {}; flatKeyExprs.forEach((meta, idx) => { const sample = (row[`param${idx}`] ?? []).filter(v => v !== ''); @@ -408,13 +424,13 @@ async function sampleAttributeValues({ values[meta.display] = sample; } }); - return values; + return { ok: true, data: values }; } catch (e) { logger.warn( { tableName, error: e instanceof Error ? e.message : String(e) }, 'sampleAttributeValues failed', ); - return {}; + return { ok: false, error: sanitizeFetchError(e) }; } } @@ -526,7 +542,7 @@ async function describeMetricImpl( const hasUnit = columnNames.has('MetricUnit'); const hasDescription = columnNames.has('MetricDescription'); - const [meta, attributeKeys] = await Promise.all([ + const [meta, attributeKeysResult] = await Promise.all([ fetchUnitAndDescription({ clickhouseClient, databaseName, @@ -552,6 +568,19 @@ async function describeMetricImpl( }), ]); + // Track discovery sub-queries that failed so the agent can distinguish + // "fetch failed" (retry / report) from "genuinely empty" (widen the + // window). Without this, transient ClickHouse errors surfaced as the + // misleading "No data found" hint. + const partialFailure: { stage: string; error: string }[] = []; + const attributeKeys = attributeKeysResult.ok ? attributeKeysResult.data : {}; + if (!attributeKeysResult.ok) { + partialFailure.push({ + stage: 'attributeKeys', + error: attributeKeysResult.error, + }); + } + const kindDetail: KindDetail = { kind, tableName, @@ -562,7 +591,7 @@ async function describeMetricImpl( }; if (input.sampleValues && Object.keys(attributeKeys).length > 0) { - kindDetail.attributeValues = await sampleAttributeValues({ + const valuesResult = await sampleAttributeValues({ clickhouseClient, databaseName, tableName, @@ -573,6 +602,14 @@ async function describeMetricImpl( endDate, signal, }); + if (valuesResult.ok) { + kindDetail.attributeValues = valuesResult.data; + } else { + partialFailure.push({ + stage: 'attributeValues', + error: valuesResult.error, + }); + } } const kindDetails: KindDetail[] = [kindDetail]; @@ -581,8 +618,13 @@ async function describeMetricImpl( // unit, nor description came back, the (metric, kind) pair likely has // no data in the requested window. The agent's recourse is to widen // startTime/endTime or double-check the kind via clickstack_list_metrics. + // Suppressed when any discovery stage failed — an empty attributeKeys + // can't be trusted as "no data" then. const noSignal = - Object.keys(attributeKeys).length === 0 && !meta.unit && !meta.description; + partialFailure.length === 0 && + Object.keys(attributeKeys).length === 0 && + !meta.unit && + !meta.description; const queryExample = `clickstack_timeseries({ sourceId: "${input.sourceId}", select: [{ aggFn: ${ kind === 'sum' @@ -595,6 +637,12 @@ async function describeMetricImpl( const responseObj: Record = { metricName: input.metricName, kinds: kindDetails, + ...(partialFailure.length > 0 && { + partialFailure, + hint: + 'Some discovery queries failed — the fields above may be incomplete. ' + + 'Retry the call; if the failure persists, narrow startTime/endTime or set sampleValues:false.', + }), ...(noSignal && { hint: `No data found for MetricName "${input.metricName}" with kind "${kind}" ` + diff --git a/packages/api/src/mcp/tools/sources/listMetrics.ts b/packages/api/src/mcp/tools/sources/listMetrics.ts index 27884996f9..20f8ff8009 100644 --- a/packages/api/src/mcp/tools/sources/listMetrics.ts +++ b/packages/api/src/mcp/tools/sources/listMetrics.ts @@ -355,6 +355,10 @@ export function registerListMetrics( const databaseName = source.from.databaseName; const metrics: MetricEntry[] = []; + // Per-kind fetch failures, surfaced on the response so the agent + // can distinguish "kind genuinely has no metrics" from "the fetch + // for that kind failed" — the two need different recovery steps. + const partialFailure: { kind: string; error: string }[] = []; let nextCursor: string | undefined; for (let i = startKindIdx; i < requestedKinds.length; i++) { const kind = requestedKinds[i]; @@ -383,14 +387,15 @@ export function registerListMetrics( limit: remaining + 1, }); } catch (e) { + const message = e instanceof Error ? e.message : String(e); logger.warn( - { - sourceId: input.sourceId, - kind, - error: e instanceof Error ? e.message : String(e), - }, + { sourceId: input.sourceId, kind, error: message }, 'Failed to list metrics for kind', ); + partialFailure.push({ + kind, + error: message.replace(/\s+/g, ' ').trim().slice(0, 200), + }); continue; } if (kindMetrics.length > remaining) { @@ -410,11 +415,18 @@ export function registerListMetrics( const responseObj: Record = { metrics, ...(nextCursor && { nextCursor }), - ...(metrics.length === 0 && { + ...(partialFailure.length > 0 && { + partialFailure, hint: - 'No metrics matched. Try widening the time window (startTime/endTime), ' + - 'removing the namePattern filter, or omitting `kind` to scan every populated metric table.', + 'Listing failed for some metric kinds — results may be incomplete. ' + + 'Retry the call; if the failure persists, narrow startTime/endTime or pin a single `kind`.', }), + ...(metrics.length === 0 && + partialFailure.length === 0 && { + hint: + 'No metrics matched. Try widening the time window (startTime/endTime), ' + + 'removing the namePattern filter, or omitting `kind` to scan every populated metric table.', + }), usage: 'Pass `metricType` + `metricName` from each entry to clickstack_timeseries / clickstack_table. ' + 'For per-metric attribute keys and sampled values, call clickstack_describe_metric.', From 3b0b570ab252b85ab40d5723eb2361b77e7bd227 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Fri, 12 Jun 2026 15:25:38 -0500 Subject: [PATCH 21/21] [HDX-4347] feat(mcp): distinguish unsampled from empty attributeValues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sampleAttributeValues caps value sampling at MAX_ATTR_KEYS_TO_SAMPLE (12) keys and only emits keys with non-empty samples — so a key absent from attributeValues was indistinguishable between 'skipped by the cap' and 'sampled but empty in the window'. Agents could not tell when to re-query a key directly. Add attributeValuesMeta to each kindDetail: - sampledKeys: display names actually queried for values - truncatedKeys: display names skipped because the cap fired A key in truncatedKeys was never queried; a key in sampledKeys but missing from attributeValues is genuinely empty in the window. Tool description documents the semantics so agents know to query truncated keys directly. Tests: 15-key metric → 12 sampled + 3 truncated, disjoint sets covering all keys; small metric → truncatedKeys empty. Refs: https://github.com/hyperdxio/hyperdx/pull/2437#discussion_r3405808748 --- .../api/src/mcp/__tests__/sources.test.ts | 66 +++++++++++++++++++ .../src/mcp/tools/sources/describeMetric.ts | 60 +++++++++++++---- 2 files changed, 112 insertions(+), 14 deletions(-) diff --git a/packages/api/src/mcp/__tests__/sources.test.ts b/packages/api/src/mcp/__tests__/sources.test.ts index d2804bc21c..8acee4dfed 100644 --- a/packages/api/src/mcp/__tests__/sources.test.ts +++ b/packages/api/src/mcp/__tests__/sources.test.ts @@ -705,6 +705,72 @@ describe('MCP Source Tools', () => { expect(output.nextSteps.query).toContain('metricType: "gauge"'); }); + it('reports sampledKeys and truncatedKeys so unsampled keys are distinguishable', async () => { + // 15 attribute keys > MAX_ATTR_KEYS_TO_SAMPLE (12): the overflow + // must land in truncatedKeys so the agent knows those keys were + // never queried (vs. sampled-but-empty). + const metricSource = await createMetricSource(); + const manyAttrs: Record = {}; + for (let i = 0; i < 15; i++) { + manyAttrs[`attr.key.${String(i).padStart(2, '0')}`] = `value-${i}`; + } + await bulkInsertMetricsGauge([ + { + MetricName: 'many.attrs.metric', + ResourceAttributes: manyAttrs, + ServiceName: 'many-attrs-svc', + TimeUnix: new Date(), + Value: 1, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'many.attrs.metric', + kind: 'gauge', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + const detail = output.kinds[0]; + const meta = detail.attributeValuesMeta; + expect(meta).toBeDefined(); + expect(meta.sampledKeys).toHaveLength(12); + expect(meta.truncatedKeys).toHaveLength(3); + // Sampled and truncated sets are disjoint and cover all 15 keys. + expect(new Set([...meta.sampledKeys, ...meta.truncatedKeys]).size).toBe( + 15, + ); + // Every key with values must have been sampled. + for (const key of Object.keys(detail.attributeValues ?? {})) { + expect(meta.sampledKeys).toContain(key); + } + }); + + it('reports empty truncatedKeys when all attribute keys fit the cap', async () => { + const metricSource = await createMetricSource(); + await bulkInsertMetricsGauge([ + { + MetricName: 'few.attrs.metric', + ResourceAttributes: { 'service.name': 'few-attrs-svc' }, + ServiceName: 'few-attrs-svc', + TimeUnix: new Date(), + Value: 1, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'few.attrs.metric', + kind: 'gauge', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + const meta = output.kinds[0].attributeValuesMeta; + expect(meta).toBeDefined(); + expect(meta.truncatedKeys).toEqual([]); + expect(meta.sampledKeys).toContain("ResourceAttributes['service.name']"); + }); + it('skips value sampling when sampleValues is false', async () => { const metricSource = await createMetricSource(); const now = new Date(); diff --git a/packages/api/src/mcp/tools/sources/describeMetric.ts b/packages/api/src/mcp/tools/sources/describeMetric.ts index 743d037150..d481652c46 100644 --- a/packages/api/src/mcp/tools/sources/describeMetric.ts +++ b/packages/api/src/mcp/tools/sources/describeMetric.ts @@ -101,6 +101,13 @@ const describeMetricSchema = z.object({ // ─── Helpers ───────────────────────────────────────────────────────────────── +type AttributeValuesMeta = { + /** Display names (`MapColumn['key']`) queried for value samples. */ + sampledKeys: string[]; + /** Display names skipped because the MAX_ATTR_KEYS_TO_SAMPLE cap fired. */ + truncatedKeys: string[]; +}; + type KindDetail = { kind: QueryableMetricKind; tableName: string; @@ -108,6 +115,7 @@ type KindDetail = { description?: string; attributeKeys: Record; attributeValues?: Record; + attributeValuesMeta?: AttributeValuesMeta; usage: string; }; @@ -323,10 +331,21 @@ async function fetchAttributeKeys({ } } +type SampleAttributeValuesData = { + /** Keys with at least one non-empty sampled value. */ + values: Record; + meta: AttributeValuesMeta; +}; + /** * Sample distinct values per attribute key. Uses one composed * groupArray-per-key query so all keys are fetched in a single round * trip. + * + * The returned meta records which keys were actually queried + * (`sampledKeys`) vs. skipped by the MAX_ATTR_KEYS_TO_SAMPLE cap + * (`truncatedKeys`) so callers can distinguish "not sampled" from + * "sampled but empty in the window". */ async function sampleAttributeValues({ clickhouseClient, @@ -349,21 +368,27 @@ async function sampleAttributeValues({ startDate: Date; endDate: Date; signal: AbortSignal; -}): Promise>> { +}): Promise> { const flatKeyExprs: { display: string; mapColumn: string; key: string }[] = []; + const truncatedKeys: string[] = []; for (const [mapColumn, keys] of Object.entries(attributeKeys)) { for (const key of keys) { - flatKeyExprs.push({ - display: `${mapColumn}['${key}']`, - mapColumn, - key, - }); - if (flatKeyExprs.length >= MAX_ATTR_KEYS_TO_SAMPLE) break; + const display = `${mapColumn}['${key}']`; + if (flatKeyExprs.length >= MAX_ATTR_KEYS_TO_SAMPLE) { + truncatedKeys.push(display); + continue; + } + flatKeyExprs.push({ display, mapColumn, key }); } - if (flatKeyExprs.length >= MAX_ATTR_KEYS_TO_SAMPLE) break; } - if (flatKeyExprs.length === 0) return { ok: true, data: {} }; + const meta: AttributeValuesMeta = { + sampledKeys: flatKeyExprs.map(e => e.display), + truncatedKeys, + }; + if (flatKeyExprs.length === 0) { + return { ok: true, data: { values: {}, meta } }; + } const projections = flatKeyExprs.map( ({ mapColumn, key }, idx) => chSql` @@ -416,15 +441,15 @@ async function sampleAttributeValues({ data: Array>; }; const row = result.data[0]; - if (!row) return { ok: true, data: {} }; + if (!row) return { ok: true, data: { values: {}, meta } }; const values: Record = {}; - flatKeyExprs.forEach((meta, idx) => { + flatKeyExprs.forEach((keyExpr, idx) => { const sample = (row[`param${idx}`] ?? []).filter(v => v !== ''); if (sample.length > 0) { - values[meta.display] = sample; + values[keyExpr.display] = sample; } }); - return { ok: true, data: values }; + return { ok: true, data: { values, meta } }; } catch (e) { logger.warn( { tableName, error: e instanceof Error ? e.message : String(e) }, @@ -603,7 +628,11 @@ async function describeMetricImpl( signal, }); if (valuesResult.ok) { - kindDetail.attributeValues = valuesResult.data; + kindDetail.attributeValues = valuesResult.data.values; + // sampledKeys / truncatedKeys let the agent tell "key absent + // because we never queried it" (truncated) apart from "queried + // but empty in the window" (in sampledKeys, missing from values). + kindDetail.attributeValuesMeta = valuesResult.data.meta; } else { partialFailure.push({ stage: 'attributeValues', @@ -696,6 +725,9 @@ export function registerDescribeMetric( 'clickstack_list_metrics or clickstack_describe_source. A metric name can legitimately ' + 'live in more than one kind (e.g. "container.cpu.usage" appears in both gauge and sum); ' + 'call this tool once per kind you care about.\n\n' + + 'attributeValuesMeta on each kind reports sampledKeys (queried for values) and ' + + 'truncatedKeys (skipped by the per-call sampling cap) — a key in truncatedKeys was ' + + 'never queried, so query it directly if you need its values.\n\n' + 'Workflow: clickstack_list_sources → clickstack_list_metrics → ' + 'clickstack_describe_metric → clickstack_timeseries|clickstack_table.', inputSchema: describeMetricSchema,