diff --git a/.changeset/mcp-metric-source-support.md b/.changeset/mcp-metric-source-support.md new file mode 100644 index 0000000000..4dfd135051 --- /dev/null +++ b/.changeset/mcp-metric-source-support.md @@ -0,0 +1,11 @@ +--- +'@hyperdx/api': minor +--- + +feat(mcp): first-class metric source support + +- Two new tools: `clickstack_list_metrics` paginates the metric-name catalog with optional kind / namePattern (ILIKE) / time-window filters and opaque cursor pagination; `clickstack_describe_metric` returns per-metric kind(s), unit, description, attribute keys, and sampled values (with kind auto-detection). +- `clickstack_describe_source` is metric-aware: picks a representative metric table (gauge → sum → histogram), runs column / map-key / value-sampling against it, and adds a per-kind metric-name sample. +- `clickstack_timeseries` and `clickstack_table` accept `metricType` (gauge / sum / histogram), `metricName`, and `isDelta` on each select item, plus `aggFn:"increase"` for Sum counters. `valueExpression` defaults to `"Value"` for metric sources. Surfaces the renderer's 20-group top-N cap on `increase + groupBy` as a neutral hint. +- Dashboard prompt's "use raw SQL for metric tiles" workaround is replaced with positive discovery-workflow guidance and one worked example per supported kind. +- `summary` and `"exponential histogram"` kinds remain out of scope (no query renderer support yet). diff --git a/MCP.md b/MCP.md index 3616e5c9a6..312276cb57 100644 --- a/MCP.md +++ b/MCP.md @@ -109,9 +109,11 @@ with: | Tool | Description | | ----------------------------- | -------------------------------------------------------------------------------------------- | | `clickstack_list_sources` | List all data sources and connections as a lightweight catalog (IDs, names, kinds) | -| `clickstack_describe_source` | Full column schema, attribute keys, and sampled low-cardinality values for a single source | -| `clickstack_timeseries` | Plot metrics over time as a line or stacked bar chart | -| `clickstack_table` | Compute aggregated metrics as a table, single number, or pie chart | +| `clickstack_describe_source` | Full column schema, attribute keys, and sampled low-cardinality values for a single source; for metric sources also returns a per-kind metric-name sample | +| `clickstack_list_metrics` | Paginated catalog of metric names on a metric source with optional kind / namePattern / time-window filters | +| `clickstack_describe_metric` | Per-metric drill-down: kind(s), unit, description, attribute keys per map column, and sampled values | +| `clickstack_timeseries` | Plot metrics over time as a line or stacked bar chart (works on log, trace, and metric sources) | +| `clickstack_table` | Compute aggregated metrics as a table, single number, or pie chart (works on log, trace, and metric sources) | | `clickstack_search` | Browse individual log, event, or trace rows | | `clickstack_event_patterns` | Discover the most common log messages and event patterns using Drain clustering | | `clickstack_event_deltas` | Compare two row groups and rank properties by how their value distributions differ | @@ -127,3 +129,43 @@ with: | `clickstack_trace_waterfall` | Fetch all spans in a single trace as a parent/child waterfall tree with optional correlated logs | | `clickstack_trace_top_time_consuming_operations` | Aggregate breakdown of child operations by cumulative time across matching parent traces | | `clickstack_get_webhook` | List available webhook destinations for use as alert notification channels | + +### Metric Sources + +`clickstack_timeseries`, `clickstack_table`, and the dashboard builder tile +tools accept metric sources transparently. Each `select` item on a metric +query must set `metricType` (`"gauge"`, `"sum"`, or `"histogram"`) and +`metricName` (the OTel metric name, e.g. `system.cpu.utilization`). +`valueExpression` defaults to `"Value"` when omitted, so a typical metric +series looks like: + +```jsonc +{ "aggFn": "avg", "metricType": "gauge", "metricName": "system.cpu.utilization" } +``` + +Per-kind aggregation guidance: + +- **Gauge**: `avg`, `last_value`, `min`, or `max`. Set `"isDelta": true` for + Prometheus-style delta over each bucket. +- **Sum (counter)**: `"increase"` returns the per-bucket counter increase + (reset-aware), or `sum` / `avg` on the computed rate. `increase` combined + with `groupBy` is capped at the top 20 groups by the renderer; the tool + emits a neutral hint when the cap may apply. +- **Histogram**: `"quantile"` with `level` ∈ {0.5, 0.9, 0.95, 0.99} for + percentiles, or `"count"` for the total bucket count. + +`summary` and `"exponential histogram"` metric kinds are not yet supported +by the query renderer. + +Discovery workflow for metrics: + +1. `clickstack_list_sources` to find the metric source ID and its + `metricTables` map. +2. `clickstack_describe_source` to see columns, attribute keys, and a + per-kind metric-name sample. +3. `clickstack_list_metrics` for paginated catalog access with optional + kind / namePattern / time-window filters; pass `nextCursor` unchanged + for the next page. +4. `clickstack_describe_metric` to drill into a specific metric and + discover its attribute keys + sampled values before authoring queries. +5. `clickstack_timeseries` / `clickstack_table` to chart the metric. diff --git a/packages/api/src/controllers/__tests__/sources.test.ts b/packages/api/src/controllers/__tests__/sources.test.ts new file mode 100644 index 0000000000..71a7d73466 --- /dev/null +++ b/packages/api/src/controllers/__tests__/sources.test.ts @@ -0,0 +1,39 @@ +import mongoose from 'mongoose'; + +import { getSource } from '@/controllers/sources'; +import { clearDBCollections, closeDB, connectDB } from '@/fixtures'; + +describe('sources controller', () => { + beforeAll(async () => { + await connectDB(); + }); + + afterEach(async () => { + await clearDBCollections(); + }); + + afterAll(async () => { + await closeDB(); + }); + + describe('getSource', () => { + it('returns null when sourceId is not a valid ObjectId', async () => { + // Non-ObjectId strings used to bubble a Mongoose CastError up + // through MCP tools as "Cast to ObjectId failed for value ...". + // The wrapper now short-circuits before hitting MongoDB so the + // caller's not-found branch fires cleanly. + const team = new mongoose.Types.ObjectId().toString(); + + expect(await getSource(team, 'not-an-objectid')).toBeNull(); + expect(await getSource(team, '')).toBeNull(); + expect(await getSource(team, ' ')).toBeNull(); + }); + + it('returns null for a well-formed but missing ObjectId', async () => { + const team = new mongoose.Types.ObjectId().toString(); + const missingSourceId = new mongoose.Types.ObjectId().toString(); + + expect(await getSource(team, missingSourceId)).toBeNull(); + }); + }); +}); diff --git a/packages/api/src/controllers/sources.ts b/packages/api/src/controllers/sources.ts index a34f931dff..daa24cf810 100644 --- a/packages/api/src/controllers/sources.ts +++ b/packages/api/src/controllers/sources.ts @@ -1,4 +1,5 @@ import { SourceKind, SourceSchema } from '@hyperdx/common-utils/dist/types'; +import mongoose from 'mongoose'; import { ISourceInput, @@ -35,8 +36,21 @@ export function getSources(team: string) { return Source.find({ team }); } -export function getSource(team: string, sourceId: string) { - return Source.findOne({ _id: sourceId, team }); +export async function getSource(team: string, sourceId: string) { + // Pre-check the sourceId shape so a non-ObjectId input returns null + // (the caller's "not found" branch) instead of bubbling a Mongoose + // CastError. + if (!mongoose.Types.ObjectId.isValid(sourceId)) { + return null; + } + try { + return await Source.findOne({ _id: sourceId, team }); + } catch { + // Defense-in-depth: if Mongoose still throws (e.g. a future cast + // path), treat it as "not found" so the caller can surface a clean + // error. + return null; + } } type DistributiveOmit = T extends T diff --git a/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts b/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts index 5bb16e6113..b2a30d7439 100644 --- a/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts +++ b/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts @@ -50,25 +50,39 @@ describe('MCP Dashboard Prompts', () => { expect(numberFormat.toLowerCase()).toContain('per-series'); }); - it('declares the MCP metric-authoring gap rather than referencing fields that do not exist', () => { - // The MCP select-item schema does not carry metricName / metricType, - // so any prompt that tells the model to author a metric tile via - // the builder path is teaching it to ship JSON that gets silently - // stripped. Guard with an explicit assertion so a future diff that - // restores metric-tile guidance fails this test loudly. + it('documents the metric-source builder support with the discovery workflow', () => { + // Builder tiles on a metric source now work via the metricType + + // metricName + isDelta fields on each select item, with metricTables + // threaded through runConfigTile's builder branch. The prompt has to + // teach the model the discovery workflow (list_sources -> describe + // _source -> list_metrics -> describe_metric -> timeseries|table) + // and the per-kind aggFn rules so it doesn't fall back to raw SQL. const prompt = buildQueryGuidePrompt(); - expect(prompt).not.toMatch(/metricName \+ metricType/); - expect(prompt).not.toMatch(/exactly 1 select item with metricName/); - const constraintsIdx = prompt.indexOf('== PER-TILE TYPE CONSTRAINTS =='); - const constraintsBody = prompt.slice(constraintsIdx); - // The constraints section closes with an explicit note that builder - // tiles on a metric source are not reliable today, with a fallback - // recipe to raw SQL. Anchor on the phrase so a future diff that - // drops the gap-acknowledgement fails loudly. - expect(constraintsBody).toMatch( + const metricsIdx = prompt.indexOf('== METRIC SOURCES =='); + expect(metricsIdx).toBeGreaterThan(-1); + const metricsBody = prompt.slice( + metricsIdx, + prompt.indexOf('\n== ', metricsIdx + 1), + ); + // The four supported metric select fields are named. + expect(metricsBody).toContain('metricType'); + expect(metricsBody).toContain('metricName'); + expect(metricsBody).toContain('isDelta'); + // Per-kind aggregation guidance is present. + expect(metricsBody).toMatch(/gauge\s+Use aggFn:"last_value"/); + expect(metricsBody).toMatch(/sum\s+Use aggFn:"increase"/); + expect(metricsBody).toMatch(/histogram\s+Use aggFn:"quantile"/); + // The 20-group cap on increase + groupBy is documented. + expect(metricsBody).toMatch(/top 20 groups/); + // The four-tool discovery chain is documented in order. + expect(metricsBody).toContain('clickstack_describe_source'); + expect(metricsBody).toContain('clickstack_list_metrics'); + expect(metricsBody).toContain('clickstack_describe_metric'); + // The old "use raw SQL for metric tiles" workaround language is gone. + expect(prompt).not.toMatch( /Authoring builder tiles on a metric source is not reliable/, ); - expect(constraintsBody).toMatch(/MCP select-item shape does not carry/); + expect(prompt).not.toMatch(/Both table name and UUID are empty/); }); it('documents table-tile onClick linking features', () => { @@ -227,16 +241,30 @@ describe('MCP Dashboard Prompts', () => { ); }); - it('flags the metric source builder gap', () => { - // Builder tiles on metric sources currently save but render with - // "Both table name and UUID are empty". Claude went 100% raw SQL - // across 21 metric tiles for this reason; the prompt has to make - // the workaround obvious so the model doesn't try the builder - // first and fail silently. + it('walks the metric discovery workflow end-to-end with worked examples', () => { + // Metric source builder tiles now work — the prompt teaches the + // model how to find, characterise, and chart a metric without + // falling through to raw SQL. The examples cover one tile per + // supported metric kind so the pattern is unambiguous. const prompt = buildQueryGuidePrompt(); - expect(prompt).toMatch(/Both table name and UUID are empty/); - expect(prompt).toMatch(/use a raw SQL tile/); - expect(prompt).toMatch(/otel_metrics_gauge/); + const metricsIdx = prompt.indexOf('== METRIC SOURCES =='); + const metricsBody = prompt.slice( + metricsIdx, + prompt.indexOf('\n== ', metricsIdx + 1), + ); + // The five-step discovery workflow is enumerated. + expect(metricsBody).toMatch(/clickstack_list_sources/); + expect(metricsBody).toMatch(/clickstack_describe_source/); + expect(metricsBody).toMatch(/clickstack_list_metrics/); + expect(metricsBody).toMatch(/clickstack_describe_metric/); + expect(metricsBody).toMatch(/clickstack_timeseries/); + // One worked example per supported kind, each using a real OTel + // metric name so the agent has a concrete template. + expect(metricsBody).toContain('system.cpu.utilization'); + expect(metricsBody).toContain('http.server.request.count'); + expect(metricsBody).toContain('http.server.request.duration'); + // valueExpression default is documented. + expect(metricsBody).toMatch(/valueExpression defaults to "Value"/); }); it('contains no em-dashes or en-dashes used as em-dashes', () => { diff --git a/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts b/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts new file mode 100644 index 0000000000..77947a5dc1 --- /dev/null +++ b/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts @@ -0,0 +1,104 @@ +// Mock heavy dependencies that break in unit-test context (no ClickHouse/Mongo) +jest.mock('@/models/source', () => ({})); +jest.mock('@/controllers/sources', () => ({})); +jest.mock('@/controllers/connection', () => ({})); +jest.mock('@/utils/trimToolResponse', () => ({ + trimToolResponse: (data: unknown) => ({ data, isTrimmed: false }), +})); + +import { decodeCursor, encodeCursor } from '../tools/sources/listMetrics'; + +describe('listMetrics cursor', () => { + describe('encodeCursor / decodeCursor round-trip', () => { + it('round-trips a gauge cursor', () => { + const payload = { kind: 'gauge' as const, lastName: 'system.cpu.idle' }; + const encoded = encodeCursor(payload); + expect(encoded).toMatch(/^[A-Za-z0-9+/]+=*$/); // base64 + expect(decodeCursor(encoded)).toEqual(payload); + }); + + it('round-trips a sum cursor', () => { + const payload = { + kind: 'sum' as const, + lastName: 'http.server.request.count', + }; + expect(decodeCursor(encodeCursor(payload))).toEqual(payload); + }); + + it('round-trips a histogram cursor', () => { + const payload = { + kind: 'histogram' as const, + lastName: 'http.server.request.duration', + }; + expect(decodeCursor(encodeCursor(payload))).toEqual(payload); + }); + + it('round-trips metric names with dots, dashes, and unicode', () => { + const payload = { + kind: 'gauge' as const, + lastName: 'system.cpu.utilization-µ.naïve', + }; + expect(decodeCursor(encodeCursor(payload))).toEqual(payload); + }); + }); + + describe('decodeCursor rejection cases', () => { + it('returns null for non-base64 input', () => { + expect(decodeCursor('not-base64!')).toBeNull(); + }); + + it('returns null for base64 of invalid JSON', () => { + const garbage = Buffer.from('not json').toString('base64'); + expect(decodeCursor(garbage)).toBeNull(); + }); + + it('returns null when kind is missing', () => { + const malformed = Buffer.from(JSON.stringify({ lastName: 'x' })).toString( + 'base64', + ); + expect(decodeCursor(malformed)).toBeNull(); + }); + + it('returns null when lastName is missing', () => { + const malformed = Buffer.from(JSON.stringify({ kind: 'gauge' })).toString( + 'base64', + ); + expect(decodeCursor(malformed)).toBeNull(); + }); + + it('returns null when kind is not a queryable metric kind', () => { + const summaryCursor = Buffer.from( + JSON.stringify({ kind: 'summary', lastName: 'x' }), + ).toString('base64'); + expect(decodeCursor(summaryCursor)).toBeNull(); + + const expHistCursor = Buffer.from( + JSON.stringify({ kind: 'exponential histogram', lastName: 'x' }), + ).toString('base64'); + expect(decodeCursor(expHistCursor)).toBeNull(); + + const bogusCursor = Buffer.from( + JSON.stringify({ kind: 'bogus', lastName: 'x' }), + ).toString('base64'); + expect(decodeCursor(bogusCursor)).toBeNull(); + }); + + it('returns null when kind has the wrong type', () => { + const malformed = Buffer.from( + JSON.stringify({ kind: 1, lastName: 'x' }), + ).toString('base64'); + expect(decodeCursor(malformed)).toBeNull(); + }); + + it('returns null when lastName has the wrong type', () => { + const malformed = Buffer.from( + JSON.stringify({ kind: 'gauge', lastName: 42 }), + ).toString('base64'); + expect(decodeCursor(malformed)).toBeNull(); + }); + + it('returns null for empty string', () => { + expect(decodeCursor('')).toBeNull(); + }); + }); +}); diff --git a/packages/api/src/mcp/__tests__/metricKinds.test.ts b/packages/api/src/mcp/__tests__/metricKinds.test.ts new file mode 100644 index 0000000000..a1d4d6aca0 --- /dev/null +++ b/packages/api/src/mcp/__tests__/metricKinds.test.ts @@ -0,0 +1,86 @@ +import { sanitizeMetricTables } from '../tools/sources/metricKinds'; + +describe('sanitizeMetricTables', () => { + it('returns undefined for null / undefined input', () => { + expect(sanitizeMetricTables(undefined)).toBeUndefined(); + expect(sanitizeMetricTables(null)).toBeUndefined(); + }); + + it('preserves valid queryable kind entries', () => { + expect( + sanitizeMetricTables({ + gauge: 'otel_metrics_gauge', + sum: 'otel_metrics_sum', + histogram: 'otel_metrics_histogram', + }), + ).toEqual({ + gauge: 'otel_metrics_gauge', + sum: 'otel_metrics_sum', + histogram: 'otel_metrics_histogram', + }); + }); + + it('preserves non-queryable kinds the schema still declares', () => { + // summary and "exponential histogram" are valid MetricsDataType + // members even though the query renderer cannot translate them. + expect( + sanitizeMetricTables({ + gauge: 'otel_metrics_gauge', + summary: 'otel_metrics_summary', + 'exponential histogram': 'otel_metrics_exponential_histogram', + }), + ).toEqual({ + gauge: 'otel_metrics_gauge', + summary: 'otel_metrics_summary', + 'exponential histogram': 'otel_metrics_exponential_histogram', + }); + }); + + it('strips Mongoose _id leaking from the metricTables subdoc', () => { + // Existing documents persisted before the schema fix may still + // carry a stray ObjectId in the embedded metricTables subdoc. + // The sanitizer keeps only valid kind keys. + const result = sanitizeMetricTables({ + gauge: 'otel_metrics_gauge', + sum: 'otel_metrics_sum', + _id: '6a2ad3b4da94be764e2deed8', + }); + expect(result).toEqual({ + gauge: 'otel_metrics_gauge', + sum: 'otel_metrics_sum', + }); + expect(result).not.toHaveProperty('_id'); + }); + + it('drops unknown keys', () => { + expect( + sanitizeMetricTables({ + gauge: 'otel_metrics_gauge', + not_a_real_kind: 'something', + }), + ).toEqual({ + gauge: 'otel_metrics_gauge', + }); + }); + + it('drops non-string values for valid keys', () => { + expect( + sanitizeMetricTables({ + gauge: 'otel_metrics_gauge', + sum: 12345, + histogram: null, + } as Record), + ).toEqual({ + gauge: 'otel_metrics_gauge', + }); + }); + + it('returns undefined when no valid entries remain', () => { + expect( + sanitizeMetricTables({ + _id: 'whatever', + not_a_real_kind: 'x', + }), + ).toBeUndefined(); + }); +}); diff --git a/packages/api/src/mcp/__tests__/query.test.ts b/packages/api/src/mcp/__tests__/query.test.ts index c523a67999..6033e6c712 100644 --- a/packages/api/src/mcp/__tests__/query.test.ts +++ b/packages/api/src/mcp/__tests__/query.test.ts @@ -7,10 +7,18 @@ jest.mock('@/utils/trimToolResponse', () => ({ })); import { + annotateIncreaseTopNHint, + assertSourceKindMatchesSelect, errorHint, + INCREASE_TOP_N_CAP, mergeWhereIntoSelectItems, parseTimeRange, } from '../tools/query/helpers'; +import { + applyMetricSelectDefaults, + getMetricSelectIssues, + validateMetricSelectItems, +} from '../tools/query/schemas'; import { resolveOrderBy } from '../tools/query/table'; describe('parseTimeRange', () => { @@ -352,4 +360,483 @@ describe('resolveOrderBy', () => { ]), ).toBe('quantile'); }); + + it('should NOT synthesize for "increase" aggFn (metric-only marker)', () => { + // increase compiles to a multi-CTE sum(Rate) pipeline in the renderer, + // not a standalone SQL function. resolveOrderBy must leave bare + // "increase" alone so the renderer-assigned alias can take over. + expect( + resolveOrderBy('increase', [ + { aggFn: 'increase', valueExpression: 'Value' }, + ]), + ).toBe('increase'); + }); +}); + +// ─── getMetricSelectIssues ─────────────────────────────────────────────────── + +describe('getMetricSelectIssues', () => { + it('returns no issues for a plain non-metric count', () => { + expect(getMetricSelectIssues({ aggFn: 'count' })).toEqual([]); + }); + + it('returns no issues for a plain non-metric avg with valueExpression', () => { + expect( + getMetricSelectIssues({ aggFn: 'avg', valueExpression: 'Duration' }), + ).toEqual([]); + }); + + it('requires valueExpression for non-count non-metric aggregations', () => { + const issues = getMetricSelectIssues({ aggFn: 'avg' }); + expect(issues).toHaveLength(1); + expect(issues[0].path).toEqual(['valueExpression']); + expect(issues[0].message).toContain('required for non-count'); + }); + + it('does NOT require valueExpression when metricType is set', () => { + // valueExpression defaults to "Value" for metric sources + expect( + getMetricSelectIssues({ + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }), + ).toEqual([]); + }); + + it('rejects valueExpression on aggFn:"count"', () => { + const issues = getMetricSelectIssues({ + aggFn: 'count', + valueExpression: 'Duration', + }); + expect(issues).toHaveLength(1); + expect(issues[0].path).toEqual(['valueExpression']); + }); + + it('rejects metricType without metricName', () => { + const issues = getMetricSelectIssues({ + aggFn: 'avg', + metricType: 'gauge', + }); + expect(issues.find(i => i.path[0] === 'metricName')).toBeDefined(); + }); + + it('rejects metricName without metricType', () => { + const issues = getMetricSelectIssues({ + aggFn: 'avg', + metricName: 'system.cpu.utilization', + }); + expect(issues.find(i => i.path[0] === 'metricType')).toBeDefined(); + }); + + it('rejects aggFn:"increase" on a gauge metric', () => { + const issues = getMetricSelectIssues({ + aggFn: 'increase', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }); + expect(issues.find(i => i.path[0] === 'aggFn')).toBeDefined(); + }); + + it('accepts aggFn:"increase" on a sum metric', () => { + expect( + getMetricSelectIssues({ + aggFn: 'increase', + metricType: 'sum', + metricName: 'http.server.request.count', + }), + ).toEqual([]); + }); + + it('rejects aggFn:"avg" on a histogram metric', () => { + const issues = getMetricSelectIssues({ + aggFn: 'avg', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }); + expect(issues.find(i => i.path[0] === 'aggFn')).toBeDefined(); + }); + + it('accepts aggFn:"count" on a histogram metric (no level required)', () => { + expect( + getMetricSelectIssues({ + aggFn: 'count', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }), + ).toEqual([]); + }); + + it('requires level for aggFn:"quantile" on a histogram metric', () => { + const issues = getMetricSelectIssues({ + aggFn: 'quantile', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }); + expect(issues.find(i => i.path[0] === 'level')).toBeDefined(); + }); + + it('accepts aggFn:"quantile" with level on a histogram metric', () => { + expect( + getMetricSelectIssues({ + aggFn: 'quantile', + level: 0.95, + metricType: 'histogram', + metricName: 'http.server.request.duration', + }), + ).toEqual([]); + }); + + it('rejects isDelta on a non-gauge metric', () => { + const issues = getMetricSelectIssues({ + aggFn: 'sum', + metricType: 'sum', + metricName: 'http.request.count', + isDelta: true, + }); + expect(issues.find(i => i.path[0] === 'isDelta')).toBeDefined(); + }); + + it('accepts isDelta on a gauge metric', () => { + expect( + getMetricSelectIssues({ + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + isDelta: true, + }), + ).toEqual([]); + }); + + it('rejects level when aggFn is not quantile', () => { + const issues = getMetricSelectIssues({ + aggFn: 'avg', + valueExpression: 'Duration', + level: 0.95, + }); + expect(issues.find(i => i.path[0] === 'level')).toBeDefined(); + }); +}); + +// ─── validateMetricSelectItems ─────────────────────────────────────────────── + +describe('validateMetricSelectItems', () => { + it('returns null for a valid items array', () => { + expect( + validateMetricSelectItems([ + { aggFn: 'count' }, + { aggFn: 'avg', valueExpression: 'Duration' }, + ]), + ).toBeNull(); + }); + + it('returns an error envelope and labels each item by index', () => { + const result = validateMetricSelectItems([ + { aggFn: 'avg' }, // missing valueExpression + { aggFn: 'increase', metricType: 'gauge', metricName: 'x' }, // increase requires sum + ]); + expect(result).not.toBeNull(); + if (!result) return; + expect(result.isError).toBe(true); + expect(result.content[0].type).toBe('text'); + const text = result.content[0].text; + expect(text).toContain('select[0].valueExpression'); + expect(text).toContain('select[1].aggFn'); + }); + + it('returns an error envelope for a single bad item', () => { + const result = validateMetricSelectItems([ + { + aggFn: 'quantile', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }, // missing level + ]); + expect(result).not.toBeNull(); + if (!result) return; + expect(result.content[0].text).toContain('select[0].level'); + }); +}); + +// ─── applyMetricSelectDefaults ─────────────────────────────────────────────── + +// Typed as McpSelectItem so the inferred generic preserves the +// optional valueExpression field. Otherwise structural inference +// narrows away `valueExpression` and the assertions below stop +// type-checking. +type SelectItem = { + aggFn: string; + metricType?: 'gauge' | 'sum' | 'histogram'; + metricName?: string; + valueExpression?: string; +}; + +describe('applyMetricSelectDefaults', () => { + it('defaults valueExpression to "Value" when metricType is set', () => { + const input: SelectItem[] = [ + { + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }, + ]; + const out = applyMetricSelectDefaults(input); + expect(out[0].valueExpression).toBe('Value'); + }); + + it('preserves an explicit valueExpression on metric items', () => { + const input: SelectItem[] = [ + { + aggFn: 'avg', + metricType: 'gauge', + metricName: 'x', + valueExpression: 'Value * 100', + }, + ]; + const out = applyMetricSelectDefaults(input); + expect(out[0].valueExpression).toBe('Value * 100'); + }); + + it('leaves non-metric items untouched', () => { + const input: SelectItem[] = [{ aggFn: 'count' }]; + const out = applyMetricSelectDefaults(input); + expect(out[0]).toEqual({ aggFn: 'count' }); + expect(out[0].valueExpression).toBeUndefined(); + }); + + it('returns new objects only for the items it mutates', () => { + const input: SelectItem[] = [ + { aggFn: 'count' }, + { aggFn: 'avg', metricType: 'gauge', metricName: 'x' }, + ]; + const out = applyMetricSelectDefaults(input); + expect(out[0]).toBe(input[0]); // unchanged item is the same reference + expect(out[1]).not.toBe(input[1]); // mutated item is a new object + }); +}); + +// ─── annotateIncreaseTopNHint ──────────────────────────────────────────────── + +function buildResult(data: unknown[]) { + return { + content: [ + { + type: 'text', + text: JSON.stringify({ result: { data } }), + }, + ], + isError: false, + }; +} + +/** Rows with `count` distinct ServiceName group values. */ +function buildGroupedRows(count: number) { + return Array.from({ length: count }, (_, i) => ({ + ServiceName: `svc-${i}`, + Series: i, + })); +} + +describe('annotateIncreaseTopNHint', () => { + it('exposes the renderer cap as a constant', () => { + expect(INCREASE_TOP_N_CAP).toBe(20); + }); + + it('appends a hint when the distinct group count reaches the cap', () => { + const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP)); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toHaveLength(1); + expect(parsed.hints[0]).toContain('top 20'); + expect(parsed.hints[0]).toContain('aggFn:"increase"'); + }); + + it('does NOT annotate when the distinct group count is below the cap', () => { + const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP - 1)); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toBeUndefined(); + }); + + it('counts distinct groups across repeated time buckets, not raw rows', () => { + // 3 groups × 10 buckets = 30 rows but only 3 distinct groups. + const rows = Array.from({ length: 30 }, (_, i) => ({ + ServiceName: `svc-${i % 3}`, + bucket: i, + })); + const result = buildResult(rows); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toBeUndefined(); + }); + + it('does NOT annotate when the groupBy column cannot be resolved from rows', () => { + const result = buildResult([{ x: 1 }, { x: 2 }]); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toBeUndefined(); + }); + + it('supports multi-column groupBy by counting distinct combinations', () => { + // 4 ServiceName values × 5 SpanName values = 20 distinct combos. + const rows = Array.from({ length: 20 }, (_, i) => ({ + ServiceName: `svc-${i % 4}`, + SpanName: `op-${Math.floor(i / 4)}`, + })); + const result = buildResult(rows); + annotateIncreaseTopNHint( + result, + [{ aggFn: 'increase' }], + 'ServiceName, SpanName', + ); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toHaveLength(1); + }); + + it('appends alongside an existing hint instead of overwriting', () => { + const result = { + content: [ + { + type: 'text', + text: JSON.stringify({ + result: { data: buildGroupedRows(INCREASE_TOP_N_CAP) }, + hints: ['previous hint from another writer'], + }), + }, + ], + isError: false, + }; + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toHaveLength(2); + expect(parsed.hints[0]).toBe('previous hint from another writer'); + expect(parsed.hints[1]).toContain('top 20'); + }); + + it('does NOT annotate when increase is used WITHOUT a groupBy', () => { + const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP)); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], undefined); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toBeUndefined(); + }); + + it('does NOT annotate when groupBy is an empty string', () => { + const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP)); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], ' '); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toBeUndefined(); + }); + + it('does NOT annotate when no select item uses increase', () => { + const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP)); + annotateIncreaseTopNHint( + result, + [{ aggFn: 'sum' }, { aggFn: 'avg' }], + 'ServiceName', + ); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toBeUndefined(); + }); + + it('does NOT annotate empty results (already labelled by formatQueryResult)', () => { + const result = buildResult([]); + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + const parsed = JSON.parse(result.content[0].text); + expect(parsed.hints).toBeUndefined(); + }); + + it('does NOT annotate error results', () => { + const result = { + content: [{ type: 'text', text: 'an error message' }], + isError: true, + }; + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + expect(result.content[0].text).toBe('an error message'); + }); + + it('leaves unparseable text content unchanged', () => { + const result = { + content: [{ type: 'text', text: 'not json' }], + isError: false, + }; + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'); + expect(result.content[0].text).toBe('not json'); + }); + + it('is a no-op when content is missing', () => { + const result = {} as Parameters[0]; + expect(() => + annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'), + ).not.toThrow(); + }); +}); + +describe('assertSourceKindMatchesSelect', () => { + it('returns null when metric select runs against a metric source', () => { + const source = { kind: 'metric' }; + const select = [ + { + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }, + ]; + expect(assertSourceKindMatchesSelect(source, select)).toBeNull(); + }); + + it('returns null when non-metric select runs against a trace source', () => { + const source = { kind: 'trace' }; + const select = [{ aggFn: 'count' }]; + expect(assertSourceKindMatchesSelect(source, select)).toBeNull(); + }); + + it('errors when metric params target a non-metric source', () => { + const source = { kind: 'trace' }; + const select = [ + { + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }, + ]; + const result = assertSourceKindMatchesSelect(source, select); + expect(result?.isError).toBe(true); + expect(result?.content[0].text).toMatch(/not metric/); + expect(result?.content[0].text).toMatch(/clickstack_list_sources/); + }); + + it('errors when a metric source receives select items without metricType', () => { + const source = { kind: 'metric' }; + const select = [{ aggFn: 'avg', valueExpression: 'Value' }]; + const result = assertSourceKindMatchesSelect(source, select); + expect(result?.isError).toBe(true); + expect(result?.content[0].text).toMatch(/metricType \+ metricName/); + expect(result?.content[0].text).toMatch(/clickstack_describe_source/); + }); + + it('counts only items whose metricType is a non-empty string', () => { + const source = { kind: 'trace' }; + // Items with empty/missing metricType should not trip the check + expect( + assertSourceKindMatchesSelect(source, [ + { aggFn: 'count' }, + { aggFn: 'avg', valueExpression: 'Duration', metricType: '' }, + ]), + ).toBeNull(); + }); + + it('returns null for a raw string select', () => { + // Raw-string selects bypass the metric-annotation check; the + // renderer handles them directly. + expect( + assertSourceKindMatchesSelect({ kind: 'metric' }, 'count()'), + ).toBeNull(); + }); + + it('returns null when select is not an array', () => { + expect( + assertSourceKindMatchesSelect({ kind: 'trace' }, undefined), + ).toBeNull(); + expect(assertSourceKindMatchesSelect({ kind: 'trace' }, null)).toBeNull(); + }); }); diff --git a/packages/api/src/mcp/__tests__/queryTool.test.ts b/packages/api/src/mcp/__tests__/queryTool.test.ts index 725e55047c..175411e181 100644 --- a/packages/api/src/mcp/__tests__/queryTool.test.ts +++ b/packages/api/src/mcp/__tests__/queryTool.test.ts @@ -1,10 +1,13 @@ import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; -import { SourceKind } from '@hyperdx/common-utils/dist/types'; +import { MetricsDataType, SourceKind } from '@hyperdx/common-utils/dist/types'; import { Client } from '@modelcontextprotocol/sdk/client/index.js'; import * as config from '@/config'; import { bulkInsertLogs, + bulkInsertMetricsGauge, + bulkInsertMetricsHistogram, + bulkInsertMetricsSum, DEFAULT_DATABASE, DEFAULT_LOGS_TABLE, DEFAULT_TRACES_TABLE, @@ -802,4 +805,227 @@ describe('MCP Query Tools', () => { }); }); }); + + // ── Metric source query coverage ───────────────────────────────────────── + + describe('Metric sources (timeseries + table)', () => { + const createMetricSource = () => + Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge', + [MetricsDataType.Sum.toLowerCase()]: 'otel_metrics_sum', + [MetricsDataType.Histogram.toLowerCase()]: 'otel_metrics_histogram', + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'Metrics', + }); + + const seedGauge = async (now: Date) => { + await bulkInsertMetricsGauge([ + { + MetricName: 'system.cpu.utilization', + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: now, + Value: 0.42, + }, + { + MetricName: 'system.cpu.utilization', + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: new Date(now.getTime() + 1000), + Value: 0.61, + }, + ]); + }; + + const seedSum = async (now: Date) => { + await bulkInsertMetricsSum([ + { + MetricName: 'http.server.request.count', + AggregationTemporality: 1, // DELTA + IsMonotonic: true, + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: now, + Value: 100, + }, + { + MetricName: 'http.server.request.count', + AggregationTemporality: 1, + IsMonotonic: true, + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: new Date(now.getTime() + 1000), + Value: 25, + }, + ]); + }; + + const seedHistogram = async (now: Date) => { + await bulkInsertMetricsHistogram([ + { + MetricName: 'http.server.request.duration', + ResourceAttributes: { 'service.name': 'svc-a' }, + TimeUnix: now, + BucketCounts: [1, 2, 3, 4], + ExplicitBounds: [10, 100, 1000], + AggregationTemporality: 1, + }, + ]); + }; + + it('runs clickstack_timeseries against a gauge metric source', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await seedGauge(now); + + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'avg', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + alias: 'cpu', + }, + ], + startTime: new Date(now.getTime() - 60_000).toISOString(), + endTime: new Date(now.getTime() + 60_000).toISOString(), + granularity: '1 minute', + }); + + expect(result.isError).toBeFalsy(); + // No "Both table name and UUID are empty" error from the renderer. + expect(getFirstText(result)).not.toMatch(/UUID are empty/); + }); + + it('runs clickstack_table aggFn:"increase" against a sum metric source', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await seedSum(now); + + const result = await callTool(client, 'clickstack_table', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'increase', + metricType: 'sum', + metricName: 'http.server.request.count', + alias: 'requests', + }, + ], + startTime: new Date(now.getTime() - 60_000).toISOString(), + endTime: new Date(now.getTime() + 60_000).toISOString(), + }); + + expect(result.isError).toBeFalsy(); + expect(getFirstText(result)).not.toMatch(/UUID are empty/); + }); + + it('runs clickstack_timeseries quantile against a histogram metric source', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await seedHistogram(now); + + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'quantile', + level: 0.95, + metricType: 'histogram', + metricName: 'http.server.request.duration', + alias: 'p95', + }, + ], + startTime: new Date(now.getTime() - 60_000).toISOString(), + endTime: new Date(now.getTime() + 60_000).toISOString(), + granularity: '1 minute', + }); + + expect(result.isError).toBeFalsy(); + }); + + it('rejects aggFn:"increase" on a gauge select item with a friendly schema error', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'increase', + metricType: 'gauge', + metricName: 'system.cpu.utilization', + }, + ], + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/"increase" is only valid for sum/); + }); + + it('rejects aggFn:"quantile" on a histogram without level', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'quantile', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }, + ], + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/level is required/); + }); + + it('rejects aggFn:"avg" on a histogram select item', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'avg', + metricType: 'histogram', + metricName: 'http.server.request.duration', + }, + ], + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch( + /Histogram metrics only support aggFn/, + ); + }); + + it('rejects metricType without metricName', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [{ aggFn: 'avg', metricType: 'gauge' }], + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/metricName is required/); + }); + + it('rejects isDelta on a non-gauge metric', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_timeseries', { + sourceId: metricSource._id.toString(), + select: [ + { + aggFn: 'increase', + metricType: 'sum', + metricName: 'http.server.request.count', + isDelta: true, + }, + ], + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/isDelta is only valid for gauge/); + }); + }); }); diff --git a/packages/api/src/mcp/__tests__/sources.test.ts b/packages/api/src/mcp/__tests__/sources.test.ts index 008e4fe852..8acee4dfed 100644 --- a/packages/api/src/mcp/__tests__/sources.test.ts +++ b/packages/api/src/mcp/__tests__/sources.test.ts @@ -3,6 +3,9 @@ import { Client } from '@modelcontextprotocol/sdk/client/index.js'; import * as config from '@/config'; import { + bulkInsertMetricsGauge, + bulkInsertMetricsHistogram, + bulkInsertMetricsSum, DEFAULT_DATABASE, DEFAULT_LOGS_TABLE, DEFAULT_TRACES_TABLE, @@ -249,7 +252,7 @@ describe('MCP Source Tools', () => { expect(output.source.keyColumns).toHaveProperty('spanId'); }); - it('should return metricTables for a metric source (no column schema)', async () => { + it('should return metricTables AND column schema for a metric source', async () => { const metricSource = await Source.create({ kind: SourceKind.Metric, team: team._id, @@ -280,13 +283,34 @@ describe('MCP Source Tools', () => { kind: SourceKind.Metric, }); - // Metric sources have metricTables but no column schema or value samples + // Metric sources surface metricTables AND run column / map-key + // discovery against the representative metric table (gauge picked + // first by pickRepresentativeMetricTable). expect(output.source.metricTables).toBeDefined(); expect(output.source.metricTables).toHaveProperty('gauge'); - expect(output.source.columns).toBeUndefined(); - expect(output.source.lowCardinalityValues).toBeUndefined(); - expect(output.source.mapAttributeKeys).toBeUndefined(); - expect(output.source.mapAttributeValues).toBeUndefined(); + // metricTables should only contain valid kind keys — not a stray + // Mongoose `_id` from the embedded subdoc. + expect(output.source.metricTables).not.toHaveProperty('_id'); + expect(Object.keys(output.source.metricTables).sort()).toEqual( + ['gauge', 'histogram', 'sum'].sort(), + ); + expect(output.source.discoveryMetricKind).toBe('gauge'); + expect(output.source.columns).toBeDefined(); + expect(Array.isArray(output.source.columns)).toBe(true); + // The OTel Collector gauge schema includes MetricName + Value + // as native columns. + const columnNames = output.source.columns.map( + (c: { name: string }) => c.name, + ); + expect(columnNames).toContain('MetricName'); + expect(columnNames).toContain('Value'); + + // nextSteps points at the new metric discovery tools. + expect(output.nextSteps.query).toContain('metricType'); + expect(output.nextSteps.discovery).toContain('clickstack_list_metrics'); + expect(output.nextSteps.discovery).toContain( + 'clickstack_describe_metric', + ); }); it('should include usage guidance and nextSteps', async () => { @@ -338,4 +362,513 @@ describe('MCP Source Tools', () => { expect(getFirstText(result)).toContain('not found'); }); }); + + // ── clickstack_list_metrics ────────────────────────────────────────────── + + describe('clickstack_list_metrics', () => { + const createMetricSource = () => + Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge', + [MetricsDataType.Sum.toLowerCase()]: 'otel_metrics_sum', + [MetricsDataType.Histogram.toLowerCase()]: 'otel_metrics_histogram', + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'Metrics', + }); + + const seedMetricNames = async () => { + const now = new Date(); + await bulkInsertMetricsGauge([ + { + MetricName: 'system.cpu.utilization', + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: now, + Value: 0.42, + }, + { + MetricName: 'system.memory.usage', + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: now, + Value: 12345, + }, + ]); + await bulkInsertMetricsSum([ + { + MetricName: 'http.server.request.count', + AggregationTemporality: 1, + IsMonotonic: true, + ResourceAttributes: { 'service.name': 'svc-a' }, + ServiceName: 'svc-a', + TimeUnix: now, + Value: 100, + }, + ]); + await bulkInsertMetricsHistogram([ + { + MetricName: 'http.server.request.duration', + ResourceAttributes: { 'service.name': 'svc-a' }, + TimeUnix: now, + BucketCounts: [1, 2, 3], + ExplicitBounds: [10, 100, 1000], + AggregationTemporality: 1, + }, + ]); + }; + + it('rejects non-metric sources with a friendly error', async () => { + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: traceSource._id.toString(), + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toContain('not a metric source'); + }); + + it('returns 404 for unknown source IDs', async () => { + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: '000000000000000000000000', + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toContain('not found'); + }); + + it('returns metrics across all populated kinds when kind is omitted', async () => { + const metricSource = await createMetricSource(); + await seedMetricNames(); + + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + }); + + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + const namesByKind: Record = {}; + for (const entry of output.metrics ?? []) { + if (!namesByKind[entry.kind]) namesByKind[entry.kind] = []; + namesByKind[entry.kind].push(entry.name); + } + expect(namesByKind.gauge ?? []).toEqual( + expect.arrayContaining([ + 'system.cpu.utilization', + 'system.memory.usage', + ]), + ); + expect(namesByKind.sum ?? []).toEqual( + expect.arrayContaining(['http.server.request.count']), + ); + expect(namesByKind.histogram ?? []).toEqual( + expect.arrayContaining(['http.server.request.duration']), + ); + }); + + it('restricts results to a single kind when kind is set', async () => { + const metricSource = await createMetricSource(); + await seedMetricNames(); + + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + kind: 'gauge', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + for (const entry of output.metrics ?? []) { + expect(entry.kind).toBe('gauge'); + } + }); + + it('applies namePattern as a server-side ILIKE filter', async () => { + const metricSource = await createMetricSource(); + await seedMetricNames(); + + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + namePattern: 'system.cpu.%', + }); + const output = JSON.parse(getFirstText(result)); + for (const entry of output.metrics ?? []) { + expect(entry.name).toMatch(/^system\.cpu\./); + } + }); + + it('paginates via opaque nextCursor and resumes cleanly', async () => { + const metricSource = await createMetricSource(); + await seedMetricNames(); + + // First page: only ask for one entry so the cap is forced even with + // a small seed; sanity-check the cursor round-trip. + const first = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + kind: 'gauge', + limit: 1, + }); + const firstOutput = JSON.parse(getFirstText(first)); + expect(firstOutput.metrics.length).toBe(1); + expect(typeof firstOutput.nextCursor).toBe('string'); + + const second = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + kind: 'gauge', + limit: 5, + cursor: firstOutput.nextCursor, + }); + expect(second.isError).toBeFalsy(); + const secondOutput = JSON.parse(getFirstText(second)); + // The second page must not repeat the first page's last name. + const firstNames = new Set( + (firstOutput.metrics as { name: string }[]).map(m => m.name), + ); + for (const entry of secondOutput.metrics as { name: string }[]) { + expect(firstNames.has(entry.name)).toBe(false); + } + }); + + it('rejects a malformed cursor with an actionable error', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + cursor: 'not-base64!', + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/Invalid cursor/); + }); + + it('returns an empty-result hint when nothing matches', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: metricSource._id.toString(), + namePattern: 'this.name.does.not.exist.%', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.metrics).toEqual([]); + expect(output.hint).toMatch(/widening|removing|omitting/); + }); + + it('surfaces partialFailure instead of the empty hint when a kind fetch fails', async () => { + // Point the gauge kind at the logs table: it exists (so source + // resolution succeeds) but has no MetricName/TimeUnix columns, so + // the per-kind listing query throws. + const brokenSource = await Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: DEFAULT_LOGS_TABLE, + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'BrokenMetrics', + }); + const result = await callTool(client, 'clickstack_list_metrics', { + sourceId: brokenSource._id.toString(), + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.metrics).toEqual([]); + expect(output.partialFailure).toHaveLength(1); + expect(output.partialFailure[0].kind).toBe('gauge'); + expect(output.partialFailure[0].error).toBeTruthy(); + // The misleading "No metrics matched … widen the window" hint must + // NOT appear — the agent should retry, not widen. + expect(output.hint).not.toMatch(/No metrics matched/); + }); + }); + + // ── clickstack_describe_metric ─────────────────────────────────────────── + + describe('clickstack_describe_metric', () => { + const createMetricSource = () => + Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge', + [MetricsDataType.Sum.toLowerCase()]: 'otel_metrics_sum', + [MetricsDataType.Histogram.toLowerCase()]: 'otel_metrics_histogram', + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'Metrics', + }); + + it('rejects non-metric sources', async () => { + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: traceSource._id.toString(), + metricName: 'system.cpu.utilization', + kind: 'gauge', + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toContain('not a metric source'); + }); + + it('rejects calls that omit kind', async () => { + // `kind` is required. Calling without it returns an MCP error + // surfaced from the schema parser rather than reaching ClickHouse. + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'system.cpu.utilization', + }); + expect(result.isError).toBe(true); + }); + + it('returns an actionable hint when the metric has no data in the kind table', async () => { + const metricSource = await createMetricSource(); + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'definitely.not.a.metric', + kind: 'gauge', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + // We always return a single kindDetail for the requested kind — + // attribute keys are empty when the metric has no data. + expect(output.kinds).toHaveLength(1); + expect(output.kinds[0].kind).toBe('gauge'); + expect(output.kinds[0].attributeKeys).toEqual({}); + expect(output.hint).toMatch(/No data found/); + expect(output.partialFailure).toBeUndefined(); + }); + + it('surfaces partialFailure instead of the no-data hint when discovery fails', async () => { + // Point the gauge kind at the logs table: it exists (so getColumns + // succeeds) but lacks MetricName/TimeUnix columns, so the + // attribute-keys discovery query throws. + const brokenSource = await Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: DEFAULT_LOGS_TABLE, + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'BrokenMetrics', + }); + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: brokenSource._id.toString(), + metricName: 'whatever', + kind: 'gauge', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.partialFailure).toBeDefined(); + expect( + output.partialFailure.map((f: { stage: string }) => f.stage), + ).toContain('attributeKeys'); + // The misleading "No data found … widen startTime/endTime" hint + // must NOT appear — the fetch failed; widening would not help. + expect(output.hint).not.toMatch(/No data found/); + }); + + it('returns attribute keys for a gauge metric when kind is specified', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await bulkInsertMetricsGauge([ + { + MetricName: 'system.cpu.utilization', + ResourceAttributes: { 'service.name': 'auto-detect-svc' }, + ServiceName: 'auto-detect-svc', + TimeUnix: now, + Value: 0.5, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'system.cpu.utilization', + kind: 'gauge', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.metricName).toBe('system.cpu.utilization'); + expect(output.kinds.length).toBe(1); + const detail = output.kinds[0]; + expect(detail.kind).toBe('gauge'); + expect(detail.usage).toContain('aggFn'); + expect(detail.attributeKeys).toBeDefined(); + // ResourceAttributes['service.name'] should land in the keys map + // under ResourceAttributes. + expect(detail.attributeKeys.ResourceAttributes ?? []).toEqual( + expect.arrayContaining(['service.name']), + ); + // Attribute values should be sampled by default. + expect(detail.attributeValues).toBeDefined(); + // nextSteps.query carries a worked example matching the requested kind. + expect(output.nextSteps.query).toContain('metricType: "gauge"'); + }); + + it('reports sampledKeys and truncatedKeys so unsampled keys are distinguishable', async () => { + // 15 attribute keys > MAX_ATTR_KEYS_TO_SAMPLE (12): the overflow + // must land in truncatedKeys so the agent knows those keys were + // never queried (vs. sampled-but-empty). + const metricSource = await createMetricSource(); + const manyAttrs: Record = {}; + for (let i = 0; i < 15; i++) { + manyAttrs[`attr.key.${String(i).padStart(2, '0')}`] = `value-${i}`; + } + await bulkInsertMetricsGauge([ + { + MetricName: 'many.attrs.metric', + ResourceAttributes: manyAttrs, + ServiceName: 'many-attrs-svc', + TimeUnix: new Date(), + Value: 1, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'many.attrs.metric', + kind: 'gauge', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + const detail = output.kinds[0]; + const meta = detail.attributeValuesMeta; + expect(meta).toBeDefined(); + expect(meta.sampledKeys).toHaveLength(12); + expect(meta.truncatedKeys).toHaveLength(3); + // Sampled and truncated sets are disjoint and cover all 15 keys. + expect(new Set([...meta.sampledKeys, ...meta.truncatedKeys]).size).toBe( + 15, + ); + // Every key with values must have been sampled. + for (const key of Object.keys(detail.attributeValues ?? {})) { + expect(meta.sampledKeys).toContain(key); + } + }); + + it('reports empty truncatedKeys when all attribute keys fit the cap', async () => { + const metricSource = await createMetricSource(); + await bulkInsertMetricsGauge([ + { + MetricName: 'few.attrs.metric', + ResourceAttributes: { 'service.name': 'few-attrs-svc' }, + ServiceName: 'few-attrs-svc', + TimeUnix: new Date(), + Value: 1, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'few.attrs.metric', + kind: 'gauge', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + const meta = output.kinds[0].attributeValuesMeta; + expect(meta).toBeDefined(); + expect(meta.truncatedKeys).toEqual([]); + expect(meta.sampledKeys).toContain("ResourceAttributes['service.name']"); + }); + + it('skips value sampling when sampleValues is false', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await bulkInsertMetricsGauge([ + { + MetricName: 'system.cpu.utilization', + ResourceAttributes: { 'service.name': 'no-sample-svc' }, + ServiceName: 'no-sample-svc', + TimeUnix: now, + Value: 0.5, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'system.cpu.utilization', + kind: 'gauge', + sampleValues: false, + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.kinds[0].attributeValues).toBeUndefined(); + }); + + it('returns the correct usage example for a sum metric', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await bulkInsertMetricsSum([ + { + MetricName: 'http.server.request.count', + AggregationTemporality: 1, + IsMonotonic: true, + ResourceAttributes: { 'service.name': 'sum-svc' }, + ServiceName: 'sum-svc', + TimeUnix: now, + Value: 42, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'http.server.request.count', + kind: 'sum', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.kinds[0].kind).toBe('sum'); + expect(output.kinds[0].usage).toContain('increase'); + expect(output.nextSteps.query).toContain('"increase"'); + }); + + it('returns the quantile + level example for a histogram metric', async () => { + const metricSource = await createMetricSource(); + const now = new Date(); + await bulkInsertMetricsHistogram([ + { + MetricName: 'http.server.request.duration', + ResourceAttributes: { 'service.name': 'hist-svc' }, + TimeUnix: now, + BucketCounts: [1, 2, 3], + ExplicitBounds: [10, 100, 1000], + AggregationTemporality: 1, + }, + ]); + + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: metricSource._id.toString(), + metricName: 'http.server.request.duration', + kind: 'histogram', + }); + expect(result.isError).toBeFalsy(); + const output = JSON.parse(getFirstText(result)); + expect(output.kinds[0].kind).toBe('histogram'); + expect(output.kinds[0].usage).toContain('quantile'); + expect(output.nextSteps.query).toContain('"quantile"'); + expect(output.nextSteps.query).toContain('level: 0.95'); + }); + + it('rejects explicit kind when the source has no table for that kind', async () => { + // Source with only gauge populated. + const gaugeOnly = await Source.create({ + kind: SourceKind.Metric, + team: team._id, + from: { databaseName: DEFAULT_DATABASE, tableName: '' }, + metricTables: { + [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge', + }, + timestampValueExpression: 'TimeUnix', + connection: connection._id, + name: 'GaugeOnly', + }); + const result = await callTool(client, 'clickstack_describe_metric', { + sourceId: gaugeOnly._id.toString(), + metricName: 'whatever', + kind: 'sum', + }); + expect(result.isError).toBe(true); + expect(getFirstText(result)).toMatch(/no "sum" metric table/); + }); + }); }); diff --git a/packages/api/src/mcp/prompts/dashboards/content.ts b/packages/api/src/mcp/prompts/dashboards/content.ts index a79fde71ab..3293c26611 100644 --- a/packages/api/src/mcp/prompts/dashboards/content.ts +++ b/packages/api/src/mcp/prompts/dashboards/content.ts @@ -124,7 +124,7 @@ Apply these before calling clickstack_save_dashboard. Each rule is enforced by t ] Use tabs when one container needs to show different views of the same data (Throughput / Latency / Errors over time, for instance); the tab bar appears when a container has two or more tabs declared. -12. VALIDATE EVERY TILE AFTER SAVE. After clickstack_save_dashboard, call clickstack_query_tile on EVERY tile (not just one). Save validates input shape; it does NOT validate query semantics. Some queries pass save and fail at render time (known gaps: Lucene comparison/wildcard on map attributes, metric tiles with multiple metricTables, malformed having clauses). If query_tile returns an error, fix the tile and re-save before declaring the dashboard ready. +12. VALIDATE EVERY TILE AFTER SAVE. After clickstack_save_dashboard, call clickstack_query_tile on EVERY tile (not just one). Save validates input shape; it does NOT validate query semantics. Some queries pass save and fail at render time (known gaps: Lucene comparison/wildcard on map attributes, malformed having clauses). If query_tile returns an error, fix the tile and re-save before declaring the dashboard ready. 13. NO TITLE-RECAP MARKDOWN TILE. The dashboard's name shows in the title bar. Adding a markdown tile with the dashboard name (or a "About this dashboard" header) doubles the title and eats a row of vertical space because markdown heading styles render at title-bar scale. Skip the markdown tile entirely on starter dashboards. @@ -958,7 +958,29 @@ For configType: "sql" tiles, write ClickHouse SQL with template macros: search No select items (select is a column list string). where is the filter. markdown No select items. Set markdown field with content. -NOTE: Authoring builder tiles on a metric source is not reliable today. The MCP select-item shape does not carry the metricName / metricType fields the metric query path needs, and a save with a metric sourceId may render in the UI as "Both table name and UUID are empty" even though the save itself succeeded. For metrics, use a raw SQL tile (configType: "sql") with explicit table reference. The standard tables that back a metric source are otel_metrics_gauge, otel_metrics_sum, and otel_metrics_histogram; clickstack_list_sources returns the metric source's metricTables map so you know which table holds which metric kind. Discovery: metric source schemas today do NOT publish mapAttributeKeys for ResourceAttributes / Attributes the way log and trace sources do, so attribute keys must be discovered by sampling (SELECT DISTINCT mapKeys(Attributes) FROM ...). +== METRIC SOURCES == + +Builder tiles work on metric sources. Each select item on a metric tile MUST set metricType ("gauge" | "sum" | "histogram") and metricName (the OTel metric name, e.g. "system.cpu.utilization"). valueExpression defaults to "Value" when omitted, so a typical metric series is { aggFn: "", metricType: "", metricName: "" }. summary and "exponential histogram" kinds are not yet supported by the renderer. + +Per-kind aggregation guidance: + gauge Use aggFn:"last_value" | "avg" | "min" | "max". Set isDelta:true for Prometheus-style delta over each bucket. + sum Use aggFn:"increase" for the per-bucket counter increase (reset-aware), or aggFn:"sum" | "avg" on the computed rate. increase + groupBy is capped at the top 20 groups by the renderer; pre-filter via where or pick a coarser groupBy when you need broader coverage. + histogram Use aggFn:"quantile" with level ∈ {0.5, 0.9, 0.95, 0.99} for percentiles, or aggFn:"count" for the total bucket count. quantile without level is rejected. + +Discovery workflow for metrics: + 1. clickstack_list_sources: find the metric source ID and its metricTables map (which kinds are populated). + 2. clickstack_describe_source(sourceId): returns columns, attribute keys, low-cardinality values, AND a per-kind metric-name sample (up to 20 names per kind). + 3. clickstack_list_metrics(sourceId, ...): paginate the full metric catalog with optional kind + namePattern (ILIKE) + time-window filters. Pass nextCursor unchanged for the next page. + 4. clickstack_describe_metric(sourceId, metricName): drill into a single metric: kind(s), unit, description, attribute keys per map column, and sampled values per attribute. Attribute keys vary per metric, not per source, so always call this before authoring tiles for a metric you've never queried. + 5. clickstack_timeseries | clickstack_table: author the chart. Set metricType + metricName on each select item; pass the discovered attribute keys via groupBy / where. + +Examples: + Gauge p95-like spread by service (use last_value or avg): + { aggFn: "avg", metricType: "gauge", metricName: "system.cpu.utilization" }, groupBy: "ServiceName" + Sum counter increase: + { aggFn: "increase", metricType: "sum", metricName: "http.server.request.count", alias: "Requests" }, groupBy: "ServiceName" + Histogram p95 latency: + { aggFn: "quantile", level: 0.95, metricType: "histogram", metricName: "http.server.request.duration", alias: "P95 Latency" }, groupBy: "ServiceName" == NUMBER FORMAT == @@ -1244,7 +1266,7 @@ Example: find top patterns for production services over the last 4 hours: The dashboard filter applies globally; tiles do not need the literal. On mixed-source dashboards where only some tiles carry the column, add appliesToSourceIds: ["", ...] to scope the filter to just those sources instead of breaking the unrelated tiles. 8. Forgetting to validate tiles after saving - Always call clickstack_query_tile on EVERY tile after clickstack_save_dashboard, not just one. Save validates input shape but not query semantics. Several known gaps (Lucene comparison/wildcard on map attributes, builder tiles on metric sources, malformed having) pass save and fail at render time. A dashboard with one bad tile renders the whole page in a degraded state; the user sees "Error loading chart" with no way to know which tile broke unless you validated. If query_tile returns an error, fix the where / SQL and re-save before declaring the dashboard ready. + Always call clickstack_query_tile on EVERY tile after clickstack_save_dashboard, not just one. Save validates input shape but not query semantics. Several known gaps (Lucene comparison/wildcard on map attributes, malformed having) pass save and fail at render time. A dashboard with one bad tile renders the whole page in a degraded state; the user sees "Error loading chart" with no way to know which tile broke unless you validated. If query_tile returns an error, fix the where / SQL and re-save before declaring the dashboard ready. 9. Using connectionId with builder tiles, or omitting connectionId or sourceId on a single-table SQL tile Builder tiles (line, table, etc.) use sourceId (no connectionId). diff --git a/packages/api/src/mcp/tools/dashboards/schemas.ts b/packages/api/src/mcp/tools/dashboards/schemas.ts index e1dbad9674..0f543561ca 100644 --- a/packages/api/src/mcp/tools/dashboards/schemas.ts +++ b/packages/api/src/mcp/tools/dashboards/schemas.ts @@ -15,6 +15,17 @@ import { z } from 'zod'; import { externalQuantileLevelSchema, objectIdSchema } from '@/utils/zod'; +import { getMetricSelectIssues } from '../query/schemas'; +import { QUERYABLE_METRIC_KINDS } from '../sources/metricKinds'; + +/** + * Metric type values exposed on dashboard tile select items. Restricted to + * the three kinds the query renderer can translate today; summary and + * exponential histogram are intentionally excluded. Imports the shared + * `QUERYABLE_METRIC_KINDS` source-of-truth tuple from `../sources/metricKinds`. + */ +const mcpTileMetricTypeSchema = z.enum(QUERYABLE_METRIC_KINDS); + // ─── Shared tile schemas for MCP dashboard tools ───────────────────────────── const seriesLevelNumberFormatDescription = @@ -87,7 +98,10 @@ const mcpNumberFormatSchema = z.object({ const mcpTileSelectItemSchema = z .object({ aggFn: AggregateFunctionSchema.describe( - 'Aggregation function. "count" requires no valueExpression; all others do.', + 'Aggregation function. "count" requires no valueExpression; all others do. ' + + 'METRIC SOURCES: "increase" computes the per-bucket counter increase for Sum metrics ' + + '(reset-aware). For Gauges use last_value/avg/min/max. For Histograms use "quantile" ' + + 'with level or "count".', ), valueExpression: z .string() @@ -96,7 +110,9 @@ const mcpTileSelectItemSchema = z 'Column or expression to aggregate. Required for all aggFn except "count". ' + 'Use PascalCase for top-level columns (e.g. "Duration", "StatusCode"). ' + "For span attributes use: SpanAttributes['key'] (e.g. SpanAttributes['http.method']). " + - "For resource attributes use: ResourceAttributes['key'] (e.g. ResourceAttributes['service.name']).", + "For resource attributes use: ResourceAttributes['key'] (e.g. ResourceAttributes['service.name']).\n\n" + + 'METRIC SOURCES: optional — defaults to "Value" (the metric value column) when ' + + 'metricType/metricName are set.', ), where: z .string() @@ -115,29 +131,53 @@ const mcpTileSelectItemSchema = z ), level: externalQuantileLevelSchema .optional() - .describe('Percentile level for aggFn="quantile"'), + .describe( + 'Percentile level for aggFn="quantile". REQUIRED for histogram metrics with aggFn:"quantile".', + ), numberFormat: mcpNumberFormatSchema .optional() .describe(seriesLevelNumberFormatDescription), + metricType: mcpTileMetricTypeSchema + .optional() + .describe( + 'METRIC SOURCES ONLY. OTel metric kind: gauge, sum, or histogram. ' + + 'Required (with metricName) when the tile sourceId is a metric source. ' + + 'summary and exponential histogram are not supported by the renderer yet.', + ), + metricName: z + .string() + .optional() + .describe( + 'METRIC SOURCES ONLY. OTel metric name (e.g. "system.cpu.utilization"). ' + + 'Required when metricType is set.', + ), + isDelta: z + .boolean() + .optional() + .describe( + 'METRIC SOURCES ONLY (gauge metrics). When true, computes the Prometheus-style ' + + 'delta over each bucket. Default false.', + ), }) .superRefine((data, ctx) => { - if (data.level && data.aggFn !== 'quantile') { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - message: 'Level can only be used with quantile aggregation function', - }); - } - if (data.valueExpression && data.aggFn === 'count') { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - message: - 'Value expression cannot be used with count aggregation function', - }); - } else if (!data.valueExpression && data.aggFn !== 'count') { + const narrow = { + aggFn: typeof data.aggFn === 'string' ? data.aggFn : undefined, + metricType: + typeof data.metricType === 'string' ? data.metricType : undefined, + metricName: + typeof data.metricName === 'string' ? data.metricName : undefined, + isDelta: typeof data.isDelta === 'boolean' ? data.isDelta : undefined, + level: typeof data.level === 'number' ? data.level : undefined, + valueExpression: + typeof data.valueExpression === 'string' + ? data.valueExpression + : undefined, + }; + for (const issue of getMetricSelectIssues(narrow)) { ctx.addIssue({ code: z.ZodIssueCode.custom, - message: - 'Value expression is required for non-count aggregation functions', + path: issue.path, + message: issue.message, }); } }); diff --git a/packages/api/src/mcp/tools/query/helpers.ts b/packages/api/src/mcp/tools/query/helpers.ts index 87f59c362c..2c466dc0da 100644 --- a/packages/api/src/mcp/tools/query/helpers.ts +++ b/packages/api/src/mcp/tools/query/helpers.ts @@ -78,6 +78,90 @@ const MCP_CLICKHOUSE_SETTINGS = { */ const MCP_REQUEST_TIMEOUT = 32_000; // 30s query limit + 2s buffer +// ─── Increase top-N cap hint ──────────────────────────────────────────────── + +/** + * Group limit applied by the metric renderer when `aggFn:"increase"` is + * combined with `groupBy`. Mirrors `INCREASE_MAX_NUM_GROUPS` in + * `packages/common-utils/src/core/renderChartConfig.ts`. Surfaced to MCP + * callers as a hint so they can reason about why high-cardinality groupBys + * may be truncated. + */ +export const INCREASE_TOP_N_CAP = 20; + +/** + * Append a hint message onto a parsed tool response body. All hint + * writers share `hints: string[]` so multiple advisories can coexist + * (e.g. the increase top-N cap and the single-bucket collapse hint can + * legitimately both apply to the same result). + */ +export function appendHint(parsed: Record, hint: string) { + const existing = Array.isArray(parsed.hints) ? parsed.hints : []; + parsed.hints = [...existing, hint]; +} + +/** + * Count the distinct groupBy combinations present in a result set. + * Resolves each comma-separated groupBy segment against the row keys + * (the renderer aliases group columns by their literal expression). + * Returns null when no segment matches any row key — the caller should + * treat that as "count unknown". + */ +function countDistinctGroups( + data: Array>, + groupBy: string, +): number | null { + const segments = splitAndTrimWithBracket(groupBy); + const sample = data[0]; + if (!sample || typeof sample !== 'object') return null; + const resolvable = segments.filter(seg => seg in sample); + if (resolvable.length === 0) return null; + const seen = new Set(); + for (const row of data) { + seen.add(JSON.stringify(resolvable.map(seg => row[seg]))); + } + return seen.size; +} + +/** + * Mutates a successful tool result envelope in place to add a neutral + * informational hint when the renderer's increase+groupBy top-N cap + * likely applied. Safe to call unconditionally — does nothing for empty + * results, error results, queries that don't combine `aggFn:"increase"` + * with a non-empty `groupBy`, or results whose distinct group count is + * below the cap (no truncation possible). + */ +export function annotateIncreaseTopNHint( + result: { content?: { type: string; text?: string }[]; isError?: boolean }, + selectItems: ReadonlyArray<{ aggFn?: string }>, + groupBy: string | undefined, +): void { + if (result.isError) return; + const first = result.content?.[0]; + if (first?.type !== 'text' || typeof first.text !== 'string') return; + if (!groupBy || groupBy.trim() === '') return; + if (!selectItems.some(s => s.aggFn === 'increase')) return; + try { + const parsed = JSON.parse(first.text); + const data = parsed?.result?.data; + if (!Array.isArray(data) || data.length === 0) return; + // Only warn when the result actually carries enough distinct groups + // to have hit the renderer cap. When the group column cannot be + // resolved from the rows, skip the hint rather than crying wolf. + const distinctGroups = countDistinctGroups(data, groupBy); + if (distinctGroups === null || distinctGroups < INCREASE_TOP_N_CAP) return; + appendHint( + parsed, + `aggFn:"increase" combined with groupBy is capped at the top ${INCREASE_TOP_N_CAP} ` + + `groups by max bucket sum at the renderer layer. ` + + `Results may not include every group present in the underlying data.`, + ); + first.text = JSON.stringify(parsed, null, 2); + } catch { + // leave result unmodified on parse failure + } +} + // ─── Where merging ─────────────────────────────────────────────────────────── export interface MergeWhereResult { @@ -210,7 +294,9 @@ function formatQueryResult(result: unknown) { : {}), ...(empty ? { - hint: 'No data found in the queried time range. Try setting startTime to a wider window (e.g. 24 hours ago) or check that filters match existing data.', + hints: [ + 'No data found in the queried time range. Try setting startTime to a wider window (e.g. 24 hours ago) or check that filters match existing data.', + ], } : {}), }, @@ -222,6 +308,80 @@ function formatQueryResult(result: unknown) { }; } +// ─── Source-kind / select-shape guardrail ──────────────────────────────────── + +type SelectItemForKindCheck = { metricType?: unknown }; + +/** + * Reject builder-config queries where the select items' metric annotations + * don't match the source kind: + * - Non-metric source with any select item carrying `metricType` would + * fall through to SQL generation that references the metric `Value` + * column and fail with a cryptic ClickHouse error. + * - Metric source with no select item carrying `metricType` would also + * reach the renderer in a broken state. + * + * Catching both up-front gives the agent a clear next action that mirrors + * the wording on `clickstack_list_metrics` / `clickstack_describe_metric`. + * + * Returns an error envelope on mismatch, or `null` when the select shape + * is consistent with the source kind (or the select is a raw string that + * the caller already parsed by hand). + */ +export function assertSourceKindMatchesSelect( + source: { kind: string }, + select: unknown, +): { isError: true; content: [{ type: 'text'; text: string }] } | null { + // Raw-string select (rare on the builder path) — the renderer handles + // it; no metric annotations to inspect. + if (typeof select === 'string') return null; + if (!Array.isArray(select)) return null; + + const metricItemCount = (select as SelectItemForKindCheck[]).filter( + item => + typeof item === 'object' && + item !== null && + typeof item.metricType === 'string' && + item.metricType.length > 0, + ).length; + + const isMetricSource = source.kind === SourceKind.Metric; + + if (isMetricSource && metricItemCount === 0) { + return { + isError: true as const, + content: [ + { + type: 'text' as const, + text: + 'Source kind is "metric", but no select item specifies metricType + metricName. ' + + 'Each select item on a metric source must set metricType ("gauge" | "sum" | "histogram") ' + + 'and metricName (e.g. metricName:"system.cpu.utilization"). Call ' + + 'clickstack_describe_source or clickstack_list_metrics to discover available metric names.', + }, + ], + }; + } + + if (!isMetricSource && metricItemCount > 0) { + return { + isError: true as const, + content: [ + { + type: 'text' as const, + text: + `Source kind is "${source.kind}", not metric — but ${metricItemCount} select item(s) ` + + 'set metricType. metricType + metricName only work on metric sources. ' + + 'Drop the metric fields to query this source, or call clickstack_list_sources to find a ' + + 'source whose kind is "metric".', + }, + ], + }; + } + + return null; +} + // ─── Tile execution ────────────────────────────────────────────────────────── export async function runConfigTile( @@ -273,6 +433,14 @@ export async function runConfigTile( }; } + // Reject metric-style select against a non-metric source (and vice + // versa) before the renderer composes SQL against the wrong table. + const kindMismatch = assertSourceKindMatchesSelect( + source, + builderConfig.select, + ); + if (kindMismatch) return kindMismatch; + const connection = await getConnectionById( teamId, source.connection.toString(), @@ -298,6 +466,7 @@ export async function runConfigTile( }); const isSearch = builderConfig.displayType === DisplayType.Search; + const isMetricSource = source.kind === SourceKind.Metric; const defaultTableSelect = 'defaultTableSelectExpression' in source ? source.defaultTableSelectExpression @@ -327,13 +496,37 @@ export async function runConfigTile( } : {}; + // Metric sources need three adjustments before the renderer can + // translate the query: + // 1. `from.tableName` is blank — the renderer picks the correct + // per-kind ClickHouse table from `metricTables` at render time. + // 2. `metricTables` must be threaded onto the chart config (mirrors + // packages/api/src/routers/external-api/v2/charts.ts:261-267). + // 3. Each select item's `valueExpression` defaults to "Value" (the + // metric value column) when missing — agents normally omit it. + const metricSelectOverrides = + isMetricSource && Array.isArray(builderConfig.select) + ? { + select: builderConfig.select.map(item => + typeof item === 'string' + ? item + : { + ...item, + valueExpression: item.valueExpression ?? 'Value', + }, + ), + } + : {}; + const chartConfig = { ...builderConfig, ...searchOverrides, + ...metricSelectOverrides, from: { databaseName: source.from.databaseName, - tableName: source.from.tableName, + tableName: isMetricSource ? '' : source.from.tableName, }, + ...(isMetricSource && { metricTables: source.metricTables }), connection: source.connection.toString(), timestampValueExpression: source.timestampValueExpression, implicitColumnExpression: implicitColumn, diff --git a/packages/api/src/mcp/tools/query/schemas.ts b/packages/api/src/mcp/tools/query/schemas.ts index 6bcdd91ab4..486bfbf2d1 100644 --- a/packages/api/src/mcp/tools/query/schemas.ts +++ b/packages/api/src/mcp/tools/query/schemas.ts @@ -1,5 +1,7 @@ import { z } from 'zod'; +import { QUERYABLE_METRIC_KINDS } from '../sources/metricKinds'; + // ─── Shared description fragments ──────────────────────────────────────────── const WHERE_DESCRIPTION = @@ -48,6 +50,10 @@ export const MCP_AGG_FN_OPTIONS = [ 'quantile', 'sum', 'none', + // 'increase' is only valid for Sum (counter) metrics. The renderer + // computes the per-bucket counter increase with reset-handling; the + // bare aggFn string maps to that behavior, not a SQL function. + 'increase', ] as const; const mcpAggFnSchema = z @@ -59,9 +65,146 @@ const mcpAggFnSchema = z ' count_distinct – unique value count (valueExpression required)\n' + ' quantile – percentile; also set level (valueExpression required)\n' + ' last_value – most recent value of a column\n' + - ' none – pass a raw expression through unchanged', + ' none – pass a raw expression through unchanged\n' + + ' increase – METRIC-ONLY (Sum/counter): per-bucket counter increase, ' + + 'reset-aware. Requires metricType:"sum" and metricName.', ); +/** + * Metric type values exposed to MCP tool callers. Restricted to the three + * kinds the renderer can translate today; summary and exponential histogram + * are intentionally excluded. See `../sources/metricKinds` for the shared + * source-of-truth constant used by every metric-aware tool. + */ +const mcpMetricTypeSchema = z + .enum(QUERYABLE_METRIC_KINDS) + .describe( + 'METRIC SOURCES ONLY. OTel metric kind. Required (along with metricName) ' + + 'when querying a metric source — discover via clickstack_describe_source ' + + 'or clickstack_describe_metric.\n' + + ' gauge – instantaneous values (CPU, memory, queue depth). Use last_value/avg/min/max; set isDelta:true for Prometheus-style delta over the bucket.\n' + + ' sum – cumulative or delta counters (request counts, bytes processed). Use aggFn:"increase" for counter increase, or sum/avg on the computed rate.\n' + + ' histogram – bucketed distributions (request duration). Use aggFn:"quantile" with level for percentiles, or aggFn:"count" for total bucket count.\n' + + 'NOTE: summary and exponential histogram are not supported by the query renderer yet.', + ); + +/** + * Shared cross-field validation issues for MCP select items. Returns the + * list of Zod issues a `.superRefine` callback should emit. Kept as a pure + * function so both `mcpSelectItemSchema` (query tools) and + * `mcpTileSelectItemSchema` (dashboard tile tools) can call it inline + * without widening Zod's output type inference. + * + * Constraints enforced: + * - metricType + metricName must be set together + * - aggFn:"increase" is Sum-only + * - histogram metrics only support quantile (with level) or count + * - isDelta is Gauge-only + * - level still requires aggFn:"quantile" + * - valueExpression is required for non-count aggFns UNLESS metricType is + * set (defaults to "Value" for metric sources) + * + * Note: summary and exponential histogram metric kinds are not in the input + * enum so they cannot reach this function — they are rejected by the field + * schema itself. + */ +export function getMetricSelectIssues(data: { + aggFn?: string; + metricType?: string; + metricName?: string; + isDelta?: boolean; + level?: number; + valueExpression?: string; +}): { path: (string | number)[]; message: string }[] { + const issues: { path: (string | number)[]; message: string }[] = []; + + // metricType ↔ metricName must be set together + if (data.metricType && !data.metricName) { + issues.push({ + path: ['metricName'], + message: + 'metricName is required when metricType is set. Discover metric names ' + + 'via clickstack_list_metrics or clickstack_describe_source.', + }); + } + if (data.metricName && !data.metricType) { + issues.push({ + path: ['metricType'], + message: + 'metricType is required when metricName is set. Use one of: gauge, sum, histogram.', + }); + } + + // increase is Sum-only + if (data.aggFn === 'increase' && data.metricType !== 'sum') { + issues.push({ + path: ['aggFn'], + message: + 'aggFn "increase" is only valid for sum (counter) metrics. ' + + 'Set metricType:"sum" and metricName, or pick a different aggFn.', + }); + } + + // Histogram supports only quantile (+ level) or count today + if (data.metricType === 'histogram') { + if (data.aggFn !== 'quantile' && data.aggFn !== 'count') { + issues.push({ + path: ['aggFn'], + message: + 'Histogram metrics only support aggFn "quantile" (with level) or "count" today.', + }); + } + if (data.aggFn === 'quantile' && data.level == null) { + issues.push({ + path: ['level'], + message: + 'level is required when aggFn is "quantile" on a histogram metric. ' + + 'Use 0.5, 0.9, 0.95, or 0.99.', + }); + } + } + + // isDelta is Gauge-only + if (data.isDelta && data.metricType !== 'gauge') { + issues.push({ + path: ['isDelta'], + message: 'isDelta is only valid for gauge metrics (metricType:"gauge").', + }); + } + + // level requires aggFn:"quantile" + if (data.level != null && data.aggFn !== 'quantile') { + issues.push({ + path: ['level'], + message: 'level is only valid with aggFn:"quantile".', + }); + } + + // valueExpression rules: + // - "count" never takes a valueExpression (existing rule) + // - non-count aggFns require valueExpression UNLESS metricType is set + // (metric sources default valueExpression to "Value" in the helper) + if (data.valueExpression && data.aggFn === 'count') { + issues.push({ + path: ['valueExpression'], + message: 'valueExpression cannot be used with aggFn:"count".', + }); + } else if ( + !data.valueExpression && + data.aggFn !== 'count' && + !data.metricType + ) { + issues.push({ + path: ['valueExpression'], + message: + 'valueExpression is required for non-count aggregation functions ' + + '(or set metricType to query a metric source, which defaults valueExpression to "Value").', + }); + } + + return issues; +} + export const mcpSelectItemSchema = z.object({ aggFn: mcpAggFnSchema, valueExpression: z @@ -74,7 +217,10 @@ export const mcpSelectItemSchema = z.object({ 'Any ClickHouse expression is allowed — common useful forms: ' + '"Duration / 1e6" (ns→ms), ' + '"toFloat64OrZero(SpanAttributes[\'response.size_bytes\'])" (cast attribute), ' + - '"if(StatusCode = \'STATUS_CODE_ERROR\', 1, 0)" (boolean→numeric for ratios).', + '"if(StatusCode = \'STATUS_CODE_ERROR\', 1, 0)" (boolean→numeric for ratios).\n\n' + + 'METRIC SOURCES: optional — defaults to "Value" (the metric value column) when ' + + 'metricType/metricName are set. Set explicitly only if you want to transform the ' + + 'metric value (e.g. "Value / 1e6").', ), where: z .string() @@ -113,10 +259,105 @@ export const mcpSelectItemSchema = z.object({ .union([z.literal(0.5), z.literal(0.9), z.literal(0.95), z.literal(0.99)]) .optional() .describe( - 'Percentile level. Only applicable when aggFn is "quantile". ' + + 'Percentile level. Required when aggFn is "quantile" on a histogram metric, ' + + 'optional otherwise. ' + 'Allowed values: 0.5, 0.9, 0.95, 0.99', ), + metricType: mcpMetricTypeSchema + .optional() + .describe( + 'METRIC SOURCES ONLY. OTel metric kind: gauge, sum, or histogram. ' + + 'Required (with metricName) when sourceId is a metric source. ' + + 'Discover via clickstack_describe_source (sample) or clickstack_describe_metric.', + ), + metricName: z + .string() + .optional() + .describe( + 'METRIC SOURCES ONLY. OTel metric name (e.g. "system.cpu.utilization", ' + + '"http.server.request.duration"). Required when metricType is set. ' + + 'Discover via clickstack_list_metrics or clickstack_describe_source.', + ), + isDelta: z + .boolean() + .optional() + .describe( + 'METRIC SOURCES ONLY (gauge metrics). When true, computes the Prometheus-style ' + + 'delta over each bucket: (argMax(Value) - argMin(Value)) * bucketSecs / timeDiff. ' + + 'Use for cumulative gauges where you want to chart growth per bucket. Default false.', + ), }); +// NOTE: cross-field validation (metric rules + level + valueExpression) is +// applied imperatively in the timeseries / table tool handlers via +// `validateMetricSelectItems`. We intentionally skip `.superRefine` here +// because Zod 3.x widens optional-field types post-refine, which breaks +// strict-typed downstream consumers like `mergeWhereIntoSelectItems` and +// `resolveOrderBy`. The dashboard tile schema, whose consumers don't trip +// on the widening, keeps its own `.superRefine`. + +/** + * Concrete shape of a parsed MCP select item. Mirrors the runtime values + * produced by `mcpSelectItemSchema` — needed as an explicit type because + * Zod 3.x's structural inference of `z.object({...})` callbacks (e.g. the + * MCP SDK's tool-handler input) can widen optional-field types into + * `unknown`. Cast `input.select` to `McpSelectItem[]` at tool boundaries. + */ +export type McpSelectItem = { + aggFn: string; + valueExpression?: string; + where?: string; + whereLanguage?: 'lucene' | 'sql'; + alias?: string; + level?: number; + metricType?: 'gauge' | 'sum' | 'histogram'; + metricName?: string; + isDelta?: boolean; +}; + +/** + * Default `valueExpression` to `"Value"` for every metric-tagged select + * item that omits it. Must be called BEFORE `buildTile`, because the + * external dashboard tile schema's `superRefine` rejects non-count + * aggregations with an empty `valueExpression`. The runtime renderer + * looks for `Value` on metric tables, so defaulting it here matches what + * `external-api/v2/charts.ts:240-250` does on the REST path. + */ +export function applyMetricSelectDefaults( + items: ReadonlyArray, +): T[] { + return items.map(item => + item.metricType && !item.valueExpression + ? { ...item, valueExpression: 'Value' } + : item, + ); +} + +/** + * Apply `getMetricSelectIssues` to every select item in a tool input. + * Returns an error-shaped tool response when any issue is detected, or + * `null` when all items pass. Call this from a tool handler before + * passing items to `runConfigTile`. + */ +export function validateMetricSelectItems( + items: ReadonlyArray, +): { isError: true; content: [{ type: 'text'; text: string }] } | null { + const errors: string[] = []; + items.forEach((item, idx) => { + for (const issue of getMetricSelectIssues(item)) { + errors.push(`select[${idx}].${issue.path.join('.')}: ${issue.message}`); + } + }); + if (errors.length === 0) return null; + return { + isError: true as const, + content: [ + { + type: 'text' as const, + text: errors.join('\n'), + }, + ], + }; +} export const startTimeSchema = z .string() diff --git a/packages/api/src/mcp/tools/query/table.ts b/packages/api/src/mcp/tools/query/table.ts index 41b7e44f0b..0f90f2c7dd 100644 --- a/packages/api/src/mcp/tools/query/table.ts +++ b/packages/api/src/mcp/tools/query/table.ts @@ -4,19 +4,23 @@ import { z } from 'zod'; import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; import { + annotateIncreaseTopNHint, buildTile, mergeWhereIntoSelectItems, parseTimeRange, runConfigTile, } from './helpers'; import { + applyMetricSelectDefaults, endTimeSchema, groupBySchema, MCP_AGG_FN_OPTIONS, + McpSelectItem, mcpSelectItemSchema, orderBySchema, sourceIdSchema, startTimeSchema, + validateMetricSelectItems, whereLanguageSchema, whereSchema, } from './schemas'; @@ -61,9 +65,13 @@ const tableSchema = z.object({ // ─── orderBy resolution ────────────────────────────────────────────────────── /** Aggregation function names that ClickHouse cannot resolve as bare identifiers in ORDER BY. - * 'none' is excluded — it passes a raw expression through unchanged and has no synthesizable form. */ + * Excluded: + * - 'none' passes a raw expression through unchanged and has no synthesizable form. + * - 'increase' is a metric-only renderer marker that compiles to a multi-CTE + * sum(Rate) pipeline — there is no standalone SQL function to synthesize. + * The renderer auto-aliases the resulting column; agents should orderBy by alias. */ const AGG_FN_NAMES: ReadonlySet = new Set( - MCP_AGG_FN_OPTIONS.filter(fn => fn !== 'none'), + MCP_AGG_FN_OPTIONS.filter(fn => fn !== 'none' && fn !== 'increase'), ); /** @@ -170,7 +178,18 @@ export function registerTable(server: McpServer, context: McpContext) { 'Map attributes work in groupBy and valueExpression, including ' + "toFloat64OrZero(SpanAttributes['key']).\n\n" + 'Shape auto-upgrade: if shape is "number" or "pie" but select has >1 item, ' + - 'it is transparently upgraded to "table".', + 'it is transparently upgraded to "table".\n\n' + + '── METRIC SOURCES ──\n' + + 'When sourceId is a metric source, each select item MUST set ' + + 'metricType ("gauge"|"sum"|"histogram") and metricName (the OTel metric name). ' + + 'valueExpression defaults to "Value" — set it explicitly only to transform the value.\n' + + 'Discovery: clickstack_describe_source returns a per-kind metric-name sample; ' + + 'clickstack_list_metrics paginates the full catalog; clickstack_describe_metric ' + + 'returns attribute keys + sampled values for a single metric.\n' + + 'Per kind: gauge uses last_value/avg/min/max; sum uses aggFn:"increase" for counter increase ' + + '(top-N capped at 20 groups when combined with groupBy), or sum/avg on the rate; ' + + 'histogram uses aggFn:"quantile" + level for percentiles, or aggFn:"count" for total bucket count.\n' + + 'summary and exponential histogram kinds are not supported by the query renderer yet.', inputSchema: tableSchema, }, withToolTracing('clickstack_table', context, async input => { @@ -183,12 +202,30 @@ export function registerTable(server: McpServer, context: McpContext) { } const { startDate, endDate } = timeRange; + // Cast to the concrete `McpSelectItem[]` because Zod 3.x widens + // optional-field inference at the MCP-SDK tool boundary; the + // runtime parser still produces the correct shape. + const rawSelect = input.select as McpSelectItem[]; + + // Validate cross-field constraints (metric rules, level/quantile, + // valueExpression presence) and surface friendly errors before we + // touch ClickHouse. + const validation = validateMetricSelectItems(rawSelect); + if (validation) return validation; + + // Default valueExpression="Value" for metric items BEFORE we call + // buildTile, because the external dashboard tile schema's + // superRefine rejects non-count aggregations with empty + // valueExpression and agents normally omit the field on metric + // queries. + const select = applyMetricSelectDefaults(rawSelect); + // Auto-upgrade shape when select has multiple items but shape is // single-value (number/pie). This is the #1 Zod error class from agents. let displayType: 'table' | 'number' | 'pie' = input.shape; if ( (displayType === 'number' || displayType === 'pie') && - input.select.length > 1 + select.length > 1 ) { displayType = 'table'; } @@ -197,11 +234,7 @@ export function registerTable(server: McpServer, context: McpContext) { // of the aggCondition for every metric. Table/line/number/pie display // types don't have a chart-level where — filtering is per-select-item. const { items: selectItems, warnings: mergeWarnings } = - mergeWhereIntoSelectItems( - input.select, - input.where, - input.whereLanguage, - ); + mergeWhereIntoSelectItems(select, input.where, input.whereLanguage); const tile = buildTile('MCP Table', 12, 4, { displayType, @@ -230,6 +263,10 @@ export function registerTable(server: McpServer, context: McpContext) { } } + // Surface the increase+groupBy top-N cap so the agent knows results + // may be truncated to 20 groups. + annotateIncreaseTopNHint(result, select, input.groupBy); + return result; }), ); diff --git a/packages/api/src/mcp/tools/query/timeseries.ts b/packages/api/src/mcp/tools/query/timeseries.ts index 283effb48a..0f80041da5 100644 --- a/packages/api/src/mcp/tools/query/timeseries.ts +++ b/packages/api/src/mcp/tools/query/timeseries.ts @@ -4,18 +4,23 @@ import { z } from 'zod'; import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; import { + annotateIncreaseTopNHint, + appendHint, buildTile, mergeWhereIntoSelectItems, parseTimeRange, runConfigTile, } from './helpers'; import { + applyMetricSelectDefaults, endTimeSchema, groupBySchema, + McpSelectItem, mcpSelectItemSchema, orderBySchema, sourceIdSchema, startTimeSchema, + validateMetricSelectItems, whereLanguageSchema, whereSchema, } from './schemas'; @@ -79,7 +84,20 @@ export function registerTimeseries(server: McpServer, context: McpContext) { 'Requires sourceId — call clickstack_list_sources then clickstack_describe_source first. ' + 'Each select item defines one plotted series.\n\n' + 'Column naming: top-level columns are PascalCase (Duration, StatusCode). ' + - "Map attributes use bracket syntax: SpanAttributes['http.method'].", + "Map attributes use bracket syntax: SpanAttributes['http.method'].\n\n" + + '── METRIC SOURCES ──\n' + + 'When sourceId is a metric source, each select item MUST set ' + + 'metricType ("gauge"|"sum"|"histogram") and metricName (the OTel metric name). ' + + 'valueExpression defaults to "Value" — set it explicitly only to transform the value.\n' + + 'Discovery: clickstack_describe_source returns a per-kind metric-name sample; ' + + 'clickstack_list_metrics paginates the full catalog; clickstack_describe_metric ' + + 'returns attribute keys + sampled values for a single metric.\n' + + 'Per kind: gauge uses last_value/avg/min/max (or aggFn:any + isDelta:true for Prometheus-style delta); ' + + 'sum uses aggFn:"increase" for the counter increase, or sum/avg on the computed rate; ' + + 'histogram uses aggFn:"quantile" + level for percentiles, or aggFn:"count" for total bucket count.\n' + + 'TOP-N CAP: aggFn:"increase" + groupBy is capped at 20 groups by the renderer ' + + '(top by max bucket sum). Narrow with where/groupBy to see other groups.\n' + + 'summary and exponential histogram kinds are not supported by the query renderer yet.', inputSchema: timeseriesSchema, }, withToolTracing('clickstack_timeseries', context, async input => { @@ -92,14 +110,28 @@ export function registerTimeseries(server: McpServer, context: McpContext) { } const { startDate, endDate } = timeRange; + // Cast to the concrete `McpSelectItem[]` because Zod 3.x widens + // optional-field inference at the MCP-SDK tool boundary; the + // runtime parser still produces the correct shape. + const rawSelect = input.select as McpSelectItem[]; + + // Validate cross-field constraints (metric rules, level/quantile, + // valueExpression presence) and surface friendly errors before we + // touch ClickHouse. + const validation = validateMetricSelectItems(rawSelect); + if (validation) return validation; + + // Default valueExpression="Value" for metric items BEFORE we call + // buildTile, because the external dashboard tile schema's + // superRefine rejects non-count aggregations with empty + // valueExpression and agents normally omit the field on metric + // queries. + const select = applyMetricSelectDefaults(rawSelect); + // Inject top-level where into each select item (timeseries uses // per-select-item aggCondition, not chart-level where) const { items: selectItems, warnings: mergeWarnings } = - mergeWhereIntoSelectItems( - input.select, - input.where, - input.whereLanguage, - ); + mergeWhereIntoSelectItems(select, input.where, input.whereLanguage); const tile = buildTile('MCP Timeseries', 12, 4, { displayType: input.shape, @@ -129,6 +161,10 @@ export function registerTimeseries(server: McpServer, context: McpContext) { } } + // Surface the increase+groupBy top-N cap so the agent knows results + // may be truncated to 20 groups. + annotateIncreaseTopNHint(result, select, input.groupBy); + // Detect single-bucket collapse: when a timeseries query returns only // 1 row, the data likely collapsed into a single time bucket. Add a // hint so the agent knows to adjust the time range. @@ -140,12 +176,14 @@ export function registerTimeseries(server: McpServer, context: McpContext) { const parsed = JSON.parse(result.content[0].text); const data = parsed?.result?.data; if (Array.isArray(data) && data.length === 1 && input.granularity) { - parsed.hint = + appendHint( + parsed, `Timeseries returned only 1 time bucket. ` + - `All data may have collapsed into a single "${input.granularity}" bucket. ` + - `The queried range was ${startDate.toISOString()} to ${endDate.toISOString()}. ` + - `If this looks wrong, adjust startTime/endTime to match the actual data range, ` + - `or try a coarser granularity.`; + `All data may have collapsed into a single "${input.granularity}" bucket. ` + + `The queried range was ${startDate.toISOString()} to ${endDate.toISOString()}. ` + + `If this looks wrong, adjust startTime/endTime to match the actual data range, ` + + `or try a coarser granularity.`, + ); result.content[0].text = JSON.stringify(parsed, null, 2); } } catch { diff --git a/packages/api/src/mcp/tools/sources/describeMetric.ts b/packages/api/src/mcp/tools/sources/describeMetric.ts new file mode 100644 index 0000000000..d481652c46 --- /dev/null +++ b/packages/api/src/mcp/tools/sources/describeMetric.ts @@ -0,0 +1,783 @@ +import { + chSql, + concatChSql, + convertCHDataTypeToJSType, + filterColumnMetaByType, + JSDataType, + tableExpr, +} from '@hyperdx/common-utils/dist/clickhouse'; +import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; +import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata'; +import { SourceKind } from '@hyperdx/common-utils/dist/types'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { z } from 'zod'; + +import { getConnectionById } from '@/controllers/connection'; +import { getSource } from '@/controllers/sources'; +import logger from '@/utils/logger'; +import { trimToolResponse } from '@/utils/trimToolResponse'; + +import { withToolTracing } from '../../utils/tracing'; +import type { McpContext } from '../types'; +import { + QUERYABLE_METRIC_KINDS, + type QueryableMetricKind, +} from './metricKinds'; + +const DEFAULT_LOOKBACK_MS = 24 * 60 * 60 * 1000; +const DESCRIBE_TIMEOUT_MS = 10_000; + +// Server-side safety nets for the attribute-keys discovery query. +// Sample at most N rows that match (MetricName, time range), then +// aggregate from that sample. ClickHouse can stop scanning once the +// CTE has N matching rows so a hot metric does not starve the +// wall-clock budget. 100k rows is plenty to surface every unique map +// key on a healthy OTel metric. +const METRIC_ATTR_KEYS_SAMPLE_SIZE = 100_000; +const METRIC_ATTR_KEYS_MAX_EXEC_SECONDS = 8; + +// Max sampled values per attribute key (when sampleValues is true). +const MAX_ATTR_VALUES = 10; +// Cap on the number of attribute keys we sample values for per kind to +// avoid runaway fan-out on high-cardinality metrics. +const MAX_ATTR_KEYS_TO_SAMPLE = 12; + +// Per-kind aggregation guidance baked into the response so the agent can +// build a valid clickstack_timeseries / clickstack_table call without +// re-reading the schemas. +const KIND_USAGE: Record = { + gauge: + 'Gauge: use aggFn:"last_value"|"avg"|"min"|"max" on Value. Set isDelta:true for Prometheus-style delta over each bucket.', + sum: 'Sum (counter): use aggFn:"increase" for the per-bucket counter increase (reset-aware), or aggFn:"sum"/"avg" on the computed rate. increase+groupBy is capped at the top 20 groups.', + histogram: + 'Histogram: use aggFn:"quantile" with level ∈ {0.5, 0.9, 0.95, 0.99} for percentiles, or aggFn:"count" for the total bucket count.', +}; + +// ─── Schema ────────────────────────────────────────────────────────────────── + +const describeMetricSchema = z.object({ + sourceId: z + .string() + .describe( + 'Source ID. Must reference a metric source — get IDs from clickstack_list_sources.', + ), + metricName: z + .string() + .min(1) + .describe( + 'OTel metric name to describe (e.g. "system.cpu.utilization", "http.server.request.duration"). ' + + 'Discover via clickstack_list_metrics or clickstack_describe_source.', + ), + kind: z + .enum(QUERYABLE_METRIC_KINDS) + .describe( + 'Metric kind: "gauge" | "sum" | "histogram". Required. ' + + 'Discover via clickstack_list_metrics (which returns name + kind per entry) ' + + 'or clickstack_describe_source (which groups metric-name samples by kind). ' + + 'A metric name can legitimately live in more than one kind (e.g. ' + + '"container.cpu.usage" appears in both gauge and sum) — pick the kind ' + + 'matching the value you want to query.', + ), + startTime: z + .string() + .optional() + .describe( + 'Restrict discovery to data points after this ISO 8601 timestamp. Default: 24 hours before endTime (or now).', + ), + endTime: z + .string() + .optional() + .describe('End of the discovery window as ISO 8601. Default: now.'), + sampleValues: z + .boolean() + .optional() + .default(true) + .describe( + 'When true (default), sample up to ' + + `${MAX_ATTR_VALUES} distinct values per attribute key for the top ${MAX_ATTR_KEYS_TO_SAMPLE} attributes. ` + + 'Set false to skip value sampling for faster responses on high-cardinality metrics.', + ), +}); + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +type AttributeValuesMeta = { + /** Display names (`MapColumn['key']`) queried for value samples. */ + sampledKeys: string[]; + /** Display names skipped because the MAX_ATTR_KEYS_TO_SAMPLE cap fired. */ + truncatedKeys: string[]; +}; + +type KindDetail = { + kind: QueryableMetricKind; + tableName: string; + unit?: string; + description?: string; + attributeKeys: Record; + attributeValues?: Record; + attributeValuesMeta?: AttributeValuesMeta; + usage: string; +}; + +/** + * Discriminated result for the discovery sub-queries so the caller can + * distinguish "fetch failed" from "genuinely empty" — the two cases + * need different agent guidance (retry/report vs. widen the window). + */ +type FetchResult = { ok: true; data: T } | { ok: false; error: string }; + +/** + * Compact an error for inclusion in a tool response: single line, + * capped length, no stack frames. + */ +function sanitizeFetchError(e: unknown): string { + const message = e instanceof Error ? e.message : String(e); + return message.replace(/\s+/g, ' ').trim().slice(0, 200); +} + +function parseTimeRange( + startTime?: string, + endTime?: string, +): { error: string } | { startDate: Date; endDate: Date } { + const endDate = endTime ? new Date(endTime) : new Date(); + const startDate = startTime + ? new Date(startTime) + : new Date(endDate.getTime() - DEFAULT_LOOKBACK_MS); + if (isNaN(endDate.getTime()) || isNaN(startDate.getTime())) { + return { + error: 'Invalid startTime or endTime: must be valid ISO 8601 strings', + }; + } + if (startDate >= endDate) { + return { error: 'endTime must be greater than startTime' }; + } + return { startDate, endDate }; +} + +/** + * Fetch unit and description for a metric name on a single kind table. + * Returns undefined values when the columns are absent from the schema. + */ +async function fetchUnitAndDescription({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName, + startDate, + endDate, + hasUnit, + hasDescription, + signal, +}: { + clickhouseClient: ClickhouseClient; + databaseName: string; + tableName: string; + connectionId: string; + metricName: string; + startDate: Date; + endDate: Date; + hasUnit: boolean; + hasDescription: boolean; + signal: AbortSignal; +}): Promise<{ unit?: string; description?: string }> { + if (!hasUnit && !hasDescription) return {}; + const projections = [ + ...(hasUnit + ? [chSql`anyLast(${{ Identifier: 'MetricUnit' }}) AS MetricUnit`] + : []), + ...(hasDescription + ? [ + chSql`anyLast(${{ Identifier: 'MetricDescription' }}) AS MetricDescription`, + ] + : []), + ]; + const sql = chSql` + SELECT ${concatChSql(', ', projections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName = ${{ String: metricName }} + AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }}) + AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }}) + `; + try { + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + abort_signal: signal, + }); + const result = (await response.json()) as { + data: Array<{ MetricUnit?: string; MetricDescription?: string }>; + }; + const row = result.data[0]; + if (!row) return {}; + return { + ...(row.MetricUnit ? { unit: row.MetricUnit } : {}), + ...(row.MetricDescription ? { description: row.MetricDescription } : {}), + }; + } catch (e) { + logger.warn( + { tableName, error: e instanceof Error ? e.message : String(e) }, + 'fetchUnitAndDescription failed', + ); + return {}; + } +} + +/** + * Discover the distinct attribute keys present for a single metric + * name on a single kind's table, grouped by the Map column they live on + * (typically ResourceAttributes / Attributes / ScopeAttributes on the + * OTel Collector default schema). Issues one query per Map column with + * `mapKeys(col) AS keys` and aggregates the distinct keys server-side. + * + * Bounds the scan two ways so it stays fast on production-shaped metric + * tables (where a single MetricName can match millions of rows): + * - WHERE TimeUnix BETWEEN startDate AND endDate scopes to the + * discovery window the caller already passed. + * - max_rows_to_read caps the per-query scan server-side so a hot + * metric cannot starve the wall-clock budget on its own. + * + * The earlier inline SQL did neither — an unbounded + * `WHERE MetricName = ?` scan consistently exceeded the wall-clock + * timeout on production tables. + */ +async function fetchAttributeKeys({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName, + columns, + startDate, + endDate, + signal, +}: { + clickhouseClient: ClickhouseClient; + databaseName: string; + tableName: string; + connectionId: string; + metricName: string; + columns: { name: string; type: string }[]; + startDate: Date; + endDate: Date; + signal: AbortSignal; +}): Promise>> { + const mapColumns = + filterColumnMetaByType(columns, [JSDataType.Map])?.filter( + c => convertCHDataTypeToJSType(c.type) === JSDataType.Map, + ) ?? []; + if (mapColumns.length === 0) return { ok: true, data: {} }; + + const sampleProjections = mapColumns.map( + col => chSql`${{ Identifier: col.name }}`, + ); + + const aggProjections = mapColumns.map( + col => + chSql`arrayDistinct(arrayFlatten(groupArray(mapKeys(${{ Identifier: col.name }})))) AS ${{ Identifier: col.name }}`, + ); + + // Aggregate from a bounded sample of matching rows. The inner LIMIT + // lets ClickHouse stop scanning once it has SAMPLE_SIZE rows that + // match (MetricName, time range), which keeps the query fast on hot + // metric tables. ResourceAttributes / Attributes / ScopeAttributes + // are rarely keyed independently per row, so 100k rows surfaces + // every realistic key set. + const sql = chSql` + SELECT ${concatChSql(', ', aggProjections)} + FROM ( + SELECT ${concatChSql(', ', sampleProjections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName = ${{ String: metricName }} + AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }}) + AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }}) + LIMIT ${{ Int32: METRIC_ATTR_KEYS_SAMPLE_SIZE }} + ) + `; + + try { + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + clickhouse_settings: { + max_execution_time: METRIC_ATTR_KEYS_MAX_EXEC_SECONDS, + timeout_overflow_mode: 'break', + }, + abort_signal: signal, + }); + const result = (await response.json()) as { + data: Array>; + }; + const row = result.data[0]; + if (!row) return { ok: true, data: {} }; + const out: Record = {}; + for (const col of mapColumns) { + const keys = row[col.name]; + if (Array.isArray(keys) && keys.length > 0) { + out[col.name] = keys.filter(k => typeof k === 'string' && k.length > 0); + } + } + return { ok: true, data: out }; + } catch (e) { + logger.warn( + { tableName, error: e instanceof Error ? e.message : String(e) }, + 'fetchAttributeKeys failed', + ); + return { ok: false, error: sanitizeFetchError(e) }; + } +} + +type SampleAttributeValuesData = { + /** Keys with at least one non-empty sampled value. */ + values: Record; + meta: AttributeValuesMeta; +}; + +/** + * Sample distinct values per attribute key. Uses one composed + * groupArray-per-key query so all keys are fetched in a single round + * trip. + * + * The returned meta records which keys were actually queried + * (`sampledKeys`) vs. skipped by the MAX_ATTR_KEYS_TO_SAMPLE cap + * (`truncatedKeys`) so callers can distinguish "not sampled" from + * "sampled but empty in the window". + */ +async function sampleAttributeValues({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName, + attributeKeys, + startDate, + endDate, + signal, +}: { + clickhouseClient: ClickhouseClient; + databaseName: string; + tableName: string; + connectionId: string; + metricName: string; + // attributeKeys is shape { mapColumn: keyName[] } e.g. { Attributes: ['http.method', 'http.route'] } + attributeKeys: Record; + startDate: Date; + endDate: Date; + signal: AbortSignal; +}): Promise> { + const flatKeyExprs: { display: string; mapColumn: string; key: string }[] = + []; + const truncatedKeys: string[] = []; + for (const [mapColumn, keys] of Object.entries(attributeKeys)) { + for (const key of keys) { + const display = `${mapColumn}['${key}']`; + if (flatKeyExprs.length >= MAX_ATTR_KEYS_TO_SAMPLE) { + truncatedKeys.push(display); + continue; + } + flatKeyExprs.push({ display, mapColumn, key }); + } + } + const meta: AttributeValuesMeta = { + sampledKeys: flatKeyExprs.map(e => e.display), + truncatedKeys, + }; + if (flatKeyExprs.length === 0) { + return { ok: true, data: { values: {}, meta } }; + } + + const projections = flatKeyExprs.map( + ({ mapColumn, key }, idx) => chSql` + groupUniqArray(${{ Int32: MAX_ATTR_VALUES }})( + ${{ Identifier: mapColumn }}[${{ String: key }}] + ) AS param${{ UNSAFE_RAW_SQL: String(idx) }} + `, + ); + + // Distinct set of map columns we need from each sampled row, so the + // inner subquery only projects what's required for the outer + // groupUniqArray aggregates. + const sampledMapColumns = Array.from( + new Set(flatKeyExprs.map(e => e.mapColumn)), + ); + const sampleProjections = sampledMapColumns.map( + col => chSql`${{ Identifier: col }}`, + ); + + // Aggregate from a bounded sample of matching rows. Mirrors + // fetchAttributeKeys: the inner LIMIT lets ClickHouse stop scanning + // once it has SAMPLE_SIZE rows that match (MetricName, time range), + // and the matching clickhouse_settings cap server-side execution so a + // hot metric cannot starve the wall-clock budget. + const sql = chSql` + SELECT ${concatChSql(', ', projections)} + FROM ( + SELECT ${concatChSql(', ', sampleProjections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName = ${{ String: metricName }} + AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }}) + AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }}) + LIMIT ${{ Int32: METRIC_ATTR_KEYS_SAMPLE_SIZE }} + ) + `; + + try { + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + clickhouse_settings: { + max_execution_time: METRIC_ATTR_KEYS_MAX_EXEC_SECONDS, + timeout_overflow_mode: 'break', + }, + abort_signal: signal, + }); + const result = (await response.json()) as { + data: Array>; + }; + const row = result.data[0]; + if (!row) return { ok: true, data: { values: {}, meta } }; + const values: Record = {}; + flatKeyExprs.forEach((keyExpr, idx) => { + const sample = (row[`param${idx}`] ?? []).filter(v => v !== ''); + if (sample.length > 0) { + values[keyExpr.display] = sample; + } + }); + return { ok: true, data: { values, meta } }; + } catch (e) { + logger.warn( + { tableName, error: e instanceof Error ? e.message : String(e) }, + 'sampleAttributeValues failed', + ); + return { ok: false, error: sanitizeFetchError(e) }; + } +} + +async function describeMetricImpl( + teamId: string, + input: z.infer, + signal: AbortSignal, +) { + const source = await getSource(teamId, input.sourceId); + if (!source) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" not found. Call clickstack_list_sources to see available source IDs.`, + }, + ], + }; + } + if (source.kind !== SourceKind.Metric) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" is a "${source.kind}" source, not a metric source. clickstack_describe_metric only works on metric sources.`, + }, + ], + }; + } + + const timeRange = parseTimeRange(input.startTime, input.endTime); + if ('error' in timeRange) { + return { + isError: true, + content: [{ type: 'text' as const, text: timeRange.error }], + }; + } + const { startDate, endDate } = timeRange; + + const connection = await getConnectionById( + teamId, + source.connection.toString(), + true, + ); + if (!connection) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Connection not found for source "${input.sourceId}".`, + }, + ], + }; + } + + const clickhouseClient = new ClickhouseClient({ + host: connection.host, + username: connection.username, + password: connection.password, + }); + const metadata = getMetadata(clickhouseClient); + const databaseName = source.from.databaseName; + const connectionId = source.connection.toString(); + + // Validate the requested kind has a populated table on this source. + // The schema requires `kind`, so we always have an exact target. + const kind = input.kind; + const tableName = source.metricTables[kind]; + if (!tableName) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" has no "${kind}" metric table populated. Populated kinds: ${Object.keys(source.metricTables).join(', ')}.`, + }, + ], + }; + } + + // Defensive column-presence check before referencing MetricUnit / + // MetricDescription on this kind's table. + let columns: { name: string; type: string }[]; + try { + columns = await metadata.getColumns({ + databaseName, + tableName, + connectionId, + }); + } catch (e) { + logger.warn( + { kind, error: e instanceof Error ? e.message : String(e) }, + 'describeMetric: getColumns failed', + ); + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Failed to load columns for "${tableName}". The metric table may be missing or unreachable.`, + }, + ], + }; + } + const columnNames = new Set(columns.map(c => c.name)); + const hasUnit = columnNames.has('MetricUnit'); + const hasDescription = columnNames.has('MetricDescription'); + + const [meta, attributeKeysResult] = await Promise.all([ + fetchUnitAndDescription({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName: input.metricName, + startDate, + endDate, + hasUnit, + hasDescription, + signal, + }), + fetchAttributeKeys({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName: input.metricName, + columns, + startDate, + endDate, + signal, + }), + ]); + + // Track discovery sub-queries that failed so the agent can distinguish + // "fetch failed" (retry / report) from "genuinely empty" (widen the + // window). Without this, transient ClickHouse errors surfaced as the + // misleading "No data found" hint. + const partialFailure: { stage: string; error: string }[] = []; + const attributeKeys = attributeKeysResult.ok ? attributeKeysResult.data : {}; + if (!attributeKeysResult.ok) { + partialFailure.push({ + stage: 'attributeKeys', + error: attributeKeysResult.error, + }); + } + + const kindDetail: KindDetail = { + kind, + tableName, + ...(meta.unit ? { unit: meta.unit } : {}), + ...(meta.description ? { description: meta.description } : {}), + attributeKeys, + usage: KIND_USAGE[kind], + }; + + if (input.sampleValues && Object.keys(attributeKeys).length > 0) { + const valuesResult = await sampleAttributeValues({ + clickhouseClient, + databaseName, + tableName, + connectionId, + metricName: input.metricName, + attributeKeys, + startDate, + endDate, + signal, + }); + if (valuesResult.ok) { + kindDetail.attributeValues = valuesResult.data.values; + // sampledKeys / truncatedKeys let the agent tell "key absent + // because we never queried it" (truncated) apart from "queried + // but empty in the window" (in sampledKeys, missing from values). + kindDetail.attributeValuesMeta = valuesResult.data.meta; + } else { + partialFailure.push({ + stage: 'attributeValues', + error: valuesResult.error, + }); + } + } + + const kindDetails: KindDetail[] = [kindDetail]; + + // Heuristic "no data in this kind" hint: when neither attribute keys, + // unit, nor description came back, the (metric, kind) pair likely has + // no data in the requested window. The agent's recourse is to widen + // startTime/endTime or double-check the kind via clickstack_list_metrics. + // Suppressed when any discovery stage failed — an empty attributeKeys + // can't be trusted as "no data" then. + const noSignal = + partialFailure.length === 0 && + Object.keys(attributeKeys).length === 0 && + !meta.unit && + !meta.description; + + const queryExample = `clickstack_timeseries({ sourceId: "${input.sourceId}", select: [{ aggFn: ${ + kind === 'sum' + ? '"increase"' + : kind === 'histogram' + ? '"quantile", level: 0.95' + : '"avg"' + }, metricType: "${kind}", metricName: "${input.metricName}" }] })`; + + const responseObj: Record = { + metricName: input.metricName, + kinds: kindDetails, + ...(partialFailure.length > 0 && { + partialFailure, + hint: + 'Some discovery queries failed — the fields above may be incomplete. ' + + 'Retry the call; if the failure persists, narrow startTime/endTime or set sampleValues:false.', + }), + ...(noSignal && { + hint: + `No data found for MetricName "${input.metricName}" with kind "${kind}" ` + + `between ${startDate.toISOString()} and ${endDate.toISOString()}. ` + + 'Try widening startTime/endTime, or call clickstack_list_metrics to ' + + 'confirm the metric name + kind combination exists.', + }), + nextSteps: { + query: `Example: ${queryExample}`, + }, + }; + + const { data: trimmed, isTrimmed } = trimToolResponse(responseObj); + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify( + isTrimmed + ? { + ...trimmed, + note: 'Result was trimmed for context size. Set sampleValues:false or narrow startTime/endTime.', + } + : trimmed, + null, + 2, + ), + }, + ], + }; +} + +// ─── Tool registration ─────────────────────────────────────────────────────── + +export function registerDescribeMetric( + server: McpServer, + context: McpContext, +): void { + const { teamId } = context; + + server.registerTool( + 'clickstack_describe_metric', + { + title: 'Describe Metric', + description: + 'DRILL-DOWN: Use after clickstack_list_metrics (or after a clickstack_describe_source ' + + 'sample) to get attribute keys, sampled values, unit, and description for a ' + + 'specific (metricName, kind) pair. Attribute keys vary per metric — not per source — ' + + "so always call this before clickstack_timeseries / clickstack_table for any metric you've never queried.\n\n" + + 'REQUIRES `kind` — pass the gauge/sum/histogram value emitted alongside the metric name by ' + + 'clickstack_list_metrics or clickstack_describe_source. A metric name can legitimately ' + + 'live in more than one kind (e.g. "container.cpu.usage" appears in both gauge and sum); ' + + 'call this tool once per kind you care about.\n\n' + + 'attributeValuesMeta on each kind reports sampledKeys (queried for values) and ' + + 'truncatedKeys (skipped by the per-call sampling cap) — a key in truncatedKeys was ' + + 'never queried, so query it directly if you need its values.\n\n' + + 'Workflow: clickstack_list_sources → clickstack_list_metrics → ' + + 'clickstack_describe_metric → clickstack_timeseries|clickstack_table.', + inputSchema: describeMetricSchema, + }, + withToolTracing('clickstack_describe_metric', context, async rawInput => { + // Re-parse explicitly: the MCP SDK callback signature widens + // optional-field types into `unknown`, but the parser produces + // the typed shape we need for downstream calls. + const input = describeMetricSchema.parse(rawInput); + const controller = new AbortController(); + // Hoist the timer handle so the finally block can cancel it on the + // success path — otherwise a stale controller.abort() fires + // DESCRIBE_TIMEOUT_MS after every successful call and the + // setTimeout closure stays pinned for the same duration. + let timeoutId: ReturnType | undefined; + const timeoutPromise = new Promise((_, reject) => { + timeoutId = setTimeout(() => { + controller.abort(); + reject(new Error('DESCRIBE_METRIC_TIMEOUT')); + }, DESCRIBE_TIMEOUT_MS); + }); + try { + return await Promise.race([ + describeMetricImpl(teamId.toString(), input, controller.signal), + timeoutPromise, + ]); + } catch (e) { + if (e instanceof Error && e.message === 'DESCRIBE_METRIC_TIMEOUT') { + logger.warn( + { teamId, sourceId: input.sourceId, metricName: input.metricName }, + 'clickstack_describe_metric timed out', + ); + return { + isError: true as const, + content: [ + { + type: 'text' as const, + text: + 'Discovery timed out. The metric table may be under load or the ' + + 'attribute set may be very high-cardinality. Try narrowing ' + + 'startTime/endTime or setting sampleValues:false to skip the ' + + 'value-sampling stage.', + }, + ], + }; + } + throw e; + } finally { + if (timeoutId !== undefined) clearTimeout(timeoutId); + } + }), + ); +} diff --git a/packages/api/src/mcp/tools/sources/describeSource.ts b/packages/api/src/mcp/tools/sources/describeSource.ts index 5674c7e2cf..757cccf6bd 100644 --- a/packages/api/src/mcp/tools/sources/describeSource.ts +++ b/packages/api/src/mcp/tools/sources/describeSource.ts @@ -1,11 +1,14 @@ import { + chSql, + concatChSql, convertCHDataTypeToJSType, filterColumnMetaByType, JSDataType, + tableExpr, } from '@hyperdx/common-utils/dist/clickhouse'; import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata'; -import { SourceKind } from '@hyperdx/common-utils/dist/types'; +import { type MetricTable, SourceKind } from '@hyperdx/common-utils/dist/types'; import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { z } from 'zod'; @@ -16,6 +19,11 @@ import { trimToolResponse } from '@/utils/trimToolResponse'; import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; +import { + QUERYABLE_METRIC_KINDS, + type QueryableMetricKind, + sanitizeMetricTables, +} from './metricKinds'; // How far back to look when querying the rollup tables for value samples. const VALUE_SAMPLE_LOOKBACK_MS = 24 * 60 * 60 * 1000; // 24 hours @@ -28,6 +36,200 @@ const MAX_LC_VALUES = 20; const MAX_MAP_KEY_VALUES = 5; const MAX_MAP_KEYS_TO_SAMPLE = 10; +// Max MetricName values returned per metric kind by the starter sample. +// clickstack_list_metrics provides paginated discovery beyond this cap. +const MAX_METRIC_NAMES_PER_KIND = 20; + +/** + * Pick the representative metric table to use as the starting point for + * schema/attribute discovery on a metric source. Prefers gauge → sum → + * histogram from the source's populated metricTables map. Returns the + * ClickHouse table name, or undefined when no queryable metric table is + * populated. + */ +function pickRepresentativeMetricTable( + metricTables: MetricTable, +): { kind: QueryableMetricKind; tableName: string } | undefined { + for (const kind of QUERYABLE_METRIC_KINDS) { + const tableName = metricTables[kind]; + if (tableName) { + return { kind, tableName }; + } + } + return undefined; +} + +type MetricNameSample = { + name: string; + unit?: string; + description?: string; +}; + +/** + * Sample distinct MetricName values for a single metric kind. Optionally + * enriches each name with MetricUnit / MetricDescription when those + * columns are present on the table (the OTel Collector default schema + * includes them; custom schemas may not). + */ +async function sampleMetricNamesForKind({ + metadata, + clickhouseClient, + databaseName, + tableName, + connectionId, + dateRange, + timestampValueExpression, + signal, + cachedColumns, +}: { + metadata: ReturnType; + clickhouseClient: ClickhouseClient; + databaseName: string; + tableName: string; + connectionId: string; + dateRange: [Date, Date]; + timestampValueExpression: string; + signal: AbortSignal; + cachedColumns?: { name: string }[]; +}): Promise { + // Defensive column presence check for MetricUnit / MetricDescription. + const kindColumns = + cachedColumns ?? + (await metadata.getColumns({ databaseName, tableName, connectionId })); + const columnNames = new Set(kindColumns.map(c => c.name)); + const hasUnit = columnNames.has('MetricUnit'); + const hasDescription = columnNames.has('MetricDescription'); + + // First fetch the distinct metric names; this is the only step that + // strictly needs to succeed for the kind to appear in the response. + // Pass timestampValueExpression so the no-rollup fallback path scopes + // its scan to dateRange instead of going unbounded against the raw + // metric table on cold cache. + const nameResults = await metadata.getAllKeyValues({ + databaseName, + tableName, + keyExpressions: ['MetricName'], + maxValuesPerKey: MAX_METRIC_NAMES_PER_KIND, + connectionId, + dateRange, + timestampValueExpression, + signal, + }); + const names = nameResults[0]?.value ?? []; + if (names.length === 0) return []; + + // Best-effort enrichment with unit + description. One small query + // returns one row per metric name with the most-recent unit / desc. + let enrichments: Record = {}; + if ((hasUnit || hasDescription) && !signal.aborted) { + try { + enrichments = await fetchMetricNameEnrichments({ + clickhouseClient, + databaseName, + tableName, + connectionId, + names, + dateRange, + hasUnit, + hasDescription, + signal, + }); + } catch (e) { + logger.warn( + { databaseName, tableName, error: e }, + 'Failed to enrich metric names with unit/description', + ); + } + } + + return names.map(name => { + const enrichment = enrichments[name] ?? {}; + const sample: MetricNameSample = { name }; + if (enrichment.unit) sample.unit = enrichment.unit; + if (enrichment.description) sample.description = enrichment.description; + return sample; + }); +} + +/** + * Fetch MetricUnit and MetricDescription for a batch of metric names. + * Uses `anyLast` so the most-recent value wins when a metric has changed + * unit/description over time. + */ +async function fetchMetricNameEnrichments({ + clickhouseClient, + databaseName, + tableName, + connectionId, + names, + dateRange, + hasUnit, + hasDescription, + signal, +}: { + clickhouseClient: ClickhouseClient; + databaseName: string; + tableName: string; + connectionId: string; + names: string[]; + dateRange: [Date, Date]; + hasUnit: boolean; + hasDescription: boolean; + signal: AbortSignal; +}): Promise> { + // Build the projection fragments via the parameterised chSql DSL so + // identifiers are quoted and the unit/description columns only appear + // when present on the source table. + const projections = [ + chSql`MetricName`, + ...(hasUnit + ? [chSql`anyLast(${{ Identifier: 'MetricUnit' }}) AS MetricUnit`] + : []), + ...(hasDescription + ? [ + chSql`anyLast(${{ Identifier: 'MetricDescription' }}) AS MetricDescription`, + ] + : []), + ]; + const namePlaceholders = concatChSql( + ',', + names.map(name => chSql`${{ String: name }}`), + ); + const sql = chSql` + SELECT ${concatChSql(', ', projections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE MetricName IN (${namePlaceholders}) + AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: dateRange[0].getTime() }}) + AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: dateRange[1].getTime() }}) + GROUP BY MetricName + `; + + type EnrichmentRow = { + MetricName: string; + MetricUnit?: string; + MetricDescription?: string; + }; + + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + abort_signal: signal, + }); + const result = (await response.json()) as { data: EnrichmentRow[] }; + + const enrichments: Record = + {}; + for (const row of result.data) { + enrichments[row.MetricName] = { + ...(row.MetricUnit ? { unit: row.MetricUnit } : {}), + ...(row.MetricDescription ? { description: row.MetricDescription } : {}), + }; + } + return enrichments; +} + /** * Core schema-discovery logic. Extracted so the caller can wrap it in * Promise.race for wall-clock timeout enforcement. @@ -72,6 +274,9 @@ async function describeSourceSchema( } // Key columns by source kind + let representativeMetric: + | { kind: QueryableMetricKind; tableName: string } + | undefined; if (source.kind === SourceKind.Trace) { meta.keyColumns = { spanName: source.spanNameExpression, @@ -90,11 +295,29 @@ async function describeSourceSchema( traceId: source.traceIdExpression, }; } else if (source.kind === SourceKind.Metric) { - meta.metricTables = source.metricTables; + // Filter out implementation-detail keys (e.g. a stray Mongoose `_id` + // on the metricTables subdoc) so the agent only sees valid metric + // kinds. + const tables = sanitizeMetricTables( + source.metricTables as Record | undefined, + ); + if (tables) meta.metricTables = tables; + representativeMetric = pickRepresentativeMetricTable(source.metricTables); + if (representativeMetric) { + meta.discoveryMetricKind = representativeMetric.kind; + } } - // For sources without a table (e.g. metric sources), return early - if (!source.from.tableName) { + // Resolve the table name we'll use for column / map-key / value + // discovery. For non-metric sources this is just source.from.tableName. + // For metric sources we use the representative metric table picked + // above (gauge → sum → histogram). + const discoveryTableName = + source.from.tableName || representativeMetric?.tableName || ''; + + // Only early-return when there is truly no table to discover schema + // against (e.g. a metric source with no populated metric tables). + if (!discoveryTableName) { return { content: [ { @@ -103,7 +326,7 @@ async function describeSourceSchema( { source: meta, nextSteps: { - query: `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}" and the metric tables above.`, + query: `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}".`, }, }, null, @@ -137,7 +360,8 @@ async function describeSourceSchema( password: connection.password, }); const metadata = getMetadata(clickhouseClient); - const { databaseName, tableName } = source.from; + const databaseName = source.from.databaseName; + const tableName = discoveryTableName; const connectionId = source.connection.toString(); // Track which sampling stages were skipped due to timeout @@ -170,6 +394,12 @@ async function describeSourceSchema( })); // ── 2. Map attribute keys ───────────────────────────────────────────── + // timestampValueExpression is threaded into getMapKeys / getAllKeyValues / + // sampleMetricNamesForKind below so the no-rollup fallback path (i.e. + // metric sources, which don't have metadataMaterializedViews configured) + // can scope its scan to dateRange instead of going unbounded against + // the raw metric table on cold cache. + const timestampValueExpression = source.timestampValueExpression; const mapColumns = filterColumnMetaByType(columns, [JSDataType.Map]); const mapKeysResults: Record = {}; @@ -185,6 +415,7 @@ async function describeSourceSchema( connectionId, metadataMVs, dateRange, + timestampValueExpression, signal, }); mapKeysResults[col.name] = keys; @@ -226,6 +457,7 @@ async function describeSourceSchema( connectionId, metadataMVs, dateRange, + timestampValueExpression, signal, }); for (const { key, value } of results) { @@ -265,6 +497,7 @@ async function describeSourceSchema( connectionId, metadataMVs, dateRange, + timestampValueExpression, signal, }); for (const { key, value } of results) { @@ -287,6 +520,55 @@ async function describeSourceSchema( skippedStages.push('mapAttributeValues'); } + // ── 5. Metric name + unit + description sampling ────────────────────── + // For metric sources, sample distinct MetricName values per queryable + // kind so the agent has a starter list without needing a follow-up call + // to clickstack_list_metrics for the common case (<= 20 metrics/kind). + // Defensively check for MetricUnit / MetricDescription columns: they + // exist on the standard OTel Collector schema but a custom metric table + // may not declare them. + if (source.kind === SourceKind.Metric && !signal.aborted) { + const metricNames: Record = {}; + await Promise.all( + QUERYABLE_METRIC_KINDS.map(async kind => { + const kindTableName = source.metricTables[kind]; + if (!kindTableName) return; + try { + const samples = await sampleMetricNamesForKind({ + metadata, + clickhouseClient, + databaseName, + tableName: kindTableName, + connectionId, + dateRange, + timestampValueExpression, + signal, + // Reuse representative columns when the kind matches the + // representative table to avoid a second getColumns round-trip. + cachedColumns: + representativeMetric?.tableName === kindTableName + ? columns + : undefined, + }); + if (samples.length > 0) { + metricNames[kind] = samples; + } + } catch (e) { + logger.warn( + { sourceId, kind, error: e }, + 'Failed to sample metric names for kind', + ); + } + }), + ); + if (signal.aborted && Object.keys(metricNames).length === 0) { + skippedStages.push('metricNames'); + } + if (Object.keys(metricNames).length > 0) { + meta.metricNames = metricNames; + } + } + // Flag partial results so the LLM knows value samples may be incomplete if (skippedStages.length > 0) { meta.partial = true; @@ -300,6 +582,14 @@ async function describeSourceSchema( : 'These are the REAL values in your data — use them in filters instead of guessing. ' + 'Example: where: "SeverityText:error" (if \'error\' appears in the sampled values above).'; + const isMetricSource = source.kind === SourceKind.Metric; + const queryNextStep = isMetricSource + ? `Use clickstack_timeseries or clickstack_table with sourceId "${sourceId}" and metricType/metricName from above.` + : `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}" and the columns/attributes above.`; + const discoveryNextStep = isMetricSource + ? `For more metric names than the sample above, call clickstack_list_metrics with sourceId "${sourceId}". For per-metric attribute keys + sampled values, call clickstack_describe_metric with sourceId and metricName.` + : undefined; + const { data: output, isTrimmed } = trimToolResponse({ source: meta, usage: { @@ -308,11 +598,17 @@ async function describeSourceSchema( mapAttributes: "Use bracket syntax: SpanAttributes['http.method'], ResourceAttributes['service.name']", lowCardinalityValues: lcValuesHint, + ...(isMetricSource && { + metricNames: + 'Each entry maps a metric kind (gauge/sum/histogram) to a sample of metric names ' + + 'available on that table. Pass metricType + metricName on each select item.', + }), }, nextSteps: { - query: `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}" and the columns/attributes above.`, + query: queryNextStep, mapAttributeAccess: "Use bracket syntax for map columns: ResourceAttributes['service.name'], SpanAttributes['http.method']", + ...(discoveryNextStep && { discovery: discoveryNextStep }), }, }); @@ -372,9 +668,14 @@ export function registerDescribeSource( const controller = new AbortController(); // Promise.race enforces wall-clock timeout regardless of whether - // internal ClickHouse calls honour the AbortSignal. + // internal ClickHouse calls honour the AbortSignal. Hoist the + // timer handle so the finally block can cancel it on the success + // path — otherwise a stale controller.abort() fires + // DESCRIBE_TIMEOUT_MS after every successful call and the + // setTimeout closure stays pinned for the same duration. + let timeoutId: ReturnType | undefined; const timeoutPromise = new Promise((_, reject) => { - setTimeout(() => { + timeoutId = setTimeout(() => { controller.abort(); reject(new Error('DESCRIBE_TIMEOUT')); }, DESCRIBE_TIMEOUT_MS); @@ -412,6 +713,8 @@ export function registerDescribeSource( 'Failed to describe source schema', ); throw e; + } finally { + if (timeoutId !== undefined) clearTimeout(timeoutId); } }, ), diff --git a/packages/api/src/mcp/tools/sources/index.ts b/packages/api/src/mcp/tools/sources/index.ts index 250e4abd91..5d43d1d455 100644 --- a/packages/api/src/mcp/tools/sources/index.ts +++ b/packages/api/src/mcp/tools/sources/index.ts @@ -1,7 +1,9 @@ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import type { McpContext, ToolDefinition } from '../types'; +import { registerDescribeMetric } from './describeMetric'; import { registerDescribeSource } from './describeSource'; +import { registerListMetrics } from './listMetrics'; import { registerListSources } from './listSources'; const sourcesTools: ToolDefinition = ( @@ -10,6 +12,8 @@ const sourcesTools: ToolDefinition = ( ) => { registerListSources(server, context); registerDescribeSource(server, context); + registerListMetrics(server, context); + registerDescribeMetric(server, context); }; export default sourcesTools; diff --git a/packages/api/src/mcp/tools/sources/listMetrics.ts b/packages/api/src/mcp/tools/sources/listMetrics.ts new file mode 100644 index 0000000000..20f8ff8009 --- /dev/null +++ b/packages/api/src/mcp/tools/sources/listMetrics.ts @@ -0,0 +1,445 @@ +import { + chSql, + concatChSql, + tableExpr, +} from '@hyperdx/common-utils/dist/clickhouse'; +import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node'; +import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata'; +import { SourceKind } from '@hyperdx/common-utils/dist/types'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { z } from 'zod'; + +import { getConnectionById } from '@/controllers/connection'; +import { getSource } from '@/controllers/sources'; +import logger from '@/utils/logger'; + +import { withToolTracing } from '../../utils/tracing'; +import type { McpContext } from '../types'; +import { + QUERYABLE_METRIC_KINDS, + type QueryableMetricKind, +} from './metricKinds'; + +const DEFAULT_LIMIT = 50; +const MAX_LIMIT = 500; +const DEFAULT_LOOKBACK_MS = 24 * 60 * 60 * 1000; + +// ─── Cursor ────────────────────────────────────────────────────────────────── + +export type ListMetricsCursorPayload = { + kind: QueryableMetricKind; + lastName: string; +}; + +/** @internal Exported for testing. */ +export function encodeCursor(payload: ListMetricsCursorPayload): string { + return Buffer.from(JSON.stringify(payload), 'utf8').toString('base64'); +} + +/** @internal Exported for testing. */ +export function decodeCursor(raw: string): ListMetricsCursorPayload | null { + try { + const decoded = Buffer.from(raw, 'base64').toString('utf8'); + const parsed = JSON.parse(decoded); + if ( + typeof parsed === 'object' && + parsed !== null && + typeof parsed.kind === 'string' && + typeof parsed.lastName === 'string' && + (QUERYABLE_METRIC_KINDS as readonly string[]).includes(parsed.kind) + ) { + return { + kind: parsed.kind as QueryableMetricKind, + lastName: parsed.lastName, + }; + } + } catch { + // fall through + } + return null; +} + +// ─── Schema ────────────────────────────────────────────────────────────────── + +const listMetricsSchema = z.object({ + sourceId: z + .string() + .describe( + 'Source ID. Must reference a metric source — get IDs from clickstack_list_sources.', + ), + kind: z + .enum(QUERYABLE_METRIC_KINDS) + .optional() + .describe( + 'Optional metric kind filter. Omit to scan every populated kind on the source ' + + '(gauge → sum → histogram). Set to narrow results to one kind.', + ), + namePattern: z + .string() + .optional() + .describe( + 'Optional ClickHouse ILIKE pattern applied to MetricName server-side. ' + + 'Use % as the wildcard. Examples: "system.cpu.%", "%duration%", "http.server.%".', + ), + startTime: z + .string() + .optional() + .describe( + 'Restrict to metrics with data points after this ISO 8601 timestamp. ' + + 'Default: 24 hours before endTime (or now).', + ), + endTime: z + .string() + .optional() + .describe('End of the time window as ISO 8601. Default: now.'), + limit: z + .number() + .int() + .min(1) + .max(MAX_LIMIT) + .optional() + .default(DEFAULT_LIMIT) + .describe( + `Max metrics returned per page. Default ${DEFAULT_LIMIT}, max ${MAX_LIMIT}.`, + ), + cursor: z + .string() + .optional() + .describe( + 'Opaque pagination cursor returned by a previous call as `nextCursor`. ' + + 'Pass it back unchanged to get the next page.', + ), +}); + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +type MetricEntry = { + name: string; + kind: QueryableMetricKind; + unit?: string; + description?: string; +}; + +function parseTimeRange( + startTime?: string, + endTime?: string, +): { error: string } | { startDate: Date; endDate: Date } { + const endDate = endTime ? new Date(endTime) : new Date(); + const startDate = startTime + ? new Date(startTime) + : new Date(endDate.getTime() - DEFAULT_LOOKBACK_MS); + if (isNaN(endDate.getTime()) || isNaN(startDate.getTime())) { + return { + error: 'Invalid startTime or endTime: must be valid ISO 8601 strings', + }; + } + if (startDate >= endDate) { + return { error: 'endTime must be greater than startTime' }; + } + return { startDate, endDate }; +} + +async function fetchMetricsForKind({ + clickhouseClient, + metadata, + kind, + databaseName, + tableName, + connectionId, + startDate, + endDate, + namePattern, + afterName, + limit, +}: { + clickhouseClient: ClickhouseClient; + metadata: ReturnType; + kind: QueryableMetricKind; + databaseName: string; + tableName: string; + connectionId: string; + startDate: Date; + endDate: Date; + namePattern: string | undefined; + afterName: string | undefined; + limit: number; +}): Promise { + // Defensive column-presence check so we don't reference MetricUnit / + // MetricDescription on non-OTel-default schemas. + const columns = await metadata.getColumns({ + databaseName, + tableName, + connectionId, + }); + const columnNames = new Set(columns.map(c => c.name)); + const hasUnit = columnNames.has('MetricUnit'); + const hasDescription = columnNames.has('MetricDescription'); + + const projections = [ + chSql`MetricName`, + ...(hasUnit + ? [chSql`anyLast(${{ Identifier: 'MetricUnit' }}) AS MetricUnit`] + : []), + ...(hasDescription + ? [ + chSql`anyLast(${{ Identifier: 'MetricDescription' }}) AS MetricDescription`, + ] + : []), + ]; + + const whereParts = [ + chSql`TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }})`, + chSql`TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }})`, + ...(afterName !== undefined + ? [chSql`MetricName > ${{ String: afterName }}`] + : []), + ...(namePattern + ? [chSql`MetricName ILIKE ${{ String: namePattern }}`] + : []), + ]; + + const sql = chSql` + SELECT ${concatChSql(', ', projections)} + FROM ${tableExpr({ database: databaseName, table: tableName })} + WHERE ${concatChSql(' AND ', whereParts)} + GROUP BY MetricName + ORDER BY MetricName ASC + LIMIT ${{ Int32: limit }} + `; + + type Row = { + MetricName: string; + MetricUnit?: string; + MetricDescription?: string; + }; + + const response = await clickhouseClient.query<'JSON'>({ + query: sql.sql, + query_params: sql.params, + format: 'JSON', + connectionId, + }); + const result = (await response.json()) as { data: Row[] }; + return result.data.map(row => { + const entry: MetricEntry = { name: row.MetricName, kind }; + if (row.MetricUnit) entry.unit = row.MetricUnit; + if (row.MetricDescription) entry.description = row.MetricDescription; + return entry; + }); +} + +// ─── Tool registration ─────────────────────────────────────────────────────── + +export function registerListMetrics( + server: McpServer, + context: McpContext, +): void { + const { teamId } = context; + + server.registerTool( + 'clickstack_list_metrics', + { + title: 'List Metric Names', + description: + 'DISCOVERY: Use this after clickstack_describe_source when you need more metric ' + + 'names than the per-kind sample shows, or when you want to narrow by ' + + 'kind / name pattern / time window. ' + + 'Returns paginated metric names per kind (gauge/sum/histogram) ' + + 'with optional unit and description (when the OTel-default columns are present). ' + + 'Pass the returned `nextCursor` back unchanged to fetch the next page.\n\n' + + 'Workflow: clickstack_list_sources → clickstack_describe_source → ' + + 'clickstack_list_metrics → clickstack_describe_metric → ' + + 'clickstack_timeseries|clickstack_table.', + inputSchema: listMetricsSchema, + }, + withToolTracing('clickstack_list_metrics', context, async rawInput => { + // Re-parse explicitly: the MCP SDK callback signature widens + // optional-field types into `unknown`, but the parser produces + // the typed shape we need for downstream calls. + const input: z.infer = + listMetricsSchema.parse(rawInput); + const source = await getSource(teamId.toString(), input.sourceId); + if (!source) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" not found. Call clickstack_list_sources to see available source IDs.`, + }, + ], + }; + } + if (source.kind !== SourceKind.Metric) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Source "${input.sourceId}" is a "${source.kind}" source, not a metric source. clickstack_list_metrics only works on metric sources — call clickstack_list_sources to find one whose kind is "metric".`, + }, + ], + }; + } + + const timeRange = parseTimeRange(input.startTime, input.endTime); + if ('error' in timeRange) { + return { + isError: true, + content: [{ type: 'text' as const, text: timeRange.error }], + }; + } + const { startDate, endDate } = timeRange; + + // Decode cursor; reject silently and start over if malformed so a + // truncated or tampered cursor does not surface internals. + const cursor = input.cursor ? decodeCursor(input.cursor) : null; + if (input.cursor && !cursor) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: 'Invalid cursor. Omit cursor to start over, or pass the exact `nextCursor` value returned by a previous call.', + }, + ], + }; + } + + // Resolve which kinds to scan, in order. When a cursor is set, + // skip kinds before the cursor's kind (already returned) and start + // the cursor's kind at the lastName-exclusive position. + const requestedKinds: QueryableMetricKind[] = input.kind + ? [input.kind] + : QUERYABLE_METRIC_KINDS.filter(k => Boolean(source.metricTables[k])); + const startKindIdx = cursor ? requestedKinds.indexOf(cursor.kind) : 0; + if (startKindIdx < 0) { + // Cursor points at a kind that's not in scope for this call — + // safer to error than silently skip. + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Cursor references kind "${cursor!.kind}" but that kind is not in scope for this call. Drop the kind filter or pass a matching cursor.`, + }, + ], + }; + } + + const connection = await getConnectionById( + teamId.toString(), + source.connection.toString(), + true, + ); + if (!connection) { + return { + isError: true, + content: [ + { + type: 'text' as const, + text: `Connection not found for source "${input.sourceId}".`, + }, + ], + }; + } + + const clickhouseClient = new ClickhouseClient({ + host: connection.host, + username: connection.username, + password: connection.password, + }); + const metadata = getMetadata(clickhouseClient); + + const limit = input.limit ?? DEFAULT_LIMIT; + const databaseName = source.from.databaseName; + + const metrics: MetricEntry[] = []; + // Per-kind fetch failures, surfaced on the response so the agent + // can distinguish "kind genuinely has no metrics" from "the fetch + // for that kind failed" — the two need different recovery steps. + const partialFailure: { kind: string; error: string }[] = []; + let nextCursor: string | undefined; + for (let i = startKindIdx; i < requestedKinds.length; i++) { + const kind = requestedKinds[i]; + const tableName = source.metricTables[kind]; + if (!tableName) continue; + const afterName = + cursor && cursor.kind === kind && i === startKindIdx + ? cursor.lastName + : undefined; + const remaining = limit - metrics.length; + if (remaining <= 0) break; + // Fetch one extra row so we can detect more-data-available. + let kindMetrics: MetricEntry[]; + try { + kindMetrics = await fetchMetricsForKind({ + clickhouseClient, + metadata, + kind, + databaseName, + tableName, + connectionId: source.connection.toString(), + startDate, + endDate, + namePattern: input.namePattern, + afterName, + limit: remaining + 1, + }); + } catch (e) { + const message = e instanceof Error ? e.message : String(e); + logger.warn( + { sourceId: input.sourceId, kind, error: message }, + 'Failed to list metrics for kind', + ); + partialFailure.push({ + kind, + error: message.replace(/\s+/g, ' ').trim().slice(0, 200), + }); + continue; + } + if (kindMetrics.length > remaining) { + // We hit the cap for this kind; emit cursor pointing at the + // last returned name and stop iterating further kinds. + const truncated = kindMetrics.slice(0, remaining); + metrics.push(...truncated); + nextCursor = encodeCursor({ + kind, + lastName: truncated[truncated.length - 1].name, + }); + break; + } + metrics.push(...kindMetrics); + } + + const responseObj: Record = { + metrics, + ...(nextCursor && { nextCursor }), + ...(partialFailure.length > 0 && { + partialFailure, + hint: + 'Listing failed for some metric kinds — results may be incomplete. ' + + 'Retry the call; if the failure persists, narrow startTime/endTime or pin a single `kind`.', + }), + ...(metrics.length === 0 && + partialFailure.length === 0 && { + hint: + 'No metrics matched. Try widening the time window (startTime/endTime), ' + + 'removing the namePattern filter, or omitting `kind` to scan every populated metric table.', + }), + usage: + 'Pass `metricType` + `metricName` from each entry to clickstack_timeseries / clickstack_table. ' + + 'For per-metric attribute keys and sampled values, call clickstack_describe_metric.', + }; + + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify(responseObj, null, 2), + }, + ], + }; + }), + ); +} diff --git a/packages/api/src/mcp/tools/sources/listSources.ts b/packages/api/src/mcp/tools/sources/listSources.ts index 0c1e614689..a69d62ae87 100644 --- a/packages/api/src/mcp/tools/sources/listSources.ts +++ b/packages/api/src/mcp/tools/sources/listSources.ts @@ -7,6 +7,7 @@ import { getSources } from '@/controllers/sources'; import { withToolTracing } from '../../utils/tracing'; import type { McpContext } from '../types'; +import { sanitizeMetricTables } from './metricKinds'; export function registerListSources( server: McpServer, @@ -72,7 +73,13 @@ export function registerListSources( traceId: s.traceIdExpression, }; } else if (s.kind === SourceKind.Metric) { - meta.metricTables = s.metricTables; + // Filter out implementation-detail keys (e.g. a stray Mongoose + // `_id` on the metricTables subdoc) so the agent only sees + // valid metric kinds. + const tables = sanitizeMetricTables( + s.metricTables as Record | undefined, + ); + if (tables) meta.metricTables = tables; } return meta; diff --git a/packages/api/src/mcp/tools/sources/metricKinds.ts b/packages/api/src/mcp/tools/sources/metricKinds.ts new file mode 100644 index 0000000000..bdaac920ba --- /dev/null +++ b/packages/api/src/mcp/tools/sources/metricKinds.ts @@ -0,0 +1,75 @@ +import { MetricsDataType } from '@hyperdx/common-utils/dist/types'; + +/** + * Metric kinds the query renderer can translate today. Mirrors the three + * MetricsDataType members that `translateMetricChartConfig` in + * common-utils branches over; `summary` and `"exponential histogram"` + * are intentionally excluded because the renderer throws on them. + * + * Declared as plain string literals (not MetricsDataType enum members) + * so `z.enum(...)` narrows correctly at the MCP SDK callback boundary — + * referencing the enum here pessimises Zod's inference to `unknown` on + * every optional field of the surrounding tool input schema, which then + * cascades into "not assignable to type string" errors at every handler + * call site. The compile-time assertion below guarantees the literals + * stay in sync with the enum so adding a new queryable kind cannot be + * done in only one place. + * + * Imported by every metric-aware MCP tool (clickstack_describe_source, + * clickstack_list_metrics, clickstack_describe_metric) and by the + * clickstack_timeseries / clickstack_table select-item schema. + */ +export const QUERYABLE_METRIC_KINDS = ['gauge', 'sum', 'histogram'] as const; + +export type QueryableMetricKind = (typeof QUERYABLE_METRIC_KINDS)[number]; + +// Compile-time assertion that the string literals above still match the +// MetricsDataType enum members. If a future MetricsDataType rename +// breaks this list, this line fails to type-check. +const _assertKindsMatchEnum: readonly QueryableMetricKind[] = [ + MetricsDataType.Gauge, + MetricsDataType.Sum, + MetricsDataType.Histogram, +]; +void _assertKindsMatchEnum; + +/** + * Allowed kind keys on the `metricTables` map when serialising a metric + * source into an MCP response. Includes the non-queryable kinds + * (`summary`, `exponential histogram`) because the model schema declares + * tables for them even though the query renderer cannot yet translate + * them. + */ +const ALLOWED_METRIC_TABLE_KINDS: readonly string[] = + Object.values(MetricsDataType); + +/** + * Filter source.metricTables to the known kind keys before emitting it + * in a clickstack_list_sources / clickstack_describe_source response. + * + * Belt-and-braces defense: even after the model schema declares + * `_id: false` on the metricTables subdoc, existing documents persisted + * before the schema fix may still carry a stray `_id` value that + * Mongoose serialises alongside the legitimate kind keys. Filtering at + * the response boundary keeps the agent-facing payload free of + * implementation-detail keys. + * + * Accepts both plain objects and Mongoose subdocuments — we look up + * each allowed kind via property access rather than enumerating keys, + * because Mongoose subdoc instances expose field values through getters + * (not as own enumerable properties). + */ +export function sanitizeMetricTables( + metricTables: Record | undefined | null, +): Record | undefined { + if (!metricTables) return undefined; + const source = metricTables as Record; + const out: Record = {}; + for (const kind of ALLOWED_METRIC_TABLE_KINDS) { + const value = source[kind]; + if (typeof value === 'string' && value.length > 0) { + out[kind] = value; + } + } + return Object.keys(out).length > 0 ? out : undefined; +} diff --git a/packages/api/src/models/source.ts b/packages/api/src/models/source.ts index b7e7d55d33..e570617985 100644 --- a/packages/api/src/models/source.ts +++ b/packages/api/src/models/source.ts @@ -237,18 +237,27 @@ export const SessionSource = Source.discriminator( // -------------------------- // Metric discriminator // -------------------------- +// metricTables is declared as a nested Schema with `_id: false` so the +// embedded subdoc does not auto-generate an ObjectId. Without this opt-out +// Mongoose adds an `_id` field that leaks into MCP responses alongside +// the queryable kind keys (gauge/sum/histogram/...). +const MetricTablesSchema = new Schema( + { + [MetricsDataType.Gauge]: String, + [MetricsDataType.Histogram]: String, + [MetricsDataType.Sum]: String, + [MetricsDataType.Summary]: String, + [MetricsDataType.ExponentialHistogram]: String, + }, + { _id: false }, +); + type IMetricSource = Extract; export const MetricSource = Source.discriminator( SourceKind.Metric, new Schema>({ metricTables: { - type: { - [MetricsDataType.Gauge]: String, - [MetricsDataType.Histogram]: String, - [MetricsDataType.Sum]: String, - [MetricsDataType.Summary]: String, - [MetricsDataType.ExponentialHistogram]: String, - }, + type: MetricTablesSchema, default: undefined, }, resourceAttributesExpression: String,