hyperdxio · dhable · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/.changeset/mcp-metric-source-support.md b/.changeset/mcp-metric-source-support.md
@@ -0,0 +1,11 @@
+---
+'@hyperdx/api': minor
+---
+
+feat(mcp): first-class metric source support
+
+- Two new tools: `clickstack_list_metrics` paginates the metric-name catalog with optional kind / namePattern (ILIKE) / time-window filters and opaque cursor pagination; `clickstack_describe_metric` returns per-metric kind(s), unit, description, attribute keys, and sampled values (with kind auto-detection).
+- `clickstack_describe_source` is metric-aware: picks a representative metric table (gauge → sum → histogram), runs column / map-key / value-sampling against it, and adds a per-kind metric-name sample.
+- `clickstack_timeseries` and `clickstack_table` accept `metricType` (gauge / sum / histogram), `metricName`, and `isDelta` on each select item, plus `aggFn:"increase"` for Sum counters. `valueExpression` defaults to `"Value"` for metric sources. Surfaces the renderer's 20-group top-N cap on `increase + groupBy` as a neutral hint.
+- Dashboard prompt's "use raw SQL for metric tiles" workaround is replaced with positive discovery-workflow guidance and one worked example per supported kind.
+- `summary` and `"exponential histogram"` kinds remain out of scope (no query renderer support yet).
diff --git a/MCP.md b/MCP.md
@@ -109,9 +109,11 @@ with:
 | Tool                          | Description                                                                                  |
 | ----------------------------- | -------------------------------------------------------------------------------------------- |
 | `clickstack_list_sources`        | List all data sources and connections as a lightweight catalog (IDs, names, kinds)            |
-| `clickstack_describe_source`     | Full column schema, attribute keys, and sampled low-cardinality values for a single source    |
-| `clickstack_timeseries`          | Plot metrics over time as a line or stacked bar chart                                        |
-| `clickstack_table`               | Compute aggregated metrics as a table, single number, or pie chart                           |
+| `clickstack_describe_source`     | Full column schema, attribute keys, and sampled low-cardinality values for a single source; for metric sources also returns a per-kind metric-name sample |
+| `clickstack_list_metrics`        | Paginated catalog of metric names on a metric source with optional kind / namePattern / time-window filters |
+| `clickstack_describe_metric`     | Per-metric drill-down: kind(s), unit, description, attribute keys per map column, and sampled values |
+| `clickstack_timeseries`          | Plot metrics over time as a line or stacked bar chart (works on log, trace, and metric sources)        |
+| `clickstack_table`               | Compute aggregated metrics as a table, single number, or pie chart (works on log, trace, and metric sources) |
 | `clickstack_search`              | Browse individual log, event, or trace rows                                                  |
 | `clickstack_event_patterns`      | Discover the most common log messages and event patterns using Drain clustering               |
 | `clickstack_event_deltas`        | Compare two row groups and rank properties by how their value distributions differ            |
@@ -127,3 +129,43 @@ with:
 | `clickstack_trace_waterfall`     | Fetch all spans in a single trace as a parent/child waterfall tree with optional correlated logs |
 | `clickstack_trace_top_time_consuming_operations` | Aggregate breakdown of child operations by cumulative time across matching parent traces |
 | `clickstack_get_webhook`         | List available webhook destinations for use as alert notification channels                    |
+
+### Metric Sources
+
+`clickstack_timeseries`, `clickstack_table`, and the dashboard builder tile
+tools accept metric sources transparently. Each `select` item on a metric
+query must set `metricType` (`"gauge"`, `"sum"`, or `"histogram"`) and
+`metricName` (the OTel metric name, e.g. `system.cpu.utilization`).
+`valueExpression` defaults to `"Value"` when omitted, so a typical metric
+series looks like:
+
+```jsonc
+{ "aggFn": "avg", "metricType": "gauge", "metricName": "system.cpu.utilization" }
+```
+
+Per-kind aggregation guidance:
+
+- **Gauge**: `avg`, `last_value`, `min`, or `max`. Set `"isDelta": true` for
+  Prometheus-style delta over each bucket.
+- **Sum (counter)**: use `"increase"` for the per-bucket counter increase
+  (reset-aware), or `sum` / `avg` to aggregate the computed rate.
+  `increase` combined with `groupBy` is capped at the top 20 groups by the
+  renderer; the tool emits a neutral hint when the cap may apply.
+- **Histogram**: `"quantile"` with `level` ∈ {0.5, 0.9, 0.95, 0.99} for
+  percentiles, or `"count"` for the total bucket count.
+
+`summary` and `"exponential histogram"` metric kinds are not yet supported
+by the query renderer.
+
+Discovery workflow for metrics:
+
+1. `clickstack_list_sources` to find the metric source ID and its
+   `metricTables` map.
+2. `clickstack_describe_source` to see columns, attribute keys, and a
+   per-kind metric-name sample.
+3. `clickstack_list_metrics` for paginated catalog access with optional
+   kind / namePattern / time-window filters; pass `nextCursor` unchanged
+   for the next page.
+4. `clickstack_describe_metric` to drill into a specific metric and
+   discover its attribute keys + sampled values before authoring queries.
+5. `clickstack_timeseries` / `clickstack_table` to chart the metric.
diff --git a/packages/api/src/controllers/__tests__/sources.test.ts b/packages/api/src/controllers/__tests__/sources.test.ts
@@ -0,0 +1,39 @@
+import mongoose from 'mongoose';
+
+import { getSource } from '@/controllers/sources';
+import { clearDBCollections, closeDB, connectDB } from '@/fixtures';
+
+describe('sources controller', () => {
+  beforeAll(async () => {
+    await connectDB();
+  });
+
+  afterEach(async () => {
+    await clearDBCollections();
+  });
+
+  afterAll(async () => {
+    await closeDB();
+  });
+
+  describe('getSource', () => {
+    it('returns null when sourceId is not a valid ObjectId', async () => {
+      // Non-ObjectId strings used to bubble a Mongoose CastError up
+      // through MCP tools as "Cast to ObjectId failed for value ...".
+      // getSource now returns null before querying (text, empty, and
+      // blank ids below), so the caller's not-found branch fires cleanly.
+      const team = new mongoose.Types.ObjectId().toString();
+
+      expect(await getSource(team, 'not-an-objectid')).toBeNull();
+      expect(await getSource(team, '')).toBeNull();
+      expect(await getSource(team, '   ')).toBeNull();
+    });
+
+    it('returns null for a well-formed but missing ObjectId', async () => {
+      const team = new mongoose.Types.ObjectId().toString();
+      const missingSourceId = new mongoose.Types.ObjectId().toString();
+
+      expect(await getSource(team, missingSourceId)).toBeNull();
+    });
+  });
+});
diff --git a/packages/api/src/controllers/sources.ts b/packages/api/src/controllers/sources.ts
@@ -1,4 +1,5 @@
 import { SourceKind, SourceSchema } from '@hyperdx/common-utils/dist/types';
+import mongoose from 'mongoose';
 
 import {
   ISourceInput,
@@ -35,8 +36,21 @@ export function getSources(team: string) {
   return Source.find({ team });
 }
 
-export function getSource(team: string, sourceId: string) {
-  return Source.findOne({ _id: sourceId, team });
+export async function getSource(team: string, sourceId: string) {
+  // Pre-check the sourceId shape so a non-ObjectId input returns null
+  // (the caller's "not found" branch) instead of bubbling a Mongoose
+  // CastError.
+  if (!mongoose.Types.ObjectId.isValid(sourceId)) {
+    return null;
+  }
+  try {
+    return await Source.findOne({ _id: sourceId, team });
+  } catch (err) {
+    // Only cast failures mean "not found"; rethrow everything else so an
+    // outage is not reported as a missing source.
+    if (err instanceof mongoose.Error.CastError) return null;
+    throw err;
+  }
 }
 
 type DistributiveOmit<T, K extends PropertyKey> = T extends T

diff --git a/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts b/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts
@@ -50,25 +50,39 @@ describe('MCP Dashboard Prompts', () => {
       expect(numberFormat.toLowerCase()).toContain('per-series');
     });
 
-    it('declares the MCP metric-authoring gap rather than referencing fields that do not exist', () => {
-      // The MCP select-item schema does not carry metricName / metricType,
-      // so any prompt that tells the model to author a metric tile via
-      // the builder path is teaching it to ship JSON that gets silently
-      // stripped. Guard with an explicit assertion so a future diff that
-      // restores metric-tile guidance fails this test loudly.
+    it('documents the metric-source builder support with the discovery workflow', () => {
+      // Builder tiles on a metric source now work via the metricType +
+      // metricName + isDelta fields on each select item, with metricTables
+      // threaded through runConfigTile's builder branch. The prompt has to
+      // teach the model the discovery workflow (list_sources ->
+      // describe_source -> list_metrics -> describe_metric ->
+      // timeseries|table) and per-kind aggFn rules so it avoids raw SQL.
       const prompt = buildQueryGuidePrompt();
-      expect(prompt).not.toMatch(/metricName \+ metricType/);
-      expect(prompt).not.toMatch(/exactly 1 select item with metricName/);
-      const constraintsIdx = prompt.indexOf('== PER-TILE TYPE CONSTRAINTS ==');
-      const constraintsBody = prompt.slice(constraintsIdx);
-      // The constraints section closes with an explicit note that builder
-      // tiles on a metric source are not reliable today, with a fallback
-      // recipe to raw SQL. Anchor on the phrase so a future diff that
-      // drops the gap-acknowledgement fails loudly.
-      expect(constraintsBody).toMatch(
+      const metricsIdx = prompt.indexOf('== METRIC SOURCES ==');
+      expect(metricsIdx).toBeGreaterThan(-1);
+      const metricsBody = prompt.slice(
+        metricsIdx,
+        prompt.indexOf('\n== ', metricsIdx + 1),
+      );
+      // The metric select fields (metricType, metricName, isDelta) are named.
+      expect(metricsBody).toContain('metricType');
+      expect(metricsBody).toContain('metricName');
+      expect(metricsBody).toContain('isDelta');
+      // Per-kind aggregation guidance is present.
+      expect(metricsBody).toMatch(/gauge\s+Use aggFn:"last_value"/);
+      expect(metricsBody).toMatch(/sum\s+Use aggFn:"increase"/);
+      expect(metricsBody).toMatch(/histogram\s+Use aggFn:"quantile"/);
+      // The 20-group cap on increase + groupBy is documented.
+      expect(metricsBody).toMatch(/top 20 groups/);
+      // The discovery tools are named (presence only; order is not checked).
+      expect(metricsBody).toContain('clickstack_describe_source');
+      expect(metricsBody).toContain('clickstack_list_metrics');
+      expect(metricsBody).toContain('clickstack_describe_metric');
+      // The old "use raw SQL for metric tiles" workaround language is gone.
+      expect(prompt).not.toMatch(
         /Authoring builder tiles on a metric source is not reliable/,
       );
-      expect(constraintsBody).toMatch(/MCP select-item shape does not carry/);
+      expect(prompt).not.toMatch(/Both table name and UUID are empty/);
     });
 
     it('documents table-tile onClick linking features', () => {
@@ -227,16 +241,30 @@ describe('MCP Dashboard Prompts', () => {
       );
     });
 
-    it('flags the metric source builder gap', () => {
-      // Builder tiles on metric sources currently save but render with
-      // "Both table name and UUID are empty". Claude went 100% raw SQL
-      // across 21 metric tiles for this reason; the prompt has to make
-      // the workaround obvious so the model doesn't try the builder
-      // first and fail silently.
+    it('walks the metric discovery workflow end-to-end with worked examples', () => {
+      // Metric source builder tiles now work — the prompt teaches the
+      // model how to find, characterise, and chart a metric without
+      // falling through to raw SQL. The examples cover one tile per
+      // supported metric kind so the pattern is unambiguous.
       const prompt = buildQueryGuidePrompt();
-      expect(prompt).toMatch(/Both table name and UUID are empty/);
-      expect(prompt).toMatch(/use a raw SQL tile/);
-      expect(prompt).toMatch(/otel_metrics_gauge/);
+      const metricsIdx = prompt.indexOf('== METRIC SOURCES ==');
+      const metricsBody = prompt.slice(
+        metricsIdx,
+        prompt.indexOf('\n== ', metricsIdx + 1),
+      );
+      // All five discovery-workflow tools are named (order is not checked).
+      expect(metricsBody).toMatch(/clickstack_list_sources/);
+      expect(metricsBody).toMatch(/clickstack_describe_source/);
+      expect(metricsBody).toMatch(/clickstack_list_metrics/);
+      expect(metricsBody).toMatch(/clickstack_describe_metric/);
+      expect(metricsBody).toMatch(/clickstack_timeseries/);
+      // One worked example per supported kind, each using a real OTel
+      // metric name so the agent has a concrete template.
+      expect(metricsBody).toContain('system.cpu.utilization');
+      expect(metricsBody).toContain('http.server.request.count');
+      expect(metricsBody).toContain('http.server.request.duration');
+      // valueExpression default is documented.
+      expect(metricsBody).toMatch(/valueExpression defaults to "Value"/);
     });
 
     it('contains no em-dashes or en-dashes used as em-dashes', () => {

diff --git a/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts b/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts
@@ -0,0 +1,104 @@
+// Mock heavy dependencies that break in unit-test context (no ClickHouse/Mongo)
+jest.mock('@/models/source', () => ({}));
+jest.mock('@/controllers/sources', () => ({}));
+jest.mock('@/controllers/connection', () => ({}));
+jest.mock('@/utils/trimToolResponse', () => ({
+  trimToolResponse: (data: unknown) => ({ data, isTrimmed: false }),
+}));
+
+import { decodeCursor, encodeCursor } from '../tools/sources/listMetrics';
+
+describe('listMetrics cursor', () => {
+  describe('encodeCursor / decodeCursor round-trip', () => {
+    it('round-trips a gauge cursor', () => {
+      const payload = { kind: 'gauge' as const, lastName: 'system.cpu.idle' };
+      const encoded = encodeCursor(payload);
+      expect(encoded).toMatch(/^[A-Za-z0-9+/]+=*$/); // base64
+      expect(decodeCursor(encoded)).toEqual(payload);
+    });
+
+    it('round-trips a sum cursor', () => {
+      const payload = {
+        kind: 'sum' as const,
+        lastName: 'http.server.request.count',
+      };
+      expect(decodeCursor(encodeCursor(payload))).toEqual(payload);
+    });
+
+    it('round-trips a histogram cursor', () => {
+      const payload = {
+        kind: 'histogram' as const,
+        lastName: 'http.server.request.duration',
+      };
+      expect(decodeCursor(encodeCursor(payload))).toEqual(payload);
+    });
+
+    it('round-trips metric names with dots, dashes, and unicode', () => {
+      const payload = {
+        kind: 'gauge' as const,
+        lastName: 'system.cpu.utilization-µ.naïve',
+      };
+      expect(decodeCursor(encodeCursor(payload))).toEqual(payload);
+    });
+  });
+
+  describe('decodeCursor rejection cases', () => {
+    it('returns null for non-base64 input', () => {
+      expect(decodeCursor('not-base64!')).toBeNull();
+    });
+
+    it('returns null for base64 of invalid JSON', () => {
+      const garbage = Buffer.from('not json').toString('base64');
+      expect(decodeCursor(garbage)).toBeNull();
+    });
+
+    it('returns null when kind is missing', () => {
+      const malformed = Buffer.from(JSON.stringify({ lastName: 'x' })).toString(
+        'base64',
+      );
+      expect(decodeCursor(malformed)).toBeNull();
+    });
+
+    it('returns null when lastName is missing', () => {
+      const malformed = Buffer.from(JSON.stringify({ kind: 'gauge' })).toString(
+        'base64',
+      );
+      expect(decodeCursor(malformed)).toBeNull();
+    });
+
+    it('returns null when kind is not a queryable metric kind', () => {
+      const summaryCursor = Buffer.from(
+        JSON.stringify({ kind: 'summary', lastName: 'x' }),
+      ).toString('base64');
+      expect(decodeCursor(summaryCursor)).toBeNull();
+
+      const expHistCursor = Buffer.from(
+        JSON.stringify({ kind: 'exponential histogram', lastName: 'x' }),
+      ).toString('base64');
+      expect(decodeCursor(expHistCursor)).toBeNull();
+
+      const bogusCursor = Buffer.from(
+        JSON.stringify({ kind: 'bogus', lastName: 'x' }),
+      ).toString('base64');
+      expect(decodeCursor(bogusCursor)).toBeNull();
+    });
+
+    it('returns null when kind has the wrong type', () => {
+      const malformed = Buffer.from(
+        JSON.stringify({ kind: 1, lastName: 'x' }),
+      ).toString('base64');
+      expect(decodeCursor(malformed)).toBeNull();
+    });
+
+    it('returns null when lastName has the wrong type', () => {
+      const malformed = Buffer.from(
+        JSON.stringify({ kind: 'gauge', lastName: 42 }),
+      ).toString('base64');
+      expect(decodeCursor(malformed)).toBeNull();
+    });
+
+    it('returns null for empty string', () => {
+      expect(decodeCursor('')).toBeNull();
+    });
+  });
+});