diff --git a/.changeset/mcp-metric-source-support.md b/.changeset/mcp-metric-source-support.md
new file mode 100644
index 0000000000..4dfd135051
--- /dev/null
+++ b/.changeset/mcp-metric-source-support.md
@@ -0,0 +1,11 @@
+---
+'@hyperdx/api': minor
+---
+
+feat(mcp): first-class metric source support
+
+- Two new tools: `clickstack_list_metrics` paginates the metric-name catalog with optional kind / namePattern (ILIKE) / time-window filters and opaque cursor pagination; `clickstack_describe_metric` returns per-metric kind(s), unit, description, attribute keys, and sampled values (with kind auto-detection).
+- `clickstack_describe_source` is metric-aware: picks a representative metric table (gauge → sum → histogram), runs column / map-key / value-sampling against it, and adds a per-kind metric-name sample.
+- `clickstack_timeseries` and `clickstack_table` accept `metricType` (gauge / sum / histogram), `metricName`, and `isDelta` on each select item, plus `aggFn:"increase"` for Sum counters. `valueExpression` defaults to `"Value"` for metric sources. Surfaces the renderer's 20-group top-N cap on `increase + groupBy` as a neutral hint.
+- Dashboard prompt's "use raw SQL for metric tiles" workaround is replaced with positive discovery-workflow guidance and one worked example per supported kind.
+- `summary` and `"exponential histogram"` kinds remain out of scope (no query renderer support yet).
diff --git a/MCP.md b/MCP.md
index 3616e5c9a6..312276cb57 100644
--- a/MCP.md
+++ b/MCP.md
@@ -109,9 +109,11 @@ with:
 | Tool                          | Description                                                                                  |
 | ----------------------------- | -------------------------------------------------------------------------------------------- |
 | `clickstack_list_sources`        | List all data sources and connections as a lightweight catalog (IDs, names, kinds)            |
-| `clickstack_describe_source`     | Full column schema, attribute keys, and sampled low-cardinality values for a single source    |
-| `clickstack_timeseries`          | Plot metrics over time as a line or stacked bar chart                                        |
-| `clickstack_table`               | Compute aggregated metrics as a table, single number, or pie chart                           |
+| `clickstack_describe_source`     | Full column schema, attribute keys, and sampled low-cardinality values for a single source; for metric sources also returns a per-kind metric-name sample |
+| `clickstack_list_metrics`        | Paginated catalog of metric names on a metric source with optional kind / namePattern / time-window filters |
+| `clickstack_describe_metric`     | Per-metric drill-down: kind(s), unit, description, attribute keys per map column, and sampled values |
+| `clickstack_timeseries`          | Plot metrics over time as a line or stacked bar chart (works on log, trace, and metric sources)        |
+| `clickstack_table`               | Compute aggregated metrics as a table, single number, or pie chart (works on log, trace, and metric sources) |
 | `clickstack_search`              | Browse individual log, event, or trace rows                                                  |
 | `clickstack_event_patterns`      | Discover the most common log messages and event patterns using Drain clustering               |
 | `clickstack_event_deltas`        | Compare two row groups and rank properties by how their value distributions differ            |
@@ -127,3 +129,43 @@ with:
 | `clickstack_trace_waterfall`     | Fetch all spans in a single trace as a parent/child waterfall tree with optional correlated logs |
 | `clickstack_trace_top_time_consuming_operations` | Aggregate breakdown of child operations by cumulative time across matching parent traces |
 | `clickstack_get_webhook`         | List available webhook destinations for use as alert notification channels                    |
+
+### Metric Sources
+
+`clickstack_timeseries`, `clickstack_table`, and the dashboard builder tile
+tools accept metric sources transparently. Each `select` item on a metric
+query must set `metricType` (`"gauge"`, `"sum"`, or `"histogram"`) and
+`metricName` (the OTel metric name, e.g. `system.cpu.utilization`).
+`valueExpression` defaults to `"Value"` when omitted, so a typical metric
+series looks like:
+
+```jsonc
+{ "aggFn": "avg", "metricType": "gauge", "metricName": "system.cpu.utilization" }
+```
+
+Per-kind aggregation guidance:
+
+- **Gauge**: `avg`, `last_value`, `min`, or `max`. Set `"isDelta": true` for
+  Prometheus-style delta over each bucket.
+- **Sum (counter)**: `"increase"` returns the per-bucket counter increase
+  (reset-aware), or `sum` / `avg` on the computed rate. `increase` combined
+  with `groupBy` is capped at the top 20 groups by the renderer; the tool
+  emits a neutral hint when the cap may apply.
+- **Histogram**: `"quantile"` with `level` ∈ {0.5, 0.9, 0.95, 0.99} for
+  percentiles, or `"count"` for the total bucket count.
+
+`summary` and `"exponential histogram"` metric kinds are not yet supported
+by the query renderer.
+
+Discovery workflow for metrics:
+
+1. `clickstack_list_sources` to find the metric source ID and its
+   `metricTables` map.
+2. `clickstack_describe_source` to see columns, attribute keys, and a
+   per-kind metric-name sample.
+3. `clickstack_list_metrics` for paginated catalog access with optional
+   kind / namePattern / time-window filters; pass `nextCursor` unchanged
+   for the next page.
+4. `clickstack_describe_metric` to drill into a specific metric and
+   discover its attribute keys + sampled values before authoring queries.
+5. `clickstack_timeseries` / `clickstack_table` to chart the metric.
diff --git a/packages/api/src/controllers/__tests__/sources.test.ts b/packages/api/src/controllers/__tests__/sources.test.ts
new file mode 100644
index 0000000000..71a7d73466
--- /dev/null
+++ b/packages/api/src/controllers/__tests__/sources.test.ts
@@ -0,0 +1,39 @@
+import mongoose from 'mongoose';
+
+import { getSource } from '@/controllers/sources';
+import { clearDBCollections, closeDB, connectDB } from '@/fixtures';
+
+describe('sources controller', () => {
+  beforeAll(async () => {
+    await connectDB();
+  });
+
+  afterEach(async () => {
+    await clearDBCollections();
+  });
+
+  afterAll(async () => {
+    await closeDB();
+  });
+
+  describe('getSource', () => {
+    it('returns null when sourceId is not a valid ObjectId', async () => {
+      // Non-ObjectId strings used to bubble a Mongoose CastError up
+      // through MCP tools as "Cast to ObjectId failed for value ...".
+      // The wrapper now short-circuits before hitting MongoDB so the
+      // caller's not-found branch fires cleanly.
+      const team = new mongoose.Types.ObjectId().toString();
+
+      expect(await getSource(team, 'not-an-objectid')).toBeNull();
+      expect(await getSource(team, '')).toBeNull();
+      expect(await getSource(team, '   ')).toBeNull();
+    });
+
+    it('returns null for a well-formed but missing ObjectId', async () => {
+      const team = new mongoose.Types.ObjectId().toString();
+      const missingSourceId = new mongoose.Types.ObjectId().toString();
+
+      expect(await getSource(team, missingSourceId)).toBeNull();
+    });
+  });
+});
diff --git a/packages/api/src/controllers/sources.ts b/packages/api/src/controllers/sources.ts
index a34f931dff..daa24cf810 100644
--- a/packages/api/src/controllers/sources.ts
+++ b/packages/api/src/controllers/sources.ts
@@ -1,4 +1,5 @@
 import { SourceKind, SourceSchema } from '@hyperdx/common-utils/dist/types';
+import mongoose from 'mongoose';
 
 import {
   ISourceInput,
@@ -35,8 +36,21 @@ export function getSources(team: string) {
   return Source.find({ team });
 }
 
-export function getSource(team: string, sourceId: string) {
-  return Source.findOne({ _id: sourceId, team });
+export async function getSource(team: string, sourceId: string) {
+  // Pre-check the sourceId shape so a non-ObjectId input returns null
+  // (the caller's "not found" branch) instead of bubbling a Mongoose
+  // CastError.
+  if (!mongoose.Types.ObjectId.isValid(sourceId)) {
+    return null;
+  }
+  try {
+    return await Source.findOne({ _id: sourceId, team });
+  } catch {
+    // Defense-in-depth: if Mongoose still throws (e.g. a future cast
+    // path), treat it as "not found" so the caller can surface a clean
+    // error.
+    return null;
+  }
 }
 
 type DistributiveOmit<T, K extends PropertyKey> = T extends T
diff --git a/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts b/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts
index 5bb16e6113..b2a30d7439 100644
--- a/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts
+++ b/packages/api/src/mcp/__tests__/dashboards/prompts.test.ts
@@ -50,25 +50,39 @@ describe('MCP Dashboard Prompts', () => {
       expect(numberFormat.toLowerCase()).toContain('per-series');
     });
 
-    it('declares the MCP metric-authoring gap rather than referencing fields that do not exist', () => {
-      // The MCP select-item schema does not carry metricName / metricType,
-      // so any prompt that tells the model to author a metric tile via
-      // the builder path is teaching it to ship JSON that gets silently
-      // stripped. Guard with an explicit assertion so a future diff that
-      // restores metric-tile guidance fails this test loudly.
+    it('documents the metric-source builder support with the discovery workflow', () => {
+      // Builder tiles on a metric source now work via the metricType +
+      // metricName + isDelta fields on each select item, with metricTables
+      // threaded through runConfigTile's builder branch. The prompt has to
+      // teach the model the discovery workflow (list_sources -> describe
+      // _source -> list_metrics -> describe_metric -> timeseries|table)
+      // and the per-kind aggFn rules so it doesn't fall back to raw SQL.
       const prompt = buildQueryGuidePrompt();
-      expect(prompt).not.toMatch(/metricName \+ metricType/);
-      expect(prompt).not.toMatch(/exactly 1 select item with metricName/);
-      const constraintsIdx = prompt.indexOf('== PER-TILE TYPE CONSTRAINTS ==');
-      const constraintsBody = prompt.slice(constraintsIdx);
-      // The constraints section closes with an explicit note that builder
-      // tiles on a metric source are not reliable today, with a fallback
-      // recipe to raw SQL. Anchor on the phrase so a future diff that
-      // drops the gap-acknowledgement fails loudly.
-      expect(constraintsBody).toMatch(
+      const metricsIdx = prompt.indexOf('== METRIC SOURCES ==');
+      expect(metricsIdx).toBeGreaterThan(-1);
+      const metricsBody = prompt.slice(
+        metricsIdx,
+        prompt.indexOf('\n== ', metricsIdx + 1),
+      );
+      // The four supported metric select fields are named.
+      expect(metricsBody).toContain('metricType');
+      expect(metricsBody).toContain('metricName');
+      expect(metricsBody).toContain('isDelta');
+      // Per-kind aggregation guidance is present.
+      expect(metricsBody).toMatch(/gauge\s+Use aggFn:"last_value"/);
+      expect(metricsBody).toMatch(/sum\s+Use aggFn:"increase"/);
+      expect(metricsBody).toMatch(/histogram\s+Use aggFn:"quantile"/);
+      // The 20-group cap on increase + groupBy is documented.
+      expect(metricsBody).toMatch(/top 20 groups/);
+      // The four-tool discovery chain is documented in order.
+      expect(metricsBody).toContain('clickstack_describe_source');
+      expect(metricsBody).toContain('clickstack_list_metrics');
+      expect(metricsBody).toContain('clickstack_describe_metric');
+      // The old "use raw SQL for metric tiles" workaround language is gone.
+      expect(prompt).not.toMatch(
         /Authoring builder tiles on a metric source is not reliable/,
       );
-      expect(constraintsBody).toMatch(/MCP select-item shape does not carry/);
+      expect(prompt).not.toMatch(/Both table name and UUID are empty/);
     });
 
     it('documents table-tile onClick linking features', () => {
@@ -227,16 +241,30 @@ describe('MCP Dashboard Prompts', () => {
       );
     });
 
-    it('flags the metric source builder gap', () => {
-      // Builder tiles on metric sources currently save but render with
-      // "Both table name and UUID are empty". Claude went 100% raw SQL
-      // across 21 metric tiles for this reason; the prompt has to make
-      // the workaround obvious so the model doesn't try the builder
-      // first and fail silently.
+    it('walks the metric discovery workflow end-to-end with worked examples', () => {
+      // Metric source builder tiles now work — the prompt teaches the
+      // model how to find, characterise, and chart a metric without
+      // falling through to raw SQL. The examples cover one tile per
+      // supported metric kind so the pattern is unambiguous.
       const prompt = buildQueryGuidePrompt();
-      expect(prompt).toMatch(/Both table name and UUID are empty/);
-      expect(prompt).toMatch(/use a raw SQL tile/);
-      expect(prompt).toMatch(/otel_metrics_gauge/);
+      const metricsIdx = prompt.indexOf('== METRIC SOURCES ==');
+      const metricsBody = prompt.slice(
+        metricsIdx,
+        prompt.indexOf('\n== ', metricsIdx + 1),
+      );
+      // The five-step discovery workflow is enumerated.
+      expect(metricsBody).toMatch(/clickstack_list_sources/);
+      expect(metricsBody).toMatch(/clickstack_describe_source/);
+      expect(metricsBody).toMatch(/clickstack_list_metrics/);
+      expect(metricsBody).toMatch(/clickstack_describe_metric/);
+      expect(metricsBody).toMatch(/clickstack_timeseries/);
+      // One worked example per supported kind, each using a real OTel
+      // metric name so the agent has a concrete template.
+      expect(metricsBody).toContain('system.cpu.utilization');
+      expect(metricsBody).toContain('http.server.request.count');
+      expect(metricsBody).toContain('http.server.request.duration');
+      // valueExpression default is documented.
+      expect(metricsBody).toMatch(/valueExpression defaults to "Value"/);
     });
 
     it('contains no em-dashes or en-dashes used as em-dashes', () => {
diff --git a/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts b/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts
new file mode 100644
index 0000000000..77947a5dc1
--- /dev/null
+++ b/packages/api/src/mcp/__tests__/listMetricsCursor.test.ts
@@ -0,0 +1,104 @@
+// Mock heavy dependencies that break in unit-test context (no ClickHouse/Mongo)
+jest.mock('@/models/source', () => ({}));
+jest.mock('@/controllers/sources', () => ({}));
+jest.mock('@/controllers/connection', () => ({}));
+jest.mock('@/utils/trimToolResponse', () => ({
+  trimToolResponse: (data: unknown) => ({ data, isTrimmed: false }),
+}));
+
+import { decodeCursor, encodeCursor } from '../tools/sources/listMetrics';
+
+describe('listMetrics cursor', () => {
+  describe('encodeCursor / decodeCursor round-trip', () => {
+    it('round-trips a gauge cursor', () => {
+      const payload = { kind: 'gauge' as const, lastName: 'system.cpu.idle' };
+      const encoded = encodeCursor(payload);
+      expect(encoded).toMatch(/^[A-Za-z0-9+/]+=*$/); // base64
+      expect(decodeCursor(encoded)).toEqual(payload);
+    });
+
+    it('round-trips a sum cursor', () => {
+      const payload = {
+        kind: 'sum' as const,
+        lastName: 'http.server.request.count',
+      };
+      expect(decodeCursor(encodeCursor(payload))).toEqual(payload);
+    });
+
+    it('round-trips a histogram cursor', () => {
+      const payload = {
+        kind: 'histogram' as const,
+        lastName: 'http.server.request.duration',
+      };
+      expect(decodeCursor(encodeCursor(payload))).toEqual(payload);
+    });
+
+    it('round-trips metric names with dots, dashes, and unicode', () => {
+      const payload = {
+        kind: 'gauge' as const,
+        lastName: 'system.cpu.utilization-µ.naïve',
+      };
+      expect(decodeCursor(encodeCursor(payload))).toEqual(payload);
+    });
+  });
+
+  describe('decodeCursor rejection cases', () => {
+    it('returns null for non-base64 input', () => {
+      expect(decodeCursor('not-base64!')).toBeNull();
+    });
+
+    it('returns null for base64 of invalid JSON', () => {
+      const garbage = Buffer.from('not json').toString('base64');
+      expect(decodeCursor(garbage)).toBeNull();
+    });
+
+    it('returns null when kind is missing', () => {
+      const malformed = Buffer.from(JSON.stringify({ lastName: 'x' })).toString(
+        'base64',
+      );
+      expect(decodeCursor(malformed)).toBeNull();
+    });
+
+    it('returns null when lastName is missing', () => {
+      const malformed = Buffer.from(JSON.stringify({ kind: 'gauge' })).toString(
+        'base64',
+      );
+      expect(decodeCursor(malformed)).toBeNull();
+    });
+
+    it('returns null when kind is not a queryable metric kind', () => {
+      const summaryCursor = Buffer.from(
+        JSON.stringify({ kind: 'summary', lastName: 'x' }),
+      ).toString('base64');
+      expect(decodeCursor(summaryCursor)).toBeNull();
+
+      const expHistCursor = Buffer.from(
+        JSON.stringify({ kind: 'exponential histogram', lastName: 'x' }),
+      ).toString('base64');
+      expect(decodeCursor(expHistCursor)).toBeNull();
+
+      const bogusCursor = Buffer.from(
+        JSON.stringify({ kind: 'bogus', lastName: 'x' }),
+      ).toString('base64');
+      expect(decodeCursor(bogusCursor)).toBeNull();
+    });
+
+    it('returns null when kind has the wrong type', () => {
+      const malformed = Buffer.from(
+        JSON.stringify({ kind: 1, lastName: 'x' }),
+      ).toString('base64');
+      expect(decodeCursor(malformed)).toBeNull();
+    });
+
+    it('returns null when lastName has the wrong type', () => {
+      const malformed = Buffer.from(
+        JSON.stringify({ kind: 'gauge', lastName: 42 }),
+      ).toString('base64');
+      expect(decodeCursor(malformed)).toBeNull();
+    });
+
+    it('returns null for empty string', () => {
+      expect(decodeCursor('')).toBeNull();
+    });
+  });
+});
diff --git a/packages/api/src/mcp/__tests__/metricKinds.test.ts b/packages/api/src/mcp/__tests__/metricKinds.test.ts
new file mode 100644
index 0000000000..a1d4d6aca0
--- /dev/null
+++ b/packages/api/src/mcp/__tests__/metricKinds.test.ts
@@ -0,0 +1,86 @@
+import { sanitizeMetricTables } from '../tools/sources/metricKinds';
+
+describe('sanitizeMetricTables', () => {
+  it('returns undefined for null / undefined input', () => {
+    expect(sanitizeMetricTables(undefined)).toBeUndefined();
+    expect(sanitizeMetricTables(null)).toBeUndefined();
+  });
+
+  it('preserves valid queryable kind entries', () => {
+    expect(
+      sanitizeMetricTables({
+        gauge: 'otel_metrics_gauge',
+        sum: 'otel_metrics_sum',
+        histogram: 'otel_metrics_histogram',
+      }),
+    ).toEqual({
+      gauge: 'otel_metrics_gauge',
+      sum: 'otel_metrics_sum',
+      histogram: 'otel_metrics_histogram',
+    });
+  });
+
+  it('preserves non-queryable kinds the schema still declares', () => {
+    // summary and "exponential histogram" are valid MetricsDataType
+    // members even though the query renderer cannot translate them.
+    expect(
+      sanitizeMetricTables({
+        gauge: 'otel_metrics_gauge',
+        summary: 'otel_metrics_summary',
+        'exponential histogram': 'otel_metrics_exponential_histogram',
+      }),
+    ).toEqual({
+      gauge: 'otel_metrics_gauge',
+      summary: 'otel_metrics_summary',
+      'exponential histogram': 'otel_metrics_exponential_histogram',
+    });
+  });
+
+  it('strips Mongoose _id leaking from the metricTables subdoc', () => {
+    // Existing documents persisted before the schema fix may still
+    // carry a stray ObjectId in the embedded metricTables subdoc.
+    // The sanitizer keeps only valid kind keys.
+    const result = sanitizeMetricTables({
+      gauge: 'otel_metrics_gauge',
+      sum: 'otel_metrics_sum',
+      _id: '6a2ad3b4da94be764e2deed8',
+    });
+    expect(result).toEqual({
+      gauge: 'otel_metrics_gauge',
+      sum: 'otel_metrics_sum',
+    });
+    expect(result).not.toHaveProperty('_id');
+  });
+
+  it('drops unknown keys', () => {
+    expect(
+      sanitizeMetricTables({
+        gauge: 'otel_metrics_gauge',
+        not_a_real_kind: 'something',
+      }),
+    ).toEqual({
+      gauge: 'otel_metrics_gauge',
+    });
+  });
+
+  it('drops non-string values for valid keys', () => {
+    expect(
+      sanitizeMetricTables({
+        gauge: 'otel_metrics_gauge',
+        sum: 12345,
+        histogram: null,
+      } as Record<string, unknown>),
+    ).toEqual({
+      gauge: 'otel_metrics_gauge',
+    });
+  });
+
+  it('returns undefined when no valid entries remain', () => {
+    expect(
+      sanitizeMetricTables({
+        _id: 'whatever',
+        not_a_real_kind: 'x',
+      }),
+    ).toBeUndefined();
+  });
+});
diff --git a/packages/api/src/mcp/__tests__/query.test.ts b/packages/api/src/mcp/__tests__/query.test.ts
index c523a67999..6033e6c712 100644
--- a/packages/api/src/mcp/__tests__/query.test.ts
+++ b/packages/api/src/mcp/__tests__/query.test.ts
@@ -7,10 +7,18 @@ jest.mock('@/utils/trimToolResponse', () => ({
 }));
 
 import {
+  annotateIncreaseTopNHint,
+  assertSourceKindMatchesSelect,
   errorHint,
+  INCREASE_TOP_N_CAP,
   mergeWhereIntoSelectItems,
   parseTimeRange,
 } from '../tools/query/helpers';
+import {
+  applyMetricSelectDefaults,
+  getMetricSelectIssues,
+  validateMetricSelectItems,
+} from '../tools/query/schemas';
 import { resolveOrderBy } from '../tools/query/table';
 
 describe('parseTimeRange', () => {
@@ -352,4 +360,483 @@ describe('resolveOrderBy', () => {
       ]),
     ).toBe('quantile');
   });
+
+  it('should NOT synthesize for "increase" aggFn (metric-only marker)', () => {
+    // increase compiles to a multi-CTE sum(Rate) pipeline in the renderer,
+    // not a standalone SQL function. resolveOrderBy must leave bare
+    // "increase" alone so the renderer-assigned alias can take over.
+    expect(
+      resolveOrderBy('increase', [
+        { aggFn: 'increase', valueExpression: 'Value' },
+      ]),
+    ).toBe('increase');
+  });
+});
+
+// ─── getMetricSelectIssues ───────────────────────────────────────────────────
+
+describe('getMetricSelectIssues', () => {
+  it('returns no issues for a plain non-metric count', () => {
+    expect(getMetricSelectIssues({ aggFn: 'count' })).toEqual([]);
+  });
+
+  it('returns no issues for a plain non-metric avg with valueExpression', () => {
+    expect(
+      getMetricSelectIssues({ aggFn: 'avg', valueExpression: 'Duration' }),
+    ).toEqual([]);
+  });
+
+  it('requires valueExpression for non-count non-metric aggregations', () => {
+    const issues = getMetricSelectIssues({ aggFn: 'avg' });
+    expect(issues).toHaveLength(1);
+    expect(issues[0].path).toEqual(['valueExpression']);
+    expect(issues[0].message).toContain('required for non-count');
+  });
+
+  it('does NOT require valueExpression when metricType is set', () => {
+    // valueExpression defaults to "Value" for metric sources
+    expect(
+      getMetricSelectIssues({
+        aggFn: 'avg',
+        metricType: 'gauge',
+        metricName: 'system.cpu.utilization',
+      }),
+    ).toEqual([]);
+  });
+
+  it('rejects valueExpression on aggFn:"count"', () => {
+    const issues = getMetricSelectIssues({
+      aggFn: 'count',
+      valueExpression: 'Duration',
+    });
+    expect(issues).toHaveLength(1);
+    expect(issues[0].path).toEqual(['valueExpression']);
+  });
+
+  it('rejects metricType without metricName', () => {
+    const issues = getMetricSelectIssues({
+      aggFn: 'avg',
+      metricType: 'gauge',
+    });
+    expect(issues.find(i => i.path[0] === 'metricName')).toBeDefined();
+  });
+
+  it('rejects metricName without metricType', () => {
+    const issues = getMetricSelectIssues({
+      aggFn: 'avg',
+      metricName: 'system.cpu.utilization',
+    });
+    expect(issues.find(i => i.path[0] === 'metricType')).toBeDefined();
+  });
+
+  it('rejects aggFn:"increase" on a gauge metric', () => {
+    const issues = getMetricSelectIssues({
+      aggFn: 'increase',
+      metricType: 'gauge',
+      metricName: 'system.cpu.utilization',
+    });
+    expect(issues.find(i => i.path[0] === 'aggFn')).toBeDefined();
+  });
+
+  it('accepts aggFn:"increase" on a sum metric', () => {
+    expect(
+      getMetricSelectIssues({
+        aggFn: 'increase',
+        metricType: 'sum',
+        metricName: 'http.server.request.count',
+      }),
+    ).toEqual([]);
+  });
+
+  it('rejects aggFn:"avg" on a histogram metric', () => {
+    const issues = getMetricSelectIssues({
+      aggFn: 'avg',
+      metricType: 'histogram',
+      metricName: 'http.server.request.duration',
+    });
+    expect(issues.find(i => i.path[0] === 'aggFn')).toBeDefined();
+  });
+
+  it('accepts aggFn:"count" on a histogram metric (no level required)', () => {
+    expect(
+      getMetricSelectIssues({
+        aggFn: 'count',
+        metricType: 'histogram',
+        metricName: 'http.server.request.duration',
+      }),
+    ).toEqual([]);
+  });
+
+  it('requires level for aggFn:"quantile" on a histogram metric', () => {
+    const issues = getMetricSelectIssues({
+      aggFn: 'quantile',
+      metricType: 'histogram',
+      metricName: 'http.server.request.duration',
+    });
+    expect(issues.find(i => i.path[0] === 'level')).toBeDefined();
+  });
+
+  it('accepts aggFn:"quantile" with level on a histogram metric', () => {
+    expect(
+      getMetricSelectIssues({
+        aggFn: 'quantile',
+        level: 0.95,
+        metricType: 'histogram',
+        metricName: 'http.server.request.duration',
+      }),
+    ).toEqual([]);
+  });
+
+  it('rejects isDelta on a non-gauge metric', () => {
+    const issues = getMetricSelectIssues({
+      aggFn: 'sum',
+      metricType: 'sum',
+      metricName: 'http.request.count',
+      isDelta: true,
+    });
+    expect(issues.find(i => i.path[0] === 'isDelta')).toBeDefined();
+  });
+
+  it('accepts isDelta on a gauge metric', () => {
+    expect(
+      getMetricSelectIssues({
+        aggFn: 'avg',
+        metricType: 'gauge',
+        metricName: 'system.cpu.utilization',
+        isDelta: true,
+      }),
+    ).toEqual([]);
+  });
+
+  it('rejects level when aggFn is not quantile', () => {
+    const issues = getMetricSelectIssues({
+      aggFn: 'avg',
+      valueExpression: 'Duration',
+      level: 0.95,
+    });
+    expect(issues.find(i => i.path[0] === 'level')).toBeDefined();
+  });
+});
+
+// ─── validateMetricSelectItems ───────────────────────────────────────────────
+
+describe('validateMetricSelectItems', () => {
+  it('returns null for a valid items array', () => {
+    expect(
+      validateMetricSelectItems([
+        { aggFn: 'count' },
+        { aggFn: 'avg', valueExpression: 'Duration' },
+      ]),
+    ).toBeNull();
+  });
+
+  it('returns an error envelope and labels each item by index', () => {
+    const result = validateMetricSelectItems([
+      { aggFn: 'avg' }, // missing valueExpression
+      { aggFn: 'increase', metricType: 'gauge', metricName: 'x' }, // increase requires sum
+    ]);
+    expect(result).not.toBeNull();
+    if (!result) return;
+    expect(result.isError).toBe(true);
+    expect(result.content[0].type).toBe('text');
+    const text = result.content[0].text;
+    expect(text).toContain('select[0].valueExpression');
+    expect(text).toContain('select[1].aggFn');
+  });
+
+  it('returns an error envelope for a single bad item', () => {
+    const result = validateMetricSelectItems([
+      {
+        aggFn: 'quantile',
+        metricType: 'histogram',
+        metricName: 'http.server.request.duration',
+      }, // missing level
+    ]);
+    expect(result).not.toBeNull();
+    if (!result) return;
+    expect(result.content[0].text).toContain('select[0].level');
+  });
+});
+
+// ─── applyMetricSelectDefaults ───────────────────────────────────────────────
+
+// Typed as McpSelectItem so the inferred generic preserves the
+// optional valueExpression field. Otherwise structural inference
+// narrows away `valueExpression` and the assertions below stop
+// type-checking.
+type SelectItem = {
+  aggFn: string;
+  metricType?: 'gauge' | 'sum' | 'histogram';
+  metricName?: string;
+  valueExpression?: string;
+};
+
+describe('applyMetricSelectDefaults', () => {
+  it('defaults valueExpression to "Value" when metricType is set', () => {
+    const input: SelectItem[] = [
+      {
+        aggFn: 'avg',
+        metricType: 'gauge',
+        metricName: 'system.cpu.utilization',
+      },
+    ];
+    const out = applyMetricSelectDefaults(input);
+    expect(out[0].valueExpression).toBe('Value');
+  });
+
+  it('preserves an explicit valueExpression on metric items', () => {
+    const input: SelectItem[] = [
+      {
+        aggFn: 'avg',
+        metricType: 'gauge',
+        metricName: 'x',
+        valueExpression: 'Value * 100',
+      },
+    ];
+    const out = applyMetricSelectDefaults(input);
+    expect(out[0].valueExpression).toBe('Value * 100');
+  });
+
+  it('leaves non-metric items untouched', () => {
+    const input: SelectItem[] = [{ aggFn: 'count' }];
+    const out = applyMetricSelectDefaults(input);
+    expect(out[0]).toEqual({ aggFn: 'count' });
+    expect(out[0].valueExpression).toBeUndefined();
+  });
+
+  it('returns new objects only for the items it mutates', () => {
+    const input: SelectItem[] = [
+      { aggFn: 'count' },
+      { aggFn: 'avg', metricType: 'gauge', metricName: 'x' },
+    ];
+    const out = applyMetricSelectDefaults(input);
+    expect(out[0]).toBe(input[0]); // unchanged item is the same reference
+    expect(out[1]).not.toBe(input[1]); // mutated item is a new object
+  });
+});
+
+// ─── annotateIncreaseTopNHint ────────────────────────────────────────────────
+
+function buildResult(data: unknown[]) {
+  return {
+    content: [
+      {
+        type: 'text',
+        text: JSON.stringify({ result: { data } }),
+      },
+    ],
+    isError: false,
+  };
+}
+
+/** Rows with `count` distinct ServiceName group values. */
+function buildGroupedRows(count: number) {
+  return Array.from({ length: count }, (_, i) => ({
+    ServiceName: `svc-${i}`,
+    Series: i,
+  }));
+}
+
+describe('annotateIncreaseTopNHint', () => {
+  it('exposes the renderer cap as a constant', () => {
+    expect(INCREASE_TOP_N_CAP).toBe(20);
+  });
+
+  it('appends a hint when the distinct group count reaches the cap', () => {
+    const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP));
+    annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName');
+    const parsed = JSON.parse(result.content[0].text);
+    expect(parsed.hints).toHaveLength(1);
+    expect(parsed.hints[0]).toContain('top 20');
+    expect(parsed.hints[0]).toContain('aggFn:"increase"');
+  });
+
+  it('does NOT annotate when the distinct group count is below the cap', () => {
+    const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP - 1));
+    annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName');
+    const parsed = JSON.parse(result.content[0].text);
+    expect(parsed.hints).toBeUndefined();
+  });
+
+  it('counts distinct groups across repeated time buckets, not raw rows', () => {
+    // 3 groups × 10 buckets = 30 rows but only 3 distinct groups.
+    const rows = Array.from({ length: 30 }, (_, i) => ({
+      ServiceName: `svc-${i % 3}`,
+      bucket: i,
+    }));
+    const result = buildResult(rows);
+    annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName');
+    const parsed = JSON.parse(result.content[0].text);
+    expect(parsed.hints).toBeUndefined();
+  });
+
+  it('does NOT annotate when the groupBy column cannot be resolved from rows', () => {
+    const result = buildResult([{ x: 1 }, { x: 2 }]);
+    annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName');
+    const parsed = JSON.parse(result.content[0].text);
+    expect(parsed.hints).toBeUndefined();
+  });
+
+  it('supports multi-column groupBy by counting distinct combinations', () => {
+    // 4 ServiceName values × 5 SpanName values = 20 distinct combos.
+    const rows = Array.from({ length: 20 }, (_, i) => ({
+      ServiceName: `svc-${i % 4}`,
+      SpanName: `op-${Math.floor(i / 4)}`,
+    }));
+    const result = buildResult(rows);
+    annotateIncreaseTopNHint(
+      result,
+      [{ aggFn: 'increase' }],
+      'ServiceName, SpanName',
+    );
+    const parsed = JSON.parse(result.content[0].text);
+    expect(parsed.hints).toHaveLength(1);
+  });
+
+  it('appends alongside an existing hint instead of overwriting', () => {
+    const result = {
+      content: [
+        {
+          type: 'text',
+          text: JSON.stringify({
+            result: { data: buildGroupedRows(INCREASE_TOP_N_CAP) },
+            hints: ['previous hint from another writer'],
+          }),
+        },
+      ],
+      isError: false,
+    };
+    annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName');
+    const parsed = JSON.parse(result.content[0].text);
+    expect(parsed.hints).toHaveLength(2);
+    expect(parsed.hints[0]).toBe('previous hint from another writer');
+    expect(parsed.hints[1]).toContain('top 20');
+  });
+
+  it('does NOT annotate when increase is used WITHOUT a groupBy', () => {
+    const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP));
+    annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], undefined);
+    const parsed = JSON.parse(result.content[0].text);
+    expect(parsed.hints).toBeUndefined();
+  });
+
+  it('does NOT annotate when groupBy is an empty string', () => {
+    const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP));
+    annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], '   ');
+    const parsed = JSON.parse(result.content[0].text);
+    expect(parsed.hints).toBeUndefined();
+  });
+
+  it('does NOT annotate when no select item uses increase', () => {
+    const result = buildResult(buildGroupedRows(INCREASE_TOP_N_CAP));
+    annotateIncreaseTopNHint(
+      result,
+      [{ aggFn: 'sum' }, { aggFn: 'avg' }],
+      'ServiceName',
+    );
+    const parsed = JSON.parse(result.content[0].text);
+    expect(parsed.hints).toBeUndefined();
+  });
+
+  it('does NOT annotate empty results (already labelled by formatQueryResult)', () => {
+    const result = buildResult([]);
+    annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName');
+    const parsed = JSON.parse(result.content[0].text);
+    expect(parsed.hints).toBeUndefined();
+  });
+
+  it('does NOT annotate error results', () => {
+    const result = {
+      content: [{ type: 'text', text: 'an error message' }],
+      isError: true,
+    };
+    annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName');
+    expect(result.content[0].text).toBe('an error message');
+  });
+
+  it('leaves unparseable text content unchanged', () => {
+    const result = {
+      content: [{ type: 'text', text: 'not json' }],
+      isError: false,
+    };
+    annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName');
+    expect(result.content[0].text).toBe('not json');
+  });
+
+  it('is a no-op when content is missing', () => {
+    const result = {} as Parameters<typeof annotateIncreaseTopNHint>[0];
+    expect(() =>
+      annotateIncreaseTopNHint(result, [{ aggFn: 'increase' }], 'ServiceName'),
+    ).not.toThrow();
+  });
+});
+
+describe('assertSourceKindMatchesSelect', () => {
+  it('returns null when metric select runs against a metric source', () => {
+    const source = { kind: 'metric' };
+    const select = [
+      {
+        aggFn: 'avg',
+        metricType: 'gauge',
+        metricName: 'system.cpu.utilization',
+      },
+    ];
+    expect(assertSourceKindMatchesSelect(source, select)).toBeNull();
+  });
+
+  it('returns null when non-metric select runs against a trace source', () => {
+    const source = { kind: 'trace' };
+    const select = [{ aggFn: 'count' }];
+    expect(assertSourceKindMatchesSelect(source, select)).toBeNull();
+  });
+
+  it('errors when metric params target a non-metric source', () => {
+    const source = { kind: 'trace' };
+    const select = [
+      {
+        aggFn: 'avg',
+        metricType: 'gauge',
+        metricName: 'system.cpu.utilization',
+      },
+    ];
+    const result = assertSourceKindMatchesSelect(source, select);
+    expect(result?.isError).toBe(true);
+    expect(result?.content[0].text).toMatch(/not metric/);
+    expect(result?.content[0].text).toMatch(/clickstack_list_sources/);
+  });
+
+  it('errors when a metric source receives select items without metricType', () => {
+    const source = { kind: 'metric' };
+    const select = [{ aggFn: 'avg', valueExpression: 'Value' }];
+    const result = assertSourceKindMatchesSelect(source, select);
+    expect(result?.isError).toBe(true);
+    expect(result?.content[0].text).toMatch(/metricType \+ metricName/);
+    expect(result?.content[0].text).toMatch(/clickstack_describe_source/);
+  });
+
+  it('counts only items whose metricType is a non-empty string', () => {
+    const source = { kind: 'trace' };
+    // Items with empty/missing metricType should not trip the check
+    expect(
+      assertSourceKindMatchesSelect(source, [
+        { aggFn: 'count' },
+        { aggFn: 'avg', valueExpression: 'Duration', metricType: '' },
+      ]),
+    ).toBeNull();
+  });
+
+  it('returns null for a raw string select', () => {
+    // Raw-string selects bypass the metric-annotation check; the
+    // renderer handles them directly.
+    expect(
+      assertSourceKindMatchesSelect({ kind: 'metric' }, 'count()'),
+    ).toBeNull();
+  });
+
+  it('returns null when select is not an array', () => {
+    expect(
+      assertSourceKindMatchesSelect({ kind: 'trace' }, undefined),
+    ).toBeNull();
+    expect(assertSourceKindMatchesSelect({ kind: 'trace' }, null)).toBeNull();
+  });
 });
diff --git a/packages/api/src/mcp/__tests__/queryTool.test.ts b/packages/api/src/mcp/__tests__/queryTool.test.ts
index 725e55047c..175411e181 100644
--- a/packages/api/src/mcp/__tests__/queryTool.test.ts
+++ b/packages/api/src/mcp/__tests__/queryTool.test.ts
@@ -1,10 +1,13 @@
 import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node';
-import { SourceKind } from '@hyperdx/common-utils/dist/types';
+import { MetricsDataType, SourceKind } from '@hyperdx/common-utils/dist/types';
 import { Client } from '@modelcontextprotocol/sdk/client/index.js';
 
 import * as config from '@/config';
 import {
   bulkInsertLogs,
+  bulkInsertMetricsGauge,
+  bulkInsertMetricsHistogram,
+  bulkInsertMetricsSum,
   DEFAULT_DATABASE,
   DEFAULT_LOGS_TABLE,
   DEFAULT_TRACES_TABLE,
@@ -802,4 +805,227 @@ describe('MCP Query Tools', () => {
       });
     });
   });
+
+  // ── Metric source query coverage ─────────────────────────────────────────
+
+  describe('Metric sources (timeseries + table)', () => {
+    const createMetricSource = () =>
+      Source.create({
+        kind: SourceKind.Metric,
+        team: team._id,
+        from: { databaseName: DEFAULT_DATABASE, tableName: '' },
+        metricTables: {
+          [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge',
+          [MetricsDataType.Sum.toLowerCase()]: 'otel_metrics_sum',
+          [MetricsDataType.Histogram.toLowerCase()]: 'otel_metrics_histogram',
+        },
+        timestampValueExpression: 'TimeUnix',
+        connection: connection._id,
+        name: 'Metrics',
+      });
+
+    const seedGauge = async (now: Date) => {
+      await bulkInsertMetricsGauge([
+        {
+          MetricName: 'system.cpu.utilization',
+          ResourceAttributes: { 'service.name': 'svc-a' },
+          ServiceName: 'svc-a',
+          TimeUnix: now,
+          Value: 0.42,
+        },
+        {
+          MetricName: 'system.cpu.utilization',
+          ResourceAttributes: { 'service.name': 'svc-a' },
+          ServiceName: 'svc-a',
+          TimeUnix: new Date(now.getTime() + 1000),
+          Value: 0.61,
+        },
+      ]);
+    };
+
+    const seedSum = async (now: Date) => {
+      await bulkInsertMetricsSum([
+        {
+          MetricName: 'http.server.request.count',
+          AggregationTemporality: 1, // DELTA
+          IsMonotonic: true,
+          ResourceAttributes: { 'service.name': 'svc-a' },
+          ServiceName: 'svc-a',
+          TimeUnix: now,
+          Value: 100,
+        },
+        {
+          MetricName: 'http.server.request.count',
+          AggregationTemporality: 1,
+          IsMonotonic: true,
+          ResourceAttributes: { 'service.name': 'svc-a' },
+          ServiceName: 'svc-a',
+          TimeUnix: new Date(now.getTime() + 1000),
+          Value: 25,
+        },
+      ]);
+    };
+
+    const seedHistogram = async (now: Date) => {
+      await bulkInsertMetricsHistogram([
+        {
+          MetricName: 'http.server.request.duration',
+          ResourceAttributes: { 'service.name': 'svc-a' },
+          TimeUnix: now,
+          BucketCounts: [1, 2, 3, 4],
+          ExplicitBounds: [10, 100, 1000],
+          AggregationTemporality: 1,
+        },
+      ]);
+    };
+
+    it('runs clickstack_timeseries against a gauge metric source', async () => {
+      const metricSource = await createMetricSource();
+      const now = new Date();
+      await seedGauge(now);
+
+      const result = await callTool(client, 'clickstack_timeseries', {
+        sourceId: metricSource._id.toString(),
+        select: [
+          {
+            aggFn: 'avg',
+            metricType: 'gauge',
+            metricName: 'system.cpu.utilization',
+            alias: 'cpu',
+          },
+        ],
+        startTime: new Date(now.getTime() - 60_000).toISOString(),
+        endTime: new Date(now.getTime() + 60_000).toISOString(),
+        granularity: '1 minute',
+      });
+
+      expect(result.isError).toBeFalsy();
+      // No "Both table name and UUID are empty" error from the renderer.
+      expect(getFirstText(result)).not.toMatch(/UUID are empty/);
+    });
+
+    it('runs clickstack_table aggFn:"increase" against a sum metric source', async () => {
+      const metricSource = await createMetricSource();
+      const now = new Date();
+      await seedSum(now);
+
+      const result = await callTool(client, 'clickstack_table', {
+        sourceId: metricSource._id.toString(),
+        select: [
+          {
+            aggFn: 'increase',
+            metricType: 'sum',
+            metricName: 'http.server.request.count',
+            alias: 'requests',
+          },
+        ],
+        startTime: new Date(now.getTime() - 60_000).toISOString(),
+        endTime: new Date(now.getTime() + 60_000).toISOString(),
+      });
+
+      expect(result.isError).toBeFalsy();
+      expect(getFirstText(result)).not.toMatch(/UUID are empty/);
+    });
+
+    it('runs clickstack_timeseries quantile against a histogram metric source', async () => {
+      const metricSource = await createMetricSource();
+      const now = new Date();
+      await seedHistogram(now);
+
+      const result = await callTool(client, 'clickstack_timeseries', {
+        sourceId: metricSource._id.toString(),
+        select: [
+          {
+            aggFn: 'quantile',
+            level: 0.95,
+            metricType: 'histogram',
+            metricName: 'http.server.request.duration',
+            alias: 'p95',
+          },
+        ],
+        startTime: new Date(now.getTime() - 60_000).toISOString(),
+        endTime: new Date(now.getTime() + 60_000).toISOString(),
+        granularity: '1 minute',
+      });
+
+      expect(result.isError).toBeFalsy();
+    });
+
+    it('rejects aggFn:"increase" on a gauge select item with a friendly schema error', async () => {
+      const metricSource = await createMetricSource();
+      const result = await callTool(client, 'clickstack_timeseries', {
+        sourceId: metricSource._id.toString(),
+        select: [
+          {
+            aggFn: 'increase',
+            metricType: 'gauge',
+            metricName: 'system.cpu.utilization',
+          },
+        ],
+      });
+      expect(result.isError).toBe(true);
+      expect(getFirstText(result)).toMatch(/"increase" is only valid for sum/);
+    });
+
+    it('rejects aggFn:"quantile" on a histogram without level', async () => {
+      const metricSource = await createMetricSource();
+      const result = await callTool(client, 'clickstack_timeseries', {
+        sourceId: metricSource._id.toString(),
+        select: [
+          {
+            aggFn: 'quantile',
+            metricType: 'histogram',
+            metricName: 'http.server.request.duration',
+          },
+        ],
+      });
+      expect(result.isError).toBe(true);
+      expect(getFirstText(result)).toMatch(/level is required/);
+    });
+
+    it('rejects aggFn:"avg" on a histogram select item', async () => {
+      const metricSource = await createMetricSource();
+      const result = await callTool(client, 'clickstack_timeseries', {
+        sourceId: metricSource._id.toString(),
+        select: [
+          {
+            aggFn: 'avg',
+            metricType: 'histogram',
+            metricName: 'http.server.request.duration',
+          },
+        ],
+      });
+      expect(result.isError).toBe(true);
+      expect(getFirstText(result)).toMatch(
+        /Histogram metrics only support aggFn/,
+      );
+    });
+
+    it('rejects metricType without metricName', async () => {
+      const metricSource = await createMetricSource();
+      const result = await callTool(client, 'clickstack_timeseries', {
+        sourceId: metricSource._id.toString(),
+        select: [{ aggFn: 'avg', metricType: 'gauge' }],
+      });
+      expect(result.isError).toBe(true);
+      expect(getFirstText(result)).toMatch(/metricName is required/);
+    });
+
+    it('rejects isDelta on a non-gauge metric', async () => {
+      const metricSource = await createMetricSource();
+      const result = await callTool(client, 'clickstack_timeseries', {
+        sourceId: metricSource._id.toString(),
+        select: [
+          {
+            aggFn: 'increase',
+            metricType: 'sum',
+            metricName: 'http.server.request.count',
+            isDelta: true,
+          },
+        ],
+      });
+      expect(result.isError).toBe(true);
+      expect(getFirstText(result)).toMatch(/isDelta is only valid for gauge/);
+    });
+  });
 });
diff --git a/packages/api/src/mcp/__tests__/sources.test.ts b/packages/api/src/mcp/__tests__/sources.test.ts
index 008e4fe852..8acee4dfed 100644
--- a/packages/api/src/mcp/__tests__/sources.test.ts
+++ b/packages/api/src/mcp/__tests__/sources.test.ts
@@ -3,6 +3,9 @@ import { Client } from '@modelcontextprotocol/sdk/client/index.js';
 
 import * as config from '@/config';
 import {
+  bulkInsertMetricsGauge,
+  bulkInsertMetricsHistogram,
+  bulkInsertMetricsSum,
   DEFAULT_DATABASE,
   DEFAULT_LOGS_TABLE,
   DEFAULT_TRACES_TABLE,
@@ -249,7 +252,7 @@ describe('MCP Source Tools', () => {
       expect(output.source.keyColumns).toHaveProperty('spanId');
     });
 
-    it('should return metricTables for a metric source (no column schema)', async () => {
+    it('should return metricTables AND column schema for a metric source', async () => {
       const metricSource = await Source.create({
         kind: SourceKind.Metric,
         team: team._id,
@@ -280,13 +283,34 @@ describe('MCP Source Tools', () => {
         kind: SourceKind.Metric,
       });
 
-      // Metric sources have metricTables but no column schema or value samples
+      // Metric sources surface metricTables AND run column / map-key
+      // discovery against the representative metric table (gauge picked
+      // first by pickRepresentativeMetricTable).
       expect(output.source.metricTables).toBeDefined();
       expect(output.source.metricTables).toHaveProperty('gauge');
-      expect(output.source.columns).toBeUndefined();
-      expect(output.source.lowCardinalityValues).toBeUndefined();
-      expect(output.source.mapAttributeKeys).toBeUndefined();
-      expect(output.source.mapAttributeValues).toBeUndefined();
+      // metricTables should only contain valid kind keys — not a stray
+      // Mongoose `_id` from the embedded subdoc.
+      expect(output.source.metricTables).not.toHaveProperty('_id');
+      expect(Object.keys(output.source.metricTables).sort()).toEqual(
+        ['gauge', 'histogram', 'sum'].sort(),
+      );
+      expect(output.source.discoveryMetricKind).toBe('gauge');
+      expect(output.source.columns).toBeDefined();
+      expect(Array.isArray(output.source.columns)).toBe(true);
+      // The OTel Collector gauge schema includes MetricName + Value
+      // as native columns.
+      const columnNames = output.source.columns.map(
+        (c: { name: string }) => c.name,
+      );
+      expect(columnNames).toContain('MetricName');
+      expect(columnNames).toContain('Value');
+
+      // nextSteps points at the new metric discovery tools.
+      expect(output.nextSteps.query).toContain('metricType');
+      expect(output.nextSteps.discovery).toContain('clickstack_list_metrics');
+      expect(output.nextSteps.discovery).toContain(
+        'clickstack_describe_metric',
+      );
     });
 
     it('should include usage guidance and nextSteps', async () => {
@@ -338,4 +362,513 @@ describe('MCP Source Tools', () => {
       expect(getFirstText(result)).toContain('not found');
     });
   });
+
+  // ── clickstack_list_metrics ──────────────────────────────────────────────
+
+  describe('clickstack_list_metrics', () => {
+    const createMetricSource = () =>
+      Source.create({
+        kind: SourceKind.Metric,
+        team: team._id,
+        from: { databaseName: DEFAULT_DATABASE, tableName: '' },
+        metricTables: {
+          [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge',
+          [MetricsDataType.Sum.toLowerCase()]: 'otel_metrics_sum',
+          [MetricsDataType.Histogram.toLowerCase()]: 'otel_metrics_histogram',
+        },
+        timestampValueExpression: 'TimeUnix',
+        connection: connection._id,
+        name: 'Metrics',
+      });
+
+    const seedMetricNames = async () => {
+      const now = new Date();
+      await bulkInsertMetricsGauge([
+        {
+          MetricName: 'system.cpu.utilization',
+          ResourceAttributes: { 'service.name': 'svc-a' },
+          ServiceName: 'svc-a',
+          TimeUnix: now,
+          Value: 0.42,
+        },
+        {
+          MetricName: 'system.memory.usage',
+          ResourceAttributes: { 'service.name': 'svc-a' },
+          ServiceName: 'svc-a',
+          TimeUnix: now,
+          Value: 12345,
+        },
+      ]);
+      await bulkInsertMetricsSum([
+        {
+          MetricName: 'http.server.request.count',
+          AggregationTemporality: 1,
+          IsMonotonic: true,
+          ResourceAttributes: { 'service.name': 'svc-a' },
+          ServiceName: 'svc-a',
+          TimeUnix: now,
+          Value: 100,
+        },
+      ]);
+      await bulkInsertMetricsHistogram([
+        {
+          MetricName: 'http.server.request.duration',
+          ResourceAttributes: { 'service.name': 'svc-a' },
+          TimeUnix: now,
+          BucketCounts: [1, 2, 3],
+          ExplicitBounds: [10, 100, 1000],
+          AggregationTemporality: 1,
+        },
+      ]);
+    };
+
+    it('rejects non-metric sources with a friendly error', async () => {
+      const result = await callTool(client, 'clickstack_list_metrics', {
+        sourceId: traceSource._id.toString(),
+      });
+      expect(result.isError).toBe(true);
+      expect(getFirstText(result)).toContain('not a metric source');
+    });
+
+    it('returns 404 for unknown source IDs', async () => {
+      const result = await callTool(client, 'clickstack_list_metrics', {
+        sourceId: '000000000000000000000000',
+      });
+      expect(result.isError).toBe(true);
+      expect(getFirstText(result)).toContain('not found');
+    });
+
+    it('returns metrics across all populated kinds when kind is omitted', async () => {
+      const metricSource = await createMetricSource();
+      await seedMetricNames();
+
+      const result = await callTool(client, 'clickstack_list_metrics', {
+        sourceId: metricSource._id.toString(),
+      });
+
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      const namesByKind: Record<string, string[]> = {};
+      for (const entry of output.metrics ?? []) {
+        if (!namesByKind[entry.kind]) namesByKind[entry.kind] = [];
+        namesByKind[entry.kind].push(entry.name);
+      }
+      expect(namesByKind.gauge ?? []).toEqual(
+        expect.arrayContaining([
+          'system.cpu.utilization',
+          'system.memory.usage',
+        ]),
+      );
+      expect(namesByKind.sum ?? []).toEqual(
+        expect.arrayContaining(['http.server.request.count']),
+      );
+      expect(namesByKind.histogram ?? []).toEqual(
+        expect.arrayContaining(['http.server.request.duration']),
+      );
+    });
+
+    it('restricts results to a single kind when kind is set', async () => {
+      const metricSource = await createMetricSource();
+      await seedMetricNames();
+
+      const result = await callTool(client, 'clickstack_list_metrics', {
+        sourceId: metricSource._id.toString(),
+        kind: 'gauge',
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      for (const entry of output.metrics ?? []) {
+        expect(entry.kind).toBe('gauge');
+      }
+    });
+
+    it('applies namePattern as a server-side ILIKE filter', async () => {
+      const metricSource = await createMetricSource();
+      await seedMetricNames();
+
+      const result = await callTool(client, 'clickstack_list_metrics', {
+        sourceId: metricSource._id.toString(),
+        namePattern: 'system.cpu.%',
+      });
+      const output = JSON.parse(getFirstText(result));
+      for (const entry of output.metrics ?? []) {
+        expect(entry.name).toMatch(/^system\.cpu\./);
+      }
+    });
+
+    it('paginates via opaque nextCursor and resumes cleanly', async () => {
+      const metricSource = await createMetricSource();
+      await seedMetricNames();
+
+      // First page: only ask for one entry so the cap is forced even with
+      // a small seed; sanity-check the cursor round-trip.
+      const first = await callTool(client, 'clickstack_list_metrics', {
+        sourceId: metricSource._id.toString(),
+        kind: 'gauge',
+        limit: 1,
+      });
+      const firstOutput = JSON.parse(getFirstText(first));
+      expect(firstOutput.metrics.length).toBe(1);
+      expect(typeof firstOutput.nextCursor).toBe('string');
+
+      const second = await callTool(client, 'clickstack_list_metrics', {
+        sourceId: metricSource._id.toString(),
+        kind: 'gauge',
+        limit: 5,
+        cursor: firstOutput.nextCursor,
+      });
+      expect(second.isError).toBeFalsy();
+      const secondOutput = JSON.parse(getFirstText(second));
+      // The second page must not repeat the first page's last name.
+      const firstNames = new Set(
+        (firstOutput.metrics as { name: string }[]).map(m => m.name),
+      );
+      for (const entry of secondOutput.metrics as { name: string }[]) {
+        expect(firstNames.has(entry.name)).toBe(false);
+      }
+    });
+
+    it('rejects a malformed cursor with an actionable error', async () => {
+      const metricSource = await createMetricSource();
+      const result = await callTool(client, 'clickstack_list_metrics', {
+        sourceId: metricSource._id.toString(),
+        cursor: 'not-base64!',
+      });
+      expect(result.isError).toBe(true);
+      expect(getFirstText(result)).toMatch(/Invalid cursor/);
+    });
+
+    it('returns an empty-result hint when nothing matches', async () => {
+      const metricSource = await createMetricSource();
+      const result = await callTool(client, 'clickstack_list_metrics', {
+        sourceId: metricSource._id.toString(),
+        namePattern: 'this.name.does.not.exist.%',
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      expect(output.metrics).toEqual([]);
+      expect(output.hint).toMatch(/widening|removing|omitting/);
+    });
+
+    it('surfaces partialFailure instead of the empty hint when a kind fetch fails', async () => {
+      // Point the gauge kind at the logs table: it exists (so source
+      // resolution succeeds) but has no MetricName/TimeUnix columns, so
+      // the per-kind listing query throws.
+      const brokenSource = await Source.create({
+        kind: SourceKind.Metric,
+        team: team._id,
+        from: { databaseName: DEFAULT_DATABASE, tableName: '' },
+        metricTables: {
+          [MetricsDataType.Gauge.toLowerCase()]: DEFAULT_LOGS_TABLE,
+        },
+        timestampValueExpression: 'TimeUnix',
+        connection: connection._id,
+        name: 'BrokenMetrics',
+      });
+      const result = await callTool(client, 'clickstack_list_metrics', {
+        sourceId: brokenSource._id.toString(),
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      expect(output.metrics).toEqual([]);
+      expect(output.partialFailure).toHaveLength(1);
+      expect(output.partialFailure[0].kind).toBe('gauge');
+      expect(output.partialFailure[0].error).toBeTruthy();
+      // The misleading "No metrics matched … widen the window" hint must
+      // NOT appear — the agent should retry, not widen.
+      expect(output.hint).not.toMatch(/No metrics matched/);
+    });
+  });
+
+  // ── clickstack_describe_metric ───────────────────────────────────────────
+
+  describe('clickstack_describe_metric', () => {
+    const createMetricSource = () =>
+      Source.create({
+        kind: SourceKind.Metric,
+        team: team._id,
+        from: { databaseName: DEFAULT_DATABASE, tableName: '' },
+        metricTables: {
+          [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge',
+          [MetricsDataType.Sum.toLowerCase()]: 'otel_metrics_sum',
+          [MetricsDataType.Histogram.toLowerCase()]: 'otel_metrics_histogram',
+        },
+        timestampValueExpression: 'TimeUnix',
+        connection: connection._id,
+        name: 'Metrics',
+      });
+
+    it('rejects non-metric sources', async () => {
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: traceSource._id.toString(),
+        metricName: 'system.cpu.utilization',
+        kind: 'gauge',
+      });
+      expect(result.isError).toBe(true);
+      expect(getFirstText(result)).toContain('not a metric source');
+    });
+
+    it('rejects calls that omit kind', async () => {
+      // `kind` is required. Calling without it returns an MCP error
+      // surfaced from the schema parser rather than reaching ClickHouse.
+      const metricSource = await createMetricSource();
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: metricSource._id.toString(),
+        metricName: 'system.cpu.utilization',
+      });
+      expect(result.isError).toBe(true);
+    });
+
+    it('returns an actionable hint when the metric has no data in the kind table', async () => {
+      const metricSource = await createMetricSource();
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: metricSource._id.toString(),
+        metricName: 'definitely.not.a.metric',
+        kind: 'gauge',
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      // We always return a single kindDetail for the requested kind —
+      // attribute keys are empty when the metric has no data.
+      expect(output.kinds).toHaveLength(1);
+      expect(output.kinds[0].kind).toBe('gauge');
+      expect(output.kinds[0].attributeKeys).toEqual({});
+      expect(output.hint).toMatch(/No data found/);
+      expect(output.partialFailure).toBeUndefined();
+    });
+
+    it('surfaces partialFailure instead of the no-data hint when discovery fails', async () => {
+      // Point the gauge kind at the logs table: it exists (so getColumns
+      // succeeds) but lacks MetricName/TimeUnix columns, so the
+      // attribute-keys discovery query throws.
+      const brokenSource = await Source.create({
+        kind: SourceKind.Metric,
+        team: team._id,
+        from: { databaseName: DEFAULT_DATABASE, tableName: '' },
+        metricTables: {
+          [MetricsDataType.Gauge.toLowerCase()]: DEFAULT_LOGS_TABLE,
+        },
+        timestampValueExpression: 'TimeUnix',
+        connection: connection._id,
+        name: 'BrokenMetrics',
+      });
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: brokenSource._id.toString(),
+        metricName: 'whatever',
+        kind: 'gauge',
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      expect(output.partialFailure).toBeDefined();
+      expect(
+        output.partialFailure.map((f: { stage: string }) => f.stage),
+      ).toContain('attributeKeys');
+      // The misleading "No data found … widen startTime/endTime" hint
+      // must NOT appear — the fetch failed; widening would not help.
+      expect(output.hint).not.toMatch(/No data found/);
+    });
+
+    it('returns attribute keys for a gauge metric when kind is specified', async () => {
+      const metricSource = await createMetricSource();
+      const now = new Date();
+      await bulkInsertMetricsGauge([
+        {
+          MetricName: 'system.cpu.utilization',
+          ResourceAttributes: { 'service.name': 'auto-detect-svc' },
+          ServiceName: 'auto-detect-svc',
+          TimeUnix: now,
+          Value: 0.5,
+        },
+      ]);
+
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: metricSource._id.toString(),
+        metricName: 'system.cpu.utilization',
+        kind: 'gauge',
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      expect(output.metricName).toBe('system.cpu.utilization');
+      expect(output.kinds.length).toBe(1);
+      const detail = output.kinds[0];
+      expect(detail.kind).toBe('gauge');
+      expect(detail.usage).toContain('aggFn');
+      expect(detail.attributeKeys).toBeDefined();
+      // ResourceAttributes['service.name'] should land in the keys map
+      // under ResourceAttributes.
+      expect(detail.attributeKeys.ResourceAttributes ?? []).toEqual(
+        expect.arrayContaining(['service.name']),
+      );
+      // Attribute values should be sampled by default.
+      expect(detail.attributeValues).toBeDefined();
+      // nextSteps.query carries a worked example matching the requested kind.
+      expect(output.nextSteps.query).toContain('metricType: "gauge"');
+    });
+
+    it('reports sampledKeys and truncatedKeys so unsampled keys are distinguishable', async () => {
+      // 15 attribute keys > MAX_ATTR_KEYS_TO_SAMPLE (12): the overflow
+      // must land in truncatedKeys so the agent knows those keys were
+      // never queried (vs. sampled-but-empty).
+      const metricSource = await createMetricSource();
+      const manyAttrs: Record<string, string> = {};
+      for (let i = 0; i < 15; i++) {
+        manyAttrs[`attr.key.${String(i).padStart(2, '0')}`] = `value-${i}`;
+      }
+      await bulkInsertMetricsGauge([
+        {
+          MetricName: 'many.attrs.metric',
+          ResourceAttributes: manyAttrs,
+          ServiceName: 'many-attrs-svc',
+          TimeUnix: new Date(),
+          Value: 1,
+        },
+      ]);
+
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: metricSource._id.toString(),
+        metricName: 'many.attrs.metric',
+        kind: 'gauge',
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      const detail = output.kinds[0];
+      const meta = detail.attributeValuesMeta;
+      expect(meta).toBeDefined();
+      expect(meta.sampledKeys).toHaveLength(12);
+      expect(meta.truncatedKeys).toHaveLength(3);
+      // Sampled and truncated sets are disjoint and cover all 15 keys.
+      expect(new Set([...meta.sampledKeys, ...meta.truncatedKeys]).size).toBe(
+        15,
+      );
+      // Every key with values must have been sampled.
+      for (const key of Object.keys(detail.attributeValues ?? {})) {
+        expect(meta.sampledKeys).toContain(key);
+      }
+    });
+
+    it('reports empty truncatedKeys when all attribute keys fit the cap', async () => {
+      const metricSource = await createMetricSource();
+      await bulkInsertMetricsGauge([
+        {
+          MetricName: 'few.attrs.metric',
+          ResourceAttributes: { 'service.name': 'few-attrs-svc' },
+          ServiceName: 'few-attrs-svc',
+          TimeUnix: new Date(),
+          Value: 1,
+        },
+      ]);
+
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: metricSource._id.toString(),
+        metricName: 'few.attrs.metric',
+        kind: 'gauge',
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      const meta = output.kinds[0].attributeValuesMeta;
+      expect(meta).toBeDefined();
+      expect(meta.truncatedKeys).toEqual([]);
+      expect(meta.sampledKeys).toContain("ResourceAttributes['service.name']");
+    });
+
+    it('skips value sampling when sampleValues is false', async () => {
+      const metricSource = await createMetricSource();
+      const now = new Date();
+      await bulkInsertMetricsGauge([
+        {
+          MetricName: 'system.cpu.utilization',
+          ResourceAttributes: { 'service.name': 'no-sample-svc' },
+          ServiceName: 'no-sample-svc',
+          TimeUnix: now,
+          Value: 0.5,
+        },
+      ]);
+
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: metricSource._id.toString(),
+        metricName: 'system.cpu.utilization',
+        kind: 'gauge',
+        sampleValues: false,
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      expect(output.kinds[0].attributeValues).toBeUndefined();
+    });
+
+    it('returns the correct usage example for a sum metric', async () => {
+      const metricSource = await createMetricSource();
+      const now = new Date();
+      await bulkInsertMetricsSum([
+        {
+          MetricName: 'http.server.request.count',
+          AggregationTemporality: 1,
+          IsMonotonic: true,
+          ResourceAttributes: { 'service.name': 'sum-svc' },
+          ServiceName: 'sum-svc',
+          TimeUnix: now,
+          Value: 42,
+        },
+      ]);
+
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: metricSource._id.toString(),
+        metricName: 'http.server.request.count',
+        kind: 'sum',
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      expect(output.kinds[0].kind).toBe('sum');
+      expect(output.kinds[0].usage).toContain('increase');
+      expect(output.nextSteps.query).toContain('"increase"');
+    });
+
+    it('returns the quantile + level example for a histogram metric', async () => {
+      const metricSource = await createMetricSource();
+      const now = new Date();
+      await bulkInsertMetricsHistogram([
+        {
+          MetricName: 'http.server.request.duration',
+          ResourceAttributes: { 'service.name': 'hist-svc' },
+          TimeUnix: now,
+          BucketCounts: [1, 2, 3],
+          ExplicitBounds: [10, 100, 1000],
+          AggregationTemporality: 1,
+        },
+      ]);
+
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: metricSource._id.toString(),
+        metricName: 'http.server.request.duration',
+        kind: 'histogram',
+      });
+      expect(result.isError).toBeFalsy();
+      const output = JSON.parse(getFirstText(result));
+      expect(output.kinds[0].kind).toBe('histogram');
+      expect(output.kinds[0].usage).toContain('quantile');
+      expect(output.nextSteps.query).toContain('"quantile"');
+      expect(output.nextSteps.query).toContain('level: 0.95');
+    });
+
+    it('rejects explicit kind when the source has no table for that kind', async () => {
+      // Source with only gauge populated.
+      const gaugeOnly = await Source.create({
+        kind: SourceKind.Metric,
+        team: team._id,
+        from: { databaseName: DEFAULT_DATABASE, tableName: '' },
+        metricTables: {
+          [MetricsDataType.Gauge.toLowerCase()]: 'otel_metrics_gauge',
+        },
+        timestampValueExpression: 'TimeUnix',
+        connection: connection._id,
+        name: 'GaugeOnly',
+      });
+      const result = await callTool(client, 'clickstack_describe_metric', {
+        sourceId: gaugeOnly._id.toString(),
+        metricName: 'whatever',
+        kind: 'sum',
+      });
+      expect(result.isError).toBe(true);
+      expect(getFirstText(result)).toMatch(/no "sum" metric table/);
+    });
+  });
 });
diff --git a/packages/api/src/mcp/prompts/dashboards/content.ts b/packages/api/src/mcp/prompts/dashboards/content.ts
index a79fde71ab..3293c26611 100644
--- a/packages/api/src/mcp/prompts/dashboards/content.ts
+++ b/packages/api/src/mcp/prompts/dashboards/content.ts
@@ -124,7 +124,7 @@ Apply these before calling clickstack_save_dashboard. Each rule is enforced by t
      ]
    Use tabs when one container needs to show different views of the same data (Throughput / Latency / Errors over time, for instance); the tab bar appears when a container has two or more tabs declared.
 
-12. VALIDATE EVERY TILE AFTER SAVE. After clickstack_save_dashboard, call clickstack_query_tile on EVERY tile (not just one). Save validates input shape; it does NOT validate query semantics. Some queries pass save and fail at render time (known gaps: Lucene comparison/wildcard on map attributes, metric tiles with multiple metricTables, malformed having clauses). If query_tile returns an error, fix the tile and re-save before declaring the dashboard ready.
+12. VALIDATE EVERY TILE AFTER SAVE. After clickstack_save_dashboard, call clickstack_query_tile on EVERY tile (not just one). Save validates input shape; it does NOT validate query semantics. Some queries pass save and fail at render time (known gaps: Lucene comparison/wildcard on map attributes, malformed having clauses). If query_tile returns an error, fix the tile and re-save before declaring the dashboard ready.
 
 13. NO TITLE-RECAP MARKDOWN TILE. The dashboard's name shows in the title bar. Adding a markdown tile with the dashboard name (or a "About this dashboard" header) doubles the title and eats a row of vertical space because markdown heading styles render at title-bar scale. Skip the markdown tile entirely on starter dashboards.
 
@@ -958,7 +958,29 @@ For configType: "sql" tiles, write ClickHouse SQL with template macros:
   search       No select items (select is a column list string). where is the filter.
   markdown     No select items. Set markdown field with content.
 
-NOTE: Authoring builder tiles on a metric source is not reliable today. The MCP select-item shape does not carry the metricName / metricType fields the metric query path needs, and a save with a metric sourceId may render in the UI as "Both table name and UUID are empty" even though the save itself succeeded. For metrics, use a raw SQL tile (configType: "sql") with explicit table reference. The standard tables that back a metric source are otel_metrics_gauge, otel_metrics_sum, and otel_metrics_histogram; clickstack_list_sources returns the metric source's metricTables map so you know which table holds which metric kind. Discovery: metric source schemas today do NOT publish mapAttributeKeys for ResourceAttributes / Attributes the way log and trace sources do, so attribute keys must be discovered by sampling (SELECT DISTINCT mapKeys(Attributes) FROM ...).
+== METRIC SOURCES ==
+
+Builder tiles work on metric sources. Each select item on a metric tile MUST set metricType ("gauge" | "sum" | "histogram") and metricName (the OTel metric name, e.g. "system.cpu.utilization"). valueExpression defaults to "Value" when omitted, so a typical metric series is { aggFn: "<fn>", metricType: "<kind>", metricName: "<name>" }. summary and "exponential histogram" kinds are not yet supported by the renderer.
+
+Per-kind aggregation guidance:
+  gauge      Use aggFn:"last_value" | "avg" | "min" | "max". Set isDelta:true for Prometheus-style delta over each bucket.
+  sum        Use aggFn:"increase" for the per-bucket counter increase (reset-aware), or aggFn:"sum" | "avg" on the computed rate. increase + groupBy is capped at the top 20 groups by the renderer; pre-filter via where or pick a coarser groupBy when you need broader coverage.
+  histogram  Use aggFn:"quantile" with level ∈ {0.5, 0.9, 0.95, 0.99} for percentiles, or aggFn:"count" for the total bucket count. quantile without level is rejected.
+
+Discovery workflow for metrics:
+  1. clickstack_list_sources: find the metric source ID and its metricTables map (which kinds are populated).
+  2. clickstack_describe_source(sourceId): returns columns, attribute keys, low-cardinality values, AND a per-kind metric-name sample (up to 20 names per kind).
+  3. clickstack_list_metrics(sourceId, ...): paginate the full metric catalog with optional kind + namePattern (ILIKE) + time-window filters. Pass nextCursor unchanged for the next page.
+  4. clickstack_describe_metric(sourceId, metricName): drill into a single metric: kind(s), unit, description, attribute keys per map column, and sampled values per attribute. Attribute keys vary per metric, not per source, so always call this before authoring tiles for a metric you've never queried.
+  5. clickstack_timeseries | clickstack_table: author the chart. Set metricType + metricName on each select item; pass the discovered attribute keys via groupBy / where.
+
+Examples:
+  Gauge p95-like spread by service (use last_value or avg):
+    { aggFn: "avg", metricType: "gauge", metricName: "system.cpu.utilization" }, groupBy: "ServiceName"
+  Sum counter increase:
+    { aggFn: "increase", metricType: "sum", metricName: "http.server.request.count", alias: "Requests" }, groupBy: "ServiceName"
+  Histogram p95 latency:
+    { aggFn: "quantile", level: 0.95, metricType: "histogram", metricName: "http.server.request.duration", alias: "P95 Latency" }, groupBy: "ServiceName"
 
 == NUMBER FORMAT ==
 
@@ -1244,7 +1266,7 @@ Example: find top patterns for production services over the last 4 hours:
    The dashboard filter applies globally; tiles do not need the literal. On mixed-source dashboards where only some tiles carry the column, add appliesToSourceIds: ["<id>", ...] to scope the filter to just those sources instead of breaking the unrelated tiles.
 
 8. Forgetting to validate tiles after saving
-   Always call clickstack_query_tile on EVERY tile after clickstack_save_dashboard, not just one. Save validates input shape but not query semantics. Several known gaps (Lucene comparison/wildcard on map attributes, builder tiles on metric sources, malformed having) pass save and fail at render time. A dashboard with one bad tile renders the whole page in a degraded state; the user sees "Error loading chart" with no way to know which tile broke unless you validated. If query_tile returns an error, fix the where / SQL and re-save before declaring the dashboard ready.
+   Always call clickstack_query_tile on EVERY tile after clickstack_save_dashboard, not just one. Save validates input shape but not query semantics. Several known gaps (Lucene comparison/wildcard on map attributes, malformed having) pass save and fail at render time. A dashboard with one bad tile renders the whole page in a degraded state; the user sees "Error loading chart" with no way to know which tile broke unless you validated. If query_tile returns an error, fix the where / SQL and re-save before declaring the dashboard ready.
 
 9. Using connectionId with builder tiles, or omitting connectionId or sourceId on a single-table SQL tile
    Builder tiles (line, table, etc.) use sourceId (no connectionId).
diff --git a/packages/api/src/mcp/tools/dashboards/schemas.ts b/packages/api/src/mcp/tools/dashboards/schemas.ts
index e1dbad9674..0f543561ca 100644
--- a/packages/api/src/mcp/tools/dashboards/schemas.ts
+++ b/packages/api/src/mcp/tools/dashboards/schemas.ts
@@ -15,6 +15,17 @@ import { z } from 'zod';
 
 import { externalQuantileLevelSchema, objectIdSchema } from '@/utils/zod';
 
+import { getMetricSelectIssues } from '../query/schemas';
+import { QUERYABLE_METRIC_KINDS } from '../sources/metricKinds';
+
+/**
+ * Metric type values exposed on dashboard tile select items. Restricted to
+ * the three kinds the query renderer can translate today; summary and
+ * exponential histogram are intentionally excluded. Imports the shared
+ * `QUERYABLE_METRIC_KINDS` source-of-truth tuple from `../sources/metricKinds`.
+ */
+const mcpTileMetricTypeSchema = z.enum(QUERYABLE_METRIC_KINDS);
+
 // ─── Shared tile schemas for MCP dashboard tools ─────────────────────────────
 
 const seriesLevelNumberFormatDescription =
@@ -87,7 +98,10 @@ const mcpNumberFormatSchema = z.object({
 const mcpTileSelectItemSchema = z
   .object({
     aggFn: AggregateFunctionSchema.describe(
-      'Aggregation function. "count" requires no valueExpression; all others do.',
+      'Aggregation function. "count" requires no valueExpression; all others do. ' +
+        'METRIC SOURCES: "increase" computes the per-bucket counter increase for Sum metrics ' +
+        '(reset-aware). For Gauges use last_value/avg/min/max. For Histograms use "quantile" ' +
+        'with level or "count".',
     ),
     valueExpression: z
       .string()
@@ -96,7 +110,9 @@ const mcpTileSelectItemSchema = z
         'Column or expression to aggregate. Required for all aggFn except "count". ' +
           'Use PascalCase for top-level columns (e.g. "Duration", "StatusCode"). ' +
           "For span attributes use: SpanAttributes['key'] (e.g. SpanAttributes['http.method']). " +
-          "For resource attributes use: ResourceAttributes['key'] (e.g. ResourceAttributes['service.name']).",
+          "For resource attributes use: ResourceAttributes['key'] (e.g. ResourceAttributes['service.name']).\n\n" +
+          'METRIC SOURCES: optional — defaults to "Value" (the metric value column) when ' +
+          'metricType/metricName are set.',
       ),
     where: z
       .string()
@@ -115,29 +131,53 @@ const mcpTileSelectItemSchema = z
       ),
     level: externalQuantileLevelSchema
       .optional()
-      .describe('Percentile level for aggFn="quantile"'),
+      .describe(
+        'Percentile level for aggFn="quantile". REQUIRED for histogram metrics with aggFn:"quantile".',
+      ),
     numberFormat: mcpNumberFormatSchema
       .optional()
       .describe(seriesLevelNumberFormatDescription),
+    metricType: mcpTileMetricTypeSchema
+      .optional()
+      .describe(
+        'METRIC SOURCES ONLY. OTel metric kind: gauge, sum, or histogram. ' +
+          'Required (with metricName) when the tile sourceId is a metric source. ' +
+          'summary and exponential histogram are not supported by the renderer yet.',
+      ),
+    metricName: z
+      .string()
+      .optional()
+      .describe(
+        'METRIC SOURCES ONLY. OTel metric name (e.g. "system.cpu.utilization"). ' +
+          'Required when metricType is set.',
+      ),
+    isDelta: z
+      .boolean()
+      .optional()
+      .describe(
+        'METRIC SOURCES ONLY (gauge metrics). When true, computes the Prometheus-style ' +
+          'delta over each bucket. Default false.',
+      ),
   })
   .superRefine((data, ctx) => {
-    if (data.level && data.aggFn !== 'quantile') {
-      ctx.addIssue({
-        code: z.ZodIssueCode.custom,
-        message: 'Level can only be used with quantile aggregation function',
-      });
-    }
-    if (data.valueExpression && data.aggFn === 'count') {
-      ctx.addIssue({
-        code: z.ZodIssueCode.custom,
-        message:
-          'Value expression cannot be used with count aggregation function',
-      });
-    } else if (!data.valueExpression && data.aggFn !== 'count') {
+    const narrow = {
+      aggFn: typeof data.aggFn === 'string' ? data.aggFn : undefined,
+      metricType:
+        typeof data.metricType === 'string' ? data.metricType : undefined,
+      metricName:
+        typeof data.metricName === 'string' ? data.metricName : undefined,
+      isDelta: typeof data.isDelta === 'boolean' ? data.isDelta : undefined,
+      level: typeof data.level === 'number' ? data.level : undefined,
+      valueExpression:
+        typeof data.valueExpression === 'string'
+          ? data.valueExpression
+          : undefined,
+    };
+    for (const issue of getMetricSelectIssues(narrow)) {
       ctx.addIssue({
         code: z.ZodIssueCode.custom,
-        message:
-          'Value expression is required for non-count aggregation functions',
+        path: issue.path,
+        message: issue.message,
       });
     }
   });
diff --git a/packages/api/src/mcp/tools/query/helpers.ts b/packages/api/src/mcp/tools/query/helpers.ts
index 87f59c362c..2c466dc0da 100644
--- a/packages/api/src/mcp/tools/query/helpers.ts
+++ b/packages/api/src/mcp/tools/query/helpers.ts
@@ -78,6 +78,90 @@ const MCP_CLICKHOUSE_SETTINGS = {
  */
 const MCP_REQUEST_TIMEOUT = 32_000; // 30s query limit + 2s buffer
 
+// ─── Increase top-N cap hint ────────────────────────────────────────────────
+
+/**
+ * Group limit applied by the metric renderer when `aggFn:"increase"` is
+ * combined with `groupBy`. Mirrors `INCREASE_MAX_NUM_GROUPS` in
+ * `packages/common-utils/src/core/renderChartConfig.ts`. Surfaced to MCP
+ * callers as a hint so they can reason about why high-cardinality groupBys
+ * may be truncated.
+ */
+export const INCREASE_TOP_N_CAP = 20;
+
+/**
+ * Append a hint message onto a parsed tool response body. All hint
+ * writers share `hints: string[]` so multiple advisories can coexist
+ * (e.g. the increase top-N cap and the single-bucket collapse hint can
+ * legitimately both apply to the same result).
+ */
+export function appendHint(parsed: Record<string, unknown>, hint: string) {
+  const existing = Array.isArray(parsed.hints) ? parsed.hints : [];
+  parsed.hints = [...existing, hint];
+}
+
+/**
+ * Count the distinct groupBy combinations present in a result set.
+ * Resolves each comma-separated groupBy segment against the row keys
+ * (the renderer aliases group columns by their literal expression).
+ * Returns null when no segment matches any row key — the caller should
+ * treat that as "count unknown".
+ */
+function countDistinctGroups(
+  data: Array<Record<string, unknown>>,
+  groupBy: string,
+): number | null {
+  const segments = splitAndTrimWithBracket(groupBy);
+  const sample = data[0];
+  if (!sample || typeof sample !== 'object') return null;
+  const resolvable = segments.filter(seg => seg in sample);
+  if (resolvable.length === 0) return null;
+  const seen = new Set<string>();
+  for (const row of data) {
+    seen.add(JSON.stringify(resolvable.map(seg => row[seg])));
+  }
+  return seen.size;
+}
+
+/**
+ * Mutates a successful tool result envelope in place to add a neutral
+ * informational hint when the renderer's increase+groupBy top-N cap
+ * likely applied. Safe to call unconditionally — does nothing for empty
+ * results, error results, queries that don't combine `aggFn:"increase"`
+ * with a non-empty `groupBy`, or results whose distinct group count is
+ * below the cap (no truncation possible).
+ */
+export function annotateIncreaseTopNHint(
+  result: { content?: { type: string; text?: string }[]; isError?: boolean },
+  selectItems: ReadonlyArray<{ aggFn?: string }>,
+  groupBy: string | undefined,
+): void {
+  if (result.isError) return;
+  const first = result.content?.[0];
+  if (first?.type !== 'text' || typeof first.text !== 'string') return;
+  if (!groupBy || groupBy.trim() === '') return;
+  if (!selectItems.some(s => s.aggFn === 'increase')) return;
+  try {
+    const parsed = JSON.parse(first.text);
+    const data = parsed?.result?.data;
+    if (!Array.isArray(data) || data.length === 0) return;
+    // Only warn when the result actually carries enough distinct groups
+    // to have hit the renderer cap. When the group column cannot be
+    // resolved from the rows, skip the hint rather than crying wolf.
+    const distinctGroups = countDistinctGroups(data, groupBy);
+    if (distinctGroups === null || distinctGroups < INCREASE_TOP_N_CAP) return;
+    appendHint(
+      parsed,
+      `aggFn:"increase" combined with groupBy is capped at the top ${INCREASE_TOP_N_CAP} ` +
+        `groups by max bucket sum at the renderer layer. ` +
+        `Results may not include every group present in the underlying data.`,
+    );
+    first.text = JSON.stringify(parsed, null, 2);
+  } catch {
+    // leave result unmodified on parse failure
+  }
+}
+
 // ─── Where merging ───────────────────────────────────────────────────────────
 
 export interface MergeWhereResult<T> {
@@ -210,7 +294,9 @@ function formatQueryResult(result: unknown) {
               : {}),
             ...(empty
               ? {
-                  hint: 'No data found in the queried time range. Try setting startTime to a wider window (e.g. 24 hours ago) or check that filters match existing data.',
+                  hints: [
+                    'No data found in the queried time range. Try setting startTime to a wider window (e.g. 24 hours ago) or check that filters match existing data.',
+                  ],
                 }
               : {}),
           },
@@ -222,6 +308,80 @@ function formatQueryResult(result: unknown) {
   };
 }
 
+// ─── Source-kind / select-shape guardrail ────────────────────────────────────
+
+type SelectItemForKindCheck = { metricType?: unknown };
+
+/**
+ * Reject builder-config queries where the select items' metric annotations
+ * don't match the source kind:
+ *   - Non-metric source with any select item carrying `metricType` would
+ *     fall through to SQL generation that references the metric `Value`
+ *     column and fail with a cryptic ClickHouse error.
+ *   - Metric source with no select item carrying `metricType` would also
+ *     reach the renderer in a broken state.
+ *
+ * Catching both up-front gives the agent a clear next action that mirrors
+ * the wording on `clickstack_list_metrics` / `clickstack_describe_metric`.
+ *
+ * Returns an error envelope on mismatch, or `null` when the select shape
+ * is consistent with the source kind (or the select is a raw string that
+ * the caller already parsed by hand).
+ */
+export function assertSourceKindMatchesSelect(
+  source: { kind: string },
+  select: unknown,
+): { isError: true; content: [{ type: 'text'; text: string }] } | null {
+  // Raw-string select (rare on the builder path) — the renderer handles
+  // it; no metric annotations to inspect.
+  if (typeof select === 'string') return null;
+  if (!Array.isArray(select)) return null;
+
+  const metricItemCount = (select as SelectItemForKindCheck[]).filter(
+    item =>
+      typeof item === 'object' &&
+      item !== null &&
+      typeof item.metricType === 'string' &&
+      item.metricType.length > 0,
+  ).length;
+
+  const isMetricSource = source.kind === SourceKind.Metric;
+
+  if (isMetricSource && metricItemCount === 0) {
+    return {
+      isError: true as const,
+      content: [
+        {
+          type: 'text' as const,
+          text:
+            'Source kind is "metric", but no select item specifies metricType + metricName. ' +
+            'Each select item on a metric source must set metricType ("gauge" | "sum" | "histogram") ' +
+            'and metricName (e.g. metricName:"system.cpu.utilization"). Call ' +
+            'clickstack_describe_source or clickstack_list_metrics to discover available metric names.',
+        },
+      ],
+    };
+  }
+
+  if (!isMetricSource && metricItemCount > 0) {
+    return {
+      isError: true as const,
+      content: [
+        {
+          type: 'text' as const,
+          text:
+            `Source kind is "${source.kind}", not metric — but ${metricItemCount} select item(s) ` +
+            'set metricType. metricType + metricName only work on metric sources. ' +
+            'Drop the metric fields to query this source, or call clickstack_list_sources to find a ' +
+            'source whose kind is "metric".',
+        },
+      ],
+    };
+  }
+
+  return null;
+}
+
 // ─── Tile execution ──────────────────────────────────────────────────────────
 
 export async function runConfigTile(
@@ -273,6 +433,14 @@ export async function runConfigTile(
       };
     }
 
+    // Reject metric-style select against a non-metric source (and vice
+    // versa) before the renderer composes SQL against the wrong table.
+    const kindMismatch = assertSourceKindMatchesSelect(
+      source,
+      builderConfig.select,
+    );
+    if (kindMismatch) return kindMismatch;
+
     const connection = await getConnectionById(
       teamId,
       source.connection.toString(),
@@ -298,6 +466,7 @@ export async function runConfigTile(
     });
 
     const isSearch = builderConfig.displayType === DisplayType.Search;
+    const isMetricSource = source.kind === SourceKind.Metric;
     const defaultTableSelect =
       'defaultTableSelectExpression' in source
         ? source.defaultTableSelectExpression
@@ -327,13 +496,37 @@ export async function runConfigTile(
         }
       : {};
 
+    // Metric sources need three adjustments before the renderer can
+    // translate the query:
+    //   1. `from.tableName` is blank — the renderer picks the correct
+    //      per-kind ClickHouse table from `metricTables` at render time.
+    //   2. `metricTables` must be threaded onto the chart config (mirrors
+    //      packages/api/src/routers/external-api/v2/charts.ts:261-267).
+    //   3. Each select item's `valueExpression` defaults to "Value" (the
+    //      metric value column) when missing — agents normally omit it.
+    const metricSelectOverrides =
+      isMetricSource && Array.isArray(builderConfig.select)
+        ? {
+            select: builderConfig.select.map(item =>
+              typeof item === 'string'
+                ? item
+                : {
+                    ...item,
+                    valueExpression: item.valueExpression ?? 'Value',
+                  },
+            ),
+          }
+        : {};
+
     const chartConfig = {
       ...builderConfig,
       ...searchOverrides,
+      ...metricSelectOverrides,
       from: {
         databaseName: source.from.databaseName,
-        tableName: source.from.tableName,
+        tableName: isMetricSource ? '' : source.from.tableName,
       },
+      ...(isMetricSource && { metricTables: source.metricTables }),
       connection: source.connection.toString(),
       timestampValueExpression: source.timestampValueExpression,
       implicitColumnExpression: implicitColumn,
diff --git a/packages/api/src/mcp/tools/query/schemas.ts b/packages/api/src/mcp/tools/query/schemas.ts
index 6bcdd91ab4..486bfbf2d1 100644
--- a/packages/api/src/mcp/tools/query/schemas.ts
+++ b/packages/api/src/mcp/tools/query/schemas.ts
@@ -1,5 +1,7 @@
 import { z } from 'zod';
 
+import { QUERYABLE_METRIC_KINDS } from '../sources/metricKinds';
+
 // ─── Shared description fragments ────────────────────────────────────────────
 
 const WHERE_DESCRIPTION =
@@ -48,6 +50,10 @@ export const MCP_AGG_FN_OPTIONS = [
   'quantile',
   'sum',
   'none',
+  // 'increase' is only valid for Sum (counter) metrics. The renderer
+  // computes the per-bucket counter increase with reset-handling; the
+  // bare aggFn string maps to that behavior, not a SQL function.
+  'increase',
 ] as const;
 
 const mcpAggFnSchema = z
@@ -59,9 +65,146 @@ const mcpAggFnSchema = z
       '  count_distinct – unique value count (valueExpression required)\n' +
       '  quantile – percentile; also set level (valueExpression required)\n' +
       '  last_value – most recent value of a column\n' +
-      '  none – pass a raw expression through unchanged',
+      '  none – pass a raw expression through unchanged\n' +
+      '  increase – METRIC-ONLY (Sum/counter): per-bucket counter increase, ' +
+      'reset-aware. Requires metricType:"sum" and metricName.',
   );
 
+/**
+ * Metric type values exposed to MCP tool callers. Restricted to the three
+ * kinds the renderer can translate today; summary and exponential histogram
+ * are intentionally excluded. See `../sources/metricKinds` for the shared
+ * source-of-truth constant used by every metric-aware tool.
+ */
+const mcpMetricTypeSchema = z
+  .enum(QUERYABLE_METRIC_KINDS)
+  .describe(
+    'METRIC SOURCES ONLY. OTel metric kind. Required (along with metricName) ' +
+      'when querying a metric source — discover via clickstack_describe_source ' +
+      'or clickstack_describe_metric.\n' +
+      '  gauge – instantaneous values (CPU, memory, queue depth). Use last_value/avg/min/max; set isDelta:true for Prometheus-style delta over the bucket.\n' +
+      '  sum – cumulative or delta counters (request counts, bytes processed). Use aggFn:"increase" for counter increase, or sum/avg on the computed rate.\n' +
+      '  histogram – bucketed distributions (request duration). Use aggFn:"quantile" with level for percentiles, or aggFn:"count" for total bucket count.\n' +
+      'NOTE: summary and exponential histogram are not supported by the query renderer yet.',
+  );
+
+/**
+ * Shared cross-field validation issues for MCP select items. Returns the
+ * list of Zod issues a `.superRefine` callback should emit. Kept as a pure
+ * function so both `mcpSelectItemSchema` (query tools) and
+ * `mcpTileSelectItemSchema` (dashboard tile tools) can call it inline
+ * without widening Zod's output type inference.
+ *
+ * Constraints enforced:
+ *   - metricType + metricName must be set together
+ *   - aggFn:"increase" is Sum-only
+ *   - histogram metrics only support quantile (with level) or count
+ *   - isDelta is Gauge-only
+ *   - level still requires aggFn:"quantile"
+ *   - valueExpression is required for non-count aggFns UNLESS metricType is
+ *     set (defaults to "Value" for metric sources)
+ *
+ * Note: summary and exponential histogram metric kinds are not in the input
+ * enum so they cannot reach this function — they are rejected by the field
+ * schema itself.
+ */
+export function getMetricSelectIssues(data: {
+  aggFn?: string;
+  metricType?: string;
+  metricName?: string;
+  isDelta?: boolean;
+  level?: number;
+  valueExpression?: string;
+}): { path: (string | number)[]; message: string }[] {
+  const issues: { path: (string | number)[]; message: string }[] = [];
+
+  // metricType ↔ metricName must be set together
+  if (data.metricType && !data.metricName) {
+    issues.push({
+      path: ['metricName'],
+      message:
+        'metricName is required when metricType is set. Discover metric names ' +
+        'via clickstack_list_metrics or clickstack_describe_source.',
+    });
+  }
+  if (data.metricName && !data.metricType) {
+    issues.push({
+      path: ['metricType'],
+      message:
+        'metricType is required when metricName is set. Use one of: gauge, sum, histogram.',
+    });
+  }
+
+  // increase is Sum-only
+  if (data.aggFn === 'increase' && data.metricType !== 'sum') {
+    issues.push({
+      path: ['aggFn'],
+      message:
+        'aggFn "increase" is only valid for sum (counter) metrics. ' +
+        'Set metricType:"sum" and metricName, or pick a different aggFn.',
+    });
+  }
+
+  // Histogram supports only quantile (+ level) or count today
+  if (data.metricType === 'histogram') {
+    if (data.aggFn !== 'quantile' && data.aggFn !== 'count') {
+      issues.push({
+        path: ['aggFn'],
+        message:
+          'Histogram metrics only support aggFn "quantile" (with level) or "count" today.',
+      });
+    }
+    if (data.aggFn === 'quantile' && data.level == null) {
+      issues.push({
+        path: ['level'],
+        message:
+          'level is required when aggFn is "quantile" on a histogram metric. ' +
+          'Use 0.5, 0.9, 0.95, or 0.99.',
+      });
+    }
+  }
+
+  // isDelta is Gauge-only
+  if (data.isDelta && data.metricType !== 'gauge') {
+    issues.push({
+      path: ['isDelta'],
+      message: 'isDelta is only valid for gauge metrics (metricType:"gauge").',
+    });
+  }
+
+  // level requires aggFn:"quantile"
+  if (data.level != null && data.aggFn !== 'quantile') {
+    issues.push({
+      path: ['level'],
+      message: 'level is only valid with aggFn:"quantile".',
+    });
+  }
+
+  // valueExpression rules:
+  //   - "count" never takes a valueExpression (existing rule)
+  //   - non-count aggFns require valueExpression UNLESS metricType is set
+  //     (metric sources default valueExpression to "Value" in the helper)
+  if (data.valueExpression && data.aggFn === 'count') {
+    issues.push({
+      path: ['valueExpression'],
+      message: 'valueExpression cannot be used with aggFn:"count".',
+    });
+  } else if (
+    !data.valueExpression &&
+    data.aggFn !== 'count' &&
+    !data.metricType
+  ) {
+    issues.push({
+      path: ['valueExpression'],
+      message:
+        'valueExpression is required for non-count aggregation functions ' +
+        '(or set metricType to query a metric source, which defaults valueExpression to "Value").',
+    });
+  }
+
+  return issues;
+}
+
 export const mcpSelectItemSchema = z.object({
   aggFn: mcpAggFnSchema,
   valueExpression: z
@@ -74,7 +217,10 @@ export const mcpSelectItemSchema = z.object({
         'Any ClickHouse expression is allowed — common useful forms: ' +
         '"Duration / 1e6" (ns→ms), ' +
         '"toFloat64OrZero(SpanAttributes[\'response.size_bytes\'])" (cast attribute), ' +
-        '"if(StatusCode = \'STATUS_CODE_ERROR\', 1, 0)" (boolean→numeric for ratios).',
+        '"if(StatusCode = \'STATUS_CODE_ERROR\', 1, 0)" (boolean→numeric for ratios).\n\n' +
+        'METRIC SOURCES: optional — defaults to "Value" (the metric value column) when ' +
+        'metricType/metricName are set. Set explicitly only if you want to transform the ' +
+        'metric value (e.g. "Value / 1e6").',
     ),
   where: z
     .string()
@@ -113,10 +259,105 @@ export const mcpSelectItemSchema = z.object({
     .union([z.literal(0.5), z.literal(0.9), z.literal(0.95), z.literal(0.99)])
     .optional()
     .describe(
-      'Percentile level. Only applicable when aggFn is "quantile". ' +
+      'Percentile level. Required when aggFn is "quantile" on a histogram metric, ' +
+        'optional otherwise. ' +
         'Allowed values: 0.5, 0.9, 0.95, 0.99',
     ),
+  metricType: mcpMetricTypeSchema
+    .optional()
+    .describe(
+      'METRIC SOURCES ONLY. OTel metric kind: gauge, sum, or histogram. ' +
+        'Required (with metricName) when sourceId is a metric source. ' +
+        'Discover via clickstack_describe_source (sample) or clickstack_describe_metric.',
+    ),
+  metricName: z
+    .string()
+    .optional()
+    .describe(
+      'METRIC SOURCES ONLY. OTel metric name (e.g. "system.cpu.utilization", ' +
+        '"http.server.request.duration"). Required when metricType is set. ' +
+        'Discover via clickstack_list_metrics or clickstack_describe_source.',
+    ),
+  isDelta: z
+    .boolean()
+    .optional()
+    .describe(
+      'METRIC SOURCES ONLY (gauge metrics). When true, computes the Prometheus-style ' +
+        'delta over each bucket: (argMax(Value) - argMin(Value)) * bucketSecs / timeDiff. ' +
+        'Use for cumulative gauges where you want to chart growth per bucket. Default false.',
+    ),
 });
+// NOTE: cross-field validation (metric rules + level + valueExpression) is
+// applied imperatively in the timeseries / table tool handlers via
+// `validateMetricSelectItems`. We intentionally skip `.superRefine` here
+// because Zod 3.x widens optional-field types post-refine, which breaks
+// strict-typed downstream consumers like `mergeWhereIntoSelectItems` and
+// `resolveOrderBy`. The dashboard tile schema, whose consumers don't trip
+// on the widening, keeps its own `.superRefine`.
+
+/**
+ * Concrete shape of a parsed MCP select item. Mirrors the runtime values
+ * produced by `mcpSelectItemSchema` — needed as an explicit type because
+ * Zod 3.x's structural inference of `z.object({...})` callbacks (e.g. the
+ * MCP SDK's tool-handler input) can widen optional-field types into
+ * `unknown`. Cast `input.select` to `McpSelectItem[]` at tool boundaries.
+ */
+export type McpSelectItem = {
+  aggFn: string;
+  valueExpression?: string;
+  where?: string;
+  whereLanguage?: 'lucene' | 'sql';
+  alias?: string;
+  level?: number;
+  metricType?: 'gauge' | 'sum' | 'histogram';
+  metricName?: string;
+  isDelta?: boolean;
+};
+
+/**
+ * Default `valueExpression` to `"Value"` for every metric-tagged select
+ * item that omits it. Must be called BEFORE `buildTile`, because the
+ * external dashboard tile schema's `superRefine` rejects non-count
+ * aggregations with an empty `valueExpression`. The runtime renderer
+ * looks for `Value` on metric tables, so defaulting it here matches what
+ * `external-api/v2/charts.ts:240-250` does on the REST path.
+ */
+export function applyMetricSelectDefaults<T extends McpSelectItem>(
+  items: ReadonlyArray<T>,
+): T[] {
+  return items.map(item =>
+    item.metricType && !item.valueExpression
+      ? { ...item, valueExpression: 'Value' }
+      : item,
+  );
+}
+
+/**
+ * Apply `getMetricSelectIssues` to every select item in a tool input.
+ * Returns an error-shaped tool response when any issue is detected, or
+ * `null` when all items pass. Call this from a tool handler before
+ * passing items to `runConfigTile`.
+ */
+export function validateMetricSelectItems(
+  items: ReadonlyArray<McpSelectItem>,
+): { isError: true; content: [{ type: 'text'; text: string }] } | null {
+  const errors: string[] = [];
+  items.forEach((item, idx) => {
+    for (const issue of getMetricSelectIssues(item)) {
+      errors.push(`select[${idx}].${issue.path.join('.')}: ${issue.message}`);
+    }
+  });
+  if (errors.length === 0) return null;
+  return {
+    isError: true as const,
+    content: [
+      {
+        type: 'text' as const,
+        text: errors.join('\n'),
+      },
+    ],
+  };
+}
 
 export const startTimeSchema = z
   .string()
diff --git a/packages/api/src/mcp/tools/query/table.ts b/packages/api/src/mcp/tools/query/table.ts
index 41b7e44f0b..0f90f2c7dd 100644
--- a/packages/api/src/mcp/tools/query/table.ts
+++ b/packages/api/src/mcp/tools/query/table.ts
@@ -4,19 +4,23 @@ import { z } from 'zod';
 import { withToolTracing } from '../../utils/tracing';
 import type { McpContext } from '../types';
 import {
+  annotateIncreaseTopNHint,
   buildTile,
   mergeWhereIntoSelectItems,
   parseTimeRange,
   runConfigTile,
 } from './helpers';
 import {
+  applyMetricSelectDefaults,
   endTimeSchema,
   groupBySchema,
   MCP_AGG_FN_OPTIONS,
+  McpSelectItem,
   mcpSelectItemSchema,
   orderBySchema,
   sourceIdSchema,
   startTimeSchema,
+  validateMetricSelectItems,
   whereLanguageSchema,
   whereSchema,
 } from './schemas';
@@ -61,9 +65,13 @@ const tableSchema = z.object({
 // ─── orderBy resolution ──────────────────────────────────────────────────────
 
 /** Aggregation function names that ClickHouse cannot resolve as bare identifiers in ORDER BY.
- *  'none' is excluded — it passes a raw expression through unchanged and has no synthesizable form. */
+ *  Excluded:
+ *  - 'none' passes a raw expression through unchanged and has no synthesizable form.
+ *  - 'increase' is a metric-only renderer marker that compiles to a multi-CTE
+ *    sum(Rate) pipeline — there is no standalone SQL function to synthesize.
+ *    The renderer auto-aliases the resulting column; agents should orderBy by alias. */
 const AGG_FN_NAMES: ReadonlySet<string> = new Set(
-  MCP_AGG_FN_OPTIONS.filter(fn => fn !== 'none'),
+  MCP_AGG_FN_OPTIONS.filter(fn => fn !== 'none' && fn !== 'increase'),
 );
 
 /**
@@ -170,7 +178,18 @@ export function registerTable(server: McpServer, context: McpContext) {
         'Map attributes work in groupBy and valueExpression, including ' +
         "toFloat64OrZero(SpanAttributes['key']).\n\n" +
         'Shape auto-upgrade: if shape is "number" or "pie" but select has >1 item, ' +
-        'it is transparently upgraded to "table".',
+        'it is transparently upgraded to "table".\n\n' +
+        '── METRIC SOURCES ──\n' +
+        'When sourceId is a metric source, each select item MUST set ' +
+        'metricType ("gauge"|"sum"|"histogram") and metricName (the OTel metric name). ' +
+        'valueExpression defaults to "Value" — set it explicitly only to transform the value.\n' +
+        'Discovery: clickstack_describe_source returns a per-kind metric-name sample; ' +
+        'clickstack_list_metrics paginates the full catalog; clickstack_describe_metric ' +
+        'returns attribute keys + sampled values for a single metric.\n' +
+        'Per kind: gauge uses last_value/avg/min/max; sum uses aggFn:"increase" for counter increase ' +
+        '(top-N capped at 20 groups when combined with groupBy), or sum/avg on the rate; ' +
+        'histogram uses aggFn:"quantile" + level for percentiles, or aggFn:"count" for total bucket count.\n' +
+        'summary and exponential histogram kinds are not supported by the query renderer yet.',
       inputSchema: tableSchema,
     },
     withToolTracing('clickstack_table', context, async input => {
@@ -183,12 +202,30 @@ export function registerTable(server: McpServer, context: McpContext) {
       }
       const { startDate, endDate } = timeRange;
 
+      // Cast to the concrete `McpSelectItem[]` because Zod 3.x widens
+      // optional-field inference at the MCP-SDK tool boundary; the
+      // runtime parser still produces the correct shape.
+      const rawSelect = input.select as McpSelectItem[];
+
+      // Validate cross-field constraints (metric rules, level/quantile,
+      // valueExpression presence) and surface friendly errors before we
+      // touch ClickHouse.
+      const validation = validateMetricSelectItems(rawSelect);
+      if (validation) return validation;
+
+      // Default valueExpression="Value" for metric items BEFORE we call
+      // buildTile, because the external dashboard tile schema's
+      // superRefine rejects non-count aggregations with empty
+      // valueExpression and agents normally omit the field on metric
+      // queries.
+      const select = applyMetricSelectDefaults(rawSelect);
+
       // Auto-upgrade shape when select has multiple items but shape is
       // single-value (number/pie). This is the #1 Zod error class from agents.
       let displayType: 'table' | 'number' | 'pie' = input.shape;
       if (
         (displayType === 'number' || displayType === 'pie') &&
-        input.select.length > 1
+        select.length > 1
       ) {
         displayType = 'table';
       }
@@ -197,11 +234,7 @@ export function registerTable(server: McpServer, context: McpContext) {
       // of the aggCondition for every metric. Table/line/number/pie display
       // types don't have a chart-level where — filtering is per-select-item.
       const { items: selectItems, warnings: mergeWarnings } =
-        mergeWhereIntoSelectItems(
-          input.select,
-          input.where,
-          input.whereLanguage,
-        );
+        mergeWhereIntoSelectItems(select, input.where, input.whereLanguage);
 
       const tile = buildTile('MCP Table', 12, 4, {
         displayType,
@@ -230,6 +263,10 @@ export function registerTable(server: McpServer, context: McpContext) {
         }
       }
 
+      // Surface the increase+groupBy top-N cap so the agent knows results
+      // may be truncated to 20 groups.
+      annotateIncreaseTopNHint(result, select, input.groupBy);
+
       return result;
     }),
   );
diff --git a/packages/api/src/mcp/tools/query/timeseries.ts b/packages/api/src/mcp/tools/query/timeseries.ts
index 283effb48a..0f80041da5 100644
--- a/packages/api/src/mcp/tools/query/timeseries.ts
+++ b/packages/api/src/mcp/tools/query/timeseries.ts
@@ -4,18 +4,23 @@ import { z } from 'zod';
 import { withToolTracing } from '../../utils/tracing';
 import type { McpContext } from '../types';
 import {
+  annotateIncreaseTopNHint,
+  appendHint,
   buildTile,
   mergeWhereIntoSelectItems,
   parseTimeRange,
   runConfigTile,
 } from './helpers';
 import {
+  applyMetricSelectDefaults,
   endTimeSchema,
   groupBySchema,
+  McpSelectItem,
   mcpSelectItemSchema,
   orderBySchema,
   sourceIdSchema,
   startTimeSchema,
+  validateMetricSelectItems,
   whereLanguageSchema,
   whereSchema,
 } from './schemas';
@@ -79,7 +84,20 @@ export function registerTimeseries(server: McpServer, context: McpContext) {
         'Requires sourceId — call clickstack_list_sources then clickstack_describe_source first. ' +
         'Each select item defines one plotted series.\n\n' +
         'Column naming: top-level columns are PascalCase (Duration, StatusCode). ' +
-        "Map attributes use bracket syntax: SpanAttributes['http.method'].",
+        "Map attributes use bracket syntax: SpanAttributes['http.method'].\n\n" +
+        '── METRIC SOURCES ──\n' +
+        'When sourceId is a metric source, each select item MUST set ' +
+        'metricType ("gauge"|"sum"|"histogram") and metricName (the OTel metric name). ' +
+        'valueExpression defaults to "Value" — set it explicitly only to transform the value.\n' +
+        'Discovery: clickstack_describe_source returns a per-kind metric-name sample; ' +
+        'clickstack_list_metrics paginates the full catalog; clickstack_describe_metric ' +
+        'returns attribute keys + sampled values for a single metric.\n' +
+        'Per kind: gauge uses last_value/avg/min/max (or aggFn:any + isDelta:true for Prometheus-style delta); ' +
+        'sum uses aggFn:"increase" for the counter increase, or sum/avg on the computed rate; ' +
+        'histogram uses aggFn:"quantile" + level for percentiles, or aggFn:"count" for total bucket count.\n' +
+        'TOP-N CAP: aggFn:"increase" + groupBy is capped at 20 groups by the renderer ' +
+        '(top by max bucket sum). Narrow with where/groupBy to see other groups.\n' +
+        'summary and exponential histogram kinds are not supported by the query renderer yet.',
       inputSchema: timeseriesSchema,
     },
     withToolTracing('clickstack_timeseries', context, async input => {
@@ -92,14 +110,28 @@ export function registerTimeseries(server: McpServer, context: McpContext) {
       }
       const { startDate, endDate } = timeRange;
 
+      // Cast to the concrete `McpSelectItem[]` because Zod 3.x widens
+      // optional-field inference at the MCP-SDK tool boundary; the
+      // runtime parser still produces the correct shape.
+      const rawSelect = input.select as McpSelectItem[];
+
+      // Validate cross-field constraints (metric rules, level/quantile,
+      // valueExpression presence) and surface friendly errors before we
+      // touch ClickHouse.
+      const validation = validateMetricSelectItems(rawSelect);
+      if (validation) return validation;
+
+      // Default valueExpression="Value" for metric items BEFORE we call
+      // buildTile, because the external dashboard tile schema's
+      // superRefine rejects non-count aggregations with empty
+      // valueExpression and agents normally omit the field on metric
+      // queries.
+      const select = applyMetricSelectDefaults(rawSelect);
+
       // Inject top-level where into each select item (timeseries uses
       // per-select-item aggCondition, not chart-level where)
       const { items: selectItems, warnings: mergeWarnings } =
-        mergeWhereIntoSelectItems(
-          input.select,
-          input.where,
-          input.whereLanguage,
-        );
+        mergeWhereIntoSelectItems(select, input.where, input.whereLanguage);
 
       const tile = buildTile('MCP Timeseries', 12, 4, {
         displayType: input.shape,
@@ -129,6 +161,10 @@ export function registerTimeseries(server: McpServer, context: McpContext) {
         }
       }
 
+      // Surface the increase+groupBy top-N cap so the agent knows results
+      // may be truncated to 20 groups.
+      annotateIncreaseTopNHint(result, select, input.groupBy);
+
       // Detect single-bucket collapse: when a timeseries query returns only
       // 1 row, the data likely collapsed into a single time bucket. Add a
       // hint so the agent knows to adjust the time range.
@@ -140,12 +176,14 @@ export function registerTimeseries(server: McpServer, context: McpContext) {
           const parsed = JSON.parse(result.content[0].text);
           const data = parsed?.result?.data;
           if (Array.isArray(data) && data.length === 1 && input.granularity) {
-            parsed.hint =
+            appendHint(
+              parsed,
               `Timeseries returned only 1 time bucket. ` +
-              `All data may have collapsed into a single "${input.granularity}" bucket. ` +
-              `The queried range was ${startDate.toISOString()} to ${endDate.toISOString()}. ` +
-              `If this looks wrong, adjust startTime/endTime to match the actual data range, ` +
-              `or try a coarser granularity.`;
+                `All data may have collapsed into a single "${input.granularity}" bucket. ` +
+                `The queried range was ${startDate.toISOString()} to ${endDate.toISOString()}. ` +
+                `If this looks wrong, adjust startTime/endTime to match the actual data range, ` +
+                `or try a coarser granularity.`,
+            );
             result.content[0].text = JSON.stringify(parsed, null, 2);
           }
         } catch {
diff --git a/packages/api/src/mcp/tools/sources/describeMetric.ts b/packages/api/src/mcp/tools/sources/describeMetric.ts
new file mode 100644
index 0000000000..d481652c46
--- /dev/null
+++ b/packages/api/src/mcp/tools/sources/describeMetric.ts
@@ -0,0 +1,783 @@
+import {
+  chSql,
+  concatChSql,
+  convertCHDataTypeToJSType,
+  filterColumnMetaByType,
+  JSDataType,
+  tableExpr,
+} from '@hyperdx/common-utils/dist/clickhouse';
+import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node';
+import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata';
+import { SourceKind } from '@hyperdx/common-utils/dist/types';
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { z } from 'zod';
+
+import { getConnectionById } from '@/controllers/connection';
+import { getSource } from '@/controllers/sources';
+import logger from '@/utils/logger';
+import { trimToolResponse } from '@/utils/trimToolResponse';
+
+import { withToolTracing } from '../../utils/tracing';
+import type { McpContext } from '../types';
+import {
+  QUERYABLE_METRIC_KINDS,
+  type QueryableMetricKind,
+} from './metricKinds';
+
+const DEFAULT_LOOKBACK_MS = 24 * 60 * 60 * 1000;
+const DESCRIBE_TIMEOUT_MS = 10_000;
+
+// Server-side safety nets for the attribute-keys discovery query.
+// Sample at most N rows that match (MetricName, time range), then
+// aggregate from that sample. ClickHouse can stop scanning once the
+// CTE has N matching rows so a hot metric does not starve the
+// wall-clock budget. 100k rows is plenty to surface every unique map
+// key on a healthy OTel metric.
+const METRIC_ATTR_KEYS_SAMPLE_SIZE = 100_000;
+const METRIC_ATTR_KEYS_MAX_EXEC_SECONDS = 8;
+
+// Max sampled values per attribute key (when sampleValues is true).
+const MAX_ATTR_VALUES = 10;
+// Cap on the number of attribute keys we sample values for per kind to
+// avoid runaway fan-out on high-cardinality metrics.
+const MAX_ATTR_KEYS_TO_SAMPLE = 12;
+
+// Per-kind aggregation guidance baked into the response so the agent can
+// build a valid clickstack_timeseries / clickstack_table call without
+// re-reading the schemas.
+const KIND_USAGE: Record<QueryableMetricKind, string> = {
+  gauge:
+    'Gauge: use aggFn:"last_value"|"avg"|"min"|"max" on Value. Set isDelta:true for Prometheus-style delta over each bucket.',
+  sum: 'Sum (counter): use aggFn:"increase" for the per-bucket counter increase (reset-aware), or aggFn:"sum"/"avg" on the computed rate. increase+groupBy is capped at the top 20 groups.',
+  histogram:
+    'Histogram: use aggFn:"quantile" with level ∈ {0.5, 0.9, 0.95, 0.99} for percentiles, or aggFn:"count" for the total bucket count.',
+};
+
+// ─── Schema ──────────────────────────────────────────────────────────────────
+
+const describeMetricSchema = z.object({
+  sourceId: z
+    .string()
+    .describe(
+      'Source ID. Must reference a metric source — get IDs from clickstack_list_sources.',
+    ),
+  metricName: z
+    .string()
+    .min(1)
+    .describe(
+      'OTel metric name to describe (e.g. "system.cpu.utilization", "http.server.request.duration"). ' +
+        'Discover via clickstack_list_metrics or clickstack_describe_source.',
+    ),
+  kind: z
+    .enum(QUERYABLE_METRIC_KINDS)
+    .describe(
+      'Metric kind: "gauge" | "sum" | "histogram". Required. ' +
+        'Discover via clickstack_list_metrics (which returns name + kind per entry) ' +
+        'or clickstack_describe_source (which groups metric-name samples by kind). ' +
+        'A metric name can legitimately live in more than one kind (e.g. ' +
+        '"container.cpu.usage" appears in both gauge and sum) — pick the kind ' +
+        'matching the value you want to query.',
+    ),
+  startTime: z
+    .string()
+    .optional()
+    .describe(
+      'Restrict discovery to data points after this ISO 8601 timestamp. Default: 24 hours before endTime (or now).',
+    ),
+  endTime: z
+    .string()
+    .optional()
+    .describe('End of the discovery window as ISO 8601. Default: now.'),
+  sampleValues: z
+    .boolean()
+    .optional()
+    .default(true)
+    .describe(
+      'When true (default), sample up to ' +
+        `${MAX_ATTR_VALUES} distinct values per attribute key for the top ${MAX_ATTR_KEYS_TO_SAMPLE} attributes. ` +
+        'Set false to skip value sampling for faster responses on high-cardinality metrics.',
+    ),
+});
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+type AttributeValuesMeta = {
+  /** Display names (`MapColumn['key']`) queried for value samples. */
+  sampledKeys: string[];
+  /** Display names skipped because the MAX_ATTR_KEYS_TO_SAMPLE cap fired. */
+  truncatedKeys: string[];
+};
+
+type KindDetail = {
+  kind: QueryableMetricKind;
+  tableName: string;
+  unit?: string;
+  description?: string;
+  attributeKeys: Record<string, string[]>;
+  attributeValues?: Record<string, string[]>;
+  attributeValuesMeta?: AttributeValuesMeta;
+  usage: string;
+};
+
+/**
+ * Discriminated result for the discovery sub-queries so the caller can
+ * distinguish "fetch failed" from "genuinely empty" — the two cases
+ * need different agent guidance (retry/report vs. widen the window).
+ */
+type FetchResult<T> = { ok: true; data: T } | { ok: false; error: string };
+
+/**
+ * Compact an error for inclusion in a tool response: single line,
+ * capped length, no stack frames.
+ */
+function sanitizeFetchError(e: unknown): string {
+  const message = e instanceof Error ? e.message : String(e);
+  return message.replace(/\s+/g, ' ').trim().slice(0, 200);
+}
+
+function parseTimeRange(
+  startTime?: string,
+  endTime?: string,
+): { error: string } | { startDate: Date; endDate: Date } {
+  const endDate = endTime ? new Date(endTime) : new Date();
+  const startDate = startTime
+    ? new Date(startTime)
+    : new Date(endDate.getTime() - DEFAULT_LOOKBACK_MS);
+  if (isNaN(endDate.getTime()) || isNaN(startDate.getTime())) {
+    return {
+      error: 'Invalid startTime or endTime: must be valid ISO 8601 strings',
+    };
+  }
+  if (startDate >= endDate) {
+    return { error: 'endTime must be greater than startTime' };
+  }
+  return { startDate, endDate };
+}
+
+/**
+ * Fetch unit and description for a metric name on a single kind table.
+ * Returns undefined values when the columns are absent from the schema.
+ */
+async function fetchUnitAndDescription({
+  clickhouseClient,
+  databaseName,
+  tableName,
+  connectionId,
+  metricName,
+  startDate,
+  endDate,
+  hasUnit,
+  hasDescription,
+  signal,
+}: {
+  clickhouseClient: ClickhouseClient;
+  databaseName: string;
+  tableName: string;
+  connectionId: string;
+  metricName: string;
+  startDate: Date;
+  endDate: Date;
+  hasUnit: boolean;
+  hasDescription: boolean;
+  signal: AbortSignal;
+}): Promise<{ unit?: string; description?: string }> {
+  if (!hasUnit && !hasDescription) return {};
+  const projections = [
+    ...(hasUnit
+      ? [chSql`anyLast(${{ Identifier: 'MetricUnit' }}) AS MetricUnit`]
+      : []),
+    ...(hasDescription
+      ? [
+          chSql`anyLast(${{ Identifier: 'MetricDescription' }}) AS MetricDescription`,
+        ]
+      : []),
+  ];
+  const sql = chSql`
+    SELECT ${concatChSql(', ', projections)}
+    FROM ${tableExpr({ database: databaseName, table: tableName })}
+    WHERE MetricName = ${{ String: metricName }}
+      AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }})
+      AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }})
+  `;
+  try {
+    const response = await clickhouseClient.query<'JSON'>({
+      query: sql.sql,
+      query_params: sql.params,
+      format: 'JSON',
+      connectionId,
+      abort_signal: signal,
+    });
+    const result = (await response.json()) as {
+      data: Array<{ MetricUnit?: string; MetricDescription?: string }>;
+    };
+    const row = result.data[0];
+    if (!row) return {};
+    return {
+      ...(row.MetricUnit ? { unit: row.MetricUnit } : {}),
+      ...(row.MetricDescription ? { description: row.MetricDescription } : {}),
+    };
+  } catch (e) {
+    logger.warn(
+      { tableName, error: e instanceof Error ? e.message : String(e) },
+      'fetchUnitAndDescription failed',
+    );
+    return {};
+  }
+}
+
+/**
+ * Discover the distinct attribute keys present for a single metric
+ * name on a single kind's table, grouped by the Map column they live on
+ * (typically ResourceAttributes / Attributes / ScopeAttributes on the
+ * OTel Collector default schema). Issues one query per Map column with
+ * `mapKeys(col) AS keys` and aggregates the distinct keys server-side.
+ *
+ * Bounds the scan two ways so it stays fast on production-shaped metric
+ * tables (where a single MetricName can match millions of rows):
+ *   - WHERE TimeUnix BETWEEN startDate AND endDate scopes to the
+ *     discovery window the caller already passed.
+ *   - max_rows_to_read caps the per-query scan server-side so a hot
+ *     metric cannot starve the wall-clock budget on its own.
+ *
+ * The earlier inline SQL did neither — an unbounded
+ * `WHERE MetricName = ?` scan consistently exceeded the wall-clock
+ * timeout on production tables.
+ */
+async function fetchAttributeKeys({
+  clickhouseClient,
+  databaseName,
+  tableName,
+  connectionId,
+  metricName,
+  columns,
+  startDate,
+  endDate,
+  signal,
+}: {
+  clickhouseClient: ClickhouseClient;
+  databaseName: string;
+  tableName: string;
+  connectionId: string;
+  metricName: string;
+  columns: { name: string; type: string }[];
+  startDate: Date;
+  endDate: Date;
+  signal: AbortSignal;
+}): Promise<FetchResult<Record<string, string[]>>> {
+  const mapColumns =
+    filterColumnMetaByType(columns, [JSDataType.Map])?.filter(
+      c => convertCHDataTypeToJSType(c.type) === JSDataType.Map,
+    ) ?? [];
+  if (mapColumns.length === 0) return { ok: true, data: {} };
+
+  const sampleProjections = mapColumns.map(
+    col => chSql`${{ Identifier: col.name }}`,
+  );
+
+  const aggProjections = mapColumns.map(
+    col =>
+      chSql`arrayDistinct(arrayFlatten(groupArray(mapKeys(${{ Identifier: col.name }})))) AS ${{ Identifier: col.name }}`,
+  );
+
+  // Aggregate from a bounded sample of matching rows. The inner LIMIT
+  // lets ClickHouse stop scanning once it has SAMPLE_SIZE rows that
+  // match (MetricName, time range), which keeps the query fast on hot
+  // metric tables. ResourceAttributes / Attributes / ScopeAttributes
+  // are rarely keyed independently per row, so 100k rows surfaces
+  // every realistic key set.
+  const sql = chSql`
+    SELECT ${concatChSql(', ', aggProjections)}
+    FROM (
+      SELECT ${concatChSql(', ', sampleProjections)}
+      FROM ${tableExpr({ database: databaseName, table: tableName })}
+      WHERE MetricName = ${{ String: metricName }}
+        AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }})
+        AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }})
+      LIMIT ${{ Int32: METRIC_ATTR_KEYS_SAMPLE_SIZE }}
+    )
+  `;
+
+  try {
+    const response = await clickhouseClient.query<'JSON'>({
+      query: sql.sql,
+      query_params: sql.params,
+      format: 'JSON',
+      connectionId,
+      clickhouse_settings: {
+        max_execution_time: METRIC_ATTR_KEYS_MAX_EXEC_SECONDS,
+        timeout_overflow_mode: 'break',
+      },
+      abort_signal: signal,
+    });
+    const result = (await response.json()) as {
+      data: Array<Record<string, string[]>>;
+    };
+    const row = result.data[0];
+    if (!row) return { ok: true, data: {} };
+    const out: Record<string, string[]> = {};
+    for (const col of mapColumns) {
+      const keys = row[col.name];
+      if (Array.isArray(keys) && keys.length > 0) {
+        out[col.name] = keys.filter(k => typeof k === 'string' && k.length > 0);
+      }
+    }
+    return { ok: true, data: out };
+  } catch (e) {
+    logger.warn(
+      { tableName, error: e instanceof Error ? e.message : String(e) },
+      'fetchAttributeKeys failed',
+    );
+    return { ok: false, error: sanitizeFetchError(e) };
+  }
+}
+
+type SampleAttributeValuesData = {
+  /** Keys with at least one non-empty sampled value. */
+  values: Record<string, string[]>;
+  meta: AttributeValuesMeta;
+};
+
+/**
+ * Sample distinct values per attribute key. Uses one composed
+ * groupArray-per-key query so all keys are fetched in a single round
+ * trip.
+ *
+ * The returned meta records which keys were actually queried
+ * (`sampledKeys`) vs. skipped by the MAX_ATTR_KEYS_TO_SAMPLE cap
+ * (`truncatedKeys`) so callers can distinguish "not sampled" from
+ * "sampled but empty in the window".
+ */
+async function sampleAttributeValues({
+  clickhouseClient,
+  databaseName,
+  tableName,
+  connectionId,
+  metricName,
+  attributeKeys,
+  startDate,
+  endDate,
+  signal,
+}: {
+  clickhouseClient: ClickhouseClient;
+  databaseName: string;
+  tableName: string;
+  connectionId: string;
+  metricName: string;
+  // attributeKeys is shape { mapColumn: keyName[] } e.g. { Attributes: ['http.method', 'http.route'] }
+  attributeKeys: Record<string, string[]>;
+  startDate: Date;
+  endDate: Date;
+  signal: AbortSignal;
+}): Promise<FetchResult<SampleAttributeValuesData>> {
+  const flatKeyExprs: { display: string; mapColumn: string; key: string }[] =
+    [];
+  const truncatedKeys: string[] = [];
+  for (const [mapColumn, keys] of Object.entries(attributeKeys)) {
+    for (const key of keys) {
+      const display = `${mapColumn}['${key}']`;
+      if (flatKeyExprs.length >= MAX_ATTR_KEYS_TO_SAMPLE) {
+        truncatedKeys.push(display);
+        continue;
+      }
+      flatKeyExprs.push({ display, mapColumn, key });
+    }
+  }
+  const meta: AttributeValuesMeta = {
+    sampledKeys: flatKeyExprs.map(e => e.display),
+    truncatedKeys,
+  };
+  if (flatKeyExprs.length === 0) {
+    return { ok: true, data: { values: {}, meta } };
+  }
+
+  const projections = flatKeyExprs.map(
+    ({ mapColumn, key }, idx) => chSql`
+      groupUniqArray(${{ Int32: MAX_ATTR_VALUES }})(
+        ${{ Identifier: mapColumn }}[${{ String: key }}]
+      ) AS param${{ UNSAFE_RAW_SQL: String(idx) }}
+    `,
+  );
+
+  // Distinct set of map columns we need from each sampled row, so the
+  // inner subquery only projects what's required for the outer
+  // groupUniqArray aggregates.
+  const sampledMapColumns = Array.from(
+    new Set(flatKeyExprs.map(e => e.mapColumn)),
+  );
+  const sampleProjections = sampledMapColumns.map(
+    col => chSql`${{ Identifier: col }}`,
+  );
+
+  // Aggregate from a bounded sample of matching rows. Mirrors
+  // fetchAttributeKeys: the inner LIMIT lets ClickHouse stop scanning
+  // once it has SAMPLE_SIZE rows that match (MetricName, time range),
+  // and the matching clickhouse_settings cap server-side execution so a
+  // hot metric cannot starve the wall-clock budget.
+  const sql = chSql`
+    SELECT ${concatChSql(', ', projections)}
+    FROM (
+      SELECT ${concatChSql(', ', sampleProjections)}
+      FROM ${tableExpr({ database: databaseName, table: tableName })}
+      WHERE MetricName = ${{ String: metricName }}
+        AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }})
+        AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }})
+      LIMIT ${{ Int32: METRIC_ATTR_KEYS_SAMPLE_SIZE }}
+    )
+  `;
+
+  try {
+    const response = await clickhouseClient.query<'JSON'>({
+      query: sql.sql,
+      query_params: sql.params,
+      format: 'JSON',
+      connectionId,
+      clickhouse_settings: {
+        max_execution_time: METRIC_ATTR_KEYS_MAX_EXEC_SECONDS,
+        timeout_overflow_mode: 'break',
+      },
+      abort_signal: signal,
+    });
+    const result = (await response.json()) as {
+      data: Array<Record<string, string[]>>;
+    };
+    const row = result.data[0];
+    if (!row) return { ok: true, data: { values: {}, meta } };
+    const values: Record<string, string[]> = {};
+    flatKeyExprs.forEach((keyExpr, idx) => {
+      const sample = (row[`param${idx}`] ?? []).filter(v => v !== '');
+      if (sample.length > 0) {
+        values[keyExpr.display] = sample;
+      }
+    });
+    return { ok: true, data: { values, meta } };
+  } catch (e) {
+    logger.warn(
+      { tableName, error: e instanceof Error ? e.message : String(e) },
+      'sampleAttributeValues failed',
+    );
+    return { ok: false, error: sanitizeFetchError(e) };
+  }
+}
+
+async function describeMetricImpl(
+  teamId: string,
+  input: z.infer<typeof describeMetricSchema>,
+  signal: AbortSignal,
+) {
+  const source = await getSource(teamId, input.sourceId);
+  if (!source) {
+    return {
+      isError: true,
+      content: [
+        {
+          type: 'text' as const,
+          text: `Source "${input.sourceId}" not found. Call clickstack_list_sources to see available source IDs.`,
+        },
+      ],
+    };
+  }
+  if (source.kind !== SourceKind.Metric) {
+    return {
+      isError: true,
+      content: [
+        {
+          type: 'text' as const,
+          text: `Source "${input.sourceId}" is a "${source.kind}" source, not a metric source. clickstack_describe_metric only works on metric sources.`,
+        },
+      ],
+    };
+  }
+
+  const timeRange = parseTimeRange(input.startTime, input.endTime);
+  if ('error' in timeRange) {
+    return {
+      isError: true,
+      content: [{ type: 'text' as const, text: timeRange.error }],
+    };
+  }
+  const { startDate, endDate } = timeRange;
+
+  const connection = await getConnectionById(
+    teamId,
+    source.connection.toString(),
+    true,
+  );
+  if (!connection) {
+    return {
+      isError: true,
+      content: [
+        {
+          type: 'text' as const,
+          text: `Connection not found for source "${input.sourceId}".`,
+        },
+      ],
+    };
+  }
+
+  const clickhouseClient = new ClickhouseClient({
+    host: connection.host,
+    username: connection.username,
+    password: connection.password,
+  });
+  const metadata = getMetadata(clickhouseClient);
+  const databaseName = source.from.databaseName;
+  const connectionId = source.connection.toString();
+
+  // Validate the requested kind has a populated table on this source.
+  // The schema requires `kind`, so we always have an exact target.
+  const kind = input.kind;
+  const tableName = source.metricTables[kind];
+  if (!tableName) {
+    return {
+      isError: true,
+      content: [
+        {
+          type: 'text' as const,
+          text: `Source "${input.sourceId}" has no "${kind}" metric table populated. Populated kinds: ${Object.keys(source.metricTables).join(', ')}.`,
+        },
+      ],
+    };
+  }
+
+  // Defensive column-presence check before referencing MetricUnit /
+  // MetricDescription on this kind's table.
+  let columns: { name: string; type: string }[];
+  try {
+    columns = await metadata.getColumns({
+      databaseName,
+      tableName,
+      connectionId,
+    });
+  } catch (e) {
+    logger.warn(
+      { kind, error: e instanceof Error ? e.message : String(e) },
+      'describeMetric: getColumns failed',
+    );
+    return {
+      isError: true,
+      content: [
+        {
+          type: 'text' as const,
+          text: `Failed to load columns for "${tableName}". The metric table may be missing or unreachable.`,
+        },
+      ],
+    };
+  }
+  const columnNames = new Set(columns.map(c => c.name));
+  const hasUnit = columnNames.has('MetricUnit');
+  const hasDescription = columnNames.has('MetricDescription');
+
+  const [meta, attributeKeysResult] = await Promise.all([
+    fetchUnitAndDescription({
+      clickhouseClient,
+      databaseName,
+      tableName,
+      connectionId,
+      metricName: input.metricName,
+      startDate,
+      endDate,
+      hasUnit,
+      hasDescription,
+      signal,
+    }),
+    fetchAttributeKeys({
+      clickhouseClient,
+      databaseName,
+      tableName,
+      connectionId,
+      metricName: input.metricName,
+      columns,
+      startDate,
+      endDate,
+      signal,
+    }),
+  ]);
+
+  // Track discovery sub-queries that failed so the agent can distinguish
+  // "fetch failed" (retry / report) from "genuinely empty" (widen the
+  // window). Without this, transient ClickHouse errors surfaced as the
+  // misleading "No data found" hint.
+  const partialFailure: { stage: string; error: string }[] = [];
+  const attributeKeys = attributeKeysResult.ok ? attributeKeysResult.data : {};
+  if (!attributeKeysResult.ok) {
+    partialFailure.push({
+      stage: 'attributeKeys',
+      error: attributeKeysResult.error,
+    });
+  }
+
+  const kindDetail: KindDetail = {
+    kind,
+    tableName,
+    ...(meta.unit ? { unit: meta.unit } : {}),
+    ...(meta.description ? { description: meta.description } : {}),
+    attributeKeys,
+    usage: KIND_USAGE[kind],
+  };
+
+  if (input.sampleValues && Object.keys(attributeKeys).length > 0) {
+    const valuesResult = await sampleAttributeValues({
+      clickhouseClient,
+      databaseName,
+      tableName,
+      connectionId,
+      metricName: input.metricName,
+      attributeKeys,
+      startDate,
+      endDate,
+      signal,
+    });
+    if (valuesResult.ok) {
+      kindDetail.attributeValues = valuesResult.data.values;
+      // sampledKeys / truncatedKeys let the agent tell "key absent
+      // because we never queried it" (truncated) apart from "queried
+      // but empty in the window" (in sampledKeys, missing from values).
+      kindDetail.attributeValuesMeta = valuesResult.data.meta;
+    } else {
+      partialFailure.push({
+        stage: 'attributeValues',
+        error: valuesResult.error,
+      });
+    }
+  }
+
+  const kindDetails: KindDetail[] = [kindDetail];
+
+  // Heuristic "no data in this kind" hint: when neither attribute keys,
+  // unit, nor description came back, the (metric, kind) pair likely has
+  // no data in the requested window. The agent's recourse is to widen
+  // startTime/endTime or double-check the kind via clickstack_list_metrics.
+  // Suppressed when any discovery stage failed — an empty attributeKeys
+  // can't be trusted as "no data" then.
+  const noSignal =
+    partialFailure.length === 0 &&
+    Object.keys(attributeKeys).length === 0 &&
+    !meta.unit &&
+    !meta.description;
+
+  const queryExample = `clickstack_timeseries({ sourceId: "${input.sourceId}", select: [{ aggFn: ${
+    kind === 'sum'
+      ? '"increase"'
+      : kind === 'histogram'
+        ? '"quantile", level: 0.95'
+        : '"avg"'
+  }, metricType: "${kind}", metricName: "${input.metricName}" }] })`;
+
+  const responseObj: Record<string, unknown> = {
+    metricName: input.metricName,
+    kinds: kindDetails,
+    ...(partialFailure.length > 0 && {
+      partialFailure,
+      hint:
+        'Some discovery queries failed — the fields above may be incomplete. ' +
+        'Retry the call; if the failure persists, narrow startTime/endTime or set sampleValues:false.',
+    }),
+    ...(noSignal && {
+      hint:
+        `No data found for MetricName "${input.metricName}" with kind "${kind}" ` +
+        `between ${startDate.toISOString()} and ${endDate.toISOString()}. ` +
+        'Try widening startTime/endTime, or call clickstack_list_metrics to ' +
+        'confirm the metric name + kind combination exists.',
+    }),
+    nextSteps: {
+      query: `Example: ${queryExample}`,
+    },
+  };
+
+  const { data: trimmed, isTrimmed } = trimToolResponse(responseObj);
+  return {
+    content: [
+      {
+        type: 'text' as const,
+        text: JSON.stringify(
+          isTrimmed
+            ? {
+                ...trimmed,
+                note: 'Result was trimmed for context size. Set sampleValues:false or narrow startTime/endTime.',
+              }
+            : trimmed,
+          null,
+          2,
+        ),
+      },
+    ],
+  };
+}
+
+// ─── Tool registration ───────────────────────────────────────────────────────
+
+export function registerDescribeMetric(
+  server: McpServer,
+  context: McpContext,
+): void {
+  const { teamId } = context;
+
+  server.registerTool(
+    'clickstack_describe_metric',
+    {
+      title: 'Describe Metric',
+      description:
+        'DRILL-DOWN: Use after clickstack_list_metrics (or after a clickstack_describe_source ' +
+        'sample) to get attribute keys, sampled values, unit, and description for a ' +
+        'specific (metricName, kind) pair. Attribute keys vary per metric — not per source — ' +
+        "so always call this before clickstack_timeseries / clickstack_table for any metric you've never queried.\n\n" +
+        'REQUIRES `kind` — pass the gauge/sum/histogram value emitted alongside the metric name by ' +
+        'clickstack_list_metrics or clickstack_describe_source. A metric name can legitimately ' +
+        'live in more than one kind (e.g. "container.cpu.usage" appears in both gauge and sum); ' +
+        'call this tool once per kind you care about.\n\n' +
+        'attributeValuesMeta on each kind reports sampledKeys (queried for values) and ' +
+        'truncatedKeys (skipped by the per-call sampling cap) — a key in truncatedKeys was ' +
+        'never queried, so query it directly if you need its values.\n\n' +
+        'Workflow: clickstack_list_sources → clickstack_list_metrics → ' +
+        'clickstack_describe_metric → clickstack_timeseries|clickstack_table.',
+      inputSchema: describeMetricSchema,
+    },
+    withToolTracing('clickstack_describe_metric', context, async rawInput => {
+      // Re-parse explicitly: the MCP SDK callback signature widens
+      // optional-field types into `unknown`, but the parser produces
+      // the typed shape we need for downstream calls.
+      const input = describeMetricSchema.parse(rawInput);
+      const controller = new AbortController();
+      // Hoist the timer handle so the finally block can cancel it on the
+      // success path — otherwise a stale controller.abort() fires
+      // DESCRIBE_TIMEOUT_MS after every successful call and the
+      // setTimeout closure stays pinned for the same duration.
+      let timeoutId: ReturnType<typeof setTimeout> | undefined;
+      const timeoutPromise = new Promise<never>((_, reject) => {
+        timeoutId = setTimeout(() => {
+          controller.abort();
+          reject(new Error('DESCRIBE_METRIC_TIMEOUT'));
+        }, DESCRIBE_TIMEOUT_MS);
+      });
+      try {
+        return await Promise.race([
+          describeMetricImpl(teamId.toString(), input, controller.signal),
+          timeoutPromise,
+        ]);
+      } catch (e) {
+        if (e instanceof Error && e.message === 'DESCRIBE_METRIC_TIMEOUT') {
+          logger.warn(
+            { teamId, sourceId: input.sourceId, metricName: input.metricName },
+            'clickstack_describe_metric timed out',
+          );
+          return {
+            isError: true as const,
+            content: [
+              {
+                type: 'text' as const,
+                text:
+                  'Discovery timed out. The metric table may be under load or the ' +
+                  'attribute set may be very high-cardinality. Try narrowing ' +
+                  'startTime/endTime or setting sampleValues:false to skip the ' +
+                  'value-sampling stage.',
+              },
+            ],
+          };
+        }
+        throw e;
+      } finally {
+        if (timeoutId !== undefined) clearTimeout(timeoutId);
+      }
+    }),
+  );
+}
diff --git a/packages/api/src/mcp/tools/sources/describeSource.ts b/packages/api/src/mcp/tools/sources/describeSource.ts
index 5674c7e2cf..757cccf6bd 100644
--- a/packages/api/src/mcp/tools/sources/describeSource.ts
+++ b/packages/api/src/mcp/tools/sources/describeSource.ts
@@ -1,11 +1,14 @@
 import {
+  chSql,
+  concatChSql,
   convertCHDataTypeToJSType,
   filterColumnMetaByType,
   JSDataType,
+  tableExpr,
 } from '@hyperdx/common-utils/dist/clickhouse';
 import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node';
 import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata';
-import { SourceKind } from '@hyperdx/common-utils/dist/types';
+import { type MetricTable, SourceKind } from '@hyperdx/common-utils/dist/types';
 import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
 import { z } from 'zod';
 
@@ -16,6 +19,11 @@ import { trimToolResponse } from '@/utils/trimToolResponse';
 
 import { withToolTracing } from '../../utils/tracing';
 import type { McpContext } from '../types';
+import {
+  QUERYABLE_METRIC_KINDS,
+  type QueryableMetricKind,
+  sanitizeMetricTables,
+} from './metricKinds';
 
 // How far back to look when querying the rollup tables for value samples.
 const VALUE_SAMPLE_LOOKBACK_MS = 24 * 60 * 60 * 1000; // 24 hours
@@ -28,6 +36,200 @@ const MAX_LC_VALUES = 20;
 const MAX_MAP_KEY_VALUES = 5;
 const MAX_MAP_KEYS_TO_SAMPLE = 10;
 
+// Max MetricName values returned per metric kind by the starter sample.
+// clickstack_list_metrics provides paginated discovery beyond this cap.
+const MAX_METRIC_NAMES_PER_KIND = 20;
+
+/**
+ * Pick the representative metric table to use as the starting point for
+ * schema/attribute discovery on a metric source. Prefers gauge → sum →
+ * histogram from the source's populated metricTables map. Returns the
+ * ClickHouse table name, or undefined when no queryable metric table is
+ * populated.
+ */
+function pickRepresentativeMetricTable(
+  metricTables: MetricTable,
+): { kind: QueryableMetricKind; tableName: string } | undefined {
+  for (const kind of QUERYABLE_METRIC_KINDS) {
+    const tableName = metricTables[kind];
+    if (tableName) {
+      return { kind, tableName };
+    }
+  }
+  return undefined;
+}
+
+type MetricNameSample = {
+  name: string;
+  unit?: string;
+  description?: string;
+};
+
+/**
+ * Sample distinct MetricName values for a single metric kind. Optionally
+ * enriches each name with MetricUnit / MetricDescription when those
+ * columns are present on the table (the OTel Collector default schema
+ * includes them; custom schemas may not).
+ */
+async function sampleMetricNamesForKind({
+  metadata,
+  clickhouseClient,
+  databaseName,
+  tableName,
+  connectionId,
+  dateRange,
+  timestampValueExpression,
+  signal,
+  cachedColumns,
+}: {
+  metadata: ReturnType<typeof getMetadata>;
+  clickhouseClient: ClickhouseClient;
+  databaseName: string;
+  tableName: string;
+  connectionId: string;
+  dateRange: [Date, Date];
+  timestampValueExpression: string;
+  signal: AbortSignal;
+  cachedColumns?: { name: string }[];
+}): Promise<MetricNameSample[]> {
+  // Defensive column presence check for MetricUnit / MetricDescription.
+  const kindColumns =
+    cachedColumns ??
+    (await metadata.getColumns({ databaseName, tableName, connectionId }));
+  const columnNames = new Set(kindColumns.map(c => c.name));
+  const hasUnit = columnNames.has('MetricUnit');
+  const hasDescription = columnNames.has('MetricDescription');
+
+  // First fetch the distinct metric names; this is the only step that
+  // strictly needs to succeed for the kind to appear in the response.
+  // Pass timestampValueExpression so the no-rollup fallback path scopes
+  // its scan to dateRange instead of going unbounded against the raw
+  // metric table on cold cache.
+  const nameResults = await metadata.getAllKeyValues({
+    databaseName,
+    tableName,
+    keyExpressions: ['MetricName'],
+    maxValuesPerKey: MAX_METRIC_NAMES_PER_KIND,
+    connectionId,
+    dateRange,
+    timestampValueExpression,
+    signal,
+  });
+  const names = nameResults[0]?.value ?? [];
+  if (names.length === 0) return [];
+
+  // Best-effort enrichment with unit + description. One small query
+  // returns one row per metric name with the most-recent unit / desc.
+  let enrichments: Record<string, { unit?: string; description?: string }> = {};
+  if ((hasUnit || hasDescription) && !signal.aborted) {
+    try {
+      enrichments = await fetchMetricNameEnrichments({
+        clickhouseClient,
+        databaseName,
+        tableName,
+        connectionId,
+        names,
+        dateRange,
+        hasUnit,
+        hasDescription,
+        signal,
+      });
+    } catch (e) {
+      logger.warn(
+        { databaseName, tableName, error: e },
+        'Failed to enrich metric names with unit/description',
+      );
+    }
+  }
+
+  return names.map(name => {
+    const enrichment = enrichments[name] ?? {};
+    const sample: MetricNameSample = { name };
+    if (enrichment.unit) sample.unit = enrichment.unit;
+    if (enrichment.description) sample.description = enrichment.description;
+    return sample;
+  });
+}
+
+/**
+ * Fetch MetricUnit and MetricDescription for a batch of metric names.
+ * Uses `anyLast` so the most-recent value wins when a metric has changed
+ * unit/description over time.
+ */
+async function fetchMetricNameEnrichments({
+  clickhouseClient,
+  databaseName,
+  tableName,
+  connectionId,
+  names,
+  dateRange,
+  hasUnit,
+  hasDescription,
+  signal,
+}: {
+  clickhouseClient: ClickhouseClient;
+  databaseName: string;
+  tableName: string;
+  connectionId: string;
+  names: string[];
+  dateRange: [Date, Date];
+  hasUnit: boolean;
+  hasDescription: boolean;
+  signal: AbortSignal;
+}): Promise<Record<string, { unit?: string; description?: string }>> {
+  // Build the projection fragments via the parameterised chSql DSL so
+  // identifiers are quoted and the unit/description columns only appear
+  // when present on the source table.
+  const projections = [
+    chSql`MetricName`,
+    ...(hasUnit
+      ? [chSql`anyLast(${{ Identifier: 'MetricUnit' }}) AS MetricUnit`]
+      : []),
+    ...(hasDescription
+      ? [
+          chSql`anyLast(${{ Identifier: 'MetricDescription' }}) AS MetricDescription`,
+        ]
+      : []),
+  ];
+  const namePlaceholders = concatChSql(
+    ',',
+    names.map(name => chSql`${{ String: name }}`),
+  );
+  const sql = chSql`
+    SELECT ${concatChSql(', ', projections)}
+    FROM ${tableExpr({ database: databaseName, table: tableName })}
+    WHERE MetricName IN (${namePlaceholders})
+      AND TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: dateRange[0].getTime() }})
+      AND TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: dateRange[1].getTime() }})
+    GROUP BY MetricName
+  `;
+
+  type EnrichmentRow = {
+    MetricName: string;
+    MetricUnit?: string;
+    MetricDescription?: string;
+  };
+
+  const response = await clickhouseClient.query<'JSON'>({
+    query: sql.sql,
+    query_params: sql.params,
+    format: 'JSON',
+    connectionId,
+    abort_signal: signal,
+  });
+  const result = (await response.json()) as { data: EnrichmentRow[] };
+
+  const enrichments: Record<string, { unit?: string; description?: string }> =
+    {};
+  for (const row of result.data) {
+    enrichments[row.MetricName] = {
+      ...(row.MetricUnit ? { unit: row.MetricUnit } : {}),
+      ...(row.MetricDescription ? { description: row.MetricDescription } : {}),
+    };
+  }
+  return enrichments;
+}
+
 /**
  * Core schema-discovery logic. Extracted so the caller can wrap it in
  * Promise.race for wall-clock timeout enforcement.
@@ -72,6 +274,9 @@ async function describeSourceSchema(
   }
 
   // Key columns by source kind
+  let representativeMetric:
+    | { kind: QueryableMetricKind; tableName: string }
+    | undefined;
   if (source.kind === SourceKind.Trace) {
     meta.keyColumns = {
       spanName: source.spanNameExpression,
@@ -90,11 +295,29 @@ async function describeSourceSchema(
       traceId: source.traceIdExpression,
     };
   } else if (source.kind === SourceKind.Metric) {
-    meta.metricTables = source.metricTables;
+    // Filter out implementation-detail keys (e.g. a stray Mongoose `_id`
+    // on the metricTables subdoc) so the agent only sees valid metric
+    // kinds.
+    const tables = sanitizeMetricTables(
+      source.metricTables as Record<string, unknown> | undefined,
+    );
+    if (tables) meta.metricTables = tables;
+    representativeMetric = pickRepresentativeMetricTable(source.metricTables);
+    if (representativeMetric) {
+      meta.discoveryMetricKind = representativeMetric.kind;
+    }
   }
 
-  // For sources without a table (e.g. metric sources), return early
-  if (!source.from.tableName) {
+  // Resolve the table name we'll use for column / map-key / value
+  // discovery. For non-metric sources this is just source.from.tableName.
+  // For metric sources we use the representative metric table picked
+  // above (gauge → sum → histogram).
+  const discoveryTableName =
+    source.from.tableName || representativeMetric?.tableName || '';
+
+  // Only early-return when there is truly no table to discover schema
+  // against (e.g. a metric source with no populated metric tables).
+  if (!discoveryTableName) {
     return {
       content: [
         {
@@ -103,7 +326,7 @@ async function describeSourceSchema(
             {
               source: meta,
               nextSteps: {
-                query: `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}" and the metric tables above.`,
+                query: `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}".`,
               },
             },
             null,
@@ -137,7 +360,8 @@ async function describeSourceSchema(
     password: connection.password,
   });
   const metadata = getMetadata(clickhouseClient);
-  const { databaseName, tableName } = source.from;
+  const databaseName = source.from.databaseName;
+  const tableName = discoveryTableName;
   const connectionId = source.connection.toString();
 
   // Track which sampling stages were skipped due to timeout
@@ -170,6 +394,12 @@ async function describeSourceSchema(
   }));
 
   // ── 2. Map attribute keys ─────────────────────────────────────────────
+  // timestampValueExpression is threaded into getMapKeys / getAllKeyValues /
+  // sampleMetricNamesForKind below so the no-rollup fallback path (i.e.
+  // metric sources, which don't have metadataMaterializedViews configured)
+  // can scope its scan to dateRange instead of going unbounded against
+  // the raw metric table on cold cache.
+  const timestampValueExpression = source.timestampValueExpression;
   const mapColumns = filterColumnMetaByType(columns, [JSDataType.Map]);
   const mapKeysResults: Record<string, string[]> = {};
 
@@ -185,6 +415,7 @@ async function describeSourceSchema(
             connectionId,
             metadataMVs,
             dateRange,
+            timestampValueExpression,
             signal,
           });
           mapKeysResults[col.name] = keys;
@@ -226,6 +457,7 @@ async function describeSourceSchema(
         connectionId,
         metadataMVs,
         dateRange,
+        timestampValueExpression,
         signal,
       });
       for (const { key, value } of results) {
@@ -265,6 +497,7 @@ async function describeSourceSchema(
         connectionId,
         metadataMVs,
         dateRange,
+        timestampValueExpression,
         signal,
       });
       for (const { key, value } of results) {
@@ -287,6 +520,55 @@ async function describeSourceSchema(
     skippedStages.push('mapAttributeValues');
   }
 
+  // ── 5. Metric name + unit + description sampling ──────────────────────
+  // For metric sources, sample distinct MetricName values per queryable
+  // kind so the agent has a starter list without needing a follow-up call
+  // to clickstack_list_metrics for the common case (<= 20 metrics/kind).
+  // Defensively check for MetricUnit / MetricDescription columns: they
+  // exist on the standard OTel Collector schema but a custom metric table
+  // may not declare them.
+  if (source.kind === SourceKind.Metric && !signal.aborted) {
+    const metricNames: Record<string, MetricNameSample[]> = {};
+    await Promise.all(
+      QUERYABLE_METRIC_KINDS.map(async kind => {
+        const kindTableName = source.metricTables[kind];
+        if (!kindTableName) return;
+        try {
+          const samples = await sampleMetricNamesForKind({
+            metadata,
+            clickhouseClient,
+            databaseName,
+            tableName: kindTableName,
+            connectionId,
+            dateRange,
+            timestampValueExpression,
+            signal,
+            // Reuse representative columns when the kind matches the
+            // representative table to avoid a second getColumns round-trip.
+            cachedColumns:
+              representativeMetric?.tableName === kindTableName
+                ? columns
+                : undefined,
+          });
+          if (samples.length > 0) {
+            metricNames[kind] = samples;
+          }
+        } catch (e) {
+          logger.warn(
+            { sourceId, kind, error: e },
+            'Failed to sample metric names for kind',
+          );
+        }
+      }),
+    );
+    if (signal.aborted && Object.keys(metricNames).length === 0) {
+      skippedStages.push('metricNames');
+    }
+    if (Object.keys(metricNames).length > 0) {
+      meta.metricNames = metricNames;
+    }
+  }
+
   // Flag partial results so the LLM knows value samples may be incomplete
   if (skippedStages.length > 0) {
     meta.partial = true;
@@ -300,6 +582,14 @@ async function describeSourceSchema(
       : 'These are the REAL values in your data — use them in filters instead of guessing. ' +
         'Example: where: "SeverityText:error" (if \'error\' appears in the sampled values above).';
 
+  const isMetricSource = source.kind === SourceKind.Metric;
+  const queryNextStep = isMetricSource
+    ? `Use clickstack_timeseries or clickstack_table with sourceId "${sourceId}" and metricType/metricName from above.`
+    : `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}" and the columns/attributes above.`;
+  const discoveryNextStep = isMetricSource
+    ? `For more metric names than the sample above, call clickstack_list_metrics with sourceId "${sourceId}". For per-metric attribute keys + sampled values, call clickstack_describe_metric with sourceId and metricName.`
+    : undefined;
+
   const { data: output, isTrimmed } = trimToolResponse({
     source: meta,
     usage: {
@@ -308,11 +598,17 @@ async function describeSourceSchema(
       mapAttributes:
         "Use bracket syntax: SpanAttributes['http.method'], ResourceAttributes['service.name']",
       lowCardinalityValues: lcValuesHint,
+      ...(isMetricSource && {
+        metricNames:
+          'Each entry maps a metric kind (gauge/sum/histogram) to a sample of metric names ' +
+          'available on that table. Pass metricType + metricName on each select item.',
+      }),
     },
     nextSteps: {
-      query: `Use clickstack_timeseries, clickstack_table, or clickstack_search with sourceId "${sourceId}" and the columns/attributes above.`,
+      query: queryNextStep,
       mapAttributeAccess:
         "Use bracket syntax for map columns: ResourceAttributes['service.name'], SpanAttributes['http.method']",
+      ...(discoveryNextStep && { discovery: discoveryNextStep }),
     },
   });
 
@@ -372,9 +668,14 @@ export function registerDescribeSource(
         const controller = new AbortController();
 
         // Promise.race enforces wall-clock timeout regardless of whether
-        // internal ClickHouse calls honour the AbortSignal.
+        // internal ClickHouse calls honour the AbortSignal. Hoist the
+        // timer handle so the finally block can cancel it on the success
+        // path — otherwise a stale controller.abort() fires
+        // DESCRIBE_TIMEOUT_MS after every successful call and the
+        // setTimeout closure stays pinned for the same duration.
+        let timeoutId: ReturnType<typeof setTimeout> | undefined;
         const timeoutPromise = new Promise<never>((_, reject) => {
-          setTimeout(() => {
+          timeoutId = setTimeout(() => {
             controller.abort();
             reject(new Error('DESCRIBE_TIMEOUT'));
           }, DESCRIBE_TIMEOUT_MS);
@@ -412,6 +713,8 @@ export function registerDescribeSource(
             'Failed to describe source schema',
           );
           throw e;
+        } finally {
+          if (timeoutId !== undefined) clearTimeout(timeoutId);
         }
       },
     ),
diff --git a/packages/api/src/mcp/tools/sources/index.ts b/packages/api/src/mcp/tools/sources/index.ts
index 250e4abd91..5d43d1d455 100644
--- a/packages/api/src/mcp/tools/sources/index.ts
+++ b/packages/api/src/mcp/tools/sources/index.ts
@@ -1,7 +1,9 @@
 import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
 
 import type { McpContext, ToolDefinition } from '../types';
+import { registerDescribeMetric } from './describeMetric';
 import { registerDescribeSource } from './describeSource';
+import { registerListMetrics } from './listMetrics';
 import { registerListSources } from './listSources';
 
 const sourcesTools: ToolDefinition = (
@@ -10,6 +12,8 @@ const sourcesTools: ToolDefinition = (
 ) => {
   registerListSources(server, context);
   registerDescribeSource(server, context);
+  registerListMetrics(server, context);
+  registerDescribeMetric(server, context);
 };
 
 export default sourcesTools;
diff --git a/packages/api/src/mcp/tools/sources/listMetrics.ts b/packages/api/src/mcp/tools/sources/listMetrics.ts
new file mode 100644
index 0000000000..20f8ff8009
--- /dev/null
+++ b/packages/api/src/mcp/tools/sources/listMetrics.ts
@@ -0,0 +1,445 @@
+import {
+  chSql,
+  concatChSql,
+  tableExpr,
+} from '@hyperdx/common-utils/dist/clickhouse';
+import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node';
+import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata';
+import { SourceKind } from '@hyperdx/common-utils/dist/types';
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { z } from 'zod';
+
+import { getConnectionById } from '@/controllers/connection';
+import { getSource } from '@/controllers/sources';
+import logger from '@/utils/logger';
+
+import { withToolTracing } from '../../utils/tracing';
+import type { McpContext } from '../types';
+import {
+  QUERYABLE_METRIC_KINDS,
+  type QueryableMetricKind,
+} from './metricKinds';
+
+const DEFAULT_LIMIT = 50;
+const MAX_LIMIT = 500;
+const DEFAULT_LOOKBACK_MS = 24 * 60 * 60 * 1000;
+
+// ─── Cursor ──────────────────────────────────────────────────────────────────
+
+export type ListMetricsCursorPayload = {
+  kind: QueryableMetricKind;
+  lastName: string;
+};
+
+/** @internal Exported for testing. */
+export function encodeCursor(payload: ListMetricsCursorPayload): string {
+  return Buffer.from(JSON.stringify(payload), 'utf8').toString('base64');
+}
+
+/** @internal Exported for testing. */
+export function decodeCursor(raw: string): ListMetricsCursorPayload | null {
+  try {
+    const decoded = Buffer.from(raw, 'base64').toString('utf8');
+    const parsed = JSON.parse(decoded);
+    if (
+      typeof parsed === 'object' &&
+      parsed !== null &&
+      typeof parsed.kind === 'string' &&
+      typeof parsed.lastName === 'string' &&
+      (QUERYABLE_METRIC_KINDS as readonly string[]).includes(parsed.kind)
+    ) {
+      return {
+        kind: parsed.kind as QueryableMetricKind,
+        lastName: parsed.lastName,
+      };
+    }
+  } catch {
+    // fall through
+  }
+  return null;
+}
+
+// ─── Schema ──────────────────────────────────────────────────────────────────
+
+const listMetricsSchema = z.object({
+  sourceId: z
+    .string()
+    .describe(
+      'Source ID. Must reference a metric source — get IDs from clickstack_list_sources.',
+    ),
+  kind: z
+    .enum(QUERYABLE_METRIC_KINDS)
+    .optional()
+    .describe(
+      'Optional metric kind filter. Omit to scan every populated kind on the source ' +
+        '(gauge → sum → histogram). Set to narrow results to one kind.',
+    ),
+  namePattern: z
+    .string()
+    .optional()
+    .describe(
+      'Optional ClickHouse ILIKE pattern applied to MetricName server-side. ' +
+        'Use % as the wildcard. Examples: "system.cpu.%", "%duration%", "http.server.%".',
+    ),
+  startTime: z
+    .string()
+    .optional()
+    .describe(
+      'Restrict to metrics with data points after this ISO 8601 timestamp. ' +
+        'Default: 24 hours before endTime (or now).',
+    ),
+  endTime: z
+    .string()
+    .optional()
+    .describe('End of the time window as ISO 8601. Default: now.'),
+  limit: z
+    .number()
+    .int()
+    .min(1)
+    .max(MAX_LIMIT)
+    .optional()
+    .default(DEFAULT_LIMIT)
+    .describe(
+      `Max metrics returned per page. Default ${DEFAULT_LIMIT}, max ${MAX_LIMIT}.`,
+    ),
+  cursor: z
+    .string()
+    .optional()
+    .describe(
+      'Opaque pagination cursor returned by a previous call as `nextCursor`. ' +
+        'Pass it back unchanged to get the next page.',
+    ),
+});
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+type MetricEntry = {
+  name: string;
+  kind: QueryableMetricKind;
+  unit?: string;
+  description?: string;
+};
+
+function parseTimeRange(
+  startTime?: string,
+  endTime?: string,
+): { error: string } | { startDate: Date; endDate: Date } {
+  const endDate = endTime ? new Date(endTime) : new Date();
+  const startDate = startTime
+    ? new Date(startTime)
+    : new Date(endDate.getTime() - DEFAULT_LOOKBACK_MS);
+  if (isNaN(endDate.getTime()) || isNaN(startDate.getTime())) {
+    return {
+      error: 'Invalid startTime or endTime: must be valid ISO 8601 strings',
+    };
+  }
+  if (startDate >= endDate) {
+    return { error: 'endTime must be greater than startTime' };
+  }
+  return { startDate, endDate };
+}
+
+async function fetchMetricsForKind({
+  clickhouseClient,
+  metadata,
+  kind,
+  databaseName,
+  tableName,
+  connectionId,
+  startDate,
+  endDate,
+  namePattern,
+  afterName,
+  limit,
+}: {
+  clickhouseClient: ClickhouseClient;
+  metadata: ReturnType<typeof getMetadata>;
+  kind: QueryableMetricKind;
+  databaseName: string;
+  tableName: string;
+  connectionId: string;
+  startDate: Date;
+  endDate: Date;
+  namePattern: string | undefined;
+  afterName: string | undefined;
+  limit: number;
+}): Promise<MetricEntry[]> {
+  // Defensive column-presence check so we don't reference MetricUnit /
+  // MetricDescription on non-OTel-default schemas.
+  const columns = await metadata.getColumns({
+    databaseName,
+    tableName,
+    connectionId,
+  });
+  const columnNames = new Set(columns.map(c => c.name));
+  const hasUnit = columnNames.has('MetricUnit');
+  const hasDescription = columnNames.has('MetricDescription');
+
+  const projections = [
+    chSql`MetricName`,
+    ...(hasUnit
+      ? [chSql`anyLast(${{ Identifier: 'MetricUnit' }}) AS MetricUnit`]
+      : []),
+    ...(hasDescription
+      ? [
+          chSql`anyLast(${{ Identifier: 'MetricDescription' }}) AS MetricDescription`,
+        ]
+      : []),
+  ];
+
+  const whereParts = [
+    chSql`TimeUnix >= fromUnixTimestamp64Milli(${{ Int64: startDate.getTime() }})`,
+    chSql`TimeUnix <= fromUnixTimestamp64Milli(${{ Int64: endDate.getTime() }})`,
+    ...(afterName !== undefined
+      ? [chSql`MetricName > ${{ String: afterName }}`]
+      : []),
+    ...(namePattern
+      ? [chSql`MetricName ILIKE ${{ String: namePattern }}`]
+      : []),
+  ];
+
+  const sql = chSql`
+    SELECT ${concatChSql(', ', projections)}
+    FROM ${tableExpr({ database: databaseName, table: tableName })}
+    WHERE ${concatChSql(' AND ', whereParts)}
+    GROUP BY MetricName
+    ORDER BY MetricName ASC
+    LIMIT ${{ Int32: limit }}
+  `;
+
+  type Row = {
+    MetricName: string;
+    MetricUnit?: string;
+    MetricDescription?: string;
+  };
+
+  const response = await clickhouseClient.query<'JSON'>({
+    query: sql.sql,
+    query_params: sql.params,
+    format: 'JSON',
+    connectionId,
+  });
+  const result = (await response.json()) as { data: Row[] };
+  return result.data.map(row => {
+    const entry: MetricEntry = { name: row.MetricName, kind };
+    if (row.MetricUnit) entry.unit = row.MetricUnit;
+    if (row.MetricDescription) entry.description = row.MetricDescription;
+    return entry;
+  });
+}
+
+// ─── Tool registration ───────────────────────────────────────────────────────
+
+export function registerListMetrics(
+  server: McpServer,
+  context: McpContext,
+): void {
+  const { teamId } = context;
+
+  server.registerTool(
+    'clickstack_list_metrics',
+    {
+      title: 'List Metric Names',
+      description:
+        'DISCOVERY: Use this after clickstack_describe_source when you need more metric ' +
+        'names than the per-kind sample shows, or when you want to narrow by ' +
+        'kind / name pattern / time window. ' +
+        'Returns paginated metric names per kind (gauge/sum/histogram) ' +
+        'with optional unit and description (when the OTel-default columns are present). ' +
+        'Pass the returned `nextCursor` back unchanged to fetch the next page.\n\n' +
+        'Workflow: clickstack_list_sources → clickstack_describe_source → ' +
+        'clickstack_list_metrics → clickstack_describe_metric → ' +
+        'clickstack_timeseries|clickstack_table.',
+      inputSchema: listMetricsSchema,
+    },
+    withToolTracing('clickstack_list_metrics', context, async rawInput => {
+      // Re-parse explicitly: the MCP SDK callback signature widens
+      // optional-field types into `unknown`, but the parser produces
+      // the typed shape we need for downstream calls.
+      const input: z.infer<typeof listMetricsSchema> =
+        listMetricsSchema.parse(rawInput);
+      const source = await getSource(teamId.toString(), input.sourceId);
+      if (!source) {
+        return {
+          isError: true,
+          content: [
+            {
+              type: 'text' as const,
+              text: `Source "${input.sourceId}" not found. Call clickstack_list_sources to see available source IDs.`,
+            },
+          ],
+        };
+      }
+      if (source.kind !== SourceKind.Metric) {
+        return {
+          isError: true,
+          content: [
+            {
+              type: 'text' as const,
+              text: `Source "${input.sourceId}" is a "${source.kind}" source, not a metric source. clickstack_list_metrics only works on metric sources — call clickstack_list_sources to find one whose kind is "metric".`,
+            },
+          ],
+        };
+      }
+
+      const timeRange = parseTimeRange(input.startTime, input.endTime);
+      if ('error' in timeRange) {
+        return {
+          isError: true,
+          content: [{ type: 'text' as const, text: timeRange.error }],
+        };
+      }
+      const { startDate, endDate } = timeRange;
+
+      // Decode cursor; reject silently and start over if malformed so a
+      // truncated or tampered cursor does not surface internals.
+      const cursor = input.cursor ? decodeCursor(input.cursor) : null;
+      if (input.cursor && !cursor) {
+        return {
+          isError: true,
+          content: [
+            {
+              type: 'text' as const,
+              text: 'Invalid cursor. Omit cursor to start over, or pass the exact `nextCursor` value returned by a previous call.',
+            },
+          ],
+        };
+      }
+
+      // Resolve which kinds to scan, in order. When a cursor is set,
+      // skip kinds before the cursor's kind (already returned) and start
+      // the cursor's kind at the lastName-exclusive position.
+      const requestedKinds: QueryableMetricKind[] = input.kind
+        ? [input.kind]
+        : QUERYABLE_METRIC_KINDS.filter(k => Boolean(source.metricTables[k]));
+      const startKindIdx = cursor ? requestedKinds.indexOf(cursor.kind) : 0;
+      if (startKindIdx < 0) {
+        // Cursor points at a kind that's not in scope for this call —
+        // safer to error than silently skip.
+        return {
+          isError: true,
+          content: [
+            {
+              type: 'text' as const,
+              text: `Cursor references kind "${cursor!.kind}" but that kind is not in scope for this call. Drop the kind filter or pass a matching cursor.`,
+            },
+          ],
+        };
+      }
+
+      const connection = await getConnectionById(
+        teamId.toString(),
+        source.connection.toString(),
+        true,
+      );
+      if (!connection) {
+        return {
+          isError: true,
+          content: [
+            {
+              type: 'text' as const,
+              text: `Connection not found for source "${input.sourceId}".`,
+            },
+          ],
+        };
+      }
+
+      const clickhouseClient = new ClickhouseClient({
+        host: connection.host,
+        username: connection.username,
+        password: connection.password,
+      });
+      const metadata = getMetadata(clickhouseClient);
+
+      const limit = input.limit ?? DEFAULT_LIMIT;
+      const databaseName = source.from.databaseName;
+
+      const metrics: MetricEntry[] = [];
+      // Per-kind fetch failures, surfaced on the response so the agent
+      // can distinguish "kind genuinely has no metrics" from "the fetch
+      // for that kind failed" — the two need different recovery steps.
+      const partialFailure: { kind: string; error: string }[] = [];
+      let nextCursor: string | undefined;
+      for (let i = startKindIdx; i < requestedKinds.length; i++) {
+        const kind = requestedKinds[i];
+        const tableName = source.metricTables[kind];
+        if (!tableName) continue;
+        const afterName =
+          cursor && cursor.kind === kind && i === startKindIdx
+            ? cursor.lastName
+            : undefined;
+        const remaining = limit - metrics.length;
+        if (remaining <= 0) break;
+        // Fetch one extra row so we can detect more-data-available.
+        let kindMetrics: MetricEntry[];
+        try {
+          kindMetrics = await fetchMetricsForKind({
+            clickhouseClient,
+            metadata,
+            kind,
+            databaseName,
+            tableName,
+            connectionId: source.connection.toString(),
+            startDate,
+            endDate,
+            namePattern: input.namePattern,
+            afterName,
+            limit: remaining + 1,
+          });
+        } catch (e) {
+          const message = e instanceof Error ? e.message : String(e);
+          logger.warn(
+            { sourceId: input.sourceId, kind, error: message },
+            'Failed to list metrics for kind',
+          );
+          partialFailure.push({
+            kind,
+            error: message.replace(/\s+/g, ' ').trim().slice(0, 200),
+          });
+          continue;
+        }
+        if (kindMetrics.length > remaining) {
+          // We hit the cap for this kind; emit cursor pointing at the
+          // last returned name and stop iterating further kinds.
+          const truncated = kindMetrics.slice(0, remaining);
+          metrics.push(...truncated);
+          nextCursor = encodeCursor({
+            kind,
+            lastName: truncated[truncated.length - 1].name,
+          });
+          break;
+        }
+        metrics.push(...kindMetrics);
+      }
+
+      const responseObj: Record<string, unknown> = {
+        metrics,
+        ...(nextCursor && { nextCursor }),
+        ...(partialFailure.length > 0 && {
+          partialFailure,
+          hint:
+            'Listing failed for some metric kinds — results may be incomplete. ' +
+            'Retry the call; if the failure persists, narrow startTime/endTime or pin a single `kind`.',
+        }),
+        ...(metrics.length === 0 &&
+          partialFailure.length === 0 && {
+            hint:
+              'No metrics matched. Try widening the time window (startTime/endTime), ' +
+              'removing the namePattern filter, or omitting `kind` to scan every populated metric table.',
+          }),
+        usage:
+          'Pass `metricType` + `metricName` from each entry to clickstack_timeseries / clickstack_table. ' +
+          'For per-metric attribute keys and sampled values, call clickstack_describe_metric.',
+      };
+
+      return {
+        content: [
+          {
+            type: 'text' as const,
+            text: JSON.stringify(responseObj, null, 2),
+          },
+        ],
+      };
+    }),
+  );
+}
diff --git a/packages/api/src/mcp/tools/sources/listSources.ts b/packages/api/src/mcp/tools/sources/listSources.ts
index 0c1e614689..a69d62ae87 100644
--- a/packages/api/src/mcp/tools/sources/listSources.ts
+++ b/packages/api/src/mcp/tools/sources/listSources.ts
@@ -7,6 +7,7 @@ import { getSources } from '@/controllers/sources';
 
 import { withToolTracing } from '../../utils/tracing';
 import type { McpContext } from '../types';
+import { sanitizeMetricTables } from './metricKinds';
 
 export function registerListSources(
   server: McpServer,
@@ -72,7 +73,13 @@ export function registerListSources(
             traceId: s.traceIdExpression,
           };
         } else if (s.kind === SourceKind.Metric) {
-          meta.metricTables = s.metricTables;
+          // Filter out implementation-detail keys (e.g. a stray Mongoose
+          // `_id` on the metricTables subdoc) so the agent only sees
+          // valid metric kinds.
+          const tables = sanitizeMetricTables(
+            s.metricTables as Record<string, unknown> | undefined,
+          );
+          if (tables) meta.metricTables = tables;
         }
 
         return meta;
diff --git a/packages/api/src/mcp/tools/sources/metricKinds.ts b/packages/api/src/mcp/tools/sources/metricKinds.ts
new file mode 100644
index 0000000000..bdaac920ba
--- /dev/null
+++ b/packages/api/src/mcp/tools/sources/metricKinds.ts
@@ -0,0 +1,75 @@
+import { MetricsDataType } from '@hyperdx/common-utils/dist/types';
+
+/**
+ * Metric kinds the query renderer can translate today. Mirrors the three
+ * MetricsDataType members that `translateMetricChartConfig` in
+ * common-utils branches over; `summary` and `"exponential histogram"`
+ * are intentionally excluded because the renderer throws on them.
+ *
+ * Declared as plain string literals (not MetricsDataType enum members)
+ * so `z.enum(...)` narrows correctly at the MCP SDK callback boundary —
+ * referencing the enum here pessimises Zod's inference to `unknown` on
+ * every optional field of the surrounding tool input schema, which then
+ * cascades into "not assignable to type string" errors at every handler
+ * call site. The compile-time assertion below guarantees the literals
+ * stay in sync with the enum so adding a new queryable kind cannot be
+ * done in only one place.
+ *
+ * Imported by every metric-aware MCP tool (clickstack_describe_source,
+ * clickstack_list_metrics, clickstack_describe_metric) and by the
+ * clickstack_timeseries / clickstack_table select-item schema.
+ */
+export const QUERYABLE_METRIC_KINDS = ['gauge', 'sum', 'histogram'] as const;
+
+export type QueryableMetricKind = (typeof QUERYABLE_METRIC_KINDS)[number];
+
+// Compile-time assertion that the string literals above still match the
+// MetricsDataType enum members. If a future MetricsDataType rename
+// breaks this list, this line fails to type-check.
+const _assertKindsMatchEnum: readonly QueryableMetricKind[] = [
+  MetricsDataType.Gauge,
+  MetricsDataType.Sum,
+  MetricsDataType.Histogram,
+];
+void _assertKindsMatchEnum;
+
+/**
+ * Allowed kind keys on the `metricTables` map when serialising a metric
+ * source into an MCP response. Includes the non-queryable kinds
+ * (`summary`, `exponential histogram`) because the model schema declares
+ * tables for them even though the query renderer cannot yet translate
+ * them.
+ */
+const ALLOWED_METRIC_TABLE_KINDS: readonly string[] =
+  Object.values(MetricsDataType);
+
+/**
+ * Filter source.metricTables to the known kind keys before emitting it
+ * in a clickstack_list_sources / clickstack_describe_source response.
+ *
+ * Belt-and-braces defense: even after the model schema declares
+ * `_id: false` on the metricTables subdoc, existing documents persisted
+ * before the schema fix may still carry a stray `_id` value that
+ * Mongoose serialises alongside the legitimate kind keys. Filtering at
+ * the response boundary keeps the agent-facing payload free of
+ * implementation-detail keys.
+ *
+ * Accepts both plain objects and Mongoose subdocuments — we look up
+ * each allowed kind via property access rather than enumerating keys,
+ * because Mongoose subdoc instances expose field values through getters
+ * (not as own enumerable properties).
+ */
+export function sanitizeMetricTables(
+  metricTables: Record<string, unknown> | undefined | null,
+): Record<string, string> | undefined {
+  if (!metricTables) return undefined;
+  const source = metricTables as Record<string, unknown>;
+  const out: Record<string, string> = {};
+  for (const kind of ALLOWED_METRIC_TABLE_KINDS) {
+    const value = source[kind];
+    if (typeof value === 'string' && value.length > 0) {
+      out[kind] = value;
+    }
+  }
+  return Object.keys(out).length > 0 ? out : undefined;
+}
diff --git a/packages/api/src/models/source.ts b/packages/api/src/models/source.ts
index b7e7d55d33..e570617985 100644
--- a/packages/api/src/models/source.ts
+++ b/packages/api/src/models/source.ts
@@ -237,18 +237,27 @@ export const SessionSource = Source.discriminator<ISessionSource>(
 // --------------------------
 // Metric discriminator
 // --------------------------
+// metricTables is declared as a nested Schema with `_id: false` so the
+// embedded subdoc does not auto-generate an ObjectId. Without this opt-out
+// Mongoose adds an `_id` field that leaks into MCP responses alongside
+// the queryable kind keys (gauge/sum/histogram/...).
+const MetricTablesSchema = new Schema(
+  {
+    [MetricsDataType.Gauge]: String,
+    [MetricsDataType.Histogram]: String,
+    [MetricsDataType.Sum]: String,
+    [MetricsDataType.Summary]: String,
+    [MetricsDataType.ExponentialHistogram]: String,
+  },
+  { _id: false },
+);
+
 type IMetricSource = Extract<ISource, { kind: SourceKind.Metric }>;
 export const MetricSource = Source.discriminator<IMetricSource>(
   SourceKind.Metric,
   new Schema<Extract<ISource, { kind: SourceKind.Metric }>>({
     metricTables: {
-      type: {
-        [MetricsDataType.Gauge]: String,
-        [MetricsDataType.Histogram]: String,
-        [MetricsDataType.Sum]: String,
-        [MetricsDataType.Summary]: String,
-        [MetricsDataType.ExponentialHistogram]: String,
-      },
+      type: MetricTablesSchema,
       default: undefined,
     },
     resourceAttributesExpression: String,