diff --git a/.github/workflows/evals.yml b/.github/workflows/evals.yml
new file mode 100644
index 0000000..c4b951a
--- /dev/null
+++ b/.github/workflows/evals.yml
@@ -0,0 +1,87 @@
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License
+# 2.0; you may not use this file except in compliance with the Elastic License
+# 2.0.
+
+name: Evals
+
+on:
+  # Manually trigger a run from the Actions UI (useful for ad-hoc evaluation).
+  workflow_dispatch:
+
+  # Nightly run at 02:00 UTC to catch regressions before the work day starts.
+  schedule:
+    - cron: "0 2 * * *"
+
+  # Fires on every PR label event; the job's `if` keeps only the `evals` label.
+  # Applying labels needs triage access or higher, so only trusted collaborators
+  # can start a run, which matters: pull_request_target exposes repo secrets.
+  pull_request_target:
+    types: [labeled]
+
+# Cancel superseded runs so redundant eval jobs don't waste LLM quota. Keyed by
+# PR number and label: under pull_request_target, github.ref is the base branch.
+concurrency:
+  group: evals-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || github.event_name }}
+  cancel-in-progress: true
+
+jobs:
+  evals:
+    name: LLM Eval Suite
+    runs-on: ubuntu-latest
+    # Read-only token (PR-head code runs here); PR runs need the `evals` label.
+    permissions:
+      contents: read
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      (github.event_name == 'pull_request_target' && github.event.label.name == 'evals')
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # For pull_request_target, check out the PR head so the eval runs
+          # against the proposed changes, not the base branch.
+          ref: >-
+            ${{ github.event_name == 'pull_request_target'
+            && github.event.pull_request.head.sha || github.sha }}
+          # Don't leave the job token in .git/config: later steps execute
+          # PR-supplied code (npm ci lifecycle scripts, the eval suite).
+          persist-credentials: false
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "22"  # quoted so the version is always read as a string
+          cache: "npm"
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run evals
+        env:
+          RUN_LLM_EVALS: "1"
+          # ANTHROPIC_API_KEY selects Claude Haiku (preferred); OPENAI_API_KEY is the
+          # GPT-4o-mini fallback. LITELLM_BASE_URL (fed by the EVAL_LITELLM_BASE_URL
+          # secret) routes through a LiteLLM proxy instead of the direct OpenAI endpoint.
+          ANTHROPIC_API_KEY: ${{ secrets.EVAL_ANTHROPIC_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.EVAL_OPENAI_API_KEY }}
+          LITELLM_BASE_URL: ${{ secrets.EVAL_LITELLM_BASE_URL }}
+          # JSON array describing the Elastic cluster the MCP server targets.
+          # Shape: [{"name":"primary","elasticsearchUrl":"...","kibanaUrl":"...","elasticsearchApiKey":"..."}]
+          CLUSTERS_JSON: ${{ secrets.EVAL_CLUSTERS_JSON }}
+        run: |
+          set -o pipefail
+          npm run test:evals 2>&1 | tee eval-output.txt
+
+      - name: Post eval results to job summary
+        if: always()
+        run: |
+          {
+            echo "## Eval results"
+            echo ""
+            if [ -f eval-output.txt ]; then
+              cat eval-output.txt
+            else
+              echo "_No eval output captured._"
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/README.md b/README.md
index b110713..7f92df6 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ An [MCP App](https://modelcontextprotocol.io/extensions/apps/overview) that brin
 
 ## What This Does
 
-This project provides six interactive security operations tools, each with a rich React-based UI that renders inline when Claude (or another MCP host) calls the tool:
+This project provides seven interactive security operations tools, each with a rich React-based UI that renders inline when Claude (or another MCP host) calls the tool:
 
 | Tool | What It Does |
 |------|-------------|
@@ -24,6 +24,7 @@ This project provides six interactive security operations tools, each with a ric
 | **Detection Rules** | Browse, tune, and manage detection rules with KQL search and noisy rules analysis |
 | **Threat Hunt** | ES\|QL workbench with clickable entities and a D3 investigation graph |
 | **Sample Data** | Generate ECS security events for demos across 4 attack chain scenarios |
+| **SIEM Migration** | Migrate detection rules from Splunk to Elastic Security — upload SPL, AI-translate, review per-rule diff, fix resources, and install |
 
 See [docs/features.md](docs/features.md) for a full breakdown of each tool's capabilities.
 
diff --git a/docs/evals.md b/docs/evals.md
new file mode 100644
index 0000000..ee144cc
--- /dev/null
+++ b/docs/evals.md
@@ -0,0 +1,318 @@
+# Eval Harness
+
+LLM-powered evals for the Elastic Security MCP app's skill layer. The harness
+tests whether the LLM host activates the right skill, calls the right tools in
+the right order, and does not fire on unrelated queries.
+
+Regular `npm test` never touches this harness — it only runs when
+`RUN_LLM_EVALS=1` is set, so CI stays fast and free of LLM costs.
+
+---
+
+## Architecture
+
+```
+Dataset (examples)
+   │
+   ▼
+runner.ts ─ describe.skipIf(!RUN_LLM_EVALS)(dataset.name, () => {
+   │            for each example:
+   │               trajectory = await runMcpHostLoop(input)
+   │               scores     = await evaluators[*](trajectory, expected)
+   │               assert score >= passingScore
+   │            afterAll: print Markdown table to stdout
+   │         })
+   │
+   ├── runMcpHostLoop(input, opts?)
+   │      InMemoryTransport ─ Client ─ McpServer
+   │      LLM provider (Anthropic / OpenAI / LiteLLM)
+   │      loop ≤ MAX_TURNS=8: LLM → tool calls → results → repeat
+   │      returns Trajectory (ordered ToolCall[])
+   │      opts.systemPrompt: optional host-level system prompt (see below)
+   │
+   └── Evaluators
+          skill-activation    binary: was skill tool called?
+          negative-activation binary: was skill tool correctly absent?
+          tool-selection      F1 precision/recall against expected.tools
+          trajectory          LCS similarity of actual vs expected sequence
+          criteria            LLM-as-judge against natural-language assertions
+```
+
+### Key design choices
+
+| Decision | Rationale |
+|---|---|
+| In-process via `InMemoryTransport` | No network, no server process — evals run anywhere |
+| `describe.skipIf(!RUN_LLM_EVALS)` | Zero LLM cost in regular `npm test` |
+| `Evaluator` is a plain function | Easy to compose; factory pattern for stateful evaluators (criteria) |
+| `'N/A'` return instead of 0 | Datasets omit irrelevant evaluator dimensions without masking real regressions |
+| LCS for trajectory | Order matters; set-based coverage is tool-selection's job |
+
+---
+
+## Dataset shape
+
+A dataset is a `Dataset` object exported from a `*.dataset.ts` file:
+
+```typescript
+import type { Dataset } from "../types.js";
+
+export const myDataset: Dataset = {
+  name: "my-skill",
+  examples: [
+    {
+      id: "ms-pos-01",                    // stable, unique — appears in CI summaries
+      input: "user message to the LLM",   // the query sent to runMcpHostLoop
+      expected: {
+        skill: "entry-point-tool-name",   // tool the skill SKILL.md instructs the LLM to call
+        tools: ["entry-point-tool-name"], // ordered list for trajectory/tool-selection
+        criteria: [                       // natural-language assertions for LLM-as-judge
+          "The model called the correct entry-point tool",
+        ],
+      },
+    },
+  ],
+};
+```
+
+All three `expected` fields are **optional**:
+
+| Field | Evaluators that use it | Omit when… |
+|---|---|---|
+| `skill` | `skill-activation`, `negative-activation` | Dataset doesn't test skill routing |
+| `tools` | `tool-selection`, `trajectory` | No ordered tool expectation |
+| `criteria` | `criteria` | No LLM-as-judge needed (saves cost) |
+
+Omitting a field causes the evaluator to return `'N/A'` for that example rather than a false 0.
+
+### Positive vs distractor examples
+
+A **positive** example is a query that *should* activate the skill.  
+A **distractor** example is an unrelated query that *should not*.
+
+Use separate `runDataset` calls with different evaluators for each group:
+
+```typescript
+// Positive: skill should fire
+runDataset(
+  { name: "my-skill: positives", examples: positiveExamples },
+  { "skill-activation": skillActivation, "tool-selection": toolSelection },
+  { passingScore: 0.8 }
+);
+
+// Distractor: skill must NOT fire (gate is 100%)
+runDataset(
+  { name: "my-skill: distractors", examples: distractorExamples },
+  { "negative-activation": negativeActivation },
+  { passingScore: 1.0 }
+);
+```
+
+---
+
+## Evaluator catalog
+
+### `skill-activation`
+
+**Type**: binary · **Score**: `1` if `expected.skill` found in trajectory, `0` otherwise  
+**Returns `'N/A'`**: when `expected.skill` is absent  
+**Gate**: ≥ 0.8 on positive examples (use `passingScore: 0.8`)
+
+Tests whether the LLM called the skill's model-facing entry-point tool at
+least once.
+
+### `negative-activation`
+
+**Type**: binary · **Score**: `1` if `expected.skill` is *absent* from trajectory, `0` if present  
+**Returns `'N/A'`**: when `expected.skill` is absent  
+**Gate**: 1.0 on distractor examples (use `passingScore: 1.0`)
+
+Tests that the skill does not over-trigger on unrelated queries. Any false
+positive here means the skill's SKILL.md is too broad.
+
+### `tool-selection`
+
+**Type**: F1 · **Score**: harmonic mean of precision and recall against `expected.tools` (set-based)  
+**Returns `'N/A'`**: when `expected.tools` is absent  
+**Gate**: ≥ 0.8 on positive examples
+
+Tests *which* tools were called, ignoring order. Missed tools lower recall;
+spurious tools lower precision. Failure reason includes `missed: [...]` and
+`extra: [...]`.
+
+### `trajectory`
+
+**Type**: LCS similarity · **Score**: `lcs(actual, expected) / max(|actual|, |expected|)`  
+**Returns `'N/A'`**: when `expected.tools` is absent  
+**Gate**: ≥ 0.7 on positive examples (sequence matching is looser than set matching)
+
+Tests *order*. Dividing by `max` penalises both missing and extra steps.
+Use alongside `tool-selection` for full coverage.
+
+### `criteria`
+
+**Type**: LLM-as-judge · **Score**: `0.0–1.0` parsed from a rubric prompt response  
+**Returns `'N/A'`**: when `expected.criteria` is absent  
+**Gate**: ≥ 0.7
+
+Calls the judge LLM with the trajectory `{tool, args}` pairs and the
+criteria list. Asks for `{"score": <0–1>, "reasoning": "..."}`. Falls back
+to regex number extraction if JSON parse fails. Use for semantic assertions
+that structural evaluators can't express.
+
+**Cost**: one extra LLM call per example. Omit `expected.criteria` to skip.
+
+---
+
+## How to add a dataset
+
+1. **Create the data file** `evals/datasets/<skill-name>.dataset.ts`:
+
+   ```typescript
+   import type { Dataset, Example } from "../types.js";
+
+   const SKILL_TOOL = "my-tool"; // the model-facing entry-point tool
+
+   export const positiveExamples: Example[] = [
+     { id: "ms-pos-01", input: "...", expected: { skill: SKILL_TOOL, tools: [SKILL_TOOL] } },
+     // add ≥ 4 examples
+   ];
+
+   export const distractorExamples: Example[] = [
+     { id: "ms-neg-01", input: "...", expected: { skill: SKILL_TOOL } },
+     // add ≥ 4 examples
+   ];
+
+   export const myDataset: Dataset = {
+     name: "<skill-name>",
+     examples: [...positiveExamples, ...distractorExamples],
+   };
+   ```
+
+2. **Create the eval spec** `evals/<skill-name>.eval.test.ts`:
+
+   ```typescript
+   import { runDataset } from "./runner.js";
+   import { positiveExamples, distractorExamples } from "./datasets/<skill-name>.dataset.js";
+   import { skillActivation } from "./evaluators/skill-activation.js";
+   import { negativeActivation } from "./evaluators/negative-activation.js";
+   import { toolSelection } from "./evaluators/tool-selection.js";
+
+   runDataset(
+     { name: "<skill-name>: positives", examples: positiveExamples },
+     { "skill-activation": skillActivation, "tool-selection": toolSelection },
+     { passingScore: 0.8 }
+   );
+
+   runDataset(
+     { name: "<skill-name>: distractors", examples: distractorExamples },
+     { "negative-activation": negativeActivation },
+     { passingScore: 1.0 }
+   );
+   ```
+
+3. **Run locally**:
+
+   ```bash
+   # Anthropic (preferred)
+   ANTHROPIC_API_KEY=sk-ant-... npm run test:evals
+
+   # OpenAI / LiteLLM proxy
+   OPENAI_API_KEY=sk-... LITELLM_BASE_URL=https://... npm run test:evals
+
+   # Local Ollama (zero-cost smoke run; tool-calling quality varies by model)
+   #
+   # Pick a model that meets BOTH of these requirements:
+   #   (1) ≥14B parameters — anything smaller (e.g. llama3.1:8b, qwen3:8b)
+   #       falls below the threshold where tool-calling decisions become
+   #       useful signal rather than noise; sub-14B "passes" are coincidence,
+   #       not a result.
+   #   (2) Exposes /v1/chat/completions — required by this harness. A few
+   #       Ollama tags expose /generate only and return
+   #       "does not support chat" (notably qwen2.5:32b-instruct-q4_K_M as
+   #       of Ollama 0.3.x).
+   #
+   # Verified candidates: `qwen2.5:14b-instruct`, `qwen3:14b`, `mistral-small:24b`,
+   # `qwen2.5:32b-instruct` (non-q4_K_M tags). `ollama pull <model>` first.
+   OPENAI_API_KEY=ollama \
+     LITELLM_BASE_URL=http://localhost:11434/v1 \
+     OPENAI_MODEL=qwen2.5:14b-instruct \
+     npm run test:evals
+   ```
+
+   `createEvalServer` stubs all Elastic-cluster calls, so no `CLUSTERS_JSON`
+   is needed when running skill-routing evaluators (`skill-activation`,
+   `tool-selection`, `negative-activation`, `trajectory`, `criteria`).
+
+4. **Trigger in CI**: open a PR and add the `evals` label (requires triage access or higher).
+
+---
+
+## Host system prompt (`HostLoopOptions.systemPrompt`)
+
+Real MCP hosts (Claude Desktop, Cursor) inject a host-level system
+prompt that constrains tool selection, response shape, and confirmation
+flow. Without one, the harness measures raw model-vs-tools behavior —
+which can over- or under-report activation depending on the model
+family. Use `HostLoopOptions.systemPrompt` to pin behavior to what
+production will instruct, or to swap in a `SKILL.md` body when testing
+skill-driven flows.
+
+```typescript
+import { runMcpHostLoop } from "./runMcpHostLoop.js";
+import skillBody from "../skills/automatic-migration/SKILL.md?raw";
+
+const trajectory = await runMcpHostLoop(example.input, {
+  server: createEvalServer(),
+  systemPrompt: skillBody,    // copy SKILL.md verbatim, like the real host
+});
+```
+
+Provider handling:
+
+- **OpenAI / LiteLLM** — `role: "system"` message is the first entry in
+  the `messages` array, per the Chat Completions schema.
+- **Anthropic** — the adapter strips system-roled messages out of the
+  array and passes their concatenated content via the top-level
+  `system` parameter on `messages.create` (the only place Anthropic
+  accepts a system prompt).
+- **Empty / whitespace-only string** — treated identically to omitting
+  the option (no system message is injected, no top-level parameter is
+  sent). This keeps "absence of system prompt" observable in evals.
+
+---
+
+## CI gating
+
+### Workflow: `.github/workflows/evals.yml`
+
+| Trigger | When |
+|---|---|
+| `workflow_dispatch` | Manual run from Actions UI |
+| `schedule` | Nightly at 02:00 UTC |
+| `pull_request_target` | When `evals` label is added to a PR |
+
+The concurrency group `evals-<ref>` cancels superseded runs to avoid wasting
+LLM quota on stale pushes.
+
+### Required secrets
+
+| Secret | Purpose |
+|---|---|
+| `EVAL_ANTHROPIC_API_KEY` | Anthropic API key (priority provider) |
+| `EVAL_OPENAI_API_KEY` | OpenAI / LiteLLM API key (fallback) |
+| `EVAL_LITELLM_BASE_URL` | Optional LiteLLM proxy base URL |
+| `EVAL_CLUSTERS_JSON` | Elastic cluster credentials for the MCP server (not needed by suites that use `createEvalServer`, which stubs cluster calls) |
+
+### Passing thresholds (recommended defaults)
+
+| Evaluator | Positives | Distractors |
+|---|---|---|
+| `skill-activation` | ≥ 0.8 | — |
+| `negative-activation` | — | = 1.0 |
+| `tool-selection` | ≥ 0.8 | — |
+| `trajectory` | ≥ 0.7 | — |
+| `criteria` | ≥ 0.7 | — |
+
+The full eval output, including the runner's Markdown results table, is appended
+to the GitHub Actions job summary (`$GITHUB_STEP_SUMMARY`) after every run.
diff --git a/docs/features.md b/docs/features.md
index 027b5c9..c2c4620 100644
--- a/docs/features.md
+++ b/docs/features.md
@@ -79,3 +79,17 @@ Rule management dashboard:
 Generate ECS-compliant security events:
 - Windows Credential Theft, AWS Privilege Escalation, Okta Identity Takeover, Ransomware Kill Chain
 - All data tagged for safe cleanup
+
+## SIEM Migration
+
+Guided workbench for migrating detection rules from Splunk (QRadar and Sentinel-One support coming) to Elastic Security. Triggered by the `automatic-migration` skill (`migrate-rules` tool):
+
+- **Vendor selector**: Splunk active; QRadar and Sentinel-One shown as "Coming soon" — re-enabling a vendor is a one-line flag flip
+- **Upload step**: drag-and-drop a JSON export file, use the file picker, or paste a rules array directly
+- **AI translation**: Kibana's SIEM migrations service converts SPL to Elastic detection rule JSON; a live progress bar polls every 3 seconds
+- **Three-column review**: original SPL / generated rule (read-only) / editable rule side-by-side for every translated rule
+- **Per-rule drawer**: structured form for key rule fields (name, description, type, query, language, severity, risk score); "Re-validate" saves as `partial`, "Save" uses the selected translation result
+- **Resources drawer**: lists all unresolved macros and lookups auto-expanded; each row has an individual Save button; resolved definitions collapsible
+- **Translation statuses**: `full` (production-ready), `partial` (needs tuning), `untranslatable` (skipped at install)
+- **Install step**: one-click install of all translatable rules into Elastic Security as disabled; "Back to review" available before confirming
+- **Done summary**: installed/failed tile counts; "Start another migration" resets the workbench
diff --git a/eslint.config.js b/eslint.config.js
index 382ca72..cde436c 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -16,6 +16,7 @@ export default tseslint.config(
     files: [
       'src/**/*.ts',
       'src/**/*.tsx',
+      'evals/**/*.ts',
       '*.ts',
       'scripts/**/*.js',
       '*.mjs',
diff --git a/evals/automatic-migration.eval.test.ts b/evals/automatic-migration.eval.test.ts
new file mode 100644
index 0000000..438bb71
--- /dev/null
+++ b/evals/automatic-migration.eval.test.ts
@@ -0,0 +1,55 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+/**
+ * End-to-end eval spec for the automatic-migration skill.
+ *
+ * Proves skill-activation and boundary discrimination against the
+ * automatic-migration proof dataset. Run via:
+ *
+ *   npm run test:evals
+ *
+ * This suite is skipped in regular `npm test` because runDataset wraps
+ * everything in `describe.skipIf(!process.env.RUN_LLM_EVALS)`.
+ *
+ * Gate summary:
+ *   positives   — skill-activation + tool-selection ≥ 80%
+ *   distractors — negative-activation = 100% (any false positive is a regression)
+ */
+
+import { runDataset } from "./runner.js";
+import {
+  positiveExamples,
+  distractorExamples,
+} from "./datasets/automatic-migration.dataset.js";
+import { skillActivation } from "./evaluators/skill-activation.js";
+import { negativeActivation } from "./evaluators/negative-activation.js";
+import { toolSelection } from "./evaluators/tool-selection.js";
+import { createEvalServer } from "./helpers/evalServer.js";
+
+// Positives: queries that should route to the skill's entry-point tool.
+const positiveEvaluators = {
+  "skill-activation": skillActivation,
+  "tool-selection": toolSelection,
+};
+
+runDataset(
+  { name: "automatic-migration: positives", examples: positiveExamples },
+  positiveEvaluators,
+  { passingScore: 0.8, createServer: createEvalServer } // 80% gate
+);
+
+// Distractors: unrelated queries; the skill tool must stay out of the trajectory.
+const distractorEvaluators = {
+  "negative-activation": negativeActivation,
+};
+
+runDataset(
+  { name: "automatic-migration: distractors", examples: distractorExamples },
+  distractorEvaluators,
+  { passingScore: 1.0, createServer: createEvalServer } // 100% — any false positive is a regression
+);
diff --git a/evals/datasets/automatic-migration.dataset.ts b/evals/datasets/automatic-migration.dataset.ts
new file mode 100644
index 0000000..9c02ca1
--- /dev/null
+++ b/evals/datasets/automatic-migration.dataset.ts
@@ -0,0 +1,128 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { Dataset, Example } from "../types.js";
+
+/**
+ * Model-facing entry-point tool of the automatic-migration skill (registered in
+ * src/tools/migration.ts); every example below names it in `expected`.
+ */
+const SKILL_TOOL = "migrate-rules";
+
+// ---------------------------------------------------------------------------
+// Positive examples — migrate-rules is the expected skill and the only expected tool
+// ---------------------------------------------------------------------------
+
+export const positiveExamples: Example[] = [
+  {
+    id: "am-pos-01",
+    input: "I want to migrate my Splunk rules to Elastic Security",
+    expected: {
+      skill: SKILL_TOOL,
+      tools: [SKILL_TOOL],
+    },
+  },
+  {
+    id: "am-pos-02",
+    input: "Help me upload my SPL bundle and convert the detections", // says "SPL", never "Splunk" or "migrate"
+    expected: {
+      skill: SKILL_TOOL,
+      tools: [SKILL_TOOL],
+    },
+  },
+  {
+    id: "am-pos-03",
+    input: "We're onboarding from Splunk — how do I bring our detection rules over?",
+    expected: {
+      skill: SKILL_TOOL,
+      tools: [SKILL_TOOL],
+    },
+  },
+  {
+    id: "am-pos-04",
+    input: "Start a SIEM migration for our 200 Splunk correlation rules",
+    expected: {
+      skill: SKILL_TOOL,
+      tools: [SKILL_TOOL],
+    },
+  },
+  {
+    id: "am-pos-05",
+    input: "Convert our detection rules from Splunk to Elastic format",
+    expected: {
+      skill: SKILL_TOOL,
+      tools: [SKILL_TOOL],
+    },
+  },
+  {
+    id: "am-pos-06",
+    input: "Install the translated rules from my last migration run", // install-phase request, no vendor named
+    expected: {
+      skill: SKILL_TOOL,
+      tools: [SKILL_TOOL],
+    },
+  },
+];
+
+// ---------------------------------------------------------------------------
+// Distractor examples — other security tasks; migrate-rules must NOT be called
+// ---------------------------------------------------------------------------
+
+export const distractorExamples: Example[] = [
+  {
+    id: "am-neg-01",
+    input: "Show me which detection rules are generating the most false positives", // rule noise analysis, not migration
+    expected: {
+      // `skill` names the tool whose absence negativeActivation checks; `tools` is omitted
+      skill: SKILL_TOOL,
+    },
+  },
+  {
+    id: "am-neg-02",
+    input: "Triage the open critical alerts from the last 24 hours",
+    expected: {
+      skill: SKILL_TOOL,
+    },
+  },
+  {
+    id: "am-neg-03",
+    input: "Create a threat hunt for lateral movement via PsExec",
+    expected: {
+      skill: SKILL_TOOL,
+    },
+  },
+  {
+    id: "am-neg-04",
+    input: "Open a new case for the ransomware incident on host SRVWIN04",
+    expected: {
+      skill: SKILL_TOOL,
+    },
+  },
+  {
+    id: "am-neg-05",
+    input: "Run an ES|QL query to find brute-force login attempts in the last hour",
+    expected: {
+      skill: SKILL_TOOL,
+    },
+  },
+  {
+    id: "am-neg-06",
+    input: "Generate sample endpoint data so I can test my detection rules", // mentions detection rules, asks for sample data
+    expected: {
+      skill: SKILL_TOOL,
+    },
+  },
+];
+
+// ---------------------------------------------------------------------------
+// Combined dataset (positives, then distractors) for reference / cross-dataset tooling
+// ---------------------------------------------------------------------------
+
+export const automaticMigrationDataset: Dataset = {
+  name: "automatic-migration",
+  examples: [...positiveExamples, ...distractorExamples],
+};
diff --git a/evals/datasets/detection-rule-management.dataset.ts b/evals/datasets/detection-rule-management.dataset.ts
new file mode 100644
index 0000000..a1e2a2c
--- /dev/null
+++ b/evals/datasets/detection-rule-management.dataset.ts
@@ -0,0 +1,99 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { Dataset, Example } from "../types.js";
+
+/**
+ * Model-facing entry-point tool of the detection-rule-management skill (registered
+ * in src/tools/detection-rules.ts); every example below names it in `expected`.
+ */
+const SKILL_TOOL = "manage-rules";
+
+// ---------------------------------------------------------------------------
+// Positive examples — manage-rules is the expected skill and the only expected tool
+// ---------------------------------------------------------------------------
+
+export const positiveExamples: Example[] = [
+  {
+    id: "drm-pos-01",
+    input: "Show me my noisy rules — which detection rules are generating the most alerts?",
+    expected: {
+      skill: SKILL_TOOL,
+      tools: [SKILL_TOOL],
+    },
+  },
+  {
+    id: "drm-pos-02",
+    input: "List all my currently enabled detection rules",
+    expected: {
+      skill: SKILL_TOOL,
+      tools: [SKILL_TOOL],
+    },
+  },
+  {
+    id: "drm-pos-03",
+    input: "Find high severity detection rules related to PowerShell execution",
+    expected: {
+      skill: SKILL_TOOL,
+      tools: [SKILL_TOOL],
+    },
+  },
+  {
+    id: "drm-pos-04",
+    input: "What detection rules do I have covering initial access tactics?",
+    expected: {
+      skill: SKILL_TOOL,
+      tools: [SKILL_TOOL],
+    },
+  },
+];
+
+// ---------------------------------------------------------------------------
+// Distractor examples — other security tasks; manage-rules must NOT be called
+// ---------------------------------------------------------------------------
+
+export const distractorExamples: Example[] = [
+  {
+    id: "drm-neg-01",
+    input: "Create a new case for a ransomware incident I'm currently investigating",
+    expected: {
+      // `skill` names the tool whose absence negativeActivation checks; `tools` is omitted
+      skill: SKILL_TOOL,
+    },
+  },
+  {
+    id: "drm-neg-02",
+    input: "Show me all critical alerts that fired in the last hour", // asks about alerts, not rules
+    expected: {
+      skill: SKILL_TOOL,
+    },
+  },
+  {
+    id: "drm-neg-03",
+    input: "Run an ES|QL query to find failed SSH login attempts on my Linux hosts",
+    expected: {
+      skill: SKILL_TOOL,
+    },
+  },
+  {
+    id: "drm-neg-04",
+    input: "A process on host web-01 just spawned cmd.exe — help me investigate",
+    expected: {
+      skill: SKILL_TOOL,
+    },
+  },
+];
+
+// ---------------------------------------------------------------------------
+// Combined dataset (positives, then distractors) for reference / cross-dataset tooling
+// ---------------------------------------------------------------------------
+
+export const detectionRuleManagementDataset: Dataset = {
+  name: "detection-rule-management",
+  examples: [...positiveExamples, ...distractorExamples],
+};
+
diff --git a/evals/detection-rule-management.eval.test.ts b/evals/detection-rule-management.eval.test.ts
new file mode 100644
index 0000000..23d14f2
--- /dev/null
+++ b/evals/detection-rule-management.eval.test.ts
@@ -0,0 +1,55 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+/**
+ * End-to-end eval spec for the detection-rule-management skill.
+ *
+ * Proves the eval harness (runner → runMcpHostLoop → evaluators) works
+ * against a real registered skill using the proof dataset. Run via:
+ *
+ *   npm run test:evals
+ *
+ * This suite is skipped in regular `npm test` because runDataset wraps
+ * everything in `describe.skipIf(!process.env.RUN_LLM_EVALS)`.
+ *
+ * Gate summary:
+ *   positives  — skill-activation + tool-selection ≥ 80%
+ *   distractors — negative-activation = 100% (any false positive is a regression)
+ */
+
+import { runDataset } from "./runner.js";
+import {
+  positiveExamples,
+  distractorExamples,
+} from "./datasets/detection-rule-management.dataset.js";
+import { skillActivation } from "./evaluators/skill-activation.js";
+import { negativeActivation } from "./evaluators/negative-activation.js";
+import { toolSelection } from "./evaluators/tool-selection.js";
+import { createEvalServer } from "./helpers/evalServer.js";
+
+// Positives: queries that should route to the skill's entry-point tool.
+const positiveEvaluators = {
+  "skill-activation": skillActivation,
+  "tool-selection": toolSelection,
+};
+
+runDataset(
+  { name: "detection-rule-management: positives", examples: positiveExamples },
+  positiveEvaluators,
+  { passingScore: 0.8, createServer: createEvalServer } // 80% gate
+);
+
+// Distractors: unrelated queries; the skill tool must stay out of the trajectory.
+const distractorEvaluators = {
+  "negative-activation": negativeActivation,
+};
+
+runDataset(
+  { name: "detection-rule-management: distractors", examples: distractorExamples },
+  distractorEvaluators,
+  { passingScore: 1.0, createServer: createEvalServer } // 100% — any false positive is a regression
+);
diff --git a/evals/evaluators/criteria.ts b/evals/evaluators/criteria.ts
new file mode 100644
index 0000000..1994eac
--- /dev/null
+++ b/evals/evaluators/criteria.ts
@@ -0,0 +1,142 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { Evaluator, EvaluatorResult, ExpectedBehavior, Trajectory } from "../types.js";
+import type { LlmProvider } from "../llm/types.js";
+
+/**
+ * LLM-as-judge evaluator: asks an LLM to score the trajectory against
+ * the natural-language assertions in `expected.criteria`.
+ *
+ * Returns `'N/A'` when `expected.criteria` is absent or empty so datasets
+ * that rely only on structural evaluators don't incur extra LLM calls.
+ *
+ * Usage:
+ *   import { createCriteriaEvaluator } from "./criteria.js";
+ *   import { createDefaultLlmProvider } from "../llm/index.js";
+ *
+ *   runDataset(dataset, {
+ *     criteria: createCriteriaEvaluator(createDefaultLlmProvider()),
+ *   });
+ *
+ * The factory pattern is necessary because the `Evaluator` type is a plain
+ * function — the LLM provider is closed over rather than passed as an arg.
+ */
+export function createCriteriaEvaluator(llm: LlmProvider): Evaluator {
+  const judge = async (
+    trajectory: Trajectory,
+    expected: ExpectedBehavior
+  ): Promise<EvaluatorResult> => {
+    const criteria = expected.criteria ?? [];
+    // Nothing to judge: skip the LLM round-trip entirely.
+    if (criteria.length === 0) return { score: "N/A" };
+
+    // One user turn carrying the rubric; the judge is offered no tools.
+    const userTurn = { role: "user" as const, content: buildJudgePrompt(trajectory, criteria) };
+    const verdict = await llm.chat([userTurn], []);
+    return parseJudgeResponse(verdict.content ?? "");
+  };
+  return judge;
+}
+
+/**
+ * Builds the rubric prompt sent to the judge LLM.
+ *
+ * Asks for a JSON object with `score` (0–1) and `reasoning` (string) so
+ * parsing is deterministic. The trajectory is serialised as a compact JSON
+ * array of `{tool, args}` pairs — `result` is omitted to avoid token bloat
+ * from large tool outputs.
+ */
+function buildJudgePrompt(trajectory: Trajectory, criteria: string[]): string {
+  // Serialise only {tool, args} per call; tool results stay out of the prompt.
+  const calls = trajectory.map((step) => ({ tool: step.tool, args: step.args }));
+  const trajectoryStr = JSON.stringify(calls, null, 2);
+
+  // Number the criteria from 1, one per line, so the judge can refer to them
+  // individually in its reasoning.
+  const numbered: string[] = [];
+  criteria.forEach((text, position) => numbered.push(`${position + 1}. ${text}`));
+  const criteriaList = numbered.join("\n");
+
+  return `You are an impartial evaluator assessing the quality of an AI assistant's tool-calling behaviour.
+
+## Trajectory (tools the assistant called, in order)
+
+\`\`\`json
+${trajectoryStr}
+\`\`\`
+
+## Evaluation criteria
+
+${criteriaList}
+
+## Task
+
+Score how well the trajectory satisfies ALL of the criteria above on a scale from 0.0 to 1.0:
+- 1.0  All criteria fully satisfied
+- 0.75 Most criteria satisfied with minor gaps
+- 0.5  About half the criteria satisfied
+- 0.25 Most criteria unmet with only minor satisfaction
+- 0.0  No criteria satisfied at all
+
+Respond with a single JSON object — no markdown fences, no extra text:
+{"score": <number 0.0–1.0>, "reasoning": "<concise explanation referencing specific criteria>"}`;
+}
+
+/**
+ * Parses the judge LLM's response into an EvaluatorResult.
+ *
+ * Resolution order:
+ *   1. JSON — the widest `{...}` span is tried first, then the narrowest, so
+ *      braces inside `reasoning` and stray braces in trailing prose both work.
+ *      A numeric `score` is clamped to [0, 1].
+ *   2. Prose — a number in [0, 1] following the word "score" wins; otherwise
+ *      the first standalone one (never a fragment such as "1" in "1.5").
+ *   3. Otherwise the score is 0 and `reason` carries the raw response.
+ */
+function parseJudgeResponse(text: string): EvaluatorResult {
+  const trimmed = text.trim();
+
+  // Greedy span copes with nested braces; lazy span with braces in later prose.
+  const spans = [/\{[\s\S]*\}/, /\{[\s\S]*?\}/].map((re) => trimmed.match(re)?.[0]);
+  for (const span of spans) {
+    if (span === undefined) continue;
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(span);
+    } catch {
+      continue; // not valid JSON — try the next span, then the prose fallback
+    }
+    if (typeof parsed !== "object" || parsed === null) continue;
+    const { score, reasoning } = parsed as { score?: unknown; reasoning?: unknown };
+    if (typeof score !== "number") continue;
+    return {
+      score: Math.min(1, Math.max(0, score)),
+      reason:
+        typeof reasoning === "string"
+          ? reasoning
+          : `raw judge response: ${trimmed}`,
+    };
+  }
+
+  // Lookarounds reject digits/dots on either side so only whole numbers match.
+  const unit = String.raw`(?<![\d.])(1(?:\.0+)?|0(?:\.\d+)?)(?!\d|\.\d)`;
+  const numMatch =
+    trimmed.match(new RegExp(String.raw`score\D{0,20}?` + unit, "i")) ??
+    trimmed.match(new RegExp(unit));
+  if (numMatch) {
+    return {
+      score: parseFloat(numMatch[1]),
+      reason: `score parsed from prose; raw response: ${trimmed.slice(0, 200)}`,
+    };
+  }
+
+  return {
+    score: 0,
+    reason: `judge response could not be parsed; raw response: ${trimmed.slice(0, 200)}`,
+  };
+}
diff --git a/evals/evaluators/negative-activation.ts b/evals/evaluators/negative-activation.ts
new file mode 100644
index 0000000..e08d315
--- /dev/null
+++ b/evals/evaluators/negative-activation.ts
@@ -0,0 +1,46 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { Evaluator, EvaluatorResult, ExpectedBehavior, Trajectory } from "../types.js";
+
+/**
+ * Binary evaluator for distractor examples: did the LLM correctly avoid
+ * calling the skill's entry-point tool?
+ *
+ * This is the complement of `skillActivation`. Use it on examples where the
+ * user query should NOT trigger the skill — e.g. a migration skill dataset
+ * includes unrelated queries (case management, threat hunting) to confirm the
+ * LLM doesn't call `migrate-rules` for everything.
+ *
+ * Score semantics (binary):
+ *   1 — skill tool absent from trajectory (correct — not distracted)
+ *   0 — skill tool present in trajectory (false positive — skill over-triggered)
+ *
+ * Returns `'N/A'` when `expected.skill` is absent, consistent with how
+ * `skillActivation` handles missing skill declarations.
+ *
+ * CI gate: datasets should require 100% on this evaluator for distractor
+ * examples — a false positive means the skill's SKILL.md is too aggressive
+ * and will fire on unrelated queries in production.
+ */
+export const negativeActivation: Evaluator = (
+  trajectory: Trajectory,
+  expected: ExpectedBehavior
+): EvaluatorResult => {
+  const skillTool = expected.skill;
+  // No skill declared for this example, so there is nothing to check.
+  if (!skillTool) return { score: "N/A" };
+
+  // Any call to the skill's tool counts as over-triggering.
+  const calledTools = trajectory.map((call) => call.tool);
+  if (calledTools.includes(skillTool)) {
+    const reason = `Tool "${skillTool}" was called but should not have been (false positive)`;
+    return { score: 0, reason };
+  }
+
+  return { score: 1, reason: `Tool "${skillTool}" was correctly absent from the trajectory` };
+};
diff --git a/evals/evaluators/skill-activation.ts b/evals/evaluators/skill-activation.ts
new file mode 100644
index 0000000..b7deb8d
--- /dev/null
+++ b/evals/evaluators/skill-activation.ts
@@ -0,0 +1,37 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { Evaluator, EvaluatorResult, ExpectedBehavior, Trajectory } from "../types.js";
+
+/**
+ * Binary evaluator: did the LLM call the skill's entry-point tool?
+ *
+ * Each MCP skill has a single model-facing entry-point tool (e.g. `migrate-rules`
+ * for the automatic-migration skill, `manage-rules` for detection-rule-management).
+ * `expected.skill` holds that tool name. The evaluator checks whether the
+ * trajectory contains at least one call to that tool.
+ *
+ * Returns `'N/A'` when `expected.skill` is absent so datasets that don't
+ * care about skill routing can omit the field without failing the run.
+ */
+export const skillActivation: Evaluator = (
+  trajectory: Trajectory,
+  expected: ExpectedBehavior
+): EvaluatorResult => {
+  const entryTool = expected.skill;
+  // Datasets that do not declare a skill are not scored by this evaluator.
+  if (!entryTool) return { score: "N/A" };
+
+  const toolNames = trajectory.map((call) => call.tool);
+  if (toolNames.includes(entryTool)) {
+    return { score: 1, reason: `Tool "${entryTool}" was called` };
+  }
+
+  // Echo what was actually called so a miss is easy to diagnose.
+  const seen = toolNames.join(", ") || "empty";
+  return { score: 0, reason: `Tool "${entryTool}" was never called (trajectory: [${seen}])` };
+};
diff --git a/evals/evaluators/tool-selection.ts b/evals/evaluators/tool-selection.ts
new file mode 100644
index 0000000..71cf7b1
--- /dev/null
+++ b/evals/evaluators/tool-selection.ts
@@ -0,0 +1,60 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { Evaluator, EvaluatorResult, ExpectedBehavior, Trajectory } from "../types.js";
+
+/**
+ * Set-based tool-selection evaluator: how well did the LLM pick the right tools?
+ *
+ * Computes precision, recall, and their harmonic mean (F1) against the
+ * set of tool names in `expected.tools`. Deduplicates both sides — order
+ * and repetition are tested by the trajectory evaluator instead.
+ *
+ *   precision = |called ∩ expected| / |called|   (no spurious calls)
+ *   recall    = |called ∩ expected| / |expected|  (no missed calls)
+ *   score     = F1 = 2·P·R / (P+R)               ∈ [0, 1]
+ *
+ * Returns `'N/A'` when `expected.tools` is absent so datasets that only
+ * care about skill routing don't need to declare tool lists.
+ *
+ * CI gate: datasets should require ≥0.8 (80%) on positive examples.
+ * The failure reason lists missed and extra tools to make debugging fast.
+ */
+export const toolSelection: Evaluator = (
+  trajectory: Trajectory,
+  expected: ExpectedBehavior
+): EvaluatorResult => {
+  if (!expected.tools) return { score: "N/A" };
+
+  // Deduplicate both sides; Set iteration keeps first-seen order.
+  const wanted = new Set(expected.tools);
+  const used = new Set(trajectory.map((call) => call.tool));
+
+  if (wanted.size === 0 && used.size === 0) {
+    return { score: 1, reason: "No tools expected and none called" };
+  }
+
+  // Split the called tools into hits (true positives) and spurious extras.
+  const extra: string[] = [];
+  let hits = 0;
+  for (const name of used) {
+    if (wanted.has(name)) hits += 1;
+    else extra.push(name);
+  }
+  const missed = [...wanted].filter((name) => !used.has(name));
+
+  // An empty side scores 0 instead of dividing by zero.
+  const precision = used.size === 0 ? 0 : hits / used.size;
+  const recall = wanted.size === 0 ? 0 : hits / wanted.size;
+  const sum = precision + recall;
+  const f1 = sum > 0 ? (2 * precision * recall) / sum : 0;
+
+  let reason = `F1=${f1.toFixed(2)} (precision=${precision.toFixed(2)}, recall=${recall.toFixed(2)})`;
+  if (missed.length > 0) reason += ` | missed: [${missed.join(", ")}]`;
+  if (extra.length > 0) reason += ` | extra: [${extra.join(", ")}]`;
+  return { score: f1, reason };
+};
diff --git a/evals/evaluators/trajectory.ts b/evals/evaluators/trajectory.ts
new file mode 100644
index 0000000..4e71ec8
--- /dev/null
+++ b/evals/evaluators/trajectory.ts
@@ -0,0 +1,79 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { Evaluator, EvaluatorResult, ExpectedBehavior, Trajectory } from "../types.js";
+
+/**
+ * Sequence-aware evaluator: how closely did the LLM follow the expected tool order?
+ *
+ * Computes the Longest Common Subsequence (LCS) of the actual tool-call
+ * sequence against `expected.tools`, then normalises by the longer of the
+ * two sequences:
+ *
+ *   score = lcs(actual, expected) / max(|actual|, |expected|) ∈ [0, 1]
+ *
+ * Dividing by the max penalises both missing tools (low recall) and extra
+ * spurious tools (low precision) without needing separate P/R components —
+ * those are tool-selection's job.
+ *
+ * Returns `'N/A'` when `expected.tools` is absent so datasets that don't
+ * specify an ordered tool sequence don't fail on this evaluator. This guard
+ * is load-bearing: running LCS against an undefined expectation would produce
+ * meaningless 0-scores that mask real regressions in other evaluators.
+ */
+export const trajectoryScore: Evaluator = (
+  trajectory: Trajectory,
+  expected: ExpectedBehavior
+): EvaluatorResult => {
+  const wanted = expected.tools;
+  // Without an expected sequence there is nothing to align against.
+  if (!wanted) return { score: "N/A" };
+
+  const called = trajectory.map((step) => step.tool);
+  const longest = Math.max(called.length, wanted.length);
+
+  // Two empty sequences match trivially; this also avoids dividing by zero.
+  if (longest === 0) {
+    return { score: 1, reason: "Both actual and expected sequences are empty" };
+  }
+
+  const common = lcs(called, wanted);
+  const score = common / longest;
+
+  // Base summary, with both sequences appended only for imperfect scores.
+  let reason =
+    `LCS=${common} / max(|actual|=${called.length}, |expected|=${wanted.length})` +
+    `=${longest} → score=${score.toFixed(2)}`;
+  if (score < 1) {
+    reason += ` | actual=[${called.join(", ")}] expected=[${wanted.join(", ")}]`;
+  }
+
+  return { score, reason };
+};
+
+/**
+ * Length of the Longest Common Subsequence of two string sequences.
+ * Row-by-row dynamic programming; elements are compared with `===`.
+ */
+function lcs(a: string[], b: string[]): number {
+  // `above` is the finished DP row for the previous element of `a`;
+  // index j holds the LCS length against the first j elements of `b`.
+  let above = new Array<number>(b.length + 1).fill(0);
+
+  for (const left of a) {
+    const row = [0];
+    for (let j = 0; j < b.length; j++) {
+      row.push(
+        left === b[j]
+          ? above[j] + 1
+          : Math.max(above[j + 1], row[j])
+      );
+    }
+    above = row;
+  }
+  return above[b.length];
+}
diff --git a/evals/harness.test.ts b/evals/harness.test.ts
new file mode 100644
index 0000000..07b14ae
--- /dev/null
+++ b/evals/harness.test.ts
@@ -0,0 +1,247 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+/**
+ * Mock-based harness integration test.
+ *
+ * Exercises the full eval pipeline (runMcpHostLoop → evaluators) with a
+ * deterministic mock LLM, proving the harness mechanics satisfy both dataset
+ * gate requirements without requiring real API keys.
+ *
+ * No API keys needed — runs as part of `npm run test:evals`.
+ * Gate thresholds match the LLM eval specs:
+ *   positives   — skill-activation + tool-selection ≥ 80%
+ *   distractors — negative-activation = 100%
+ */
+
+import { describe, it, expect } from "vitest";
+import { runMcpHostLoop } from "./runMcpHostLoop.js";
+import { skillActivation } from "./evaluators/skill-activation.js";
+import { toolSelection } from "./evaluators/tool-selection.js";
+import { negativeActivation } from "./evaluators/negative-activation.js";
+import {
+  positiveExamples as drmPositives,
+  distractorExamples as drmDistractors,
+} from "./datasets/detection-rule-management.dataset.js";
+import {
+  positiveExamples as amPositives,
+  distractorExamples as amDistractors,
+} from "./datasets/automatic-migration.dataset.js";
+import type {
+  LlmProvider,
+  AssistantMessage,
+  LlmMessage,
+} from "./llm/types.js";
+import { createEvalServer } from "./helpers/evalServer.js";
+
+// ---------------------------------------------------------------------------
+// Gate thresholds — must match the LLM eval specs in *.eval.test.ts
+// ---------------------------------------------------------------------------
+
+const POSITIVE_GATE = 0.8;
+const DISTRACTOR_GATE = 1.0;
+
+// ---------------------------------------------------------------------------
+// Mock LLM implementations
+// ---------------------------------------------------------------------------
+
+/**
+ * Returns an LLM that calls `toolName` exactly once, then returns plain text.
+ * Used for positive examples to simulate correct skill activation.
+ */
+function makeActivatingLlm(toolName: string): LlmProvider {
+  // Flips to true once the single tool call has been issued.
+  let alreadyCalled = false;
+
+  const chat: LlmProvider["chat"] = async (_messages, tools) => {
+    const offered = tools.some((tool) => tool.name === toolName);
+    // Already called, or the tool was not offered this turn: reply with text.
+    if (alreadyCalled || !offered) {
+      return { role: "assistant", content: "Done." };
+    }
+
+    alreadyCalled = true;
+    const toolCall = {
+      id: "call_mock_0",
+      type: "function" as const,
+      function: { name: toolName, arguments: "{}" },
+    };
+    return { role: "assistant", content: null, tool_calls: [toolCall] };
+  };
+
+  return { chat };
+}
+
+/**
+ * Distractor-side mock: always replies with plain text, never requests a tool.
+ */
+const passiveLlm: LlmProvider = {
+  chat: async (): Promise<AssistantMessage> => ({
+    role: "assistant",
+    content: "I can help with that directly without additional tools.",
+  }),
+};
+
+// ---------------------------------------------------------------------------
+// detection-rule-management harness tests
+// ---------------------------------------------------------------------------
+
+describe("eval harness: detection-rule-management positives", () => {
+  // One test per positive example; the mock LLM calls `manage-rules` once.
+  drmPositives.forEach((example) => {
+    it(`${example.id} — skill-activation + tool-selection ≥ ${POSITIVE_GATE}`, async () => {
+      const llm = makeActivatingLlm("manage-rules");
+      const trajectory = await runMcpHostLoop(example.input, { server: createEvalServer(), llm });
+
+      const results = [
+        ["skill-activation", await skillActivation(trajectory, example.expected)],
+        ["tool-selection", await toolSelection(trajectory, example.expected)],
+      ] as const;
+
+      // Evaluators that returned "N/A" have no expectation to enforce.
+      for (const [label, result] of results) {
+        if (result.score === "N/A") continue;
+        expect(result.score, `${label}: ${result.reason}`).toBeGreaterThanOrEqual(POSITIVE_GATE);
+      }
+    });
+  });
+});
+
+describe("eval harness: detection-rule-management distractors", () => {
+  // One test per distractor, driven by the mock that never requests a tool.
+  drmDistractors.forEach((example) => {
+    it(`${example.id} — negative-activation = 100%`, async () => {
+      const server = createEvalServer();
+      const trajectory = await runMcpHostLoop(example.input, { server, llm: passiveLlm });
+
+      const { score, reason } = await negativeActivation(trajectory, example.expected);
+
+      // "N/A" (no skill declared on the example) is skipped, not failed.
+      if (score === "N/A") return;
+      expect(score, `negative-activation: ${reason}`).toBe(DISTRACTOR_GATE);
+    });
+  });
+});
+
+// ---------------------------------------------------------------------------
+// automatic-migration harness tests
+// ---------------------------------------------------------------------------
+
+describe("eval harness: automatic-migration positives", () => {
+  // One test per positive example; the mock LLM calls `migrate-rules` once.
+  amPositives.forEach((example) => {
+    it(`${example.id} — skill-activation + tool-selection ≥ ${POSITIVE_GATE}`, async () => {
+      const llm = makeActivatingLlm("migrate-rules");
+      const trajectory = await runMcpHostLoop(example.input, { server: createEvalServer(), llm });
+
+      const results = [
+        ["skill-activation", await skillActivation(trajectory, example.expected)],
+        ["tool-selection", await toolSelection(trajectory, example.expected)],
+      ] as const;
+
+      // Evaluators that returned "N/A" have no expectation to enforce.
+      for (const [label, result] of results) {
+        if (result.score === "N/A") continue;
+        expect(result.score, `${label}: ${result.reason}`).toBeGreaterThanOrEqual(POSITIVE_GATE);
+      }
+    });
+  });
+});
+
+describe("eval harness: automatic-migration distractors", () => {
+  // One test per distractor, driven by the mock that never requests a tool.
+  amDistractors.forEach((example) => {
+    it(`${example.id} — negative-activation = 100%`, async () => {
+      const server = createEvalServer();
+      const trajectory = await runMcpHostLoop(example.input, { server, llm: passiveLlm });
+
+      const { score, reason } = await negativeActivation(trajectory, example.expected);
+
+      // "N/A" (no skill declared on the example) is skipped, not failed.
+      if (score === "N/A") return;
+      expect(score, `negative-activation: ${reason}`).toBe(DISTRACTOR_GATE);
+    });
+  });
+});
+
+// ---------------------------------------------------------------------------
+// HostLoopOptions.systemPrompt — propagation contract
+//
+// Real MCP hosts inject a system prompt that constrains tool selection.
+// Verify the option flows from `runMcpHostLoop` to the provider's `chat()`
+// as a `role: "system"` message, AND that empty / whitespace-only strings
+// are dropped so the absence of a system prompt is observable.
+// ---------------------------------------------------------------------------
+
+describe("eval harness: systemPrompt propagation", () => {
+  /**
+   * Returns a provider that stores a shallow copy of every `messages` array
+   * its `chat()` receives (in `calls`) and always answers with plain text.
+   */
+  function makeRecordingLlm(): { provider: LlmProvider; calls: LlmMessage[][] } {
+    const calls: LlmMessage[][] = [];
+    const provider: LlmProvider = {
+      async chat(messages): Promise<AssistantMessage> {
+        calls.push([...messages]);
+        return { role: "assistant", content: "Done." };
+      },
+    };
+    return { provider, calls };
+  }
+
+  it("prepends a system message when systemPrompt is provided", async () => {
+    const { provider, calls } = makeRecordingLlm();
+    await runMcpHostLoop("Find me my noisy rules", {
+      server: createEvalServer(),
+      llm: provider,
+      systemPrompt: "You are a security analyst. Always call a tool before answering.",
+    });
+
+    expect(calls.length).toBeGreaterThanOrEqual(1);
+    const firstTurn = calls[0]!;
+    expect(firstTurn[0]).toEqual({
+      role: "system",
+      content: "You are a security analyst. Always call a tool before answering.",
+    });
+    expect(firstTurn[1]).toEqual({
+      role: "user",
+      content: "Find me my noisy rules",
+    });
+  });
+
+  it("does not inject a system message when systemPrompt is omitted", async () => {
+    const { provider, calls } = makeRecordingLlm();
+    await runMcpHostLoop("Find me my noisy rules", {
+      server: createEvalServer(),
+      llm: provider,
+    });
+
+    expect(calls.length).toBeGreaterThanOrEqual(1);
+    const firstTurn = calls[0]!;
+    expect(firstTurn[0]?.role).toBe("user");
+    expect(firstTurn.some((m) => m.role === "system")).toBe(false);
+  });
+
+  it("treats empty / whitespace-only systemPrompt as omitted", async () => {
+    for (const prompt of ["", "   ", "\n\t"]) {
+      const { provider, calls } = makeRecordingLlm();
+      await runMcpHostLoop("Find me my noisy rules", {
+        server: createEvalServer(),
+        llm: provider,
+        systemPrompt: prompt,
+      });
+      // Guard first: without it an empty `calls` would surface as a TypeError
+      // on `firstTurn.some` instead of a readable assertion failure.
+      expect(calls.length).toBeGreaterThanOrEqual(1);
+      const firstTurn = calls[0]!;
+      expect(
+        firstTurn.some((m) => m.role === "system"),
+        `empty-string systemPrompt (${JSON.stringify(prompt)}) should not inject a system message`
+      ).toBe(false);
+    }
+  });
+});
diff --git a/evals/helpers/evalServer.ts b/evals/helpers/evalServer.ts
new file mode 100644
index 0000000..2d1412f
--- /dev/null
+++ b/evals/helpers/evalServer.ts
@@ -0,0 +1,98 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { vi } from "vitest";
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { registerAlertTriageTools } from "../../src/tools/alert-triage.js";
+import { registerAttackDiscoveryTools } from "../../src/tools/attack-discovery.js";
+import { registerCaseManagementTools } from "../../src/tools/case-management.js";
+import { registerDetectionRuleTools } from "../../src/tools/detection-rules.js";
+import { registerMigrationTools } from "../../src/tools/migration.js";
+import { registerSampleDataTools } from "../../src/tools/sample-data.js";
+import { registerThreatHuntTools } from "../../src/tools/threat-hunt.js";
+import type { AlertsService } from "../../src/elastic/service/alertsService.js";
+import type { AttackDiscoveryService } from "../../src/elastic/service/attackDiscoveryService.js";
+import type { CasesService } from "../../src/elastic/service/casesService.js";
+import type { EntityDetailService } from "../../src/elastic/service/entityDetailService.js";
+import type { EsqlService } from "../../src/elastic/service/esqlService.js";
+import type { IndicesService } from "../../src/elastic/service/indicesService.js";
+import type { InvestigateService } from "../../src/elastic/service/investigateService.js";
+import type { MigrationsService } from "../../src/elastic/service/migrationsService.js";
+import type { RulesService } from "../../src/elastic/service/rulesService.js";
+import type { SampleDataService } from "../../src/elastic/service/sampleDataService.js";
+
+/**
+ * Stubs every service used by the seven tool groups registered on the live
+ * MCP server. Methods invoked by model-facing entry tools resolve to
+ * realistic-shaped empty payloads; other methods are bare `vi.fn()` because
+ * skill-routing evaluators only inspect which tools the LLM called, not
+ * what those tools returned.
+ *
+ * Mirrors `src/server.ts` exactly: the LLM that drives the eval host loop
+ * must see the same tool surface a real MCP host (Claude Desktop, Cursor)
+ * exposes — otherwise we measure skill-selection against an artificially
+ * narrow distractor set and over-state activation rates for small models.
+ */
+export function createEvalServer(): McpServer {
+  // `stub` casts a partial method bag to the full service type the register*
+  // functions expect; `resolves` builds a mock that resolves to `value`.
+  const stub = <T>(methods: Record<string, unknown>): T => methods as unknown as T;
+  const resolves = (value: unknown) => vi.fn().mockResolvedValue(value);
+
+  const alertsService = stub<AlertsService>({
+    searchAlerts: resolves({ alerts: [], total: 0 }),
+    findAlertById: resolves(null),
+  });
+  const attackDiscoveryService = stub<AttackDiscoveryService>({
+    listAttackDiscoveries: resolves([]),
+  });
+  const casesService = stub<CasesService>({
+    findCases: resolves({ cases: [], total: 0 }),
+  });
+
+  const entityDetailService = stub<EntityDetailService>({
+    getEntityDetail: resolves(null),
+  });
+  const esqlService = stub<EsqlService>({
+    executeQuery: resolves({ columns: [], values: [] }),
+  });
+  const indicesService = stub<IndicesService>({
+    listIndices: resolves([]),
+  });
+  const investigateService = stub<InvestigateService>({
+    getRelatedAlerts: resolves([]),
+  });
+
+  const migrationsService = stub<MigrationsService>({
+    listMigrations: resolves([]),
+  });
+  const rulesService = stub<RulesService>({
+    findRules: resolves({ data: [], total: 0 }),
+  });
+  const sampleDataService = stub<SampleDataService>({
+    listScenarios: resolves([]),
+  });
+
+  // Seven tool groups are registered, in this order: alert triage, attack
+  // discovery, case management, detection rules, migration, sample data,
+  // threat hunt.
+  const server = new McpServer({ name: "eval-server", version: "0.0.0" });
+  registerAlertTriageTools(server, { alertsService });
+  registerAttackDiscoveryTools(server, { attackDiscoveryService, casesService });
+  registerCaseManagementTools(server, { casesService });
+  registerDetectionRuleTools(server, { rulesService });
+  registerMigrationTools(server, { migrationsService });
+  registerSampleDataTools(server, { sampleDataService });
+  registerThreatHuntTools(server, {
+    esqlService,
+    indicesService,
+    investigateService,
+    entityDetailService,
+  });
+
+  return server;
+}
diff --git a/evals/llm/anthropic.ts b/evals/llm/anthropic.ts
new file mode 100644
index 0000000..a52ba6e
--- /dev/null
+++ b/evals/llm/anthropic.ts
@@ -0,0 +1,167 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import Anthropic from "@anthropic-ai/sdk";
+import type {
+  AssistantMessage,
+  LlmMessage,
+  LlmProvider,
+  LlmToolDefinition,
+} from "./types.js";
+
+const DEFAULT_MODEL = "claude-haiku-4-5-20251001"; // used when no `model` option is given
+
+/** Max tokens to request from the Anthropic API per turn. */
+const MAX_TOKENS = 4096;
+
+export interface AnthropicProviderOptions {
+  /** Chat model to use. Defaults to claude-haiku-4-5-20251001. */
+  model?: string;
+  /**
+   * API key. When omitted, no `apiKey` is handed to the SDK client, which then
+   * uses its own default (the ANTHROPIC_API_KEY environment variable).
+   */
+  apiKey?: string;
+}
+
+export class AnthropicProvider implements LlmProvider {
+  private readonly client: Anthropic;
+  private readonly model: string;
+
+  /**
+   * @param options.model  Chat model; DEFAULT_MODEL when omitted.
+   * @param options.apiKey Passed to the SDK client only when defined.
+   */
+  constructor(options: AnthropicProviderOptions = {}) {
+    const { model = DEFAULT_MODEL, apiKey } = options;
+    this.model = model;
+    this.client = new Anthropic(apiKey === undefined ? {} : { apiKey });
+  }
+
+  /**
+   * Sends one chat turn to the Messages API and converts the reply into the
+   * unified AssistantMessage shape.
+   * @param messages Conversation so far; may include `system` messages.
+   * @param tools    Tool definitions to offer; omitted from the request if empty.
+   * @returns Concatenated text (or `null` when there is none) plus any tool calls.
+   */
+  async chat(
+    messages: LlmMessage[],
+    tools: LlmToolDefinition[]
+  ): Promise<AssistantMessage> {
+    // System-roled messages travel in the top-level `system` parameter rather
+    // than the message array, so split them off in one pass and join them.
+    const systemParts: string[] = [];
+    const conversation: Exclude<LlmMessage, { role: "system" }>[] = [];
+    for (const message of messages) {
+      if (message.role === "system") systemParts.push(message.content);
+      else conversation.push(message);
+    }
+    const system = systemParts.join("\n\n");
+
+    const response = await this.client.messages.create({
+      model: this.model,
+      max_tokens: MAX_TOKENS,
+      ...(system.length > 0 ? { system } : {}),
+      messages: toAnthropicMessages(conversation),
+      ...(tools.length > 0 ? { tools: tools.map(toAnthropicTool) } : {}),
+    });
+
+    // Only text and tool_use blocks are surfaced; other block types are dropped.
+    const text = response.content.map((block) => (block.type === "text" ? block.text : "")).join("");
+    const toolUses = response.content.flatMap((block) =>
+      block.type === "tool_use" ? [block] : []
+    );
+
+    return {
+      role: "assistant",
+      content: text || null,
+      ...(toolUses.length > 0
+        ? {
+            tool_calls: toolUses.map((use) => ({
+              id: use.id,
+              type: "function" as const,
+              // `input` is already parsed; re-encode it as a JSON string.
+              function: { name: use.name, arguments: JSON.stringify(use.input) },
+            })),
+          }
+        : {}),
+    };
+  }
+}
+
+/**
+ * Converts OpenAI-style LlmMessage[] to Anthropic MessageParam[].
+ *
+ * Structural differences from OpenAI:
+ *   - Anthropic has no `tool` role. Tool results go as `user` messages with
+ *     `tool_result` content blocks.
+ *   - Anthropic has no `system` message role — system prompts flow through
+ *     the top-level `system` parameter on `messages.create`. Callers strip
+ *     system messages before calling this function; the parameter type
+ *     enforces that invariant.
+ *   - Consecutive tool-result messages are merged into a single user message
+ *     so the API never receives two adjacent user turns.
+ *   - Assistant content is an array of TextBlockParam / ToolUseBlockParam.
+ */
+function toAnthropicMessages(
+  messages: Exclude<LlmMessage, { role: "system" }>[]
+): Anthropic.MessageParam[] {
+  const converted: Anthropic.MessageParam[] = [];
+
+  // Tool-call arguments arrive as a JSON string; unparseable input becomes {}.
+  const decodeArguments = (raw: string): unknown => {
+    try {
+      return JSON.parse(raw);
+    } catch {
+      return {};
+    }
+  };
+
+  for (const message of messages) {
+    switch (message.role) {
+      case "user":
+        converted.push({ role: "user", content: message.content });
+        break;
+      case "assistant": {
+        const blocks: Anthropic.ContentBlockParam[] = [];
+        if (message.content) blocks.push({ type: "text", text: message.content });
+        for (const call of message.tool_calls ?? []) {
+          const input = decodeArguments(call.function.arguments);
+          blocks.push({ type: "tool_use", id: call.id, name: call.function.name, input });
+        }
+        converted.push({ role: "assistant", content: blocks });
+        break;
+      }
+
+      default: {
+        // Remaining role is "tool": its output becomes a tool_result block.
+        const toolResult: Anthropic.ToolResultBlockParam = {
+          type: "tool_result",
+          tool_use_id: message.tool_call_id,
+          content: message.content,
+        };
+        // Append to the previous user turn when it already starts with a
+        // tool_result, so consecutive tool results share one user message.
+        const last = converted[converted.length - 1];
+        const lastBlocks = last?.role === "user" && Array.isArray(last.content) ? last.content : undefined;
+        if (lastBlocks?.[0]?.type === "tool_result") lastBlocks.push(toolResult);
+        else converted.push({ role: "user", content: [toolResult] });
+      }
+    }
+  }
+
+  return converted;
+}
+
+// Maps a unified tool definition onto Anthropic's tool shape: `name` and
+// `description` carry over, `parameters` is passed as `input_schema`.
+function toAnthropicTool(tool: LlmToolDefinition): Anthropic.Tool {
+  const { name, description, parameters } = tool;
+  const input_schema = parameters as Anthropic.Tool.InputSchema;
+  return { name, description, input_schema };
+}
diff --git a/evals/llm/index.ts b/evals/llm/index.ts
new file mode 100644
index 0000000..b959fd4
--- /dev/null
+++ b/evals/llm/index.ts
@@ -0,0 +1,38 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { LlmProvider } from "./types.js";
+import { AnthropicProvider } from "./anthropic.js";
+import { OpenAiProvider } from "./openai.js";
+
+/**
+ * Returns the default LLM provider by inspecting environment variables.
+ *
+ * Priority order:
+ *   1. ANTHROPIC_API_KEY → AnthropicProvider (claude-haiku-4-5-20251001)
+ *   2. OPENAI_API_KEY    → OpenAiProvider / LiteLLM proxy / Ollama (gpt-4o-mini default)
+ *
+ * Set LITELLM_BASE_URL alongside OPENAI_API_KEY to route through a LiteLLM
+ * proxy, e.g. to reach Claude via the OpenAI-compatible endpoint. Set
+ * OPENAI_MODEL to override the chat model (e.g. `qwen2.5:32b-instruct-q4_K_M`
+ * when proxying through Ollama at `http://localhost:11434/v1`).
+ *
+ * Every variable is treated as unset when it is an empty string, so an
+ * environment that exports `OPENAI_MODEL=""` still gets the default model
+ * instead of sending an empty model name to the API.
+ *
+ * @returns The first provider whose API key is present in the environment.
+ * @throws Error when neither ANTHROPIC_API_KEY nor OPENAI_API_KEY is set.
+ */
+export function createDefaultLlmProvider(): LlmProvider {
+  if (process.env.ANTHROPIC_API_KEY) {
+    return new AnthropicProvider();
+  }
+  if (process.env.OPENAI_API_KEY) {
+    return new OpenAiProvider({
+      // `|| undefined` maps "" to "not provided" so the provider's own
+      // defaults (model name, SDK base URL) apply.
+      model: process.env.OPENAI_MODEL || undefined,
+      baseURL: process.env.LITELLM_BASE_URL || undefined,
+    });
+  }
+  throw new Error(
+    "No LLM provider configured. Set ANTHROPIC_API_KEY or OPENAI_API_KEY " +
+      "before running evals (npm run test:evals)."
+  );
+}
diff --git a/evals/llm/openai.ts b/evals/llm/openai.ts
new file mode 100644
index 0000000..ab6e1fc
--- /dev/null
+++ b/evals/llm/openai.ts
@@ -0,0 +1,130 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import OpenAI from "openai";
+import type {
+  AssistantMessage,
+  LlmMessage,
+  LlmProvider,
+  LlmToolDefinition,
+} from "./types.js";
+
+const DEFAULT_MODEL = "gpt-4o-mini";
+
+/** Constructor options for OpenAiProvider; every field is optional. */
+export interface OpenAiProviderOptions {
+  /** Chat model to use. Defaults to gpt-4o-mini. */
+  model?: string;
+  /**
+   * Override the API base URL. Point this at a LiteLLM proxy to route calls
+   * through any provider the proxy supports without changing client code.
+   * When omitted, no override is handed to the OpenAI client.
+   */
+  baseURL?: string;
+  /**
+   * API key. Defaults to the OPENAI_API_KEY environment variable, which is
+   * the standard OpenAI SDK default.
+   */
+  apiKey?: string;
+}
+
+/**
+ * LlmProvider backed by the OpenAI chat-completions client. Because the base
+ * URL is configurable, the same class also serves OpenAI-compatible
+ * endpoints such as a LiteLLM proxy.
+ */
+export class OpenAiProvider implements LlmProvider {
+  private readonly client: OpenAI;
+  private readonly model: string;
+
+  /**
+   * @param options Model name, base URL and API key overrides. `apiKey` and
+   *   `baseURL` are only forwarded to the OpenAI client when defined, so the
+   *   client's own defaults apply otherwise.
+   */
+  constructor({
+    model = DEFAULT_MODEL,
+    baseURL,
+    apiKey,
+  }: OpenAiProviderOptions = {}) {
+    this.model = model;
+    this.client = new OpenAI({
+      ...(apiKey !== undefined ? { apiKey } : {}),
+      ...(baseURL !== undefined ? { baseURL } : {}),
+    });
+  }
+
+  /**
+   * Requests the next assistant turn for the given history.
+   *
+   * @param messages Conversation so far, in harness format.
+   * @param tools Tools the model may call; the `tools` request field is
+   *   omitted entirely when this list is empty.
+   * @returns The first choice as an assistant message. `tool_calls` is present
+   *   only when the model requested at least one function tool call.
+   * @throws Error when the response contains no choices.
+   */
+  async chat(
+    messages: LlmMessage[],
+    tools: LlmToolDefinition[]
+  ): Promise<AssistantMessage> {
+    const response = await this.client.chat.completions.create({
+      model: this.model,
+      messages: messages.map(toOaiMessage),
+      ...(tools.length > 0 ? { tools: tools.map(toOaiTool) } : {}),
+    });
+
+    const choice = response.choices[0];
+    if (!choice) {
+      throw new Error("OpenAI returned no choices");
+    }
+
+    const msg = choice.message;
+    // Keep only function-style tool calls; other tool-call kinds have no
+    // representation in the harness message format.
+    const toolCalls = (msg.tool_calls ?? [])
+      .filter(
+        (tc): tc is OpenAI.ChatCompletionMessageFunctionToolCall =>
+          tc.type === "function"
+      )
+      .map((tc) => ({
+        id: tc.id,
+        type: "function" as const,
+        function: {
+          name: tc.function.name,
+          arguments: tc.function.arguments,
+        },
+      }));
+
+    return {
+      role: "assistant",
+      content: msg.content ?? null,
+      // Omit the key when filtering left nothing, so the message never
+      // carries an empty `tool_calls` array back into the history.
+      ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}),
+    };
+  }
+}
+
+/**
+ * Converts one harness message into the OpenAI chat-completions message
+ * shape. The two formats are nearly identical, so this is a field-by-field
+ * copy per role.
+ *
+ * @param msg Harness message of any role.
+ * @returns The equivalent OpenAI message parameter. For assistant messages
+ *   `tool_calls` is emitted only when the list is non-empty.
+ */
+function toOaiMessage(msg: LlmMessage): OpenAI.ChatCompletionMessageParam {
+  switch (msg.role) {
+    case "system":
+      return { role: "system", content: msg.content };
+    case "user":
+      return { role: "user", content: msg.content };
+    case "assistant": {
+      // An empty array is truthy, so test the length: a `tool_calls: []`
+      // field must not be sent to the API.
+      const hasToolCalls = !!msg.tool_calls && msg.tool_calls.length > 0;
+      return {
+        role: "assistant",
+        content: msg.content,
+        ...(hasToolCalls
+          ? {
+              tool_calls: (msg.tool_calls ?? []).map((tc) => ({
+                id: tc.id,
+                type: "function" as const,
+                function: {
+                  name: tc.function.name,
+                  arguments: tc.function.arguments,
+                },
+              })),
+            }
+          : {}),
+      };
+    }
+    case "tool":
+      return {
+        role: "tool",
+        content: msg.content,
+        tool_call_id: msg.tool_call_id,
+      };
+  }
+}
+
+/** Wraps a provider-agnostic tool definition in OpenAI's function-tool envelope. */
+function toOaiTool(tool: LlmToolDefinition): OpenAI.ChatCompletionTool {
+  const { name, description, parameters } = tool;
+  return { type: "function", function: { name, description, parameters } };
+}
diff --git a/evals/llm/types.ts b/evals/llm/types.ts
new file mode 100644
index 0000000..44a4d04
--- /dev/null
+++ b/evals/llm/types.ts
@@ -0,0 +1,61 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+/** A single tool the LLM may call, described in JSON Schema. */
+export interface LlmToolDefinition {
+  /** Tool name; the model echoes it back when it requests a call. */
+  name: string;
+  /** Free-text description passed to the provider alongside the name. */
+  description: string;
+  /** JSON Schema object describing the tool's input parameters. */
+  parameters: Record<string, unknown>;
+}
+
+/** One tool invocation requested by the LLM in an assistant turn. */
+export interface LlmToolCallRequest {
+  /** Call identifier; the matching tool-result message repeats it as `tool_call_id`. */
+  id: string;
+  /** Always `"function"` — the only call kind this harness models. */
+  type: "function";
+  function: {
+    /** Name of the tool to invoke. */
+    name: string;
+    /** JSON-encoded argument object. */
+    arguments: string;
+  };
+}
+
+/**
+ * Discriminated union covering every role that can appear in a chat thread.
+ * Shaped after the OpenAI chat messages API so a single interface works for
+ * both the OpenAI and Anthropic adapters (and any LiteLLM proxy in between).
+ *
+ * Anthropic note: Anthropic's HTTP API takes the system prompt as a
+ * top-level `system: string` parameter on `messages.create`, not inside
+ * the messages array. The adapter extracts `system`-roled messages from
+ * the union and passes them via that parameter — this discriminant only
+ * dictates the SHAPE the harness uses internally.
+ */
+export type LlmMessage =
+  // System prompt text.
+  | { role: "system"; content: string }
+  // Plain-text user turn.
+  | { role: "user"; content: string }
+  // Model turn: text (or null when the provider returned none) plus any
+  // tool calls the model requested.
+  | {
+      role: "assistant";
+      content: string | null;
+      tool_calls?: LlmToolCallRequest[];
+    }
+  // Result of one tool call; `tool_call_id` repeats the `id` of the request
+  // it answers.
+  | { role: "tool"; content: string; tool_call_id: string };
+
+/**
+ * Narrowed assistant message — what LlmProvider.chat() must return.
+ * Derived from LlmMessage, so the two cannot drift apart.
+ */
+export type AssistantMessage = Extract<LlmMessage, { role: "assistant" }>;
+
+/**
+ * Minimal provider contract every LLM adapter must satisfy.
+ * The interface is intentionally thin: give it a message history + tool
+ * catalogue, get back the next assistant turn (possibly with tool calls).
+ */
+export interface LlmProvider {
+  /**
+   * Produces the next assistant turn.
+   *
+   * @param messages Conversation so far, oldest first.
+   * @param tools Tools the model is allowed to call; may be empty.
+   * @returns The assistant message, with `tool_calls` set when the model
+   *   wants tools executed.
+   */
+  chat(
+    messages: LlmMessage[],
+    tools: LlmToolDefinition[]
+  ): Promise<AssistantMessage>;
+}
diff --git a/evals/runMcpHostLoop.ts b/evals/runMcpHostLoop.ts
new file mode 100644
index 0000000..b750f2f
--- /dev/null
+++ b/evals/runMcpHostLoop.ts
@@ -0,0 +1,183 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { createServer } from "../src/server.js";
+import type { Trajectory, ToolCall } from "./types.js";
+import type { LlmProvider, LlmMessage } from "./llm/types.js";
+import { createDefaultLlmProvider } from "./llm/index.js";
+
+/**
+ * Maximum LLM → tool-call turns before halting to prevent runaway evals.
+ * Used as the default for `HostLoopOptions.maxTurns`; callers can override it
+ * per run.
+ */
+const MAX_TURNS = 8;
+
+/**
+ * Decides whether an MCP tool belongs in the tool list handed to the LLM.
+ *
+ * Follows the MCP host visibility contract: a tool whose
+ * `_meta.ui.visibility` lists `"app"` but not `"model"` is reserved for an
+ * MCP app calling `app.callServerTool()`. Real hosts (Claude Desktop, Cursor)
+ * hide such tools from the model, and the eval harness does the same so the
+ * model sees what it would see in production.
+ *
+ * @param tool Tool descriptor; only `_meta.ui.visibility` is inspected.
+ * @returns `true` when visibility is absent or empty, when it includes
+ *   `"model"`, or when it does not include `"app"`; `false` otherwise.
+ */
+function isVisibleToModel(tool: { _meta?: unknown }): boolean {
+  type UiMeta = { ui?: { visibility?: readonly string[] } };
+  const scopes = (tool._meta as UiMeta | undefined)?.ui?.visibility;
+
+  // No declaration at all means the tool is unrestricted.
+  if (!scopes || scopes.length === 0) {
+    return true;
+  }
+  // An explicit "model" entry wins; otherwise only app-scoped tools are hidden.
+  return scopes.includes("model") || !scopes.includes("app");
+}
+
+/** Optional knobs for `runMcpHostLoop`; every field has a default. */
+export interface HostLoopOptions {
+  /**
+   * Pre-built MCP server to test against.
+   *
+   * Pass a server constructed with mocked services for dataset-level evals
+   * that don't need a live cluster. Omit to use `createServer()`, which reads
+   * CLUSTERS_JSON / CLUSTERS_FILE and requires a real Elastic cluster.
+   *
+   * Each call to `runMcpHostLoop` should receive a **fresh** server instance;
+   * reusing a connected server across calls is not supported.
+   */
+  server?: McpServer;
+  /**
+   * LLM provider used to simulate the MCP host making tool-call decisions.
+   * Defaults to auto-selecting from ANTHROPIC_API_KEY / OPENAI_API_KEY.
+   */
+  llm?: LlmProvider;
+  /**
+   * Maximum number of LLM→tool-call turns per run.
+   * Defaults to MAX_TURNS (8).
+   */
+  maxTurns?: number;
+  /**
+   * Optional system prompt prepended to the message history.
+   *
+   * Real MCP hosts (Claude Desktop, Cursor) inject a host-level system prompt
+   * that constrains tool selection, response shape, and confirmation flow.
+   * Without one, the harness measures raw model-vs-tools behavior, which can
+   * over- or under-report activation depending on the model family. Use this
+   * to pin behavior to what the production host will instruct, or to swap in
+   * a SKILL.md body when testing skill-driven flows.
+   *
+   * Pass a non-empty string. Empty or whitespace-only strings are ignored to
+   * keep behavior identical to omitting the option.
+   */
+  systemPrompt?: string;
+}
+
+/**
+ * Simulates one MCP host loop run entirely in-process.
+ *
+ * Architecture:
+ *   LLM ↔ Client ↔─InMemoryTransport─↔ McpServer ↔ (ES / Kibana clients)
+ *
+ * The function:
+ *   1. Wires a fresh Client to the server via InMemoryTransport.
+ *   2. Lists available MCP tools and hands the model-visible ones to the LLM
+ *      as tool definitions.
+ *   3. Loops up to `maxTurns` times:
+ *        a. Asks the LLM for the next assistant turn.
+ *        b. If the LLM emits tool calls, executes each via client.callTool().
+ *        c. Records every call in the trajectory.
+ *        d. Feeds results back into the message history.
+ *        e. Breaks when the LLM emits no tool calls (task complete).
+ *   4. Closes the client and returns the trajectory.
+ *
+ * @param input User message that starts the conversation.
+ * @param options Server, LLM provider, turn cap and system prompt overrides;
+ *   see HostLoopOptions.
+ * @returns Every executed tool call, in execution order. Empty when the LLM
+ *   never called a tool.
+ */
+export async function runMcpHostLoop(
+  input: string,
+  {
+    server,
+    llm,
+    maxTurns = MAX_TURNS,
+    systemPrompt,
+  }: HostLoopOptions = {}
+): Promise<Trajectory> {
+  const resolvedServer = server ?? createServer();
+  const resolvedLlm = llm ?? createDefaultLlmProvider();
+
+  const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair();
+  await resolvedServer.connect(serverTransport);
+
+  const client = new Client({ name: "eval-host", version: "1.0.0" });
+  await client.connect(clientTransport);
+
+  try {
+    const { tools: mcpTools } = await client.listTools();
+    // Strip app-only tools — they're invoked by the React workbench via
+    // `app.callServerTool()` and a real MCP host (Claude Desktop, Cursor)
+    // hides them from the LLM by inspecting `_meta.ui.visibility`. Without
+    // this filter the model sees `find-rules`, `start-translation`,
+    // `install-rules`, etc. as alternatives to the model-facing entry
+    // points and the activation rate collapses on smaller models.
+    const toolDefs = mcpTools.filter(isVisibleToModel).map((t) => ({
+      name: t.name,
+      description: t.description ?? "",
+      parameters: t.inputSchema as Record<string, unknown>,
+    }));
+
+    const messages: LlmMessage[] = [];
+    if (systemPrompt && systemPrompt.trim().length > 0) {
+      messages.push({ role: "system", content: systemPrompt });
+    }
+    messages.push({ role: "user", content: input });
+    const trajectory: Trajectory = [];
+
+    for (let turn = 0; turn < maxTurns; turn++) {
+      const response = await resolvedLlm.chat(messages, toolDefs);
+      messages.push(response);
+
+      if (!response.tool_calls || response.tool_calls.length === 0) {
+        // LLM chose not to call a tool — simulation complete.
+        break;
+      }
+
+      for (const toolCall of response.tool_calls) {
+        const toolName = toolCall.function.name;
+        // Start from empty args and only replace them with a parsed plain
+        // object. Malformed JSON and valid-but-non-object JSON (`null`, an
+        // array, a bare string) are both recorded as `{}` so the trajectory
+        // evaluator can detect the failure and `args` always matches its
+        // declared Record type.
+        let toolArgs: Record<string, unknown> = {};
+        try {
+          const parsed: unknown = JSON.parse(toolCall.function.arguments);
+          if (
+            typeof parsed === "object" &&
+            parsed !== null &&
+            !Array.isArray(parsed)
+          ) {
+            toolArgs = parsed as Record<string, unknown>;
+          }
+        } catch {
+          // Malformed JSON from the LLM — keep the empty-args fallback.
+        }
+
+        const result = await client.callTool({
+          name: toolName,
+          arguments: toolArgs,
+        });
+
+        const record: ToolCall = {
+          tool: toolName,
+          args: toolArgs,
+          result: result.content,
+        };
+        trajectory.push(record);
+
+        // Feed the tool result back so the LLM can reason about it.
+        // JSON.stringify(undefined) returns undefined rather than a string,
+        // so a missing payload is serialized as "null" to honour the
+        // `content: string` contract of tool messages.
+        messages.push({
+          role: "tool",
+          content: JSON.stringify(result.content ?? null),
+          tool_call_id: toolCall.id,
+        });
+      }
+    }
+
+    return trajectory;
+  } finally {
+    // Closing the client also closes clientTransport, which triggers
+    // serverTransport.onclose() — the InMemoryTransport linked pair
+    // tears down cleanly without needing an explicit server.close().
+    await client.close();
+  }
+}
diff --git a/evals/runner.ts b/evals/runner.ts
new file mode 100644
index 0000000..f92a338
--- /dev/null
+++ b/evals/runner.ts
@@ -0,0 +1,128 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { describe, it, expect, afterAll } from "vitest";
+import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import type { Dataset, EvalResult, EvaluatorResult, Evaluator } from "./types.js";
+import { runMcpHostLoop } from "./runMcpHostLoop.js";
+
+/** Options accepted by `runDataset`. */
+export interface RunnerOptions {
+  /**
+   * Minimum numeric score [0–1] for a test to pass. Defaults to 0.5.
+   * A score equal to the threshold passes; `'N/A'` scores are not checked.
+   */
+  passingScore?: number;
+  /**
+   * Factory that produces a fresh McpServer for each example.
+   *
+   * A fresh instance is required per-run because InMemoryTransport is torn
+   * down after each `runMcpHostLoop` call. When omitted, `runMcpHostLoop`
+   * falls back to `createServer()`, which requires `CLUSTERS_JSON`.
+   *
+   * Pass `createEvalServer` from `evals/helpers/evalServer.ts` to run eval
+   * suites without a live Elastic cluster (only API keys are needed).
+   */
+  createServer?: () => McpServer;
+}
+
+/**
+ * Registers one Vitest suite for `dataset`, with one test per example.
+ *
+ * The suite is skipped unless `RUN_LLM_EVALS` is set to a non-empty value
+ * (the `test:evals` script sets it to `1`) AND at least one of
+ * `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` is present, so a regular `npm test`
+ * incurs zero LLM cost.
+ *
+ * Each example's test:
+ *   1. Runs the in-process MCP host loop to collect a trajectory.
+ *   2. Runs every evaluator against that trajectory, one after another.
+ *   3. Stores the result for the summary, then asserts that each numeric
+ *      score is at least `passingScore` (`'N/A'` scores are not asserted).
+ *
+ * Once the suite finishes, a Markdown summary is written to stdout so the
+ * GitHub Actions job summary (>> $GITHUB_STEP_SUMMARY) can capture it.
+ *
+ * @param dataset Named collection of examples to run.
+ * @param evaluators Evaluators keyed by the name shown in failures and in
+ *   the summary table.
+ * @param options Passing threshold and optional per-example server factory.
+ */
+export function runDataset(
+  dataset: Dataset,
+  evaluators: Record<string, Evaluator>,
+  options: RunnerOptions = {}
+): void {
+  const { passingScore: threshold = 0.5, createServer: makeServer } = options;
+
+  const providerConfigured = Boolean(
+    process.env.ANTHROPIC_API_KEY || process.env.OPENAI_API_KEY
+  );
+  const shouldSkip = !(process.env.RUN_LLM_EVALS && providerConfigured);
+
+  describe.skipIf(shouldSkip)(dataset.name, () => {
+    const collected: EvalResult[] = [];
+
+    for (const example of dataset.examples) {
+      it(example.id, async () => {
+        const trajectory = await runMcpHostLoop(example.input, {
+          server: makeServer?.(),
+        });
+
+        // Evaluators run sequentially, in the key order of `evaluators`.
+        const scores: Record<string, EvaluatorResult> = {};
+        for (const [name, evaluator] of Object.entries(evaluators)) {
+          scores[name] = await evaluator(trajectory, example.expected);
+        }
+
+        // Store the result before asserting so failing examples still show
+        // up in the summary.
+        collected.push({
+          exampleId: example.id,
+          input: example.input,
+          trajectory,
+          evaluators: scores,
+        });
+
+        for (const [name, { score, reason }] of Object.entries(scores)) {
+          if (score === "N/A") {
+            continue;
+          }
+          const detail = reason ? `: ${reason}` : "";
+          expect(
+            score,
+            `[${name}] score ${score.toFixed(2)} < ${threshold}${detail}`
+          ).toBeGreaterThanOrEqual(threshold);
+        }
+      });
+    }
+
+    afterAll(() => {
+      const summary = buildMarkdownSummary(dataset.name, collected);
+      process.stdout.write(summary + "\n");
+    });
+  });
+}
+
+/**
+ * Renders the collected results as a Markdown table: one row per example,
+ * one column per evaluator, scores shown as whole percentages.
+ *
+ * Free-form text (example ids, user prompts, evaluator names) is flattened
+ * to a single line and has `|` escaped before it is placed in a cell.
+ * Without that, a prompt containing a pipe — every SPL query does — or a
+ * line break would split the cell or terminate the row.
+ *
+ * @param datasetName Heading text for the summary.
+ * @param results Results gathered so far; may be empty.
+ * @returns Markdown text. A short "no examples ran" note when `results` is
+ *   empty, otherwise a heading followed by the table.
+ */
+function buildMarkdownSummary(datasetName: string, results: EvalResult[]): string {
+  if (results.length === 0) {
+    return `## Eval results: ${datasetName}\n\n_No examples ran._\n`;
+  }
+
+  // Collapse every whitespace run (including newlines) into one space.
+  const oneLine = (text: string): string => text.replace(/\s+/g, " ").trim();
+  // Escape the column delimiter so it renders as a literal pipe.
+  const escapePipes = (text: string): string => text.replace(/\|/g, "\\|");
+
+  const evaluatorNames = Array.from(
+    new Set(results.flatMap((r) => Object.keys(r.evaluators)))
+  );
+
+  const headers = [
+    "id",
+    "input",
+    ...evaluatorNames.map((name) => escapePipes(oneLine(name))),
+  ];
+  const separator = headers.map(() => "---");
+
+  const rows = results.map((r) => {
+    const scoreCells = evaluatorNames.map((name) => {
+      const e = r.evaluators[name];
+      if (!e) return "—";
+      if (e.score === "N/A") return "N/A";
+      return `${(e.score * 100).toFixed(0)}%`;
+    });
+    return [
+      escapePipes(oneLine(r.exampleId)),
+      // Truncate before escaping so the 60-character budget is spent on
+      // visible text and an escape sequence is never cut in half.
+      escapePipes(truncate(oneLine(r.input), 60)),
+      ...scoreCells,
+    ];
+  });
+
+  const lines = [
+    `## Eval results: ${datasetName}`,
+    "",
+    `| ${headers.join(" | ")} |`,
+    `| ${separator.join(" | ")} |`,
+    ...rows.map((row) => `| ${row.join(" | ")} |`),
+    "",
+  ];
+
+  return lines.join("\n");
+}
+
+/**
+ * Returns `s` unchanged when it is no longer than `maxLen` UTF-16 code
+ * units; otherwise keeps the first `maxLen - 1` and appends `…`.
+ */
+function truncate(s: string, maxLen: number): string {
+  if (s.length > maxLen) {
+    const head = s.slice(0, maxLen - 1);
+    return `${head}…`;
+  }
+  return s;
+}
diff --git a/evals/types.ts b/evals/types.ts
new file mode 100644
index 0000000..4722075
--- /dev/null
+++ b/evals/types.ts
@@ -0,0 +1,75 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+/** A single tool invocation captured during an MCP host loop run. */
+export interface ToolCall {
+  /** Name of the MCP tool that was called. */
+  tool: string;
+  /** Arguments the tool was called with, as a parsed object. */
+  args: Record<string, unknown>;
+  /** Payload returned by the tool, when one was captured. */
+  result?: unknown;
+}
+
+/**
+ * Ordered sequence of tool calls produced by one eval run.
+ * An empty array means no tool was called.
+ */
+export type Trajectory = ToolCall[];
+
+/**
+ * What a passing run should look like.
+ * Every field is optional. Evaluators that depend on `tools` or `criteria`
+ * return `'N/A'` when the field is absent, so a dataset can omit whichever
+ * dimension is irrelevant for a given example.
+ */
+export interface ExpectedBehavior {
+  /** Ordered list of tool names the host should call. Used by trajectory / tool-selection evaluators. */
+  tools?: string[];
+  /** Natural-language assertions checked by the criteria (LLM-as-judge) evaluator. */
+  criteria?: string[];
+  /** Skill ID that should be activated. Used by the skill-activation evaluator. */
+  skill?: string;
+}
+
+/** One test case inside a dataset. */
+export interface Example {
+  /** Stable identifier — used as a key in result tables and CI summaries. */
+  id: string;
+  /** The user message sent to the LLM host at the start of the simulation. */
+  input: string;
+  /** Expectations handed to every evaluator together with the trajectory. */
+  expected: ExpectedBehavior;
+}
+
+/** A named collection of examples that can be loaded by the runner. */
+export interface Dataset {
+  /** Display name of the dataset. */
+  name: string;
+  /** Test cases in the dataset. */
+  examples: Example[];
+}
+
+/**
+ * Output of a single evaluator for one example.
+ * `score` is a value in [0, 1] when the evaluator ran, or `'N/A'` when the
+ * evaluator skipped (e.g. `expected.tools` was absent for trajectory evaluator).
+ */
+export interface EvaluatorResult {
+  score: number | 'N/A';
+  /**
+   * Human-readable explanation of the score. Expected whenever the score is
+   * numeric, although the type does not enforce it.
+   */
+  reason?: string;
+}
+
+/** Aggregate result for one example after all evaluators have run. */
+export interface EvalResult {
+  /** `id` of the example this result belongs to. */
+  exampleId: string;
+  /** The example's user message, kept for reporting. */
+  input: string;
+  /** Tool calls recorded while running the example. */
+  trajectory: Trajectory;
+  /** Keys are evaluator names (e.g. `'skill-activation'`, `'trajectory'`). */
+  evaluators: Record<string, EvaluatorResult>;
+}
+
+/**
+ * Contract every evaluator module must satisfy.
+ * An evaluator may return its result directly or as a Promise; the Promise
+ * form accommodates LLM-as-judge evaluators that call an LLM provider.
+ */
+export type Evaluator = (
+  trajectory: Trajectory,
+  expected: ExpectedBehavior
+) => EvaluatorResult | Promise<EvaluatorResult>;
diff --git a/evals/vitest.config.ts b/evals/vitest.config.ts
new file mode 100644
index 0000000..9b363f4
--- /dev/null
+++ b/evals/vitest.config.ts
@@ -0,0 +1,24 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { defineConfig } from "vitest/config";
+
+/**
+ * Vitest config for LLM eval suites.  Intentionally separate from the main
+ * vitest.config.ts so `npm test` never picks up eval files (and thus never
+ * makes LLM calls or requires API keys in a regular dev/CI run).
+ *
+ * Run via: npm run test:evals
+ */
+export default defineConfig({
+  test: {
+    environment: "node",
+    globals: true,
+    // Only files under evals/ whose names end in .test.ts, .spec.ts or
+    // .eval.ts are collected.
+    include: ["evals/**/*.{test,spec,eval}.ts"],
+    // 120 s per test.
+    // NOTE(review): presumably sized for multi-turn LLM round-trips — confirm.
+    testTimeout: 120_000,
+  },
+});
diff --git a/manifest.json b/manifest.json
index b0694a4..a27d689 100644
--- a/manifest.json
+++ b/manifest.json
@@ -2,7 +2,7 @@
   "manifest_version": "0.3",
   "name": "elastic-security-mcp-app",
   "display_name": "Elastic Security",
-  "version": "1.0.2",
+  "version": "1.1.0",
   "description": "Interactive blue-team security operations for Elastic Security — alert triage, attack discovery, case management, detection rules, threat hunting, and sample data generation.",
   "long_description": "An MCP App server that brings interactive blue-team security operations directly into Claude Desktop. Provides six rich React-based UIs that render inline in the conversation: alert triage with AI verdicts, AI-powered attack discovery with confidence scoring and MITRE mapping, case management with the Kibana Cases API, detection rule browsing and tuning, an ES|QL threat-hunting workbench with a D3 investigation graph, and an ECS sample-data generator for demos.",
   "author": {
@@ -57,6 +57,10 @@
     {
       "name": "generate-sample-data",
       "description": "Generate ECS-compliant security events for demos"
+    },
+    {
+      "name": "migrate-rules",
+      "description": "Migrate detection rules from Splunk (and other SIEMs) to Elastic Security"
     }
   ],
   "tools_generated": true,
diff --git a/package-lock.json b/package-lock.json
index 08e9dde..156fc31 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -33,6 +33,7 @@
         "elastic-security-mcp-app": "dist/main.js"
       },
       "devDependencies": {
+        "@anthropic-ai/sdk": "^0.96.0",
         "@tailwindcss/vite": "^4.2.2",
         "@testing-library/jest-dom": "^6.9.1",
         "@testing-library/react": "^16.3.2",
@@ -54,6 +55,7 @@
         "husky": "^9.1.7",
         "jsdom": "^29.1.1",
         "lint-staged": "^16.4.0",
+        "openai": "^6.37.0",
         "tailwindcss": "^4.2.2",
         "tsx": "^4.21.0",
         "typescript": "^6.0.2",
@@ -73,6 +75,28 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/@anthropic-ai/sdk": {
+      "version": "0.96.0",
+      "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.96.0.tgz",
+      "integrity": "sha512-KlCsODtTyb17bLUVCSDC2HtSvAbJf60sEiPEax9dInF+aDF92vS4TZJ5XD7YCQXNb1/5icYaw8Y7wMjPlIV9Zg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "json-schema-to-ts": "^3.1.1",
+        "standardwebhooks": "^1.0.0"
+      },
+      "bin": {
+        "anthropic-ai-sdk": "bin/cli"
+      },
+      "peerDependencies": {
+        "zod": "^3.25.0 || ^4.0.0"
+      },
+      "peerDependenciesMeta": {
+        "zod": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@asamuzakjp/css-color": {
       "version": "5.1.11",
       "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-5.1.11.tgz",
@@ -1859,6 +1883,13 @@
       ],
       "peer": true
     },
+    "node_modules/@stablelib/base64": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/@stablelib/base64/-/base64-1.0.1.tgz",
+      "integrity": "sha512-1bnPQqSxSuc3Ii6MhBysoWCg58j97aUjuCSZrGSmDxNqtytIi0k8utUenAwTZN4V5mXXYGsVUI9zeBqy+jBOSQ==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/@standard-schema/spec": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.1.0.tgz",
@@ -4239,6 +4270,13 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/fast-sha256": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/fast-sha256/-/fast-sha256-1.3.0.tgz",
+      "integrity": "sha512-n11RGP/lrWEFI/bWdygLxhI+pVeo1ZYIVwvvPkW7azl/rOy+F3HYRZ2K5zeE9mmkhQppyv9sQFx0JM9UabnpPQ==",
+      "dev": true,
+      "license": "Unlicense"
+    },
     "node_modules/fast-uri": {
       "version": "3.1.0",
       "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz",
@@ -4944,6 +4982,20 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/json-schema-to-ts": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz",
+      "integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@babel/runtime": "^7.18.3",
+        "ts-algebra": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=16"
+      }
+    },
     "node_modules/json-schema-traverse": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
@@ -5750,14 +5802,6 @@
         "node": ">= 18"
       }
     },
-    "node_modules/monaco-promql": {
-      "version": "1.8.0",
-      "resolved": "https://registry.npmjs.org/monaco-promql/-/monaco-promql-1.8.0.tgz",
-      "integrity": "sha512-XdgRojBzEe/rKtrJaHbSfoMFOMD5TXymDHIitTngmBT6XEjtAirnA7Rb2YJAO1SZrJfgvAo4LFCzJ71fH7+WOw==",
-      "license": "MIT",
-      "optional": true,
-      "peer": true
-    },
     "node_modules/ms": {
       "version": "2.1.3",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
@@ -5868,6 +5912,28 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/openai": {
+      "version": "6.37.0",
+      "resolved": "https://registry.npmjs.org/openai/-/openai-6.37.0.tgz",
+      "integrity": "sha512-0H5dEGFmmLv6KSd0W1w2nyL8WsLkX6yoLeQpU+dZAOuGcany5qkYQMmj35ZrKgb6yiyYqpUzFOpR8mZQkgqeEQ==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "bin": {
+        "openai": "bin/cli"
+      },
+      "peerDependencies": {
+        "ws": "^8.18.0",
+        "zod": "^3.25 || ^4.0"
+      },
+      "peerDependenciesMeta": {
+        "ws": {
+          "optional": true
+        },
+        "zod": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/optionator": {
       "version": "0.9.4",
       "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
@@ -6632,6 +6698,17 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/standardwebhooks": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/standardwebhooks/-/standardwebhooks-1.0.0.tgz",
+      "integrity": "sha512-BbHGOQK9olHPMvQNHWul6MYlrRTAOKn03rOe4A8O3CLWhNf4YHBqq2HJKKC+sfqpxiBY52pNeesD6jIiLDz8jg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@stablelib/base64": "^1.0.0",
+        "fast-sha256": "^1.3.0"
+      }
+    },
     "node_modules/state-local": {
       "version": "1.0.7",
       "resolved": "https://registry.npmjs.org/state-local/-/state-local-1.0.7.tgz",
@@ -6871,6 +6948,13 @@
         "tree-kill": "cli.js"
       }
     },
+    "node_modules/ts-algebra": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
+      "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/ts-api-utils": {
       "version": "2.5.0",
       "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.5.0.tgz",
diff --git a/package.json b/package.json
index 983e3ca..2308b39 100644
--- a/package.json
+++ b/package.json
@@ -47,6 +47,7 @@
     "test": "vitest",
     "test:run": "vitest run",
     "test:coverage": "vitest run --coverage",
+    "test:evals": "cross-env RUN_LLM_EVALS=1 vitest run --config evals/vitest.config.ts --reporter=verbose",
     "prepublishOnly": "npm run build",
     "prepare": "husky",
     "version": "node -e \"const m=JSON.parse(require('fs').readFileSync('manifest.json','utf8'));m.version=require('./package.json').version;require('fs').writeFileSync('manifest.json',JSON.stringify(m,null,2)+'\\n')\" && git add manifest.json"
@@ -81,6 +82,7 @@
     "react-dom": "^19.2.4"
   },
   "devDependencies": {
+    "@anthropic-ai/sdk": "^0.96.0",
     "@tailwindcss/vite": "^4.2.2",
     "@testing-library/jest-dom": "^6.9.1",
     "@testing-library/react": "^16.3.2",
@@ -102,6 +104,7 @@
     "husky": "^9.1.7",
     "jsdom": "^29.1.1",
     "lint-staged": "^16.4.0",
+    "openai": "^6.37.0",
     "tailwindcss": "^4.2.2",
     "tsx": "^4.21.0",
     "typescript": "^6.0.2",
diff --git a/skills/automatic-migration/SKILL.md b/skills/automatic-migration/SKILL.md
new file mode 100644
index 0000000..ce51d85
--- /dev/null
+++ b/skills/automatic-migration/SKILL.md
@@ -0,0 +1,101 @@
+---
+name: automatic-migration
+description: >
+  Migrate detection rules from Splunk (or other SIEMs) to Elastic Security. Use for
+  "migrate my Splunk rules", "import SPL", "onboard from Splunk", "SIEM migration",
+  "convert detection rules", "translate SPL to EQL", or any request to move security
+  rules from a third-party platform into Elastic. Vendor support: Splunk (active),
+  QRadar / Sentinel-One (coming soon).
+---
+
+# Automatic Migration
+
+Migrate third-party SIEM detection rules into Elastic Security using the `elastic-security`
+MCP connector. Call `migrate-rules` ONCE — it opens an interactive workbench that guides
+the SOC engineer through every stage of the migration. Do NOT attempt to drive the process
+step-by-step through prose or individual tool calls; the workbench handles all state
+transitions internally.
+
+## Tools
+
+| Tool | Caller | Purpose |
+|------|--------|---------|
+| `migrate-rules` | Model | **Entry point.** Opens the interactive migration workbench. No parameters required. |
+| `list-migrations` | Workbench | List all existing SIEM migrations |
+| `get-migration` | Workbench | Get status and rule counts for a specific migration |
+| `get-translated-rules` | Workbench | Fetch translated rules (paginated, filterable) |
+| `start-translation` | Workbench | Trigger AI translation of uploaded rules |
+| `stop-translation` | Workbench | Cancel an in-progress translation |
+| `update-translated-rule` | Workbench | Save manual edits to a translated rule |
+| `get-resources` | Workbench | List macro/lookup resources referenced by translated rules |
+| `upsert-resource` | Workbench | Create or update a macro or lookup definition |
+| `install-rules` | Workbench | Install translated rules into Elastic Security (installed as disabled) |
+| `get-stats` | Workbench | Get translation progress counts for a migration |
+
+Only `migrate-rules` is model-facing. All other tools are called by the workbench via its
+back-channel. Do not call them directly in conversation.
+
+## Workbench Lifecycle
+
+| Stage | What the user does | Completion signal |
+|-------|--------------------|-------------------|
+| **vendor-select** | Picks the source SIEM (Splunk active; QRadar / Sentinel-One coming soon) | Vendor button clicked |
+| **upload** | Drops a JSON export file, uses the file picker, or pastes a rules array | "Upload & start translation" clicked |
+| **translating** | Waits while the AI translator processes rules; live progress bar | Migration status reaches `finished` or `error` |
+| **review** | Reviews each rule's three-column diff (original SPL / generated / editable) | "Install N rules" clicked |
+| **fix-rule-drawer** | Edits key fields of a single rule (name, query, language, severity, risk score) via structured form; "Re-validate" marks it `partial`, "Save" uses the selected result | Drawer closed |
+| **fix-resources-drawer** | Provides definitions for unresolved macros and lookups; each row has an individual Save button calling `upsert-resource` | "Done" in the drawer |
+| **install** | Confirms installation of all translatable rules; "Back to review" is available | "Confirm install" clicked |
+| **done** | Views the installed / failed summary | — |
+
+## Correction Strategy
+
+If the user wants to revisit or undo a step:
+
+- **Start over at any step**: the "Start over" button in the header resets to vendor-select.
+- **Back from install confirmation**: click "Back to review" to return without installing.
+- **Re-edit a specific rule**: re-open the rule drawer from the review list and save again;
+  each save calls `update-translated-rule` and refreshes the list in-place.
+- **Re-edit a resource**: re-open the resources drawer; each per-row "Save" calls
+  `upsert-resource` and re-fetches the resources list without closing the drawer.
+- **Restart translation**: use "Start over", re-upload the rules, then re-trigger translation.
+
+The workbench never permanently deletes data. Translation results and rule edits are persisted
+in Kibana; calling `migrate-rules` again lists all prior migrations in its response, although
+the workbench itself restarts at vendor-select.
+
+## Common Gotchas
+
+**Vendor not supported.** QRadar and Sentinel-One show as "Coming soon" — their vendor-select
+buttons are disabled. If the user asks to migrate from a non-Splunk platform, explain that
+only Splunk is currently supported and suggest they check the Elastic roadmap for updates.
+
+**Calling app-only tools directly.** Do not call `start-translation`, `get-translated-rules`,
+`install-rules`, or any other app-only tool manually. They are wired to the workbench
+back-channel and will return raw JSON with no useful context in a prose conversation. Always
+call `migrate-rules` once and let the workbench drive everything else.
+
+**Upload format.** The upload step expects a JSON array of Splunk rule objects as exported from
+the Splunk Enterprise Security Rules page. Each object must include a `search` field containing
+the raw SPL query. Other formats (YAML, CSV, Splunk `.conf` files) are not supported and will
+fail silently.
+
+**Partial translations.** Rules marked `partial` were AI-translated but may need tuning before
+they match the customer's data. They can be installed; like every rule installed through the
+workbench, they arrive disabled, so the SOC engineer should review and enable them manually.
+Rules marked `untranslatable` are skipped during installation entirely.
+
+**Macro and lookup references.** Splunk rules that reference custom macros or lookups will
+translate with placeholder references. The fix-resources-drawer lists all detected unresolved
+references and auto-expands them. Fill in each definition before installing — installed rules
+that reference undefined macros will not fire correctly.
+
+**Large rule sets.** Translation is asynchronous. For large exports (hundreds of rules), the
+translating stage may run for several minutes. The workbench polls for progress every 3 seconds
+and updates the progress bar automatically. Do not suggest stopping the translation (the
+workbench does this through `stop-translation`) unless the user explicitly wants to cancel and
+discard in-progress results.
+
+**Re-opening an existing migration.** Calling `migrate-rules` when one or more migrations
+already exist will show them in the response JSON. The workbench starts at vendor-select each
+time — there is no "resume" flow yet. To continue working on an existing migration, the user
+must navigate through the workbench stages again; prior translations are preserved on the
+server and will reappear in the review step after re-triggering translation.
diff --git a/src/elastic/service/index.ts b/src/elastic/service/index.ts
index 38671ee..3c6e574 100644
--- a/src/elastic/service/index.ts
+++ b/src/elastic/service/index.ts
@@ -19,3 +19,18 @@ export type {
   ScenarioRuleDef,
 } from "./sampleDataService.js";
 export { SampleDataService, SCENARIO_NAMES, SCENARIO_RULES } from "./sampleDataService.js";
+export type {
+  SiemMigration,
+  TranslatedRule,
+  MigrationResource,
+  MigrationStats,
+  ListTranslatedRulesOptions,
+  ListTranslatedRulesResult,
+  InstallRulesOptions,
+  InstallRulesResult,
+} from "./migrationsService.js";
+export {
+  MigrationApiError,
+  MigrationsService,
+  SIEM_MIGRATIONS_API_BASE,
+} from "./migrationsService.js";
diff --git a/src/elastic/service/migrationsService.test.ts b/src/elastic/service/migrationsService.test.ts
new file mode 100644
index 0000000..0c184e7
--- /dev/null
+++ b/src/elastic/service/migrationsService.test.ts
@@ -0,0 +1,329 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { describe, it, expect, beforeEach } from "vitest";
+import {
+  MigrationsService,
+  MigrationApiError,
+  SIEM_MIGRATIONS_API_BASE,
+} from "./migrationsService.js";
+import type { KibanaClient } from "../kibana-client/index.js";
+import {
+  createMockKibanaClient,
+  dataEnvelope,
+  type MockHttpClient,
+} from "../../test/helpers/mockHttpClient.js";
+import type { SiemMigration, TranslatedRule, MigrationResource } from "./migrationsService.js";
+
+// Shared fixtures for the MigrationsService suite.
+
+// Route prefix under test; taken from the service so the assertions follow any rename.
+const BASE = SIEM_MIGRATIONS_API_BASE;
+// Request config the service is expected to pass on calls that carry no query params.
+const HEADERS = { headers: { "elastic-api-version": "2023-10-31" } };
+
+// Ids contain only URL-safe characters, so they appear verbatim in expected paths.
+const MIGRATION_ID = "migration-1";
+const RULE_ID = "rule-1";
+
+// Minimal migration with every rule counter zeroed.
+const fakeMigration: SiemMigration = {
+  id: MIGRATION_ID,
+  name: "test-migration",
+  status: "ready",
+  created_at: "2026-01-01T00:00:00Z",
+  last_updated_at: "2026-01-01T00:00:00Z",
+  rules: {
+    total: 0, pending: 0, processing: 0, completed: 0, failed: 0,
+    installable: 0, installed: 0, partially_translated: 0, untranslatable: 0,
+  },
+};
+
+// Fully translated rule belonging to fakeMigration.
+const fakeRule: TranslatedRule = {
+  id: RULE_ID,
+  migration_id: MIGRATION_ID,
+  status: "completed",
+  translation_result: "full",
+  original_rule: { name: "splunk-rule" },
+};
+
+// Macro resource used by the resources endpoints' tests.
+const fakeResource: MigrationResource = {
+  type: "macro",
+  name: "my_macro",
+  content: "| where true",
+};
+
+// Verifies that each MigrationsService method issues the expected HTTP verb,
+// path, body and request config, returns the response payload, and converts
+// client failures into MigrationApiError.
+describe("MigrationsService", () => {
+  let kibanaClient: KibanaClient & MockHttpClient;
+  let service: MigrationsService;
+
+  // A fresh mock client per test keeps recorded calls and queued stubs isolated.
+  beforeEach(() => {
+    kibanaClient = createMockKibanaClient();
+    service = new MigrationsService({ kibanaClient });
+  });
+
+  // ── Migration lifecycle ────────────────────────────────────────────────────
+
+  describe("createMigration", () => {
+    it("POSTs to /rules with the migration name and returns migration_id", async () => {
+      kibanaClient.post.mockResolvedValueOnce(dataEnvelope({ migration_id: MIGRATION_ID }));
+
+      const result = await service.createMigration("My Migration");
+
+      expect(kibanaClient.post).toHaveBeenCalledWith(
+        `${BASE}/rules`,
+        { name: "My Migration" },
+        HEADERS
+      );
+      expect(result).toEqual({ migration_id: MIGRATION_ID });
+    });
+  });
+
+  describe("listMigrations", () => {
+    it("GETs /rules and returns the array", async () => {
+      kibanaClient.get.mockResolvedValueOnce(dataEnvelope([fakeMigration]));
+
+      const result = await service.listMigrations();
+
+      expect(kibanaClient.get).toHaveBeenCalledWith(`${BASE}/rules`, HEADERS);
+      expect(result).toEqual([fakeMigration]);
+    });
+  });
+
+  describe("getMigration", () => {
+    it("GETs /rules/:migrationId and returns the migration", async () => {
+      kibanaClient.get.mockResolvedValueOnce(dataEnvelope(fakeMigration));
+
+      const result = await service.getMigration(MIGRATION_ID);
+
+      expect(kibanaClient.get).toHaveBeenCalledWith(
+        `${BASE}/rules/${MIGRATION_ID}`,
+        HEADERS
+      );
+      expect(result).toEqual(fakeMigration);
+    });
+  });
+
+  describe("deleteMigration", () => {
+    it("DELETEs /rules/:migrationId", async () => {
+      // No stub queued: the method returns nothing, so only the call shape matters.
+      // NOTE(review): relies on the mock's default resolution — confirm in the helper.
+      await service.deleteMigration(MIGRATION_ID);
+
+      expect(kibanaClient.delete).toHaveBeenCalledWith(
+        `${BASE}/rules/${MIGRATION_ID}`,
+        HEADERS
+      );
+    });
+  });
+
+  // ── Rule upload ────────────────────────────────────────────────────────────
+
+  describe("uploadRules", () => {
+    it("POSTs rules array to /rules/:migrationId/rules and returns totals", async () => {
+      // The stubbed total (5) deliberately differs from the two rules sent: the
+      // service must return the server's payload, not count its own input.
+      kibanaClient.post.mockResolvedValueOnce(dataEnvelope({ total: 5 }));
+      const splunkRules = [{ search: "index=main" }, { search: "index=security" }];
+
+      const result = await service.uploadRules(MIGRATION_ID, splunkRules);
+
+      expect(kibanaClient.post).toHaveBeenCalledWith(
+        `${BASE}/rules/${MIGRATION_ID}/rules`,
+        splunkRules,
+        HEADERS
+      );
+      expect(result).toEqual({ total: 5 });
+    });
+  });
+
+  // ── Translated rules ───────────────────────────────────────────────────────
+
+  describe("getTranslatedRules", () => {
+    it("GETs /rules/:migrationId/rules with default pagination", async () => {
+      kibanaClient.get.mockResolvedValueOnce(dataEnvelope({ data: [fakeRule], total: 1 }));
+
+      const result = await service.getTranslatedRules(MIGRATION_ID);
+
+      // Inspect the raw call because the config also carries `headers`.
+      const [path, config] = kibanaClient.get.mock.calls[0] as [string, Record<string, unknown>];
+      expect(path).toBe(`${BASE}/rules/${MIGRATION_ID}/rules`);
+      // Query values are sent as strings, not numbers.
+      expect(config.params).toMatchObject({ page: "1", per_page: "20" });
+      expect(result).toEqual({ data: [fakeRule], total: 1 });
+    });
+
+    it("forwards custom page, perPage and filter params", async () => {
+      kibanaClient.get.mockResolvedValueOnce(dataEnvelope({ data: [], total: 0 }));
+
+      await service.getTranslatedRules(MIGRATION_ID, { page: 2, perPage: 50, filter: "status:completed" });
+
+      const [, config] = kibanaClient.get.mock.calls[0] as [string, Record<string, unknown>];
+      // toEqual (not toMatchObject): no params beyond these three may be sent.
+      expect(config.params).toEqual({ page: "2", per_page: "50", filter: "status:completed" });
+    });
+  });
+
+  describe("getTranslatedRule", () => {
+    it("GETs /rules/:migrationId/rules/:ruleId", async () => {
+      kibanaClient.get.mockResolvedValueOnce(dataEnvelope(fakeRule));
+
+      const result = await service.getTranslatedRule(MIGRATION_ID, RULE_ID);
+
+      expect(kibanaClient.get).toHaveBeenCalledWith(
+        `${BASE}/rules/${MIGRATION_ID}/rules/${RULE_ID}`,
+        HEADERS
+      );
+      expect(result).toEqual(fakeRule);
+    });
+  });
+
+  describe("updateTranslatedRule", () => {
+    it("PUTs updates to /rules/:migrationId/rules/:ruleId and returns the updated rule", async () => {
+      const updated = { ...fakeRule, translation_result: "partial" as const };
+      kibanaClient.put.mockResolvedValueOnce(dataEnvelope(updated));
+
+      const result = await service.updateTranslatedRule(MIGRATION_ID, RULE_ID, {
+        translation_result: "partial",
+      });
+
+      // Only the changed fields are sent as the request body.
+      expect(kibanaClient.put).toHaveBeenCalledWith(
+        `${BASE}/rules/${MIGRATION_ID}/rules/${RULE_ID}`,
+        { translation_result: "partial" },
+        HEADERS
+      );
+      expect(result).toEqual(updated);
+    });
+  });
+
+  // ── Translation control ────────────────────────────────────────────────────
+
+  describe("startTranslation", () => {
+    it("POSTs to /rules/:migrationId/start", async () => {
+      await service.startTranslation(MIGRATION_ID);
+
+      // An explicit empty object is sent as the body.
+      expect(kibanaClient.post).toHaveBeenCalledWith(
+        `${BASE}/rules/${MIGRATION_ID}/start`,
+        {},
+        HEADERS
+      );
+    });
+  });
+
+  describe("stopTranslation", () => {
+    it("POSTs to /rules/:migrationId/stop", async () => {
+      await service.stopTranslation(MIGRATION_ID);
+
+      expect(kibanaClient.post).toHaveBeenCalledWith(
+        `${BASE}/rules/${MIGRATION_ID}/stop`,
+        {},
+        HEADERS
+      );
+    });
+  });
+
+  // ── Resources ──────────────────────────────────────────────────────────────
+
+  describe("getResources", () => {
+    it("GETs /resources/:migrationId and returns the array", async () => {
+      kibanaClient.get.mockResolvedValueOnce(dataEnvelope([fakeResource]));
+
+      const result = await service.getResources(MIGRATION_ID);
+
+      expect(kibanaClient.get).toHaveBeenCalledWith(
+        `${BASE}/resources/${MIGRATION_ID}`,
+        HEADERS
+      );
+      expect(result).toEqual([fakeResource]);
+    });
+  });
+
+  describe("upsertResources", () => {
+    it("POSTs resources array to /resources/:migrationId", async () => {
+      await service.upsertResources(MIGRATION_ID, [fakeResource]);
+
+      // The array itself is the body; it is not wrapped in an object.
+      expect(kibanaClient.post).toHaveBeenCalledWith(
+        `${BASE}/resources/${MIGRATION_ID}`,
+        [fakeResource],
+        HEADERS
+      );
+    });
+  });
+
+  // ── Installation ───────────────────────────────────────────────────────────
+
+  describe("installRules", () => {
+    it("POSTs empty body to /rules/:migrationId/install when no ids given", async () => {
+      kibanaClient.post.mockResolvedValueOnce(dataEnvelope({ installed: 3, failed: 0 }));
+
+      const result = await service.installRules(MIGRATION_ID);
+
+      expect(kibanaClient.post).toHaveBeenCalledWith(
+        `${BASE}/rules/${MIGRATION_ID}/install`,
+        {},
+        HEADERS
+      );
+      expect(result).toEqual({ installed: 3, failed: 0 });
+    });
+
+    it("includes ids in the body when provided", async () => {
+      kibanaClient.post.mockResolvedValueOnce(dataEnvelope({ installed: 1, failed: 0 }));
+
+      await service.installRules(MIGRATION_ID, { ids: ["r1", "r2"] });
+
+      // Only the body is asserted here; the path is covered by the previous test.
+      const [, body] = kibanaClient.post.mock.calls[0] as [string, Record<string, unknown>];
+      expect(body).toEqual({ ids: ["r1", "r2"] });
+    });
+  });
+
+  // ── Stats ──────────────────────────────────────────────────────────────────
+
+  describe("getStats", () => {
+    it("GETs /rules/:migrationId/stats and returns the stats", async () => {
+      const stats = { id: MIGRATION_ID, status: "ready" as const, rules: fakeMigration.rules };
+      kibanaClient.get.mockResolvedValueOnce(dataEnvelope(stats));
+
+      const result = await service.getStats(MIGRATION_ID);
+
+      expect(kibanaClient.get).toHaveBeenCalledWith(
+        `${BASE}/rules/${MIGRATION_ID}/stats`,
+        HEADERS
+      );
+      expect(result).toEqual(stats);
+    });
+  });
+
+  // ── MigrationApiError ──────────────────────────────────────────────────────
+
+  describe("MigrationApiError", () => {
+    it("wraps non-2xx with status parsed from Kibana error format", async () => {
+      const path = `${BASE}/rules/${MIGRATION_ID}`;
+      // Persistent rejection (not ...Once): getMigration is invoked twice below.
+      kibanaClient.get.mockRejectedValue(
+        new Error("Kibana [test-cluster] 404: migration not found")
+      );
+
+      await expect(service.getMigration(MIGRATION_ID)).rejects.toBeInstanceOf(MigrationApiError);
+      await expect(service.getMigration(MIGRATION_ID)).rejects.toMatchObject({
+        status: 404,
+        path,
+        message: expect.stringContaining(path) as string,
+      });
+    });
+
+    it("sets status 0 when error message has no HTTP status code", async () => {
+      kibanaClient.get.mockRejectedValueOnce(new Error("network timeout"));
+
+      // Convert the rejection into a value so its fields can be inspected directly.
+      const err = await service.getMigration(MIGRATION_ID).catch((e) => e as MigrationApiError);
+      expect(err).toBeInstanceOf(MigrationApiError);
+      expect(err.status).toBe(0);
+    });
+
+    it("surfaces a MigrationApiError from every mutating method", async () => {
+      const netErr = new Error("Kibana [test-cluster] 503: service unavailable");
+
+      // Every write verb fails for the rest of this test.
+      kibanaClient.post.mockRejectedValue(netErr);
+      kibanaClient.put.mockRejectedValue(netErr);
+      kibanaClient.delete.mockRejectedValue(netErr);
+
+      await expect(service.createMigration("x")).rejects.toBeInstanceOf(MigrationApiError);
+      await expect(service.uploadRules(MIGRATION_ID, [])).rejects.toBeInstanceOf(MigrationApiError);
+      await expect(service.startTranslation(MIGRATION_ID)).rejects.toBeInstanceOf(MigrationApiError);
+      await expect(service.stopTranslation(MIGRATION_ID)).rejects.toBeInstanceOf(MigrationApiError);
+      await expect(service.upsertResources(MIGRATION_ID, [])).rejects.toBeInstanceOf(MigrationApiError);
+      await expect(service.installRules(MIGRATION_ID)).rejects.toBeInstanceOf(MigrationApiError);
+      await expect(service.updateTranslatedRule(MIGRATION_ID, RULE_ID, {})).rejects.toBeInstanceOf(MigrationApiError);
+      await expect(service.deleteMigration(MIGRATION_ID)).rejects.toBeInstanceOf(MigrationApiError);
+    });
+  });
+});
diff --git a/src/elastic/service/migrationsService.ts b/src/elastic/service/migrationsService.ts
new file mode 100644
index 0000000..ffd0dd4
--- /dev/null
+++ b/src/elastic/service/migrationsService.ts
@@ -0,0 +1,361 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { KibanaClient } from "../kibana-client/index.js";
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+/** Root path shared by every route that {@link MigrationsService} calls. */
+export const SIEM_MIGRATIONS_API_BASE = "/internal/siem_migrations";
+
+/**
+ * Per-request headers required by the Kibana internal SIEM migrations API.
+ * `x-elastic-internal-origin: Kibana` is pre-baked into `KibanaClient`;
+ * only the versioning header needs to be added on each call.
+ *
+ * NOTE(review): Kibana routes registered with internal access are usually
+ * versioned with small integers (e.g. "1") rather than the date-based public
+ * version — confirm that "2023-10-31" is accepted by the targeted Kibana.
+ */
+const MIGRATION_HEADERS = {
+  "elastic-api-version": "2023-10-31",
+} as const;
+
+// ---------------------------------------------------------------------------
+// Domain types
+// ---------------------------------------------------------------------------
+
+/**
+ * A SIEM rule migration, as returned by `MigrationsService.listMigrations`
+ * and `MigrationsService.getMigration`.
+ */
+export interface SiemMigration {
+  id: string;
+  name: string;
+  /** Lifecycle status of the migration. */
+  status: "ready" | "running" | "finished" | "error";
+  // NOTE(review): presumably ISO-8601 timestamps — confirm against the Kibana response.
+  created_at: string;
+  last_updated_at: string;
+  /** Rule counters for this migration; the same shape is reused by `MigrationStats`. */
+  rules: {
+    total: number;
+    pending: number;
+    processing: number;
+    completed: number;
+    failed: number;
+    installable: number;
+    installed: number;
+    partially_translated: number;
+    untranslatable: number;
+  };
+}
+
+/**
+ * One rule inside a migration, as returned by
+ * `MigrationsService.getTranslatedRules` / `getTranslatedRule`.
+ *
+ * Only `elastic_rule`, `translation_result` and `comments` can be changed
+ * through `MigrationsService.updateTranslatedRule`.
+ */
+export interface TranslatedRule {
+  id: string;
+  /** Id of the migration this rule belongs to. */
+  migration_id: string;
+  /** Processing state of this individual rule. */
+  status: "pending" | "processing" | "completed" | "failed";
+  /** Translation outcome; optional, so it may be absent from a response. */
+  translation_result?: "full" | "partial" | "untranslatable";
+  /** Translated rule payload; its shape is not modelled by this module. */
+  elastic_rule?: Record<string, unknown>;
+  /** Source rule payload; its shape is not modelled by this module. */
+  original_rule: Record<string, unknown>;
+  comments?: string[];
+}
+
+/**
+ * A macro or lookup definition exchanged with the resources endpoints
+ * (`MigrationsService.getResources` / `upsertResources`).
+ */
+export interface MigrationResource {
+  /** Kind of resource. */
+  type: "macro" | "lookup";
+  name: string;
+  /** The resource definition, as a string. */
+  content: string;
+}
+
+/**
+ * Result of `MigrationsService.getStats`: a migration's id together with the
+ * same `status` and `rules` counter types that {@link SiemMigration} uses.
+ */
+export interface MigrationStats {
+  id: string;
+  status: SiemMigration["status"];
+  rules: SiemMigration["rules"];
+}
+
+/** Paging and filtering options for `MigrationsService.getTranslatedRules`. */
+export interface ListTranslatedRulesOptions {
+  /** Page number sent as the `page` query param; defaults to 1. */
+  readonly page?: number;
+  /** Page size sent as the `per_page` query param; defaults to 20. */
+  readonly perPage?: number;
+  /** Sent as the `filter` query param; omitted when empty or undefined. */
+  readonly filter?: string;
+}
+
+/** Response payload of `MigrationsService.getTranslatedRules`. */
+export interface ListTranslatedRulesResult {
+  /** The rules returned for the requested page. */
+  data: TranslatedRule[];
+  // NOTE(review): presumably the count of all matching rules across pages — confirm.
+  total: number;
+}
+
+/** Options for `MigrationsService.installRules`. */
+export interface InstallRulesOptions {
+  /**
+   * Specific rule IDs to install; omit to install all installable rules.
+   * When omitted, the service posts an empty object as the request body.
+   */
+  ids?: string[];
+}
+
+/** Response payload of `MigrationsService.installRules`. */
+export interface InstallRulesResult {
+  // NOTE(review): presumably counts of rules installed / failed by this call — confirm.
+  installed: number;
+  failed: number;
+}
+
+// ---------------------------------------------------------------------------
+// Typed error
+// ---------------------------------------------------------------------------
+
+/**
+ * Thrown by every {@link MigrationsService} method when the underlying
+ * client call fails.
+ *
+ * The Kibana client's response interceptor formats AxiosErrors as
+ * `"Kibana [<cluster>] <status>: <body>"` before they reach here, so
+ * `status` is extracted from that message when available.
+ *
+ * - `status`: the HTTP status parsed from the cause message, or `0` when the
+ *   message carries no recognisable status (e.g. a network failure).
+ * - `path`: the request path that failed.
+ * - `cause`: the original error, when the cause was an `Error` instance.
+ */
+export class MigrationApiError extends Error {
+  readonly status: number;
+  readonly path: string;
+
+  /**
+   * @param path  Request path that failed; included in the error message.
+   * @param cause Whatever the client threw; non-Error values are stringified.
+   */
+  constructor(path: string, cause: unknown) {
+    const causeMsg = cause instanceof Error ? cause.message : String(cause);
+
+    super(`SIEM Migrations API error on ${path}: ${causeMsg}`);
+    this.name = "MigrationApiError";
+    this.status = MigrationApiError.parseStatus(causeMsg);
+    this.path = path;
+    if (cause instanceof Error) {
+      this.cause = cause;
+    }
+  }
+
+  /**
+   * Extracts an HTTP status code from a client error message.
+   *
+   * Matching is anchored to known formats on purpose: scanning for "any
+   * three-digit number" misreads digits in the cluster name
+   * (`Kibana [prod-200] 404: ...`) or in the detail text
+   * (`timeout of 300 ms exceeded`) as the status.
+   *
+   * @returns The status (100-599), or 0 when none can be identified.
+   */
+  private static parseStatus(message: string): number {
+    const match =
+      // Kibana client format: "Kibana [name] STATUS: detail"
+      /^Kibana \[.*?\] ([1-5]\d{2})\b/.exec(message) ??
+      // Default Axios wording, in case an error bypasses the interceptor.
+      /\bstatus code ([1-5]\d{2})\b/.exec(message);
+    return match ? parseInt(match[1], 10) : 0;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Service
+// ---------------------------------------------------------------------------
+
+/** Constructor dependencies for {@link MigrationsService}. */
+interface MigrationsServiceOptions {
+  /** Client used for every HTTP call the service makes. */
+  readonly kibanaClient: KibanaClient;
+}
+
+/**
+ * Thin wrapper over the 14 `/internal/siem_migrations/*` Kibana routes.
+ *
+ * Every method adds `elastic-api-version: 2023-10-31`; the underlying
+ * {@link KibanaClient} supplies `x-elastic-internal-origin: Kibana` and
+ * authentication on every request. Any failure raised by the client is
+ * re-thrown as {@link MigrationApiError} carrying the request path.
+ *
+ * Caller-supplied ids are percent-encoded before being placed in a URL, so an
+ * id containing `/`, `?` or `#` stays a single path segment instead of
+ * retargeting the request.
+ */
+export class MigrationsService {
+  private readonly client: KibanaClient;
+
+  constructor(options: MigrationsServiceOptions) {
+    this.client = options.kibanaClient;
+  }
+
+  // ── Internal helpers ─────────────────────────────────────────────────────
+
+  /** Builds `<base>/rules/<migrationId><suffix>` with the id percent-encoded. */
+  private static rulesPath(migrationId: string, suffix = ""): string {
+    return `${SIEM_MIGRATIONS_API_BASE}/rules/${encodeURIComponent(migrationId)}${suffix}`;
+  }
+
+  /** Builds `<base>/resources/<migrationId>` with the id percent-encoded. */
+  private static resourcesPath(migrationId: string): string {
+    return `${SIEM_MIGRATIONS_API_BASE}/resources/${encodeURIComponent(migrationId)}`;
+  }
+
+  /**
+   * Runs `request` and converts anything it throws into a
+   * {@link MigrationApiError} tagged with `path`.
+   */
+  private async send<T>(path: string, request: () => Promise<T>): Promise<T> {
+    try {
+      return await request();
+    } catch (err) {
+      throw new MigrationApiError(path, err);
+    }
+  }
+
+  // ── Migration lifecycle ──────────────────────────────────────────────────
+
+  /**
+   * POST /internal/siem_migrations/rules
+   *
+   * @param name Name for the new migration.
+   * @returns The id of the created migration.
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async createMigration(name: string): Promise<{ migration_id: string }> {
+    const path = `${SIEM_MIGRATIONS_API_BASE}/rules`;
+    return this.send(path, async () => {
+      const { data } = await this.client.post<{ migration_id: string }>(
+        path,
+        { name },
+        { headers: MIGRATION_HEADERS }
+      );
+      return data;
+    });
+  }
+
+  /**
+   * GET /internal/siem_migrations/rules
+   *
+   * @returns The migrations returned by Kibana.
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async listMigrations(): Promise<SiemMigration[]> {
+    const path = `${SIEM_MIGRATIONS_API_BASE}/rules`;
+    return this.send(path, async () => {
+      const { data } = await this.client.get<SiemMigration[]>(path, {
+        headers: MIGRATION_HEADERS,
+      });
+      return data;
+    });
+  }
+
+  /**
+   * GET /internal/siem_migrations/rules/:migrationId
+   *
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async getMigration(migrationId: string): Promise<SiemMigration> {
+    const path = MigrationsService.rulesPath(migrationId);
+    return this.send(path, async () => {
+      const { data } = await this.client.get<SiemMigration>(path, {
+        headers: MIGRATION_HEADERS,
+      });
+      return data;
+    });
+  }
+
+  /**
+   * DELETE /internal/siem_migrations/rules/:migrationId
+   *
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async deleteMigration(migrationId: string): Promise<void> {
+    const path = MigrationsService.rulesPath(migrationId);
+    await this.send(path, () => this.client.delete(path, { headers: MIGRATION_HEADERS }));
+  }
+
+  // ── Splunk rule upload ───────────────────────────────────────────────────
+
+  /**
+   * POST /internal/siem_migrations/rules/:migrationId/rules
+   *
+   * @param rules Rule objects, sent as-is as the request body.
+   * @returns The `total` reported by Kibana.
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async uploadRules(
+    migrationId: string,
+    rules: Record<string, unknown>[]
+  ): Promise<{ total: number }> {
+    const path = MigrationsService.rulesPath(migrationId, "/rules");
+    return this.send(path, async () => {
+      const { data } = await this.client.post<{ total: number }>(
+        path,
+        rules,
+        { headers: MIGRATION_HEADERS }
+      );
+      return data;
+    });
+  }
+
+  // ── Translated rules ─────────────────────────────────────────────────────
+
+  /**
+   * GET /internal/siem_migrations/rules/:migrationId/rules
+   *
+   * @param options Paging/filter options; defaults to page 1, 20 per page.
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async getTranslatedRules(
+    migrationId: string,
+    options: ListTranslatedRulesOptions = {}
+  ): Promise<ListTranslatedRulesResult> {
+    const path = MigrationsService.rulesPath(migrationId, "/rules");
+    // Query values are sent as strings.
+    const params: Record<string, string> = {
+      page: String(options.page ?? 1),
+      per_page: String(options.perPage ?? 20),
+    };
+    // An empty filter is omitted rather than sent as `filter=`.
+    if (options.filter) params.filter = options.filter;
+
+    return this.send(path, async () => {
+      const { data } = await this.client.get<ListTranslatedRulesResult>(path, {
+        params,
+        headers: MIGRATION_HEADERS,
+      });
+      return data;
+    });
+  }
+
+  /**
+   * GET /internal/siem_migrations/rules/:migrationId/rules/:ruleId
+   *
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async getTranslatedRule(
+    migrationId: string,
+    ruleId: string
+  ): Promise<TranslatedRule> {
+    const path = MigrationsService.rulesPath(
+      migrationId,
+      `/rules/${encodeURIComponent(ruleId)}`
+    );
+    return this.send(path, async () => {
+      const { data } = await this.client.get<TranslatedRule>(path, {
+        headers: MIGRATION_HEADERS,
+      });
+      return data;
+    });
+  }
+
+  /**
+   * PUT /internal/siem_migrations/rules/:migrationId/rules/:ruleId
+   *
+   * @param updates Fields to change; sent as-is as the request body.
+   * @returns The rule as returned by Kibana after the update.
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async updateTranslatedRule(
+    migrationId: string,
+    ruleId: string,
+    updates: Partial<Pick<TranslatedRule, "elastic_rule" | "translation_result" | "comments">>
+  ): Promise<TranslatedRule> {
+    const path = MigrationsService.rulesPath(
+      migrationId,
+      `/rules/${encodeURIComponent(ruleId)}`
+    );
+    return this.send(path, async () => {
+      const { data } = await this.client.put<TranslatedRule>(path, updates, {
+        headers: MIGRATION_HEADERS,
+      });
+      return data;
+    });
+  }
+
+  // ── Translation control ──────────────────────────────────────────────────
+
+  /**
+   * POST /internal/siem_migrations/rules/:migrationId/start
+   *
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async startTranslation(migrationId: string): Promise<void> {
+    const path = MigrationsService.rulesPath(migrationId, "/start");
+    await this.send(path, () => this.client.post(path, {}, { headers: MIGRATION_HEADERS }));
+  }
+
+  /**
+   * POST /internal/siem_migrations/rules/:migrationId/stop
+   *
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async stopTranslation(migrationId: string): Promise<void> {
+    const path = MigrationsService.rulesPath(migrationId, "/stop");
+    await this.send(path, () => this.client.post(path, {}, { headers: MIGRATION_HEADERS }));
+  }
+
+  // ── Resources ────────────────────────────────────────────────────────────
+
+  /**
+   * GET /internal/siem_migrations/resources/:migrationId
+   *
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async getResources(migrationId: string): Promise<MigrationResource[]> {
+    const path = MigrationsService.resourcesPath(migrationId);
+    return this.send(path, async () => {
+      const { data } = await this.client.get<MigrationResource[]>(path, {
+        headers: MIGRATION_HEADERS,
+      });
+      return data;
+    });
+  }
+
+  /**
+   * POST /internal/siem_migrations/resources/:migrationId
+   *
+   * @param resources Definitions to send; the array itself is the request body.
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async upsertResources(
+    migrationId: string,
+    resources: MigrationResource[]
+  ): Promise<void> {
+    const path = MigrationsService.resourcesPath(migrationId);
+    await this.send(path, () =>
+      this.client.post(path, resources, { headers: MIGRATION_HEADERS })
+    );
+  }
+
+  // ── Installation ─────────────────────────────────────────────────────────
+
+  /**
+   * POST /internal/siem_migrations/rules/:migrationId/install
+   *
+   * @param options When `ids` is set it is sent as `{ ids }`; otherwise an
+   *   empty object is posted.
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async installRules(
+    migrationId: string,
+    options: InstallRulesOptions = {}
+  ): Promise<InstallRulesResult> {
+    const path = MigrationsService.rulesPath(migrationId, "/install");
+    return this.send(path, async () => {
+      const { data } = await this.client.post<InstallRulesResult>(
+        path,
+        options.ids ? { ids: options.ids } : {},
+        { headers: MIGRATION_HEADERS }
+      );
+      return data;
+    });
+  }
+
+  // ── Stats ────────────────────────────────────────────────────────────────
+
+  /**
+   * GET /internal/siem_migrations/rules/:migrationId/stats
+   *
+   * @throws {MigrationApiError} When the request fails.
+   */
+  async getStats(migrationId: string): Promise<MigrationStats> {
+    const path = MigrationsService.rulesPath(migrationId, "/stats");
+    return this.send(path, async () => {
+      const { data } = await this.client.get<MigrationStats>(path, {
+        headers: MIGRATION_HEADERS,
+      });
+      return data;
+    });
+  }
+}
diff --git a/src/server.ts b/src/server.ts
index deb1a38..814f04a 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -31,6 +31,7 @@ import {
   EsqlService,
   IndicesService,
   InvestigateService,
+  MigrationsService,
   RulesService,
   SampleDataService,
 } from "./elastic/service/index.js";
@@ -38,6 +39,7 @@ import { registerAlertTriageTools } from "./tools/alert-triage.js";
 import { registerAttackDiscoveryTools } from "./tools/attack-discovery.js";
 import { registerCaseManagementTools } from "./tools/case-management.js";
 import { registerDetectionRuleTools } from "./tools/detection-rules.js";
+import { registerMigrationTools } from "./tools/migration.js";
 import { registerSampleDataTools } from "./tools/sample-data.js";
 import { registerThreatHuntTools } from "./tools/threat-hunt.js";
 
@@ -95,6 +97,7 @@ export function createServer(deps: CreateServerDeps = {}): McpServer {
     sampleDataClient: new SampleDataClient({ esClient }),
     rulesService,
   });
+  const migrationsService = new MigrationsService({ kibanaClient });
 
   const server = new McpServer({
     name: "elastic-security",
@@ -115,6 +118,7 @@ export function createServer(deps: CreateServerDeps = {}): McpServer {
     attackDiscoveryService,
     casesService,
   });
+  registerMigrationTools(server, { migrationsService });
 
   return server;
 }
diff --git a/src/test/helpers/mockHttpClient.ts b/src/test/helpers/mockHttpClient.ts
index b843524..f640f2c 100644
--- a/src/test/helpers/mockHttpClient.ts
+++ b/src/test/helpers/mockHttpClient.ts
@@ -17,6 +17,7 @@ import type { KibanaClient } from "../../elastic/kibana-client/kibana-client.js"
 export interface MockHttpClient {
   get: Mock;
   post: Mock;
+  put: Mock;
   patch: Mock;
   delete: Mock;
   clusterName: string;
@@ -48,6 +49,7 @@ function makeMock(clusterName: string): MockHttpClient {
   return {
     get: vi.fn().mockResolvedValue({ data: undefined }),
     post: vi.fn().mockResolvedValue({ data: undefined }),
+    put: vi.fn().mockResolvedValue({ data: undefined }),
     patch: vi.fn().mockResolvedValue({ data: undefined }),
     delete: vi.fn().mockResolvedValue({ data: undefined }),
     clusterName,
diff --git a/src/test/helpers/mockServices.ts b/src/test/helpers/mockServices.ts
index bb77c48..819e95c 100644
--- a/src/test/helpers/mockServices.ts
+++ b/src/test/helpers/mockServices.ts
@@ -13,6 +13,7 @@ import type { EntityDetailService } from "../../elastic/service/entityDetailServ
 import type { EsqlService } from "../../elastic/service/esqlService.js";
 import type { IndicesService } from "../../elastic/service/indicesService.js";
 import type { InvestigateService } from "../../elastic/service/investigateService.js";
+import type { MigrationsService } from "../../elastic/service/migrationsService.js";
 import type { RulesService } from "../../elastic/service/rulesService.js";
 import type { SampleDataService } from "../../elastic/service/sampleDataService.js";
 
@@ -99,6 +100,25 @@ export function createMockRulesService(): RulesService {
   ]);
 }
 
+/**
+ * Builds a MigrationsService test double by handing `mockService` the names of
+ * the service methods that tests stub (e.g. via `vi.mocked(...)`).
+ *
+ * NOTE(review): the list is presumably meant to mirror every public method of
+ * MigrationsService — keep it in sync when the service gains a method.
+ */
+export function createMockMigrationsService(): MigrationsService {
+  return mockService<MigrationsService>([
+    "createMigration",
+    "listMigrations",
+    "getMigration",
+    "deleteMigration",
+    "uploadRules",
+    "getTranslatedRules",
+    "getTranslatedRule",
+    "updateTranslatedRule",
+    "startTranslation",
+    "stopTranslation",
+    "getResources",
+    "upsertResources",
+    "installRules",
+    "getStats",
+  ]);
+}
+
 export function createMockSampleDataService(): SampleDataService {
   return mockService<SampleDataService>([
     "generateSampleData",
diff --git a/src/test/integration/server.integration.test.ts b/src/test/integration/server.integration.test.ts
index eb26b9e..1771fac 100644
--- a/src/test/integration/server.integration.test.ts
+++ b/src/test/integration/server.integration.test.ts
@@ -139,6 +139,18 @@ describe("MCP server integration (in-process Client + Server)", () => {
           "generate-attack-discovery",
           "get-generation-status",
           "list-ai-connectors",
+          // automatic-migration
+          "migrate-rules",
+          "list-migrations",
+          "get-migration",
+          "get-translated-rules",
+          "start-translation",
+          "stop-translation",
+          "update-translated-rule",
+          "get-resources",
+          "upsert-resource",
+          "install-rules",
+          "get-stats",
         ].sort()
       );
     } finally {
@@ -159,6 +171,7 @@ describe("MCP server integration (in-process Client + Server)", () => {
           "ui://threat-hunt/mcp-app.html",
           "ui://generate-sample-data/mcp-app.html",
           "ui://triage-attack-discoveries/mcp-app.html",
+          "ui://migrate-rules/mcp-app.html",
         ].sort()
       );
     } finally {
diff --git a/src/tools/migration.test.ts b/src/tools/migration.test.ts
new file mode 100644
index 0000000..7193075
--- /dev/null
+++ b/src/tools/migration.test.ts
@@ -0,0 +1,408 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import fs from "fs";
+import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+
+import { registerMigrationTools } from "./migration.js";
+import {
+  createMockMcpServer,
+  parseToolText,
+  type MockMcpServer,
+} from "../test/helpers/mockMcpServer.js";
+import { createMockMigrationsService } from "../test/helpers/mockServices.js";
+import type { MigrationsService } from "../elastic/service/index.js";
+
+const RESOURCE_URI = "ui://migrate-rules/mcp-app.html";
+const MIGRATION_ID = "m-1";
+const RULE_ID = "r-1";
+
+/**
+ * Creates a fresh mock MCP server and mock MigrationsService, stubs the `fs`
+ * calls, and registers the migration tools against those mocks.
+ *
+ * @returns The mock server and mock service for the test to drive and inspect.
+ */
+function setup() {
+  const server = createMockMcpServer();
+  const migrationsService = createMockMigrationsService();
+  // Stub filesystem access before registration so no built view file is needed.
+  // NOTE(review): existsSync is presumably consulted by resolveViewPath — confirm.
+  vi.spyOn(fs, "existsSync").mockReturnValue(false);
+  vi.spyOn(fs, "readFileSync").mockReturnValue("<html>migration</html>");
+  // The mock only mimics the McpServer surface, hence the double cast.
+  registerMigrationTools(server as unknown as McpServer, { migrationsService });
+  return { server, migrationsService };
+}
+
+describe("registerMigrationTools", () => {
+  let server: MockMcpServer;
+  let migrationsService: MigrationsService;
+
+  beforeEach(() => {
+    ({ server, migrationsService } = setup());
+  });
+
+  // ── Registration ───────────────────────────────────────────────────────────
+
+  it("registers all 11 tools and the HTML resource", () => {
+    expect([...server.tools.keys()].sort()).toEqual(
+      [
+        "migrate-rules",
+        "list-migrations",
+        "get-migration",
+        "get-translated-rules",
+        "start-translation",
+        "stop-translation",
+        "update-translated-rule",
+        "get-resources",
+        "upsert-resource",
+        "install-rules",
+        "get-stats",
+      ].sort()
+    );
+    expect([...server.resources.keys()]).toEqual([RESOURCE_URI]);
+  });
+
+  // ── migrate-rules (model-facing) ───────────────────────────────────────────
+
+  describe("migrate-rules", () => {
+    it("returns a compact migration list for the LLM to see", async () => {
+      vi.mocked(migrationsService.listMigrations).mockResolvedValueOnce([
+        {
+          id: MIGRATION_ID,
+          name: "Splunk prod",
+          status: "ready",
+          created_at: "2026-01-01T00:00:00Z",
+          last_updated_at: "2026-01-01T00:00:00Z",
+          rules: {
+            total: 10, pending: 5, processing: 0, completed: 5, failed: 0,
+            installable: 5, installed: 0, partially_translated: 0, untranslatable: 0,
+          },
+        },
+      ]);
+
+      const out = parseToolText<{ message: string; migrations: unknown[] }>(
+        await server.tool("migrate-rules").callback({})
+      );
+
+      expect(out.message).toContain("workbench");
+      expect(out.migrations).toHaveLength(1);
+      expect(out.migrations[0]).toMatchObject({ id: MIGRATION_ID, name: "Splunk prod" });
+    });
+  });
+
+  // ── list-migrations ────────────────────────────────────────────────────────
+
+  describe("list-migrations", () => {
+    it("delegates to migrationsService.listMigrations and returns the array", async () => {
+      vi.mocked(migrationsService.listMigrations).mockResolvedValueOnce([]);
+
+      const out = parseToolText<unknown[]>(
+        await server.tool("list-migrations").callback({})
+      );
+
+      expect(migrationsService.listMigrations).toHaveBeenCalledTimes(1);
+      expect(out).toEqual([]);
+    });
+  });
+
+  // ── get-migration ──────────────────────────────────────────────────────────
+
+  describe("get-migration", () => {
+    it("calls getMigration with the provided ID", async () => {
+      vi.mocked(migrationsService.getMigration).mockResolvedValueOnce({
+        id: MIGRATION_ID,
+        name: "test",
+        status: "ready",
+        created_at: "",
+        last_updated_at: "",
+        rules: {
+          total: 0, pending: 0, processing: 0, completed: 0, failed: 0,
+          installable: 0, installed: 0, partially_translated: 0, untranslatable: 0,
+        },
+      });
+
+      await server.tool("get-migration").callback({ migrationId: MIGRATION_ID });
+
+      expect(migrationsService.getMigration).toHaveBeenCalledWith(MIGRATION_ID);
+    });
+  });
+
+  // ── get-translated-rules ───────────────────────────────────────────────────
+
+  describe("get-translated-rules", () => {
+    it("forwards pagination params to getTranslatedRules", async () => {
+      vi.mocked(migrationsService.getTranslatedRules).mockResolvedValueOnce({
+        data: [],
+        total: 0,
+      });
+
+      await server.tool("get-translated-rules").callback({
+        migrationId: MIGRATION_ID,
+        vendor: "splunk",
+        page: 2,
+        perPage: 50,
+        filter: "status:completed",
+      });
+
+      expect(migrationsService.getTranslatedRules).toHaveBeenCalledWith(
+        MIGRATION_ID,
+        { page: 2, perPage: 50, filter: "status:completed" }
+      );
+    });
+
+    it("returns vendorNotSupported for a non-Splunk vendor", async () => {
+      const out = parseToolText<{ error: string; vendor: string }>(
+        await server.tool("get-translated-rules").callback({
+          migrationId: MIGRATION_ID,
+          vendor: "qradar",
+        })
+      );
+
+      expect(out).toEqual({ error: "vendorNotSupported", vendor: "qradar" });
+      expect(migrationsService.getTranslatedRules).not.toHaveBeenCalled();
+    });
+  });
+
+  // ── start-translation ──────────────────────────────────────────────────────
+
+  describe("start-translation", () => {
+    it("calls startTranslation and returns { status: 'started' }", async () => {
+      vi.mocked(migrationsService.startTranslation).mockResolvedValueOnce(undefined);
+
+      const out = parseToolText<{ status: string }>(
+        await server.tool("start-translation").callback({
+          migrationId: MIGRATION_ID,
+          vendor: "splunk",
+        })
+      );
+
+      expect(migrationsService.startTranslation).toHaveBeenCalledWith(MIGRATION_ID);
+      expect(out.status).toBe("started");
+    });
+
+    it("returns vendorNotSupported for sentinel-one", async () => {
+      const out = parseToolText<{ error: string; vendor: string }>(
+        await server.tool("start-translation").callback({
+          migrationId: MIGRATION_ID,
+          vendor: "sentinel-one",
+        })
+      );
+
+      expect(out).toEqual({ error: "vendorNotSupported", vendor: "sentinel-one" });
+      expect(migrationsService.startTranslation).not.toHaveBeenCalled();
+    });
+  });
+
+  // ── stop-translation ───────────────────────────────────────────────────────
+
+  describe("stop-translation", () => {
+    it("calls stopTranslation and returns { status: 'stopped' }", async () => {
+      vi.mocked(migrationsService.stopTranslation).mockResolvedValueOnce(undefined);
+
+      const out = parseToolText<{ status: string }>(
+        await server.tool("stop-translation").callback({
+          migrationId: MIGRATION_ID,
+          vendor: "splunk",
+        })
+      );
+
+      expect(migrationsService.stopTranslation).toHaveBeenCalledWith(MIGRATION_ID);
+      expect(out.status).toBe("stopped");
+    });
+
+    it("returns vendorNotSupported for an unknown vendor", async () => {
+      const out = parseToolText<{ error: string }>(
+        await server.tool("stop-translation").callback({
+          migrationId: MIGRATION_ID,
+          vendor: "unknown-siem",
+        })
+      );
+
+      expect(out.error).toBe("vendorNotSupported");
+      expect(migrationsService.stopTranslation).not.toHaveBeenCalled();
+    });
+  });
+
+  // ── update-translated-rule ─────────────────────────────────────────────────
+
+  describe("update-translated-rule", () => {
+    it("parses elasticRule JSON and passes updates to service", async () => {
+      vi.mocked(migrationsService.updateTranslatedRule).mockResolvedValueOnce({
+        id: RULE_ID,
+        migration_id: MIGRATION_ID,
+        status: "completed",
+        translation_result: "partial",
+        original_rule: {},
+      });
+      const elasticRule = { name: "Fixed rule", type: "query" };
+
+      await server.tool("update-translated-rule").callback({
+        migrationId: MIGRATION_ID,
+        ruleId: RULE_ID,
+        vendor: "splunk",
+        elasticRule: JSON.stringify(elasticRule),
+        translationResult: "partial",
+      });
+
+      expect(migrationsService.updateTranslatedRule).toHaveBeenCalledWith(
+        MIGRATION_ID,
+        RULE_ID,
+        expect.objectContaining({
+          elastic_rule: elasticRule,
+          translation_result: "partial",
+        })
+      );
+    });
+
+    it("returns vendorNotSupported without calling service", async () => {
+      const out = parseToolText<{ error: string }>(
+        await server.tool("update-translated-rule").callback({
+          migrationId: MIGRATION_ID,
+          ruleId: RULE_ID,
+          vendor: "qradar",
+        })
+      );
+
+      expect(out.error).toBe("vendorNotSupported");
+      expect(migrationsService.updateTranslatedRule).not.toHaveBeenCalled();
+    });
+  });
+
+  // ── get-resources ──────────────────────────────────────────────────────────
+
+  describe("get-resources", () => {
+    // Happy path: a supported vendor reaches the service and the resource list
+    // is returned to the caller.
+    it("calls getResources with migrationId", async () => {
+      vi.mocked(migrationsService.getResources).mockResolvedValueOnce([
+        { type: "macro", name: "my_macro", content: "| where true" },
+      ]);
+
+      const out = parseToolText<unknown[]>(
+        await server.tool("get-resources").callback({
+          migrationId: MIGRATION_ID,
+          vendor: "splunk",
+        })
+      );
+
+      expect(migrationsService.getResources).toHaveBeenCalledWith(MIGRATION_ID);
+      expect(out).toHaveLength(1);
+    });
+
+    // Vendor gate: an unsupported vendor must short-circuit before the service
+    // is touched, matching the assertions made for the other gated tools.
+    it("returns vendorNotSupported for non-Splunk", async () => {
+      const out = parseToolText<{ error: string }>(
+        await server.tool("get-resources").callback({
+          migrationId: MIGRATION_ID,
+          vendor: "qradar",
+        })
+      );
+
+      expect(out.error).toBe("vendorNotSupported");
+      expect(migrationsService.getResources).not.toHaveBeenCalled();
+    });
+  });
+
+  // ── upsert-resource ────────────────────────────────────────────────────────
+
+  describe("upsert-resource", () => {
+    it("calls upsertResources with a single-element array", async () => {
+      vi.mocked(migrationsService.upsertResources).mockResolvedValueOnce(undefined);
+
+      await server.tool("upsert-resource").callback({
+        migrationId: MIGRATION_ID,
+        vendor: "splunk",
+        type: "macro",
+        name: "splunk_macro",
+        content: "| eval x=1",
+      });
+
+      expect(migrationsService.upsertResources).toHaveBeenCalledWith(
+        MIGRATION_ID,
+        [{ type: "macro", name: "splunk_macro", content: "| eval x=1" }]
+      );
+    });
+
+    it("returns vendorNotSupported for non-Splunk", async () => {
+      const out = parseToolText<{ error: string }>(
+        await server.tool("upsert-resource").callback({
+          migrationId: MIGRATION_ID,
+          vendor: "sentinel-one",
+          type: "macro",
+          name: "m",
+          content: "",
+        })
+      );
+
+      expect(out.error).toBe("vendorNotSupported");
+      expect(migrationsService.upsertResources).not.toHaveBeenCalled();
+    });
+  });
+
+  // ── install-rules ──────────────────────────────────────────────────────────
+
+  describe("install-rules", () => {
+    it("passes ids array to installRules", async () => {
+      vi.mocked(migrationsService.installRules).mockResolvedValueOnce({
+        installed: 2,
+        failed: 0,
+      });
+
+      const out = parseToolText<{ installed: number; failed: number }>(
+        await server.tool("install-rules").callback({
+          migrationId: MIGRATION_ID,
+          vendor: "splunk",
+          ids: ["r-1", "r-2"],
+        })
+      );
+
+      expect(migrationsService.installRules).toHaveBeenCalledWith(
+        MIGRATION_ID,
+        { ids: ["r-1", "r-2"] }
+      );
+      expect(out).toEqual({ installed: 2, failed: 0 });
+    });
+
+    it("returns vendorNotSupported for non-Splunk", async () => {
+      const out = parseToolText<{ error: string }>(
+        await server.tool("install-rules").callback({
+          migrationId: MIGRATION_ID,
+          vendor: "qradar",
+        })
+      );
+
+      expect(out.error).toBe("vendorNotSupported");
+      expect(migrationsService.installRules).not.toHaveBeenCalled();
+    });
+  });
+
+  // ── get-stats ──────────────────────────────────────────────────────────────
+
+  describe("get-stats", () => {
+    it("calls getStats and returns the result (no vendor gate)", async () => {
+      const stats = {
+        id: MIGRATION_ID,
+        status: "ready" as const,
+        rules: {
+          total: 5, pending: 5, processing: 0, completed: 0, failed: 0,
+          installable: 0, installed: 0, partially_translated: 0, untranslatable: 0,
+        },
+      };
+      vi.mocked(migrationsService.getStats).mockResolvedValueOnce(stats);
+
+      const out = parseToolText<typeof stats>(
+        await server.tool("get-stats").callback({ migrationId: MIGRATION_ID })
+      );
+
+      expect(migrationsService.getStats).toHaveBeenCalledWith(MIGRATION_ID);
+      expect(out).toEqual(stats);
+    });
+  });
+
+  // ── Vendor gate: undefined vendor is allowed ───────────────────────────────
+
+  it("proceeds when vendor parameter is absent (defaults to Splunk path)", async () => {
+    vi.mocked(migrationsService.startTranslation).mockResolvedValueOnce(undefined);
+
+    const out = parseToolText<{ status: string }>(
+      await server.tool("start-translation").callback({ migrationId: MIGRATION_ID })
+    );
+
+    expect(out.status).toBe("started");
+    expect(migrationsService.startTranslation).toHaveBeenCalled();
+  });
+});
diff --git a/src/tools/migration.ts b/src/tools/migration.ts
new file mode 100644
index 0000000..5502bd2
--- /dev/null
+++ b/src/tools/migration.ts
@@ -0,0 +1,353 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import {
+  registerAppTool,
+  registerAppResource,
+  RESOURCE_MIME_TYPE,
+} from "@modelcontextprotocol/ext-apps/server";
+import { z } from "zod";
+import fs from "fs";
+import type { MigrationsService } from "../elastic/service/index.js";
+import { resolveViewPath } from "./view-path.js";
+
+const RESOURCE_URI = "ui://migrate-rules/mcp-app.html";
+
+/**
+ * Vendors for which the Kibana SIEM migrations translator is production-ready.
+ * Re-enabling a vendor is a one-line change to this array once the translator
+ * matures — QRadar and Sentinel-One are the next candidates.
+ */
+const SUPPORTED_VENDORS: readonly string[] = ["splunk"];
+
+export interface MigrationToolDeps {
+  readonly migrationsService: MigrationsService;
+}
+
+/**
+ * Builds the tool result returned when a request names an unsupported vendor.
+ * The single text content item carries the JSON string
+ * `{"error":"vendorNotSupported","vendor":<vendor>}`.
+ */
+function vendorNotSupportedResponse(vendor: string) {
+  const text = JSON.stringify({ error: "vendorNotSupported", vendor });
+  return { content: [{ type: "text" as const, text }] };
+}
+
+/**
+ * Vendor gate predicate: true only when a vendor was explicitly supplied and
+ * it is not listed in SUPPORTED_VENDORS. An omitted vendor is never rejected.
+ */
+function isUnsupportedVendor(vendor: string | undefined): vendor is string {
+  if (vendor === undefined) {
+    return false;
+  }
+  return !SUPPORTED_VENDORS.includes(vendor);
+}
+
+/** Wraps `payload` in a tool result holding a single JSON-encoded text item. */
+function jsonTextResponse(payload: unknown) {
+  return {
+    content: [{ type: "text" as const, text: JSON.stringify(payload) }],
+  };
+}
+
+/**
+ * Decodes the JSON-encoded `elasticRule` tool argument.
+ *
+ * @returns The decoded object, or `undefined` when `raw` is not valid JSON or
+ *   decodes to anything other than a plain object (null, array, scalar).
+ */
+function parseElasticRule(raw: string): Record<string, unknown> | undefined {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch {
+    return undefined;
+  }
+  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+    return undefined;
+  }
+  return parsed as Record<string, unknown>;
+}
+
+/**
+ * Registers the SIEM rule-migration tools and the workbench HTML resource on
+ * `server`.
+ *
+ * `migrate-rules` is the only tool registered without `visibility: ["app"]`;
+ * it points at the workbench resource and returns a compact migration list.
+ * Every other tool is app-only. Tools that accept a `vendor` argument return a
+ * `vendorNotSupported` result (without calling the service) when a vendor is
+ * supplied that is not in SUPPORTED_VENDORS; omitting `vendor` skips the gate.
+ *
+ * @param server - MCP server to register the tools and resource on.
+ * @param deps - Dependencies; `migrationsService` backs every tool.
+ */
+export function registerMigrationTools(
+  server: McpServer,
+  deps: MigrationToolDeps
+) {
+  const { migrationsService } = deps;
+
+  // Input-schema fields shared by several tools.
+  const migrationIdField = z.string().describe("Migration ID");
+  const vendorField = z
+    .string()
+    .optional()
+    .describe("Source vendor (e.g. 'splunk'). Non-Splunk returns an error.");
+
+  // ── Model-facing entry-point ───────────────────────────────────────────────
+
+  registerAppTool(
+    server,
+    "migrate-rules",
+    {
+      title: "Migrate Rules",
+      description:
+        "Migrate detection rules from Splunk (and other SIEMs) to Elastic Security. " +
+        "Opens an interactive migration workbench for uploading, translating, reviewing, " +
+        "and installing rules. Vendor support: Splunk (active), QRadar / Sentinel-One (coming soon).",
+      inputSchema: {},
+      _meta: { ui: { resourceUri: RESOURCE_URI } },
+    },
+    async () => {
+      const migrations = await migrationsService.listMigrations();
+      // Only id/name/status are returned here; the full records are available
+      // through the app-only list-migrations tool.
+      return jsonTextResponse({
+        message: "Opening SIEM migration workbench",
+        migrations: migrations.map(({ id, name, status }) => ({ id, name, status })),
+      });
+    }
+  );
+
+  // ── App-only tools ─────────────────────────────────────────────────────────
+
+  registerAppTool(
+    server,
+    "list-migrations",
+    {
+      title: "List Migrations",
+      description: "List all SIEM rule migrations.",
+      inputSchema: {},
+      _meta: { ui: { visibility: ["app"] } },
+    },
+    async () => jsonTextResponse(await migrationsService.listMigrations())
+  );
+
+  registerAppTool(
+    server,
+    "get-migration",
+    {
+      title: "Get Migration",
+      description: "Get details for a specific SIEM migration.",
+      inputSchema: {
+        migrationId: migrationIdField,
+      },
+      _meta: { ui: { visibility: ["app"] } },
+    },
+    async ({ migrationId }) =>
+      jsonTextResponse(await migrationsService.getMigration(migrationId))
+  );
+
+  registerAppTool(
+    server,
+    "get-translated-rules",
+    {
+      title: "Get Translated Rules",
+      description: "Get translated rules for a SIEM migration.",
+      inputSchema: {
+        migrationId: migrationIdField,
+        vendor: vendorField,
+        page: z.number().optional(),
+        perPage: z.number().optional(),
+        filter: z.string().optional(),
+      },
+      _meta: { ui: { visibility: ["app"] } },
+    },
+    async ({ migrationId, vendor, page, perPage, filter }) => {
+      if (isUnsupportedVendor(vendor)) return vendorNotSupportedResponse(vendor);
+      const result = await migrationsService.getTranslatedRules(migrationId, {
+        page,
+        perPage,
+        filter,
+      });
+      return jsonTextResponse(result);
+    }
+  );
+
+  registerAppTool(
+    server,
+    "start-translation",
+    {
+      title: "Start Translation",
+      description: "Start the AI translation process for a SIEM migration.",
+      inputSchema: {
+        migrationId: migrationIdField,
+        vendor: vendorField,
+      },
+      _meta: { ui: { visibility: ["app"] } },
+    },
+    async ({ migrationId, vendor }) => {
+      if (isUnsupportedVendor(vendor)) return vendorNotSupportedResponse(vendor);
+      await migrationsService.startTranslation(migrationId);
+      return jsonTextResponse({ status: "started" });
+    }
+  );
+
+  registerAppTool(
+    server,
+    "stop-translation",
+    {
+      title: "Stop Translation",
+      description: "Stop the AI translation process for a SIEM migration.",
+      inputSchema: {
+        migrationId: migrationIdField,
+        vendor: vendorField,
+      },
+      _meta: { ui: { visibility: ["app"] } },
+    },
+    async ({ migrationId, vendor }) => {
+      if (isUnsupportedVendor(vendor)) return vendorNotSupportedResponse(vendor);
+      await migrationsService.stopTranslation(migrationId);
+      return jsonTextResponse({ status: "stopped" });
+    }
+  );
+
+  registerAppTool(
+    server,
+    "update-translated-rule",
+    {
+      title: "Update Translated Rule",
+      description: "Update a translated rule in a SIEM migration (e.g. fix its Elastic rule JSON).",
+      inputSchema: {
+        migrationId: migrationIdField,
+        ruleId: z.string().describe("Translated rule ID"),
+        vendor: vendorField,
+        elasticRule: z
+          .string()
+          .optional()
+          .describe("JSON-encoded Elastic rule updates"),
+        translationResult: z
+          .enum(["full", "partial", "untranslatable"])
+          .optional(),
+        comments: z.array(z.string()).optional(),
+      },
+      _meta: { ui: { visibility: ["app"] } },
+    },
+    async ({ migrationId, ruleId, vendor, elasticRule, translationResult, comments }) => {
+      if (isUnsupportedVendor(vendor)) return vendorNotSupportedResponse(vendor);
+      // Only fields the caller actually supplied are forwarded to the service.
+      const updates: Record<string, unknown> = {};
+      if (elasticRule !== undefined) {
+        // Reject malformed or non-object JSON here instead of letting a raw
+        // SyntaxError escape or forwarding null/array/scalar as the rule.
+        const parsedRule = parseElasticRule(elasticRule);
+        if (parsedRule === undefined) {
+          return {
+            ...jsonTextResponse({
+              error: "invalidElasticRule",
+              message: "elasticRule must be a JSON-encoded object",
+            }),
+            isError: true,
+          };
+        }
+        updates.elastic_rule = parsedRule;
+      }
+      if (translationResult !== undefined) updates.translation_result = translationResult;
+      if (comments !== undefined) updates.comments = comments;
+      const result = await migrationsService.updateTranslatedRule(migrationId, ruleId, updates);
+      return jsonTextResponse(result);
+    }
+  );
+
+  registerAppTool(
+    server,
+    "get-resources",
+    {
+      title: "Get Resources",
+      description: "Get macro/lookup resources for a SIEM migration.",
+      inputSchema: {
+        migrationId: migrationIdField,
+        vendor: vendorField,
+      },
+      _meta: { ui: { visibility: ["app"] } },
+    },
+    async ({ migrationId, vendor }) => {
+      if (isUnsupportedVendor(vendor)) return vendorNotSupportedResponse(vendor);
+      return jsonTextResponse(await migrationsService.getResources(migrationId));
+    }
+  );
+
+  registerAppTool(
+    server,
+    "upsert-resource",
+    {
+      title: "Upsert Resource",
+      description: "Create or update a macro/lookup resource in a SIEM migration.",
+      inputSchema: {
+        migrationId: migrationIdField,
+        vendor: vendorField,
+        type: z.enum(["macro", "lookup"]).describe("Resource type"),
+        name: z.string().describe("Resource name"),
+        content: z.string().describe("Resource content"),
+      },
+      _meta: { ui: { visibility: ["app"] } },
+    },
+    async ({ migrationId, vendor, type, name, content }) => {
+      if (isUnsupportedVendor(vendor)) return vendorNotSupportedResponse(vendor);
+      // The service takes a batch; this tool always sends exactly one resource.
+      await migrationsService.upsertResources(migrationId, [{ type, name, content }]);
+      return jsonTextResponse({ status: "ok" });
+    }
+  );
+
+  registerAppTool(
+    server,
+    "install-rules",
+    {
+      title: "Install Rules",
+      description: "Install translated rules from a SIEM migration into Elastic Security.",
+      inputSchema: {
+        migrationId: migrationIdField,
+        vendor: vendorField,
+        ids: z
+          .array(z.string())
+          .optional()
+          .describe("Specific rule IDs to install. Omit to install all installable rules."),
+      },
+      _meta: { ui: { visibility: ["app"] } },
+    },
+    async ({ migrationId, vendor, ids }) => {
+      if (isUnsupportedVendor(vendor)) return vendorNotSupportedResponse(vendor);
+      return jsonTextResponse(await migrationsService.installRules(migrationId, { ids }));
+    }
+  );
+
+  registerAppTool(
+    server,
+    "get-stats",
+    {
+      title: "Get Stats",
+      description: "Get translation and installation statistics for a SIEM migration.",
+      inputSchema: {
+        migrationId: migrationIdField,
+      },
+      _meta: { ui: { visibility: ["app"] } },
+    },
+    async ({ migrationId }) =>
+      jsonTextResponse(await migrationsService.getStats(migrationId))
+  );
+
+  // ── App resource (HTML workbench) ──────────────────────────────────────────
+
+  // The path is resolved once at registration; the file is read on each request.
+  const viewPath = resolveViewPath("migration");
+  registerAppResource(
+    server,
+    RESOURCE_URI,
+    RESOURCE_URI,
+    { mimeType: RESOURCE_MIME_TYPE },
+    async () => {
+      const html = fs.readFileSync(viewPath, "utf-8");
+      return {
+        contents: [{ uri: RESOURCE_URI, mimeType: RESOURCE_MIME_TYPE, text: html }],
+      };
+    }
+  );
+}
diff --git a/src/views/migration/App.tsx b/src/views/migration/App.tsx
new file mode 100644
index 0000000..badcc8d
--- /dev/null
+++ b/src/views/migration/App.tsx
@@ -0,0 +1,1383 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import React, { useState, useCallback, useEffect, useMemo, useRef } from "react";
+import type { App as McpApp } from "@modelcontextprotocol/ext-apps";
+import { extractCallResult } from "../../shared/extract-tool-text";
+import {
+  AppHeader,
+  AppShell,
+  BackButton,
+  EmptyState,
+  KpiStrip,
+  KpiTile,
+  LoadingState,
+} from "../../shared/components";
+import { useFullscreen } from "../../shared/hooks/useFullscreen";
+import { useMcpApp } from "../../shared/hooks/useMcpApp";
+import "./styles.css";
+
+// ---------------------------------------------------------------------------
+// Local domain types (shapes returned by the app-only migration tools)
+// ---------------------------------------------------------------------------
+
+/**
+ * Migration summary with per-status rule counters. Used both for the
+ * get-stats and the get-migration tool responses in this view.
+ */
+interface MigrationStats {
+  id: string;
+  name?: string;
+  /** Lifecycle status returned by get-migration. */
+  status: "ready" | "running" | "finished" | "error" | string;
+  /** Rule counters; the translating stage displays total, completed, pending and failed. */
+  rules: {
+    total: number;
+    pending: number;
+    processing: number;
+    completed: number;
+    failed: number;
+    installable: number;
+    installed: number;
+    partially_translated: number;
+    untranslatable: number;
+  };
+}
+
+/** One rule of a migration: the vendor original plus its (optional) Elastic translation. */
+interface TranslatedRule {
+  id: string;
+  status: string;
+  /** Translation outcome; when absent the badge renders "pending". */
+  translation_result?: "full" | "partial" | "untranslatable";
+  /** Source rule; the UI reads fields such as `search`, `spl` and `title` from it. */
+  original_rule: Record<string, unknown>;
+  /** Generated Elastic rule; the UI reads `name` from it for the row title. */
+  elastic_rule?: Record<string, unknown>;
+  comments?: string[];
+}
+
+/** A named macro or lookup, as returned by get-resources and sent to upsert-resource. */
+interface MigrationResource {
+  type: "macro" | "lookup";
+  name: string;
+  content: string;
+}
+
+/** Counts read from the install-rules tool response and shown on the done stage. */
+interface InstallResult {
+  installed: number;
+  failed: number;
+}
+
+// ---------------------------------------------------------------------------
+// WorkbenchState discriminated union
+//
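+// Each stage carries exactly the data it needs and no more. Transitions
+// normally move forward through the pipeline; the drawers and the install
+// confirmation can return to "review", and "Start over" returns to
+// "vendor-select". There is no implicit shared state.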
+// ---------------------------------------------------------------------------
+
+/**
+ * State of the migration workbench, discriminated by `stage`.
+ * Every stage except "vendor-select" and "done" carries the vendor and the
+ * migration id; the review-family stages also carry the loaded translations
+ * and resources.
+ */
+export type WorkbenchState =
+  | {
+      // Initial stage: no migration has been created yet.
+      stage: "vendor-select";
+    }
+  | {
+      // A migration exists; waiting for the user to provide rules.
+      stage: "upload";
+      vendor: string;
+      migrationId: string;
+    }
+  | {
+      // Translation was started; `stats` is refreshed by polling (null until known).
+      stage: "translating";
+      vendor: string;
+      migrationId: string;
+      stats: MigrationStats | null;
+    }
+  | {
+      // Translated rules are listed for inspection and inline editing.
+      stage: "review";
+      vendor: string;
+      migrationId: string;
+      translations: TranslatedRule[];
+      resources: MigrationResource[];
+    }
+  | {
+      // Review with the rule drawer open on `selectedRule`.
+      stage: "fix-rule-drawer";
+      vendor: string;
+      migrationId: string;
+      translations: TranslatedRule[];
+      resources: MigrationResource[];
+      selectedRule: TranslatedRule;
+    }
+  | {
+      // Review with the resources drawer open.
+      stage: "fix-resources-drawer";
+      vendor: string;
+      migrationId: string;
+      translations: TranslatedRule[];
+      resources: MigrationResource[];
+    }
+  | {
+      // Install confirmation screen.
+      stage: "install";
+      vendor: string;
+      migrationId: string;
+      translations: TranslatedRule[];
+      resources: MigrationResource[];
+    }
+  | {
+      // Terminal stage: install counts only.
+      stage: "done";
+      installed: number;
+      failed: number;
+    };
+
+// ---------------------------------------------------------------------------
+// Vendor catalogue — re-enabling a vendor is a one-line change here
+// ---------------------------------------------------------------------------
+
+/** Vendor ids that can be selected; every other catalogue entry renders disabled. */
+const SUPPORTED_VENDORS: readonly string[] = ["splunk"];
+
+/** All vendors shown on the vendor-select screen, in display order. */
+const VENDOR_CATALOGUE = [
+  { id: "splunk", label: "Splunk" },
+  { id: "qradar", label: "IBM QRadar" },
+  { id: "sentinel-one", label: "Sentinel One" },
+] as const;
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Invoke a server tool and decode its text payload as JSON.
+ *
+ * Never rejects: a thrown error, an empty payload, or malformed JSON all
+ * resolve to `null` (caught errors are logged to the console first).
+ */
+async function callTool<T = unknown>(
+  app: McpApp,
+  name: string,
+  args: Record<string, unknown>
+): Promise<T | null> {
+  let payload: T | null = null;
+  try {
+    const response = await app.callServerTool({ name, arguments: args });
+    const raw = extractCallResult(response);
+    if (raw) {
+      payload = JSON.parse(raw) as T;
+    }
+  } catch (err) {
+    console.error(`[migration] ${name} failed:`, err);
+    payload = null;
+  }
+  return payload;
+}
+
+// ---------------------------------------------------------------------------
+// App
+// ---------------------------------------------------------------------------
+
+export function App() {
+  // Current pipeline stage plus the data that stage carries.
+  const [state, setState] = useState<WorkbenchState>({ stage: "vendor-select" });
+  // True while a tool call started by one of the handlers below is in flight.
+  const [loading, setLoading] = useState(false);
+  // Message shown in the error banner, or null when there is none.
+  const [error, setError] = useState<string | null>(null);
+
+  // For the translating stage: poll stats until translation completes
+  const pollTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+
+  // Cancel the pending poll timer, if any, and forget its handle.
+  const clearPoll = useCallback(() => {
+    if (pollTimerRef.current !== null) {
+      clearTimeout(pollTimerRef.current);
+      pollTimerRef.current = null;
+    }
+  }, []);
+
+  // Cancel any pending poll when the component unmounts.
+  useEffect(() => () => clearPoll(), [clearPoll]);
+
+  const { connected, getApp } = useMcpApp({
+    name: "migration",
+    version: "1.0.0",
+    onConnect: (_app, _gotResult) => {
+      // No initial data load needed — the workbench starts at vendor-select.
+    },
+  });
+
+  const fullscreen = useFullscreen(getApp);
+
+  // ── Stage transitions ──────────────────────────────────────────────────────
+
+  // Create a migration named after today's date (YYYY-MM-DD, from the ISO
+  // timestamp) and advance to the upload stage for the chosen vendor.
+  const selectVendor = useCallback(
+    async (vendor: string) => {
+      const mcpApp = getApp();
+      if (!mcpApp) return;
+      setLoading(true);
+      setError(null);
+      try {
+        const today = new Date().toISOString().slice(0, 10);
+        const created = await callTool<{ migration_id: string }>(mcpApp, "create-migration", {
+          name: `Migration ${today}`,
+        });
+        const migrationId = created?.migration_id;
+        if (!migrationId) {
+          throw new Error("Failed to create migration");
+        }
+        setState({ stage: "upload", vendor, migrationId });
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        setError(message);
+      } finally {
+        setLoading(false);
+      }
+    },
+    [getApp]
+  );
+
+  // Parse the pasted/dropped rules JSON, upload it, start translation, then
+  // move to the translating stage and begin polling. Only valid from the
+  // upload stage. Input that is not a non-empty JSON array is rejected with a
+  // readable error instead of being sent to the server.
+  const uploadRules = useCallback(
+    async (rulesJson: string) => {
+      const app = getApp();
+      if (!app || state.stage !== "upload") return;
+      const { vendor, migrationId } = state;
+      setLoading(true);
+      setError(null);
+      try {
+        let parsed: unknown;
+        try {
+          parsed = JSON.parse(rulesJson);
+        } catch (parseError) {
+          const detail = parseError instanceof Error ? parseError.message : String(parseError);
+          throw new Error(`Rules are not valid JSON: ${detail}`);
+        }
+        // The upload screen asks for a rules array; anything else (object,
+        // string, number, empty list) cannot be a usable rule set.
+        if (!Array.isArray(parsed) || parsed.length === 0) {
+          throw new Error("Rules must be a non-empty JSON array of rule objects");
+        }
+        const rules = parsed as Record<string, unknown>[];
+        await callTool(app, "upload-rules", { migrationId, vendor, rules });
+        await callTool(app, "start-translation", { migrationId, vendor });
+        const stats = await callTool<MigrationStats>(app, "get-stats", { migrationId });
+        setState({ stage: "translating", vendor, migrationId, stats: stats ?? null });
+        schedulePoll(app, vendor, migrationId);
+      } catch (e) {
+        setError(e instanceof Error ? e.message : String(e));
+      } finally {
+        setLoading(false);
+      }
+    },
+    [getApp, state]
+  );
+
+  // Poll get-migration 3 s from now, and keep re-arming until the migration
+  // reports "finished" or "error"; then load translations and resources and
+  // enter the review stage.
+  //
+  // The timer handle doubles as a cancellation token: clearPoll() (called on
+  // reset and unmount) nulls pollTimerRef, and a newer schedulePoll() replaces
+  // it. After every await we stop if the ref no longer holds our handle, so a
+  // poll that was already in flight cannot keep polling or force the UI into
+  // review after the user started over.
+  const schedulePoll = useCallback(
+    (app: McpApp, vendor: string, migrationId: string) => {
+      clearPoll();
+      const timer: ReturnType<typeof setTimeout> = setTimeout(async () => {
+        const isCurrent = () => pollTimerRef.current === timer;
+
+        // Use get-migration (not get-stats) so we get the strongly-typed status
+        // field ("ready" | "running" | "finished" | "error") alongside the rule counts.
+        const migration = await callTool<MigrationStats>(app, "get-migration", { migrationId });
+        if (!isCurrent()) return;
+
+        setState((prev) => {
+          if (prev.stage !== "translating") return prev;
+          return { ...prev, stats: migration ?? prev.stats };
+        });
+
+        // Translation is complete when Kibana sets status to "finished" or "error".
+        const settled =
+          migration !== null && (migration.status === "finished" || migration.status === "error");
+        if (!settled) {
+          schedulePoll(app, vendor, migrationId);
+          return;
+        }
+
+        const translationsRes = await callTool<{ data: TranslatedRule[] }>(
+          app, "get-translated-rules", { migrationId, vendor, perPage: 500 }
+        );
+        const resources =
+          (await callTool<MigrationResource[]>(app, "get-resources", { migrationId, vendor })) ?? [];
+        if (!isCurrent()) return;
+
+        pollTimerRef.current = null;
+        // Only leave "translating" for the migration this poll belongs to.
+        setState((prev): WorkbenchState =>
+          prev.stage === "translating" && prev.migrationId === migrationId
+            ? {
+                stage: "review",
+                vendor,
+                migrationId,
+                translations: translationsRes?.data ?? [],
+                resources,
+              }
+            : prev
+        );
+      }, 3000);
+      pollTimerRef.current = timer;
+    },
+    [clearPoll]
+  );
+
+  // Open the fix drawer for one rule; a no-op unless currently in review.
+  const openRuleDrawer = useCallback((rule: TranslatedRule) => {
+    setState((current): WorkbenchState =>
+      current.stage === "review"
+        ? { ...current, stage: "fix-rule-drawer", selectedRule: rule }
+        : current
+    );
+  }, []);
+
+  // Persist an edit made in the rule drawer, then return to review. The edited
+  // rule is replaced by the tool's response, or left untouched when the tool
+  // returned nothing.
+  const saveRuleFix = useCallback(
+    async (elasticRuleJson: string, translationResult: "full" | "partial" | "untranslatable") => {
+      const app = getApp();
+      if (!app || state.stage !== "fix-rule-drawer") return;
+      const { vendor, migrationId, translations, resources, selectedRule } = state;
+      const targetId = selectedRule.id;
+      setLoading(true);
+      setError(null);
+      try {
+        const saved = await callTool<TranslatedRule>(app, "update-translated-rule", {
+          migrationId,
+          ruleId: targetId,
+          vendor,
+          elasticRule: elasticRuleJson,
+          translationResult,
+        });
+        const nextTranslations = translations.map((entry) => {
+          if (entry.id !== targetId) return entry;
+          return saved ?? entry;
+        });
+        setState({
+          stage: "review",
+          vendor,
+          migrationId,
+          resources,
+          translations: nextTranslations,
+        });
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        setError(message);
+      } finally {
+        setLoading(false);
+      }
+    },
+    [getApp, state]
+  );
+
+  // Persist an edit made in the inline diff panel of the review list. Stays in
+  // review; the edited rule is swapped for the tool's response when one is
+  // returned, otherwise the list is unchanged.
+  const saveRuleInline = useCallback(
+    async (
+      ruleId: string,
+      elasticRuleJson: string,
+      translationResult: "full" | "partial" | "untranslatable"
+    ) => {
+      const app = getApp();
+      if (!app || state.stage !== "review") return;
+      const { vendor, migrationId, translations, resources } = state;
+      setLoading(true);
+      setError(null);
+      try {
+        const payload = {
+          migrationId,
+          ruleId,
+          vendor,
+          elasticRule: elasticRuleJson,
+          translationResult,
+        };
+        const saved = await callTool<TranslatedRule>(app, "update-translated-rule", payload);
+        const merged: TranslatedRule[] = [];
+        for (const entry of translations) {
+          merged.push(entry.id === ruleId && saved ? saved : entry);
+        }
+        setState({ stage: "review", vendor, migrationId, resources, translations: merged });
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        setError(message);
+      } finally {
+        setLoading(false);
+      }
+    },
+    [getApp, state]
+  );
+
+  // Open the resources drawer; a no-op unless currently in review.
+  const openResourcesDrawer = useCallback(() => {
+    setState((current): WorkbenchState =>
+      current.stage === "review" ? { ...current, stage: "fix-resources-drawer" } : current
+    );
+  }, []);
+
+  // Upsert one resource, then reload the resource list while keeping the
+  // drawer open. Only valid from the resources drawer.
+  const saveResources = useCallback(
+    async (resource: MigrationResource) => {
+      const app = getApp();
+      if (!app || state.stage !== "fix-resources-drawer") return;
+      const { vendor, migrationId, translations, resources: previousResources } = state;
+      setLoading(true);
+      setError(null);
+      try {
+        await callTool(app, "upsert-resource", { migrationId, vendor, ...resource });
+        const refreshed = await callTool<MigrationResource[]>(app, "get-resources", {
+          migrationId,
+          vendor,
+        });
+        // callTool yields null when the refresh failed; keep the list we
+        // already have instead of blanking it (an empty list is a valid,
+        // non-null answer and is applied as-is).
+        setState({
+          stage: "fix-resources-drawer",
+          vendor,
+          migrationId,
+          translations,
+          resources: refreshed ?? previousResources,
+        });
+      } catch (e) {
+        setError(e instanceof Error ? e.message : String(e));
+      } finally {
+        setLoading(false);
+      }
+    },
+    [getApp, state]
+  );
+
+  // Return to review from either drawer or from the install confirmation.
+  // The review state is rebuilt field by field so drawer-only data (such as
+  // selectedRule) is not carried along, and no type casts are needed.
+  const closeDrawer = useCallback(() => {
+    setState((prev): WorkbenchState => {
+      switch (prev.stage) {
+        case "fix-rule-drawer":
+        case "fix-resources-drawer":
+        case "install":
+          return {
+            stage: "review",
+            vendor: prev.vendor,
+            migrationId: prev.migrationId,
+            translations: prev.translations,
+            resources: prev.resources,
+          };
+        default:
+          return prev;
+      }
+    });
+  }, []);
+
+  // Move from review to the install confirmation, carrying the same data.
+  const startInstall = useCallback(() => {
+    setState((current): WorkbenchState => {
+      if (current.stage === "review") {
+        const { vendor, migrationId, translations, resources } = current;
+        return { stage: "install", vendor, migrationId, translations, resources };
+      }
+      return current;
+    });
+  }, []);
+
+  // Install the migration's rules and move to the summary stage. Only valid
+  // from the install stage.
+  //
+  // callTool resolves to null when the call failed or returned nothing
+  // usable; that is surfaced as an error (staying on the install screen)
+  // rather than being reported as "0 installed, 0 failed".
+  const confirmInstall = useCallback(async () => {
+    const app = getApp();
+    if (!app || state.stage !== "install") return;
+    const { vendor, migrationId } = state;
+    setLoading(true);
+    setError(null);
+    try {
+      const result = await callTool<InstallResult>(app, "install-rules", { migrationId, vendor });
+      if (!result) throw new Error("Failed to install rules");
+      setState({ stage: "done", installed: result.installed ?? 0, failed: result.failed ?? 0 });
+    } catch (e) {
+      setError(e instanceof Error ? e.message : String(e));
+    } finally {
+      setLoading(false);
+    }
+  }, [getApp, state]);
+
+  // Abandon the current flow: stop polling, clear the error banner, and go
+  // back to vendor selection.
+  const reset = useCallback(() => {
+    clearPoll();
+    setError(null);
+    setState({ stage: "vendor-select" });
+  }, [clearPoll]);
+
+  // ── Render ─────────────────────────────────────────────────────────────────
+
+  // AppHeader expects { isFullscreen, onToggle } — useFullscreen returns { isFullscreen, toggle }
+  const { isFullscreen, toggle } = fullscreen;
+  const fullscreenProp = { isFullscreen, onToggle: toggle };
+
+  // Until the MCP connection is up, show only the header and a spinner.
+  if (!connected) {
+    return (
+      <AppShell>
+        <AppHeader title="SIEM Migration" fullscreen={fullscreenProp} />
+        <LoadingState>Connecting to Elastic Security…</LoadingState>
+      </AppShell>
+    );
+  }
+
+  // "Start over" is offered on every stage except the first and the last.
+  const canStartOver = !(state.stage === "vendor-select" || state.stage === "done");
+  const headerActions = canStartOver ? (
+    <BackButton onClick={reset} label="Start over" />
+  ) : undefined;
+
+  const handlers = {
+    selectVendor,
+    uploadRules,
+    openRuleDrawer,
+    saveRuleFix,
+    saveRuleInline,
+    openResourcesDrawer,
+    saveResources,
+    closeDrawer,
+    startInstall,
+    confirmInstall,
+    reset,
+  };
+
+  return (
+    <AppShell>
+      <AppHeader title="SIEM Migration" fullscreen={fullscreenProp} actions={headerActions} />
+
+      {error && (
+        <div className="p-3 m-4 rounded bg-red-50 border border-red-200 text-red-700 text-sm">
+          {error}
+          <button className="ml-2 underline" onClick={() => setError(null)}>
+            Dismiss
+          </button>
+        </div>
+      )}
+
+      {loading && <LoadingState>Working…</LoadingState>}
+
+      {!loading && renderStage(state, handlers)}
+    </AppShell>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Per-stage renderers (extracted to keep App() readable)
+// ---------------------------------------------------------------------------
+
+/**
+ * Callbacks handed from App to renderStage; each one triggers a stage
+ * transition or a tool call in App. Return values are ignored by the stage
+ * components.
+ */
+interface StageHandlers {
+  selectVendor: (vendor: string) => void;
+  uploadRules: (json: string) => void;
+  openRuleDrawer: (rule: TranslatedRule) => void;
+  saveRuleFix: (json: string, result: "full" | "partial" | "untranslatable") => void;
+  saveRuleInline: (id: string, json: string, result: "full" | "partial" | "untranslatable") => void;
+  openResourcesDrawer: () => void;
+  saveResources: (resource: MigrationResource) => void;
+  closeDrawer: () => void;
+  startInstall: () => void;
+  confirmInstall: () => void;
+  reset: () => void;
+}
+
+/**
+ * Map the current workbench state to the component tree for that stage.
+ * The two drawer stages render the review list dimmed underneath the drawer.
+ */
+function renderStage(state: WorkbenchState, h: StageHandlers): React.ReactNode {
+  switch (state.stage) {
+    case "vendor-select":
+      return <VendorSelect onSelect={h.selectVendor} />;
+
+    case "upload":
+      return <Upload vendor={state.vendor} onUpload={h.uploadRules} />;
+
+    case "translating":
+      return <Translating stats={state.stats} />;
+
+    case "review":
+      return (
+        <Review
+          translations={state.translations}
+          resources={state.resources}
+          onOpenRule={h.openRuleDrawer}
+          onSaveRule={h.saveRuleInline}
+          onOpenResources={h.openResourcesDrawer}
+          onInstall={h.startInstall}
+        />
+      );
+
+    case "fix-rule-drawer":
+      return (
+        <>
+          <Review
+            translations={state.translations}
+            resources={state.resources}
+            onOpenRule={h.openRuleDrawer}
+            onSaveRule={h.saveRuleInline}
+            onOpenResources={h.openResourcesDrawer}
+            onInstall={h.startInstall}
+            dimmed
+          />
+          <RuleDrawer rule={state.selectedRule} onSave={h.saveRuleFix} onClose={h.closeDrawer} />
+        </>
+      );
+
+    case "fix-resources-drawer":
+      return (
+        <>
+          <Review
+            translations={state.translations}
+            resources={state.resources}
+            onOpenRule={h.openRuleDrawer}
+            onSaveRule={h.saveRuleInline}
+            onOpenResources={h.openResourcesDrawer}
+            onInstall={h.startInstall}
+            dimmed
+          />
+          <ResourcesDrawer resources={state.resources} onSave={h.saveResources} onClose={h.closeDrawer} />
+        </>
+      );
+
+    case "install":
+      return (
+        <Install
+          // Same rule as the "Install N rules" button on the review screen:
+          // a rule counts only when it has a translation result and that
+          // result is not "untranslatable". Rules without any result were
+          // previously counted here but not there.
+          count={
+            state.translations.filter(
+              (t) => t.translation_result && t.translation_result !== "untranslatable"
+            ).length
+          }
+          onConfirm={h.confirmInstall}
+          onBack={h.closeDrawer}
+        />
+      );
+
+    case "done":
+      return <Done installed={state.installed} failed={state.failed} onReset={h.reset} />;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Stage components
+// ---------------------------------------------------------------------------
+
+/** First stage: a grid of vendor cards; unsupported vendors render disabled with a badge. */
+function VendorSelect({ onSelect }: { onSelect: (vendor: string) => void }) {
+  const cards = VENDOR_CATALOGUE.map(({ id, label }) => {
+    // Client-side gate: a card is selectable only when its id is listed in
+    // SUPPORTED_VENDORS. Add a vendor there to re-enable it.
+    const active = SUPPORTED_VENDORS.includes(id);
+    const cardClass = `migration-vendor-card${active ? "" : " opacity-50 cursor-not-allowed"}`;
+    const handleClick = () => {
+      if (active) onSelect(id);
+    };
+    return (
+      <button key={id} className={cardClass} disabled={!active} onClick={handleClick}>
+        <span className="migration-vendor-label">{label}</span>
+        {!active && <span className="migration-vendor-badge">Coming soon</span>}
+      </button>
+    );
+  });
+
+  return (
+    <div className="p-6 max-w-2xl mx-auto">
+      <h2 className="text-lg font-semibold mb-1">Select your source SIEM</h2>
+      <p className="text-sm text-gray-500 mb-4">
+        Choose the platform you are migrating detection rules from.
+      </p>
+      <div className="migration-vendor-grid">{cards}</div>
+    </div>
+  );
+}
+
+/**
+ * Upload stage: collects the rules JSON as text — from a dropped file, the
+ * file picker, or direct paste — and hands the raw text to `onUpload`.
+ */
+function Upload({ vendor, onUpload }: { vendor: string; onUpload: (json: string) => void }) {
+  const [text, setText] = useState("");
+  const [dragOver, setDragOver] = useState(false);
+  const fileInputRef = useRef<HTMLInputElement>(null);
+
+  // Read a file as text and place its contents in the textarea.
+  const loadFileIntoEditor = (file: File) => {
+    const reader = new FileReader();
+    reader.onload = (event) => {
+      const contents = event.target?.result as string | null;
+      setText(contents ?? "");
+    };
+    reader.readAsText(file);
+  };
+
+  const handleDragOver = (e: React.DragEvent) => {
+    e.preventDefault();
+    setDragOver(true);
+  };
+
+  const handleDragLeave = () => setDragOver(false);
+
+  const handleDrop = (e: React.DragEvent) => {
+    e.preventDefault();
+    setDragOver(false);
+    const [dropped] = Array.from(e.dataTransfer.files);
+    if (dropped) loadFileIntoEditor(dropped);
+  };
+
+  const handlePick = (e: React.ChangeEvent<HTMLInputElement>) => {
+    const picked = e.target.files?.[0];
+    if (picked) loadFileIntoEditor(picked);
+    // Clear the input so picking the same file again still fires onChange.
+    e.target.value = "";
+  };
+
+  const dropZoneClass = `migration-upload-area${dragOver ? " border-blue-400 bg-blue-50" : ""}`;
+  const nothingToUpload = !text.trim();
+
+  return (
+    <div className="p-6 max-w-2xl mx-auto">
+      <h2 className="text-lg font-semibold mb-1">Upload {vendor} rules</h2>
+      <p className="text-sm text-gray-500 mb-4">
+        Drop a JSON export file, use the file picker, or paste the rules array directly.
+      </p>
+
+      {/* Hidden file input wired to the drop zone button */}
+      <input
+        ref={fileInputRef}
+        type="file"
+        accept=".json,application/json"
+        className="sr-only"
+        onChange={handlePick}
+      />
+
+      <div
+        className={dropZoneClass}
+        onDragOver={handleDragOver}
+        onDragLeave={handleDragLeave}
+        onDrop={handleDrop}
+      >
+        <button
+          type="button"
+          className="mb-3 px-3 py-1.5 text-sm border border-gray-300 rounded hover:bg-gray-50"
+          onClick={() => fileInputRef.current?.click()}
+        >
+          Choose file…
+        </button>
+        <p className="text-xs text-gray-400 mb-2">or drop a .json file here, or paste below</p>
+        <textarea
+          className="w-full h-36 p-2 text-xs font-mono border border-gray-200 rounded resize-y"
+          placeholder={`[\n  { "search": "index=main sourcetype=syslog..." },\n  ...\n]`}
+          value={text}
+          onChange={(e) => setText(e.target.value)}
+        />
+      </div>
+
+      <button
+        className="mt-3 px-4 py-2 bg-blue-600 text-white rounded text-sm font-medium disabled:opacity-50"
+        disabled={nothingToUpload}
+        onClick={() => onUpload(text)}
+      >
+        Upload &amp; start translation
+      </button>
+    </div>
+  );
+}
+
+/**
+ * Translating stage: KPI tiles plus a progress bar driven by the polled stats.
+ *
+ * Progress is the share of rules that have reached a final state
+ * (completed + failed). Rules still pending or being processed do not count
+ * as done. Shows a waiting spinner while no rule counters are available.
+ */
+function Translating({ stats }: { stats: MigrationStats | null }) {
+  // `rules` is read defensively: a response without counters falls through to
+  // the waiting state instead of throwing during render.
+  const rules = stats?.rules;
+  const total = rules?.total ?? 0;
+  const settled = (rules?.completed ?? 0) + (rules?.failed ?? 0);
+  // Clamp so inconsistent counters can never produce a bar outside 0–100 %.
+  const pct = total > 0 ? Math.min(100, Math.max(0, Math.round((settled / total) * 100))) : 0;
+  const isError = stats?.status === "error";
+
+  return (
+    <div className="p-6 max-w-xl mx-auto">
+      <h2 className="text-lg font-semibold mb-1">
+        {isError ? "Translation encountered an error" : "Translating rules…"}
+      </h2>
+      <p className="text-sm text-gray-500 mb-6">
+        {isError
+          ? "Some rules could not be translated. Loading results…"
+          : "The AI translator is converting your rules to Elastic detection rule format. This may take a few minutes."}
+      </p>
+      {rules && (
+        <>
+          <KpiStrip tileCount={4}>
+            <KpiTile label="Total" value={rules.total} />
+            <KpiTile label="Translated" value={rules.completed} />
+            <KpiTile label="Pending" value={rules.pending} />
+            <KpiTile label="Failed" value={rules.failed} />
+          </KpiStrip>
+          {!isError && (
+            <>
+              <div className="migration-progress-bar-track mt-4">
+                <div className="migration-progress-bar-fill" style={{ width: `${pct}%` }} />
+              </div>
+              <p className="text-xs text-gray-400 mt-1">{pct}% complete</p>
+            </>
+          )}
+        </>
+      )}
+      {!rules && <LoadingState>Waiting for translation to start…</LoadingState>}
+    </div>
+  );
+}
+
+/**
+ * Review stage: lists every translated rule, with at most one row expanded
+ * into the inline diff editor at a time.
+ *
+ * - `installable` counts rules that have a translation result other than
+ *   "untranslatable"; the Install button is disabled when it is zero.
+ * - `dimmed` fades the list and disables pointer events (used while a drawer
+ *   is open on top of it).
+ */
+function Review({
+  translations,
+  resources,
+  onOpenRule,
+  onSaveRule,
+  onOpenResources,
+  onInstall,
+  dimmed,
+}: {
+  translations: TranslatedRule[];
+  resources: MigrationResource[];
+  onOpenRule: (rule: TranslatedRule) => void;
+  onSaveRule: (id: string, json: string, result: "full" | "partial" | "untranslatable") => void;
+  onOpenResources: () => void;
+  onInstall: () => void;
+  dimmed?: boolean;
+}) {
+  // Id of the row whose diff panel is open, or null when all are collapsed.
+  const [expandedId, setExpandedId] = useState<string | null>(null);
+
+  const installable = translations.filter(
+    (t) => t.translation_result && t.translation_result !== "untranslatable"
+  ).length;
+  const needsFix = translations.filter((t) => t.translation_result === "partial").length;
+
+  const toggleExpand = (id: string) =>
+    setExpandedId((prev) => (prev === id ? null : id));
+
+  return (
+    <div className={`p-6${dimmed ? " opacity-50 pointer-events-none" : ""}`}>
+      <div className="flex items-center justify-between mb-4">
+        <h2 className="text-lg font-semibold">Review translated rules</h2>
+        <div className="flex gap-2">
+          {resources.length > 0 && (
+            <button
+              className="px-3 py-1.5 text-sm border border-gray-300 rounded"
+              onClick={onOpenResources}
+            >
+              Fix resources ({resources.length})
+            </button>
+          )}
+          <button
+            className="px-3 py-1.5 text-sm bg-blue-600 text-white rounded disabled:opacity-50"
+            disabled={installable === 0}
+            onClick={onInstall}
+          >
+            {/* Pluralised the same way as the "need manual review" banner below. */}
+            Install {installable} rule{installable !== 1 ? "s" : ""}
+          </button>
+        </div>
+      </div>
+
+      {needsFix > 0 && (
+        <div className="mb-4 p-3 bg-yellow-50 border border-yellow-200 rounded text-sm text-yellow-800">
+          {needsFix} rule{needsFix !== 1 ? "s" : ""} need manual review before installation.
+        </div>
+      )}
+
+      {translations.length === 0 ? (
+        <EmptyState>No translated rules found.</EmptyState>
+      ) : (
+        <div className="space-y-2">
+          {translations.map((rule) => (
+            <div key={rule.id} className="border border-gray-200 rounded overflow-hidden">
+              <RuleRow
+                rule={rule}
+                expanded={expandedId === rule.id}
+                onToggle={() => toggleExpand(rule.id)}
+                onOpenDrawer={() => onOpenRule(rule)}
+              />
+              {expandedId === rule.id && (
+                <RuleDiff
+                  rule={rule}
+                  onSave={(json, result) => {
+                    onSaveRule(rule.id, json, result);
+                    setExpandedId(null);
+                  }}
+                  onCancel={() => setExpandedId(null)}
+                />
+              )}
+            </div>
+          ))}
+        </div>
+      )}
+    </div>
+  );
+}
+
+/**
+ * Header row for one rule in the review list. Clicking the row toggles the
+ * inline diff; the "Drawer" link (shown for partial translations or rules
+ * without an Elastic rule) opens the drawer without toggling the row.
+ */
+function RuleRow({
+  rule,
+  expanded,
+  onToggle,
+  onOpenDrawer,
+}: {
+  rule: TranslatedRule;
+  expanded: boolean;
+  onToggle: () => void;
+  onOpenDrawer: () => void;
+}) {
+  // Title preference: Elastic rule name, then the original title, then the id.
+  const elasticName = rule.elastic_rule?.name as string | undefined;
+  const originalTitle = rule.original_rule?.title as string | undefined;
+  const displayName = elasticName ?? originalTitle ?? rule.id;
+
+  const showDrawerLink = rule.translation_result === "partial" || !rule.elastic_rule;
+  const chevron = expanded ? "▲" : "▼";
+
+  const handleDrawerClick = (e: React.MouseEvent) => {
+    e.stopPropagation();
+    onOpenDrawer();
+  };
+
+  return (
+    <div
+      className="flex items-center justify-between p-3 cursor-pointer hover:bg-gray-50 select-none"
+      onClick={onToggle}
+    >
+      <div className="flex items-center gap-3 min-w-0">
+        <TranslationBadge result={rule.translation_result} />
+        <span className="text-sm truncate">{displayName}</span>
+      </div>
+      {/* Clicks in the right-hand cluster never reach the row's toggle handler. */}
+      <div className="flex items-center gap-2 shrink-0" onClick={(e) => e.stopPropagation()}>
+        {showDrawerLink && (
+          <button className="text-xs text-blue-600 underline" onClick={handleDrawerClick}>
+            Drawer
+          </button>
+        )}
+        <span className="text-xs text-gray-400">{chevron}</span>
+      </div>
+    </div>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Three-column diff panel (inline within the review step)
+//
+// Monaco editor is intentionally omitted to keep the singlefile HTML bundle
+// under 1 MB. The generated column uses a styled <pre>; the editable column
+// uses a <textarea> with the same monospace style.
+// ---------------------------------------------------------------------------
+
+/**
+ * Inline three-column editor for one rule: original query text, generated
+ * Elastic rule JSON (read-only), and an editable copy of that JSON.
+ *
+ * `onSave` receives the edited text and the chosen translation result. Save
+ * is disabled while the edited text is not valid JSON, so a typo cannot be
+ * submitted as the rule body.
+ */
+function RuleDiff({
+  rule,
+  onSave,
+  onCancel,
+}: {
+  rule: TranslatedRule;
+  onSave: (json: string, result: "full" | "partial" | "untranslatable") => void;
+  onCancel: () => void;
+}) {
+  const [editedJson, setEditedJson] = useState(() =>
+    JSON.stringify(rule.elastic_rule ?? {}, null, 2)
+  );
+  const [result, setResult] = useState<"full" | "partial" | "untranslatable">(
+    rule.translation_result ?? "partial"
+  );
+
+  // First string-valued query field of the original rule; falls back to the
+  // whole rule as pretty-printed JSON. Values are type-checked rather than
+  // cast so a non-string field is never rendered as if it were query text.
+  // NOTE(review): `query` is included on the assumption that stored original
+  // rules use that field name — confirm against the migration API schema.
+  const originalSpl = useMemo(() => {
+    const r = rule.original_rule;
+    const queryText = [r.search, r.spl, r.query].find(
+      (value): value is string => typeof value === "string"
+    );
+    return queryText ?? JSON.stringify(r, null, 2);
+  }, [rule.original_rule]);
+
+  const generatedJson = useMemo(
+    () => JSON.stringify(rule.elastic_rule ?? {}, null, 2),
+    [rule.elastic_rule]
+  );
+
+  // Parser message when the edited text is not valid JSON, otherwise null.
+  const jsonError = useMemo(() => {
+    try {
+      JSON.parse(editedJson);
+      return null;
+    } catch (e) {
+      return e instanceof Error ? e.message : String(e);
+    }
+  }, [editedJson]);
+
+  return (
+    <div className="migration-diff-panel border-t border-gray-200">
+      <div className="migration-diff-columns">
+        {/* Left: original SPL */}
+        <div className="migration-diff-col">
+          <div className="migration-diff-col-header">Original SPL</div>
+          <pre className="migration-diff-spl">{originalSpl}</pre>
+        </div>
+
+        {/* Middle: generated Elastic rule JSON (read-only) */}
+        <div className="migration-diff-col">
+          <div className="migration-diff-col-header">Generated (read-only)</div>
+          <pre className="migration-diff-spl">{generatedJson}</pre>
+        </div>
+
+        {/* Right: user-editable Elastic rule JSON */}
+        <div className="migration-diff-col">
+          <div className="migration-diff-col-header">Edit</div>
+          <textarea
+            className="migration-diff-spl migration-diff-textarea"
+            value={editedJson}
+            onChange={(e) => setEditedJson(e.target.value)}
+            spellCheck={false}
+          />
+          {jsonError !== null && (
+            <p className="text-xs text-red-700 mt-1">Invalid JSON: {jsonError}</p>
+          )}
+        </div>
+      </div>
+
+      <div className="migration-diff-footer">
+        <select
+          className="text-sm border border-gray-200 rounded p-1"
+          value={result}
+          onChange={(e) => setResult(e.target.value as typeof result)}
+        >
+          <option value="full">Full — production-ready</option>
+          <option value="partial">Partial — needs tuning</option>
+          <option value="untranslatable">Untranslatable — skip</option>
+        </select>
+        <div className="flex gap-2">
+          <button className="text-sm text-gray-500 px-3 py-1.5" onClick={onCancel}>
+            Cancel
+          </button>
+          <button
+            className="text-sm px-3 py-1.5 bg-blue-600 text-white rounded disabled:opacity-50"
+            disabled={jsonError !== null}
+            onClick={() => onSave(editedJson, result)}
+          >
+            Save
+          </button>
+        </div>
+      </div>
+    </div>
+  );
+}
+
+/** Status pill for a rule; a missing result is shown (and styled) as "pending". */
+function TranslationBadge({ result }: { result?: string }) {
+  const label = result ?? "pending";
+  return (
+    <span className={`migration-rule-status-badge migration-rule-status-badge--${label}`}>
+      {label}
+    </span>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// ElasticRulePartial — key fields of an Elastic detection rule
+// ---------------------------------------------------------------------------
+
+/**
+ * Form-facing view of an Elastic rule: the typed core fields edited by the
+ * rule form, plus any additional keys carried through untouched via the
+ * index signature.
+ */
+interface ElasticRulePartial {
+  name: string;
+  description: string;
+  type: string;
+  query: string;
+  language: string;
+  severity: string;
+  risk_score: number;
+  [key: string]: unknown;
+}
+
+/**
+ * Normalise a raw Elastic rule object into the shape the rule form expects.
+ *
+ * Unknown keys are passed through unchanged. The core fields are always
+ * present with the right type: a missing, null, or wrongly-typed value is
+ * replaced by its default ("" / "query" / "kuery" / "medium" / 50).
+ *
+ * `raw` is spread FIRST so the normalised fields win. With the spread last,
+ * a raw `name: null` or string `risk_score` would overwrite the defaults and
+ * reach the form inputs as-is.
+ */
+function fromRuleJson(raw: Record<string, unknown>): ElasticRulePartial {
+  const text = (value: unknown, fallback: string): string =>
+    typeof value === "string" ? value : fallback;
+
+  return {
+    ...raw,
+    name: text(raw.name, ""),
+    description: text(raw.description, ""),
+    type: text(raw.type, "query"),
+    query: text(raw.query, ""),
+    language: text(raw.language, "kuery"),
+    severity: text(raw.severity, "medium"),
+    risk_score: typeof raw.risk_score === "number" ? raw.risk_score : 50,
+  };
+}
+
+/**
+ * Controlled editor for the key fields of an Elastic rule. It keeps no state of its own:
+ * values come from `fields`, and each edit is reported to `onChange` as a one-field patch.
+ */
+function ElasticRuleForm(props: {
+  fields: ElasticRulePartial;
+  onChange: (patch: Partial<ElasticRulePartial>) => void;
+}) {
+  const { fields, onChange } = props;
+  const options = (values: string[]) => values.map((v) => <option key={v} value={v}>{v}</option>);
+  // Keep the stored score inside the same 0–100 range the input advertises via min/max.
+  const setRiskScore = (text: string) =>
+    onChange({ risk_score: Math.min(100, Math.max(0, Number(text))) });
+  return (
+    <div className="space-y-3 text-sm">
+      <FormRow label="Name">
+        <input
+          className="migration-form-input"
+          value={fields.name}
+          onChange={(ev) => onChange({ name: ev.target.value })}
+        />
+      </FormRow>
+      <FormRow label="Description">
+        <textarea
+          className="migration-form-input h-16 resize-none"
+          value={fields.description}
+          onChange={(ev) => onChange({ description: ev.target.value })}
+        />
+      </FormRow>
+      <div className="flex gap-3">
+        <FormRow label="Type" className="flex-1">
+          <select
+            className="migration-form-input"
+            value={fields.type}
+            onChange={(ev) => onChange({ type: ev.target.value })}
+          >
+            {options(["query", "eql", "esql", "threshold", "machine_learning", "new_terms"])}
+          </select>
+        </FormRow>
+        <FormRow label="Language" className="flex-1">
+          <select
+            className="migration-form-input"
+            value={fields.language}
+            onChange={(ev) => onChange({ language: ev.target.value })}
+          >
+            {options(["kuery", "eql", "esql", "lucene"])}
+          </select>
+        </FormRow>
+      </div>
+      <FormRow label="Query">
+        <textarea
+          className="migration-form-input h-28 resize-y font-mono text-xs"
+          value={fields.query}
+          onChange={(ev) => onChange({ query: ev.target.value })}
+        />
+      </FormRow>
+      <div className="flex gap-3">
+        <FormRow label="Severity" className="flex-1">
+          <select
+            className="migration-form-input"
+            value={fields.severity}
+            onChange={(ev) => onChange({ severity: ev.target.value })}
+          >
+            {options(["low", "medium", "high", "critical"])}
+          </select>
+        </FormRow>
+        <FormRow label="Risk score" className="flex-1">
+          <input
+            type="number"
+            min={0}
+            max={100}
+            className="migration-form-input"
+            value={fields.risk_score}
+            onChange={(ev) => setRiskScore(ev.target.value)}
+          />
+        </FormRow>
+      </div>
+    </div>
+  );
+}
+
+/** Labelled form field; the control is nested inside the <label> so the caption names it. */
+function FormRow({ label, className, children }: {
+  label: string;
+  className?: string;
+  children: React.ReactNode;
+}) {
+  return (
+    <div className={className}>
+      {/* Nesting associates caption and control without needing id/htmlFor pairs. */}
+      <label className="block">
+        <span className="block text-xs font-medium text-gray-600 mb-1">{label}</span>
+        {children}
+      </label>
+    </div>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// RuleDrawer — slide-over with ElasticRulePartial form
+// ---------------------------------------------------------------------------
+
+/**
+ * Slide-over editor for one translated rule, seeded once from `rule.elastic_rule`. Save sends the
+ * edited JSON with the chosen result; Re-validate always sends "partial". `onSave` owns persistence.
+ */
+function RuleDrawer({ rule, onSave, onClose }: {
+  rule: TranslatedRule;
+  onSave: (json: string, result: "full" | "partial" | "untranslatable") => void | Promise<void>;
+  onClose: () => void;
+}) {
+  const rawRule = rule.elastic_rule ?? {};
+  const [fields, setFields] = useState<ElasticRulePartial>(() => fromRuleJson(rawRule));
+  const [result, setResult] = useState<"full" | "partial" | "untranslatable">(
+    rule.translation_result ?? "partial"
+  );
+  const [revalidating, setRevalidating] = useState(false);
+
+  const patch = (update: Partial<ElasticRulePartial>) =>
+    setFields((prev) => ({ ...prev, ...update }));
+
+  const toJson = () => JSON.stringify({ ...rawRule, ...fields }, null, 2);
+
+  const handleRevalidate = async () => {
+    setRevalidating(true);
+    try {
+      // Saved as "partial"; the caller persists via update-translated-rule and may derive a new
+      // result from the response. Awaiting keeps "Saving…" visible while an async onSave runs.
+      await onSave(toJson(), "partial");
+    } finally {
+      setRevalidating(false);
+    }
+  };
+
+  const ruleName =
+    fields.name ||
+    (rule.original_rule?.title as string | undefined) ||
+    rule.id;
+
+  return (
+    <div className="migration-drawer">
+      <div className="migration-drawer-header">
+        <div className="min-w-0">
+          <h3 className="font-semibold text-sm truncate">{ruleName}</h3>
+          <TranslationBadge result={rule.translation_result} />
+        </div>
+        <button className="text-gray-400 hover:text-gray-700 shrink-0" onClick={onClose}>
+          ✕
+        </button>
+      </div>
+
+      <div className="migration-drawer-body">
+        <ElasticRuleForm fields={fields} onChange={patch} />
+
+        <div className="mt-4">
+          <label className="block text-xs font-medium text-gray-600 mb-1">
+            Translation result
+          </label>
+          <select
+            className="migration-form-input"
+            value={result}
+            onChange={(e) => setResult(e.target.value as typeof result)}
+          >
+            <option value="full">Full — rule is production-ready</option>
+            <option value="partial">Partial — rule needs tuning</option>
+            <option value="untranslatable">Untranslatable — skip this rule</option>
+          </select>
+        </div>
+      </div>
+
+      <div className="migration-drawer-footer">
+        <button className="text-sm text-gray-500" onClick={onClose}>
+          Cancel
+        </button>
+        <button
+          className="text-sm px-3 py-1.5 border border-gray-300 rounded disabled:opacity-50"
+          disabled={revalidating}
+          onClick={() => void handleRevalidate()}
+          title="Save edits and mark as partial for further review"
+        >
+          {revalidating ? "Saving…" : "Re-validate"}
+        </button>
+        <button
+          className="text-sm px-3 py-1.5 bg-blue-600 text-white rounded"
+          onClick={() => void onSave(toJson(), result)}
+        >
+          Save
+        </button>
+      </div>
+    </div>
+  );
+}
+
+/**
+ * Drawer listing migration resources, split into unresolved (blank content) and defined ones,
+ * followed by a form for adding a new macro or lookup. Every save is delegated to `onSave`.
+ */
+function ResourcesDrawer({ resources, onSave, onClose }: {
+  resources: MigrationResource[];
+  onSave: (resource: MigrationResource) => void;
+  onClose: () => void;
+}) {
+  const [addName, setAddName] = useState("");
+  const [addType, setAddType] = useState<"macro" | "lookup">("macro");
+  const [addContent, setAddContent] = useState("");
+
+  const unresolved = resources.filter((res) => res.content.trim() === "");
+  const resolved = resources.filter((res) => res.content.trim() !== "");
+  // Hand the new resource to the caller, then clear name and content (the chosen type is kept).
+  const handleAdd = () => {
+    onSave({ type: addType, name: addName.trim(), content: addContent });
+    setAddName("");
+    setAddContent("");
+  };
+
+  return (
+    <div className="migration-drawer">
+      <div className="migration-drawer-header">
+        <div className="min-w-0">
+          <h3 className="font-semibold text-sm">Manage resources</h3>
+          {unresolved.length > 0 && (
+            <span className="text-xs text-yellow-700">{unresolved.length} unresolved</span>
+          )}
+        </div>
+        <button className="text-gray-400 hover:text-gray-700 shrink-0" onClick={onClose}>
+          ✕
+        </button>
+      </div>
+
+      <div className="migration-drawer-body space-y-4">
+        {unresolved.length > 0 && (
+          <section>
+            <p className="text-xs font-semibold text-yellow-700 uppercase tracking-wide mb-2">
+              Unresolved ({unresolved.length})
+            </p>
+            <div className="space-y-2">
+              {unresolved.map((res) => (
+                <ResourceEditRow
+                  key={`${res.type}:${res.name}`}
+                  resource={res}
+                  defaultExpanded
+                  onSave={onSave}
+                />
+              ))}
+            </div>
+          </section>
+        )}
+
+        {resolved.length > 0 && (
+          <section>
+            <p className="text-xs font-semibold text-gray-500 uppercase tracking-wide mb-2">
+              Defined ({resolved.length})
+            </p>
+            <div className="space-y-2">
+              {resolved.map((res) => (
+                <ResourceEditRow
+                  key={`${res.type}:${res.name}`}
+                  resource={res}
+                  defaultExpanded={false}
+                  onSave={onSave}
+                />
+              ))}
+            </div>
+          </section>
+        )}
+
+        {resources.length === 0 && (
+          <p className="text-sm text-gray-400 text-center py-8">No resources found.</p>
+        )}
+
+        <section>
+          <p className="text-xs font-semibold text-gray-500 uppercase tracking-wide mb-2">
+            Add resource
+          </p>
+          <div className="space-y-2">
+            <div className="flex gap-2">
+              <select
+                className="migration-form-input"
+                value={addType}
+                onChange={(ev) => setAddType(ev.target.value as typeof addType)}
+              >
+                <option value="macro">Macro</option>
+                <option value="lookup">Lookup</option>
+              </select>
+              <input
+                className="migration-form-input"
+                placeholder="Resource name"
+                value={addName}
+                onChange={(ev) => setAddName(ev.target.value)}
+              />
+            </div>
+            <textarea
+              className="migration-rule-json-editor h-20"
+              placeholder="Paste definition…"
+              value={addContent}
+              onChange={(ev) => setAddContent(ev.target.value)}
+            />
+            <div className="flex justify-end">
+              <button
+                className="text-xs px-3 py-1.5 bg-blue-600 text-white rounded disabled:opacity-50"
+                disabled={addName.trim() === ""}
+                onClick={handleAdd}
+              >
+                Add
+              </button>
+            </div>
+          </div>
+        </section>
+      </div>
+
+      <div className="migration-drawer-footer">
+        <button className="text-sm text-gray-500" onClick={onClose}>
+          Done
+        </button>
+      </div>
+    </div>
+  );
+}
+
+/**
+ * Collapsible editor for one resource. The textarea holds a local draft that is re-synced
+ * whenever `resource.content` changes; Save is enabled for edited drafts and unresolved rows.
+ */
+function ResourceEditRow({ resource, defaultExpanded, onSave }: {
+  resource: MigrationResource;
+  defaultExpanded: boolean;
+  onSave: (r: MigrationResource) => void;
+}) {
+  const [expanded, setExpanded] = useState(defaultExpanded);
+  const [draft, setDraft] = useState(resource.content);
+  const isUnresolved = resource.content.trim() === "";
+  const isDirty = draft !== resource.content;
+  const frame = isUnresolved ? " border-yellow-300 bg-yellow-50" : " border-gray-200";
+
+  // Replace the local draft whenever the resource's saved content changes.
+  useEffect(() => {
+    setDraft(resource.content);
+  }, [resource.content]);
+
+  return (
+    <div className={`border rounded p-2 space-y-2${frame}`}>
+      <div className="flex items-center justify-between">
+        <div className="flex items-center gap-2 min-w-0">
+          <span className="text-xs font-mono bg-gray-100 px-1 rounded shrink-0">
+            {resource.type}
+          </span>
+          <span className="text-sm font-medium truncate">{resource.name}</span>
+          {isUnresolved && (
+            <span className="text-xs text-yellow-600 shrink-0">unresolved</span>
+          )}
+        </div>
+        <button
+          className="text-xs text-gray-400 shrink-0 ml-2"
+          onClick={() => setExpanded((open) => !open)}
+        >
+          {expanded ? "▲" : "▼"}
+        </button>
+      </div>
+
+      {expanded && (
+        <>
+          <textarea
+            className="migration-rule-json-editor h-24 w-full"
+            placeholder="Paste macro or lookup definition…"
+            value={draft}
+            onChange={(ev) => setDraft(ev.target.value)}
+          />
+          <div className="flex justify-end">
+            <button
+              className="text-xs px-3 py-1.5 bg-blue-600 text-white rounded disabled:opacity-50"
+              disabled={!(isDirty || isUnresolved)}
+              onClick={() => onSave({ ...resource, content: draft })}
+            >
+              Save
+            </button>
+          </div>
+        </>
+      )}
+    </div>
+  );
+}
+
+/** Confirmation screen for installing `count` translated rules, with Back and Confirm actions. */
+function Install({ count, onConfirm, onBack }: {
+  count: number;
+  onConfirm: () => void;
+  onBack: () => void;
+}) {
+  // Agree the noun with the count so a single rule reads "Install 1 rule".
+  const noun = count === 1 ? "rule" : "rules";
+
+  return (
+    <div className="p-6 max-w-xl mx-auto text-center">
+      <h2 className="text-lg font-semibold mb-2">Install {count} {noun}</h2>
+      <p className="text-sm text-gray-500 mb-6">
+        The translated rules will be installed as disabled detection rules in Elastic Security.
+        You can enable them after reviewing their configuration.
+      </p>
+      <div className="flex gap-3 justify-center">
+        <button className="px-4 py-2 text-sm border border-gray-300 rounded" onClick={onBack}>
+          Back to review
+        </button>
+        <button
+          className="px-4 py-2 text-sm bg-blue-600 text-white rounded"
+          onClick={onConfirm}
+        >
+          Confirm install
+        </button>
+      </div>
+    </div>
+  );
+}
+
+/**
+ * Final screen: installed count, a Failed tile only when `failed` > 0, and a restart button.
+ */
+function Done({ installed, failed, onReset }: {
+  installed: number;
+  failed: number;
+  onReset: () => void;
+}) {
+  const hasFailures = failed > 0;
+  return (
+    <div className="p-6 max-w-xl mx-auto text-center">
+      <div className="text-4xl mb-4">✓</div>
+      <h2 className="text-lg font-semibold mb-2">Migration complete</h2>
+      <KpiStrip tileCount={hasFailures ? 2 : 1}>
+        <KpiTile label="Installed" value={installed} />
+        {hasFailures && <KpiTile label="Failed" value={failed} />}
+      </KpiStrip>
+      <p className="text-sm text-gray-500 mt-4 mb-6">
+        Rules have been installed as disabled. Navigate to Detection Rules to enable and tune them.
+      </p>
+      <button className="px-4 py-2 text-sm border border-gray-300 rounded" onClick={onReset}>
+        Start another migration
+      </button>
+    </div>
+  );
+}
diff --git a/src/views/migration/mcp-app.html b/src/views/migration/mcp-app.html
new file mode 100644
index 0000000..69fe301
--- /dev/null
+++ b/src/views/migration/mcp-app.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>SIEM Migration</title>
+</head>
+<body>
+  <div id="root"></div> <!-- mount point looked up by id in mcp-app.tsx -->
+  <script type="module" src="./mcp-app.tsx"></script>
+</body>
+</html>
diff --git a/src/views/migration/mcp-app.tsx b/src/views/migration/mcp-app.tsx
new file mode 100644
index 0000000..7251dbf
--- /dev/null
+++ b/src/views/migration/mcp-app.tsx
@@ -0,0 +1,12 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import React from "react";
+import { createRoot } from "react-dom/client";
+import { App } from "./App";
+
+createRoot(document.getElementById("root")!).render(<App />); // mounts App into the #root div of mcp-app.html
diff --git a/src/views/migration/monaco-environment.ts b/src/views/migration/monaco-environment.ts
new file mode 100644
index 0000000..744f2a0
--- /dev/null
+++ b/src/views/migration/monaco-environment.ts
@@ -0,0 +1,26 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import * as monaco from "monaco-editor";
+import { loader } from "@monaco-editor/react";
+import EditorWorker from "monaco-editor/esm/vs/editor/editor.worker?worker&inline";
+
+/**
+ * The view ships as a single inlined HTML bundle (vite-plugin-singlefile).
+ * Workers and JS chunks are not reachable at runtime, so:
+ *
+ *  - `?worker&inline` base64-inlines the editor worker into the bundle.
+ *  - `loader.config({ monaco })` makes @monaco-editor/react use the
+ *    locally-bundled monaco instead of fetching it from the CDN.
+ */
+(globalThis as unknown as {
+  MonacoEnvironment: { getWorker: (...args: unknown[]) => Worker };
+}).MonacoEnvironment = {
+  getWorker: () => new EditorWorker(), // every worker request gets the one inlined editor worker
+};
+
+loader.config({ monaco });
diff --git a/src/views/migration/styles.css b/src/views/migration/styles.css
new file mode 100644
index 0000000..8a28a42
--- /dev/null
+++ b/src/views/migration/styles.css
@@ -0,0 +1,248 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+/* Migration workbench — view-specific overrides */
+
+.migration-vendor-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); /* as many ≥160px columns as fit, equal width */
+  gap: 12px;
+  margin-top: 24px;
+}
+
+.migration-vendor-card {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  gap: 8px;
+  padding: 20px 16px;
+  border: 1px solid var(--border-color, #d4d4d4);
+  border-radius: 8px;
+  background: var(--surface-color, #fff);
+  cursor: pointer;
+  transition: border-color 0.15s, box-shadow 0.15s;
+}
+
+/* Hover highlight, skipped for --disabled cards. NOTE(review): this file has no --disabled rule of its own. */
+.migration-vendor-card:hover:not(.migration-vendor-card--disabled) {
+  border-color: var(--accent-color, #0077cc);
+  box-shadow: 0 0 0 2px var(--accent-color-alpha, rgba(0, 119, 204, 0.15));
+}
+
+.migration-vendor-label {
+  font-size: 14px;
+  font-weight: 500;
+}
+
+.migration-vendor-badge {
+  font-size: 11px;
+  color: var(--text-muted, #737373);
+}
+
+.migration-upload-area {
+  border: 2px dashed var(--border-color, #d4d4d4);
+  border-radius: 8px;
+  padding: 40px;
+  text-align: center;
+  margin: 16px 0;
+  transition: border-color 0.15s; /* animates the hover colour change below */
+}
+
+.migration-upload-area:hover {
+  border-color: var(--accent-color, #0077cc);
+}
+
+.migration-progress-bar-track {
+  height: 6px;
+  background: var(--surface-subtle, #f0f0f0);
+  border-radius: 3px;
+  overflow: hidden; /* clips the fill to the track's rounded corners */
+  margin: 8px 0;
+}
+
+.migration-progress-bar-fill {
+  height: 100%;
+  background: var(--accent-color, #0077cc);
+  border-radius: 3px;
+  transition: width 0.4s ease; /* width is not set here — presumably set inline by the component; TODO confirm */
+}
+
+.migration-rule-status-badge {
+  display: inline-flex;
+  align-items: center;
+  gap: 4px;
+  padding: 2px 8px;
+  border-radius: 12px;
+  font-size: 11px;
+  font-weight: 500;
+  text-transform: capitalize; /* TranslationBadge renders the raw lowercase status as its text */
+}
+
+.migration-rule-status-badge--full {
+  background: #d1fae5;
+  color: #065f46;
+}
+
+.migration-rule-status-badge--partial {
+  background: #fef3c7;
+  color: #92400e;
+}
+
+.migration-rule-status-badge--untranslatable {
+  background: #fee2e2;
+  color: #991b1b;
+}
+
+.migration-rule-status-badge--pending { /* TranslationBadge uses "pending" when no result is set */
+  background: #f0f0f0;
+  color: #525252;
+}
+
+.migration-drawer {
+  position: fixed; /* pinned to the right edge, spanning the full viewport height */
+  right: 0;
+  top: 0;
+  bottom: 0;
+  width: 520px;
+  max-width: 100vw; /* never wider than the viewport */
+  background: var(--surface-color, #fff);
+  border-left: 1px solid var(--border-color, #d4d4d4);
+  box-shadow: -4px 0 16px rgba(0, 0, 0, 0.08);
+  display: flex;
+  flex-direction: column; /* header, body and footer stack vertically */
+  z-index: 100;
+}
+
+.migration-drawer-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: 16px 20px;
+  border-bottom: 1px solid var(--border-color, #d4d4d4);
+}
+
+.migration-drawer-body {
+  flex: 1; /* takes the height left over by header and footer */
+  overflow: auto; /* long content scrolls inside the body */
+  padding: 20px;
+}
+
+.migration-drawer-footer {
+  padding: 16px 20px;
+  border-top: 1px solid var(--border-color, #d4d4d4);
+  display: flex;
+  gap: 8px;
+  justify-content: flex-end; /* action buttons align to the right */
+}
+
+.migration-rule-json-editor {
+  width: 100%;
+  font-family: "Fira Code", "Cascadia Code", monospace;
+  font-size: 12px;
+  line-height: 1.5;
+  border: 1px solid var(--border-color, #d4d4d4);
+  border-radius: 4px;
+  padding: 8px;
+  resize: vertical;
+  min-height: 240px; /* NOTE(review): the resource drawers pair this class with h-20/h-24 (presumably shorter utility heights), which this floor would override — confirm intended */
+  background: var(--surface-subtle, #fafafa);
+}
+
+.migration-resource-row {
+  display: flex;
+  gap: 8px;
+  align-items: flex-start;
+  padding: 8px 0;
+  border-bottom: 1px solid var(--border-color, #e5e5e5);
+}
+
+/* Three-column diff panel */
+
+.migration-diff-panel {
+  background: var(--surface-subtle, #fafafa);
+}
+
+.migration-diff-columns {
+  display: grid;
+  grid-template-columns: 1fr 1fr 1fr; /* three equal-width columns */
+  min-height: 320px;
+}
+
+.migration-diff-col {
+  display: flex;
+  flex-direction: column;
+  border-right: 1px solid var(--border-color, #e5e5e5); /* divider between columns */
+  overflow: hidden;
+}
+
+.migration-diff-col:last-child {
+  border-right: none; /* no divider after the final column */
+}
+
+.migration-diff-col-header {
+  padding: 6px 10px;
+  font-size: 11px;
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.04em;
+  color: var(--text-muted, #737373);
+  background: var(--surface-color, #fff);
+  border-bottom: 1px solid var(--border-color, #e5e5e5);
+  flex-shrink: 0; /* header keeps its height when the column content grows */
+}
+
+.migration-diff-spl {
+  flex: 1;
+  margin: 0;
+  padding: 8px 10px;
+  font-family: "Fira Code", "Cascadia Code", monospace;
+  font-size: 11px;
+  line-height: 1.6;
+  white-space: pre-wrap; /* keep line breaks and spacing, but wrap long lines */
+  word-break: break-all; /* allow breaks inside long unbroken tokens */
+  overflow: auto;
+  background: var(--surface-subtle, #fafafa);
+  color: var(--text-color, #171717);
+}
+
+/* Editable column textarea — same look as the read-only <pre> siblings */
+
+.migration-diff-textarea {
+  resize: vertical;
+  border: none;
+  outline: none; /* NOTE(review): removes the focus outline; no :focus rule in this file replaces it */
+  min-height: 280px;
+}
+
+.migration-diff-footer {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: 10px 14px;
+  border-top: 1px solid var(--border-color, #e5e5e5);
+  background: var(--surface-color, #fff);
+  gap: 8px;
+}
+
+/* Shared input style used across the ElasticRuleForm and drawer selects */
+
+.migration-form-input {
+  width: 100%; /* fills its container (e.g. a FormRow) */
+  font-size: 13px;
+  border: 1px solid var(--border-color, #d4d4d4);
+  border-radius: 4px;
+  padding: 4px 8px;
+  background: var(--surface-color, #fff);
+  color: var(--text-color, #171717);
+  line-height: 1.5;
+}
+
+.migration-form-input:focus {
+  outline: none; /* replaced by the accent border and ring below */
+  border-color: var(--accent-color, #0077cc);
+  box-shadow: 0 0 0 2px var(--accent-color-alpha, rgba(0, 119, 204, 0.15));
+}
diff --git a/tsconfig.json b/tsconfig.json
index 23b7968..5dc2901 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -14,6 +14,6 @@
     "lib": ["ES2022", "DOM", "DOM.Iterable"],
     "types": ["vitest/globals", "@testing-library/jest-dom"]
   },
-  "include": ["src/**/*", "main.ts", "vite.config.ts", "vitest.config.ts", "scripts/**/*"],
+  "include": ["src/**/*", "evals/**/*", "main.ts", "vite.config.ts", "vitest.config.ts", "scripts/**/*"],
   "exclude": ["node_modules", "dist"]
 }