From fa7846ec65de481fff63510a531c11322710a31f Mon Sep 17 00:00:00 2001 From: Daniel Wise Date: Tue, 3 Mar 2026 17:15:37 -0800 Subject: [PATCH 1/3] feat(retrieval): add retrieval quality proofing workflow and archive change - add retrieval proofing benchmark fixtures and profile thresholds - add proofing runner, deterministic scoring, and report generation - add retrieval-proof CLI command and smoke CI gate - add tests and docs for local/CI proofing workflow - archive retrieval-quality-proofing change and sync main spec --- .github/workflows/pr-checks.yml | 3 + README.md | 2 + .../retrieval-proofing/benchmark.v1.json | 217 ++++++++++++++++++ .../retrieval-proofing/profiles.v1.json | 36 +++ docs/retrieval-proofing-benchmark-schema.md | 63 +++++ docs/retrieval-proofing.md | 54 +++++ .../.openspec.yaml | 0 .../design.md | 0 .../proposal.md | 0 .../specs/retrieval-quality-proofing/spec.md | 0 .../tasks.md | 23 ++ .../retrieval-quality-proofing/tasks.md | 23 -- .../specs/retrieval-quality-proofing/spec.md | 37 +++ src/cli/commands.ts | 33 +++ src/cli/commands/retrieval-proof.ts | 51 ++++ src/context/retrieval/proofing/reports.ts | 65 ++++++ src/context/retrieval/proofing/runner.ts | 213 +++++++++++++++++ src/context/retrieval/proofing/schema.ts | 118 ++++++++++ src/context/retrieval/proofing/scoring.ts | 84 +++++++ tests/retrieval-proofing.test.ts | 90 ++++++++ 20 files changed, 1089 insertions(+), 23 deletions(-) create mode 100644 benchmarks/retrieval-proofing/benchmark.v1.json create mode 100644 benchmarks/retrieval-proofing/profiles.v1.json create mode 100644 docs/retrieval-proofing-benchmark-schema.md create mode 100644 docs/retrieval-proofing.md rename openspec/changes/{retrieval-quality-proofing => archive/2026-03-04-retrieval-quality-proofing}/.openspec.yaml (100%) rename openspec/changes/{retrieval-quality-proofing => archive/2026-03-04-retrieval-quality-proofing}/design.md (100%) rename openspec/changes/{retrieval-quality-proofing => 
archive/2026-03-04-retrieval-quality-proofing}/proposal.md (100%) rename openspec/changes/{retrieval-quality-proofing => archive/2026-03-04-retrieval-quality-proofing}/specs/retrieval-quality-proofing/spec.md (100%) create mode 100644 openspec/changes/archive/2026-03-04-retrieval-quality-proofing/tasks.md delete mode 100644 openspec/changes/retrieval-quality-proofing/tasks.md create mode 100644 openspec/specs/retrieval-quality-proofing/spec.md create mode 100644 src/cli/commands/retrieval-proof.ts create mode 100644 src/context/retrieval/proofing/reports.ts create mode 100644 src/context/retrieval/proofing/runner.ts create mode 100644 src/context/retrieval/proofing/schema.ts create mode 100644 src/context/retrieval/proofing/scoring.ts create mode 100644 tests/retrieval-proofing.test.ts diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index 2ec99dc..f97e85a 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -43,6 +43,9 @@ jobs: - name: Run checks run: pnpm checks + - name: Run retrieval proofing (smoke profile) + run: pnpm dev retrieval-proof --profile smoke --output-dir artifacts/retrieval-proofing + - name: Comment success summary on PR if: ${{ success() && github.event_name == 'pull_request' }} continue-on-error: true diff --git a/README.md b/README.md index b643150..1da25f9 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ pnpm dev -- chat pnpm dev -- chat "summarize this repo" pnpm dev -- plan "create a rollout plan for indexing" pnpm dev -- index . +pnpm dev retrieval-proof --profile smoke pnpm dev -- automations list pnpm dev -- automations add --name "Hourly Check" --cron "0 * * * *" --prompt "summarize local status" pnpm dev -- automations run @@ -94,3 +95,4 @@ Environment variables (BYOK): - Anthropic embeddings currently fall back to deterministic local vectors. - This project intentionally uses Biome only (no ESLint/Prettier). 
+- Retrieval proofing benchmark schema/workflow docs: `docs/retrieval-proofing-benchmark-schema.md` and `docs/retrieval-proofing.md`. diff --git a/benchmarks/retrieval-proofing/benchmark.v1.json b/benchmarks/retrieval-proofing/benchmark.v1.json new file mode 100644 index 0000000..734c13b --- /dev/null +++ b/benchmarks/retrieval-proofing/benchmark.v1.json @@ -0,0 +1,217 @@ +{ + "version": "1.0", + "datasetName": "retrieval-proofing-core", + "datasetVersion": "2026.03.01", + "cases": [ + { + "id": "repo-layout", + "title": "Find retrieval implementation location", + "query": "where is hybrid retrieval implemented", + "intent": "lookup", + "difficulty": "low", + "topK": 3, + "expectedEvidenceDocIds": ["d1", "d2"], + "documents": [ + { + "id": "d1", + "path": "src/context/retrieval/hybrid.ts", + "title": "Hybrid retrieval source", + "content": "The hybrid retrieval runner combines lexical search, vector similarity, and ranking metadata." + }, + { + "id": "d2", + "path": "src/context/retrieval/rerank.ts", + "title": "Rerank helpers", + "content": "hybridRerank computes weighted retrieval ordering and cosine similarity for vector retrieval." + }, + { + "id": "d3", + "path": "README.md", + "title": "Project overview", + "content": "General project overview and quick start commands for local development." + }, + { + "id": "d4", + "path": "src/cli/commands.ts", + "title": "CLI commands", + "content": "Registers chat, plan, index, and automations commands." + } + ] + }, + { + "id": "quality-commands", + "title": "Identify quality gates", + "query": "what command runs lint typecheck and tests", + "intent": "lookup", + "difficulty": "medium", + "topK": 3, + "expectedEvidenceDocIds": ["d1"], + "documents": [ + { + "id": "d1", + "path": "package.json", + "title": "Project scripts", + "content": "The checks script runs pnpm test, pnpm typecheck, pnpm lint, and pnpm build." 
+ }, + { + "id": "d2", + "path": "README.md", + "title": "README quality commands", + "content": "The README includes lint, typecheck, test, and build as quality commands." + }, + { + "id": "d3", + "path": "src/automation/runner.ts", + "title": "Automation runner", + "content": "Executes configured prompts on cron schedules." + }, + { + "id": "d4", + "path": "src/db/client.ts", + "title": "Database client", + "content": "Initializes PGLite and exposes query and exec methods." + } + ] + }, + { + "id": "provider-preflight", + "title": "Provider preflight requirements", + "query": "which env var is required for google provider preflight", + "intent": "lookup", + "difficulty": "medium", + "topK": 3, + "expectedEvidenceDocIds": ["d1", "d2"], + "documents": [ + { + "id": "d1", + "path": "src/cli/commands/chat.tsx", + "title": "Chat preflight", + "content": "Chat preflight checks provider env vars and prints setup instructions for google openai and anthropic." + }, + { + "id": "d2", + "path": "README.md", + "title": "Environment variable docs", + "content": "GOOGLE_GENERATIVE_AI_API_KEY is required when using the google provider." + }, + { + "id": "d3", + "path": "src/mcp/client.ts", + "title": "MCP client", + "content": "Starts and interacts with external MCP servers." + }, + { + "id": "d4", + "path": "src/context/indexer/full-index.ts", + "title": "Indexer", + "content": "Indexes repository files and writes chunks and embeddings." + } + ] + }, + { + "id": "policy-approval", + "title": "Approval behavior", + "query": "which actions require approval in interactive mode", + "intent": "reasoning", + "difficulty": "high", + "topK": 3, + "expectedEvidenceDocIds": ["d1", "d2"], + "documents": [ + { + "id": "d1", + "path": "README.md", + "title": "Interactive approval docs", + "content": "Sensitive write and destructive tool actions require explicit approve, deny, or dismiss decisions in the TUI." 
+ }, + { + "id": "d2", + "path": "src/policy/engine.ts", + "title": "Policy engine", + "content": "Policy engine classifies tool side effects and enforces approval decisions." + }, + { + "id": "d3", + "path": "src/context/retrieval/rerank.ts", + "title": "Rerank", + "content": "Reranks retrieval candidates with weighted score combination." + }, + { + "id": "d4", + "path": "src/db/migrate.ts", + "title": "Migrations", + "content": "Applies schema migrations at startup." + } + ] + }, + { + "id": "automation-hooks", + "title": "Hook trigger behavior", + "query": "what hooks trigger tests or typecheck", + "intent": "lookup", + "difficulty": "low", + "topK": 3, + "expectedEvidenceDocIds": ["d1"], + "documents": [ + { + "id": "d1", + "path": "AGENTS.md", + "title": "Hook definitions", + "content": "file-change runs pnpm test and git-head-change runs pnpm typecheck." + }, + { + "id": "d2", + "path": "README.md", + "title": "README automation", + "content": "Automations can run prompts but does not define hook command mapping." + }, + { + "id": "d3", + "path": "src/automation/scheduler.ts", + "title": "Scheduler", + "content": "Cron scheduler dispatches queued automation specs." + }, + { + "id": "d4", + "path": "src/agent/orchestrator.ts", + "title": "Orchestrator", + "content": "Runs gather reason act verify loops with validation retries." + } + ] + }, + { + "id": "ci-workflow", + "title": "CI checks pipeline", + "query": "where is pr checks workflow defined", + "intent": "lookup", + "difficulty": "medium", + "topK": 3, + "expectedEvidenceDocIds": ["d1", "d2"], + "documents": [ + { + "id": "d1", + "path": ".github/workflows/pr-checks.yml", + "title": "PR checks workflow", + "content": "Runs pnpm checks in CI on pull requests." + }, + { + "id": "d2", + "path": "README.md", + "title": "Project quality commands", + "content": "The quality command list maps to CI checks execution." 
+ }, + { + "id": "d3", + "path": "src/tools/registry.ts", + "title": "Tool registry", + "content": "Defines registered local tools and validation." + }, + { + "id": "d4", + "path": "src/db/client.ts", + "title": "Database client", + "content": "Provides thin wrapper around PGLite query execution." + } + ] + } + ] +} diff --git a/benchmarks/retrieval-proofing/profiles.v1.json b/benchmarks/retrieval-proofing/profiles.v1.json new file mode 100644 index 0000000..ff5c585 --- /dev/null +++ b/benchmarks/retrieval-proofing/profiles.v1.json @@ -0,0 +1,36 @@ +{ + "version": "1.0", + "profiles": { + "smoke": { + "description": "Fast CI profile with a representative subset of benchmark cases", + "caseIds": ["repo-layout", "quality-commands", "provider-preflight"], + "thresholds": { + "hybridMinimums": { + "evidenceRelevance": 0.55, + "citationSupportCoverage": 0.75, + "compositeScore": 0.62, + "maxUnsupportedClaimPenalty": 0.45 + }, + "baselineDeltaFloors": { + "lexical": -0.03, + "vector": 0.02 + } + } + }, + "full": { + "description": "Full benchmark profile for deeper retrieval proofing", + "thresholds": { + "hybridMinimums": { + "evidenceRelevance": 0.5, + "citationSupportCoverage": 0.7, + "compositeScore": 0.58, + "maxUnsupportedClaimPenalty": 0.5 + }, + "baselineDeltaFloors": { + "lexical": -0.01, + "vector": 0.02 + } + } + } + } +} diff --git a/docs/retrieval-proofing-benchmark-schema.md b/docs/retrieval-proofing-benchmark-schema.md new file mode 100644 index 0000000..3551b00 --- /dev/null +++ b/docs/retrieval-proofing-benchmark-schema.md @@ -0,0 +1,63 @@ +# Retrieval Proofing Benchmark Schema (v1.0) + +This document defines the versioned fixture format used by retrieval proofing. 
+ +## Fixture File + +Path: `benchmarks/retrieval-proofing/benchmark.v1.json` + +Top-level shape: + +```json +{ + "version": "1.0", + "datasetName": "retrieval-proofing-core", + "datasetVersion": "2026.03.01", + "cases": [ + { + "id": "repo-layout", + "title": "Find retrieval implementation location", + "query": "where is hybrid retrieval implemented", + "intent": "lookup", + "difficulty": "low", + "topK": 3, + "expectedEvidenceDocIds": ["d1", "d2"], + "documents": [ + { + "id": "d1", + "path": "src/context/retrieval/hybrid.ts", + "title": "Hybrid retrieval source", + "content": "..." + } + ] + } + ] +} +``` + +## Field Semantics + +- `version`: Fixture schema version. Must be `1.0` for this release. +- `datasetName`: Human-readable benchmark dataset name. +- `datasetVersion`: Version of benchmark content. Bump when case content or labels change. +- `cases`: Benchmark case list. +- `cases[].id`: Stable identifier used by profile filters and reports. +- `cases[].query`: Query string used by all retrieval strategies. +- `cases[].topK`: Number of retrieved documents considered for scoring. +- `cases[].documents`: Candidate evidence set for the case. +- `cases[].expectedEvidenceDocIds`: Canonical evidence documents used for deterministic scoring. + +## Profile File + +Path: `benchmarks/retrieval-proofing/profiles.v1.json` + +- `version`: Profile schema version (`1.0`). +- `profiles.<name>.caseIds`: Optional subset of case IDs for this profile. +- `profiles.<name>.thresholds.hybridMinimums`: Absolute floors for hybrid metrics. +- `profiles.<name>.thresholds.baselineDeltaFloors`: Minimum hybrid-vs-baseline composite deltas. + +## Versioning Rules + +- Bump `datasetVersion` whenever benchmark content changes. +- Keep schema `version` at `1.0` unless the JSON structure changes. +- Prefer adding new cases over mutating existing case IDs to preserve comparability.
diff --git a/docs/retrieval-proofing.md b/docs/retrieval-proofing.md new file mode 100644 index 0000000..88d6c5a --- /dev/null +++ b/docs/retrieval-proofing.md @@ -0,0 +1,54 @@ +# Retrieval Quality Proofing + +Retrieval proofing evaluates `lexical`, `vector`, and `hybrid` strategies on the same benchmark dataset, emits JSON/Markdown artifacts, and enforces hybrid quality gates. + +## Run Locally + +Smoke profile: + +```bash +pnpm dev retrieval-proof --profile smoke +``` + +Full profile: + +```bash +pnpm dev retrieval-proof --profile full +``` + +Custom output directory: + +```bash +pnpm dev retrieval-proof --profile smoke --output-dir artifacts/retrieval-proofing +``` + +## Artifacts + +Each run writes: + +- `<profile>-<timestamp>.json`: per-case metrics, aggregate metrics, hybrid deltas, gate result. +- `<profile>-<timestamp>.md`: concise human-readable summary for PR/release notes. + +Default output path: + +- `artifacts/retrieval-proofing/` + +## CI Workflow + +PR checks run retrieval proofing with the smoke profile: + +```bash +pnpm dev retrieval-proof --profile smoke --output-dir artifacts/retrieval-proofing +``` + +If hybrid thresholds fail, the command exits non-zero and CI fails. + +## Updating Baseline Thresholds Safely + +1. Run the full profile locally and inspect both JSON and Markdown reports. +2. Confirm changes are intentional and linked to retrieval behavior changes. +3. Update thresholds in `benchmarks/retrieval-proofing/profiles.v1.json`. +4. Re-run both `smoke` and `full` profiles and ensure results are stable. +5. Include rationale for threshold changes in PR description (what changed and why). + +Avoid lowering thresholds to mask regressions. Prefer improving retrieval behavior first.
diff --git a/openspec/changes/retrieval-quality-proofing/.openspec.yaml b/openspec/changes/archive/2026-03-04-retrieval-quality-proofing/.openspec.yaml similarity index 100% rename from openspec/changes/retrieval-quality-proofing/.openspec.yaml rename to openspec/changes/archive/2026-03-04-retrieval-quality-proofing/.openspec.yaml diff --git a/openspec/changes/retrieval-quality-proofing/design.md b/openspec/changes/archive/2026-03-04-retrieval-quality-proofing/design.md similarity index 100% rename from openspec/changes/retrieval-quality-proofing/design.md rename to openspec/changes/archive/2026-03-04-retrieval-quality-proofing/design.md diff --git a/openspec/changes/retrieval-quality-proofing/proposal.md b/openspec/changes/archive/2026-03-04-retrieval-quality-proofing/proposal.md similarity index 100% rename from openspec/changes/retrieval-quality-proofing/proposal.md rename to openspec/changes/archive/2026-03-04-retrieval-quality-proofing/proposal.md diff --git a/openspec/changes/retrieval-quality-proofing/specs/retrieval-quality-proofing/spec.md b/openspec/changes/archive/2026-03-04-retrieval-quality-proofing/specs/retrieval-quality-proofing/spec.md similarity index 100% rename from openspec/changes/retrieval-quality-proofing/specs/retrieval-quality-proofing/spec.md rename to openspec/changes/archive/2026-03-04-retrieval-quality-proofing/specs/retrieval-quality-proofing/spec.md diff --git a/openspec/changes/archive/2026-03-04-retrieval-quality-proofing/tasks.md b/openspec/changes/archive/2026-03-04-retrieval-quality-proofing/tasks.md new file mode 100644 index 0000000..b914fd6 --- /dev/null +++ b/openspec/changes/archive/2026-03-04-retrieval-quality-proofing/tasks.md @@ -0,0 +1,23 @@ +## 1. 
Benchmark Dataset and Configuration + +- [x] 1.1 Define and document versioned benchmark fixture schema for retrieval proofing cases +- [x] 1.2 Add initial benchmark dataset covering multiple query intents and grounding difficulty levels +- [x] 1.3 Implement profile-based proofing configuration (for example `smoke` and `full`) with threshold settings + +## 2. Evaluation Runner and Scoring + +- [x] 2.1 Implement retrieval proofing runner that executes lexical, vector, and hybrid modes over the same case set +- [x] 2.2 Implement deterministic grounding metric scoring for evidence relevance, citation support, and unsupported-claim penalty +- [x] 2.3 Add aggregate scoring and strategy delta computation suitable for pass/fail gating + +## 3. Reporting and CLI Integration + +- [x] 3.1 Add CLI command(s) to run retrieval proofing for a selected benchmark profile +- [x] 3.2 Generate JSON report artifacts with per-case and aggregate metrics for each strategy +- [x] 3.3 Generate Markdown summary report highlighting hybrid-vs-baseline outcomes and gate status + +## 4. Quality Gates and Verification + +- [x] 4.1 Integrate proofing command into CI with non-zero exit on failed hybrid thresholds +- [x] 4.2 Add tests for scoring determinism, report schema stability, and gate pass/fail behavior +- [x] 4.3 Document local and CI proofing workflows, including how to update baseline thresholds safely diff --git a/openspec/changes/retrieval-quality-proofing/tasks.md b/openspec/changes/retrieval-quality-proofing/tasks.md deleted file mode 100644 index 0fe1789..0000000 --- a/openspec/changes/retrieval-quality-proofing/tasks.md +++ /dev/null @@ -1,23 +0,0 @@ -## 1. 
Benchmark Dataset and Configuration - -- [ ] 1.1 Define and document versioned benchmark fixture schema for retrieval proofing cases -- [ ] 1.2 Add initial benchmark dataset covering multiple query intents and grounding difficulty levels -- [ ] 1.3 Implement profile-based proofing configuration (for example `smoke` and `full`) with threshold settings - -## 2. Evaluation Runner and Scoring - -- [ ] 2.1 Implement retrieval proofing runner that executes lexical, vector, and hybrid modes over the same case set -- [ ] 2.2 Implement deterministic grounding metric scoring for evidence relevance, citation support, and unsupported-claim penalty -- [ ] 2.3 Add aggregate scoring and strategy delta computation suitable for pass/fail gating - -## 3. Reporting and CLI Integration - -- [ ] 3.1 Add CLI command(s) to run retrieval proofing for a selected benchmark profile -- [ ] 3.2 Generate JSON report artifacts with per-case and aggregate metrics for each strategy -- [ ] 3.3 Generate Markdown summary report highlighting hybrid-vs-baseline outcomes and gate status - -## 4. Quality Gates and Verification - -- [ ] 4.1 Integrate proofing command into CI with non-zero exit on failed hybrid thresholds -- [ ] 4.2 Add tests for scoring determinism, report schema stability, and gate pass/fail behavior -- [ ] 4.3 Document local and CI proofing workflows, including how to update baseline thresholds safely diff --git a/openspec/specs/retrieval-quality-proofing/spec.md b/openspec/specs/retrieval-quality-proofing/spec.md new file mode 100644 index 0000000..ef4b8ec --- /dev/null +++ b/openspec/specs/retrieval-quality-proofing/spec.md @@ -0,0 +1,37 @@ +# retrieval-quality-proofing Specification + +## Purpose +TBD - created by archiving change retrieval-quality-proofing. Update Purpose after archive. 
+## Requirements +### Requirement: Multi-Strategy Retrieval Evaluation +The system MUST execute the same benchmark question set against at least three retrieval strategies: lexical-only, vector-only, and hybrid. + +#### Scenario: Compare strategies on shared benchmark +- **WHEN** a proofing run starts for a benchmark profile +- **THEN** the system runs every benchmark case across lexical, vector, and hybrid modes using identical inputs and scoring configuration + +### Requirement: Deterministic Grounding Metrics +The system MUST calculate deterministic grounding metrics for each benchmark case and strategy, including evidence relevance, citation support coverage, and unsupported-claim penalty. + +#### Scenario: Produce deterministic scores +- **WHEN** the same benchmark profile and repository state are evaluated multiple times +- **THEN** the computed grounding metrics and aggregate scores are identical across runs except for explicitly declared non-deterministic fields + +### Requirement: Versioned Benchmark and Report Artifacts +The system MUST support versioned benchmark fixtures and emit both machine-readable and human-readable report artifacts for every proofing run. + +#### Scenario: Generate proof artifacts +- **WHEN** a proofing run completes +- **THEN** the system writes a JSON report containing per-case and aggregate metric values and writes a Markdown summary highlighting strategy deltas and pass/fail gate status + +### Requirement: Hybrid Quality Gate Enforcement +The system MUST enforce configurable quality gates that verify hybrid retrieval outperforms configured baseline strategies on grounding metrics. 
+ +#### Scenario: Gate fails on hybrid regression +- **WHEN** a proofing run determines that hybrid retrieval does not meet configured improvement thresholds versus baseline +- **THEN** the command exits non-zero and marks the run as failed for CI enforcement + +#### Scenario: Gate passes on acceptable hybrid improvement +- **WHEN** a proofing run determines that hybrid retrieval meets configured improvement thresholds versus baseline +- **THEN** the command exits zero and marks the run as passing + diff --git a/src/cli/commands.ts b/src/cli/commands.ts index 683716f..00564bc 100644 --- a/src/cli/commands.ts +++ b/src/cli/commands.ts @@ -7,6 +7,7 @@ import { import { runChatCommand } from './commands/chat'; import { runIndexCommand } from './commands/index'; import { runPlanCommand } from './commands/plan'; +import { runRetrievalProofCommand } from './commands/retrieval-proof'; export function createProgram(): Command { const program = new Command(); @@ -37,6 +38,38 @@ export function createProgram(): Command { await runIndexCommand(repoRoot); }); + program + .command('retrieval-proof') + .description('Run retrieval quality proofing against benchmark profiles') + .option( + '--benchmark ', + 'benchmark fixture JSON path', + 'benchmarks/retrieval-proofing/benchmark.v1.json' + ) + .option( + '--profiles ', + 'benchmark profiles JSON path', + 'benchmarks/retrieval-proofing/profiles.v1.json' + ) + .option('--profile ', 'benchmark profile name', 'smoke') + .option( + '--output-dir ', + 'directory for generated reports', + 'artifacts/retrieval-proofing' + ) + .option('--no-fail-on-gate', 'do not exit non-zero when gate fails') + .action( + async (options: { + benchmark: string; + profiles: string; + profile: string; + outputDir: string; + failOnGate: boolean; + }) => { + await runRetrievalProofCommand(options); + } + ); + const automations = program.command('automations').description('Manage local automations'); automations.command('list').action(async () => { diff --git 
a/src/cli/commands/retrieval-proof.ts b/src/cli/commands/retrieval-proof.ts new file mode 100644 index 0000000..905a2c1 --- /dev/null +++ b/src/cli/commands/retrieval-proof.ts @@ -0,0 +1,51 @@ +import { mkdir, writeFile } from 'node:fs/promises'; +import { isAbsolute, join, resolve } from 'node:path'; +import { formatProofingMarkdown } from '../../context/retrieval/proofing/reports'; +import { runRetrievalProofing } from '../../context/retrieval/proofing/runner'; + +type RetrievalProofCommandOptions = { + benchmark: string; + profiles: string; + profile: string; + outputDir: string; + failOnGate: boolean; +}; + +export async function runRetrievalProofCommand( + options: RetrievalProofCommandOptions +): Promise { + const benchmarkPath = absoluteFromCwd(options.benchmark); + const profilesPath = absoluteFromCwd(options.profiles); + const outputDir = absoluteFromCwd(options.outputDir); + + const report = await runRetrievalProofing({ + benchmarkPath, + profilesPath, + profileName: options.profile, + }); + + await mkdir(outputDir, { recursive: true }); + const timestamp = report.generatedAt.replaceAll(':', '-'); + const baseName = `${report.profile}-${timestamp}`; + const jsonPath = join(outputDir, `${baseName}.json`); + const markdownPath = join(outputDir, `${baseName}.md`); + + await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, 'utf8'); + await writeFile(markdownPath, `${formatProofingMarkdown(report)}\n`, 'utf8'); + + console.log(`Retrieval proofing complete for profile "${report.profile}".`); + console.log(`Gate status: ${report.gate.passed ? 
'PASS' : 'FAIL'}`); + console.log(`JSON report: ${jsonPath}`); + console.log(`Markdown report: ${markdownPath}`); + + if (!report.gate.passed && options.failOnGate) { + throw new Error(`Retrieval proofing gate failed: ${report.gate.failures.join('; ')}`); + } +} + +function absoluteFromCwd(path: string): string { + if (isAbsolute(path)) { + return path; + } + return resolve(process.cwd(), path); +} diff --git a/src/context/retrieval/proofing/reports.ts b/src/context/retrieval/proofing/reports.ts new file mode 100644 index 0000000..0067ec2 --- /dev/null +++ b/src/context/retrieval/proofing/reports.ts @@ -0,0 +1,65 @@ +import type { RetrievalProofingReport } from './schema'; + +export function formatProofingMarkdown(report: RetrievalProofingReport): string { + const hybridAggregate = report.strategies.hybrid.aggregate.metrics; + const lexicalAggregate = report.strategies.lexical.aggregate.metrics; + const vectorAggregate = report.strategies.vector.aggregate.metrics; + + const lines = [ + '# Retrieval Quality Proofing Report', + '', + `- Generated: ${report.generatedAt}`, + `- Benchmark: ${report.benchmark.datasetName}@${report.benchmark.datasetVersion}`, + `- Profile: ${report.profile}`, + `- Gate: ${report.gate.passed ? 
'PASS' : 'FAIL'}`, + '', + '## Aggregate Metrics', + '', + '| Strategy | Evidence Relevance | Citation Coverage | Unsupported Penalty | Composite |', + '| --- | ---: | ---: | ---: | ---: |', + renderAggregateRow('hybrid', hybridAggregate), + renderAggregateRow('lexical', lexicalAggregate), + renderAggregateRow('vector', vectorAggregate), + '', + '## Hybrid Deltas vs Baselines', + '', + '| Baseline | Evidence Relevance Δ | Citation Coverage Δ | Unsupported Penalty Δ | Composite Δ |', + '| --- | ---: | ---: | ---: | ---: |', + ...report.hybridDeltas.map((entry) => + [ + `| ${entry.baseline}`, + `${entry.metricDeltas.evidenceRelevance.toFixed(3)}`, + `${entry.metricDeltas.citationSupportCoverage.toFixed(3)}`, + `${entry.metricDeltas.unsupportedClaimPenalty.toFixed(3)}`, + `${entry.metricDeltas.compositeScore.toFixed(3)} |`, + ].join(' | ') + ), + '', + '## Gate Status', + '', + report.gate.passed ? '- All configured thresholds passed.' : '- Failure reasons:', + ...report.gate.failures.map((failure) => ` - ${failure}`), + '', + '## Per-Case Hybrid Summary', + '', + '| Case | Retrieved Doc IDs | Expected Evidence IDs | Composite |', + '| --- | --- | --- | ---: |', + ...report.strategies.hybrid.cases.map((entry) => + [ + `| ${entry.caseId}`, + entry.retrievedDocIds.join(', '), + entry.expectedEvidenceDocIds.join(', '), + `${entry.metrics.compositeScore.toFixed(3)} |`, + ].join(' | ') + ), + ]; + + return lines.join('\n'); +} + +function renderAggregateRow( + strategy: string, + metrics: RetrievalProofingReport['strategies']['hybrid']['aggregate']['metrics'] +): string { + return `| ${strategy} | ${metrics.evidenceRelevance.toFixed(3)} | ${metrics.citationSupportCoverage.toFixed(3)} | ${metrics.unsupportedClaimPenalty.toFixed(3)} | ${metrics.compositeScore.toFixed(3)} |`; +} diff --git a/src/context/retrieval/proofing/runner.ts b/src/context/retrieval/proofing/runner.ts new file mode 100644 index 0000000..00be525 --- /dev/null +++ 
b/src/context/retrieval/proofing/runner.ts @@ -0,0 +1,213 @@ +import { readFile } from 'node:fs/promises'; +import { cosineSimilarity, deterministicEmbedding } from '../rerank'; +import { + type BenchmarkCase, + type BenchmarkProfile, + BenchmarkProfilesSchema, + RetrievalBenchmarkSchema, + type RetrievalProofingReport, + RetrievalProofingReportSchema, + type RetrievalProofingStrategy, +} from './schema'; +import { averageCaseMetrics, scoreCaseMetrics, subtractMetrics } from './scoring'; + +const STRATEGIES: RetrievalProofingStrategy[] = ['lexical', 'vector', 'hybrid']; + +export async function runRetrievalProofing(input: { + benchmarkPath: string; + profilesPath: string; + profileName: string; +}): Promise { + const benchmark = await loadBenchmark(input.benchmarkPath); + const profiles = await loadProfiles(input.profilesPath); + const profile = profiles.profiles[input.profileName]; + + if (!profile) { + throw new Error( + `Unknown benchmark profile "${input.profileName}". Available: ${Object.keys(profiles.profiles).join(', ')}` + ); + } + + const selectedCases = selectCases(benchmark.cases, profile); + const strategyReports = Object.fromEntries( + STRATEGIES.map((strategy) => { + const cases = selectedCases.map((benchmarkCase) => { + const retrievedDocIds = retrieveDocsForCase(benchmarkCase, strategy); + const metrics = scoreCaseMetrics({ benchmarkCase, retrievedDocIds }); + return { + caseId: benchmarkCase.id, + strategy, + retrievedDocIds, + expectedEvidenceDocIds: benchmarkCase.expectedEvidenceDocIds, + metrics, + }; + }); + const aggregate = { + strategy, + metrics: averageCaseMetrics(cases.map((entry) => entry.metrics)), + }; + return [strategy, { cases, aggregate }]; + }) + ) as RetrievalProofingReport['strategies']; + + const hybridAggregate = strategyReports.hybrid.aggregate.metrics; + const lexicalAggregate = strategyReports.lexical.aggregate.metrics; + const vectorAggregate = strategyReports.vector.aggregate.metrics; + + const hybridDeltas = [ + { + 
baseline: 'lexical' as const, + metricDeltas: subtractMetrics(hybridAggregate, lexicalAggregate), + }, + { + baseline: 'vector' as const, + metricDeltas: subtractMetrics(hybridAggregate, vectorAggregate), + }, + ]; + + const gateFailures = evaluateGate({ + profile, + hybridAggregate, + lexicalAggregate, + vectorAggregate, + }); + + return RetrievalProofingReportSchema.parse({ + schemaVersion: '1.0', + benchmark: { + datasetName: benchmark.datasetName, + datasetVersion: benchmark.datasetVersion, + }, + profile: input.profileName, + generatedAt: new Date().toISOString(), + strategies: strategyReports, + hybridDeltas, + gate: { + passed: gateFailures.length === 0, + failures: gateFailures, + }, + }); +} + +function evaluateGate(input: { + profile: BenchmarkProfile; + hybridAggregate: RetrievalProofingReport['strategies']['hybrid']['aggregate']['metrics']; + lexicalAggregate: RetrievalProofingReport['strategies']['lexical']['aggregate']['metrics']; + vectorAggregate: RetrievalProofingReport['strategies']['vector']['aggregate']['metrics']; +}): string[] { + const failures: string[] = []; + const minimums = input.profile.thresholds.hybridMinimums; + const deltas = input.profile.thresholds.baselineDeltaFloors; + const hybrid = input.hybridAggregate; + + if (hybrid.evidenceRelevance < minimums.evidenceRelevance) { + failures.push( + `hybrid evidenceRelevance ${hybrid.evidenceRelevance.toFixed(3)} < ${minimums.evidenceRelevance.toFixed(3)}` + ); + } + if (hybrid.citationSupportCoverage < minimums.citationSupportCoverage) { + failures.push( + `hybrid citationSupportCoverage ${hybrid.citationSupportCoverage.toFixed(3)} < ${minimums.citationSupportCoverage.toFixed(3)}` + ); + } + if (hybrid.compositeScore < minimums.compositeScore) { + failures.push( + `hybrid compositeScore ${hybrid.compositeScore.toFixed(3)} < ${minimums.compositeScore.toFixed(3)}` + ); + } + if (hybrid.unsupportedClaimPenalty > minimums.maxUnsupportedClaimPenalty) { + failures.push( + `hybrid 
unsupportedClaimPenalty ${hybrid.unsupportedClaimPenalty.toFixed(3)} > ${minimums.maxUnsupportedClaimPenalty.toFixed(3)}` + ); + } + + const hybridVsLexical = hybrid.compositeScore - input.lexicalAggregate.compositeScore; + if (hybridVsLexical < deltas.lexical) { + failures.push( + `hybrid-vs-lexical composite delta ${hybridVsLexical.toFixed(3)} < ${deltas.lexical.toFixed(3)}` + ); + } + + const hybridVsVector = hybrid.compositeScore - input.vectorAggregate.compositeScore; + if (hybridVsVector < deltas.vector) { + failures.push( + `hybrid-vs-vector composite delta ${hybridVsVector.toFixed(3)} < ${deltas.vector.toFixed(3)}` + ); + } + + return failures; +} + +function selectCases(cases: BenchmarkCase[], profile: BenchmarkProfile): BenchmarkCase[] { + if (!profile.caseIds || profile.caseIds.length === 0) { + return cases; + } + + const wanted = new Set(profile.caseIds); + const selected = cases.filter((entry) => wanted.has(entry.id)); + if (selected.length !== profile.caseIds.length) { + const selectedIds = new Set(selected.map((entry) => entry.id)); + const missing = profile.caseIds.filter((id) => !selectedIds.has(id)); + throw new Error(`Profile references missing benchmark case IDs: ${missing.join(', ')}`); + } + return selected; +} + +function retrieveDocsForCase( + benchmarkCase: BenchmarkCase, + strategy: RetrievalProofingStrategy +): string[] { + const rows = benchmarkCase.documents.map((doc) => { + const lexicalScore = computeLexicalScore(benchmarkCase.query, `${doc.title} ${doc.content}`); + const vectorScore = cosineSimilarity( + deterministicEmbedding(benchmarkCase.query), + deterministicEmbedding(`${doc.title} ${doc.content}`) + ); + const totalScore = + strategy === 'lexical' + ? lexicalScore + : strategy === 'vector' + ? 
vectorScore + : lexicalScore * 0.7 + vectorScore * 0.3; + + return { + id: doc.id, + totalScore, + }; + }); + + return rows + .sort((a, b) => b.totalScore - a.totalScore) + .slice(0, benchmarkCase.topK) + .map((entry) => entry.id); +} + +function computeLexicalScore(query: string, haystack: string): number { + const tokens = query + .toLowerCase() + .split(/[^a-z0-9]+/g) + .filter(Boolean); + if (tokens.length === 0) { + return 0; + } + + const source = haystack.toLowerCase(); + let score = 0; + for (const token of tokens) { + if (source.includes(token)) { + score += 1; + } + } + + return score / tokens.length; +} + +async function loadBenchmark(path: string) { + const raw = await readFile(path, 'utf8'); + return RetrievalBenchmarkSchema.parse(JSON.parse(raw)); +} + +async function loadProfiles(path: string) { + const raw = await readFile(path, 'utf8'); + return BenchmarkProfilesSchema.parse(JSON.parse(raw)); +} diff --git a/src/context/retrieval/proofing/schema.ts b/src/context/retrieval/proofing/schema.ts new file mode 100644 index 0000000..0e7a78c --- /dev/null +++ b/src/context/retrieval/proofing/schema.ts @@ -0,0 +1,118 @@ +import { z } from 'zod'; + +export const RetrievalProofingStrategySchema = z.enum(['lexical', 'vector', 'hybrid']); +export type RetrievalProofingStrategy = z.infer; + +export const BenchmarkDocumentSchema = z.object({ + id: z.string().min(1), + path: z.string().min(1), + title: z.string().min(1), + content: z.string().min(1), +}); + +export const BenchmarkCaseSchema = z.object({ + id: z.string().min(1), + title: z.string().min(1), + query: z.string().min(1), + intent: z.string().min(1), + difficulty: z.enum(['low', 'medium', 'high']), + topK: z.number().int().positive().default(3), + documents: z.array(BenchmarkDocumentSchema).min(2), + expectedEvidenceDocIds: z.array(z.string().min(1)).min(1), +}); +export type BenchmarkCase = z.infer; + +export const RetrievalBenchmarkSchema = z.object({ + version: z.literal('1.0'), + datasetName: 
// Top-level benchmark fixture: a named, versioned dataset of retrieval cases.
export const RetrievalBenchmarkSchema = z.object({
  version: z.literal('1.0'),
  datasetName: z.string().min(1),
  datasetVersion: z.string().min(1),
  cases: z.array(BenchmarkCaseSchema).min(1),
});
export type RetrievalBenchmark = z.infer<typeof RetrievalBenchmarkSchema>;

// Gate thresholds a profile applies to the hybrid strategy's aggregates.
export const ProofingThresholdsSchema = z.object({
  // Floors for "higher is better" metrics, plus a ceiling for the penalty.
  hybridMinimums: z.object({
    evidenceRelevance: z.number().min(0).max(1),
    citationSupportCoverage: z.number().min(0).max(1),
    compositeScore: z.number().min(0).max(1),
    maxUnsupportedClaimPenalty: z.number().min(0).max(1),
  }),
  // Minimum composite-score advantage hybrid must hold over each baseline;
  // values may be negative to tolerate a bounded regression.
  baselineDeltaFloors: z.object({
    lexical: z.number(),
    vector: z.number(),
  }),
});
export type ProofingThresholds = z.infer<typeof ProofingThresholdsSchema>;

// A named benchmark run configuration; omitting caseIds runs all cases.
export const BenchmarkProfileSchema = z.object({
  description: z.string().min(1),
  caseIds: z.array(z.string().min(1)).optional(),
  thresholds: ProofingThresholdsSchema,
});
export type BenchmarkProfile = z.infer<typeof BenchmarkProfileSchema>;

// The profiles fixture file: profile name -> profile definition.
export const BenchmarkProfilesSchema = z.object({
  version: z.literal('1.0'),
  profiles: z.record(z.string(), BenchmarkProfileSchema),
});
export type BenchmarkProfiles = z.infer<typeof BenchmarkProfilesSchema>;

// Per-case metric bundle; all values are normalized to [0, 1].
export const CaseMetricsSchema = z.object({
  evidenceRelevance: z.number().min(0).max(1),
  citationSupportCoverage: z.number().min(0).max(1),
  unsupportedClaimPenalty: z.number().min(0).max(1),
  compositeScore: z.number().min(0).max(1),
});
export type CaseMetrics = z.infer<typeof CaseMetricsSchema>;

// One case scored under one strategy, with the doc IDs behind the metrics.
export const StrategyCaseResultSchema = z.object({
  caseId: z.string(),
  strategy: RetrievalProofingStrategySchema,
  retrievedDocIds: z.array(z.string()),
  expectedEvidenceDocIds: z.array(z.string()),
  metrics: CaseMetricsSchema,
});
export type StrategyCaseResult = z.infer<typeof StrategyCaseResultSchema>;

// Mean of CaseMetrics across all cases run under one strategy.
export const StrategyAggregateSchema = z.object({
  strategy: RetrievalProofingStrategySchema,
  metrics: CaseMetricsSchema,
});
export type StrategyAggregate = z.infer<typeof StrategyAggregateSchema>;

// Hybrid-minus-baseline metric differences; deltas may be negative.
export const StrategyDeltaSchema = z.object({
  baseline: z.enum(['lexical', 'vector']),
  metricDeltas: z.object({
    evidenceRelevance: z.number(),
    citationSupportCoverage: z.number(),
    unsupportedClaimPenalty: z.number(),
    compositeScore: z.number(),
  }),
});
export type StrategyDelta = z.infer<typeof StrategyDeltaSchema>;

// Gate verdict: passed iff failures is empty.
export const GateResultSchema = z.object({
  passed: z.boolean(),
  failures: z.array(z.string()),
});
export type GateResult = z.infer<typeof GateResultSchema>;

// Full proofing report, persisted as a JSON artifact and consumed by CI.
export const RetrievalProofingReportSchema = z.object({
  schemaVersion: z.literal('1.0'),
  benchmark: z.object({
    datasetName: z.string(),
    datasetVersion: z.string(),
  }),
  profile: z.string(),
  // ISO-8601 timestamp of report generation (the only non-deterministic field).
  generatedAt: z.string(),
  // NOTE(review): z.record with an enum key validates only the keys that are
  // present (keys are effectively optional in zod v3), so a report missing a
  // strategy would still parse; the runner always supplies all three.
  // Confirm whether strict exhaustiveness is wanted here.
  strategies: z.record(
    RetrievalProofingStrategySchema,
    z.object({
      cases: z.array(StrategyCaseResultSchema),
      aggregate: StrategyAggregateSchema,
    })
  ),
  hybridDeltas: z.array(StrategyDeltaSchema),
  gate: GateResultSchema,
});
export type RetrievalProofingReport = z.infer<typeof RetrievalProofingReportSchema>;
return { + evidenceRelevance, + citationSupportCoverage, + unsupportedClaimPenalty, + compositeScore, + }; +} + +export function averageCaseMetrics(metrics: CaseMetrics[]): CaseMetrics { + if (metrics.length === 0) { + return { + evidenceRelevance: 0, + citationSupportCoverage: 0, + unsupportedClaimPenalty: 1, + compositeScore: 0, + }; + } + + return { + evidenceRelevance: average(metrics.map((metric) => metric.evidenceRelevance)), + citationSupportCoverage: average(metrics.map((metric) => metric.citationSupportCoverage)), + unsupportedClaimPenalty: average(metrics.map((metric) => metric.unsupportedClaimPenalty)), + compositeScore: average(metrics.map((metric) => metric.compositeScore)), + }; +} + +export function subtractMetrics(a: CaseMetrics, b: CaseMetrics): CaseMetrics { + return { + evidenceRelevance: a.evidenceRelevance - b.evidenceRelevance, + citationSupportCoverage: a.citationSupportCoverage - b.citationSupportCoverage, + unsupportedClaimPenalty: a.unsupportedClaimPenalty - b.unsupportedClaimPenalty, + compositeScore: a.compositeScore - b.compositeScore, + }; +} + +function average(values: number[]): number { + if (values.length === 0) { + return 0; + } + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function divide(num: number, den: number): number { + if (den <= 0) { + return 0; + } + return num / den; +} + +function clamp01(value: number): number { + if (value <= 0) { + return 0; + } + if (value >= 1) { + return 1; + } + return value; +} diff --git a/tests/retrieval-proofing.test.ts b/tests/retrieval-proofing.test.ts new file mode 100644 index 0000000..66333fb --- /dev/null +++ b/tests/retrieval-proofing.test.ts @@ -0,0 +1,90 @@ +import { mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join, resolve } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { runRetrievalProofing } from '../src/context/retrieval/proofing/runner'; +import { 
RetrievalProofingReportSchema } from '../src/context/retrieval/proofing/schema'; + +const benchmarkPath = resolve(process.cwd(), 'benchmarks/retrieval-proofing/benchmark.v1.json'); +const profilesPath = resolve(process.cwd(), 'benchmarks/retrieval-proofing/profiles.v1.json'); + +describe('retrieval proofing', () => { + it('produces deterministic scoring for fixed benchmark/profile inputs', async () => { + const first = await runRetrievalProofing({ + benchmarkPath, + profilesPath, + profileName: 'smoke', + }); + const second = await runRetrievalProofing({ + benchmarkPath, + profilesPath, + profileName: 'smoke', + }); + + expect(first.generatedAt).not.toEqual(second.generatedAt); + expect({ ...first, generatedAt: 'fixed' }).toEqual({ ...second, generatedAt: 'fixed' }); + }); + + it('keeps JSON report schema stable and parseable', async () => { + const report = await runRetrievalProofing({ + benchmarkPath, + profilesPath, + profileName: 'smoke', + }); + + const parsed = RetrievalProofingReportSchema.parse(report); + + expect(parsed.schemaVersion).toBe('1.0'); + expect(Object.keys(parsed.strategies)).toEqual(['lexical', 'vector', 'hybrid']); + expect(parsed.strategies.hybrid.cases.length).toBeGreaterThan(0); + expect(typeof parsed.gate.passed).toBe('boolean'); + }); + + it('reports gate pass/fail based on configured thresholds', async () => { + const passReport = await runRetrievalProofing({ + benchmarkPath, + profilesPath, + profileName: 'smoke', + }); + expect(passReport.gate.passed).toBe(true); + + const tempDir = await mkdtemp(join(tmpdir(), 'retrieval-proofing-')); + const strictProfilesPath = join(tempDir, 'profiles.strict.json'); + const baseProfiles = JSON.parse(await readFile(profilesPath, 'utf8')) as { + version: string; + profiles: Record; + }; + + const strictProfiles = { + ...baseProfiles, + profiles: { + ...baseProfiles.profiles, + smoke: { + description: 'strict gate for failure test', + thresholds: { + hybridMinimums: { + evidenceRelevance: 0.99, + 
citationSupportCoverage: 0.99, + compositeScore: 0.99, + maxUnsupportedClaimPenalty: 0.01, + }, + baselineDeltaFloors: { + lexical: 0.2, + vector: 0.2, + }, + }, + }, + }, + }; + await writeFile(strictProfilesPath, `${JSON.stringify(strictProfiles, null, 2)}\n`, 'utf8'); + + const failReport = await runRetrievalProofing({ + benchmarkPath, + profilesPath: strictProfilesPath, + profileName: 'smoke', + }); + + expect(failReport.gate.passed).toBe(false); + expect(failReport.gate.failures.length).toBeGreaterThan(0); + }); +}); From c3c28895ba80c6fb1194462848e6998111a0bee2 Mon Sep 17 00:00:00 2001 From: Daniel Wise Date: Tue, 3 Mar 2026 19:06:24 -0800 Subject: [PATCH 2/3] feat(embedding): add strategy-based fallback engine with provenance - add typed embedding strategy config, validation, and deterministic resolution - add anthropic native-first fallback policy with failure-category control - add embedding provenance envelope and persist provenance on chunk embeddings - surface provenance in retrieval metadata and optional provenance logging - add migration runner support for sequential SQL migrations and provenance column - add conformance/policy/provenance tests and rollout documentation - archive embedding-parity-hardening change and sync new capability specs --- README.md | 4 + docs/embedding-strategy-rollout.md | 31 +++ .../.openspec.yaml | 0 .../design.md | 0 .../proposal.md | 0 .../spec.md | 0 .../embedding-strategy-configuration/spec.md | 0 .../tasks.md | 26 +- .../spec.md | 38 +++ .../embedding-strategy-configuration/spec.md | 27 ++ src/cli/runtime.ts | 3 + src/context/embedding/config.ts | 77 ++++++ src/context/embedding/engine.ts | 173 ++++++++++++ src/context/embedding/strategy.ts | 185 +++++++++++++ src/context/indexer/full-index.ts | 104 +++++-- src/context/retrieval/hybrid.ts | 8 +- src/daemon/main.ts | 2 + src/db/migrate.ts | 30 +- .../migrations/0002_embedding_provenance.sql | 3 + tests/embedding-strategy.test.ts | 259 ++++++++++++++++++ 20 files 
changed, 929 insertions(+), 41 deletions(-) create mode 100644 docs/embedding-strategy-rollout.md rename openspec/changes/{embedding-parity-hardening => archive/2026-03-04-embedding-parity-hardening}/.openspec.yaml (100%) rename openspec/changes/{embedding-parity-hardening => archive/2026-03-04-embedding-parity-hardening}/design.md (100%) rename openspec/changes/{embedding-parity-hardening => archive/2026-03-04-embedding-parity-hardening}/proposal.md (100%) rename openspec/changes/{embedding-parity-hardening => archive/2026-03-04-embedding-parity-hardening}/specs/anthropic-embedding-fallback-and-provenance/spec.md (100%) rename openspec/changes/{embedding-parity-hardening => archive/2026-03-04-embedding-parity-hardening}/specs/embedding-strategy-configuration/spec.md (100%) rename openspec/changes/{embedding-parity-hardening => archive/2026-03-04-embedding-parity-hardening}/tasks.md (50%) create mode 100644 openspec/specs/anthropic-embedding-fallback-and-provenance/spec.md create mode 100644 openspec/specs/embedding-strategy-configuration/spec.md create mode 100644 src/context/embedding/config.ts create mode 100644 src/context/embedding/engine.ts create mode 100644 src/context/embedding/strategy.ts create mode 100644 src/db/migrations/0002_embedding_provenance.sql create mode 100644 tests/embedding-strategy.test.ts diff --git a/README.md b/README.md index 1da25f9..e0c0a62 100644 --- a/README.md +++ b/README.md @@ -90,9 +90,13 @@ Environment variables (BYOK): - `DUBSBOT_ANTHROPIC_MODEL` - `DUBSBOT_GOOGLE_MODEL` (defaults to `gemini-3.1-pro-preview`) - `DUBSBOT_OTEL_ENABLED=1` to enable telemetry export hooks +- `DUBSBOT_EMBEDDING_STRATEGY_V2=1` to enable explicit embedding strategy resolution/fallback +- `DUBSBOT_EMBEDDING_STRATEGY_CONFIG_JSON` to provide explicit strategy config +- `DUBSBOT_EMBEDDING_PROVENANCE_LOG=1` to emit embedding provenance log lines ## Notes - Anthropic embeddings currently fall back to deterministic local vectors. 
- This project intentionally uses Biome only (no ESLint/Prettier). - Retrieval proofing benchmark schema/workflow docs: `docs/retrieval-proofing-benchmark-schema.md` and `docs/retrieval-proofing.md`. +- Embedding strategy rollout guide: `docs/embedding-strategy-rollout.md`. diff --git a/docs/embedding-strategy-rollout.md b/docs/embedding-strategy-rollout.md new file mode 100644 index 0000000..e8f983a --- /dev/null +++ b/docs/embedding-strategy-rollout.md @@ -0,0 +1,31 @@ +# Embedding Strategy V2 Rollout + +This rollout gates the explicit embedding strategy engine behind: + +- `DUBSBOT_EMBEDDING_STRATEGY_V2=1` + +Optional config override: + +- `DUBSBOT_EMBEDDING_STRATEGY_CONFIG_JSON` (JSON string matching schema version `1.0`) + +Optional provenance logging: + +- `DUBSBOT_EMBEDDING_PROVENANCE_LOG=1` + +## Enable (staged) + +1. Set `DUBSBOT_EMBEDDING_STRATEGY_V2=1` in a non-production environment. +2. Start with default legacy-mapped config (no custom JSON). +3. Run indexing and retrieval checks. +4. If needed, provide explicit strategy JSON to control Anthropic fallback paths. +5. Verify fallback/provenance behavior with tests: + - `pnpm test -- embedding-strategy` + +## Rollback + +1. Unset or set `DUBSBOT_EMBEDDING_STRATEGY_V2=0`. +2. Restart CLI/daemon processes. +3. System returns to legacy embedding execution path. + +Rollback is safe because provenance fields are additive and read-compatible. 
+ diff --git a/openspec/changes/embedding-parity-hardening/.openspec.yaml b/openspec/changes/archive/2026-03-04-embedding-parity-hardening/.openspec.yaml similarity index 100% rename from openspec/changes/embedding-parity-hardening/.openspec.yaml rename to openspec/changes/archive/2026-03-04-embedding-parity-hardening/.openspec.yaml diff --git a/openspec/changes/embedding-parity-hardening/design.md b/openspec/changes/archive/2026-03-04-embedding-parity-hardening/design.md similarity index 100% rename from openspec/changes/embedding-parity-hardening/design.md rename to openspec/changes/archive/2026-03-04-embedding-parity-hardening/design.md diff --git a/openspec/changes/embedding-parity-hardening/proposal.md b/openspec/changes/archive/2026-03-04-embedding-parity-hardening/proposal.md similarity index 100% rename from openspec/changes/embedding-parity-hardening/proposal.md rename to openspec/changes/archive/2026-03-04-embedding-parity-hardening/proposal.md diff --git a/openspec/changes/embedding-parity-hardening/specs/anthropic-embedding-fallback-and-provenance/spec.md b/openspec/changes/archive/2026-03-04-embedding-parity-hardening/specs/anthropic-embedding-fallback-and-provenance/spec.md similarity index 100% rename from openspec/changes/embedding-parity-hardening/specs/anthropic-embedding-fallback-and-provenance/spec.md rename to openspec/changes/archive/2026-03-04-embedding-parity-hardening/specs/anthropic-embedding-fallback-and-provenance/spec.md diff --git a/openspec/changes/embedding-parity-hardening/specs/embedding-strategy-configuration/spec.md b/openspec/changes/archive/2026-03-04-embedding-parity-hardening/specs/embedding-strategy-configuration/spec.md similarity index 100% rename from openspec/changes/embedding-parity-hardening/specs/embedding-strategy-configuration/spec.md rename to openspec/changes/archive/2026-03-04-embedding-parity-hardening/specs/embedding-strategy-configuration/spec.md diff --git 
a/openspec/changes/embedding-parity-hardening/tasks.md b/openspec/changes/archive/2026-03-04-embedding-parity-hardening/tasks.md similarity index 50% rename from openspec/changes/embedding-parity-hardening/tasks.md rename to openspec/changes/archive/2026-03-04-embedding-parity-hardening/tasks.md index 533c582..a3b7e6f 100644 --- a/openspec/changes/embedding-parity-hardening/tasks.md +++ b/openspec/changes/archive/2026-03-04-embedding-parity-hardening/tasks.md @@ -1,24 +1,24 @@ ## 1. Strategy Configuration Foundation -- [ ] 1.1 Introduce typed `embeddingStrategy` config schema with provider/model primary and ordered fallback entries -- [ ] 1.2 Add startup validation for unknown providers, missing models, and cyclic fallback paths with structured errors -- [ ] 1.3 Add backward-compatible default mapping from legacy embedding settings to explicit strategy definitions +- [x] 1.1 Introduce typed `embeddingStrategy` config schema with provider/model primary and ordered fallback entries +- [x] 1.2 Add startup validation for unknown providers, missing models, and cyclic fallback paths with structured errors +- [x] 1.3 Add backward-compatible default mapping from legacy embedding settings to explicit strategy definitions ## 2. 
Runtime Strategy Resolution and Anthropic Policy -- [ ] 2.1 Implement deterministic strategy resolver that requires a valid strategy id for each embedding request -- [ ] 2.2 Implement Anthropic native-first execution path with explicit fallback eligibility based on configured failure categories -- [ ] 2.3 Enforce configured fallback order and terminal failure behavior when fallback is disallowed or exhausted +- [x] 2.1 Implement deterministic strategy resolver that requires a valid strategy id for each embedding request +- [x] 2.2 Implement Anthropic native-first execution path with explicit fallback eligibility based on configured failure categories +- [x] 2.3 Enforce configured fallback order and terminal failure behavior when fallback is disallowed or exhausted ## 3. Provenance Envelope and Data Plumbing -- [ ] 3.1 Define a normalized embedding result envelope including strategy id, provider/model attempt path, fallback state, and failure category -- [ ] 3.2 Propagate provenance fields through indexing writes and retrieval/query responses -- [ ] 3.3 Update logging/metrics hooks to include provenance identifiers for debugging and parity analysis +- [x] 3.1 Define a normalized embedding result envelope including strategy id, provider/model attempt path, fallback state, and failure category +- [x] 3.2 Propagate provenance fields through indexing writes and retrieval/query responses +- [x] 3.3 Update logging/metrics hooks to include provenance identifiers for debugging and parity analysis ## 4. 
Verification and Rollout Safety -- [ ] 4.1 Add conformance tests for valid/invalid strategy config loading and runtime resolution behavior -- [ ] 4.2 Add Anthropic policy tests covering success, non-fallbackable failures, fallbackable failures, and no-fallback scenarios -- [ ] 4.3 Add provenance completeness tests for both successful and terminal-failure embedding outcomes -- [ ] 4.4 Gate rollout behind a feature flag and document enable/rollback procedure for staged deployment +- [x] 4.1 Add conformance tests for valid/invalid strategy config loading and runtime resolution behavior +- [x] 4.2 Add Anthropic policy tests covering success, non-fallbackable failures, fallbackable failures, and no-fallback scenarios +- [x] 4.3 Add provenance completeness tests for both successful and terminal-failure embedding outcomes +- [x] 4.4 Gate rollout behind a feature flag and document enable/rollback procedure for staged deployment diff --git a/openspec/specs/anthropic-embedding-fallback-and-provenance/spec.md b/openspec/specs/anthropic-embedding-fallback-and-provenance/spec.md new file mode 100644 index 0000000..e34f9d8 --- /dev/null +++ b/openspec/specs/anthropic-embedding-fallback-and-provenance/spec.md @@ -0,0 +1,38 @@ +# anthropic-embedding-fallback-and-provenance Specification + +## Purpose +TBD - created by archiving change embedding-parity-hardening. Update Purpose after archive. +## Requirements +### Requirement: Anthropic Native-First Execution Policy +For strategies configured with Anthropic as primary, the system SHALL attempt Anthropic native embedding first and SHALL only consider fallback providers explicitly listed in that strategy. 
+ +#### Scenario: Anthropic primary succeeds +- **WHEN** a request resolves to a strategy with Anthropic as primary and Anthropic returns embeddings successfully +- **THEN** the system returns the Anthropic embedding result without invoking fallback providers + +#### Scenario: Anthropic primary fails with non-fallbackable error +- **WHEN** Anthropic returns an error outside configured fallbackable categories +- **THEN** the system returns a terminal embedding error and MUST NOT invoke fallback providers + +### Requirement: Controlled Anthropic Fallback Behavior +The system SHALL invoke fallback providers for Anthropic strategies only for configured fallbackable failure categories and in configured fallback order. + +#### Scenario: Fallback is invoked in configured order +- **WHEN** Anthropic primary fails with a fallbackable error category and fallback providers are configured +- **THEN** the system attempts fallback providers sequentially in strategy order until one succeeds or all fail + +#### Scenario: No fallback configured +- **WHEN** Anthropic primary fails with a fallbackable error category but no fallback providers are configured +- **THEN** the system returns a structured failure indicating no fallback path was available + +### Requirement: Embedding Provenance Metadata +The system SHALL attach provenance metadata to every embedding result and terminal failure outcome, including strategy id, attempt provider/model path, and fallback usage state. 
+ +#### Scenario: Provenance is emitted on success +- **WHEN** any provider successfully returns embeddings +- **THEN** the result includes provenance fields for strategy id, resolved provider/model, attempt path, and whether fallback was used + +#### Scenario: Provenance is emitted on terminal failure +- **WHEN** all attempts fail or fallback is disallowed +- **THEN** the error payload includes provenance fields for attempted providers/models, failure category, and terminal resolution reason + diff --git a/openspec/specs/embedding-strategy-configuration/spec.md b/openspec/specs/embedding-strategy-configuration/spec.md new file mode 100644 index 0000000..3d34504 --- /dev/null +++ b/openspec/specs/embedding-strategy-configuration/spec.md @@ -0,0 +1,27 @@ +# embedding-strategy-configuration Specification + +## Purpose +TBD - created by archiving change embedding-parity-hardening. Update Purpose after archive. +## Requirements +### Requirement: Provider-Configurable Embedding Strategy +The system SHALL support explicit embedding strategy configuration per embedding use-case, including primary provider/model selection and an ordered fallback list. + +#### Scenario: Valid strategy is loaded +- **WHEN** the service starts with a strategy configuration where each strategy has a primary provider/model and valid fallback entries +- **THEN** the system initializes successfully and registers the strategy for runtime resolution + +#### Scenario: Invalid strategy is rejected +- **WHEN** the service starts with a strategy configuration that references an unknown provider, missing model, or cyclic fallback path +- **THEN** the system MUST fail validation and return a configuration error that identifies the invalid strategy entry + +### Requirement: Deterministic Runtime Strategy Resolution +The system SHALL resolve embedding strategies deterministically for each request using the configured strategy identifier and SHALL NOT use implicit provider defaults. 
+ +#### Scenario: Strategy id resolves to configured primary +- **WHEN** an embedding request specifies a known strategy id +- **THEN** the system uses the configured primary provider/model for the first execution attempt + +#### Scenario: Unknown strategy id is rejected +- **WHEN** an embedding request specifies a strategy id not present in configuration +- **THEN** the system returns a structured error and MUST NOT attempt embedding generation + diff --git a/src/cli/runtime.ts b/src/cli/runtime.ts index d1528a6..7b45964 100644 --- a/src/cli/runtime.ts +++ b/src/cli/runtime.ts @@ -1,5 +1,6 @@ import { AgentOrchestrator } from '../agent/orchestrator'; import { loadAgentsConfig } from '../config/agents-loader'; +import { loadEmbeddingStrategyConfig } from '../context/embedding/config'; import { createDb } from '../db/client'; import { runMigrations } from '../db/migrate'; import { OptionalOtelExporter } from '../observability/otel'; @@ -13,6 +14,7 @@ import { ToolRegistry } from '../tools/registry'; export async function createRuntime() { await runMigrations(); const db = await createDb(); + const embeddingStrategyConfig = loadEmbeddingStrategyConfig(); const agentsConfig = await loadAgentsConfig(process.cwd()); const provider = createProviderAdapter(detectProvider()); const policyEngine = new DefaultPolicyEngine(createDefaultApprovalPolicy()); @@ -24,6 +26,7 @@ export async function createRuntime() { return { db, + embeddingStrategyConfig, provider, policyEngine, orchestrator, diff --git a/src/context/embedding/config.ts b/src/context/embedding/config.ts new file mode 100644 index 0000000..83b713e --- /dev/null +++ b/src/context/embedding/config.ts @@ -0,0 +1,77 @@ +import { detectProvider, type ProviderName } from '../../providers'; +import { + type EmbeddingStrategyConfig, + EmbeddingStrategyConfigError, + parseEmbeddingStrategyConfig, +} from './strategy'; + +export function loadEmbeddingStrategyConfig(): EmbeddingStrategyConfig { + const rawFromEnv = 
process.env.DUBSBOT_EMBEDDING_STRATEGY_CONFIG_JSON; + const raw = rawFromEnv ? JSON.parse(rawFromEnv) : buildLegacyDefaultConfig(); + const parsed = parseEmbeddingStrategyConfig(raw); + if (!parsed.config) { + throw new EmbeddingStrategyConfigError(parsed.issues); + } + return parsed.config; +} + +export function isEmbeddingStrategyV2Enabled(): boolean { + return process.env.DUBSBOT_EMBEDDING_STRATEGY_V2 === '1'; +} + +function buildLegacyDefaultConfig(): EmbeddingStrategyConfig { + const primaryProvider = detectProvider(); + const primary = toPrimaryStrategy(primaryProvider, 'default-primary'); + const strategies = [primary]; + + if (primaryProvider === 'anthropic') { + strategies.push(toPrimaryStrategy('openai', 'fallback-openai')); + strategies.push(toPrimaryStrategy('google', 'fallback-google')); + primary.fallback = [ + { + strategyId: 'fallback-openai', + onFailure: ['rate_limit', 'timeout', 'service_unavailable'], + }, + { + strategyId: 'fallback-google', + onFailure: ['rate_limit', 'timeout', 'service_unavailable'], + }, + ]; + } + + return { + version: '1.0', + defaults: { + indexing: 'default-primary', + query: 'default-primary', + }, + strategies, + }; +} + +function toPrimaryStrategy(provider: ProviderName, id: string) { + return { + id, + provider, + model: defaultEmbeddingModel(provider), + fallback: [] as Array<{ + strategyId: string; + onFailure: Array< + 'rate_limit' | 'timeout' | 'service_unavailable' | 'auth' | 'invalid_request' | 'unknown' + >; + }>, + }; +} + +function defaultEmbeddingModel(provider: ProviderName): string { + switch (provider) { + case 'openai': + return process.env.DUBSBOT_OPENAI_EMBEDDING_MODEL ?? 'text-embedding-3-small'; + case 'google': + return process.env.DUBSBOT_GOOGLE_EMBEDDING_MODEL ?? 'text-embedding-004'; + case 'anthropic': + return process.env.DUBSBOT_ANTHROPIC_EMBEDDING_MODEL ?? 
'deterministic-v1'; + default: + return 'deterministic-v1'; + } +} diff --git a/src/context/embedding/engine.ts b/src/context/embedding/engine.ts new file mode 100644 index 0000000..9159281 --- /dev/null +++ b/src/context/embedding/engine.ts @@ -0,0 +1,173 @@ +import type { ProviderAdapter } from '../../providers/types'; +import { + type EmbeddingStrategyConfig, + type FailureCategory, + resolveEmbeddingStrategy, +} from './strategy'; + +export type EmbeddingAttempt = { + strategyId: string; + provider: string; + model: string; + status: 'success' | 'failure'; + failureCategory?: FailureCategory; +}; + +export type EmbeddingProvenance = { + strategyId: string; + attemptPath: EmbeddingAttempt[]; + resolvedBy?: { strategyId: string; provider: string; model: string }; + fallbackUsed: boolean; + failureCategory?: FailureCategory; + terminalReason?: 'fallback_disallowed' | 'fallback_exhausted' | 'no_fallback'; +}; + +export type EmbeddingExecutionSuccess = { + ok: true; + embedding: number[]; + provider: string; + model: string; + provenance: EmbeddingProvenance; +}; + +export type EmbeddingExecutionFailure = { + ok: false; + message: string; + provenance: EmbeddingProvenance; +}; + +export type EmbeddingExecutionResult = EmbeddingExecutionSuccess | EmbeddingExecutionFailure; + +export class EmbeddingExecutionError extends Error { + constructor( + message: string, + public readonly provenance: EmbeddingProvenance + ) { + super(message); + this.name = 'EmbeddingExecutionError'; + } +} + +export async function executeEmbeddingWithStrategy(input: { + config: EmbeddingStrategyConfig; + strategyId: string; + value: string; + adapterForProvider: (provider: string) => ProviderAdapter; +}): Promise { + const attemptPath: EmbeddingAttempt[] = []; + const queue: string[] = [input.strategyId]; + const visited = new Set(); + let fallbackUsed = false; + + while (queue.length > 0) { + const currentId = queue.shift(); + if (!currentId || visited.has(currentId)) { + continue; + } + 
visited.add(currentId); + + const strategy = resolveEmbeddingStrategy(input.config, currentId); + const adapter = input.adapterForProvider(strategy.provider); + + try { + const vectors = await adapter.embed({ + model: strategy.model, + values: [input.value], + }); + const vector = vectors[0] ?? []; + attemptPath.push({ + strategyId: currentId, + provider: strategy.provider, + model: strategy.model, + status: 'success', + }); + return { + ok: true, + embedding: vector, + provider: strategy.provider, + model: strategy.model, + provenance: { + strategyId: input.strategyId, + attemptPath, + fallbackUsed, + resolvedBy: { + strategyId: currentId, + provider: strategy.provider, + model: strategy.model, + }, + }, + }; + } catch (error) { + const failureCategory = classifyEmbeddingFailure(error); + attemptPath.push({ + strategyId: currentId, + provider: strategy.provider, + model: strategy.model, + status: 'failure', + failureCategory, + }); + + const eligibleFallback = strategy.fallback.find((entry) => + entry.onFailure.includes(failureCategory) + ); + if (!eligibleFallback) { + return { + ok: false, + message: `Embedding failed for strategy "${currentId}" with category "${failureCategory}" and no eligible fallback.`, + provenance: { + strategyId: input.strategyId, + attemptPath, + fallbackUsed, + failureCategory, + terminalReason: strategy.fallback.length > 0 ? 'fallback_disallowed' : 'no_fallback', + }, + }; + } + + fallbackUsed = true; + queue.push(eligibleFallback.strategyId); + } + } + + return { + ok: false, + message: `Embedding failed for strategy "${input.strategyId}" after exhausting fallback chain.`, + provenance: { + strategyId: input.strategyId, + attemptPath, + fallbackUsed, + failureCategory: attemptPath.at(-1)?.failureCategory, + terminalReason: 'fallback_exhausted', + }, + }; +} + +export function classifyEmbeddingFailure(error: unknown): FailureCategory { + const message = + error instanceof Error ? 
error.message.toLowerCase() : String(error).toLowerCase(); + if (message.includes('rate limit') || message.includes('429')) { + return 'rate_limit'; + } + if (message.includes('timeout') || message.includes('timed out')) { + return 'timeout'; + } + if (message.includes('503') || message.includes('unavailable')) { + return 'service_unavailable'; + } + if (message.includes('401') || message.includes('403') || message.includes('auth')) { + return 'auth'; + } + if (message.includes('400') || message.includes('invalid')) { + return 'invalid_request'; + } + return 'unknown'; +} + +export function assertEmbeddingSuccess( + result: EmbeddingExecutionResult +): EmbeddingExecutionSuccess { + if (result.ok) { + return result; + } + throw new EmbeddingExecutionError(result.message, result.provenance); +} diff --git a/src/context/embedding/strategy.ts b/src/context/embedding/strategy.ts new file mode 100644 index 0000000..f3eac27 --- /dev/null +++ b/src/context/embedding/strategy.ts @@ -0,0 +1,185 @@ +import { z } from 'zod'; +import type { ProviderName } from '../../providers'; + +export const FailureCategorySchema = z.enum([ + 'rate_limit', + 'timeout', + 'service_unavailable', + 'auth', + 'invalid_request', + 'unknown', +]); +export type FailureCategory = z.infer; + +export const EmbeddingStrategyRefSchema = z.object({ + strategyId: z.string().min(1), + onFailure: z.array(FailureCategorySchema).min(1), +}); +export type EmbeddingStrategyRef = z.infer; + +export const EmbeddingStrategySchema = z.object({ + id: z.string().min(1), + provider: z.enum(['openai', 'anthropic', 'google']), + model: z.string().min(1), + fallback: z.array(EmbeddingStrategyRefSchema).default([]), +}); +export type EmbeddingStrategy = z.infer & { + provider: ProviderName; +}; + +export const EmbeddingStrategyConfigSchema = z.object({ + version: z.literal('1.0'), + defaults: z.object({ + indexing: z.string().min(1), + query: z.string().min(1), + }), + strategies: z.array(EmbeddingStrategySchema).min(1), 
+}); +export type EmbeddingStrategyConfig = z.infer; + +export type EmbeddingStrategyValidationIssue = { + code: + | 'unknown_provider' + | 'missing_model' + | 'duplicate_strategy' + | 'unknown_fallback_strategy' + | 'cyclic_fallback_path' + | 'unknown_default_strategy'; + strategyId?: string; + detail: string; +}; + +export class EmbeddingStrategyConfigError extends Error { + constructor(public readonly issues: EmbeddingStrategyValidationIssue[]) { + super( + `Invalid embedding strategy configuration: ${issues.map((issue) => issue.detail).join('; ')}` + ); + this.name = 'EmbeddingStrategyConfigError'; + } +} + +export class EmbeddingStrategyResolutionError extends Error { + constructor( + public readonly strategyId: string, + public readonly reason: 'unknown_strategy' + ) { + super(`Unable to resolve embedding strategy "${strategyId}": ${reason}`); + this.name = 'EmbeddingStrategyResolutionError'; + } +} + +export function parseEmbeddingStrategyConfig(raw: unknown): { + config?: EmbeddingStrategyConfig; + issues: EmbeddingStrategyValidationIssue[]; +} { + const parsed = EmbeddingStrategyConfigSchema.safeParse(raw); + if (!parsed.success) { + return { + issues: parsed.error.issues.map((issue) => ({ + code: issue.path.includes('provider') ? 
'unknown_provider' : 'missing_model', + detail: `${issue.path.join('.') || ''}: ${issue.message}`, + })), + }; + } + + const config = parsed.data; + const issues: EmbeddingStrategyValidationIssue[] = []; + const byId = new Map(); + for (const strategy of config.strategies) { + if (byId.has(strategy.id)) { + issues.push({ + code: 'duplicate_strategy', + strategyId: strategy.id, + detail: `Duplicate strategy id "${strategy.id}"`, + }); + continue; + } + byId.set(strategy.id, strategy); + + if (!strategy.model.trim()) { + issues.push({ + code: 'missing_model', + strategyId: strategy.id, + detail: `Strategy "${strategy.id}" is missing model`, + }); + } + } + + for (const strategy of config.strategies) { + for (const fallback of strategy.fallback) { + if (!byId.has(fallback.strategyId)) { + issues.push({ + code: 'unknown_fallback_strategy', + strategyId: strategy.id, + detail: `Strategy "${strategy.id}" references unknown fallback strategy "${fallback.strategyId}"`, + }); + } + } + } + + if (!byId.has(config.defaults.indexing)) { + issues.push({ + code: 'unknown_default_strategy', + strategyId: config.defaults.indexing, + detail: `Default indexing strategy "${config.defaults.indexing}" does not exist`, + }); + } + if (!byId.has(config.defaults.query)) { + issues.push({ + code: 'unknown_default_strategy', + strategyId: config.defaults.query, + detail: `Default query strategy "${config.defaults.query}" does not exist`, + }); + } + + const visited = new Set(); + const inStack = new Set(); + const path: string[] = []; + + function walk(strategyId: string) { + if (inStack.has(strategyId)) { + const cycleStart = path.indexOf(strategyId); + const cycle = [...path.slice(cycleStart), strategyId].join(' -> '); + issues.push({ + code: 'cyclic_fallback_path', + strategyId, + detail: `Cyclic fallback path detected: ${cycle}`, + }); + return; + } + if (visited.has(strategyId)) { + return; + } + + visited.add(strategyId); + inStack.add(strategyId); + path.push(strategyId); + const 
strategy = byId.get(strategyId); + if (strategy) { + for (const fallback of strategy.fallback) { + if (byId.has(fallback.strategyId)) { + walk(fallback.strategyId); + } + } + } + path.pop(); + inStack.delete(strategyId); + } + + for (const strategy of config.strategies) { + walk(strategy.id); + } + + return issues.length > 0 ? { issues } : { config, issues: [] }; +} + +export function resolveEmbeddingStrategy( + config: EmbeddingStrategyConfig, + strategyId: string +): EmbeddingStrategy { + const strategy = config.strategies.find((entry) => entry.id === strategyId); + if (!strategy) { + throw new EmbeddingStrategyResolutionError(strategyId, 'unknown_strategy'); + } + return strategy as EmbeddingStrategy; +} diff --git a/src/context/indexer/full-index.ts b/src/context/indexer/full-index.ts index 1e1c76e..9bd2390 100644 --- a/src/context/indexer/full-index.ts +++ b/src/context/indexer/full-index.ts @@ -2,7 +2,14 @@ import { createHash, randomUUID } from 'node:crypto'; import { readFile } from 'node:fs/promises'; import fg from 'fast-glob'; import type { DubsbotDb } from '../../db/client'; +import { createProviderAdapter } from '../../providers'; import type { ProviderAdapter } from '../../providers/types'; +import { isEmbeddingStrategyV2Enabled, loadEmbeddingStrategyConfig } from '../embedding/config'; +import { + assertEmbeddingSuccess, + type EmbeddingProvenance, + executeEmbeddingWithStrategy, +} from '../embedding/engine'; import { deterministicEmbedding } from '../retrieval/rerank'; type Chunk = { @@ -57,6 +64,7 @@ export async function runFullIndex(input: { repoRoot: string; embedProvider?: ProviderAdapter; embeddingModel?: string; + embeddingStrategyId?: string; }): Promise<{ filesIndexed: number; chunksIndexed: number }> { const paths = await fg(['**/*', '!node_modules/**', '!.git/**', '!dist/**', '!coverage/**'], { cwd: input.repoRoot, @@ -68,6 +76,20 @@ export async function runFullIndex(input: { let filesIndexed = 0; let chunksIndexed = 0; + const 
isStrategyV2 = isEmbeddingStrategyV2Enabled(); + const strategyConfig = isStrategyV2 ? loadEmbeddingStrategyConfig() : null; + const adapterCache = new Map(); + + function getAdapter(provider: string): ProviderAdapter { + const cached = adapterCache.get(provider); + if (cached) { + return cached; + } + const adapter = createProviderAdapter(provider as 'openai' | 'anthropic' | 'google'); + adapterCache.set(provider, adapter); + return adapter; + } + for (const relativePath of paths) { const absolutePath = `${input.repoRoot}/${relativePath}`; const content = await readFile(absolutePath, 'utf8').catch(() => null); @@ -104,26 +126,58 @@ export async function runFullIndex(input: { [chunkId, persistedFileId, chunk.index, chunk.content, chunk.startLine, chunk.endLine] ); - const embedding = - input.embedProvider != null - ? ( - await input.embedProvider.embed({ - model: input.embeddingModel ?? 'text-embedding-3-small', - values: [chunk.content], - }) - )[0] - : deterministicEmbedding(chunk.content); + let embedding: number[]; + let provider = input.embedProvider ? 'remote' : 'local'; + let model = input.embeddingModel ?? 'deterministic-v1'; + let provenance: EmbeddingProvenance = { + strategyId: 'legacy-default', + attemptPath: [ + { + strategyId: 'legacy-default', + provider, + model, + status: 'success', + }, + ], + fallbackUsed: false, + resolvedBy: { + strategyId: 'legacy-default', + provider, + model, + }, + }; + + if (isStrategyV2 && strategyConfig) { + const strategyId = input.embeddingStrategyId ?? strategyConfig.defaults.indexing; + const result = await executeEmbeddingWithStrategy({ + config: strategyConfig, + strategyId, + value: chunk.content, + adapterForProvider: getAdapter, + }); + const success = assertEmbeddingSuccess(result); + embedding = success.embedding; + provider = success.provider; + model = success.model; + provenance = success.provenance; + emitEmbeddingTelemetry(success.provenance); + } else { + embedding = + input.embedProvider != null + ? 
( + await input.embedProvider.embed({ + model: input.embeddingModel ?? 'text-embedding-3-small', + values: [chunk.content], + }) + )[0] + : deterministicEmbedding(chunk.content); + } await input.db.query( - `INSERT INTO chunk_embeddings (chunk_id, provider, model, embedding) - VALUES ($1, $2, $3, $4::jsonb) - ON CONFLICT (chunk_id) DO UPDATE SET provider = EXCLUDED.provider, model = EXCLUDED.model, embedding = EXCLUDED.embedding`, - [ - chunkId, - input.embedProvider ? 'remote' : 'local', - input.embeddingModel ?? 'deterministic-v1', - JSON.stringify(embedding), - ] + `INSERT INTO chunk_embeddings (chunk_id, provider, model, embedding, provenance) + VALUES ($1, $2, $3, $4::jsonb, $5::jsonb) + ON CONFLICT (chunk_id) DO UPDATE SET provider = EXCLUDED.provider, model = EXCLUDED.model, embedding = EXCLUDED.embedding, provenance = EXCLUDED.provenance`, + [chunkId, provider, model, JSON.stringify(embedding), JSON.stringify(provenance)] ); await input.db.query('INSERT INTO bm25_documents (id, chunk_id, body) VALUES ($1, $2, $3)', [ @@ -136,3 +190,17 @@ export async function runFullIndex(input: { return { filesIndexed, chunksIndexed }; } + +function emitEmbeddingTelemetry(provenance: EmbeddingProvenance): void { + if (process.env.DUBSBOT_EMBEDDING_PROVENANCE_LOG !== '1') { + return; + } + const resolved = provenance.resolvedBy + ? 
`${provenance.resolvedBy.provider}:${provenance.resolvedBy.model}` + : 'none'; + console.info( + `[embedding] strategy=${provenance.strategyId} resolved=${resolved} fallback=${provenance.fallbackUsed} attempts=${provenance.attemptPath + .map((attempt) => `${attempt.provider}:${attempt.model}:${attempt.status}`) + .join('>')}` + ); +} diff --git a/src/context/retrieval/hybrid.ts b/src/context/retrieval/hybrid.ts index f052d21..1ea6c75 100644 --- a/src/context/retrieval/hybrid.ts +++ b/src/context/retrieval/hybrid.ts @@ -9,6 +9,9 @@ type ChunkRow = { content: string; path: string; embedding: string | null; + provider: string | null; + model: string | null; + provenance: string | null; }; async function grepSearch( @@ -61,7 +64,7 @@ export async function runHybridRetrieval(input: { const queryVector = deterministicEmbedding(query.vectorQuery || query.lexicalQuery); const rows = await input.db.query( - `SELECT c.id, c.content, f.path, ce.embedding::text as embedding + `SELECT c.id, c.content, f.path, ce.embedding::text as embedding, ce.provider, ce.model, ce.provenance::text as provenance FROM chunks c JOIN files f ON f.id = c.file_id LEFT JOIN chunk_embeddings ce ON ce.chunk_id = c.id @@ -104,6 +107,9 @@ export async function runHybridRetrieval(input: { score: entry.totalScore, metadata: { path: entry.item.path, + provider: entry.item.provider ?? 'unknown', + model: entry.item.model ?? 'unknown', + embeddingProvenance: entry.item.provenance ? 
JSON.parse(entry.item.provenance) : null, lexicalScore: entry.lexicalScore, vectorScore: entry.vectorScore, rank: index + 1, diff --git a/src/daemon/main.ts b/src/daemon/main.ts index 01c535b..33e226b 100644 --- a/src/daemon/main.ts +++ b/src/daemon/main.ts @@ -3,6 +3,7 @@ import { EventHookRunner } from '../automation/event-hooks'; import { AutomationRunner } from '../automation/runner'; import { AutomationScheduler } from '../automation/scheduler'; import { loadAgentsConfig } from '../config/agents-loader'; +import { loadEmbeddingStrategyConfig } from '../context/embedding/config'; import { RepoFsWatcher } from '../context/fs-watcher'; import { GitWatcher } from '../context/git-watcher'; import { runIncrementalIndex } from '../context/indexer/incremental'; @@ -15,6 +16,7 @@ import { createProviderAdapter, detectProvider } from '../providers'; async function main(): Promise { await runMigrations(); const db = await createDb(); + loadEmbeddingStrategyConfig(); const provider = createProviderAdapter(detectProvider()); const policy = new DefaultPolicyEngine(createDefaultApprovalPolicy()); const orchestrator = new AgentOrchestrator({ provider, policyEngine: policy }); diff --git a/src/db/migrate.ts b/src/db/migrate.ts index 397be47..e58a85c 100644 --- a/src/db/migrate.ts +++ b/src/db/migrate.ts @@ -1,5 +1,6 @@ import { readFile } from 'node:fs/promises'; import { join } from 'node:path'; +import fg from 'fast-glob'; import { createDb } from './client'; export async function runMigrations(): Promise { @@ -8,17 +9,28 @@ export async function runMigrations(): Promise { 'CREATE TABLE IF NOT EXISTS schema_migrations (version TEXT PRIMARY KEY, applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW());' ); - const migrationPath = join(process.cwd(), 'src', 'db', 'migrations', '0001_init.sql'); - const migrationSql = await readFile(migrationPath, 'utf8'); + const migrationFiles = await fg(['*.sql'], { + cwd: join(process.cwd(), 'src', 'db', 'migrations'), + onlyFiles: true, + absolute: 
false, + }); + migrationFiles.sort(); - const already = await db.query<{ exists: boolean }>( - "SELECT EXISTS (SELECT 1 FROM schema_migrations WHERE version = '0001_init') AS exists" + const existing = await db.query<{ version: string }>( + 'SELECT version FROM schema_migrations ORDER BY version ASC' ); + const applied = new Set(existing.rows.map((row) => row.version)); - if (already.rows[0]?.exists) { - return; - } + for (const file of migrationFiles) { + const version = file.replace(/\.sql$/, ''); + if (applied.has(version)) { + continue; + } + + const migrationPath = join(process.cwd(), 'src', 'db', 'migrations', file); + const migrationSql = await readFile(migrationPath, 'utf8'); - await db.exec(migrationSql); - await db.query("INSERT INTO schema_migrations (version) VALUES ('0001_init')"); + await db.exec(migrationSql); + await db.query('INSERT INTO schema_migrations (version) VALUES ($1)', [version]); + } } diff --git a/src/db/migrations/0002_embedding_provenance.sql b/src/db/migrations/0002_embedding_provenance.sql new file mode 100644 index 0000000..a49cddd --- /dev/null +++ b/src/db/migrations/0002_embedding_provenance.sql @@ -0,0 +1,3 @@ +ALTER TABLE chunk_embeddings +ADD COLUMN IF NOT EXISTS provenance JSONB NOT NULL DEFAULT '{}'::jsonb; + diff --git a/tests/embedding-strategy.test.ts b/tests/embedding-strategy.test.ts new file mode 100644 index 0000000..852a3d2 --- /dev/null +++ b/tests/embedding-strategy.test.ts @@ -0,0 +1,259 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { loadEmbeddingStrategyConfig } from '../src/context/embedding/config'; +import { executeEmbeddingWithStrategy } from '../src/context/embedding/engine'; +import { + EmbeddingStrategyConfigError, + EmbeddingStrategyResolutionError, + parseEmbeddingStrategyConfig, + resolveEmbeddingStrategy, +} from '../src/context/embedding/strategy'; +import type { ProviderAdapter } from '../src/providers/types'; + +class FakeProvider implements ProviderAdapter { + 
constructor( + private readonly behavior: 'ok' | 'rate_limit' | 'auth' | 'timeout' | 'service_unavailable' + ) {} + + async generateStructured(): Promise { + throw new Error('not used'); + } + + async *streamStructured(): AsyncIterable {} + + async embed(): Promise { + if (this.behavior === 'ok') { + return [[0.1, 0.2, 0.3]]; + } + if (this.behavior === 'rate_limit') { + throw new Error('429 rate limit'); + } + if (this.behavior === 'auth') { + throw new Error('401 auth'); + } + if (this.behavior === 'timeout') { + throw new Error('timeout'); + } + throw new Error('503 unavailable'); + } + + async countTokens(): Promise { + return 1; + } + + supports(): boolean { + return true; + } +} + +const baseConfig = { + version: '1.0', + defaults: { indexing: 'indexing', query: 'query' }, + strategies: [ + { + id: 'indexing', + provider: 'anthropic', + model: 'claude-embed', + fallback: [{ strategyId: 'fallback-openai', onFailure: ['rate_limit', 'timeout'] }], + }, + { + id: 'query', + provider: 'anthropic', + model: 'claude-embed', + fallback: [{ strategyId: 'fallback-openai', onFailure: ['rate_limit'] }], + }, + { + id: 'fallback-openai', + provider: 'openai', + model: 'text-embedding-3-small', + fallback: [], + }, + ], +} as const; + +function requireConfig(raw: unknown) { + const parsed = parseEmbeddingStrategyConfig(raw); + if (!parsed.config) { + throw new Error( + `Expected valid config, received issues: ${parsed.issues.map((i) => i.detail).join('; ')}` + ); + } + return parsed.config; +} + +describe('embedding strategy configuration', () => { + beforeEach(() => { + delete process.env.DUBSBOT_EMBEDDING_STRATEGY_CONFIG_JSON; + }); + + it('loads valid strategy config and resolves known strategy ids', () => { + const config = requireConfig(baseConfig); + expect(resolveEmbeddingStrategy(config, 'indexing').provider).toBe('anthropic'); + }); + + it('rejects invalid config entries (unknown provider, missing fallback strategy, cycles)', () => { + const parsed = 
parseEmbeddingStrategyConfig({ + version: '1.0', + defaults: { indexing: 'a', query: 'b' }, + strategies: [ + { + id: 'a', + provider: 'openai', + model: 'x', + fallback: [{ strategyId: 'b', onFailure: ['rate_limit'] }], + }, + { + id: 'b', + provider: 'google', + model: 'x', + fallback: [{ strategyId: 'a', onFailure: ['rate_limit'] }], + }, + { + id: 'bad', + provider: 'openai', + model: 'x', + fallback: [{ strategyId: 'missing', onFailure: ['rate_limit'] }], + }, + ], + }); + + expect(parsed.config).toBeUndefined(); + expect(parsed.issues.some((issue) => issue.code === 'cyclic_fallback_path')).toBe(true); + expect(parsed.issues.some((issue) => issue.code === 'unknown_fallback_strategy')).toBe(true); + }); + + it('throws structured config error at startup when env config is invalid', () => { + process.env.DUBSBOT_EMBEDDING_STRATEGY_CONFIG_JSON = JSON.stringify({ + version: '1.0', + defaults: { indexing: 'missing', query: 'missing' }, + strategies: [{ id: 'only', provider: 'openai', model: 'x', fallback: [] }], + }); + + expect(() => loadEmbeddingStrategyConfig()).toThrow(EmbeddingStrategyConfigError); + }); + + it('throws structured runtime error when strategy id is unknown', () => { + const config = requireConfig(baseConfig); + expect(() => resolveEmbeddingStrategy(config, 'not-found')).toThrow( + EmbeddingStrategyResolutionError + ); + }); +}); + +describe('anthropic native-first fallback policy', () => { + it('returns anthropic result directly on success', async () => { + const config = requireConfig(baseConfig); + const result = await executeEmbeddingWithStrategy({ + config, + strategyId: 'indexing', + value: 'hello', + adapterForProvider: (provider) => + provider === 'anthropic' ? 
new FakeProvider('ok') : new FakeProvider('ok'), + }); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.provenance.fallbackUsed).toBe(false); + expect(result.provenance.attemptPath).toHaveLength(1); + expect(result.provenance.resolvedBy?.provider).toBe('anthropic'); + } + }); + + it('does not fallback on non-fallbackable anthropic failure', async () => { + const config = requireConfig(baseConfig); + const adapterSpy = vi.fn((provider: string) => + provider === 'anthropic' ? new FakeProvider('auth') : new FakeProvider('ok') + ); + + const result = await executeEmbeddingWithStrategy({ + config, + strategyId: 'indexing', + value: 'hello', + adapterForProvider: adapterSpy, + }); + + expect(result.ok).toBe(false); + expect(adapterSpy).toHaveBeenCalledTimes(1); + if (!result.ok) { + expect(result.provenance.failureCategory).toBe('auth'); + expect(result.provenance.terminalReason).toBe('fallback_disallowed'); + } + }); + + it('falls back in configured order for fallbackable errors', async () => { + const config = requireConfig(baseConfig); + const result = await executeEmbeddingWithStrategy({ + config, + strategyId: 'indexing', + value: 'hello', + adapterForProvider: (provider) => + provider === 'anthropic' ? 
new FakeProvider('rate_limit') : new FakeProvider('ok'), + }); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.provenance.fallbackUsed).toBe(true); + expect(result.provenance.attemptPath.map((entry) => entry.provider)).toEqual([ + 'anthropic', + 'openai', + ]); + } + }); + + it('returns terminal failure when no fallback is configured', async () => { + const config = requireConfig({ + version: '1.0', + defaults: { indexing: 'solo', query: 'solo' }, + strategies: [{ id: 'solo', provider: 'anthropic', model: 'claude-embed', fallback: [] }], + }); + const result = await executeEmbeddingWithStrategy({ + config, + strategyId: 'solo', + value: 'hello', + adapterForProvider: () => new FakeProvider('rate_limit'), + }); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.provenance.terminalReason).toBe('no_fallback'); + expect(result.provenance.attemptPath).toHaveLength(1); + } + }); +}); + +describe('embedding provenance completeness', () => { + it('includes complete provenance fields on success', async () => { + const config = requireConfig(baseConfig); + const result = await executeEmbeddingWithStrategy({ + config, + strategyId: 'indexing', + value: 'hello', + adapterForProvider: (provider) => + provider === 'anthropic' ? 
new FakeProvider('rate_limit') : new FakeProvider('ok'), + }); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.provenance.strategyId).toBe('indexing'); + expect(result.provenance.resolvedBy).toBeDefined(); + expect(result.provenance.attemptPath.length).toBeGreaterThan(0); + } + }); + + it('includes complete provenance fields on terminal failure', async () => { + const config = requireConfig(baseConfig); + const result = await executeEmbeddingWithStrategy({ + config, + strategyId: 'indexing', + value: 'hello', + adapterForProvider: () => new FakeProvider('timeout'), + }); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.provenance.strategyId).toBe('indexing'); + expect(result.provenance.attemptPath.length).toBeGreaterThan(0); + expect(result.provenance.failureCategory).toBeDefined(); + expect(result.provenance.terminalReason).toBeDefined(); + } + }); +}); From ee0110e128457996b398acea0db00c845bb746fc Mon Sep 17 00:00:00 2001 From: Daniel Wise Date: Tue, 3 Mar 2026 19:17:53 -0800 Subject: [PATCH 3/3] fix(embedding): address PR review feedback for strategy rollout --- .../spec.md | 5 ++-- .../embedding-strategy-configuration/spec.md | 5 ++-- src/context/embedding/config.ts | 22 +++++++++++++--- src/context/embedding/engine.ts | 26 ++++++++++++++++--- src/context/embedding/strategy.ts | 9 +++++-- src/context/retrieval/hybrid.ts | 13 +++++++++- src/daemon/main.ts | 9 +++++-- src/db/migrate.ts | 15 +++++++++-- 8 files changed, 87 insertions(+), 17 deletions(-) diff --git a/openspec/specs/anthropic-embedding-fallback-and-provenance/spec.md b/openspec/specs/anthropic-embedding-fallback-and-provenance/spec.md index e34f9d8..1943817 100644 --- a/openspec/specs/anthropic-embedding-fallback-and-provenance/spec.md +++ b/openspec/specs/anthropic-embedding-fallback-and-provenance/spec.md @@ -1,7 +1,9 @@ # anthropic-embedding-fallback-and-provenance Specification ## Purpose -TBD - created by archiving change embedding-parity-hardening. 
Update Purpose after archive. +Define expected Anthropic-primary embedding behavior, including native-first execution, +failure-category-gated fallback sequencing, and required provenance metadata for both successful +embedding results and terminal failures. ## Requirements ### Requirement: Anthropic Native-First Execution Policy For strategies configured with Anthropic as primary, the system SHALL attempt Anthropic native embedding first and SHALL only consider fallback providers explicitly listed in that strategy. @@ -35,4 +37,3 @@ The system SHALL attach provenance metadata to every embedding result and termin #### Scenario: Provenance is emitted on terminal failure - **WHEN** all attempts fail or fallback is disallowed - **THEN** the error payload includes provenance fields for attempted providers/models, failure category, and terminal resolution reason - diff --git a/openspec/specs/embedding-strategy-configuration/spec.md b/openspec/specs/embedding-strategy-configuration/spec.md index 3d34504..78a2c89 100644 --- a/openspec/specs/embedding-strategy-configuration/spec.md +++ b/openspec/specs/embedding-strategy-configuration/spec.md @@ -1,7 +1,9 @@ # embedding-strategy-configuration Specification ## Purpose -TBD - created by archiving change embedding-parity-hardening. Update Purpose after archive. +Define how embedding strategies are configured and resolved across providers and models, +including named strategy IDs, primary provider/model selection, ordered fallback chains, and +deterministic runtime resolution with startup validation of invalid or inconsistent configurations. ## Requirements ### Requirement: Provider-Configurable Embedding Strategy The system SHALL support explicit embedding strategy configuration per embedding use-case, including primary provider/model selection and an ordered fallback list. 
@@ -24,4 +26,3 @@ The system SHALL resolve embedding strategies deterministically for each request #### Scenario: Unknown strategy id is rejected - **WHEN** an embedding request specifies a strategy id not present in configuration - **THEN** the system returns a structured error and MUST NOT attempt embedding generation - diff --git a/src/context/embedding/config.ts b/src/context/embedding/config.ts index 83b713e..4d211c6 100644 --- a/src/context/embedding/config.ts +++ b/src/context/embedding/config.ts @@ -7,7 +7,7 @@ import { export function loadEmbeddingStrategyConfig(): EmbeddingStrategyConfig { const rawFromEnv = process.env.DUBSBOT_EMBEDDING_STRATEGY_CONFIG_JSON; - const raw = rawFromEnv ? JSON.parse(rawFromEnv) : buildLegacyDefaultConfig(); + const raw = rawFromEnv ? parseJsonConfigFromEnv(rawFromEnv) : buildLegacyDefaultConfig(); const parsed = parseEmbeddingStrategyConfig(raw); if (!parsed.config) { throw new EmbeddingStrategyConfigError(parsed.issues); @@ -70,8 +70,24 @@ function defaultEmbeddingModel(provider: ProviderName): string { case 'google': return process.env.DUBSBOT_GOOGLE_EMBEDDING_MODEL ?? 'text-embedding-004'; case 'anthropic': - return process.env.DUBSBOT_ANTHROPIC_EMBEDDING_MODEL ?? 'deterministic-v1'; + return process.env.DUBSBOT_ANTHROPIC_EMBEDDING_MODEL ?? 
'local-deterministic'; default: - return 'deterministic-v1'; + return 'local-deterministic'; + } +} + +function parseJsonConfigFromEnv(rawFromEnv: string): unknown { + try { + return JSON.parse(rawFromEnv); + } catch (error) { + if (error instanceof SyntaxError) { + throw new EmbeddingStrategyConfigError([ + { + code: 'schema_invalid', + detail: `DUBSBOT_EMBEDDING_STRATEGY_CONFIG_JSON is invalid JSON: ${error.message}`, + }, + ]); + } + throw error; } } diff --git a/src/context/embedding/engine.ts b/src/context/embedding/engine.ts index 9159281..ec65453 100644 --- a/src/context/embedding/engine.ts +++ b/src/context/embedding/engine.ts @@ -107,10 +107,10 @@ export async function executeEmbeddingWithStrategy(input: { failureCategory, }); - const eligibleFallback = strategy.fallback.find((entry) => + const eligibleFallbacks = strategy.fallback.filter((entry) => entry.onFailure.includes(failureCategory) ); - if (!eligibleFallback) { + if (eligibleFallbacks.length === 0) { return { ok: false, message: `Embedding failed for strategy "${currentId}" with category "${failureCategory}" and no eligible fallback.`, @@ -124,8 +124,28 @@ export async function executeEmbeddingWithStrategy(input: { }; } + const fallbackIds = eligibleFallbacks + .map((entry) => entry.strategyId) + .filter((strategyId) => !visited.has(strategyId) && !queue.includes(strategyId)); + + if (fallbackIds.length === 0) { + return { + ok: false, + message: `Embedding failed for strategy "${currentId}" with category "${failureCategory}" and exhausted fallback chain.`, + provenance: { + strategyId: input.strategyId, + attemptPath, + fallbackUsed: true, + failureCategory, + terminalReason: 'fallback_exhausted', + }, + }; + } + fallbackUsed = true; - queue.push(eligibleFallback.strategyId); + for (const fallbackId of fallbackIds) { + queue.push(fallbackId); + } } } diff --git a/src/context/embedding/strategy.ts b/src/context/embedding/strategy.ts index f3eac27..503fe15 100644 --- 
a/src/context/embedding/strategy.ts +++ b/src/context/embedding/strategy.ts @@ -44,7 +44,8 @@ export type EmbeddingStrategyValidationIssue = { | 'duplicate_strategy' | 'unknown_fallback_strategy' | 'cyclic_fallback_path' - | 'unknown_default_strategy'; + | 'unknown_default_strategy' + | 'schema_invalid'; strategyId?: string; detail: string; }; @@ -76,7 +77,11 @@ export function parseEmbeddingStrategyConfig(raw: unknown): { if (!parsed.success) { return { issues: parsed.error.issues.map((issue) => ({ - code: issue.path.includes('provider') ? 'unknown_provider' : 'missing_model', + code: issue.path.includes('provider') + ? 'unknown_provider' + : issue.path.includes('model') + ? 'missing_model' + : 'schema_invalid', detail: `${issue.path.join('.') || ''}: ${issue.message}`, })), }; diff --git a/src/context/retrieval/hybrid.ts b/src/context/retrieval/hybrid.ts index 1ea6c75..f1a2efc 100644 --- a/src/context/retrieval/hybrid.ts +++ b/src/context/retrieval/hybrid.ts @@ -109,7 +109,7 @@ export async function runHybridRetrieval(input: { path: entry.item.path, provider: entry.item.provider ?? 'unknown', model: entry.item.model ?? 'unknown', - embeddingProvenance: entry.item.provenance ? 
JSON.parse(entry.item.provenance) : null, + embeddingProvenance: safeJsonParse(entry.item.provenance), lexicalScore: entry.lexicalScore, vectorScore: entry.vectorScore, rank: index + 1, @@ -145,3 +145,14 @@ export async function runHybridRetrieval(input: { return bundle; } + +function safeJsonParse(value: string | null): unknown { + if (!value) { + return null; + } + try { + return JSON.parse(value); + } catch { + return null; + } +} diff --git a/src/daemon/main.ts b/src/daemon/main.ts index 33e226b..4edf875 100644 --- a/src/daemon/main.ts +++ b/src/daemon/main.ts @@ -3,7 +3,10 @@ import { EventHookRunner } from '../automation/event-hooks'; import { AutomationRunner } from '../automation/runner'; import { AutomationScheduler } from '../automation/scheduler'; import { loadAgentsConfig } from '../config/agents-loader'; -import { loadEmbeddingStrategyConfig } from '../context/embedding/config'; +import { + isEmbeddingStrategyV2Enabled, + loadEmbeddingStrategyConfig, +} from '../context/embedding/config'; import { RepoFsWatcher } from '../context/fs-watcher'; import { GitWatcher } from '../context/git-watcher'; import { runIncrementalIndex } from '../context/indexer/incremental'; @@ -16,7 +19,9 @@ import { createProviderAdapter, detectProvider } from '../providers'; async function main(): Promise { await runMigrations(); const db = await createDb(); - loadEmbeddingStrategyConfig(); + if (isEmbeddingStrategyV2Enabled()) { + loadEmbeddingStrategyConfig(); + } const provider = createProviderAdapter(detectProvider()); const policy = new DefaultPolicyEngine(createDefaultApprovalPolicy()); const orchestrator = new AgentOrchestrator({ provider, policyEngine: policy }); diff --git a/src/db/migrate.ts b/src/db/migrate.ts index e58a85c..3260914 100644 --- a/src/db/migrate.ts +++ b/src/db/migrate.ts @@ -30,7 +30,18 @@ export async function runMigrations(): Promise { const migrationPath = join(process.cwd(), 'src', 'db', 'migrations', file); const migrationSql = await 
readFile(migrationPath, 'utf8'); - await db.exec(migrationSql); - await db.query('INSERT INTO schema_migrations (version) VALUES ($1)', [version]); + try { + await db.exec('BEGIN'); + await db.exec(migrationSql); + await db.query('INSERT INTO schema_migrations (version) VALUES ($1)', [version]); + await db.exec('COMMIT'); + } catch (error) { + try { + await db.exec('ROLLBACK'); + } catch { + // Ignore rollback failures to avoid masking original migration errors. + } + throw error; + } } }