From 92e88528757a28259a167fd74c1c4187560f77ae Mon Sep 17 00:00:00 2001
From: ossirytk <ossirytk@gmail.com>
Date: Sun, 29 Mar 2026 08:42:41 +0300
Subject: [PATCH 1/2] Web ui work

---
 .github/copilot-instructions.md               |  43 ++
 AGENTS.MD                                     |  43 ++
 core/job_queue.py                             |  87 ++++
 core/preset_profiles.py                       |  82 +++
 core/rag_manager.py                           | 419 ++++++++++++++++
 docs/future_work/COPILOT_COMPACT_REFERENCE.md |   8 +-
 docs/future_work/REFINEMENTS.md               |  88 +++-
 docs/future_work/UI_REFINEMENTS.md            |  90 +++-
 templates/diagnostics_panel.html              |  37 ++
 templates/index.html                          | 270 ++++++++++
 templates/presets_panel.html                  |  40 ++
 templates/rag/benchmark_results.html          |  32 ++
 templates/rag/collection_detail.html          |  78 +++
 templates/rag/collections_list.html           |  43 ++
 templates/rag/coverage_report.html            |  40 ++
 templates/rag/evaluate_index.html             |  33 ++
 templates/rag/evaluate_results.html           |  51 ++
 templates/rag/file_view.html                  |   6 +
 templates/rag/files_list.html                 |  61 +++
 templates/rag/layout.html                     | 467 ++++++++++++++++++
 templates/rag/lint_results.html               |  49 ++
 templates/rag/push_status.html                |  34 ++
 templates/rag/query_results.html              |  22 +
 templates/rag/trends_table.html               |  25 +
 web_app.py                                    | 354 ++++++++++++-
 25 files changed, 2483 insertions(+), 19 deletions(-)
 create mode 100644 core/job_queue.py
 create mode 100644 core/preset_profiles.py
 create mode 100644 core/rag_manager.py
 create mode 100644 templates/diagnostics_panel.html
 create mode 100644 templates/presets_panel.html
 create mode 100644 templates/rag/benchmark_results.html
 create mode 100644 templates/rag/collection_detail.html
 create mode 100644 templates/rag/collections_list.html
 create mode 100644 templates/rag/coverage_report.html
 create mode 100644 templates/rag/evaluate_index.html
 create mode 100644 templates/rag/evaluate_results.html
 create mode 100644 templates/rag/file_view.html
 create mode 100644 templates/rag/files_list.html
 create mode 100644 templates/rag/layout.html
 create mode 100644 templates/rag/lint_results.html
 create mode 100644 templates/rag/push_status.html
 create mode 100644 templates/rag/query_results.html
 create mode 100644 templates/rag/trends_table.html

diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index d71b265..8aa2c59 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -25,6 +25,49 @@ All terminal commands should be reproducible from the supported shell/editor com
 
 ---
 
+## 0.1 Available CLI Tools
+
+The following tools are installed locally and available for use in terminal workflows and agent tasks:
+
+| Tool | Purpose |
+|------|---------|
+| `diffutils` | File comparison (`diff`, `cmp`, `diff3`, `sdiff`) |
+| `fd` | Fast, user-friendly alternative to `find` for file search |
+| `fzf` | General-purpose fuzzy finder for interactive filtering |
+| `ripgrep` (`rg`) | Fast regex search across files; prefer over `grep`/`Select-String` |
+| `zip` | Archive creation and extraction |
+| `tokei` | Count lines of code by language |
+| `ast-grep` (`sg`) | Structural code search and rewriting using AST patterns |
+| `jq` | JSON query and transformation CLI |
+| `yq` | YAML/JSON/TOML query and transformation CLI |
+| `hyperfine` | Command-line benchmarking with statistical output |
+| `pre-commit` | Run and manage repository pre-commit hooks |
+| `http` / `https` (HTTPie) | Human-friendly HTTP API client |
+| `just` | Project task runner via `justfile` recipes |
+| `difft` (difftastic) | Syntax-aware structural diffing |
+
+Prefer these tools over PowerShell built-ins where applicable (e.g., use `rg` instead of `Select-String`, use `fd` instead of `Get-ChildItem` for file discovery).
+
+### Preferred command order
+
+- Content search: `rg` first, then `ast-grep` for structural/language-aware matching
+- File discovery: `fd` first, then `rg --files` as a fallback
+- JSON config inspection: `jq`
+- YAML/TOML inspection: `yq`
+- HTTP/API smoke checks: `http` / `https` (HTTPie)
+- Task orchestration: `just` recipes when a `justfile` exists
+- Diff/review: `difft` for syntax-aware diffs, `diff` for plain text diffs
+- Performance comparisons: `hyperfine` for repeatable timing
+
+### Avoid in autonomous runs
+
+- Avoid interactive-only flows (for example `fzf` prompts) unless the user explicitly asks for interactive selection
+- Avoid destructive git/file operations unless the user explicitly approves them
+- Avoid long-running watch commands by default; use one-shot checks first, then switch to watch mode only when requested
+- Avoid invoking `pre-commit run --all-files` on very large repos when a targeted path or hook is enough for the task
+
+---
+
 ## 1. Authoritative Tools & Source of Truth
 
 ### Python
diff --git a/AGENTS.MD b/AGENTS.MD
index 9ffdbbc..942a979 100644
--- a/AGENTS.MD
+++ b/AGENTS.MD
@@ -25,6 +25,49 @@ All terminal commands should be reproducible from the supported shell/editor com
 
 ---
 
+## 0.1 Available CLI Tools
+
+The following tools are installed locally and available for use in terminal workflows and agent tasks:
+
+| Tool | Purpose |
+|------|---------|
+| `diffutils` | File comparison (`diff`, `cmp`, `diff3`, `sdiff`) |
+| `fd` | Fast, user-friendly alternative to `find` for file search |
+| `fzf` | General-purpose fuzzy finder for interactive filtering |
+| `ripgrep` (`rg`) | Fast regex search across files; prefer over `grep`/`Select-String` |
+| `zip` | Archive creation and extraction |
+| `tokei` | Count lines of code by language |
+| `ast-grep` (`sg`) | Structural code search and rewriting using AST patterns |
+| `jq` | JSON query and transformation CLI |
+| `yq` | YAML/JSON/TOML query and transformation CLI |
+| `hyperfine` | Command-line benchmarking with statistical output |
+| `pre-commit` | Run and manage repository pre-commit hooks |
+| `http` / `https` (HTTPie) | Human-friendly HTTP API client |
+| `just` | Project task runner via `justfile` recipes |
+| `difft` (difftastic) | Syntax-aware structural diffing |
+
+Prefer these tools over PowerShell built-ins where applicable (e.g., use `rg` instead of `Select-String`, use `fd` instead of `Get-ChildItem` for file discovery).
+
+### Preferred command order
+
+- Content search: `rg` first, then `ast-grep` for structural/language-aware matching
+- File discovery: `fd` first, then `rg --files` as a fallback
+- JSON config inspection: `jq`
+- YAML/TOML inspection: `yq`
+- HTTP/API smoke checks: `http` / `https` (HTTPie)
+- Task orchestration: `just` recipes when a `justfile` exists
+- Diff/review: `difft` for syntax-aware diffs, `diff` for plain text diffs
+- Performance comparisons: `hyperfine` for repeatable timing
+
+### Avoid in autonomous runs
+
+- Avoid interactive-only flows (for example `fzf` prompts) unless the user explicitly asks for interactive selection
+- Avoid destructive git/file operations unless the user explicitly approves them
+- Avoid long-running watch commands by default; use one-shot checks first, then switch to watch mode only when requested
+- Avoid invoking `pre-commit run --all-files` on very large repos when a targeted path or hook is enough for the task
+
+---
+
 ## 1. Authoritative Tools & Source of Truth
 
 ### Python
diff --git a/core/job_queue.py b/core/job_queue.py
new file mode 100644
index 0000000..0020c51
--- /dev/null
+++ b/core/job_queue.py
@@ -0,0 +1,87 @@
+"""Simple in-memory job store for long-running RAG web operations.
+
+Jobs run in background threads. The browser polls a status route via HTMX
+(`hx-trigger="every 2s"`). The job status endpoint stops including the
+polling trigger once the job reaches a terminal state (done or error).
+"""
+
+from __future__ import annotations
+
+import threading
+import time
+import uuid
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+
+class Job:
+    """Mutable record of one background job: lifecycle status, outcome, and timing."""
+
+    __slots__ = ("error", "finished_at", "id", "result", "started_at", "status")
+
+    def __init__(self, job_id: str) -> None:
+        self.id = job_id
+        self.status: str = "pending"  # JobStore.submit moves it to running, then done or error
+        self.result: Any = None  # return value of the submitted callable, once done
+        self.error: str | None = None  # failure message when the callable raised
+        self.started_at: float = time.monotonic()  # taken at construction (monotonic clock)
+        self.finished_at: float | None = None  # None until the worker thread finishes
+
+    def to_dict(self) -> dict[str, Any]:  # plain-dict snapshot of the current state
+        elapsed = round((self.finished_at or time.monotonic()) - self.started_at, 2)  # live until finished
+        return {
+            "id": self.id,
+            "status": self.status,
+            "result": self.result,
+            "error": self.error,
+            "elapsed_s": elapsed,
+        }
+
+
+class JobStore:
+    """Thread-safe registry that runs callables in daemon threads and tracks their state."""
+
+    MAX_JOBS: int = 50  # soft cap: only finished jobs are ever evicted
+
+    def __init__(self) -> None:
+        self._jobs: dict[str, Job] = {}
+        self._lock = threading.Lock()  # guards the _jobs mapping only
+
+    def submit(self, fn: Callable[..., Any], *args: object, **kwargs: object) -> str:
+        """Start ``fn(*args, **kwargs)`` in a daemon thread and return its job id at once."""
+        job_id = uuid.uuid4().hex[:12]
+        job = Job(job_id)
+        with self._lock:
+            self._jobs[job_id] = job
+            self._evict_old()
+
+        def _run() -> None:
+            job.status = "running"
+            try:
+                job.result = fn(*args, **kwargs)
+                job.status = "done"
+            except BaseException as exc:  # SystemExit etc. must also reach a terminal state
+                job.error = str(exc) or type(exc).__name__  # never report an empty message
+                job.status = "error"
+            finally:
+                job.finished_at = time.monotonic()
+
+        threading.Thread(target=_run, daemon=True).start()
+        return job_id
+
+    def get(self, job_id: str) -> dict[str, Any] | None:
+        """Return the job's ``to_dict()`` snapshot, or None if job_id is unknown or evicted."""
+        with self._lock:
+            job = self._jobs.get(job_id)
+        return job.to_dict() if job else None
+
+    def _evict_old(self) -> None:
+        """Drop the oldest finished jobs while over MAX_JOBS (caller must hold the lock)."""
+        if len(self._jobs) <= self.MAX_JOBS:
+            return
+        finished = [j for j in self._jobs.values() if j.status in {"done", "error"}]
+        finished.sort(key=lambda j: j.finished_at or 0)
+        for j in finished[: len(self._jobs) - self.MAX_JOBS]:
+            del self._jobs[j.id]
diff --git a/core/preset_profiles.py b/core/preset_profiles.py
new file mode 100644
index 0000000..f7e6db9
--- /dev/null
+++ b/core/preset_profiles.py
@@ -0,0 +1,82 @@
+"""Saveable preset profiles for runtime retrieval settings."""
+
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from core.config import ConversationRuntimeConfig
+
+PROFILE_FIELDS: list[str] = [
+    "use_mmr",
+    "rag_rerank_enabled",
+    "rag_sentence_compression_enabled",
+    "rag_multi_query_enabled",
+    "rag_k",
+    "rag_k_mes",
+    "debug_context",
+]
+
+
+class ProfileStore:
+    """Persist and apply named retrieval-setting presets stored in one JSON file."""
+
+    def __init__(self, path: Path) -> None:
+        self._path = path  # JSON file; parent directories are created on save
+
+    def _load(self) -> dict[str, dict[str, object]]:  # best-effort read of the whole file
+        if not self._path.exists():
+            return {}
+        try:
+            data = json.loads(self._path.read_text(encoding="utf-8"))
+            return data if isinstance(data, dict) else {}  # non-object JSON counts as empty
+        except Exception:
+            return {}  # unreadable or malformed file is treated as "no profiles"
+
+    def _save(self, data: dict[str, dict[str, object]]) -> None:  # rewrites the whole file
+        self._path.parent.mkdir(parents=True, exist_ok=True)
+        self._path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
+
+    def list_profiles(self) -> list[str]:
+        """Return the saved profile names in sorted order (empty if nothing is stored)."""
+        return sorted(self._load().keys())
+
+    def save_profile(self, name: str, config: ConversationRuntimeConfig) -> None:
+        """Store the PROFILE_FIELDS values of *config* under *name*, replacing any old entry."""
+        data = self._load()
+        data[name] = {field: getattr(config, field) for field in PROFILE_FIELDS}
+        self._save(data)
+
+    def get_profile(self, name: str) -> dict[str, object]:
+        """Return a copy of the settings stored for *name*; raise KeyError if it is missing."""
+        data = self._load()
+        if name not in data:
+            msg = f"Profile {name!r} not found"
+            raise KeyError(msg)
+        return dict(data[name])
+
+    def apply_profile(self, name: str, config: ConversationRuntimeConfig) -> list[str]:
+        """Set *name*'s values on *config* in place; return the names of the fields that changed."""
+        profile = self.get_profile(name)
+        changed: list[str] = []
+        for field, value in profile.items():
+            if field not in PROFILE_FIELDS:  # only whitelisted fields are ever applied
+                continue
+            current = getattr(config, field, None)
+            if current != value:
+                setattr(config, field, value)
+                changed.append(field)
+        return changed
+
+    def delete_profile(self, name: str) -> None:
+        """Remove *name* if present; the file is rewritten whether or not it existed."""
+        data = self._load()
+        data.pop(name, None)
+        self._save(data)
+
+    def current_values(self, config: ConversationRuntimeConfig) -> dict[str, object]:
+        """Return the current PROFILE_FIELDS values read from *config*."""
+        return {field: getattr(config, field) for field in PROFILE_FIELDS}
diff --git a/core/rag_manager.py b/core/rag_manager.py
new file mode 100644
index 0000000..8a6c95c
--- /dev/null
+++ b/core/rag_manager.py
@@ -0,0 +1,419 @@
+"""Thin façade over scripts/rag/* for the RAG Management web UI.
+
+All functions are synchronous and designed to be called from route handlers
+via asyncio.to_thread. None load the LLM, but several build an embedding model.
+"""
+
+from __future__ import annotations
+
+import csv
+import json
+import time
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import chromadb
+from chromadb.config import Settings
+
+if TYPE_CHECKING:
+    from core.config import RagScriptConfig
+
+
+def _chroma_client(persist_dir: str) -> chromadb.PersistentClient:
+    """Open the on-disk Chroma store at *persist_dir* with anonymized telemetry off."""
+    no_telemetry = Settings(anonymized_telemetry=False)
+    store = chromadb.PersistentClient(path=persist_dir, settings=no_telemetry)
+    return store
+
+
+# ---------------------------------------------------------------------------
+# Collections
+# ---------------------------------------------------------------------------
+
+
+def list_collections(config: RagScriptConfig) -> list[dict[str, Any]]:
+    """Summarise every ChromaDB collection (name, count, fingerprint metadata), sorted by name."""
+    summaries: list[dict[str, Any]] = []
+    for collection in _chroma_client(config.persist_directory).list_collections():
+        try:
+            size = collection.count()
+        except Exception:
+            # A collection that cannot be counted is still listed, with count=None.
+            size = None
+        fingerprint = collection.metadata or {}
+        summaries.append(
+            {
+                "name": collection.name,
+                "count": size,
+                "embedding_model": fingerprint.get("embedding:model", ""),
+                "embedding_dimension": fingerprint.get("embedding:dimension", ""),
+                "embedding_normalize": fingerprint.get("embedding:normalize", ""),
+            }
+        )
+    summaries.sort(key=lambda entry: entry["name"])
+    return summaries
+
+
+def collection_info(config: RagScriptConfig, name: str) -> dict[str, Any] | None:
+    """Return count, metadata, fingerprint fields and up to 5 sample docs for *name*; None if lookup fails."""
+    client = _chroma_client(config.persist_directory)
+    try:
+        col = client.get_collection(name)
+    except Exception:  # any lookup failure is reported as "not found"
+        return None
+    try:
+        count = col.count()
+    except Exception:
+        count = 0  # unlike list_collections, a failed count is reported as 0 here
+    meta = col.metadata or {}
+    try:
+        sample = col.peek(limit=5)
+        sample_docs = [
+            {"id": id_, "text": (doc or "")[:200], "metadata": m}  # preview capped at 200 chars
+            for id_, doc, m in zip(
+                sample.get("ids", []),
+                sample.get("documents", []) or [],
+                sample.get("metadatas", []) or [],
+                strict=False,
+            )
+        ]
+    except Exception:
+        sample_docs = []  # sampling is best-effort; the rest of the info is still returned
+    return {
+        "name": name,
+        "count": count,
+        "metadata": meta,
+        "embedding_model": meta.get("embedding:model", ""),
+        "embedding_dimension": meta.get("embedding:dimension", ""),
+        "embedding_normalize": meta.get("embedding:normalize", ""),
+        "sample_docs": sample_docs,
+    }
+
+
+def delete_collection(config: RagScriptConfig, name: str) -> None:
+    """Remove the ChromaDB collection called *name* from the persisted store."""
+    store = _chroma_client(config.persist_directory)
+    store.delete_collection(name)
+
+
+def query_collection(
+    config: RagScriptConfig,
+    name: str,
+    query: str,
+    k: int = 5,
+) -> list[dict[str, Any]]:
+    """Embed *query* and return the top-*k* chunks of collection *name* as ranked dicts with scores."""
+    from langchain_chroma import Chroma  # noqa: PLC0415
+    from langchain_huggingface import HuggingFaceEmbeddings  # noqa: PLC0415
+
+    embedder = HuggingFaceEmbeddings(
+        model_name=config.embedding_model,
+        model_kwargs={"device": config.embedding_device},
+        encode_kwargs={"normalize_embeddings": True},  # normalization is fixed here, not read from config
+        cache_folder=config.embedding_cache,
+    )
+    client = _chroma_client(config.persist_directory)
+    db = Chroma(
+        client=client,
+        collection_name=name,
+        embedding_function=embedder,
+    )
+    results = db.similarity_search_with_score(query, k=k)
+    return [
+        {
+            "rank": i + 1,  # 1-based position in the result list
+            "text": doc.page_content,
+            "score": round(float(score), 4),  # score as returned by the store, rounded to 4 places
+            "metadata": doc.metadata,
+        }
+        for i, (doc, score) in enumerate(results)
+    ]
+
+
+def backfill_fingerprint(config: RagScriptConfig, name: str) -> dict[str, Any]:
+    """Build the current embedding fingerprint, merge it into *name*'s metadata, and return it."""
+    from langchain_huggingface import HuggingFaceEmbeddings  # noqa: PLC0415
+
+    from scripts.rag.manage_collections_core_collection import (  # noqa: PLC0415
+        build_embedding_fingerprint,
+        infer_embedding_dimension,
+    )
+
+    embedder = HuggingFaceEmbeddings(
+        model_name=config.embedding_model,
+        model_kwargs={"device": config.embedding_device},
+        encode_kwargs={"normalize_embeddings": True},
+        cache_folder=config.embedding_cache,
+    )
+    dimension = infer_embedding_dimension(embedder)
+    fingerprint = build_embedding_fingerprint(
+        embedding_model=config.embedding_model,
+        normalize_embeddings=True,  # kept in step with encode_kwargs above
+        embedding_dimension=dimension,
+    )
+    client = _chroma_client(config.persist_directory)
+    col = client.get_collection(name)  # not guarded: a missing collection raises to the caller
+    existing_meta = col.metadata or {}
+    col.modify(metadata={**existing_meta, **fingerprint})  # fingerprint keys win on conflict
+    return fingerprint
+
+
+# ---------------------------------------------------------------------------
+# RAG Data Files
+# ---------------------------------------------------------------------------
+
+
+def list_rag_files(config: RagScriptConfig) -> list[dict[str, Any]]:
+    """Describe each ``*.txt`` file in the documents directory, in sorted path order."""
+    docs_root = Path(config.documents_directory)
+    if not docs_root.exists():
+        return []
+
+    def _describe(txt_path: Path) -> dict[str, Any]:
+        # Stems ending in ``_message_examples`` are message examples; everything else is lore.
+        kind = "message_examples" if txt_path.stem.endswith("_message_examples") else "lore"
+        return {
+            "name": txt_path.name,
+            "stem": txt_path.stem,
+            "type": kind,
+            "size": txt_path.stat().st_size,
+            "has_metadata": (docs_root / f"{txt_path.stem}.json").exists(),
+        }
+
+    return [_describe(txt_path) for txt_path in sorted(docs_root.glob("*.txt"))]
+
+
+def file_content(config: RagScriptConfig, filename: str) -> str | None:
+    """Read a ``.txt``/``.json`` file from the documents directory; None if it is not allowed."""
+    docs_root = Path(config.documents_directory).resolve()
+    target = (docs_root / filename).resolve()
+    # Resolving first means ``..`` segments and symlinks cannot escape docs_root.
+    if not target.is_relative_to(docs_root):
+        return None
+    # Only an existing regular file with a whitelisted suffix is ever read.
+    if target.exists() and target.is_file() and target.suffix in {".txt", ".json"}:
+        return target.read_text(encoding="utf-8")
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Linting
+# ---------------------------------------------------------------------------
+
+
+def run_lint(config: RagScriptConfig, *, auto_fix: bool = False) -> list[dict[str, Any]]:
+    """Lint every ``*_message_examples.txt`` file; return one report dict per file, in sorted path order."""
+    from scripts.rag.lint_message_examples import lint_file_path  # noqa: PLC0415
+
+    rag_dir = Path(config.documents_directory)
+    reports: list[dict[str, Any]] = []
+    for path in sorted(rag_dir.glob("*_message_examples.txt")):
+        report = lint_file_path(path, auto_fix=auto_fix)  # presumably rewrites the file when auto_fix — confirm
+        reports.append(
+            {
+                "file": path.name,
+                "valid": report.valid,
+                "auto_fixed": report.auto_fixed,
+                "violations": [
+                    {
+                        "line_no": v.line_no,
+                        "rule_id": v.rule_id,
+                        "message": v.message,
+                        "severity": v.severity.value if hasattr(v.severity, "value") else str(v.severity),
+                        "suggested_fix": v.suggested_fix,
+                    }
+                    for v in report.violations
+                ],
+            }
+        )
+    return reports
+
+
+# ---------------------------------------------------------------------------
+# Coverage
+# ---------------------------------------------------------------------------
+
+
+def run_coverage(config: RagScriptConfig, stem: str) -> dict[str, Any] | None:
+    """Run coverage analysis for ``<stem>.txt``/``<stem>.json``; None if missing or *stem* is not a bare name."""
+    from scripts.rag.analyze_rag_coverage import (  # noqa: PLC0415
+        extract_coverage_metrics,
+        format_coverage_report,
+        load_metadata_file,
+    )
+
+    rag_dir = Path(config.documents_directory)
+    source_file = rag_dir / f"{stem}.txt"
+    metadata_file = rag_dir / f"{stem}.json"
+    if Path(stem).name != stem or not source_file.exists() or not metadata_file.exists():  # bare names only
+        return None
+    source_text = source_file.read_text(encoding="utf-8")
+    metadata_list = load_metadata_file(metadata_file)
+    metrics = extract_coverage_metrics(source_text, metadata_list)
+    report_text = format_coverage_report(metrics)
+    return {
+        "stem": stem,
+        "entities_count": metrics.entities_count,
+        "source_coverage_ratio": round(metrics.source_coverage_ratio, 4),
+        "total_source_chars": metrics.total_source_chars,
+        "covered_chars": metrics.covered_chars,
+        "unmapped_segments": metrics.unmapped_segments[:20],  # first 20 only
+        "category_distribution": metrics.category_distribution,
+        "report_text": report_text,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Fixture Evaluation
+# ---------------------------------------------------------------------------
+
+
+def list_fixture_packs(tests_dir: str = "tests/fixtures") -> list[str]:
+    """Return the sorted file names of the ``*.json`` fixtures found in *tests_dir*."""
+    root = Path(tests_dir)
+    # A missing directory simply means there are no packs to offer.
+    names = [entry.name for entry in root.glob("*.json")] if root.exists() else []
+    return sorted(names)
+
+
+def run_evaluate_fixtures(
+    config: RagScriptConfig,
+    fixture_file: str,
+    tests_dir: str = "tests/fixtures",
+) -> dict[str, Any] | None:
+    """Evaluate one fixture pack in similarity mode; None if missing or *fixture_file* is not a bare name."""
+    from scripts.rag.manage_collections_core_evaluation import _execute_fixture_evaluation  # noqa: PLC0415
+    from scripts.rag.manage_collections_core_types import FixtureEvalOptions  # noqa: PLC0415
+
+    fixture_path = Path(tests_dir) / fixture_file
+    if Path(fixture_file).name != fixture_file or not fixture_path.exists():  # no "../" or absolute paths
+        return None
+    options = FixtureEvalOptions(
+        fixture_file=fixture_path,
+        k=None,
+        retrieval_mode="similarity",
+        persist_directory=config.persist_directory,
+        embedding_model=config.embedding_model,
+        embedding_device=config.embedding_device,
+        show_failures=False,
+    )
+    run = _execute_fixture_evaluation(options)
+    return {
+        "fixture_file": fixture_file,
+        "default_k": run.default_k,
+        "skipped": run.skipped,
+        "metrics": run.metrics,
+        "case_results": [
+            {
+                "case_id": c.case_id,
+                "rank": c.rank,
+                "status": c.status,
+                "query": c.query[:120],  # query text truncated to 120 chars
+                "collection": c.collection,
+                "forbidden_hit": c.forbidden_hit,
+                "precision_at_k": round(c.precision_at_k, 4),
+                "average_precision_at_k": round(c.average_precision_at_k, 4),
+                "matched_expected": c.matched_expected,
+                "expected_total": c.expected_total,
+            }
+            for c in run.case_results
+        ],
+    }
+
+
+def get_fixture_trends(logs_dir: str = "logs/retrieval_eval") -> list[dict[str, Any]]:
+    """Load ``history.csv`` from *logs_dir* as row dicts, last file row first; [] on any failure."""
+    csv_file = Path(logs_dir) / "history.csv"
+    if not csv_file.exists():
+        return []
+    try:
+        with csv_file.open(encoding="utf-8", newline="") as handle:
+            history = [dict(record) for record in csv.DictReader(handle)]
+    except Exception:
+        # Unreadable or malformed history is reported as "no trend data".
+        return []
+    history.reverse()
+    return history
+
+
+# ---------------------------------------------------------------------------
+# Collection Push
+# ---------------------------------------------------------------------------
+
+
+def push_collection(
+    config: RagScriptConfig,
+    stem: str,
+    collection_name: str,
+    *,
+    overwrite: bool = True,
+) -> dict[str, Any]:
+    """Chunk, enrich, and push ``<stem>.txt`` into a ChromaDB collection; *stem* must be a bare file name."""
+
+    from langchain_huggingface import HuggingFaceEmbeddings  # noqa: PLC0415
+
+    from scripts.rag.push_rag_data import (  # noqa: PLC0415
+        ProcessingContext,
+        PushConfig,
+        build_embedding_fingerprint,
+        enrich_documents_with_metadata,
+        infer_embedding_dimension,
+        load_and_chunk_text_file,
+        push_to_collection,
+        resolve_metadata_file,
+    )
+
+    rag_dir = Path(config.documents_directory)
+    file_path = rag_dir / f"{stem}.txt"
+    if Path(stem).name != stem or not file_path.exists():  # bare names only: no "../" or absolute paths
+        msg = f"Source file not found: {file_path}"
+        raise FileNotFoundError(msg)
+
+    embedder = HuggingFaceEmbeddings(
+        model_name=config.embedding_model,
+        model_kwargs={"device": config.embedding_device},
+        encode_kwargs={"normalize_embeddings": True},
+        cache_folder=config.embedding_cache,
+    )
+    client = _chroma_client(config.persist_directory)
+    dimension = infer_embedding_dimension(embedder)
+    fingerprint = build_embedding_fingerprint(
+        embedding_model=config.embedding_model,
+        normalize_embeddings=True,
+        embedding_dimension=dimension,
+    )
+    documents = load_and_chunk_text_file(file_path, config.chunk_size, config.chunk_overlap)
+    metadata_file = resolve_metadata_file(file_path, config.key_storage, None)
+    documents = enrich_documents_with_metadata(documents, metadata_file, config.threads)
+
+    push_cfg = PushConfig(
+        persist_directory=config.persist_directory,
+        chunk_size=config.chunk_size,
+        chunk_overlap=config.chunk_overlap,
+        key_storage=config.key_storage,
+        threads=config.threads,
+        dry_run=False,
+        overwrite=overwrite,
+    )
+    ctx = ProcessingContext(embedder=embedder, client=client)
+    t0 = time.monotonic()  # elapsed_s times the push only, not chunking or enrichment
+    push_to_collection(collection_name, documents, push_cfg, ctx, fingerprint)
+    elapsed = time.monotonic() - t0
+    return {
+        "collection": collection_name,
+        "stem": stem,
+        "doc_count": len(documents),
+        "elapsed_s": round(elapsed, 2),
+    }
+
+
+def get_benchmark_results(benchmark_dir: str = "logs/benchmark") -> dict[str, Any] | None:
+    """Return the parsed ``last_benchmark.json`` from *benchmark_dir*, or None if unavailable."""
+    latest = Path(benchmark_dir) / "last_benchmark.json"
+    if not latest.exists():
+        return None
+    try:
+        payload = json.loads(latest.read_text(encoding="utf-8"))
+    except Exception:
+        return None  # unreadable or invalid JSON counts as "no benchmark yet"
+    return payload
diff --git a/docs/future_work/COPILOT_COMPACT_REFERENCE.md b/docs/future_work/COPILOT_COMPACT_REFERENCE.md
index 0db7cd0..8a8fbe7 100644
--- a/docs/future_work/COPILOT_COMPACT_REFERENCE.md
+++ b/docs/future_work/COPILOT_COMPACT_REFERENCE.md
@@ -1,6 +1,6 @@
 # Copilot Compact Reference — Implemented State
 
-Last verified: 2026-03-26
+Last verified: 2026-03-29
 
 Use this as the single compact reference for implemented work across conversation quality, RAG quality, and web app behavior.
 
@@ -140,15 +140,21 @@ Primary files:
 - In-UI session picker with naming support.
 - Per-turn retrieval trace history in the debug panel.
 - Session exports persist conversation-quality metadata and drift traces for later calibration.
+- **Per-turn diagnostics panel**: collapsible sidebar panel showing Turn, Latency (s), Chars, Main chunks, MES chunks, Cross-removed, and Drift score (colour-coded at warning/fail thresholds) for the last 40 turns. Auto-refreshes after each stream. Route: `GET /chat/diagnostics`.
+- **Saveable preset profiles**: collapsible sidebar panel for saving/applying/deleting named snapshots of 7 retrieval settings (`use_mmr`, `rag_rerank_enabled`, `rag_sentence_compression_enabled`, `rag_multi_query_enabled`, `rag_k`, `rag_k_mes`, `debug_context`). Profiles persisted in `configs/profiles.json`; applied in-place to the live `ConversationRuntimeConfig` without restart. Routes: `GET/POST /settings/profiles/*`.
+- **One-click export bundle**: `GET /chat/export/bundle` downloads a ZIP containing `manifest.json`, `conversation.json` (full session), `retrieval_traces.json` (per-turn history), and `drift_history.json`. Button in composer quick-actions.
 
 Primary files:
 
 - `web_app.py`
 - `main.py`
+- `core/preset_profiles.py`
 - `templates/index.html`
 - `templates/chat_message_pair.html`
 - `templates/chat_messages.html`
 - `templates/chat_single_message.html`
+- `templates/diagnostics_panel.html`
+- `templates/presets_panel.html`
 
 ## Current Defaults Snapshot
 
diff --git a/docs/future_work/REFINEMENTS.md b/docs/future_work/REFINEMENTS.md
index 230aee1..b0c6ff0 100644
--- a/docs/future_work/REFINEMENTS.md
+++ b/docs/future_work/REFINEMENTS.md
@@ -1,6 +1,6 @@
 # Refinements Backlog
 
-Last updated: 2026-03-26
+Last updated: 2026-04-03
 
 This is the single source for remaining and future work across quality and retrieval.
 
@@ -64,6 +64,87 @@ Implemented state lives in `docs/future_work/COPILOT_COMPACT_REFERENCE.md`.
 - Benchmark sentence compression and rerank combinations on a fixed fixture matrix.
 - Add an embedding model tiering profile (`small`, `balanced`, `quality`) with measured quality/cost tradeoffs.
 
+### 6. Memory & Session Continuity
+
+Inspired by analysis of Claude's leaked markdown-based memory system. The current RAG pipeline is
+entirely character-centric (lore and style). There is no persistent cross-session user memory —
+facts about the user, relationship state, or conversation history carry zero weight between sessions.
+
+A two-tier hybrid is the right approach:
+
+- **Tier 1 — Markdown persona memory (do first):** Small per-user file
+  (`memory/<char_name>/<user_id>.md`, ~200–400 tokens) containing relationship facts, user
+  preferences, and conversation state. Written by the LLM at session end via a lightweight
+  summarisation prompt. Loaded at session start and injected into the context budget before RAG
+  content. Human-readable, editable, and debuggable without any vector tooling.
+- **Tier 2 — RAG over conversation archives (later):** When a user accumulates many sessions,
+  semantic search over past conversations using a `<user_id>_memory` ChromaDB collection. Reuses
+  the existing retrieval pipeline. Only worthwhile at scale.
+
+Prerequisites before Tier 1 can be built:
+  1. User/session identity scoping (who is this user across sessions?).
+  2. Session-end write hook (trigger point for LLM memory extraction).
+  3. Reserved context budget slot (~300 tokens, injected before RAG).
+  4. Memory write prompt (instructs the LLM to extract 5–10 facts from the session).
+
+Idle-time memory consolidation: merge/summarise older notes when count exceeds threshold
+(equivalent to AutoDream consolidation in the Claude spec). New commands:
+`/memory list`, `/memory add <note>`, `/memory forget <id>`, `/memory clear`.
+
+Web UI: memory panel showing injected facts per turn (see `UI_REFINEMENTS.md §A.5`).
+
+### 7. Chat Experience & Conversation Control
+
+- **Conversation branching:** `/fork` to snapshot current state to a named branch, `/forks` to
+  list all saved branches, `/fork restore <id>` to rewind and continue from that point. Branches
+  stored in session JSON under a `branches` key; pairs with web UI controls (see `UI_REFINEMENTS.md §A.3`).
+- **Character hot-reload:** `/character <card_name>` to swap the active character card mid-session
+  without a full restart. Preserve conversation history; reset persona drift state and reload the
+  RAG collection. Insert a visible "Character switched → <name>" marker in the conversation.
+  List available cards with `/character list`.
+- **Stop hooks:** user-defined stop conditions in config (`generation.stop_hooks`) — regex patterns
+  or keyword lists with `stop | redirect | warn` actions. Useful for OOC marker detection,
+  character-break detection, or content policy enforcement. Log stop events to telemetry and web
+  diagnostics.
+- **User-defined command macros (skills):** define custom `/skill` commands in
+  `configs/skills.json`, mapping names to message templates injected at send time. Support template
+  variables: `{{char}}`, `{{user}}`, `{{last_response}}`. Commands: `/skill list`,
+  `/skill add <name> <template>`, `/skill remove <name>`. Web UI: skills dropdown in the chat
+  input area (see `UI_REFINEMENTS.md §A.6`).
+
+All features should be opt-in via config and must not alter existing behaviour when disabled.
+
+### 8. Token & Context Observability
+
+- **Pressure-aware context compaction:** replace threshold-only history summarisation with
+  continuous token fill-rate tracking. Trigger compaction when the context window exceeds ~80%
+  capacity. Compress oldest history segments first; keep recent turns verbatim. Emit visible
+  compaction markers in the web chat UI. Expose compaction stats in the diagnostics panel.
+  Builds on the existing `context_manager.py` token budget logic — low-risk addition.
+- **Per-turn token usage stats:** track prompt tokens, completion tokens, context window %, and
+  RAG chunk count per turn. Add session-level cumulative totals to export metadata and the ZIP
+  bundle. CLI verbose mode: print token counts after each response. Web UI: extend the diagnostics
+  panel (see `UI_REFINEMENTS.md §A.1–A.2`).
+
+### 9. CLI Quality of Life
+
+- **Output themes & syntax highlighting:** configurable terminal colour themes (dark, light,
+  minimal, retro). Style character name, user input, system messages, and warnings with distinct
+  ANSI colours. Store preference as `ui.cli_theme` in config. Show theme options in `/help`.
+- **Customisable keybindings:** load from `configs/keybindings.json`; allow remapping of clear,
+  reload, save, export, continue, and help actions. Sensible defaults matching current behaviour.
+  Show current bindings in `/help` output.
+
+### 10. Multi-Character Conversation Mode (Exploratory)
+
+Two simultaneous active characters (e.g., narrator + character, or character A ↔ character B).
+Each character maintains its own RAG collection and persona drift tracker. A turn-router
+(rule-based or LLM-directed) decides which character responds each turn. Config:
+`multi_character: { enabled: true, characters: ["CharA", "CharB"] }`.
+
+**Large effort, Medium value.** Treat as a long-horizon milestone — do not start until §6–8 are
+stable. The only "Large" effort item in this backlog.
+
 *(Web UX and observability improvements are tracked in `docs/future_work/UI_REFINEMENTS.md`.)*
 
 ## Suggested Execution Order
@@ -77,6 +158,11 @@ Implemented state lives in `docs/future_work/COPILOT_COMPACT_REFERENCE.md`.
 7. ✅ Wire conversation fixture evaluation into unified quality-gate command and CI policy. (2026-03-26)
 8. ✅ Add retrieval trend rendering and debug export artifacts. (2026-03-26)
 9. Iterate on higher-level UX and explainability improvements — see `docs/future_work/UI_REFINEMENTS.md`.
+10. Add pressure-aware context compaction and per-turn token usage stats (§8).
+11. Implement Tier 1 markdown persona memory (§6) — requires user identity scoping first.
+12. Add conversation branching, character hot-reload, stop hooks, and skills macros (§7).
+13. CLI quality-of-life pass: themes and keybindings (§9).
+14. Multi-character conversation mode (§10) — long-horizon, after §6–8 are stable.
 
 ## Next Steps
 
diff --git a/docs/future_work/UI_REFINEMENTS.md b/docs/future_work/UI_REFINEMENTS.md
index 5214b5c..f68193b 100644
--- a/docs/future_work/UI_REFINEMENTS.md
+++ b/docs/future_work/UI_REFINEMENTS.md
@@ -1,22 +1,82 @@
 # UI Refinements Backlog
 
-Last updated: 2026-03-26
+Last updated: 2026-04-03
 
-Forward-looking improvements to the web interface. This covers both general UX polish (moved
-from `REFINEMENTS.md`) and the larger RAG management UI plan.
+Forward-looking improvements to the web interface. This covers chat UI enhancements and the larger
+RAG management UI plan.
 
 Implemented state lives in `docs/future_work/COPILOT_COMPACT_REFERENCE.md`.
 
 ---
 
-## A. General Web UX (Moved from REFINEMENTS.md §6)
+## A. Chat UI Enhancements
 
-- Add a compact run diagnostics panel (latency, tokens/chars, retrieval counts, guardrail
-  triggers per turn).
-- Add saveable preset profiles for debug mode and retrieval settings (toggle rerank, MMR,
-  sentence compression without editing config).
-- Add a one-click export bundle for support/debug sessions (conversation JSON + retrieval
-  traces + drift history in a single download).
+Quality-of-life improvements to the main chat view and diagnostics panel. These complement the
+backend features described in `docs/future_work/REFINEMENTS.md §§6–8`.
+
+### A.1 Token Budget Visualization
+
+Add a token budget bar to the diagnostics panel (or below the chat input area).
+
+- Displays real-time allocation breakdown: system prompt %, history %, RAG context %, headroom.
+- Updates after each turn alongside the persona drift score.
+- Colour-coded: green (< 70%) / yellow (70–90%) / red (> 90%) by fill pressure.
+- Data source: extend `ContextBudget` in `context_manager.py` to return percentage breakdowns;
+  emit alongside each response payload or via a dedicated `/diagnostics/budget` endpoint.
+
+### A.2 Per-Turn Token Usage Stats Panel
+
+Extend the existing diagnostics panel with turn-level token stats.
+
+- Show per-turn: prompt tokens, completion tokens, context window %, RAG chunks retrieved.
+- Show session-level cumulative totals at the bottom of the panel.
+- Backend: `context_manager.py` already computes budgets; add a `TokenUsageRecord` emitted
+  alongside each streamed response.
+- Depends on `REFINEMENTS.md §8` (per-turn token usage stats) for the backend data.
+
+### A.3 Conversation Branching Controls
+
+UI surface for the fork/restore feature described in `REFINEMENTS.md §7`.
+
+- **Fork button** in the chat header: prompts for an optional name, then snapshots current
+  conversation state as a named branch.
+- **Branch list** in the Sessions panel: shows all forks for the active session with timestamps
+  and restore buttons.
+- **Restore action**: rewinds the chat view and conversation state to the fork point; inserts a
+  visible `— Restored from fork: <name> —` divider in the chat history.
+- Backend: extend `export_conversation_state` / `import_conversation_state` to support a
+  `branches` key in the session JSON.
+- Depends on `REFINEMENTS.md §7` (conversation branching) for the backend state model.
+
+### A.4 Session History Search
+
+Full-text search across saved session JSON files, surfaced in the Sessions panel.
+
+- Search input with character name filter and optional date range.
+- Results show matching turns with surrounding context snippets.
+- Route: `GET /sessions/search?q=<query>&character=<name>&from=<date>&to=<date>`.
+- Returns an HTMX partial with a paginated results table, consistent with the existing panel pattern.
+
+### A.5 Memory Panel
+
+UI surface for the persistent memory system described in `REFINEMENTS.md §6`. Deferred until
+Tier 1 markdown memory is implemented.
+
+- Add a "Memory" tab in the diagnostics sidebar.
+- Show which memory entries were injected for the current turn.
+- If Tier 2 RAG memory is active, show relevance scores alongside each entry.
+- Manual actions: add fact, forget entry (calls `/memory` endpoints).
+- Routes: `GET /memory`, `POST /memory/add`, `DELETE /memory/{id}`.
+
+### A.6 Skills / Macros Dropdown
+
+UI surface for the skills system described in `REFINEMENTS.md §7`. Deferred until the skills
+config backend is implemented.
+
+- Skills dropdown button adjacent to the chat input field.
+- Lists available `/skill` commands with their template previews; selecting one inserts the
+  expanded template into the input for editing before send.
+- Reflects live state from `configs/skills.json` without a page reload.
 
 ---
 
@@ -153,6 +213,10 @@ Or, given the project's existing pattern, call the CLI module functions directly
 
 ## Suggested Execution Order (UI)
 
-1. General UX polish items (§A) alongside any ongoing chat-quality work.
-2. RAG Management UI (§B) as a self-contained milestone — implement §B.6 steps in order.
-3. Diagnostics panel and preset profiles (§A) after RAG panel is stable.
+1. RAG Management UI (§B) as a self-contained milestone — implement §B.6 steps in order.
+2. Token budget visualization (§A.1) and per-turn token stats panel (§A.2) — low-risk extensions
+   to the existing diagnostics panel; depends on `REFINEMENTS.md §8` backend work.
+3. Session history search (§A.4) — stateless read-only feature, no new backend state model needed.
+4. Conversation branching controls (§A.3) — depends on `REFINEMENTS.md §7` session state changes.
+5. Memory panel (§A.5) — depends on `REFINEMENTS.md §6` Tier 1 memory being implemented first.
+6. Skills dropdown (§A.6) — depends on `REFINEMENTS.md §7` skills config backend.
diff --git a/templates/diagnostics_panel.html b/templates/diagnostics_panel.html
new file mode 100644
index 0000000..d4d6b50
--- /dev/null
+++ b/templates/diagnostics_panel.html
@@ -0,0 +1,37 @@
+<div id="diag-content">{# Per-turn diagnostics partial: one table row per entry in `history`; metrics that are missing or non-numeric render as "—" instead of failing the render. #}
+{% if history %}
+<table class="diag-table">
+    <thead>
+        <tr>
+            <th>T</th>
+            <th>Latency</th>
+            <th>Chars</th>
+            <th>Main</th>
+            <th>MES</th>
+            <th>Cross−</th>
+            <th>Drift</th>
+        </tr>
+    </thead>
+    <tbody>
+    {% for entry in history %}
+        {% set r = entry.retrieval if entry.retrieval else {} %}
+        {% set cl = r.cleanup if r.cleanup else {} %}
+        {% set drift = entry.persona.drift_score if entry.persona else none %}
+        <tr>
+            <td>{{ entry.turn }}</td>
+            <td>{{ "%.2fs" | format(entry.latency_s) if entry.latency_s is number else "—" }}</td>
+            <td>{{ entry.chars_emitted if entry.chars_emitted is number else "—" }}</td>
+            <td>{{ cl.main if cl.main is defined else 0 }}</td>
+            <td>{{ cl.mes if cl.mes is defined else 0 }}</td>
+            <td>{{ cl.cross_removed if cl.cross_removed is defined else 0 }}</td>
+            <td{% if drift is number %} style="color: {% if drift >= fail_threshold %}#f8b4b4{% elif drift >= warn_threshold %}#ffd166{% else %}#a8d8a8{% endif %}"{% endif %}>
+                {{ "%.3f" | format(drift) if drift is number else "—" }}
+            </td>
+        </tr>
+    {% endfor %}
+    </tbody>
+</table>
+{% else %}
+<span class="diag-empty">No turns recorded yet.</span>
+{% endif %}
+</div>
diff --git a/templates/index.html b/templates/index.html
index addacec..4aa450b 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -27,6 +27,7 @@
         .sidebar {
             border-right: 1px solid #2d3743;
             padding: 1rem;
+            overflow-y: auto;
         }
 
         .sidebar h1 {
@@ -249,6 +250,195 @@
             margin-top: 0.35rem;
             color: #f8b4b4;
         }
+
+        .diag-panel,
+        .presets-panel {
+            margin-top: 1rem;
+            border: 1px solid #2d3743;
+            border-radius: 8px;
+            padding: 0.6rem;
+            background: #0f1317;
+            overflow: hidden;
+        }
+
+        .diag-panel summary,
+        .presets-panel summary {
+            font-size: 0.9rem;
+            font-weight: 600;
+            cursor: pointer;
+            margin-bottom: 0.3rem;
+            list-style: none;
+            padding: 0.1rem 0;
+        }
+
+        .diag-panel summary::after,
+        .presets-panel summary::after {
+            content: " ▸";
+            font-size: 0.75rem;
+            color: #98a6b6;
+        }
+
+        .diag-panel[open] summary::after,
+        .presets-panel[open] summary::after {
+            content: " ▾";
+        }
+
+        .diag-table {
+            width: 100%;
+            table-layout: fixed;
+            border-collapse: collapse;
+            font-size: 0.75rem;
+            color: #98a6b6;
+            margin-top: 0.3rem;
+        }
+
+        .diag-table th,
+        .diag-table td {
+            border: 1px solid #2d3743;
+            padding: 0.2rem 0.35rem;
+            text-align: right;
+            overflow-wrap: break-word;
+            word-break: break-all;
+        }
+
+        .diag-table th {
+            color: #e6edf3;
+            background: #1a1f26;
+            text-align: center;
+        }
+
+        .diag-table th:first-child,
+        .diag-table td:first-child {
+            text-align: left;
+        }
+
+        .diag-empty {
+            font-size: 0.8rem;
+            color: #98a6b6;
+        }
+
+        .profile-current-table {
+            margin-top: 0.5rem;
+        }
+
+        .profile-row {
+            display: grid;
+            grid-template-columns: 1fr auto;
+            gap: 0.35rem;
+            margin-bottom: 0.4rem;
+        }
+
+        .profile-select,
+        .profile-input {
+            border: 1px solid #2d3743;
+            background: #111418;
+            color: #e6edf3;
+            border-radius: 6px;
+            padding: 0.35rem 0.4rem;
+            font-size: 0.8rem;
+            min-width: 0;
+        }
+
+        .profile-btn {
+            padding: 0.35rem 0.55rem;
+            font-size: 0.8rem;
+            border-radius: 6px;
+            background: #1e242b;
+            border: 1px solid #2d3743;
+            color: #e6edf3;
+        }
+
+        .profile-btn-danger {
+            background: #3b1a1a;
+            border-color: #7a3030;
+        }
+
+        .profile-empty {
+            font-size: 0.8rem;
+            color: #98a6b6;
+            margin: 0.2rem 0 0.4rem;
+        }
+
+        #refresh-diag-btn {
+            margin-top: 0.4rem;
+            padding: 0.3rem 0.55rem;
+            font-size: 0.78rem;
+            background: #1e242b;
+            border: 1px solid #2d3743;
+            border-radius: 6px;
+            color: #e6edf3;
+        }
+
+        .rag-link-btn {
+            display: block;
+            margin-top: 1rem;
+            padding: 0.55rem 0.75rem;
+            background: #1e242b;
+            border: 1px solid #2d3743;
+            border-radius: 8px;
+            color: #79c0ff;
+            text-decoration: none;
+            font-size: 0.85rem;
+            font-weight: 600;
+            text-align: center;
+            cursor: pointer;
+            transition: background 0.15s;
+        }
+
+        .rag-link-btn:hover {
+            background: #262e38;
+            color: #a8d8ff;
+        }
+
+        .guide-panel {
+            margin-top: 1rem;
+            border: 1px solid #2d3743;
+            border-radius: 8px;
+            padding: 0.6rem;
+            background: #0f1317;
+        }
+
+        .guide-panel summary {
+            font-size: 0.9rem;
+            font-weight: 600;
+            cursor: pointer;
+            list-style: none;
+            padding: 0.1rem 0;
+        }
+
+        .guide-panel summary::after {
+            content: " ▸";
+            font-size: 0.75rem;
+            color: #98a6b6;
+        }
+
+        .guide-panel[open] summary::after {
+            content: " ▾";
+        }
+
+        .guide-item {
+            margin-top: 0.65rem;
+            padding-top: 0.5rem;
+            border-top: 1px solid #2d3743;
+        }
+
+        .guide-item:first-of-type {
+            border-top: none;
+            padding-top: 0.4rem;
+        }
+
+        .guide-item h3 {
+            margin: 0 0 0.2rem;
+            font-size: 0.82rem;
+            color: #c9d1d9;
+        }
+
+        .guide-item p {
+            margin: 0;
+            font-size: 0.77rem;
+            color: #98a6b6;
+            line-height: 1.45;
+        }
     </style>
 </head>
 
@@ -289,6 +479,57 @@ <h2>Retrieval Debug</h2>
                 <div id="debug-history-content">No retrieval history yet.</div>
                 <button type="button" id="refresh-debug-btn">Refresh Debug</button>
             </div>
+            <details class="diag-panel">
+                <summary>Diagnostics</summary>
+                <div id="diag-content"><span class="diag-empty">No turns recorded yet.</span></div>
+                <button type="button" id="refresh-diag-btn">Refresh</button>
+            </details>
+            <details class="presets-panel"
+                     hx-get="/settings/profiles"
+                     hx-trigger="toggle once"
+                     hx-target="#presets-content"
+                     hx-swap="outerHTML">
+                <summary>Presets</summary>
+                <div id="presets-content">
+                    <span class="profile-empty">Loading…</span>
+                </div>
+            </details>
+            <a class="rag-link-btn" href="/rag">⚙ RAG Manager →</a>
+            <details class="guide-panel">
+                <summary>💡 How to use this chat</summary>
+                <div class="guide-item">
+                    <h3>Sending messages</h3>
+                    <p>Type in the box at the bottom and press <strong>Send</strong> (or hit Enter). The character will reply in real time. If the reply gets cut off, press <strong>Continue</strong> to pick up where it left off.</p>
+                </div>
+                <div class="guide-item">
+                    <h3>Clear &amp; Reload</h3>
+                    <p><strong>Clear</strong> removes messages from the screen but keeps the character loaded. <strong>Reload</strong> fully restarts the character — useful if responses drift off-topic or feel broken.</p>
+                </div>
+                <div class="guide-item">
+                    <h3>Sessions — saving &amp; loading</h3>
+                    <p>Type an optional name and press <strong>Save</strong> to store the current conversation. Use the dropdown to pick a past conversation and press <strong>Load</strong> to restore it. <strong>Load Latest</strong> skips the menu and restores your most recent save.</p>
+                </div>
+                <div class="guide-item">
+                    <h3>Copy Last &amp; Export</h3>
+                    <p><strong>Copy Last</strong> puts the character's most recent reply on your clipboard. The Export buttons let you download the full conversation as a plain text file, a structured JSON file, or a ZIP bundle that includes metadata.</p>
+                </div>
+                <div class="guide-item">
+                    <h3>RAG — the knowledge base</h3>
+                    <p>The <strong>RAG</strong> value at the top shows which knowledge collection the character is using. This is a database of text the AI can search through when answering your messages — think of it as the character's long-term memory or reference library. You can manage these collections in the <strong>RAG Manager</strong>.</p>
+                </div>
+                <div class="guide-item">
+                    <h3>Retrieval Debug</h3>
+                    <p>Shows what information was fetched from the knowledge base to help answer your last message — how many chunks were found, how they were ranked, and which collection was searched. Useful for understanding why the character knows (or doesn't know) something.</p>
+                </div>
+                <div class="guide-item">
+                    <h3>Diagnostics</h3>
+                    <p>Per-turn statistics for recent replies: response latency, characters emitted, retrieved chunk counts, and a colour-coded persona drift score. Mainly useful for developers fine-tuning the character.</p>
+                </div>
+                <div class="guide-item">
+                    <h3>Presets</h3>
+                    <p>Save and restore retrieval settings (MMR, reranking, sentence compression, multi-query, chunk counts, and debug context). If you've found a configuration that works well, save it as a preset so you can switch back to it quickly.</p>
+                </div>
+            </details>
         </aside>
 
         <main class="main">
@@ -306,6 +547,7 @@ <h2>Retrieval Debug</h2>
                     <button type="button" id="copy-last-btn">Copy Last</button>
                     <button type="button" id="export-txt-btn">Export TXT</button>
                     <button type="button" id="export-json-btn">Export JSON</button>
+                    <button type="button" id="export-bundle-btn">Export Bundle</button>
                 </div>
                 <form id="chat-form" hx-post="/chat/send" hx-target="#chat-log" hx-swap="beforeend">
                     <input id="user-input" name="message" placeholder="Type your message..." autocomplete="off"
@@ -330,6 +572,7 @@ <h2>Retrieval Debug</h2>
         const copyLastButton = document.getElementById("copy-last-btn");
         const exportTxtButton = document.getElementById("export-txt-btn");
         const exportJsonButton = document.getElementById("export-json-btn");
+        const exportBundleButton = document.getElementById("export-bundle-btn");
         const clearButton = document.getElementById("clear-btn");
         const reloadButton = document.getElementById("reload-btn");
         const helpButton = document.getElementById("help-btn");
@@ -445,6 +688,22 @@ <h2>Retrieval Debug</h2>
             }
         }
 
+        async function refreshDiagnosticsPanel() {
+            try {
+                const response = await fetch("/chat/diagnostics", { method: "GET" });
+                if (!response.ok) {
+                    return;
+                }
+                const html = await response.text();
+                const diagContent = document.getElementById("diag-content");
+                if (diagContent) {
+                    diagContent.outerHTML = html;
+                }
+            } catch {
+                // diagnostics panel is non-critical; fail silently
+            }
+        }
+
         async function refreshSessionList() {
             try {
                 const response = await fetch("/chat/session/list", { method: "GET" });
@@ -556,6 +815,7 @@ <h2>Retrieval Debug</h2>
                 setStatus(receivedAnyToken ? "Response complete" : "No visible response");
                 await refreshDebugPanel();
                 await refreshDebugHistoryPanel();
+                await refreshDiagnosticsPanel();
             } catch (error) {
                 const reason = error?.name === "AbortError"
                     ? "[Timed out while waiting for model response]"
@@ -679,6 +939,10 @@ <h2>Retrieval Debug</h2>
             setStatus("Ready");
         });
 
+        document.getElementById("refresh-diag-btn").addEventListener("click", async () => {
+            await refreshDiagnosticsPanel();
+        });
+
         continueButton.addEventListener("click", () => {
             requestContinuation();
         });
@@ -793,6 +1057,11 @@ <h2>Retrieval Debug</h2>
             setStatus("Exported JSON");
         });
 
+        exportBundleButton.addEventListener("click", () => {
+            window.location.href = "/chat/export/bundle";
+            setStatus("Downloading bundle…");
+        });
+
         userInput.addEventListener("keydown", (event) => {
             if (event.key === "Enter" && event.ctrlKey) {
                 event.preventDefault();
@@ -855,6 +1124,7 @@ <h2>Retrieval Debug</h2>
             scrollChatToBottom();
             refreshDebugPanel();
             refreshDebugHistoryPanel();
+            refreshDiagnosticsPanel();
             refreshSessionList();
         });
 
diff --git a/templates/presets_panel.html b/templates/presets_panel.html
new file mode 100644
index 0000000..af4ed52
--- /dev/null
+++ b/templates/presets_panel.html
@@ -0,0 +1,40 @@
+<div id="presets-content">
+    {% if profiles %}
+    <form class="profile-row" hx-post="/settings/profiles/apply" hx-target="#presets-content" hx-swap="outerHTML">
+        <select name="name" id="profile-picker" class="profile-select">
+            {% for name in profiles %}
+            <option value="{{ name }}">{{ name }}</option>
+            {% endfor %}
+        </select>
+        <button type="submit" class="profile-btn">Apply</button>
+    </form>
+    <form class="profile-row" hx-post="/settings/profiles/delete" hx-target="#presets-content" hx-swap="outerHTML"
+          hx-confirm="Delete the selected profile?">{# htmx-native confirmation: the request is only issued after the user accepts the prompt. #}
+        <select name="name" class="profile-select">
+            {% for name in profiles %}
+            <option value="{{ name }}">{{ name }}</option>
+            {% endfor %}
+        </select>
+        <button type="submit" class="profile-btn profile-btn-danger">Delete</button>
+    </form>
+    {% else %}
+    <p class="profile-empty">No saved profiles.</p>
+    {% endif %}
+    <form class="profile-row" hx-post="/settings/profiles/save" hx-target="#presets-content" hx-swap="outerHTML">
+        <input type="text" name="name" placeholder="Profile name" maxlength="40" class="profile-input" required />
+        <button type="submit" class="profile-btn">Save Current</button>
+    </form>
+    <table class="diag-table profile-current-table">
+        <thead>
+            <tr><th>Setting</th><th>Value</th></tr>
+        </thead>
+        <tbody>
+            {% for key, val in current.items() %}
+            <tr>
+                <td>{{ key }}</td>
+                <td>{{ val }}</td>
+            </tr>
+            {% endfor %}
+        </tbody>
+    </table>
+</div>
diff --git a/templates/rag/benchmark_results.html b/templates/rag/benchmark_results.html
new file mode 100644
index 0000000..b455c91
--- /dev/null
+++ b/templates/rag/benchmark_results.html
@@ -0,0 +1,32 @@
+<div class="rag-section-header">Embedding Benchmark</div>
+
+{% if results %}
+<p style="font-size:0.78rem;color:#98a6b6;margin-bottom:0.65rem">
+    Generated: {{ results.generated_at[:16] if results.generated_at else "?" }} &nbsp;·&nbsp;
+    Fixture: {{ results.fixture_file }} &nbsp;·&nbsp; k={{ results.k }}
+</p>
+<table class="rag-table">
+    <thead>
+        <tr><th>Model</th><th>Recall@k</th><th>MRR</th><th>Precision@k</th><th>MAP@k</th><th>Evaluated</th><th>Skipped</th></tr>
+    </thead>
+    <tbody>
+    {% for mr in results.model_results %}
+        {% set m = mr.metrics %}
+        <tr>
+            <td><strong>{{ mr.label }}</strong></td>
+            <td>{{ "%.3f" | format(m.recall_at_k | float) }}</td>
+            <td>{{ "%.3f" | format(m.mrr | float) }}</td>
+            <td>{{ "%.3f" | format(m.precision_at_k | float) }}</td>
+            <td>{{ "%.3f" | format(m.map_at_k | float) }}</td>
+            <td>{{ mr.evaluated }}</td>
+            <td>{{ mr.skipped }}</td>
+        </tr>
+    {% endfor %}
+    </tbody>
+</table>
+{% else %}
+<div class="rag-msg rag-msg-info">
+    No benchmark results found. Run the benchmark from the CLI:<br />
+    <code>uv run python -m scripts.rag.manage_collections benchmark-embedding-models</code>
+</div>
+{% endif %}
diff --git a/templates/rag/collection_detail.html b/templates/rag/collection_detail.html
new file mode 100644
index 0000000..019b7ff
--- /dev/null
+++ b/templates/rag/collection_detail.html
@@ -0,0 +1,78 @@
+<div class="rag-section-header">
+    <button class="back-link rag-btn"
+        hx-get="/rag/collections" hx-target="#rag-main" hx-swap="innerHTML">← Collections</button>
+    Collection: <strong>{{ info.name }}</strong>
+</div>
+{# Collection detail partial: summary table, ad-hoc query form, rebuild form, fingerprint backfill and sample docs. info.name is passed through `urlencode` wherever it becomes a URL path segment so unusual characters cannot alter the request path. #}
+{% if backfilled %}
+<div class="rag-msg rag-msg-ok">Fingerprint backfilled: model={{ backfilled["embedding:model"] }}, dim={{ backfilled.get("embedding:dimension", "?") }}</div>
+{% endif %}
+
+<table class="rag-table" style="max-width:480px">
+    <tbody>
+        <tr><td><strong>Documents</strong></td><td>{{ info.count }}</td></tr>
+        <tr><td><strong>Embedding Model</strong></td><td>{{ info.embedding_model or "—" }}</td></tr>
+        <tr><td><strong>Dimension</strong></td><td>{{ info.embedding_dimension or "—" }}</td></tr>
+        <tr><td><strong>Normalize</strong></td><td>{{ info.embedding_normalize }}</td></tr>
+    </tbody>
+</table>
+
+<!-- Ad-hoc Query: results are swapped into #query-results; labels are bound to their inputs via for/id -->
+<div class="rag-subsection">
+    <div class="rag-subsection-header">Ad-hoc Query</div>
+    <form class="rag-form-row"
+        hx-post="/rag/collections/{{ info.name | urlencode }}/query"
+        hx-target="#query-results" hx-swap="innerHTML"
+        hx-indicator="#query-spinner">
+        <input class="rag-input rag-input-wide" name="query" placeholder="Enter query text…" required />
+        <label class="rag-label" for="query-k">k</label>
+        <input class="rag-input" id="query-k" name="k" type="number" value="5" min="1" max="20" style="width:52px" />
+        <button type="submit" class="rag-btn rag-btn-primary">Search</button>
+        <span id="query-spinner" class="htmx-indicator">Searching…</span>
+    </form>
+    <div id="query-results"></div>
+</div>
+
+<!-- Rebuild (Push): the response is swapped into #push-status -->
+<div class="rag-subsection">
+    <div class="rag-subsection-header">Rebuild Collection</div>
+    <form class="rag-form-row"
+        hx-post="/rag/collections/{{ info.name | urlencode }}/push"
+        hx-target="#push-status" hx-swap="innerHTML"
+        hx-confirm="Rebuild '{{ info.name }}'? This overwrites the existing collection.">
+        <label class="rag-label" for="push-stem">Source stem</label>
+        <input class="rag-input" id="push-stem" name="stem" placeholder="e.g. shodan" value="{{ info.name }}" required />
+        <button type="submit" class="rag-btn rag-btn-primary">Rebuild</button>
+    </form>
+    <div id="push-status"></div>
+</div>
+
+<!-- Backfill Fingerprint: the response replaces the whole #rag-main panel -->
+<div class="rag-subsection">
+    <div class="rag-subsection-header">Fingerprint</div>
+    <div class="rag-btn-group">
+        <button class="rag-btn"
+            hx-post="/rag/collections/{{ info.name | urlencode }}/backfill-fingerprint"
+            hx-target="#rag-main" hx-swap="innerHTML"
+            hx-confirm="Backfill embedding fingerprint for '{{ info.name }}'?">Backfill Fingerprint</button>
+    </div>
+</div>
+
+<!-- Sample Docs: rendered only when info.sample_docs is non-empty -->
+{% if info.sample_docs %}
+<div class="rag-subsection">
+    <div class="rag-subsection-header">Sample Documents (first {{ info.sample_docs | length }})</div>
+    <table class="rag-table">
+        <thead><tr><th style="width:40%">ID</th><th>Text Preview</th><th>Metadata Keys</th></tr></thead>
+        <tbody>
+        {% for doc in info.sample_docs %}
+            <tr>
+                <td class="truncate" style="font-size:0.72rem;color:#98a6b6">{{ doc.id }}</td>
+                <td class="truncate">{{ doc.text }}</td>
+                <td style="font-size:0.74rem;color:#98a6b6">{{ doc.metadata.keys() | list | join(", ") if doc.metadata else "—" }}</td>
+            </tr>
+        {% endfor %}
+        </tbody>
+    </table>
+</div>
+{% endif %}
diff --git a/templates/rag/collections_list.html b/templates/rag/collections_list.html
new file mode 100644
index 0000000..23df393
--- /dev/null
+++ b/templates/rag/collections_list.html
@@ -0,0 +1,43 @@
+<div class="rag-section-header">
+    Collections
+    <button class="rag-btn" hx-get="/rag/collections" hx-target="#rag-main" hx-swap="innerHTML">↻ Refresh</button>
+</div>
+{# Collections table partial. Names are `urlencode`d when used as URL path segments; the model tooltip falls back to an empty string so a missing model does not render as the literal text "None". #}
+{% if deleted %}
+<div class="rag-msg rag-msg-ok">Collection "{{ deleted }}" deleted.</div>
+{% endif %}
+
+{% if collections %}
+<table class="rag-table">
+    <thead>
+        <tr>
+            <th>Name</th>
+            <th>Docs</th>
+            <th>Embedding Model</th>
+            <th>Dim</th>
+            <th>Actions</th>
+        </tr>
+    </thead>
+    <tbody>
+    {% for col in collections %}
+        <tr>
+            <td><strong>{{ col.name }}</strong></td>
+            <td>{{ col.count if col.count is not none else "?" }}</td>
+            <td class="truncate" title="{{ col.embedding_model or '' }}">{{ col.embedding_model or "—" }}</td>
+            <td>{{ col.embedding_dimension or "—" }}</td>
+            <td class="rag-actions-col">
+                <button class="rag-btn"
+                    hx-get="/rag/collections/{{ col.name | urlencode }}"
+                    hx-target="#rag-main" hx-swap="innerHTML">Detail</button>
+                <button class="rag-btn rag-btn-danger"
+                    hx-delete="/rag/collections/{{ col.name | urlencode }}"
+                    hx-target="#rag-main" hx-swap="innerHTML"
+                    hx-confirm="Delete collection '{{ col.name }}'? This cannot be undone.">Delete</button>
+            </td>
+        </tr>
+    {% endfor %}
+    </tbody>
+</table>
+{% else %}
+<p class="rag-empty">No collections found in <code>character_storage/</code>.</p>
+{% endif %}
diff --git a/templates/rag/coverage_report.html b/templates/rag/coverage_report.html
new file mode 100644
index 0000000..da1d631
--- /dev/null
+++ b/templates/rag/coverage_report.html
@@ -0,0 +1,40 @@
+{% set pct = (result.source_coverage_ratio * 100) | round(1) %}
+{% if pct >= 75 %}{% set bar_class = "coverage-high" %}{% elif pct >= 50 %}{% set bar_class = "coverage-mid" %}{% else %}{% set bar_class = "coverage-low" %}{% endif %}
+{# Coverage partial: pct is the ratio as a percentage rounded to one decimal; bar_class picks the bar colour (>= 75 high, >= 50 mid, otherwise low). #}
+<div class="rag-section-header">Coverage: {{ result.stem }}</div>
+
+<table class="rag-table" style="max-width:360px">
+    <tbody>
+        <tr><td><strong>Entities</strong></td><td>{{ result.entities_count }}</td></tr>
+        <tr><td><strong>Coverage</strong></td>
+            <td>
+                {{ pct }}%
+                <div class="coverage-bar-outer">
+                    <div class="coverage-bar-inner {{ bar_class }}" style="width:{{ pct }}%"></div>
+                </div>
+            </td>
+        </tr>
+        <tr><td><strong>Chars covered</strong></td><td>{{ result.covered_chars }} / {{ result.total_source_chars }}</td></tr>
+    </tbody>
+</table>
+
+{% if result.category_distribution %}
+<div class="rag-subsection">
+    <div class="rag-subsection-header">Category Distribution</div>
+    <table class="rag-table" style="max-width:280px">
+        <thead><tr><th>Category</th><th>Count</th></tr></thead>
+        <tbody>
+        {% for cat, cnt in result.category_distribution.items() %}
+            <tr><td>{{ cat }}</td><td>{{ cnt }}</td></tr>
+        {% endfor %}
+        </tbody>
+    </table>
+</div>
+{% endif %}
+
+{% if result.unmapped_segments %}
+<div class="rag-subsection">
+    <div class="rag-subsection-header">Unmapped Segments (first {{ result.unmapped_segments | length }})</div>
+    <pre class="rag-pre">{{ result.unmapped_segments | join("\n---\n") }}</pre>
+</div>
+{% endif %}
diff --git a/templates/rag/evaluate_index.html b/templates/rag/evaluate_index.html
new file mode 100644
index 0000000..6742fb1
--- /dev/null
+++ b/templates/rag/evaluate_index.html
@@ -0,0 +1,33 @@
+<div class="rag-section-header">Fixture Evaluation</div>
+{# Evaluation landing partial: a fixture-pack picker that posts to /rag/evaluate/run (output swapped into #eval-results), followed by the trends table, which the Refresh button reloads into #trends-content. #}
+{% if fixture_packs %}
+<form class="rag-form-row"
+    hx-post="/rag/evaluate/run"
+    hx-target="#eval-results" hx-swap="innerHTML"
+    hx-indicator="#eval-spinner">
+    <label class="rag-label">Fixture pack</label>
+    <select class="rag-select" name="fixture_file">
+        {% for pack in fixture_packs %}
+        <option value="{{ pack }}">{{ pack }}</option>
+        {% endfor %}
+    </select>
+    <button type="submit" class="rag-btn rag-btn-primary">Run Evaluation</button>
+    <span id="eval-spinner" class="htmx-indicator">Running… (may take ~30–60 s)</span>
+</form>
+{% else %}
+<p class="rag-empty">No fixture files found in <code>tests/fixtures/</code>.</p>
+{% endif %}
+
+<div id="eval-results"></div>
+
+<div class="rag-subsection">
+    <div style="display:flex;align-items:center;gap:0.6rem">
+        <div class="rag-subsection-header" style="margin:0">Retrieval Trends</div>
+        <button class="rag-btn"
+            hx-get="/rag/evaluate/trends"
+            hx-target="#trends-content" hx-swap="innerHTML">↻ Refresh</button>
+    </div>
+    <div id="trends-content" style="margin-top:0.5rem">
+        {% include "rag/trends_table.html" %}
+    </div>
+</div>
diff --git a/templates/rag/evaluate_results.html b/templates/rag/evaluate_results.html
new file mode 100644
index 0000000..5094bb0
--- /dev/null
+++ b/templates/rag/evaluate_results.html
@@ -0,0 +1,51 @@
+{% if result %}
+{% set m = result.metrics %}
+<div class="rag-subsection-header">Results: {{ result.fixture_file }} (k={{ result.default_k }})</div>
+{# Evaluation results partial: one aggregate metrics row, then one row per case. Metrics pass through `| float` before "%.3f" formatting so a None or numeric-string value is coerced (None falls back to 0.0) instead of raising during formatting. #}
+<table class="rag-table" style="max-width:560px;margin-bottom:0.75rem">
+    <thead><tr><th>Evaluated</th><th>Skipped</th><th>Hits@k</th><th>Recall@k</th><th>MRR</th><th>Precision@k</th><th>MAP@k</th></tr></thead>
+    <tbody>
+        <tr>
+            <td>{{ m.cases | int }}</td>
+            <td>{{ result.skipped }}</td>
+            <td>{{ m.hits | int }}</td>
+            <td>{{ "%.3f" | format(m.recall_at_k | float) }}</td>
+            <td>{{ "%.3f" | format(m.mrr | float) }}</td>
+            <td>{{ "%.3f" | format(m.precision_at_k | float) }}</td>
+            <td>{{ "%.3f" | format(m.map_at_k | float) }}</td>
+        </tr>
+    </tbody>
+</table>
+
+{% if result.case_results %}
+<table class="rag-table">
+    <thead>
+        <tr><th>Case ID</th><th>Collection</th><th>Query</th><th>Status</th><th>Rank</th><th>Match</th><th>Forbidden</th></tr>
+    </thead>
+    <tbody>
+    {% for c in result.case_results %}
+        {% set hit = c.rank is not none and not c.forbidden_hit %}
+        <tr>
+            <td style="font-size:0.74rem">{{ c.case_id }}</td>
+            <td>{{ c.collection }}</td>
+            <td class="truncate" style="max-width:200px" title="{{ c.query }}">{{ c.query }}</td>
+            <td>
+                {% if c.status == "invalid" or c.status == "missing_collection" %}
+                <span class="badge badge-warn">{{ c.status }}</span>
+                {% elif c.forbidden_hit %}
+                <span class="badge badge-error">forbidden</span>
+                {% elif hit %}
+                <span class="badge badge-ok">HIT@{{ c.rank }}</span>
+                {% else %}
+                <span class="badge badge-error">MISS</span>
+                {% endif %}
+            </td>
+            <td>{{ c.rank if c.rank is not none else "—" }}</td>
+            <td>{{ c.matched_expected }}/{{ c.expected_total }}</td>
+            <td>{{ "⚠" if c.forbidden_hit else "—" }}</td>
+        </tr>
+    {% endfor %}
+    </tbody>
+</table>
+{% endif %}
+{% endif %}
diff --git a/templates/rag/file_view.html b/templates/rag/file_view.html
new file mode 100644
index 0000000..9afcd03
--- /dev/null
+++ b/templates/rag/file_view.html
@@ -0,0 +1,6 @@
+{# File viewer partial: a back button that reloads /rag/files into #rag-main, the file name, and the file content in a <pre>. #}<div class="rag-section-header">
+    <button class="back-link rag-btn"
+        hx-get="/rag/files" hx-target="#rag-main" hx-swap="innerHTML">← Files</button>
+    {{ filename }}
+</div>
+<pre class="rag-pre">{{ content }}</pre>
diff --git a/templates/rag/files_list.html b/templates/rag/files_list.html
new file mode 100644
index 0000000..518f222
--- /dev/null
+++ b/templates/rag/files_list.html
@@ -0,0 +1,61 @@
+<div class="rag-section-header">RAG Files</div>
+{# RAG files partial: lint buttons, per-file table and coverage output. File names are `urlencode`d in request paths, and the hx-vals payload is built with `tojson` (inside a single-quoted attribute) so quotes or backslashes in a stem still yield valid JSON. #}
+<div class="rag-btn-group">
+    <button class="rag-btn"
+        hx-post="/rag/lint" hx-target="#lint-results" hx-swap="innerHTML"
+        hx-indicator="#lint-spinner">Run Lint</button>
+    <button class="rag-btn"
+        hx-post="/rag/lint/fix" hx-target="#lint-results" hx-swap="innerHTML"
+        hx-indicator="#lint-spinner"
+        hx-confirm="Apply auto-fix to all message_examples files?">Lint + Fix</button>
+    <span id="lint-spinner" class="htmx-indicator">Running…</span>
+</div>
+
+<div id="lint-results"></div>
+
+{% if files %}
+<table class="rag-table" style="margin-top:0.85rem">
+    <thead>
+        <tr><th>File</th><th>Type</th><th>Size</th><th>Metadata</th><th>Actions</th></tr>
+    </thead>
+    <tbody>
+    {% for f in files %}
+        <tr>
+            <td><strong>{{ f.name }}</strong></td>
+            <td>
+                {% if f.type == "lore" %}
+                <span class="badge badge-info">lore</span>
+                {% else %}
+                <span class="badge badge-muted">mes</span>
+                {% endif %}
+            </td>
+            <td>{{ (f.size / 1024) | round(1) }} KB</td>
+            <td>
+                {% if f.has_metadata %}
+                <span class="badge badge-ok">✓</span>
+                {% else %}
+                <span class="badge badge-muted">—</span>
+                {% endif %}
+            </td>
+            <td class="rag-actions-col">
+                <button class="rag-btn"
+                    hx-get="/rag/files/{{ f.name | urlencode }}"
+                    hx-target="#rag-main" hx-swap="innerHTML">View</button>
+                {% if f.type == "lore" and f.has_metadata %}
+                <button class="rag-btn"
+                    hx-post="/rag/coverage"
+                    hx-vals='{{ {"stem": f.stem} | tojson }}'
+                    hx-target="#coverage-results" hx-swap="innerHTML"
+                    hx-indicator="#coverage-spinner">Coverage</button>
+                {% endif %}
+            </td>
+        </tr>
+    {% endfor %}
+    </tbody>
+</table>
+{% else %}
+<p class="rag-empty">No <code>.txt</code> files found in <code>rag_data/</code>.</p>
+{% endif %}
+
+<span id="coverage-spinner" class="htmx-indicator" style="display:block;margin-top:0.5rem">Running coverage…</span>
+<div id="coverage-results" style="margin-top:0.5rem"></div>
diff --git a/templates/rag/layout.html b/templates/rag/layout.html
new file mode 100644
index 0000000..3e5c3ab
--- /dev/null
+++ b/templates/rag/layout.html
@@ -0,0 +1,467 @@
+<!doctype html>
+<html lang="en">
+
+<head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>light-chat · RAG</title>
+    <script src="https://unpkg.com/htmx.org@1.9.12"></script>
+    <style>
+        :root {
+            color-scheme: dark;
+            font-family: Inter, system-ui, -apple-system, Segoe UI, Roboto, sans-serif;
+        }
+
+        body {
+            margin: 0;
+            background: #111418;
+            color: #e6edf3;
+        }
+
+        .rag-layout {
+            display: grid;
+            grid-template-columns: 180px 1fr;
+            height: 100vh;
+        }
+
+        /* ── Nav ── */
+        .rag-nav {
+            border-right: 1px solid #2d3743;
+            padding: 0.85rem 0.75rem;
+            overflow-y: auto;
+            background: #0d1117;
+            display: flex;
+            flex-direction: column;
+            gap: 0.15rem;
+        }
+
+        .nav-brand {
+            font-size: 0.82rem;
+            color: #98a6b6;
+            margin-bottom: 0.85rem;
+            padding-bottom: 0.55rem;
+            border-bottom: 1px solid #2d3743;
+            display: flex;
+            flex-direction: column;
+            gap: 0.15rem;
+        }
+
+        .nav-brand a {
+            color: #98a6b6;
+            text-decoration: none;
+            font-size: 0.78rem;
+        }
+
+        .nav-brand a:hover {
+            color: #e6edf3;
+        }
+
+        .nav-brand strong {
+            font-size: 0.9rem;
+            color: #e6edf3;
+        }
+
+        .nav-link {
+            display: block;
+            padding: 0.45rem 0.6rem;
+            border-radius: 6px;
+            color: #c9d1d9;
+            text-decoration: none;
+            font-size: 0.85rem;
+            cursor: pointer;
+            border: none;
+            background: none;
+            text-align: left;
+            width: 100%;
+        }
+
+        .nav-link:hover,
+        .nav-link.active {
+            background: #1e242b;
+            color: #e6edf3;
+        }
+
+        .nav-link.active {
+            border-left: 2px solid #1f6feb;
+            padding-left: calc(0.6rem - 2px);
+        }
+
+        /* ── Content ── */
+        .rag-content {
+            overflow-y: auto;
+            padding: 1.25rem 1.5rem;
+        }
+
+        .rag-section-header {
+            font-size: 1rem;
+            font-weight: 600;
+            margin: 0 0 0.85rem;
+            color: #e6edf3;
+            display: flex;
+            align-items: center;
+            gap: 0.6rem;
+        }
+
+        .rag-subsection {
+            margin-top: 1.25rem;
+            padding-top: 0.85rem;
+            border-top: 1px solid #2d3743;
+        }
+
+        .rag-subsection-header {
+            font-size: 0.88rem;
+            font-weight: 600;
+            color: #c9d1d9;
+            margin: 0 0 0.55rem;
+        }
+
+        /* ── Tables ── */
+        .rag-table {
+            width: 100%;
+            border-collapse: collapse;
+            font-size: 0.8rem;
+            margin-top: 0.5rem;
+        }
+
+        .rag-table th {
+            background: #1a1f26;
+            color: #e6edf3;
+            text-align: left;
+            padding: 0.35rem 0.55rem;
+            border: 1px solid #2d3743;
+            font-weight: 600;
+            white-space: nowrap;
+        }
+
+        .rag-table td {
+            padding: 0.35rem 0.55rem;
+            border: 1px solid #2d3743;
+            color: #c9d1d9;
+            vertical-align: top;
+        }
+
+        .rag-table tr:nth-child(even) td {
+            background: #0f1317;
+        }
+
+        .rag-table td.truncate {
+            max-width: 280px;
+            overflow: hidden;
+            text-overflow: ellipsis;
+            white-space: nowrap;
+        }
+
+        /* ── Buttons ── */
+        .rag-btn {
+            padding: 0.35rem 0.65rem;
+            border: 1px solid #2d3743;
+            border-radius: 6px;
+            background: #1e242b;
+            color: #e6edf3;
+            font-size: 0.79rem;
+            cursor: pointer;
+            white-space: nowrap;
+        }
+
+        .rag-btn:hover {
+            background: #262d36;
+        }
+
+        .rag-btn-primary {
+            background: #1f6feb;
+            border-color: #1f6feb;
+        }
+
+        .rag-btn-primary:hover {
+            background: #2277f5;
+        }
+
+        .rag-btn-danger {
+            background: #3b1a1a;
+            border-color: #7a3030;
+            color: #f8b4b4;
+        }
+
+        .rag-btn-danger:hover {
+            background: #4a2020;
+        }
+
+        .rag-btn-group {
+            display: flex;
+            gap: 0.4rem;
+            flex-wrap: wrap;
+            margin-bottom: 0.75rem;
+        }
+
+        /* ── Badges ── */
+        .badge {
+            display: inline-block;
+            padding: 0.12rem 0.4rem;
+            border-radius: 10px;
+            font-size: 0.72rem;
+            font-weight: 600;
+            white-space: nowrap;
+        }
+
+        .badge-ok {
+            background: #1a3b1a;
+            color: #7ec88a;
+        }
+
+        .badge-error {
+            background: #3b1a1a;
+            color: #f8b4b4;
+        }
+
+        .badge-warn {
+            background: #3b2f0a;
+            color: #ffd166;
+        }
+
+        .badge-info {
+            background: #1a2b3b;
+            color: #79c0ff;
+        }
+
+        .badge-muted {
+            background: #1e242b;
+            color: #98a6b6;
+        }
+
+        /* ── Forms ── */
+        .rag-form-row {
+            display: flex;
+            gap: 0.45rem;
+            align-items: center;
+            flex-wrap: wrap;
+            margin-bottom: 0.6rem;
+        }
+
+        .rag-input,
+        .rag-select {
+            border: 1px solid #2d3743;
+            background: #111418;
+            color: #e6edf3;
+            border-radius: 6px;
+            padding: 0.35rem 0.5rem;
+            font-size: 0.8rem;
+            min-width: 0;
+        }
+
+        .rag-input:focus,
+        .rag-select:focus {
+            outline: 1px solid #1f6feb;
+        }
+
+        .rag-input-wide {
+            flex: 1;
+            min-width: 200px;
+        }
+
+        .rag-label {
+            font-size: 0.78rem;
+            color: #98a6b6;
+            white-space: nowrap;
+        }
+
+        /* ── Status messages ── */
+        .rag-msg {
+            padding: 0.5rem 0.75rem;
+            border-radius: 6px;
+            font-size: 0.82rem;
+            margin-bottom: 0.65rem;
+        }
+
+        .rag-msg-error {
+            background: #2a1a1a;
+            border: 1px solid #7a3030;
+            color: #f8b4b4;
+        }
+
+        .rag-msg-ok {
+            background: #1a2a1a;
+            border: 1px solid #3a703a;
+            color: #7ec88a;
+        }
+
+        .rag-msg-info {
+            background: #1a2030;
+            border: 1px solid #2d5070;
+            color: #79c0ff;
+        }
+
+        /* ── Coverage bar ── */
+        .coverage-bar-outer {
+            width: 100%;
+            max-width: 320px;
+            height: 12px;
+            background: #2d3743;
+            border-radius: 6px;
+            overflow: hidden;
+            margin: 0.3rem 0;
+        }
+
+        .coverage-bar-inner {
+            height: 100%;
+            border-radius: 6px;
+            transition: width 0.3s;
+        }
+
+        .coverage-high {
+            background: #238636;
+        }
+
+        .coverage-mid {
+            background: #d29922;
+        }
+
+        .coverage-low {
+            background: #da3633;
+        }
+
+        /* ── Code / pre ── */
+        pre.rag-pre {
+            background: #0d1117;
+            border: 1px solid #2d3743;
+            border-radius: 6px;
+            padding: 0.75rem;
+            font-size: 0.76rem;
+            overflow-x: auto;
+            white-space: pre-wrap;
+            color: #98a6b6;
+            max-height: 460px;
+            overflow-y: auto;
+            margin: 0.5rem 0 0;
+        }
+
+        /* ── Job status widget ── */
+        .job-widget {
+            padding: 0.65rem 0.85rem;
+            border: 1px solid #2d3743;
+            border-radius: 8px;
+            background: #0f1317;
+            font-size: 0.82rem;
+        }
+
+        /* ── Spinner via HTMX indicator: hidden by default; an element targeted by hx-indicator receives .htmx-request itself, so both selector forms are needed ── */
+        .htmx-indicator {
+            display: none;
+            color: #98a6b6;
+            font-size: 0.78rem;
+        }
+
+        .htmx-request .htmx-indicator, .htmx-request.htmx-indicator {
+            display: inline;
+        }
+
+        /* ── Misc ── */
+        .rag-empty {
+            color: #98a6b6;
+            font-size: 0.82rem;
+        }
+
+        .rag-actions-col {
+            white-space: nowrap;
+        }
+
+        .back-link {
+            font-size: 0.8rem;
+            color: #98a6b6;
+            text-decoration: none;
+            cursor: pointer;
+        }
+
+        .back-link:hover {
+            color: #e6edf3;
+        }
+
+        code {
+            font-family: "Cascadia Code", "Fira Code", "Consolas", monospace;
+            font-size: 0.85em;
+            background: #1e242b;
+            padding: 0.1rem 0.3rem;
+            border-radius: 4px;
+        }
+    </style>
+</head>
+
+<body>
+    <div class="rag-layout">
+        <nav class="rag-nav">
+            <div class="nav-brand">
+                <a href="/">← Chat</a>
+                <strong>RAG</strong>
+            </div>
+            <button class="nav-link active" data-section="collections"
+                hx-get="/rag/collections" hx-target="#rag-main" hx-swap="innerHTML"
+                hx-indicator="#rag-spinner">Collections</button>
+            <button class="nav-link" data-section="files"
+                hx-get="/rag/files" hx-target="#rag-main" hx-swap="innerHTML"
+                hx-indicator="#rag-spinner">Files</button>
+            <button class="nav-link" data-section="evaluate"
+                hx-get="/rag/evaluate" hx-target="#rag-main" hx-swap="innerHTML"
+                hx-indicator="#rag-spinner">Evaluate</button>
+            <button class="nav-link" data-section="benchmark"
+                hx-get="/rag/benchmark" hx-target="#rag-main" hx-swap="innerHTML"
+                hx-indicator="#rag-spinner">Benchmark</button>
+            <span id="rag-spinner" class="htmx-indicator" style="margin-top:0.5rem;">Loading…</span>
+
+            <details style="margin-top:auto;padding-top:0.75rem;border-top:1px solid #2d3743;">
+                <summary style="font-size:0.78rem;color:#98a6b6;cursor:pointer;list-style:none;padding:0.2rem 0;">💡 What is this?</summary>
+                <div style="font-size:0.74rem;color:#98a6b6;line-height:1.5;margin-top:0.5rem;display:flex;flex-direction:column;gap:0.6rem;">
+                    <div>
+                        <strong style="color:#c9d1d9">Collections</strong><br>
+                        A collection is a searchable knowledge base — a database of text chunks the AI searches through during chat. Each character typically has one named after them.
+                    </div>
+                    <div>
+                        <strong style="color:#c9d1d9">Detail &amp; Query</strong><br>
+                        Open a collection to search it directly (just like the AI would during a conversation), or rebuild it from updated source files.
+                    </div>
+                    <div>
+                        <strong style="color:#c9d1d9">Files</strong><br>
+                        The raw text documents (lore files, dialogue examples) that the knowledge base is built from. You can view them, check formatting, and see coverage.
+                    </div>
+                    <div>
+                        <strong style="color:#c9d1d9">Lint</strong><br>
+                        Checks dialogue example files for formatting problems that could confuse the AI. Auto-fix applies safe corrections automatically.
+                    </div>
+                    <div>
+                        <strong style="color:#c9d1d9">Coverage</strong><br>
+                        Shows what percentage of a character's lore file is actually captured in the knowledge base. Low coverage means some information may never be retrieved.
+                    </div>
+                    <div>
+                        <strong style="color:#c9d1d9">Evaluate</strong><br>
+                        Runs a test suite of questions against the knowledge base and scores how well it finds the right information. Higher Recall and MRR scores are better.
+                    </div>
+                    <div>
+                        <strong style="color:#c9d1d9">Benchmark</strong><br>
+                        Compares different AI text-embedding models to find which one retrieves the most relevant results for your characters. Run from the command line — results appear here.
+                    </div>
+                    <div>
+                        <strong style="color:#c9d1d9">Rebuild</strong><br>
+                        After editing a source file, use Rebuild on the collection detail page to update the knowledge base with the new content.
+                    </div>
+                </div>
+            </details>
+        </nav>
+
+        <main id="rag-main" class="rag-content"
+            hx-get="/rag/collections"
+            hx-trigger="load"
+            hx-swap="innerHTML">
+            <span class="rag-empty">Loading…</span>
+        </main>
+    </div>
+
+    <script>
+        document.addEventListener("click", (e) => { // one delegated listener on the document handles every .nav-link
+            const link = e.target.closest(".nav-link"); // null when the click was outside any .nav-link
+            if (link) {
+                document.querySelectorAll(".nav-link").forEach((l) => l.classList.remove("active")); // clear the previous highlight
+                link.classList.add("active"); // mark the clicked nav entry as current
+            }
+        });
+    </script>
+</body>
+
+</html>
diff --git a/templates/rag/lint_results.html b/templates/rag/lint_results.html
new file mode 100644
index 0000000..d594de2
--- /dev/null
+++ b/templates/rag/lint_results.html
@@ -0,0 +1,49 @@
+{% if fixed %}
+<div class="rag-msg rag-msg-ok">Auto-fix applied. Re-check results below.</div>
+{% endif %}
+{# Lint results partial: one section per entry in `reports` with a PASS/FAIL badge, an optional auto-fixed badge, and a violations table (or "No violations."). #}
+{% if not reports %}
+<p class="rag-empty">No <code>*_message_examples.txt</code> files found in <code>rag_data/</code>.</p>
+{% else %}
+{% for rep in reports %}
+<div style="margin-bottom:1rem">
+    <div style="display:flex;align-items:center;gap:0.5rem;margin-bottom:0.35rem">
+        <strong style="font-size:0.85rem">{{ rep.file }}</strong>
+        {% if rep.valid %}
+        <span class="badge badge-ok">✓ PASS</span>
+        {% else %}
+        <span class="badge badge-error">✗ FAIL</span>
+        {% endif %}
+        {% if rep.auto_fixed %}
+        <span class="badge badge-warn">auto-fixed</span>
+        {% endif %}
+    </div>
+    {% if rep.violations %}
+    <table class="rag-table">
+        <thead>
+            <tr><th>Line</th><th>Rule</th><th>Severity</th><th>Message</th><th>Suggested Fix</th></tr>
+        </thead>
+        <tbody>
+        {% for v in rep.violations %}
+            <tr>
+                <td>{{ v.line_no }}</td>
+                <td><code>{{ v.rule_id }}</code></td>
+                <td>
+                    {% if v.severity == "error" %}
+                    <span class="badge badge-error">error</span>
+                    {% else %}
+                    <span class="badge badge-warn">warning</span>
+                    {% endif %}
+                </td>
+                <td>{{ v.message }}</td>
+                <td style="color:#98a6b6">{{ v.suggested_fix or "—" }}</td>
+            </tr>
+        {% endfor %}
+        </tbody>
+    </table>
+    {% else %}
+    <p class="rag-empty" style="margin:0.2rem 0">No violations.</p>
+    {% endif %}
+</div>
+{% endfor %}
+{% endif %}
diff --git a/templates/rag/push_status.html b/templates/rag/push_status.html
new file mode 100644
index 0000000..8503fdd
--- /dev/null
+++ b/templates/rag/push_status.html
@@ -0,0 +1,34 @@
+<div id="job-{{ job_id }}" class="job-widget"
+    {% if status in ("pending", "running") %}
+    hx-get="/rag/jobs/{{ job_id }}?kind={{ kind }}"
+    hx-trigger="every 2s"
+    hx-target="#job-{{ job_id }}"
+    hx-swap="outerHTML"
+    {% endif %}>
+{# Job widget: while status is pending/running the div carries hx-* attributes that re-fetch it every 2s and replace it via outerHTML; a done/error render omits them, so no further requests are triggered. #}
+    {% if status == "pending" %}
+    <span class="badge badge-muted">⏳ Queued</span>
+    <span style="font-size:0.78rem;color:#98a6b6;margin-left:0.4rem">{{ elapsed_s }}s</span>
+
+    {% elif status == "running" %}
+    <span class="badge badge-info">⚙ Running…</span>
+    <span style="font-size:0.78rem;color:#98a6b6;margin-left:0.4rem">{{ elapsed_s }}s</span>
+
+    {% elif status == "done" %}
+    <span class="badge badge-ok">✓ Done</span>
+    <span style="font-size:0.78rem;color:#98a6b6;margin-left:0.4rem">{{ elapsed_s }}s</span>
+    {% if kind == "push" and result %}
+    <p style="margin:0.45rem 0 0;font-size:0.82rem">
+        Pushed <strong>{{ result.doc_count }}</strong> chunks →
+        <strong>{{ result.collection }}</strong>
+        ({{ result.elapsed_s }}s).
+    </p>
+    {% elif result %}
+    <pre class="rag-pre" style="margin-top:0.45rem">{{ result | tojson(indent=2) }}</pre>
+    {% endif %}
+
+    {% elif status == "error" %}
+    <span class="badge badge-error">✗ Error</span>
+    <div class="rag-msg rag-msg-error" style="margin-top:0.45rem">{{ error }}</div>
+    {% endif %}
+</div>
diff --git a/templates/rag/query_results.html b/templates/rag/query_results.html
new file mode 100644
index 0000000..341faec
--- /dev/null
+++ b/templates/rag/query_results.html
@@ -0,0 +1,22 @@
+{% if results %}
+<p style="font-size:0.8rem;color:#98a6b6;margin:0.4rem 0 0.6rem">
+    {{ results | length }} result(s) for: <em>{{ query }}</em>
+</p>
+<table class="rag-table">
+    <thead>
+        <tr><th>#</th><th>Score</th><th>Text</th><th>Metadata Keys</th></tr>
+    </thead>
+    <tbody>
+    {% for r in results %}
+        <tr>
+            <td>{{ r.rank }}</td>
+            <td>{{ r.score }}</td>
+            <td><pre class="rag-pre" style="max-height:120px;margin:0">{{ r.text }}</pre></td>
+            <td style="font-size:0.72rem;color:#98a6b6">{{ r.metadata.keys() | list | join(", ") if r.metadata else "—" }}</td>
+        </tr>
+    {% endfor %}
+    </tbody>
+</table>
+{% else %}
+<p class="rag-empty">No results returned.</p>
+{% endif %}
diff --git a/templates/rag/trends_table.html b/templates/rag/trends_table.html
new file mode 100644
index 0000000..961ebd2
--- /dev/null
+++ b/templates/rag/trends_table.html
@@ -0,0 +1,25 @@
+{% if trends %}
+<table class="rag-table">
+    <thead>
+        <tr><th>Date</th><th>Fixture</th><th>k</th><th>Mode</th><th>Eval</th><th>Skip</th><th>Recall@k</th><th>MRR</th><th>Precision@k</th><th>MAP@k</th></tr>
+    </thead>
+    <tbody>
+    {% for row in trends %}
+        <tr>
+            <td style="font-size:0.72rem;white-space:nowrap">{{ row.generated_at[:16] if row.generated_at else "—" }}</td>
+            <td class="truncate" style="max-width:140px" title="{{ row.fixture_file }}">{{ row.fixture_file | replace("tests/fixtures/", "") }}</td>
+            <td>{{ row.k }}</td>
+            <td>{{ row.retrieval_mode }}</td>
+            <td>{{ row.evaluated }}</td>
+            <td>{{ row.skipped }}</td>
+            <td>{{ "%.3f" | format(row.recall_at_k | float) }}</td>
+            <td>{{ "%.3f" | format(row.mrr | float) }}</td>
+            <td>{{ "%.3f" | format(row.precision_at_k | float) }}</td>
+            <td>{{ "%.3f" | format(row.map_at_k | float) }}</td>
+        </tr>
+    {% endfor %}
+    </tbody>
+</table>
+{% else %}
+<p class="rag-empty">No trend history yet. Run an evaluation to begin tracking.</p>
+{% endif %}
diff --git a/web_app.py b/web_app.py
index d9177d5..e129617 100644
--- a/web_app.py
+++ b/web_app.py
@@ -2,23 +2,28 @@
 
 import asyncio
 import contextlib
+import io
 import json
 import threading
 import time
 import uuid
+import zipfile
 from collections.abc import AsyncGenerator, Callable
 from contextlib import asynccontextmanager
 from datetime import UTC, datetime
 from pathlib import Path
 
-from fastapi import FastAPI, Form, Request
+from fastapi import FastAPI, Form, Query, Request
 from fastapi.responses import HTMLResponse, JSONResponse, Response, StreamingResponse
 from fastapi.templating import Jinja2Templates
 from loguru import logger
 from pydantic import BaseModel
 
-from core.config import configure_logging, load_app_config
+from core import rag_manager
+from core.config import configure_logging, load_app_config, load_rag_script_config
 from core.conversation_manager import ConversationManager
+from core.job_queue import JobStore
+from core.preset_profiles import ProfileStore
 
 
 class StreamRequest(BaseModel):
@@ -64,6 +69,9 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None]:
     configure_logging(app_config)
     manager = await asyncio.to_thread(ConversationManager)
     app.state.chat_runtime = ChatRuntime(manager)
+    app.state.profile_store = ProfileStore(Path("configs") / "profiles.json")
+    app.state.rag_config = load_rag_script_config(app_config)
+    app.state.job_store = JobStore()
     try:
         yield
     finally:
@@ -78,6 +86,18 @@ def _get_runtime(request: Request) -> ChatRuntime:
     return request.app.state.chat_runtime
 
 
+def _get_profile_store(request: Request) -> ProfileStore:
+    return request.app.state.profile_store
+
+
+def _get_rag_config(request: Request) -> object:
+    return request.app.state.rag_config
+
+
+def _get_job_store(request: Request) -> JobStore:
+    return request.app.state.job_store
+
+
 def _render_chat_log(messages: list[dict[str, str]]) -> str:
     return templates.get_template("chat_messages.html").render(messages=messages)
 
@@ -157,13 +177,20 @@ def _session_listing(runtime: ChatRuntime) -> list[dict[str, str]]:
     return sessions
 
 
-def _record_retrieval_trace(runtime: ChatRuntime, message: str) -> None:
+def _record_retrieval_trace(
+    runtime: ChatRuntime,
+    message: str,
+    latency_s: float | None = None,
+    chars_emitted: int | None = None,
+) -> None:
     manager = runtime.manager
     turn_number = min(len(manager.user_message_history), len(manager.ai_message_history))
     trace = {
         "turn": turn_number,
         "at": datetime.now(tz=UTC).isoformat(),
         "query": message[:200],
+        "latency_s": round(latency_s, 2) if latency_s is not None else None,
+        "chars_emitted": chars_emitted,
         "retrieval": manager.last_retrieval_debug,
         "persona": manager.last_persona_drift,
     }
@@ -283,6 +310,55 @@ async def chat_debug_history(request: Request) -> dict[str, object]:
     }
 
 
+@app.get("/chat/diagnostics", response_class=HTMLResponse)
+async def chat_diagnostics(request: Request) -> HTMLResponse:
+    """Render per-turn diagnostics table as an HTMX partial."""
+    runtime = _get_runtime(request)
+    manager = runtime.manager
+    history = list(reversed(runtime.retrieval_history))
+    html = templates.get_template("diagnostics_panel.html").render(
+        history=history,
+        warn_threshold=manager.runtime_config.persona_drift_warning_threshold,
+        fail_threshold=manager.runtime_config.persona_drift_fail_threshold,
+    )
+    return HTMLResponse(content=html)
+
+
+@app.get("/chat/export/bundle")
+async def chat_export_bundle(request: Request) -> Response:
+    """Return a ZIP attachment holding the manifest, conversation, retrieval traces, and drift history."""
+    runtime = _get_runtime(request)
+    manager = runtime.manager
+    exported_at = datetime.now(tz=UTC)  # read the clock once so manifest and filename agree
+    turn_count = min(len(manager.user_message_history), len(manager.ai_message_history))
+    manifest = {
+        "exported_at": exported_at.isoformat(),
+        "character_name": manager.character_name,
+        "rag_collection": str(manager.rag_collection),
+        "turns": turn_count,
+    }
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
+        zf.writestr("manifest.json", json.dumps(manifest, ensure_ascii=False, indent=2))
+        zf.writestr(
+            "conversation.json",
+            json.dumps(_build_session_payload(runtime), ensure_ascii=False, indent=2),
+        )
+        zf.writestr(
+            "retrieval_traces.json",
+            json.dumps(runtime.retrieval_history, ensure_ascii=False, indent=2),
+        )
+        zf.writestr(
+            "drift_history.json",
+            json.dumps(list(manager.persona_drift_trace), ensure_ascii=False, indent=2),
+        )
+    return Response(
+        content=buf.getvalue(),
+        media_type="application/zip",
+        headers={"Content-Disposition": f"attachment; filename=bundle_{exported_at:%Y%m%dT%H%M%SZ}.zip"},
+    )
+
+
 @app.post("/chat/send", response_class=HTMLResponse)
 async def chat_send(request: Request, message: str = Form(...)) -> HTMLResponse:
     """Append user message and assistant placeholder via HTMX."""
@@ -375,7 +451,8 @@ async def guarded_stream() -> AsyncGenerator[str]:
                         runtime.ui_messages.append({"role": "assistant", "content": latest_assistant})
                 else:
                     runtime.ui_messages.append({"role": "assistant", "content": latest_assistant})
-                    _record_retrieval_trace(runtime, payload.message)
+                    elapsed = time.monotonic() - start
+                    _record_retrieval_trace(runtime, payload.message, latency_s=elapsed, chars_emitted=emitted_chars)
             elapsed = time.monotonic() - start
             logger.debug(
                 "chat_stream done chunks={} chars={} elapsed={:.2f}s",
@@ -509,3 +586,272 @@ async def chat_session_load(request: Request, session_id: str = Form(...)) -> HT
             runtime.ui_messages = [{"role": "assistant", "content": manager.first_message}]
         runtime.retrieval_history = []
         return HTMLResponse(content=_render_chat_log(runtime.ui_messages))
+
+
+def _render_profiles_panel(request: Request) -> HTMLResponse:
+    runtime = _get_runtime(request)
+    store = _get_profile_store(request)
+    html = templates.get_template("presets_panel.html").render(
+        profiles=store.list_profiles(),
+        current=store.current_values(runtime.manager.runtime_config),
+    )
+    return HTMLResponse(content=html)
+
+
+@app.get("/settings/profiles", response_class=HTMLResponse)
+async def settings_profiles_list(request: Request) -> HTMLResponse:
+    """Render the presets panel partial."""
+    return _render_profiles_panel(request)
+
+
+@app.post("/settings/profiles/save", response_class=HTMLResponse)
+async def settings_profiles_save(request: Request, name: str = Form(...)) -> HTMLResponse:
+    """Save current runtime config as a named profile, then re-render the panel."""
+    runtime = _get_runtime(request)
+    store = _get_profile_store(request)
+    clean_name = name.strip()[:40]
+    if clean_name:
+        store.save_profile(clean_name, runtime.manager.runtime_config)
+    return _render_profiles_panel(request)
+
+
+@app.post("/settings/profiles/apply", response_class=HTMLResponse)
+async def settings_profiles_apply(request: Request, name: str = Form(...)) -> HTMLResponse:
+    """Apply a saved profile to the live runtime config, then re-render the panel."""
+    runtime = _get_runtime(request)
+    store = _get_profile_store(request)
+    try:
+        changed = store.apply_profile(name, runtime.manager.runtime_config)
+        logger.info("Applied profile {!r}; changed fields: {}", name, changed)
+    except KeyError:
+        logger.warning("Profile {!r} not found", name)
+    return _render_profiles_panel(request)
+
+
+@app.post("/settings/profiles/delete", response_class=HTMLResponse)
+async def settings_profiles_delete(request: Request, name: str = Form(...)) -> HTMLResponse:
+    """Delete a saved profile, then re-render the panel."""
+    store = _get_profile_store(request)
+    store.delete_profile(name)
+    return _render_profiles_panel(request)
+
+
+# ---------------------------------------------------------------------------
+# RAG Management UI
+# ---------------------------------------------------------------------------
+
+
+@app.get("/rag", response_class=HTMLResponse)
+async def rag_page(request: Request) -> HTMLResponse:
+    """Render the standalone RAG management page."""
+    return templates.TemplateResponse("rag/layout.html", {"request": request})
+
+
+@app.get("/rag/collections", response_class=HTMLResponse)
+async def rag_collections(request: Request) -> HTMLResponse:
+    config = _get_rag_config(request)
+    collections = await asyncio.to_thread(rag_manager.list_collections, config)
+    return templates.TemplateResponse(
+        "rag/collections_list.html",
+        {"request": request, "collections": collections},
+    )
+
+
+@app.get("/rag/collections/{name}", response_class=HTMLResponse)
+async def rag_collection_detail(request: Request, name: str) -> HTMLResponse:
+    config = _get_rag_config(request)
+    info = await asyncio.to_thread(rag_manager.collection_info, config, name)
+    return templates.TemplateResponse(
+        "rag/collection_detail.html",
+        {"request": request, "info": info},
+    )
+
+
+@app.delete("/rag/collections/{name}", response_class=HTMLResponse)
+async def rag_collection_delete(request: Request, name: str) -> HTMLResponse:
+    config = _get_rag_config(request)
+    await asyncio.to_thread(rag_manager.delete_collection, config, name)
+    collections = await asyncio.to_thread(rag_manager.list_collections, config)
+    return templates.TemplateResponse(
+        "rag/collections_list.html",
+        {"request": request, "collections": collections, "deleted": name},
+    )
+
+
+@app.post("/rag/collections/{name}/query", response_class=HTMLResponse)
+async def rag_collection_query(
+    request: Request,
+    name: str,
+    query: str = Form(...),
+    k: int = Form(5),
+) -> HTMLResponse:
+    config = _get_rag_config(request)
+    results = await asyncio.to_thread(rag_manager.query_collection, config, name, query, k)
+    return templates.TemplateResponse(
+        "rag/query_results.html",
+        {"request": request, "results": results, "query": query, "collection": name},
+    )
+
+
+@app.post("/rag/collections/{name}/push", response_class=HTMLResponse)
+async def rag_collection_push(
+    request: Request,
+    name: str,
+    stem: str = Form(...),
+) -> HTMLResponse:
+    config = _get_rag_config(request)
+    job_store = _get_job_store(request)
+    job_id = job_store.submit(rag_manager.push_collection, config, stem.strip(), name)
+    return templates.TemplateResponse(
+        "rag/push_status.html",
+        {
+            "request": request,
+            "job_id": job_id,
+            "status": "pending",
+            "elapsed_s": 0,
+            "result": None,
+            "error": None,
+            "kind": "push",
+        },
+    )
+
+
+@app.post("/rag/collections/{name}/backfill-fingerprint", response_class=HTMLResponse)
+async def rag_collection_backfill(request: Request, name: str) -> HTMLResponse:
+    config = _get_rag_config(request)
+    backfilled = await asyncio.to_thread(rag_manager.backfill_fingerprint, config, name)
+    info = await asyncio.to_thread(rag_manager.collection_info, config, name)
+    return templates.TemplateResponse(
+        "rag/collection_detail.html",
+        {"request": request, "info": info, "backfilled": backfilled},
+    )
+
+
+@app.get("/rag/files", response_class=HTMLResponse)
+async def rag_files(request: Request) -> HTMLResponse:
+    config = _get_rag_config(request)
+    files = await asyncio.to_thread(rag_manager.list_rag_files, config)
+    return templates.TemplateResponse(
+        "rag/files_list.html",
+        {"request": request, "files": files},
+    )
+
+
+@app.get("/rag/files/{filename}", response_class=HTMLResponse)
+async def rag_file_view(request: Request, filename: str) -> HTMLResponse:
+    config = _get_rag_config(request)
+    content = await asyncio.to_thread(rag_manager.file_content, config, filename)
+    return templates.TemplateResponse(
+        "rag/file_view.html",
+        {"request": request, "filename": filename, "content": content},
+    )
+
+
+@app.post("/rag/lint", response_class=HTMLResponse)
+async def rag_lint(request: Request) -> HTMLResponse:
+    config = _get_rag_config(request)
+    reports = await asyncio.to_thread(rag_manager.run_lint, config)
+    return templates.TemplateResponse(
+        "rag/lint_results.html",
+        {"request": request, "reports": reports, "fixed": False},
+    )
+
+
+@app.post("/rag/lint/fix", response_class=HTMLResponse)
+async def rag_lint_fix(request: Request) -> HTMLResponse:
+    config = _get_rag_config(request)
+    reports = await asyncio.to_thread(rag_manager.run_lint, config, auto_fix=True)
+    return templates.TemplateResponse(
+        "rag/lint_results.html",
+        {"request": request, "reports": reports, "fixed": True},
+    )
+
+
+@app.post("/rag/coverage", response_class=HTMLResponse)
+async def rag_coverage(request: Request, stem: str = Form(...)) -> HTMLResponse:
+    config = _get_rag_config(request)
+    result = await asyncio.to_thread(rag_manager.run_coverage, config, stem.strip())
+    return templates.TemplateResponse(
+        "rag/coverage_report.html",
+        {"request": request, "result": result},
+    )
+
+
+@app.get("/rag/evaluate", response_class=HTMLResponse)
+async def rag_evaluate_index(request: Request) -> HTMLResponse:
+    fixture_packs = await asyncio.to_thread(rag_manager.list_fixture_packs)
+    trends = await asyncio.to_thread(rag_manager.get_fixture_trends)
+    return templates.TemplateResponse(
+        "rag/evaluate_index.html",
+        {"request": request, "fixture_packs": fixture_packs, "trends": trends},
+    )
+
+
+@app.post("/rag/evaluate/run", response_class=HTMLResponse)
+async def rag_evaluate_run(
+    request: Request,
+    fixture_file: str = Form(...),
+) -> HTMLResponse:
+    config = _get_rag_config(request)
+    job_store = _get_job_store(request)
+    job_id = job_store.submit(rag_manager.run_evaluate_fixtures, config, fixture_file)
+    return templates.TemplateResponse(
+        "rag/push_status.html",
+        {
+            "request": request,
+            "job_id": job_id,
+            "status": "pending",
+            "elapsed_s": 0,
+            "result": None,
+            "error": None,
+            "kind": "evaluate",
+            "label": "Evaluating fixtures…",
+        },
+    )
+
+
+@app.get("/rag/evaluate/trends", response_class=HTMLResponse)
+async def rag_evaluate_trends(request: Request) -> HTMLResponse:
+    trends = await asyncio.to_thread(rag_manager.get_fixture_trends)
+    return templates.TemplateResponse(
+        "rag/trends_table.html",
+        {"request": request, "trends": trends},
+    )
+
+
+@app.get("/rag/benchmark", response_class=HTMLResponse)
+async def rag_benchmark(request: Request) -> HTMLResponse:
+    results = await asyncio.to_thread(rag_manager.get_benchmark_results)
+    return templates.TemplateResponse(
+        "rag/benchmark_results.html",
+        {"request": request, "results": results},
+    )
+
+
+@app.get("/rag/jobs/{job_id}", response_class=HTMLResponse)
+async def rag_job_status(
+    request: Request,
+    job_id: str,
+    kind: str = Query("push"),
+) -> HTMLResponse:
+    job_store = _get_job_store(request)
+    job = job_store.get(job_id)
+    if job is None:
+        return HTMLResponse('<span class="badge badge-error">Job not found</span>')
+    if job["status"] == "done" and kind == "evaluate":
+        return templates.TemplateResponse(
+            "rag/evaluate_results.html",
+            {"request": request, "result": job["result"]},
+        )
+    return templates.TemplateResponse(
+        "rag/push_status.html",
+        {
+            "request": request,
+            "job_id": job_id,
+            "status": job["status"],
+            "elapsed_s": round(job["elapsed_s"], 1),
+            "result": job["result"],
+            "error": job["error"],
+            "kind": kind,
+        },
+    )

From b4741535d61ab7ac43b05a1b3599f201d54c7821 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 3 Apr 2026 11:13:34 +0000
Subject: [PATCH 2/2] Address PR review comments: fix frozen config apply,
 locking, path traversal, error handling, XSS, tests

Agent-Logs-Url: https://github.com/ossirytk/light-chat/sessions/0fba773e-cf10-4054-af37-88aebf97d94b

Co-authored-by: ossirytk <136164501+ossirytk@users.noreply.github.com>
---
 core/job_queue.py             |   5 +-
 core/preset_profiles.py       |  14 +-
 core/rag_manager.py           |  44 +++++--
 tests/test_preset_profiles.py | 234 ++++++++++++++++++++++++++++++++++
 web_app.py                    |  67 ++++++++--
 5 files changed, 341 insertions(+), 23 deletions(-)
 create mode 100644 tests/test_preset_profiles.py

diff --git a/core/job_queue.py b/core/job_queue.py
index 0020c51..6b85413 100644
--- a/core/job_queue.py
+++ b/core/job_queue.py
@@ -54,8 +54,11 @@ def submit(self, fn: Callable[..., Any], *args: object, **kwargs: object) -> str
         job_id = uuid.uuid4().hex[:12]
         job = Job(job_id)
         with self._lock:
-            self._jobs[job_id] = job
             self._evict_old()
+            if len(self._jobs) >= self.MAX_JOBS:
+                msg = f"Job store is full ({self.MAX_JOBS} active jobs); please wait for a job to finish"
+                raise RuntimeError(msg)
+            self._jobs[job_id] = job
 
         def _run() -> None:
             job.status = "running"
diff --git a/core/preset_profiles.py b/core/preset_profiles.py
index f7e6db9..e7b7749 100644
--- a/core/preset_profiles.py
+++ b/core/preset_profiles.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import dataclasses
 import json
 from typing import TYPE_CHECKING
 
@@ -58,18 +59,23 @@ def get_profile(self, name: str) -> dict[str, object]:
             raise KeyError(msg)
         return dict(data[name])
 
-    def apply_profile(self, name: str, config: ConversationRuntimeConfig) -> list[str]:
-        """Write profile values onto *config* in place; return list of changed field names."""
+    def apply_profile(
+        self, name: str, config: ConversationRuntimeConfig
+    ) -> tuple[ConversationRuntimeConfig, list[str]]:
+        """Return a new config with profile values applied and list of changed field names."""
         profile = self.get_profile(name)
+        validated_updates: dict[str, object] = {}
         changed: list[str] = []
         for field, value in profile.items():
             if field not in PROFILE_FIELDS:
                 continue
             current = getattr(config, field, None)
             if current != value:
-                setattr(config, field, value)
+                validated_updates[field] = value
                 changed.append(field)
-        return changed
+        if validated_updates:
+            config = dataclasses.replace(config, **validated_updates)
+        return config, changed
 
     def delete_profile(self, name: str) -> None:
         """Remove *name* from the store (no-op if not found)."""
diff --git a/core/rag_manager.py b/core/rag_manager.py
index 8a6c95c..dc0be54 100644
--- a/core/rag_manager.py
+++ b/core/rag_manager.py
@@ -8,6 +8,7 @@
 
 import csv
 import json
+import re
 import time
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
@@ -18,6 +19,13 @@
 if TYPE_CHECKING:
     from core.config import RagScriptConfig
 
+_SAFE_STEM_RE: re.Pattern[str] = re.compile(r"^[a-zA-Z0-9_-]+$")
+
+
+def is_valid_stem(stem: str) -> bool:
+    """Return True when *stem* consists solely of letters, digits, underscores, and hyphens."""
+    return bool(_SAFE_STEM_RE.fullmatch(stem))  # fullmatch: with match(), "$" also accepts a trailing newline
+
 
 def _chroma_client(persist_dir: str) -> chromadb.PersistentClient:
     return chromadb.PersistentClient(
@@ -242,9 +250,16 @@ def run_coverage(config: RagScriptConfig, stem: str) -> dict[str, Any] | None:
         load_metadata_file,
     )
 
-    rag_dir = Path(config.documents_directory)
-    source_file = rag_dir / f"{stem}.txt"
-    metadata_file = rag_dir / f"{stem}.json"
+    if not is_valid_stem(stem):
+        msg = f"Invalid stem {stem!r}: only letters, digits, underscores, and hyphens are allowed"
+        raise ValueError(msg)
+
+    rag_dir = Path(config.documents_directory).resolve()
+    source_file = (rag_dir / f"{stem}.txt").resolve()
+    metadata_file = (rag_dir / f"{stem}.json").resolve()
+    if not source_file.is_relative_to(rag_dir) or not metadata_file.is_relative_to(rag_dir):
+        msg = f"Stem {stem!r} resolves outside documents directory"
+        raise ValueError(msg)
     if not source_file.exists() or not metadata_file.exists():
         return None
     source_text = source_file.read_text(encoding="utf-8")
@@ -285,9 +300,16 @@ def run_evaluate_fixtures(
     from scripts.rag.manage_collections_core_evaluation import _execute_fixture_evaluation  # noqa: PLC0415
     from scripts.rag.manage_collections_core_types import FixtureEvalOptions  # noqa: PLC0415
 
-    fixture_path = Path(tests_dir) / fixture_file
-    if not fixture_path.exists():
-        return None
+    available_fixtures = set(list_fixture_packs(tests_dir))
+    if fixture_file not in available_fixtures:
+        msg = f"Unknown fixture pack: {fixture_file!r}"
+        raise FileNotFoundError(msg)
+
+    fixture_dir = Path(tests_dir).resolve()
+    fixture_path = (fixture_dir / fixture_file).resolve()
+    if not fixture_path.is_relative_to(fixture_dir) or not fixture_path.exists():
+        msg = f"Fixture pack not found: {fixture_file!r}"
+        raise FileNotFoundError(msg)
     options = FixtureEvalOptions(
         fixture_file=fixture_path,
         k=None,
@@ -363,8 +385,14 @@ def push_collection(
         resolve_metadata_file,
     )
 
-    rag_dir = Path(config.documents_directory)
-    file_path = rag_dir / f"{stem}.txt"
+    rag_dir = Path(config.documents_directory).resolve()
+    if not is_valid_stem(stem):
+        msg = f"Invalid stem {stem!r}: only letters, digits, underscores, and hyphens are allowed"
+        raise ValueError(msg)
+    file_path = (rag_dir / f"{stem}.txt").resolve()
+    if not file_path.is_relative_to(rag_dir):
+        msg = f"Stem {stem!r} resolves outside documents directory"
+        raise ValueError(msg)
     if not file_path.exists():
         msg = f"Source file not found: {file_path}"
         raise FileNotFoundError(msg)
diff --git a/tests/test_preset_profiles.py b/tests/test_preset_profiles.py
new file mode 100644
index 0000000..1f4d0d7
--- /dev/null
+++ b/tests/test_preset_profiles.py
@@ -0,0 +1,234 @@
+"""Unit tests for core/preset_profiles.py."""
+
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING, Any
+
+import pytest
+
+from core.config import ConversationRuntimeConfig
+from core.preset_profiles import PROFILE_FIELDS, ProfileStore
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+def _make_config(**overrides: Any) -> ConversationRuntimeConfig:
+    """Return a minimal ConversationRuntimeConfig with sensible defaults."""
+    defaults: dict[str, Any] = {
+        "persist_directory": "db",
+        "key_storage": "keys",
+        "embedding_cache": "cache",
+        "embedding_device": "cpu",
+        "embedding_model": "model",
+        "rag_collection": "col",
+        "rag_k": 5,
+        "rag_k_mes": 3,
+        "max_history_turns": 10,
+        "use_dynamic_context": False,
+        "reserved_for_response": 200,
+        "min_history_turns": 2,
+        "history_summarization_enabled": False,
+        "history_summarization_threshold": 20,
+        "history_summarization_keep_recent": 5,
+        "history_summarization_max_entries": 10,
+        "history_summarization_max_chars": 500,
+        "check_model_context": False,
+        "auto_adjust_model_context": False,
+        "model_type": "llama",
+        "target_vram_usage": 0.9,
+        "layers": -1,
+        "kv_cache_quant": "q8_0",
+        "max_vector_context_chars": 2200,
+        "small_talk_max_words": 6,
+        "followup_rag_max_words": 10,
+        "persona_drift_enabled": False,
+        "persona_drift_warning_threshold": 0.4,
+        "persona_drift_fail_threshold": 0.6,
+        "persona_drift_history_window": 5,
+        "persona_drift_heuristic_weight": 0.5,
+        "persona_drift_semantic_weight": 0.5,
+        "use_mmr": False,
+        "rag_fetch_k": 20,
+        "lambda_mult": 0.5,
+        "rag_rerank_enabled": False,
+        "rag_rerank_model": "",
+        "rag_rerank_top_n": 3,
+        "rag_telemetry_enabled": False,
+        "rag_multi_query_enabled": False,
+        "rag_multi_query_max_variants": 3,
+        "rag_sentence_compression_enabled": False,
+        "rag_sentence_compression_max_sentences": 3,
+        "chunk_size_estimate": 512,
+        "max_initial_retrieval": 20,
+        "debug_context": False,
+        "debug_prompt": False,
+        "debug_prompt_fingerprint": False,
+        "max_stream_chars": 4000,
+        "max_silent_stream_chars": 200,
+        "empty_stream_fallback": "",
+        "quality_fallback_response": "",
+    }
+    defaults.update(overrides)
+    return ConversationRuntimeConfig(**defaults)
+
+
+class TestProfileStoreLoadSave:
+    def test_empty_when_file_missing(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        assert store.list_profiles() == []
+
+    def test_returns_empty_for_invalid_json(self, tmp_path: Path) -> None:
+        p = tmp_path / "profiles.json"
+        p.write_text("not json", encoding="utf-8")
+        store = ProfileStore(p)
+        assert store.list_profiles() == []
+
+    def test_save_creates_file(self, tmp_path: Path) -> None:
+        path = tmp_path / "profiles.json"
+        store = ProfileStore(path)
+        config = _make_config()
+        store.save_profile("myprofile", config)
+        assert path.exists()
+
+    def test_save_and_list(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        config = _make_config()
+        store.save_profile("alpha", config)
+        store.save_profile("beta", config)
+        assert store.list_profiles() == ["alpha", "beta"]
+
+    def test_save_snapshots_profile_fields(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        config = _make_config(rag_k=7, use_mmr=True)
+        store.save_profile("p1", config)
+        data = json.loads((tmp_path / "profiles.json").read_text(encoding="utf-8"))
+        assert data["p1"]["rag_k"] == 7
+        assert data["p1"]["use_mmr"] is True
+
+    def test_save_only_stores_profile_fields(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        config = _make_config()
+        store.save_profile("p1", config)
+        data = json.loads((tmp_path / "profiles.json").read_text(encoding="utf-8"))
+        for key in data["p1"]:
+            assert key in PROFILE_FIELDS
+
+    def test_overwrite_existing_profile(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        store.save_profile("p1", _make_config(rag_k=3))
+        store.save_profile("p1", _make_config(rag_k=9))
+        profile = store.get_profile("p1")
+        assert profile["rag_k"] == 9
+
+
+class TestProfileStoreGetDelete:
+    def test_get_returns_stored_values(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        config = _make_config(rag_k=11, debug_context=True)
+        store.save_profile("test", config)
+        profile = store.get_profile("test")
+        assert profile["rag_k"] == 11
+        assert profile["debug_context"] is True
+
+    def test_get_unknown_profile_raises_key_error(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        with pytest.raises(KeyError, match="not found"):
+            store.get_profile("nonexistent")
+
+    def test_delete_removes_profile(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        config = _make_config()
+        store.save_profile("gone", config)
+        assert "gone" in store.list_profiles()
+        store.delete_profile("gone")
+        assert "gone" not in store.list_profiles()
+
+    def test_delete_nonexistent_is_noop(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        store.delete_profile("doesnotexist")  # should not raise
+
+
+class TestProfileStoreApply:
+    def test_apply_returns_new_config_instance(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        config = _make_config(rag_k=5)
+        store.save_profile("p", config)
+        new_config, _ = store.apply_profile("p", config)
+        # apply_profile must return a new (or equal) instance - never mutate in place
+        assert isinstance(new_config, ConversationRuntimeConfig)
+
+    def test_apply_updates_fields(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        store.save_profile("fast", _make_config(rag_k=2, use_mmr=True))
+        original = _make_config(rag_k=5, use_mmr=False)
+        new_config, changed = store.apply_profile("fast", original)
+        assert new_config.rag_k == 2
+        assert new_config.use_mmr is True
+        assert set(changed) == {"rag_k", "use_mmr"}
+
+    def test_apply_does_not_mutate_original_config(self, tmp_path: Path) -> None:
+        """ConversationRuntimeConfig is frozen; apply must not raise FrozenInstanceError."""
+        store = ProfileStore(tmp_path / "profiles.json")
+        store.save_profile("mmr_on", _make_config(use_mmr=True))
+        original = _make_config(use_mmr=False)
+        new_config, _ = store.apply_profile("mmr_on", original)
+        # Original must be unchanged (frozen dataclass semantics)
+        assert original.use_mmr is False
+        assert new_config.use_mmr is True
+
+    def test_apply_no_change_returns_same_values(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        config = _make_config(rag_k=5)
+        store.save_profile("same", config)
+        new_config, changed = store.apply_profile("same", config)
+        assert changed == []
+        assert new_config.rag_k == 5
+
+    def test_apply_unknown_profile_raises_key_error(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        config = _make_config()
+        with pytest.raises(KeyError):
+            store.apply_profile("ghost", config)
+
+    def test_apply_skips_unknown_fields_in_stored_data(self, tmp_path: Path) -> None:
+        """Stored profiles with extra/unknown fields must be silently ignored."""
+        path = tmp_path / "profiles.json"
+        path.write_text(
+            json.dumps({"p": {"rag_k": 4, "unknown_field": "ignored"}}),
+            encoding="utf-8",
+        )
+        store = ProfileStore(path)
+        original = _make_config(rag_k=7)
+        new_config, changed = store.apply_profile("p", original)
+        assert new_config.rag_k == 4
+        assert "unknown_field" not in changed
+
+    def test_apply_returns_dataclass_replace_result(self, tmp_path: Path) -> None:
+        """Verify that apply_profile uses dataclasses.replace (frozen-safe)."""
+        store = ProfileStore(tmp_path / "profiles.json")
+        store.save_profile("r", _make_config(rag_k=99))
+        original = _make_config(rag_k=1)
+        new_config, _ = store.apply_profile("r", original)
+        # dataclasses.replace produces a new instance
+        assert new_config is not original
+        assert new_config.rag_k == 99
+        # All other fields are preserved
+        assert new_config.embedding_model == original.embedding_model
+
+
+class TestProfileStoreCurrentValues:
+    def test_current_values_returns_all_profile_fields(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        config = _make_config()
+        values = store.current_values(config)
+        assert set(values.keys()) == set(PROFILE_FIELDS)
+
+    def test_current_values_matches_config(self, tmp_path: Path) -> None:
+        store = ProfileStore(tmp_path / "profiles.json")
+        config = _make_config(rag_k=42, use_mmr=True, debug_context=True)
+        values = store.current_values(config)
+        assert values["rag_k"] == 42
+        assert values["use_mmr"] is True
+        assert values["debug_context"] is True
diff --git a/web_app.py b/web_app.py
index e129617..ea5e240 100644
--- a/web_app.py
+++ b/web_app.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import contextlib
+import html
 import io
 import json
 import threading
@@ -620,12 +621,16 @@ async def settings_profiles_apply(request: Request, name: str = Form(...)) -> HT
     """Apply a saved profile to the live runtime config, then re-render the panel."""
     runtime = _get_runtime(request)
     store = _get_profile_store(request)
-    try:
-        changed = store.apply_profile(name, runtime.manager.runtime_config)
-        logger.info("Applied profile {!r}; changed fields: {}", name, changed)
-    except KeyError:
-        logger.warning("Profile {!r} not found", name)
-    return _render_profiles_panel(request)
+    async with runtime.lock:
+        try:
+            new_config, changed = store.apply_profile(name, runtime.manager.runtime_config)
+            runtime.manager.runtime_config = new_config
+            runtime.manager.rag_k = new_config.rag_k
+            runtime.manager.rag_k_mes = new_config.rag_k_mes
+            logger.info("Applied profile {!r}; changed fields: {}", name, changed)
+        except KeyError:
+            logger.warning("Profile {!r} not found", name)
+        return _render_profiles_panel(request)
 
 
 @app.post("/settings/profiles/delete", response_class=HTMLResponse)
@@ -661,6 +666,10 @@ async def rag_collections(request: Request) -> HTMLResponse:
 async def rag_collection_detail(request: Request, name: str) -> HTMLResponse:
     config = _get_rag_config(request)
     info = await asyncio.to_thread(rag_manager.collection_info, config, name)
+    if info is None:
+        return HTMLResponse(
+            content=f"<p>Collection <b>{html.escape(name)}</b> not found.</p>", status_code=404
+        )
     return templates.TemplateResponse(
         "rag/collection_detail.html",
         {"request": request, "info": info},
@@ -670,7 +679,14 @@ async def rag_collection_detail(request: Request, name: str) -> HTMLResponse:
 @app.delete("/rag/collections/{name}", response_class=HTMLResponse)
 async def rag_collection_delete(request: Request, name: str) -> HTMLResponse:
     config = _get_rag_config(request)
-    await asyncio.to_thread(rag_manager.delete_collection, config, name)
+    try:
+        await asyncio.to_thread(rag_manager.delete_collection, config, name)
+    except Exception as exc:
+        logger.warning("Failed to delete collection {!r}: {}", name, exc)
+        return HTMLResponse(
+            content=f"<p class='text-error'>Could not delete collection <b>{html.escape(name)}</b>.</p>",
+            status_code=404,
+        )
     collections = await asyncio.to_thread(rag_manager.list_collections, config)
     return templates.TemplateResponse(
         "rag/collections_list.html",
@@ -686,7 +702,14 @@ async def rag_collection_query(
     k: int = Form(5),
 ) -> HTMLResponse:
     config = _get_rag_config(request)
-    results = await asyncio.to_thread(rag_manager.query_collection, config, name, query, k)
+    try:
+        results = await asyncio.to_thread(rag_manager.query_collection, config, name, query, k)
+    except Exception as exc:
+        logger.warning("Query failed for collection {!r}: {}", name, exc)
+        return HTMLResponse(
+            content="<p class='text-error'>Query failed. See server logs for details.</p>",
+            status_code=400,
+        )
     return templates.TemplateResponse(
         "rag/query_results.html",
         {"request": request, "results": results, "query": query, "collection": name},
@@ -699,9 +722,13 @@ async def rag_collection_push(
     name: str,
     stem: str = Form(...),
 ) -> HTMLResponse:
+    clean_stem = stem.strip()
+    if not rag_manager.is_valid_stem(clean_stem):
+        _msg = "Invalid stem: only letters, digits, underscores, and hyphens are allowed."
+        return HTMLResponse(content=f"<p class='text-error'>{_msg}</p>", status_code=400)
     config = _get_rag_config(request)
     job_store = _get_job_store(request)
-    job_id = job_store.submit(rag_manager.push_collection, config, stem.strip(), name)
+    job_id = job_store.submit(rag_manager.push_collection, config, clean_stem, name)
     return templates.TemplateResponse(
         "rag/push_status.html",
         {
@@ -741,6 +768,11 @@ async def rag_files(request: Request) -> HTMLResponse:
 async def rag_file_view(request: Request, filename: str) -> HTMLResponse:
     config = _get_rag_config(request)
     content = await asyncio.to_thread(rag_manager.file_content, config, filename)
+    if content is None:
+        return HTMLResponse(
+            content=f"<p>File <b>{html.escape(filename)}</b> not found or not accessible.</p>",
+            status_code=404,
+        )
     return templates.TemplateResponse(
         "rag/file_view.html",
         {"request": request, "filename": filename, "content": content},
@@ -769,8 +801,17 @@ async def rag_lint_fix(request: Request) -> HTMLResponse:
 
 @app.post("/rag/coverage", response_class=HTMLResponse)
 async def rag_coverage(request: Request, stem: str = Form(...)) -> HTMLResponse:
+    clean_stem = stem.strip()
+    if not rag_manager.is_valid_stem(clean_stem):
+        _msg = "Invalid stem: only letters, digits, underscores, and hyphens are allowed."
+        return HTMLResponse(content=f"<p class='text-error'>{_msg}</p>", status_code=400)
     config = _get_rag_config(request)
-    result = await asyncio.to_thread(rag_manager.run_coverage, config, stem.strip())
+    result = await asyncio.to_thread(rag_manager.run_coverage, config, clean_stem)
+    if result is None:
+        return HTMLResponse(
+            content="<p>Coverage data not found (missing .txt or .json pair).</p>",
+            status_code=404,
+        )
     return templates.TemplateResponse(
         "rag/coverage_report.html",
         {"request": request, "result": result},
@@ -792,6 +833,12 @@ async def rag_evaluate_run(
     request: Request,
     fixture_file: str = Form(...),
 ) -> HTMLResponse:
+    available = await asyncio.to_thread(rag_manager.list_fixture_packs)
+    if fixture_file not in available:
+        return HTMLResponse(
+            content="<p class='text-error'>Unknown fixture pack.</p>",
+            status_code=400,
+        )
     config = _get_rag_config(request)
     job_store = _get_job_store(request)
     job_id = job_store.submit(rag_manager.run_evaluate_fixtures, config, fixture_file)