ossirytk · ossirytk · Apr 3, 2026 · Mar 29, 2026 · Apr 3, 2026 · Copilot
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -25,6 +25,49 @@ All terminal commands should be reproducible from the supported shell/editor com
 
 ---
 
+## 0.1 Available CLI Tools
+
+The following tools are installed locally and available for use in terminal workflows and agent tasks:
+
+| Tool | Purpose |
+|------|---------|
+| `diffutils` | File comparison (`diff`, `cmp`, `diff3`, `sdiff`) |
+| `fd` | Fast, user-friendly alternative to `find` for file search |
+| `fzf` | General-purpose fuzzy finder for interactive filtering |
+| `ripgrep` (`rg`) | Fast regex search across files; prefer over `grep`/`Select-String` |
+| `zip` | Archive creation and updating (`.zip`); extraction uses the separate `unzip` command |
+| `tokei` | Count lines of code by language |
+| `ast-grep` (`sg`) | Structural code search and rewriting using AST patterns |
+| `jq` | JSON query and transformation CLI |
+| `yq` | YAML/JSON/TOML query and transformation CLI |
+| `hyperfine` | Command-line benchmarking with statistical output |
+| `pre-commit` | Run and manage repository pre-commit hooks |
+| `http` / `https` (HTTPie) | Human-friendly HTTP API client |
+| `just` | Project task runner via `justfile` recipes |
+| `difft` (difftastic) | Syntax-aware structural diffing |
+
+Prefer these tools over PowerShell built-ins where applicable (e.g., use `rg` instead of `Select-String`, use `fd` instead of `Get-ChildItem` for file discovery).
+
+### Preferred command order
+
+- Content search: `rg` first, then `ast-grep` for structural/language-aware matching
+- File discovery: `fd` first, then `rg --files` as a fallback
+- JSON config inspection: `jq`
+- YAML/TOML inspection: `yq`
+- HTTP/API smoke checks: `http` / `https` (HTTPie)
+- Task orchestration: `just` recipes when a `justfile` exists
+- Diff/review: `difft` for syntax-aware diffs, `diff` for plain text diffs
+- Performance comparisons: `hyperfine` for repeatable timing
+
+### Avoid in autonomous runs
+
+- Avoid interactive-only flows (for example `fzf` prompts) unless the user explicitly asks for interactive selection
+- Avoid destructive git/file operations unless the user explicitly approves them
+- Avoid long-running watch commands by default; use one-shot checks first, then switch to watch mode only when requested
+- Avoid invoking `pre-commit run --all-files` on very large repos when a targeted path or hook is enough for the task
+
+---
+
 ## 1. Authoritative Tools & Source of Truth
 
 ### Python

diff --git a/AGENTS.MD b/AGENTS.MD
@@ -25,6 +25,49 @@ All terminal commands should be reproducible from the supported shell/editor com
 
 ---
 
+## 0.1 Available CLI Tools
+
+The following tools are installed locally and available for use in terminal workflows and agent tasks:
+
+| Tool | Purpose |
+|------|---------|
+| `diffutils` | File comparison (`diff`, `cmp`, `diff3`, `sdiff`) |
+| `fd` | Fast, user-friendly alternative to `find` for file search |
+| `fzf` | General-purpose fuzzy finder for interactive filtering |
+| `ripgrep` (`rg`) | Fast regex search across files; prefer over `grep`/`Select-String` |
+| `zip` | Archive creation and updating (`.zip`); extraction uses the separate `unzip` command |
+| `tokei` | Count lines of code by language |
+| `ast-grep` (`sg`) | Structural code search and rewriting using AST patterns |
+| `jq` | JSON query and transformation CLI |
+| `yq` | YAML/JSON/TOML query and transformation CLI |
+| `hyperfine` | Command-line benchmarking with statistical output |
+| `pre-commit` | Run and manage repository pre-commit hooks |
+| `http` / `https` (HTTPie) | Human-friendly HTTP API client |
+| `just` | Project task runner via `justfile` recipes |
+| `difft` (difftastic) | Syntax-aware structural diffing |
+
+Prefer these tools over PowerShell built-ins where applicable (e.g., use `rg` instead of `Select-String`, use `fd` instead of `Get-ChildItem` for file discovery).
+
+### Preferred command order
+
+- Content search: `rg` first, then `ast-grep` for structural/language-aware matching
+- File discovery: `fd` first, then `rg --files` as a fallback
+- JSON config inspection: `jq`
+- YAML/TOML inspection: `yq`
+- HTTP/API smoke checks: `http` / `https` (HTTPie)
+- Task orchestration: `just` recipes when a `justfile` exists
+- Diff/review: `difft` for syntax-aware diffs, `diff` for plain text diffs
+- Performance comparisons: `hyperfine` for repeatable timing
+
+### Avoid in autonomous runs
+
+- Avoid interactive-only flows (for example `fzf` prompts) unless the user explicitly asks for interactive selection
+- Avoid destructive git/file operations unless the user explicitly approves them
+- Avoid long-running watch commands by default; use one-shot checks first, then switch to watch mode only when requested
+- Avoid invoking `pre-commit run --all-files` on very large repos when a targeted path or hook is enough for the task
+
+---
+
 ## 1. Authoritative Tools & Source of Truth
 
 ### Python

diff --git a/core/job_queue.py b/core/job_queue.py
@@ -0,0 +1,90 @@
+"""Simple in-memory job store for long-running RAG web operations.
+
+Jobs run in background threads. Route handlers poll for status via HTMX
+(`hx-trigger="every 2s"`). The job status endpoint stops including the
+polling trigger once the job reaches a terminal state (done or error).
+"""
+
+from __future__ import annotations
+
+import threading
+import time
+import uuid
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+
+class Job:
+    """In-memory representation of a background job.
+
+    A job starts in the ``"pending"`` state with no result and no error.
+    ``started_at`` and ``finished_at`` are ``time.monotonic()`` readings, so
+    only the difference between them is meaningful.
+    """
+
+    __slots__ = ("error", "finished_at", "id", "result", "started_at", "status")
+
+    def __init__(self, job_id: str) -> None:
+        """Create a pending job identified by *job_id* and start its clock."""
+        self.id = job_id
+        self.status: str = "pending"
+        self.result: Any = None
+        self.error: str | None = None
+        self.started_at: float = time.monotonic()
+        # Stays None until the job is marked as finished.
+        self.finished_at: float | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return a snapshot of the job as a plain dict.
+
+        The dict has the keys ``id``, ``status``, ``result``, ``error`` and
+        ``elapsed_s``. ``elapsed_s`` is the run time in seconds rounded to two
+        decimals: measured up to ``finished_at`` once that is set, otherwise
+        up to the moment of the call.
+        """
+        # Test for None explicitly: a finish time of 0.0 is falsy, and an
+        # ``or`` fallback would treat such a job as still running.
+        end = time.monotonic() if self.finished_at is None else self.finished_at
+        return {
+            "id": self.id,
+            "status": self.status,
+            "result": self.result,
+            "error": self.error,
+            "elapsed_s": round(end - self.started_at, 2),
+        }
+
+
+class JobStore:
+    """Thread-safe store for background jobs.
+
+    The job table is only touched while holding ``_lock``. At most
+    ``MAX_JOBS`` jobs are kept: when the table is full, the oldest finished
+    jobs are dropped to make room for a new submission.
+    """
+
+    MAX_JOBS: int = 50
+
+    def __init__(self) -> None:
+        """Create an empty store."""
+        self._jobs: dict[str, Job] = {}
+        self._lock = threading.Lock()
+
+    def submit(self, fn: Callable[..., Any], *args: object, **kwargs: object) -> str:
+        """Submit a callable as a background job; returns a job_id immediately.
+
+        *fn* is called as ``fn(*args, **kwargs)`` in a daemon thread. Its
+        return value becomes the job result; an ``Exception`` it raises is
+        recorded as the job error instead of propagating.
+
+        Raises:
+            RuntimeError: If the store holds ``MAX_JOBS`` jobs and none of
+                them has finished, so no slot can be freed.
+        """
+        job_id = uuid.uuid4().hex[:12]
+        job = Job(job_id)
+        with self._lock:
+            self._evict_old()
+            if len(self._jobs) >= self.MAX_JOBS:
+                msg = f"Job store is full ({self.MAX_JOBS} active jobs); please wait for a job to finish"
+                raise RuntimeError(msg)
+            self._jobs[job_id] = job
+
+        def _run() -> None:
+            job.status = "running"
+            outcome = "running"
+            try:
+                job.result = fn(*args, **kwargs)
+                outcome = "done"
+            except Exception as exc:
+                # Fall back to the exception type so an exception raised
+                # without a message still yields a non-empty error string.
+                job.error = str(exc) or type(exc).__name__
+                outcome = "error"
+            finally:
+                # Stamp the finish time before publishing the terminal status,
+                # so a poller that sees "done"/"error" also sees the final
+                # elapsed time and eviction always has a finish time to sort on.
+                job.finished_at = time.monotonic()
+                job.status = outcome
+
+        threading.Thread(target=_run, daemon=True).start()
+        return job_id
+
+    def get(self, job_id: str) -> dict[str, Any] | None:
+        """Return job state dict, or None if job_id is unknown."""
+        with self._lock:
+            job = self._jobs.get(job_id)
+        return job.to_dict() if job else None
+
+    def _evict_old(self) -> None:
+        """Drop the oldest finished jobs so one more job fits (called under lock).
+
+        Only jobs whose status is ``"done"`` or ``"error"`` are candidates. If
+        too few jobs have finished, the store stays at or above the cap and
+        the caller decides how to react.
+        """
+        # +1 because this runs *before* the new job is inserted: a store that
+        # is exactly at the cap must already give up one slot.
+        overflow = len(self._jobs) - self.MAX_JOBS + 1
+        if overflow <= 0:
+            return
+        finished = sorted(
+            (j for j in self._jobs.values() if j.status in {"done", "error"}),
+            key=lambda j: j.finished_at or 0,
+        )
+        for j in finished[:overflow]:
+            del self._jobs[j.id]
diff --git a/core/preset_profiles.py b/core/preset_profiles.py
@@ -0,0 +1,88 @@
+"""Saveable preset profiles for runtime retrieval settings."""
+
+from __future__ import annotations
+
+import dataclasses
+import json
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from core.config import ConversationRuntimeConfig
+
+# Names of the config attributes that make up a profile. ProfileStore reads
+# exactly these attributes when it snapshots a config, and ignores any other
+# key found in a stored profile when it applies one.
+PROFILE_FIELDS: list[str] = [
+    "use_mmr",
+    "rag_rerank_enabled",
+    "rag_sentence_compression_enabled",
+    "rag_multi_query_enabled",
+    "rag_k",
+    "rag_k_mes",
+    "debug_context",
+]
+
+
+class ProfileStore:
+    """Persist and apply named retrieval-setting presets stored in a JSON file.
+
+    The file holds one JSON object that maps each profile name to a dict of
+    setting values. The file is re-read on every operation; nothing is cached.
+    """
+
+    def __init__(self, path: Path) -> None:
+        """Bind the store to the JSON file at *path* (the file need not exist yet)."""
+        self._path = path
+
+    def _load(self) -> dict[str, dict[str, object]]:
+        """Read all profiles from disk.
+
+        Loading is best-effort: a missing or unreadable file, invalid JSON,
+        invalid UTF-8, or a top-level value that is not an object all yield
+        an empty dict rather than an error.
+        """
+        try:
+            data = json.loads(self._path.read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            # OSError: file missing or unreadable. ValueError: covers both
+            # json.JSONDecodeError and UnicodeDecodeError.
+            return {}
+        return data if isinstance(data, dict) else {}
+
+    def _save(self, data: dict[str, dict[str, object]]) -> None:
+        """Write all profiles to disk, creating parent directories as needed."""
+        self._path.parent.mkdir(parents=True, exist_ok=True)
+        payload = json.dumps(data, ensure_ascii=False, indent=2)
+        # Write to a sibling temp file and swap it in, so an interrupted write
+        # cannot leave a truncated profile file that would later load as empty.
+        tmp_path = self._path.with_name(self._path.name + ".tmp")
+        tmp_path.write_text(payload, encoding="utf-8")
+        tmp_path.replace(self._path)
+
+    def list_profiles(self) -> list[str]:
+        """Return sorted list of saved profile names."""
+        return sorted(self._load())
+
+    def save_profile(self, name: str, config: ConversationRuntimeConfig) -> None:
+        """Snapshot the profile-eligible fields from *config* under *name*.
+
+        An existing profile with the same name is overwritten.
+        """
+        data = self._load()
+        data[name] = self.current_values(config)
+        self._save(data)
+
+    def get_profile(self, name: str) -> dict[str, object]:
+        """Return a copy of the stored settings dict for *name*.
+
+        Raises:
+            KeyError: If no profile named *name* is stored.
+        """
+        data = self._load()
+        if name not in data:
+            msg = f"Profile {name!r} not found"
+            raise KeyError(msg)
+        return dict(data[name])
+
+    def apply_profile(
+        self, name: str, config: ConversationRuntimeConfig
+    ) -> tuple[ConversationRuntimeConfig, list[str]]:
+        """Return a new config with profile values applied and list of changed field names.
+
+        Keys that are not listed in ``PROFILE_FIELDS``, or that *config* has no
+        attribute for, are ignored. When nothing differs, *config* itself is
+        returned together with an empty list.
+
+        Raises:
+            KeyError: If no profile named *name* is stored.
+        """
+        profile = self.get_profile(name)
+        validated_updates: dict[str, object] = {}
+        for field, value in profile.items():
+            # Skip fields the config does not have: passing them on would make
+            # dataclasses.replace() raise TypeError.
+            if field not in PROFILE_FIELDS or not hasattr(config, field):
+                continue
+            if getattr(config, field) != value:
+                validated_updates[field] = value
+        if validated_updates:
+            config = dataclasses.replace(config, **validated_updates)
+        return config, list(validated_updates)
+
+    def delete_profile(self, name: str) -> None:
+        """Remove *name* from the store (no-op if not found)."""
+        data = self._load()
+        if name not in data:
+            # A true no-op: leave the file untouched instead of rewriting it.
+            return
+        del data[name]
+        self._save(data)
+
+    def current_values(self, config: ConversationRuntimeConfig) -> dict[str, object]:
+        """Return current values of the profile-eligible fields from *config*."""
+        return {field: getattr(config, field) for field in PROFILE_FIELDS}