diff --git a/README.md b/README.md index db5b1673..b7b5c931 100644 --- a/README.md +++ b/README.md @@ -275,9 +275,10 @@ mcp: Full schema: [docs/reference/config.md](docs/reference/config.md). -Alternative config presets: `config.ollama.example.yml` (local Ollama), -`config.claude_code.example.yml` (Claude Code CLI), `config.codex.example.yml`, -`config.hermes.example.yml`, `config.openclaw.example.yml`. +Ready-made presets live in [`configs/`](configs/README.md): LLM-provider presets under +`configs/llm/` (Ollama, Claude Code, Codex, Hermes, OpenClaw, OpenRouter-free) and +embedding/retrieval presets under `configs/embedders/` (bge-m3, openai-large, specter2, …). +Point at one with `-c`, e.g. `uv run perspicacite -c configs/embedders/bge_m3.yml serve`. --- diff --git a/configs/README.md b/configs/README.md new file mode 100644 index 00000000..26f85ee6 --- /dev/null +++ b/configs/README.md @@ -0,0 +1,53 @@ +# Configuration presets + +The canonical, fully-documented template is **[`../config.example.yml`](../config.example.yml)**. +Copy it to `config.yml` (git-ignored) and edit: + +```bash +cp config.example.yml config.yml +``` + +`config.yml` in the repo root is the default the CLI loads when you don't pass `-c`. +The files here are ready-made starting points for specific LLM providers or embedding +backends — copy one over `config.yml`, or point at it directly: + +```bash +uv run perspicacite -c configs/embedders/openai_large.yml serve +``` + +## `llm/` — LLM provider presets + +Swap the chat/synthesis backend. Each sets `llm.*` for one provider; embedding defaults +to the open local `all-MiniLM-L6-v2`. + +| Preset | Backend | +|--------|---------| +| [`llm/claude_code.yml`](llm/claude_code.yml) | Claude Code subscription (CLI auth) | +| [`llm/codex.yml`](llm/codex.yml) | OpenAI Codex CLI subscription | +| [`llm/hermes.yml`](llm/hermes.yml) | Hermes Agent (Nous Research) | +| [`llm/ollama.yml`](llm/ollama.yml) | Local-only / zero cloud cost (Ollama) | +| [`llm/openclaw.yml`](llm/openclaw.yml) | OpenClaw agent | +| [`llm/openrouter-free.yml`](llm/openrouter-free.yml) | OpenRouter free tier | + +## `embedders/` — embedding / retrieval presets + +Swap the KB embedding model (and matching reranker). See +[`../docs/embedding-models.md`](../docs/embedding-models.md) for the benchmark table. +A KB must be **rebuilt** when you change its embedding model. + +| Preset | Embedding model | Notes | +|--------|-----------------|-------| +| [`embedders/bge_m3.yml`](embedders/bge_m3.yml) | `BAAI/bge-m3` | Production biomedical (recommended) | +| [`embedders/openai_large.yml`](embedders/openai_large.yml) | `text-embedding-3-large` | Cross-domain, best generalisation | +| [`embedders/specter2.yml`](embedders/specter2.yml) | `allenai/specter2_base` | Scientific-paper embeddings | +| [`embedders/pubmedbert.yml`](embedders/pubmedbert.yml) | `pritamdeka/S-PubMedBert-MS-MARCO` | Biomedical | +| [`embedders/neuml_pubmedbert.yml`](embedders/neuml_pubmedbert.yml) | `NeuML/pubmedbert-base-embeddings` | Biomedical (NeuML) | +| [`embedders/biomedbert.yml`](embedders/biomedbert.yml) | `microsoft/BiomedNLP-BiomedBERT-…` | Biomedical (Microsoft) | +| [`embedders/bge_en_icl.yml`](embedders/bge_en_icl.yml) | `BAAI/bge-en-icl` | In-context-learning embeddings | +| [`embedders/gte_qwen2_7b.yml`](embedders/gte_qwen2_7b.yml) | `Alibaba-NLP/gte-Qwen2-7B-instruct` | Large instruct embedder | +| [`embedders/stella_1_5b.yml`](embedders/stella_1_5b.yml) | `dunzhang/stella_en_1.5B_v5` | Compact high-quality embedder | +| [`embedders/qwen3_14b.yml`](embedders/qwen3_14b.yml) | `text-embedding-3-large` | Qwen3-14B chat + OpenAI embeddings | +| [`embedders/code_kb.yml`](embedders/code_kb.yml) | `mistralai/codestral-embed-2505` | Code knowledge bases | + +Every preset here is parse-validated against the config schema by +`tests/integration/test_config_audit.py`. diff --git a/config_bge_en_icl.yml b/configs/embedders/bge_en_icl.yml similarity index 97% rename from config_bge_en_icl.yml rename to configs/embedders/bge_en_icl.yml index 9821831a..8ddb0485 100644 --- a/config_bge_en_icl.yml +++ b/configs/embedders/bge_en_icl.yml @@ -21,7 +21,7 @@ # Reranker: bge-reranker-v2-m3 pairs well with all BGE embeddings. # # Launch: -# uv run perspicacite -c config_bge_en_icl.yml serve +# uv run perspicacite -c configs/embedders/bge_en_icl.yml serve version: "2.0.0" config_name: "bge-en-icl-port8005" diff --git a/config_bge_m3.yml b/configs/embedders/bge_m3.yml similarity index 94% rename from config_bge_m3.yml rename to configs/embedders/bge_m3.yml index 7defd54c..99315370 100644 --- a/config_bge_m3.yml +++ b/configs/embedders/bge_m3.yml @@ -20,7 +20,7 @@ # Full benchmark: perspicacite-eval/docs/retrieval_benchmark_2026_05_26.md # # Launch: -# uv run perspicacite -c config_bge_m3.yml serve +# uv run perspicacite -c configs/embedders/bge_m3.yml serve version: "2.0.0" config_name: "bge-m3-port8004" @@ -69,7 +69,7 @@ rag_modes: # bge-reranker-v2-m3: domain-aware pair for bge-m3 embeddings. # Key finding: bge-reranker HELPS weaker/domain embeddings (bge-m3, MiniLM, PubMedBERT) # but HURTS strong embeddings (OpenAI 3-large: −2.1 pp vs ms-marco). - # Do not swap to ms-marco in this config — use config_openai_large.yml for OpenAI. + # Do not swap to ms-marco in this config — use configs/embedders/openai_large.yml for OpenAI. reranker_model: "BAAI/bge-reranker-v2-m3" basic: diff --git a/config_biomedbert.yml b/configs/embedders/biomedbert.yml similarity index 96% rename from config_biomedbert.yml rename to configs/embedders/biomedbert.yml index 88a4ffdc..99853272 100644 --- a/config_biomedbert.yml +++ b/configs/embedders/biomedbert.yml @@ -12,7 +12,7 @@ # No API key required (fully local via sentence-transformers). # # Launch: -# uv run perspicacite -c config_biomedbert.yml serve +# uv run perspicacite -c configs/embedders/biomedbert.yml serve version: "2.0.0" config_name: "biomedbert-port8006" diff --git a/config_code_kb.yml b/configs/embedders/code_kb.yml similarity index 99% rename from config_code_kb.yml rename to configs/embedders/code_kb.yml index f65323aa..20031367 100644 --- a/config_code_kb.yml +++ b/configs/embedders/code_kb.yml @@ -22,7 +22,7 @@ # Costs: codestral-embed billed per OpenRouter pricing (new docs only; queries cheap). # # Launch: -# OPENROUTER_API_KEY=$OPENROUTER_API_KEY uv run perspicacite -c config_code_kb.yml serve +# OPENROUTER_API_KEY=$OPENROUTER_API_KEY uv run perspicacite -c configs/embedders/code_kb.yml serve version: "2.0.0" config_name: "code-kb-codestral-port8003" diff --git a/config_gte_qwen2_7b.yml b/configs/embedders/gte_qwen2_7b.yml similarity index 96% rename from config_gte_qwen2_7b.yml rename to configs/embedders/gte_qwen2_7b.yml index 7401c788..d5e8e99b 100644 --- a/config_gte_qwen2_7b.yml +++ b/configs/embedders/gte_qwen2_7b.yml @@ -18,7 +18,7 @@ # Note: requires `st:` prefix since Alibaba-NLP/ is not in auto-detected namespaces. # # Launch: -# uv run perspicacite -c config_gte_qwen2_7b.yml serve +# uv run perspicacite -c configs/embedders/gte_qwen2_7b.yml serve version: "2.0.0" config_name: "gte-qwen2-7b-port8007" diff --git a/config_neuml_pubmedbert.yml b/configs/embedders/neuml_pubmedbert.yml similarity index 96% rename from config_neuml_pubmedbert.yml rename to configs/embedders/neuml_pubmedbert.yml index e0dea996..226250b9 100644 --- a/config_neuml_pubmedbert.yml +++ b/configs/embedders/neuml_pubmedbert.yml @@ -12,7 +12,7 @@ # No API key required (fully local via sentence-transformers). # # Launch: -# uv run perspicacite -c config_neuml_pubmedbert.yml serve +# uv run perspicacite -c configs/embedders/neuml_pubmedbert.yml serve version: "2.0.0" config_name: "neuml-pubmedbert-port8007" diff --git a/config_openai_large.yml b/configs/embedders/openai_large.yml similarity index 99% rename from config_openai_large.yml rename to configs/embedders/openai_large.yml index 99745595..e82b8807 100644 --- a/config_openai_large.yml +++ b/configs/embedders/openai_large.yml @@ -20,7 +20,7 @@ # Full benchmark: perspicacite-eval/docs/retrieval_benchmark_2026_05_26.md # # Launch: -# OPENAI_API_KEY=$OPENAI_API_KEY uv run perspicacite -c config_openai_large.yml serve +# OPENAI_API_KEY=$OPENAI_API_KEY uv run perspicacite -c configs/embedders/openai_large.yml serve version: "2.0.0" config_name: "openai-large-port8002" diff --git a/config_pubmedbert.yml b/configs/embedders/pubmedbert.yml similarity index 96% rename from config_pubmedbert.yml rename to configs/embedders/pubmedbert.yml index 4e90605a..289bbc34 100644 --- a/config_pubmedbert.yml +++ b/configs/embedders/pubmedbert.yml @@ -11,7 +11,7 @@ # No API key required (fully local via sentence-transformers). # # Launch: -# uv run perspicacite -c config_pubmedbert.yml serve +# uv run perspicacite -c configs/embedders/pubmedbert.yml serve version: "2.0.0" config_name: "pubmedbert-port8005" diff --git a/config_qwen3_14b.yml b/configs/embedders/qwen3_14b.yml similarity index 98% rename from config_qwen3_14b.yml rename to configs/embedders/qwen3_14b.yml index c07469ab..82a2262f 100644 --- a/config_qwen3_14b.yml +++ b/configs/embedders/qwen3_14b.yml @@ -8,7 +8,7 @@ # /opt/homebrew/opt/ollama/bin/ollama serve (or brew services start ollama) # # Launch: -# OPENAI_API_KEY=$OPENAI_API_KEY uv run perspicacite -c config_qwen3_14b.yml serve +# OPENAI_API_KEY=$OPENAI_API_KEY uv run perspicacite -c configs/embedders/qwen3_14b.yml serve # # Thinking mode: Qwen3 supports /think ... /no_think tokens. # Set QWEN3_NO_THINK=1 to prepend /no_think to all prompts (faster, less depth). diff --git a/config_specter2.yml b/configs/embedders/specter2.yml similarity index 97% rename from config_specter2.yml rename to configs/embedders/specter2.yml index 703b74bb..9ba0b003 100644 --- a/config_specter2.yml +++ b/configs/embedders/specter2.yml @@ -9,7 +9,7 @@ # 0.7 default filters everything → 0 results) # Launch: # TRANSFORMERS_OFFLINE=1 HF_DATASETS_OFFLINE=1 \ -# uv run perspicacite -c config_specter2.yml serve +# uv run perspicacite -c configs/embedders/specter2.yml serve # # TRANSFORMERS_OFFLINE/HF_DATASETS_OFFLINE prevent the model from trying # to re-fetch on every startup (we got hit by HF 429s in the previous diff --git a/config_stella_1_5b.yml b/configs/embedders/stella_1_5b.yml similarity index 97% rename from config_stella_1_5b.yml rename to configs/embedders/stella_1_5b.yml index 043a8c30..c8f3125c 100644 --- a/config_stella_1_5b.yml +++ b/configs/embedders/stella_1_5b.yml @@ -19,7 +19,7 @@ # in the auto-detected namespace list; prefix stripped before loading. # # Launch: -# uv run perspicacite -c config_stella_1_5b.yml serve +# uv run perspicacite -c configs/embedders/stella_1_5b.yml serve version: "2.0.0" config_name: "stella-1.5b-port8006" diff --git a/config.claude_code.example.yml b/configs/llm/claude_code.yml similarity index 98% rename from config.claude_code.example.yml rename to configs/llm/claude_code.yml index b1114e0a..0dbe7d78 100644 --- a/config.claude_code.example.yml +++ b/configs/llm/claude_code.yml @@ -9,7 +9,7 @@ # 2. Sign in (one-time): `claude login` # 3. Verify the CLI works: `echo "say hi" | claude -p --model haiku` # 4. Use this file as your config: -# perspicacite -c config.claude_code.example.yml serve +# perspicacite -c configs/llm/claude_code.yml serve # # **Caveat — shared rate limits.** Perspicacité shares your # interactive Claude Code rate window. A heavy ingest can freeze diff --git a/config.codex.example.yml b/configs/llm/codex.yml similarity index 98% rename from config.codex.example.yml rename to configs/llm/codex.yml index a9ffe194..f42a0b72 100644 --- a/config.codex.example.yml +++ b/configs/llm/codex.yml @@ -14,7 +14,7 @@ # 2. Sign in: `codex login` (browser-based ChatGPT auth) # 3. Verify: `echo "say hi" | codex exec --skip-git-repo-check` # 4. Use this file as your config: -# perspicacite -c config.codex.example.yml serve +# perspicacite -c configs/llm/codex.yml serve # # **Caveat — Codex is an agent, not a pure completion endpoint.** # Each call spins up Codex's full session machinery (sandbox, tool diff --git a/config.hermes.example.yml b/configs/llm/hermes.yml similarity index 96% rename from config.hermes.example.yml rename to configs/llm/hermes.yml index 476607a0..c8bdf7f3 100644 --- a/config.hermes.example.yml +++ b/configs/llm/hermes.yml @@ -19,13 +19,13 @@ # 2. Configure: `hermes setup` # 3. Verify: `hermes` (interactive) or your version's one-shot flag # 4. Use this file as your config: -# perspicacite -c config.hermes.example.yml serve +# perspicacite -c configs/llm/hermes.yml serve # # **Note:** If Hermes doesn't ship a one-shot completion mode in # your version, the alternative is to run Hermes models directly # via Ollama (the Hermes family is published on Ollama as e.g. # `hermes-3:70b`) — that's a fully supported path today via -# config.ollama.example.yml. +# configs/llm/ollama.yml. llm: default_provider: "agent_cli" diff --git a/config.ollama.example.yml b/configs/llm/ollama.yml similarity index 98% rename from config.ollama.example.yml rename to configs/llm/ollama.yml index bf0dbe8e..072474a1 100644 --- a/config.ollama.example.yml +++ b/configs/llm/ollama.yml @@ -13,7 +13,7 @@ # 3. Start the Ollama server (auto-starts on macOS once installed): # ollama serve # 4. Use this file as your config: -# perspicacite -c config.ollama.example.yml serve +# perspicacite -c configs/llm/ollama.yml serve # # **Quality vs hardware tradeoffs** # - 70B models need ~40 GB RAM. Worth it for synthesis quality on diff --git a/config.openclaw.example.yml b/configs/llm/openclaw.yml similarity index 97% rename from config.openclaw.example.yml rename to configs/llm/openclaw.yml index 7c9e8623..27c5ed73 100644 --- a/config.openclaw.example.yml +++ b/configs/llm/openclaw.yml @@ -19,7 +19,7 @@ # 2. Ensure the gateway is running: `openclaw onboard --install-daemon` # 3. Verify: `openclaw agent --message "say hi"` # 4. Use this file as your config: -# perspicacite -c config.openclaw.example.yml serve +# perspicacite -c configs/llm/openclaw.yml serve # # Same caveats as the other agent-CLI presets: no prompt caching, no # per-call temperature/max_tokens, output buffered (no streaming). diff --git a/config.example.openrouter-free.yml b/configs/llm/openrouter-free.yml similarity index 99% rename from config.example.openrouter-free.yml rename to configs/llm/openrouter-free.yml index 4d6e7831..328b508d 100644 --- a/config.example.openrouter-free.yml +++ b/configs/llm/openrouter-free.yml @@ -9,7 +9,7 @@ # 3. Add to your shell profile (~/.zshrc or ~/.bashrc): # export OPENROUTER_API_KEY="sk-or-v1-..." # 4. Copy this file: -# cp config.example.openrouter-free.yml config.yml +# cp configs/llm/openrouter-free.yml config.yml # 5. Start the server: # source ~/.zshrc && uv run perspicacite -c config.yml serve # diff --git a/docs/agent-cli-caveats.md b/docs/agent-cli-caveats.md index 77fa03e2..e3f05aab 100644 --- a/docs/agent-cli-caveats.md +++ b/docs/agent-cli-caveats.md @@ -5,10 +5,10 @@ subprocess-based LLM routing path). Captured from live testing during the May 2026 rollout. Keep this in sync as upstream CLIs evolve. See also: -- [`config.claude_code.example.yml`](../config.claude_code.example.yml) -- [`config.codex.example.yml`](../config.codex.example.yml) -- [`config.openclaw.example.yml`](../config.openclaw.example.yml) -- [`config.hermes.example.yml`](../config.hermes.example.yml) +- [`configs/llm/claude_code.yml`](../configs/llm/claude_code.yml) +- [`configs/llm/codex.yml`](../configs/llm/codex.yml) +- [`configs/llm/openclaw.yml`](../configs/llm/openclaw.yml) +- [`configs/llm/hermes.yml`](../configs/llm/hermes.yml) - [`src/perspicacite/llm/agent_cli.py`](../src/perspicacite/llm/agent_cli.py) ## What "agent CLI" routing means @@ -170,7 +170,7 @@ installed version). `hermes setup`), not by a CLI flag. - **Simpler alternative for Hermes models:** the Hermes family is published on Ollama as `hermes-3:70b` etc. Use - [`config.ollama.example.yml`](../config.ollama.example.yml) with + [`configs/llm/ollama.yml`](../configs/llm/ollama.yml) with `default_model: "hermes-3:70b"` — fully supported today, no CLI dependency. diff --git a/docs/embedding-models.md b/docs/embedding-models.md index b74f369e..329b1f41 100644 --- a/docs/embedding-models.md +++ b/docs/embedding-models.md @@ -12,8 +12,8 @@ Full benchmark data: [perspicacite-eval/docs/retrieval_benchmark_2026_05_26.md]( | Config file | Embedding | Dims | Reranker | NDCG@10 (SciFact) | Use when | |---|---|---|---|---|---| | `config.yml` | all-MiniLM-L6-v2 | 384 | ms-marco-MiniLM-L-12-v2 | **0.851** | Dev, resource-constrained, fast setup | -| `config_bge_m3.yml` | BAAI/bge-m3 | 1024 | bge-reranker-v2-m3 | **0.879** | Production biomedical (recommended) | -| `config_openai_large.yml` | text-embedding-3-large | 3072 | ms-marco-MiniLM-L-12-v2 | **0.872** | Cross-domain, best generalisation | +| `configs/embedders/bge_m3.yml` | BAAI/bge-m3 | 1024 | bge-reranker-v2-m3 | **0.879** | Production biomedical (recommended) | +| `configs/embedders/openai_large.yml` | text-embedding-3-large | 3072 | ms-marco-MiniLM-L-12-v2 | **0.872** | Cross-domain, best generalisation | --- diff --git a/docs/guides/embedding-and-rag-configuration.md b/docs/guides/embedding-and-rag-configuration.md index e8660d12..747fd442 100644 --- a/docs/guides/embedding-and-rag-configuration.md +++ b/docs/guides/embedding-and-rag-configuration.md @@ -82,7 +82,7 @@ llm: ## Tier 2 — OpenAI: best accuracy, cloud cost -**Config file:** `config_openai_large.yml` +**Config file:** `configs/embedders/openai_large.yml` ### Model `text-embedding-3-large` — 3 072-dim, OpenAI API, ~$0.13 per million tokens. @@ -102,7 +102,7 @@ Gain over MiniLM baseline: **+12 pp NDCG@10 (no rerank), +2 pp with CE reranker* export OPENAI_API_KEY="sk-..." # Using the dedicated config (port 8002 by default) -uv run perspicacite -c config_openai_large.yml serve +uv run perspicacite -c configs/embedders/openai_large.yml serve ``` Key settings: @@ -125,7 +125,7 @@ You can run both servers simultaneously (they share `chroma_db/` but use differe uv run perspicacite -c config.yml serve # Terminal 2 — OpenAI on :8002 -OPENAI_API_KEY=$OPENAI_API_KEY uv run perspicacite -c config_openai_large.yml serve +OPENAI_API_KEY=$OPENAI_API_KEY uv run perspicacite -c configs/embedders/openai_large.yml serve ``` Each server uses its own KB (`scifact_abstracts` for MiniLM, `scifact_openai_large` @@ -136,7 +136,7 @@ for OpenAI) and embeds queries with the matching model. See ## Tier 3a — Biomedical local: best life-science accuracy -**Config file:** `config_pubmedbert.yml` +**Config file:** `configs/embedders/pubmedbert.yml` ### Model `pritamdeka/S-PubMedBert-MS-MARCO` — 768-dim, PubMedBERT fine-tuned for retrieval on @@ -157,14 +157,14 @@ domain-adapted retrieval model with a powerful cross-encoder reranker. ### Launch ```bash -uv run perspicacite -c config_pubmedbert.yml serve +uv run perspicacite -c configs/embedders/pubmedbert.yml serve # Model auto-downloads from HuggingFace on first run (~440 MB) ``` For offline environments (after first download): ```bash TRANSFORMERS_OFFLINE=1 HF_DATASETS_OFFLINE=1 \ - uv run perspicacite -c config_pubmedbert.yml serve + uv run perspicacite -c configs/embedders/pubmedbert.yml serve ``` Key settings: @@ -187,7 +187,7 @@ rag_modes: ## Tier 3b — General local SOTA -**Config file:** `config_bge_m3.yml` +**Config file:** `configs/embedders/bge_m3.yml` ### Model `BAAI/bge-m3` — 1 024-dim, multilingual MTEB SOTA retrieval model. ~2.3 GB. @@ -208,7 +208,7 @@ knowledge_base: GPU launch: ```bash # If you have a CUDA GPU, sentence-transformers will use it automatically -uv run perspicacite -c config_bge_m3.yml serve +uv run perspicacite -c configs/embedders/bge_m3.yml serve ``` --- @@ -227,11 +227,11 @@ uv run perspicacite -c config.yml serve & # Port 8001 — SPECTER2 (scientific citation context) TRANSFORMERS_OFFLINE=1 HF_DATASETS_OFFLINE=1 \ - uv run perspicacite -c config_specter2.yml serve & + uv run perspicacite -c configs/embedders/specter2.yml serve & # Port 8002 — OpenAI 3-large (highest accuracy, paid) OPENAI_API_KEY=$OPENAI_API_KEY \ - uv run perspicacite -c config_openai_large.yml serve & + uv run perspicacite -c configs/embedders/openai_large.yml serve & ``` ### Ingest the same corpus into each KB @@ -338,7 +338,7 @@ llm: timeout: 300 # 14B can be slow for long answers ``` -See `config_qwen3_14b.yml` for a complete example. +See `configs/embedders/qwen3_14b.yml` for a complete example. **Thinking mode (Qwen3):** Qwen3 supports `/think` and `/no_think` tokens. The server inserts these based on mode complexity. Set `QWEN3_NO_THINK=1` env var to always @@ -364,10 +364,10 @@ RAM: ~300 MB. Works on any machine with internet for LLM calls. ```bash # Port 8005 — PubMedBERT + bge-reranker + local Qwen3 # First run: downloads ~2.6 GB of models -uv run perspicacite -c config_pubmedbert.yml serve +uv run perspicacite -c configs/embedders/pubmedbert.yml serve # With local LLM (Ollama): -# Edit config_pubmedbert.yml: llm.default_provider = "ollama", default_model = "qwen3:14b" +# Edit configs/embedders/pubmedbert.yml: llm.default_provider = "ollama", default_model = "qwen3:14b" ``` RAM: ~3 GB (PubMedBERT + bge-reranker + Qwen3 8B) or ~11 GB (Qwen3 14B). @@ -383,8 +383,8 @@ share the same server. ```bash # OpenAI 3-large + bge-reranker + Claude Opus OPENAI_API_KEY=$OPENAI_API_KEY ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \ - uv run perspicacite -c config_openai_large.yml serve -# Edit config_openai_large.yml: + uv run perspicacite -c configs/embedders/openai_large.yml serve +# Edit configs/embedders/openai_large.yml: # llm.default_provider = "anthropic" # llm.default_model = "claude-opus-4-5" # rag_modes.reranker_model = "BAAI/bge-reranker-v2-m3" diff --git a/docs/recipe-book-2026-05-15.md b/docs/recipe-book-2026-05-15.md index 1fea59dc..b593c4f8 100644 --- a/docs/recipe-book-2026-05-15.md +++ b/docs/recipe-book-2026-05-15.md @@ -10,7 +10,7 @@ documents *intent* — "what should I run when I want to ...?" > **Prereqs:** `pip install -e .`, `perspicacite serve` is reachable > at `http://localhost:8000` for the recipes that need it, and a > `.env` with at least one `*_API_KEY` is in place. See -> `config/config.example.yml` for the canonical config layout. +> `config.example.yml` for the canonical config layout. --- diff --git a/tests/integration/test_config_audit.py b/tests/integration/test_config_audit.py index 4a0fa5ba..4c189a63 100644 --- a/tests/integration/test_config_audit.py +++ b/tests/integration/test_config_audit.py @@ -1,6 +1,6 @@ """Config loading audit — Wave 1.4 of framework-hardening roadmap. -Verifies every config.*.example.yml at repo root parses cleanly, +Verifies every shipped preset (config.example.yml + configs/) parses cleanly, stage-resolution fall-through behaves correctly, backward compat is preserved, and the new agent_cli LLMProviderConfig fields parse. @@ -78,15 +78,16 @@ class _Cfg: # --------------------------------------------------------------------------- _YAML_PRESETS = sorted( - glob.glob(str(REPO_ROOT / "config.*.example.yml")) - + glob.glob(str(REPO_ROOT / "config.example.yml")) + glob.glob(str(REPO_ROOT / "config.example.yml")) + + glob.glob(str(REPO_ROOT / "configs" / "llm" / "*.yml")) + + glob.glob(str(REPO_ROOT / "configs" / "embedders" / "*.yml")) ) @pytest.mark.config @pytest.mark.parametrize("yaml_path", _YAML_PRESETS, ids=lambda p: Path(p).name) def test_yaml_preset_parses(yaml_path: str) -> None: - """Every config.*.example.yml parses cleanly into Config.""" + """Every shipped preset (config.example.yml + configs/llm + configs/embedders) parses cleanly into Config.""" with open(yaml_path) as fh: data = yaml.safe_load(fh) assert isinstance(data, dict), f"{yaml_path} did not parse to a dict" diff --git a/tests/integration/test_provider_matrix.py b/tests/integration/test_provider_matrix.py index 21c5865c..3de43c66 100644 --- a/tests/integration/test_provider_matrix.py +++ b/tests/integration/test_provider_matrix.py @@ -172,7 +172,7 @@ def _make_config_for(provider: str, model: str, **kwargs: Any) -> LLMConfig: timeout=180, max_retries=1, ), - # Codex preset — mirrors config.codex.example.yml. Verified live + # Codex preset — mirrors configs/llm/codex.yml. Verified live # in commit 7f1e7d7 (~16 s round-trip on this machine). "agent_cli": LLMProviderConfig( executable="codex",