diff --git a/README.md b/README.md index 0ef8221..b80f940 100644 --- a/README.md +++ b/README.md @@ -13,10 +13,11 @@ Character-focused local chatbot with RAG support (ChromaDB + LangChain), CLI and ## What It Includes - Local chat runtime backed by `llama-cpp-python` -- Character-card-driven prompting (`cards/*.json`) +- Character-card-driven prompting (`cards/*.json`) with avatar display - RAG retrieval from ChromaDB collections - Dynamic context budgeting and history management - GPU offload auto-layer calculation and KV cache quant support +- Web UI (FastAPI + Jinja2 + HTMX): chat, session management, RAG management, diagnostics - Scripted workflows for analyzing, pushing, and managing RAG data ## Current Runtime Entry Points @@ -75,9 +76,20 @@ Notes for web chat behavior: - Shows status updates (`Ready`, `Sending`, `Thinking`, `Streaming`, `Timed out`). - Applies a stream timeout and surfaces a `Retry` button on stream failure. -- Supports named session save + explicit session picker load in the sidebar. -- Shows both latest retrieval debug stats and per-turn retrieval trace history. -- Provides quick actions for copy/export and command-equivalent controls (`clear`, `reload`, `help`). +- Sidebar has three tabs: **Character** (avatar + card info), **Sessions** (save/load/search), **Debug** (per-turn retrieval trace + diagnostics). +- Named session save/load and full-text session search with character and date filters. +- Token budget bar in the Diagnostics tab shows real-time context-window allocation (system / history / RAG / examples / input / reserved / free). +- Per-turn stats: estimated prompt and completion tokens, context window fill %, RAG chunks used. +- Quick actions for copy/export (TXT, JSON, ZIP bundle) and command-equivalent controls (`clear`, `reload`, `help`). +- Saveable preset profiles for retrieval settings (MMR, rerank, multi-query, k values). 
+ +RAG management UI at **`/rag`** (link in the chat sidebar): + +- Upload new source files (`.txt`) and create ChromaDB collections directly from the browser. +- View, lint, and run coverage analysis on `rag_data/` files. +- List, query, rebuild, and delete collections. +- Run fixture evaluations and view retrieval trend history. +- View embedding benchmark results. ## Setup diff --git a/cards/Shodan-specV2.jpg b/cards/Shodan-specV2.jpg new file mode 100644 index 0000000..eb3c3b5 Binary files /dev/null and b/cards/Shodan-specV2.jpg differ diff --git a/core/conversation_manager.py b/core/conversation_manager.py index 6673e48..9dcc009 100644 --- a/core/conversation_manager.py +++ b/core/conversation_manager.py @@ -143,6 +143,17 @@ def __init__(self) -> None: "mes": {"mode": "unknown", "returned": 0, "candidates": 0, "queries": 0, "rerank_applied": False}, "cleanup": {"main": 0, "mes": 0, "cross_removed": 0}, } + self.last_token_budget: dict[str, int] = { + "system_prompt_tokens": 0, + "history_tokens": 0, + "rag_tokens": 0, + "examples_tokens": 0, + "input_tokens": 0, + "total_estimated": 0, + "context_window": 0, + "available_for_context": 0, + "reserved_for_response": 0, + } self._vector_client: object | None = None self._vector_embedder: object | None = None self._cross_encoder: object | None = None diff --git a/core/conversation_prompt_history_mixin.py b/core/conversation_prompt_history_mixin.py index 8720927..6bbde69 100644 --- a/core/conversation_prompt_history_mixin.py +++ b/core/conversation_prompt_history_mixin.py @@ -186,6 +186,18 @@ def _prepare_dynamic_vector_context(self, message: str, mes_example: str) -> tup vector_context = str(allocation["allocated_context"]) allocated_history = str(allocation["allocated_history"]) + self.last_token_budget = { + "system_prompt_tokens": budget.system_prompt_tokens, + "history_tokens": int(allocation["history_tokens"]), + "rag_tokens": int(allocation["context_tokens"]), + "examples_tokens": 
int(allocation["examples_tokens"]), + "input_tokens": int(allocation["input_tokens"]), + "total_estimated": int(allocation["total_allocated"]) + budget.system_prompt_tokens, + "context_window": budget.total_context, + "available_for_context": budget.available_for_context, + "reserved_for_response": budget.reserved_for_response, + } + if self.runtime_config.debug_context: logger.debug(self.context_manager.get_context_info(budget, allocation)) diff --git a/core/rag_manager.py b/core/rag_manager.py index dc0be54..f208a76 100644 --- a/core/rag_manager.py +++ b/core/rag_manager.py @@ -204,6 +204,36 @@ def file_content(config: RagScriptConfig, filename: str) -> str | None: return candidate.read_text(encoding="utf-8") +def save_rag_file(config: RagScriptConfig, stem: str, content: bytes) -> dict[str, Any]: + """Save *content* as ``{stem}.txt`` in the rag_data directory. + + Raises ``ValueError`` if *stem* is invalid. + Returns a file-info dict matching the shape produced by :func:`list_rag_files`. + """ + if not is_valid_stem(stem): + msg = f"Invalid stem {stem!r}: only letters, digits, underscores, and hyphens are allowed." 
+ raise ValueError(msg) + rag_dir = Path(config.documents_directory) + rag_dir.mkdir(parents=True, exist_ok=True) + dest = rag_dir / f"{stem}.txt" + dest.write_bytes(content) + return { + "name": dest.name, + "stem": stem, + "type": "message_examples" if stem.endswith("_message_examples") else "lore", + "size": len(content), + "has_metadata": (rag_dir / f"{stem}.json").exists(), + } + + +def list_rag_stems(config: RagScriptConfig) -> list[str]: + """Return a sorted list of stems for all .txt files in rag_data/.""" + rag_dir = Path(config.documents_directory) + if not rag_dir.exists(): + return [] + return sorted(p.stem for p in rag_dir.glob("*.txt")) + + # --------------------------------------------------------------------------- # Linting # --------------------------------------------------------------------------- diff --git a/docs/future_work/COPILOT_COMPACT_REFERENCE.md b/docs/future_work/COPILOT_COMPACT_REFERENCE.md index 8a8fbe7..83211f5 100644 --- a/docs/future_work/COPILOT_COMPACT_REFERENCE.md +++ b/docs/future_work/COPILOT_COMPACT_REFERENCE.md @@ -1,6 +1,6 @@ # Copilot Compact Reference — Implemented State -Last verified: 2026-03-29 +Last verified: 2026-04-03 Use this as the single compact reference for implemented work across conversation quality, RAG quality, and web app behavior. @@ -143,18 +143,46 @@ Primary files: - **Per-turn diagnostics panel**: collapsible sidebar panel showing Turn, Latency (s), Chars, Main chunks, MES chunks, Cross-removed, and Drift score (colour-coded at warning/fail thresholds) for the last 40 turns. Auto-refreshes after each stream. Route: `GET /chat/diagnostics`. - **Saveable preset profiles**: collapsible sidebar panel for saving/applying/deleting named snapshots of 7 retrieval settings (`use_mmr`, `rag_rerank_enabled`, `rag_sentence_compression_enabled`, `rag_multi_query_enabled`, `rag_k`, `rag_k_mes`, `debug_context`). 
Profiles persisted in `configs/profiles.json`; applied in-place to the live `ConversationRuntimeConfig` without restart. Routes: `GET/POST /settings/profiles/*`. - **One-click export bundle**: `GET /chat/export/bundle` downloads a ZIP containing `manifest.json`, `conversation.json` (full session), `retrieval_traces.json` (per-turn history), and `drift_history.json`. Button in composer quick-actions. +- **RAG Management UI** (`/rag`): Standalone dark-theme page with left nav. Sections: Collections (list, detail, delete, ad-hoc query, rebuild/push with async job, fingerprint backfill), Files (list, view, lint run/fix, coverage analysis), Evaluate (fixture pack selector, run evaluate-fixtures, results table, retrieval trend history), Benchmark (last-run model comparison table). Long-running ops (push, evaluate) use in-memory `JobStore` + HTMX polling (`every 2s`). Link from chat sidebar. +- **Session history search**: Collapsible "Search sessions" panel inside the Sessions sidebar panel. Searches all saved `logs/web_sessions/session_*.json` files by free text (matches session name and message content), character name filter, and optional date range. Returns matching sessions with inline message excerpts and a Load button. Route: `GET /sessions/search?q=&character=&from_date=&to_date=`. +- **Token budget visualization + per-turn stats** (`/chat/diagnostics`): A stacked colour-coded bar at the top of the Diagnostics panel shows the current context-window allocation split across System prompt, History, RAG context, Examples, User input, Reserved, and Free headroom (green/yellow/red by fill %). The per-turn table now shows estimated Prompt tokens, estimated Completion tokens (chars/4), Context window % fill (colour-coded), and RAG chunks retrieved. A session-totals row below the table shows cumulative prompt/completion tokens and average context %. 
Backend: `ConversationManager.last_token_budget` dict populated from `ContextBudget` + `allocate_content()` return values in `_prepare_dynamic_vector_context()`; stored per trace in `_record_retrieval_trace`. +- **Character avatar display + tabbed sidebar**: The chat sidebar is restructured into three tabs + — 🎭 Character, 💾 Sessions, 🔍 Debug — with a compact always-visible header showing a small + avatar and character name. The Character tab displays the full avatar image (if present) alongside + card metadata. Route: `GET /characters/avatar` returns the avatar as a `FileResponse`; + `_character_avatar_path()` searches `character_storage/<name>/avatar.{ext}` then `cards/<name>.{ext}`. + `has_avatar` bool is passed to the index template context. +- **RAG file upload + create-collection from UI**: The RAG Files page now includes an "Upload + Source File" panel — file picker (`.txt`), auto-filled stem, optional collection name for + immediate ingest. Uploading without a collection name saves the file and refreshes the file list. + With a collection name it triggers a push job. Each lore file row has an "Ingest →" toggle that + reveals an inline form to build a collection from that file. The Collections page has a "Create + New Collection" section with a dropdown of existing file stems. New routes: + `POST /rag/files/upload` (multipart), `POST /rag/collections`. + New backend: `rag_manager.save_rag_file()`, `rag_manager.list_rag_stems()`. +- **Bug fix — creating new ChromaDB collections**: `push_to_collection()` in + `scripts/rag/push_rag_data.py` previously only caught `ValueError` when deleting a non-existent + collection before recreating it. ChromaDB raises `chromadb.errors.NotFoundError` for missing + collections; that exception was uncaught and crashed the entire push. Fixed by widening the + `except` clause to use the already-defined `MISSING_COLLECTION_ERRORS` tuple + (`ValueError | NotFoundError`).
This was a latent bug exposed by the first UI-driven + collection creation. Primary files: - `web_app.py` - `main.py` - `core/preset_profiles.py` +- `core/rag_manager.py` (+ `save_rag_file`, `list_rag_stems`, `_character_avatar_path` helpers) +- `core/job_queue.py` +- `scripts/rag/push_rag_data.py` (bug fix: `MISSING_COLLECTION_ERRORS` in `push_to_collection`) - `templates/index.html` - `templates/chat_message_pair.html` - `templates/chat_messages.html` - `templates/chat_single_message.html` - `templates/diagnostics_panel.html` - `templates/presets_panel.html` +- `templates/rag/layout.html` (+ 13 RAG partial templates incl. `upload_result.html`) ## Current Defaults Snapshot diff --git a/docs/future_work/REFINEMENTS.md b/docs/future_work/REFINEMENTS.md index b0c6ff0..c2b06a6 100644 --- a/docs/future_work/REFINEMENTS.md +++ b/docs/future_work/REFINEMENTS.md @@ -17,10 +17,25 @@ Implemented state lives in `docs/future_work/COPILOT_COMPACT_REFERENCE.md`. - ✅ Integrate conversation quality command into a single quality-gate workflow with retrieval and RAG-data checks. (2026-03-26) - ✅ Add CI regression policy for conversation quality baselines (warn vs hard fail by severity). (2026-03-26) - Add docs for fixture authoring rules and baseline refresh workflow. +- **Character name mismatch on first turn (investigate).** On the first message of a session the + persona drift scorer may not match the character name correctly, producing an artificially high + drift score or misfire. Two candidate causes: (1) lazy initialisation — `character_name` may be + empty when the first `PersonaAnchor` is built, as card loading (`parse_prompt`) runs during + `__init__` before all attributes are set; (2) mes_example normalisation — the card linter + normalises `` / `` markers to plain `user:` / `assistant:` format, stripping the + original character name from example turns, which may confuse the heuristic name-match on turn 1. 
+ Investigation steps: add a log line in `_record_retrieval_trace` printing `character_name` and + `drift_score` at turn 1; check whether `PersonaAnchor.character_name` is populated before the + first call; and compare drift scores with and without mes_example injection on turn 1. ### RAG Data Quality -*(All scoped items completed; implemented state is tracked in `docs/future_work/COPILOT_COMPACT_REFERENCE.md`.)* +- **Shodan lore coverage is low.** The `rag_data/` source files for Shodan are sparse relative to the + character's depth. Coverage analysis shows many lore topics unmapped. Work needed: expand lore files + with canonical game text (System Shock 1 & 2 dialogue, environment descriptions, terminal messages), + re-run lint and coverage checks, then rebuild the collection. + +*(Previously scoped items completed; implemented state is tracked in `docs/future_work/COPILOT_COMPACT_REFERENCE.md`.)* ### Retrieval Quality @@ -145,6 +160,134 @@ Each character maintains its own RAG collection and persona drift tracker. A tur **Large effort, Medium value.** Treat as a long-horizon milestone — do not start until §6–8 are stable. The only "Large" effort item in this backlog. +### 11. Character Card Import & Avatar Support + +Improve the character loading pipeline to support richer card formats and give each character a +visible identity in the UI. + +#### 11.1 Character Avatar / Icon Upload + +Each character should have an optional avatar image displayed in the chat UI next to assistant +messages and in the character selector. Implementation: + +- Store avatars in `character_storage/<name>/avatar.png` (or `.jpg`, `.webp`). +- Serve via `GET /characters/{name}/avatar` — returns the image, falls back to a generated + initial/monogram placeholder if no avatar is found. +- Web UI: display avatar thumbnail in the chat header and optionally next to each assistant message + bubble. Upload button on the character settings page (see `UI_REFINEMENTS.md §C`).
+- Keep the image small (≤ 512 px, ≤ 200 KB) — resize on upload with Pillow. + +#### 11.2 Character Card V2 / V3 Import + +The project currently loads character data from plain JSON files in `cards/`. Extend this to +support importing from standard character card formats used by the wider AI chat ecosystem. + +**Character Card V2 (PNG `chara` tEXt chunk):** +- PNG files with a `chara` tEXt chunk containing base64-encoded JSON (TavernCardV2 format). +- Fields map directly to existing config: `name`, `description`, `scenario`, `mes_example`, + `first_mes`, `personality`, `system_prompt`, `post_history_instructions`, `character_book`. +- Already partially supported via `cards/leonardo_da_vinci.png` — formalise the import path. + +**Character Card V3 (CCv3 — the current community standard):** +- Spec: <https://github.com/kwaroran/character-card-spec-v3/blob/main/SPEC_V3.md> +- PNG/APNG: JSON embedded in `ccv3` tEXt chunk as UTF-8 → base64. If both `chara` and `ccv3` + chunks are present, prefer `ccv3`. +- CHARX: zip file with `card.json` at root. Assets (icons, backgrounds, emotion sprites) live in + `assets/{type}/` subdirectories and can be accessed via `embeded://path` URIs. +- JSON: plain `.json` file containing the CharacterCardV3 object directly. +- V3 adds: `assets[]` (icon, background, emotion images), `nickname`, `group_only_greetings`, + `creation_date`, `modification_date`, `source[]`, multilingual creator notes. +- **Implementation priority:** PNG V2 import is simplest and highest value (most cards in the wild + are V2 PNG). V3 PNG import is a small additional step. CHARX support can come later. +- Use the `pypng` or `Pillow` library to read tEXt chunks; no heavyweight dependency needed. +- On import: write a normalised JSON card to `cards/` and optionally extract the embedded avatar + to `character_storage/<name>/avatar.png`. + +**Suggested implementation order:** +1. Formalise V2 PNG import (read `chara` chunk → normalise → save JSON + avatar). +2. V3 PNG import (read `ccv3` chunk → normalise; fall back to `chara` if absent). +3.
Avatar display in web UI (§11.1 + `UI_REFINEMENTS.md §C`). +4. CHARX import (zip extraction + asset handling). +5. In-app card editor (§11.3 below). + +#### 11.3 In-App Character Card Editor + +A web-based form editor for creating and editing character cards without leaving the application. +Several community implementations can be used as reference for field layout and PNG embedding: + +- [ZoltanAI/character-editor](https://github.com/ZoltanAI/character-editor) — lightweight + browser-side editor for V1/V2 cards; entirely static HTML/JS, good reference for field layout. +- SillyTavern's built-in editor supports V2 fields and lorebook editing. +- The [CCv3 spec](https://github.com/kwaroran/character-card-spec-v3/blob/main/SPEC_V3.md) + provides the canonical field reference for a V3-compatible editor. + +**Backend requirements:** + +- `GET /characters/{name}/edit` — load existing card fields into the edit form. +- `POST /characters/{name}/edit` — validate and save edited fields to `cards/<name>.json`. +- `GET /characters/new` / `POST /characters/new` — create a new card from scratch. +- `POST /characters/{name}/export/png` — embed card JSON into a PNG tEXt chunk (`ccv3`) and + return the PNG for download. Uses the stored avatar as the base image. +- `POST /characters/{name}/avatar` — upload a new avatar image (resize to ≤ 512 px with Pillow, + save to `character_storage/<name>/avatar.png`). Replaces `UI_REFINEMENTS.md §C.2`. + +**Field coverage (minimum viable):** +`name`, `description`, `scenario`, `personality`, `first_mes`, `mes_example`, +`voice_instructions` (project-specific), `tags`, `creator`, `system_prompt`. +Lorebook / `character_book` editing is out of scope for the initial version. + +**PNG embedding:** +Read tEXt chunks with `struct` (stdlib) or `Pillow`; write `ccv3` chunk (base64-encoded UTF-8 +JSON). Also write a `chara` chunk for backward compatibility with V2 readers.
No new heavy +dependencies needed — `Pillow` is already a likely dependency for image resizing. + +**Effort:** Medium. The form and routing are straightforward; the PNG round-trip (read → edit → +re-embed) is the only non-trivial part. Build after §11.1–11.2 so the parsing layer is shared. + +## §12 User-Facing Documentation Site + +A non-technical, friendly guide for people who want to use light-chat without programming knowledge, +hosted alongside the repository as a static site. + +### Motivation + +The web UI has grown substantially (chat, RAG management, diagnostics, session search, character +management). Many features have in-UI help text, but there is no cohesive end-user reference. +A dedicated documentation site lowers the barrier to entry and helps non-technical users +understand what the tool does and how to use it. + +### Hosting options (all compatible with GitHub Pages) + +| Option | Notes | +|--------|-------| +| **MkDocs Material** (recommended) | Python-based, clean modern theme, markdown source. Fits the project's Python tooling; `mkdocs gh-deploy` publishes to GitHub Pages. Add as a `uv` dev dependency. | +| **Docsify** | Single HTML file + plain markdown; zero build step, works directly from a `docs/` folder on GitHub Pages. Good for rapid publishing. | +| **Docusaurus** | Node.js/React, strong search and versioning. More setup overhead; worthwhile if the docs grow large. | + +**Recommended starting point:** MkDocs Material. One `mkdocs.yml` config, `uv add --dev mkdocs-material`, +and `uv run mkdocs gh-deploy` is all that's needed. The source already lives in `docs/`. 
+ +### Content scope (minimum viable) + +| Page | Audience-level description | +|------|---------------------------| +| **Welcome / What is this?** | Plain-language intro: local AI chatbot, character cards, no data sent to cloud | +| **Getting started** | How to install, configure a model, and start the server | +| **Using the chat** | Sending messages, sessions (save/load/search), keyboard shortcuts, export | +| **Character cards** | What they are, where to put card files, adding an avatar image | +| **Knowledge base (RAG)** | Plain-language: what a "collection" is, how to add a new character's info, what "coverage" means | +| **Settings and profiles** | What each retrieval setting does in plain English; saving and applying profiles | +| **Diagnostics panel** | What the token bar and per-turn table show; how to read drift scores | +| **Troubleshooting** | Common errors, model not loading, no collections found, stream timeout | + +### Implementation notes + +- Place MkDocs source in `docs/` (already exists) with `mkdocs.yml` at the repository root. +- Separate developer/contributor documentation (current `docs/`) from user-guide pages + (`docs/user_guide/`) using MkDocs navigation sections. +- The in-UI help guides (chat sidebar and RAG page) can be reused or adapted as source material. +- A GitHub Actions workflow can automate `mkdocs gh-deploy` on every push to `main`. + *(Web UX and observability improvements are tracked in `docs/future_work/UI_REFINEMENTS.md`.)* ## Suggested Execution Order @@ -159,10 +302,13 @@ stable. The only "Large" effort item in this backlog. 8. ✅ Add retrieval trend rendering and debug export artifacts. (2026-03-26) 9. Iterate on higher-level UX and explainability improvements — see `docs/future_work/UI_REFINEMENTS.md`. 10. Add pressure-aware context compaction and per-turn token usage stats (§8). -11. Implement Tier 1 markdown persona memory (§6) — requires user identity scoping first. -12. 
Add conversation branching, character hot-reload, stop hooks, and skills macros (§7). -13. CLI quality-of-life pass: themes and keybindings (§9). -14. Multi-character conversation mode (§10) — long-horizon, after §6–8 are stable. +11. Implement V2/V3 card import and avatar upload pipeline (§11.1–11.2). +12. Build in-app character card editor with PNG export (§11.3 + `UI_REFINEMENTS.md §C.5`). +13. Implement Tier 1 markdown persona memory (§6) — requires user identity scoping first. +14. Add conversation branching, character hot-reload, stop hooks, and skills macros (§7). +15. CLI quality-of-life pass: themes and keybindings (§9). +16. Multi-character conversation mode (§10) — long-horizon, after §6–8 are stable. +17. Publish user-facing documentation site (§12) — MkDocs Material on GitHub Pages. ## Next Steps diff --git a/docs/future_work/UI_REFINEMENTS.md b/docs/future_work/UI_REFINEMENTS.md index f68193b..e05119c 100644 --- a/docs/future_work/UI_REFINEMENTS.md +++ b/docs/future_work/UI_REFINEMENTS.md @@ -90,15 +90,14 @@ the `scripts/rag/` CLI toolset, accessible from the browser without a terminal. 
| Area | Features | |------|---------| -| **RAG data files** | List, view, run linting, run coverage analysis | -| **Collections** | List, inspect, delete, rebuild, query test | +| **RAG data files** | List, view, upload new files, run linting, run coverage analysis | +| **Collections** | List, inspect, delete, rebuild, create from uploaded file, query test | | **Fixture evaluation** | Run evaluate-fixtures, view results, view trend history | | **Embedding benchmarking** | Trigger benchmark run, view results | | **Collection migration** | Re-embed to new model, backfill fingerprints | Out of scope for this plan (requires broader changes): - In-browser text editing of `rag_data/` source files -- File upload / new character creation - Real-time log streaming during long-running jobs (deferred to async job tracker) ### B.2 Routes @@ -109,12 +108,14 @@ imports (no subprocess); UI responses use HTMX partial renders consistent with e | Method | Path | Purpose | |--------|------|---------| | GET | `/rag` | RAG management root panel | -| GET | `/rag/files` | List `rag_data/` files with status badges | +| GET | `/rag/files` | List `rag_data/` files with status badges; upload panel | | GET | `/rag/files/{filename}` | View file content (read-only) | +| POST | `/rag/files/upload` | Upload a new `.txt` source file; optional immediate ingest | | POST | `/rag/lint` | Run message-example linting; return results table | | POST | `/rag/lint/fix` | Run linting with auto-fix; return diff summary | | POST | `/rag/coverage` | Run coverage analysis on a lore file; return score + report | | GET | `/rag/collections` | List ChromaDB collections with counts and fingerprints | +| POST | `/rag/collections` | Create a new collection from an existing source file | | GET | `/rag/collections/{name}` | Collection detail: model, dimensions, sample docs | | DELETE | `/rag/collections/{name}` | Delete collection (with confirmation step) | | POST | `/rag/collections/{name}/query` | Ad-hoc test query; 
return top-k chunks with scores | @@ -205,18 +206,97 @@ Or, given the project's existing pattern, call the CLI module functions directly ### B.8 Non-Goals (Deferred) - In-browser text editor for `rag_data/` source files (use VS Code or a dedicated CMS). -- File upload for new character data (filesystem write from web raises deployment concerns). - Real-time log streaming for long-running jobs (stdout pipe to WebSocket — separate effort). - Multi-user / authentication (single-user local tool assumption). --- +## C. Character Management UI + +UI surfaces for character card import, avatar upload, and character switching. Depends on +`REFINEMENTS.md §11` backend work for card parsing and avatar storage. + +### C.1 Character Avatar Display + +Show a character avatar image in the chat interface. + +- Display the avatar in the chat header next to the character name. +- Optionally show a small avatar thumbnail next to each assistant message bubble. +- Fall back to a coloured monogram/initial placeholder if no avatar is set. +- Source: `GET /characters/avatar` — served by the web app, returns the stored image or a + generated fallback. + +### C.2 Avatar Upload + +Allow uploading a custom avatar image for the active character. + +- Upload button in the character settings area (or a dedicated character management page). +- Accepts PNG, JPEG, or WebP; server resizes to ≤ 512 px and saves to + `character_storage/<name>/avatar.png`. +- Instant preview update after upload via HTMX partial replace. + +### C.3 Character Card Import + +A drag-and-drop or file-picker import flow for standard character card files. + +- Accepts: PNG (V2 `chara` chunk or V3 `ccv3` chunk), plain JSON (CCv2 or CCv3), and CHARX zip. +- On import: extracts card fields, normalises to the project's JSON format, saves to `cards/`, + and optionally extracts the embedded avatar. +- Shows a preview of extracted fields (name, description snippet, scenario snippet) before + confirming the import.
+- After import, allows immediately switching to the new character without restarting. +- Route: `POST /characters/import` (multipart form upload). + +### C.4 Character Selector / Switcher + +A UI for browsing and switching the active character without restarting the server. + +- Lists all cards in `cards/` with avatar thumbnails, name, and a brief description snippet. +- "Switch" button triggers a hot-reload (see `REFINEMENTS.md §7` character hot-reload). +- Shows which character is currently active. +- Route: `GET /characters` (list), `POST /characters/{name}/activate`. + +### C.5 In-App Character Card Editor + +A form-based editor for creating and editing character cards within the web UI. Several +open-source implementations already exist and could be referenced or adapted: + +- [ZoltanAI/character-editor](https://github.com/ZoltanAI/character-editor) — standalone + browser-based editor for V1/V2 cards; good reference for field layout and PNG embedding. +- SillyTavern has a built-in card editor that supports V2 and lorebook editing. +- [character-card-spec-v3](https://github.com/kwaroran/character-card-spec-v3) provides the + canonical field reference for a V3-compatible editor. + +Scope for this project: + +- Edit core fields: name, description, scenario, personality, first message, mes_example, + voice instructions, tags. +- Avatar upload inline (replaces §C.2 standalone upload). +- Save as JSON to `cards/` and optionally export as CCv2/CCv3 PNG (embed in tEXt chunk). +- Route: `GET /characters/{name}/edit`, `POST /characters/{name}/edit`. +- Depends on `REFINEMENTS.md §11` card format backend and §C.3 import pipeline (shared parsing). + +**Note on avatar management without a full editor:** Until §C.5 is built, the simplest path +is to drop an image file (`avatar.png`, `.jpg`, or `.webp`) into +`character_storage/<stem>/` — the web app serves it automatically via +`GET /characters/avatar`.
The stem is the character name lowercased with spaces → underscores +(e.g., "SHODAN" → `character_storage/shodan/avatar.jpg`). + +--- + ## Suggested Execution Order (UI) -1. RAG Management UI (§B) as a self-contained milestone — implement §B.6 steps in order. -2. Token budget visualization (§A.1) and per-turn token stats panel (§A.2) — low-risk extensions +1. ✅ RAG Management UI (§B) as a self-contained milestone — implement §B.6 steps in order. +2. ✅ Token budget visualization (§A.1) and per-turn token stats panel (§A.2) — low-risk extensions to the existing diagnostics panel; depends on `REFINEMENTS.md §8` backend work. -3. Session history search (§A.4) — stateless read-only feature, no new backend state model needed. -4. Conversation branching controls (§A.3) — depends on `REFINEMENTS.md §7` session state changes. -5. Memory panel (§A.5) — depends on `REFINEMENTS.md §6` Tier 1 memory being implemented first. -6. Skills dropdown (§A.6) — depends on `REFINEMENTS.md §7` skills config backend. +3. ✅ Session history search (§A.4) — stateless read-only feature, no new backend state model needed. +4. ✅ Character avatar display + tab sidebar (§C.1) — avatar served from `character_storage/<stem>/`. +5. ✅ RAG file upload + create collection from UI (§B.1/B.2 extension) — `POST /rag/files/upload`, + `POST /rag/collections`; "Ingest →" per-file action; upload-and-ingest combined flow. +6. Conversation branching controls (§A.3) — depends on `REFINEMENTS.md §7` session state changes. +7. Memory panel (§A.5) — depends on `REFINEMENTS.md §6` Tier 1 memory being implemented first. +8. Skills dropdown (§A.6) — depends on `REFINEMENTS.md §7` skills config backend. +9. Avatar upload UI (§C.2) — small addition; no major backend dependency. +10. Character card import UI (§C.3) — depends on `REFINEMENTS.md §11.2` import backend. +11. Character switcher (§C.4) — depends on `REFINEMENTS.md §7` character hot-reload. +12.
In-app character card editor (§C.5) — largest UI item; depends on §C.3 + §C.4. diff --git a/scripts/rag/push_rag_data.py b/scripts/rag/push_rag_data.py index 2cc77bc..4265a89 100644 --- a/scripts/rag/push_rag_data.py +++ b/scripts/rag/push_rag_data.py @@ -236,7 +236,7 @@ def push_to_collection( try: context.client.delete_collection(collection_name) logger.info(f"Deleted existing collection: {collection_name}") - except ValueError: + except MISSING_COLLECTION_ERRORS: logger.debug(f"Collection {collection_name} doesn't exist, creating new") else: assert_collection_fingerprint_compatible(context.client, collection_name, expected_fingerprint) diff --git a/templates/diagnostics_panel.html b/templates/diagnostics_panel.html index d4d6b50..2183eeb 100644 --- a/templates/diagnostics_panel.html +++ b/templates/diagnostics_panel.html @@ -1,29 +1,69 @@
+{% if last_budget and last_budget.context_window > 0 %} +{% set cw = last_budget.context_window %} +{% set sys_pct = (last_budget.system_prompt_tokens / cw * 100) | round(1) %} +{% set hist_pct = (last_budget.history_tokens / cw * 100) | round(1) %} +{% set rag_pct = (last_budget.rag_tokens / cw * 100) | round(1) %} +{% set ex_pct = (last_budget.examples_tokens / cw * 100) | round(1) %} +{% set inp_pct = (last_budget.input_tokens / cw * 100) | round(1) %} +{% set res_pct = (last_budget.reserved_for_response / cw * 100) | round(1) %} +{% set used_pct = (last_budget.total_estimated / cw * 100) | round(1) %} +
+
+ Context window: {{ last_budget.total_estimated }} / {{ cw }} tokens + {{ used_pct }}% +
+
+
+
+
+
+
+
+
+
+
+ System + History + RAG + Examples + Input + Reserved + Free +
+
+{% endif %} {% if history %} - - - - - - - + + + + + + + {% for entry in history %} {% set r = entry.retrieval if entry.retrieval else {} %} - {% set cl = r.cleanup if r.cleanup else {} %} {% set drift = entry.persona.drift_score if entry.persona else none %} + {% set tb = entry.token_budget if entry.token_budget else {} %} + {% set cw = tb.context_window if tb.context_window else 0 %} + {% set used = tb.total_estimated if tb.total_estimated else 0 %} + {% set ctx_pct = (used / cw * 100) | round(0) | int if cw > 0 else none %} + {% set main_chunks = r.main.returned if r.main is defined else 0 %} - - - - + + + + {{ ctx_pct ~ "%" if ctx_pct is not none else "—" }} + + {{ "%.3f" | format(drift) if drift is not none else "—" }} @@ -31,6 +71,23 @@ {% endfor %}
TLatencyCharsMainMESCross−DriftTLatencyPrompt~Compl~Ctx%RAGDrift
{{ entry.turn }} {{ "%.2fs" | format(entry.latency_s) if entry.latency_s is not none else "—" }}{{ entry.chars_emitted if entry.chars_emitted is not none else "—" }}{{ cl.main if cl.main is defined else 0 }}{{ cl.mes if cl.mes is defined else 0 }}{{ cl.cross_removed if cl.cross_removed is defined else 0 }}{{ used if used > 0 else "—" }}{{ entry.estimated_completion_tokens if entry.estimated_completion_tokens is not none else "—" }}{{ main_chunks }}
+{% set ns = namespace(total_prompt=0, total_compl=0, count=0) %} +{% for entry in history %} + {% set tb = entry.token_budget if entry.token_budget else {} %} + {% if tb.total_estimated and tb.total_estimated > 0 %} + {% set ns.total_prompt = ns.total_prompt + tb.total_estimated %} + {% set ns.total_compl = ns.total_compl + (entry.estimated_completion_tokens or 0) %} + {% set ns.count = ns.count + 1 %} + {% endif %} +{% endfor %} +{% if ns.count > 0 %} +
+ Session totals — Prompt: {{ ns.total_prompt }} tok  ·  + Completion: {{ ns.total_compl }} tok  ·  + Avg ctx: {{ (ns.total_prompt / ns.count / (last_budget.context_window or 1) * 100) | round(1) if last_budget and last_budget.context_window else "—" }}% + ({{ ns.count }} turn{{ "s" if ns.count != 1 else "" }}) +
+{% endif %} {% else %} No turns recorded yet. {% endif %} diff --git a/templates/index.html b/templates/index.html index 4aa450b..235643c 100644 --- a/templates/index.html +++ b/templates/index.html @@ -31,16 +31,121 @@ } .sidebar h1 { - margin: 0 0 0.5rem; + margin: 0; + font-size: 1rem; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + } + + .sidebar-header { + display: flex; + align-items: center; + gap: 0.55rem; + margin-bottom: 0.75rem; + } + + .sidebar-avatar-sm { + width: 38px; + height: 38px; + border-radius: 50%; + object-fit: cover; + flex-shrink: 0; + border: 2px solid #2d3743; + } + + .sidebar-avatar-sm-placeholder { + width: 38px; + height: 38px; + border-radius: 50%; + background: #1f2d3d; + border: 2px solid #2d3743; + display: flex; + align-items: center; + justify-content: center; font-size: 1.1rem; + font-weight: 700; + color: #7cb3e8; + flex-shrink: 0; } - .meta { - font-size: 0.9rem; + /* Sidebar tabs */ + .sidebar-tabs { + display: flex; + gap: 2px; + margin-bottom: 0; + } + + .sidebar-tab { + flex: 1; + padding: 0.35rem 0.2rem; + font-size: 0.73rem; + font-weight: 600; + background: #0f1317; + border: 1px solid #2d3743; + border-bottom: none; + border-radius: 5px 5px 0 0; + color: #8b949e; + cursor: pointer; + text-align: center; + transition: color 0.15s, background 0.15s; + } + + .sidebar-tab:hover { color: #c9d1d9; } + + .sidebar-tab.active { + background: #161b22; + color: #e6edf3; + border-color: #3d4751; + } + + .sidebar-pane { + display: none; + border: 1px solid #3d4751; + border-radius: 0 0 8px 8px; + padding: 0.75rem; + background: #0f1317; + } + + .sidebar-pane.active { display: block; } + + /* Character tab */ + .char-avatar-wrap { + display: flex; + justify-content: center; + margin-bottom: 0.75rem; + } + + .char-avatar { + width: 180px; + height: 180px; + border-radius: 12px; + object-fit: cover; + border: 2px solid #2d3743; + } + + .char-avatar-placeholder { + width: 180px; + height: 180px; + 
border-radius: 12px; + background: #1f2d3d; + border: 2px solid #2d3743; + display: flex; + align-items: center; + justify-content: center; + font-size: 4rem; + font-weight: 700; + color: #7cb3e8; + } + + .char-meta { + font-size: 0.82rem; color: #98a6b6; - line-height: 1.5; + line-height: 1.7; } + .char-meta strong { color: #c9d1d9; } + .sidebar-actions { display: grid; grid-template-columns: 1fr 1fr; @@ -317,6 +422,65 @@ color: #98a6b6; } + /* Token budget bar */ + .budget-section { + margin: 0.5rem 0 0.75rem; + } + .budget-label { + font-size: 0.75rem; + color: #c5cdd6; + margin-bottom: 4px; + display: flex; + align-items: center; + gap: 0.4rem; + } + .budget-pct { + font-size: 0.7rem; + padding: 1px 5px; + border-radius: 3px; + font-weight: 600; + } + .bpct-green { background: #1e3a2a; color: #a8d8a8; } + .bpct-yellow { background: #3a2e10; color: #ffd166; } + .bpct-red { background: #3a1a1a; color: #f8b4b4; } + .budget-bar { + display: flex; + height: 12px; + border-radius: 4px; + overflow: hidden; + background: #1a1e2e; + cursor: help; + } + .budget-seg { min-width: 0; transition: width 0.4s; } + .seg-system { background: #7c6fdc; } + .seg-history { background: #3a9bd5; } + .seg-rag { background: #48c774; } + .seg-examples { background: #f9a825; } + .seg-input { background: #ff7043; } + .seg-reserved { background: #667788; } + .seg-headroom { background: #252840; flex: 1; } + .budget-legend { + display: flex; + flex-wrap: wrap; + gap: 4px 8px; + margin-top: 4px; + } + .budget-legend .leg { + font-size: 0.65rem; + padding: 1px 5px; + border-radius: 2px; + color: #0a0a14; + font-weight: 600; + opacity: 0.9; + } + .diag-totals { + font-size: 0.72rem; + color: #98a6b6; + margin-top: 0.5rem; + padding-top: 0.4rem; + border-top: 1px solid #2a2e3e; + } + .profile-current-table { margin-top: 0.5rem; } @@ -433,11 +597,139 @@ color: #c9d1d9; } - .guide-item p { - margin: 0; - font-size: 0.77rem; + .search-result-item { + padding: 0.4rem 0.5rem; + border: 1px solid 
#2d3743; + border-radius: 6px; + background: #111418; + margin-top: 0.3rem; + } + + .search-result-header { + display: flex; + align-items: baseline; + gap: 0.3rem; + margin-bottom: 0.2rem; + flex-wrap: wrap; + } + + .search-result-name { + font-size: 0.78rem; + font-weight: 600; + color: #c9d1d9; + flex: 1; + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + + .badge-char { + font-size: 0.68rem; + background: #1a2b3b; + color: #79c0ff; + padding: 0.1rem 0.35rem; + border-radius: 8px; + white-space: nowrap; + } + + .search-result-date { + font-size: 0.7rem; color: #98a6b6; - line-height: 1.45; + white-space: nowrap; + } + + .search-snippet { + font-size: 0.73rem; + color: #98a6b6; + line-height: 1.35; + margin-bottom: 0.15rem; + word-break: break-word; + } + + .snip-role { + font-size: 0.65rem; + font-weight: 700; + text-transform: uppercase; + color: #4e8bc4; + margin-right: 0.25rem; + } + + .search-load-btn { + margin-top: 0.3rem; + padding: 0.22rem 0.45rem; + font-size: 0.74rem; + background: #1e242b; + border: 1px solid #2d3743; + border-radius: 5px; + color: #79c0ff; + cursor: pointer; + } + + .search-load-btn:hover { + background: #262d36; + } + + #session-search summary { + font-size: 0.8rem; + color: #98a6b6; + cursor: pointer; + list-style: none; + padding: 0.25rem 0; + margin-top: 0.35rem; + } + + #session-search summary::after { + content: " ▸"; + font-size: 0.7rem; + } + + #session-search[open] summary::after { + content: " ▾"; + } + + .search-form { + display: flex; + flex-direction: column; + gap: 0.3rem; + margin-top: 0.4rem; + } + + .search-form input[type="text"], + .search-form input[type="date"] { + border: 1px solid #2d3743; + background: #111418; + color: #e6edf3; + border-radius: 5px; + padding: 0.3rem 0.45rem; + font-size: 0.78rem; + width: 100%; + box-sizing: border-box; + } + + .search-form input[type="date"]::-webkit-calendar-picker-indicator { + filter: invert(0.65); + } + + .search-date-row { + 
display: grid; + grid-template-columns: 1fr 1fr; + gap: 0.3rem; + } + + .search-submit-btn { + padding: 0.3rem 0.5rem; + font-size: 0.78rem; + background: #1f6feb; + border: 1px solid #1f6feb; + border-radius: 5px; + color: white; + cursor: pointer; + } + + #session-search-results { + max-height: 360px; + overflow-y: auto; } @@ -445,12 +737,17 @@
@@ -559,6 +901,17 @@

Presets