diff --git a/CHANGELOG.md b/CHANGELOG.md index ab783b1..1e12af3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- **Summary disk cache (Phase 3)** — project list and tab summaries cached under + `~/.cache/cursor-chat-browser/`, invalidated when global or per-workspace DB + mtimes change; bypass with `?nocache=1` or `CURSOR_CHAT_BROWSER_NOCACHE=1` (#84) +- **Lazy-load workspace UI** — workspace sidebar renders from a lightweight summary + payload; full bubble content is fetched per-conversation when the user selects it, + reducing first-paint time from 1–2 min to < 3 s on large local fixtures (#84) +- **`GET /api/workspaces/<workspace_id>/tabs?summary=1`** — new summary-only variant returns + `id`, `title`, `timestamp`, `messageCount`, and optional `metadata.modelsUsed` + without loading any bubble data (#84) +- **`GET /api/workspaces/<workspace_id>/tabs/<composer_id>`** — new single-conversation + endpoint loads only scoped `bubbleId:{id}:%`, `messageRequestContext:{id}:%`, + and `codeBlockDiff:{id}:%` KV rows, avoiding a full global bubble scan (#84) +- **Scoped KV loaders** in `services/workspace_db.py`: + `load_bubbles_for_composer`, `load_message_request_context_for_composer`, + `load_code_block_diffs_for_composer` — used by the single-tab path (#84) + +### Changed +- **List-path performance** — skip full `messageRequestContext` scan unless + invalid workspace aliases are needed; filter `composerData` in SQL; skip + `Composer.from_dict` on list/summary paths; cache `composer_id_to_ws` mapping (#84) +- **`GET /api/workspaces`** (`list_workspace_projects`) no longer performs a + global `bubbleId:%` scan; conversation presence is determined from + `fullConversationHeadersOnly` headers alone, and workspace assignment relies + on `composer_id_to_ws` (primary) plus `projectLayouts` from MRC (#84) +- **`assemble_workspace_tabs`** inner per-composer loop refactored into a shared + 
`_assemble_tab_from_composer_data` helper reused by `assemble_single_tab`; full + path behaviour is unchanged (#84) + +### Deprecated +- Direct use of `GET /api/workspaces/<workspace_id>/tabs` (no `?summary=1`) from the workspace + UI on page load; the UI now calls `?summary=1` for first paint and lazy-fetches + individual tabs. The full-assembly endpoint remains available for export, + search, and backward-compatible consumers (planned removal: post-1.0) (#84) + + - **Web UI** — browse and search all Cursor AI workspaces; conversation view with syntax-highlighted code blocks, dark/light mode, and bookmarkable chat URLs (#63) - **Export formats** — one-click export of chats as Markdown, HTML, PDF, JSON, and CSV from the web UI (#63) - **CLI export** (`cursor-chat-export` / `scripts/export.py`) — zip archive or individual Markdown files with YAML frontmatter; incremental mode (`--since last`) preserves state across runs (#63, #42, #61) diff --git a/api/workspaces.py b/api/workspaces.py index b322eef..d8556c9 100644 --- a/api/workspaces.py +++ b/api/workspaces.py @@ -11,7 +11,7 @@ import os from datetime import datetime, timezone -from flask import Blueprint, jsonify +from flask import Blueprint, jsonify, request from api.flask_config import exclusion_rules @@ -29,7 +29,11 @@ ) from services.cli_tabs import get_cli_workspace_tabs from services.workspace_listing import list_workspace_projects -from services.workspace_tabs import assemble_workspace_tabs +from services.workspace_tabs import ( + assemble_single_tab, + assemble_workspace_tabs, + list_workspace_tab_summaries, +) # Re-exported for tests/test_models_wired_at_read_sites.py — the typed-model # spy harness patches `workspaces_mod.Bubble` / `.Composer` / `.Workspace` to @@ -46,12 +50,18 @@ # GET /api/workspaces # --------------------------------------------------------------------------- +def _request_nocache() -> bool: + return request.args.get("nocache") in ("1", "true") + + @bp.route("/api/workspaces") def 
list_workspaces(): try: workspace_path = resolve_workspace_path() rules = exclusion_rules() - projects, warnings = list_workspace_projects(workspace_path, rules) + projects, warnings = list_workspace_projects( + workspace_path, rules, nocache=_request_nocache(), + ) payload: dict = {"projects": projects} if warnings: payload["warnings"] = warnings @@ -154,9 +164,33 @@ def get_workspace_tabs(workspace_id): try: workspace_path = resolve_workspace_path() rules = exclusion_rules() - payload, status = assemble_workspace_tabs(workspace_id, workspace_path, rules) + summary = request.args.get("summary") in ("1", "true") + if summary: + payload, status = list_workspace_tab_summaries( + workspace_id, workspace_path, rules, nocache=_request_nocache(), + ) + else: + payload, status = assemble_workspace_tabs(workspace_id, workspace_path, rules) return jsonify(payload), status except Exception: _logger.exception("Failed to get workspace tabs") return jsonify({"error": "Failed to get workspace tabs"}), 500 + +# --------------------------------------------------------------------------- +# GET /api/workspaces/<workspace_id>/tabs/<composer_id> +# --------------------------------------------------------------------------- + +@bp.route("/api/workspaces/<workspace_id>/tabs/<composer_id>") +def get_workspace_tab(workspace_id, composer_id): + if workspace_id.startswith("cli:"): + return jsonify({"error": "Per-tab lazy load is not supported for CLI workspaces"}), 400 + try: + workspace_path = resolve_workspace_path() + rules = exclusion_rules() + payload, status = assemble_single_tab(workspace_id, composer_id, workspace_path, rules) + return jsonify(payload), status + except Exception: + _logger.exception("Failed to get workspace tab") + return jsonify({"error": "Failed to get workspace tab"}), 500 + diff --git a/services/summary_cache.py b/services/summary_cache.py new file mode 100644 index 0000000..b0010e1 --- /dev/null +++ b/services/summary_cache.py @@ -0,0 +1,226 @@ +"""Disk cache for derived workspace summaries (issue #84 Phase 3). 
+ +Caches project lists and per-workspace tab summaries keyed by storage mtimes +so repeat page loads avoid re-scanning Cursor's global KV index. + +Bypass: set env ``CURSOR_CHAT_BROWSER_NOCACHE=1`` or pass ``?nocache=1`` on API +requests. Cache files live under ``~/.cache/cursor-chat-browser/``. +""" + +from __future__ import annotations + +import hashlib +import json +import logging +import os +from pathlib import Path +from typing import Any + +_logger = logging.getLogger(__name__) + +CACHE_VERSION = 1 +CACHE_DIR = Path.home() / ".cache" / "cursor-chat-browser" +PROJECTS_CACHE_FILE = CACHE_DIR / "projects.json" +COMPOSER_MAP_CACHE_FILE = CACHE_DIR / "composer-id-to-ws.json" +TAB_SUMMARIES_PREFIX = "tab-summaries-" + + +def nocache_enabled(*, request_nocache: bool = False) -> bool: + if request_nocache: + return True + return os.environ.get("CURSOR_CHAT_BROWSER_NOCACHE", "").strip().lower() in ( + "1", + "true", + "yes", + ) + + +def _rules_digest(rules: list) -> str: + try: + payload = json.dumps(rules, sort_keys=True, ensure_ascii=False) + except (TypeError, ValueError): + payload = repr(rules) + return hashlib.sha256(payload.encode("utf-8")).hexdigest()[:16] + + +def _file_mtime_ns(path: str | None) -> int | None: + if not path or not os.path.isfile(path): + return None + try: + return os.stat(path).st_mtime_ns + except OSError: + return None + + +def fingerprint_workspace_storage( + workspace_path: str, + workspace_entries: list[dict], + *, + global_db_path: str | None, + rules: list, + cli_chats_path: str | None = None, +) -> dict[str, Any]: + """Build a fingerprint dict for cache invalidation.""" + ws_mt: list[list[str | int]] = [] + for entry in workspace_entries: + name = entry.get("name") + if not isinstance(name, str): + continue + base = os.path.join(workspace_path, name) + for rel in ("state.vscdb", "workspace.json"): + p = os.path.join(base, rel) + mtime = _file_mtime_ns(p) + if mtime is not None: + ws_mt.append([f"{name}/{rel}", mtime]) + 
ws_mt.sort(key=lambda row: row[0]) + + return { + "version": CACHE_VERSION, + "workspace_path": os.path.normpath(workspace_path), + "global_db_mtime_ns": _file_mtime_ns(global_db_path), + "workspace_files": ws_mt, + "rules_digest": _rules_digest(rules), + "cli_chats_mtime_ns": _file_mtime_ns(cli_chats_path), + } + + +def _normalize_fingerprint(fp: dict[str, Any]) -> dict[str, Any]: + """Normalize fingerprint for comparison (JSON round-trip uses lists, not tuples).""" + normalized = dict(fp) + wf = fp.get("workspace_files") + if isinstance(wf, list): + normalized["workspace_files"] = [ + [row[0], row[1]] if isinstance(row, (list, tuple)) and len(row) == 2 else row + for row in wf + ] + return normalized + + +def _fingerprint_equal(a: object, b: dict[str, Any]) -> bool: + if not isinstance(a, dict): + return False + return _normalize_fingerprint(a) == _normalize_fingerprint(b) + + +def _read_cache_file(path: Path | str) -> dict[str, Any] | None: + p = Path(path) + if not p.is_file(): + return None + try: + with p.open(encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, dict): + return None + return data + except (OSError, json.JSONDecodeError) as e: + _logger.debug("Summary cache read failed for %s: %s", path, e) + return None + + +def _write_cache_file(path: Path | str, payload: dict[str, Any]) -> None: + p = Path(path) + try: + p.parent.mkdir(parents=True, exist_ok=True) + tmp = p.with_suffix(p.suffix + ".tmp") + with tmp.open("w", encoding="utf-8") as f: + json.dump(payload, f, ensure_ascii=False) + tmp.replace(p) + except OSError as e: + _logger.warning("Summary cache write failed for %s: %s", path, e) + + +def get_cached_projects(fingerprint: dict[str, Any]) -> tuple[list[dict], list[dict]] | None: + data = _read_cache_file(PROJECTS_CACHE_FILE) + if not data: + return None + if not _fingerprint_equal(data.get("fingerprint"), fingerprint): + return None + projects = data.get("projects") + warnings = data.get("warnings") + if not 
isinstance(projects, list): + return None + if not isinstance(warnings, list): + warnings = [] + return projects, warnings + + +def set_cached_projects( + fingerprint: dict[str, Any], + projects: list[dict], + warnings: list[dict], +) -> None: + _write_cache_file( + PROJECTS_CACHE_FILE, + { + "fingerprint": fingerprint, + "projects": projects, + "warnings": warnings, + }, + ) + + +def get_cached_composer_id_to_ws( + fingerprint: dict[str, Any], +) -> dict[str, str] | None: + data = _read_cache_file(COMPOSER_MAP_CACHE_FILE) + if not data: + return None + if not _fingerprint_equal(data.get("fingerprint"), fingerprint): + return None + mapping = data.get("composer_id_to_ws") + if not isinstance(mapping, dict): + return None + return {str(k): str(v) for k, v in mapping.items()} + + +def set_cached_composer_id_to_ws( + fingerprint: dict[str, Any], + mapping: dict[str, str], +) -> None: + _write_cache_file( + COMPOSER_MAP_CACHE_FILE, + { + "fingerprint": fingerprint, + "composer_id_to_ws": mapping, + }, + ) + + +def _tab_summaries_path(workspace_id: str) -> Path: + safe = hashlib.sha256(workspace_id.encode("utf-8")).hexdigest()[:16] + return CACHE_DIR / f"{TAB_SUMMARIES_PREFIX}{safe}.json" + + +def get_cached_tab_summaries( + fingerprint: dict[str, Any], + workspace_id: str, +) -> tuple[dict, int] | None: + data = _read_cache_file(_tab_summaries_path(workspace_id)) + if not data: + return None + if data.get("workspace_id") != workspace_id: + return None + if not _fingerprint_equal(data.get("fingerprint"), fingerprint): + return None + payload = data.get("payload") + status = data.get("status", 200) + if not isinstance(payload, dict) or not isinstance(status, int): + return None + return payload, status + + +def set_cached_tab_summaries( + fingerprint: dict[str, Any], + workspace_id: str, + payload: dict, + status: int, +) -> None: + _write_cache_file( + _tab_summaries_path(workspace_id), + { + "workspace_id": workspace_id, + "fingerprint": fingerprint, + "payload": 
payload, + "status": status, + }, + ) diff --git a/services/workspace_db.py b/services/workspace_db.py index bfe7c0b..3a90c4d 100644 --- a/services/workspace_db.py +++ b/services/workspace_db.py @@ -47,11 +47,52 @@ def load_bubble_map(global_db) -> dict[str, dict]: return bubble_map +def _extract_root_paths_from_context(ctx: dict) -> list[str]: + """Pull ``rootPath`` strings from a messageRequestContext JSON object.""" + paths: list[str] = [] + layouts = ctx.get("projectLayouts") + if not isinstance(layouts, list): + return paths + for layout in layouts: + try: + o = json.loads(layout) if isinstance(layout, str) else layout + if isinstance(o, dict) and o.get("rootPath"): + paths.append(o["rootPath"]) + except (json.JSONDecodeError, ValueError, KeyError, TypeError): + continue + return paths + + +def load_project_layouts_for_composer(global_db, composer_id: str) -> list[str]: + """Scoped MRC load: ``messageRequestContext:{composer_id}:%`` only.""" + paths: list[str] = [] + try: + rows = global_db.execute( + "SELECT key, value FROM cursorDiskKV WHERE key LIKE ?", + (f"messageRequestContext:{composer_id}:%",), + ).fetchall() + except sqlite3.Error: + return paths + for row in rows: + try: + ctx = json.loads(row["value"]) + if isinstance(ctx, dict): + paths.extend(_extract_root_paths_from_context(ctx)) + except (json.JSONDecodeError, ValueError, KeyError, TypeError) as e: + _logger.debug( + "Skipping malformed messageRequestContext row %s: %s", + row["key"], + e, + ) + return paths + + def load_project_layouts_map(global_db) -> dict[str, list]: - """Load ``projectLayouts`` from ``messageRequestContext:*`` KV entries. + """Load ``projectLayouts`` from all ``messageRequestContext:*`` KV entries. - Returns ``{composer_id: [root_path_str, ...]}``. String-encoded layout - objects are JSON-decoded before the ``rootPath`` field is extracted. + Returns ``{composer_id: [root_path_str, ...]}``. 
Prefer + :func:`load_project_layouts_for_composer` on list paths when only a few + composers need layout fallbacks. """ layouts_map: dict[str, list] = {} try: @@ -67,17 +108,9 @@ def load_project_layouts_map(global_db) -> dict[str, list]: cid = parts[1] try: ctx = json.loads(row["value"]) - layouts = ctx.get("projectLayouts") - if not isinstance(layouts, list): - continue - layouts_map.setdefault(cid, []) - for layout in layouts: - try: - o = json.loads(layout) if isinstance(layout, str) else layout - if isinstance(o, dict) and o.get("rootPath"): - layouts_map[cid].append(o["rootPath"]) - except (json.JSONDecodeError, ValueError, KeyError, TypeError) as e: - _logger.debug("Skipping malformed layout entry in %s: %s", row["key"], e) + if isinstance(ctx, dict): + layouts_map.setdefault(cid, []) + layouts_map[cid].extend(_extract_root_paths_from_context(ctx)) except (json.JSONDecodeError, ValueError, KeyError, TypeError) as e: _logger.debug("Skipping malformed messageRequestContext row %s: %s", row["key"], e) return layouts_map @@ -113,6 +146,99 @@ def load_code_block_diff_map(global_db) -> dict[str, list]: return diff_map +def load_bubbles_for_composer(global_db, composer_id: str) -> dict[str, dict]: + """Load ``bubbleId:{composer_id}:*`` KV entries into ``{bubble_id: bubble_dict}``. + + Scoped alternative to :func:`load_bubble_map` for single-conversation assembly; + avoids a full global ``bubbleId:%`` scan. 
+ """ + bubble_map: dict[str, dict] = {} + try: + rows = global_db.execute( + "SELECT key, value FROM cursorDiskKV WHERE key LIKE ?", + (f"bubbleId:{composer_id}:%",), + ).fetchall() + except sqlite3.Error: + return bubble_map + for row in rows: + parts = row["key"].split(":") + if len(parts) < 3: + continue + bid = parts[2] + try: + b = json.loads(row["value"]) + if isinstance(b, dict): + bubble_map[bid] = b + except (json.JSONDecodeError, ValueError, KeyError, TypeError) as e: + _logger.debug("Skipping malformed bubbleId row %s: %s", row["key"], e) + return bubble_map + + +def load_message_request_context_for_composer( + global_db, composer_id: str +) -> list[dict]: + """Load ``messageRequestContext:{composer_id}:*`` KV entries. + + Returns a list of context dicts, each with an injected ``contextId`` key + taken from the third path component of the KV key. Scoped alternative to + the global MRC pass inside :func:`load_project_layouts_map`. + """ + contexts: list[dict] = [] + try: + rows = global_db.execute( + "SELECT key, value FROM cursorDiskKV WHERE key LIKE ?", + (f"messageRequestContext:{composer_id}:%",), + ).fetchall() + except sqlite3.Error: + return contexts + for row in rows: + parts = row["key"].split(":") + if len(parts) < 3: + continue + context_id = parts[2] + try: + ctx = json.loads(row["value"]) + if isinstance(ctx, dict): + contexts.append({**ctx, "contextId": context_id}) + except (json.JSONDecodeError, ValueError, KeyError, TypeError) as e: + _logger.debug( + "Skipping malformed messageRequestContext row %s: %s", + row["key"], + e, + ) + return contexts + + +def load_code_block_diffs_for_composer( + global_db, composer_id: str +) -> list[dict]: + """Load ``codeBlockDiff:{composer_id}:*`` KV entries. + + Returns a list of diff dicts, each with an injected ``diffId`` key. + Scoped alternative to :func:`load_code_block_diff_map` for single-conversation + assembly. 
+ """ + diffs: list[dict] = [] + try: + rows = global_db.execute( + "SELECT key, value FROM cursorDiskKV WHERE key LIKE ?", + (f"codeBlockDiff:{composer_id}:%",), + ).fetchall() + except sqlite3.Error: + return diffs + for row in rows: + parts = row["key"].split(":") + try: + d = json.loads(row["value"]) + if isinstance(d, dict): + diffs.append({**d, "diffId": parts[2] if len(parts) > 2 else None}) + except (json.JSONDecodeError, ValueError, KeyError, TypeError) as e: + _logger.debug( + "Skipping malformed codeBlockDiff row %s: %s", row["key"], e + ) + return diffs + + def collect_workspace_entries(workspace_path: str) -> list[dict]: """Scan workspace directory and return entries with workspace.json. @@ -164,6 +290,21 @@ def collect_invalid_workspace_ids(workspace_entries: list[dict]) -> set[str]: return invalid +# Composers that have at least one header entry (list/summary paths). +COMPOSER_ROWS_WITH_HEADERS_SQL = ( + "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'" + " AND LENGTH(value) > 10" + " AND value LIKE '%fullConversationHeadersOnly%'" + " AND value NOT LIKE '%fullConversationHeadersOnly\":[]%'" + " AND value NOT LIKE '%fullConversationHeadersOnly\": []%'" +) + + +def global_storage_db_path(workspace_path: str) -> str: + """Resolved path to Cursor global ``state.vscdb`` for a workspace storage root.""" + return os.path.normpath(os.path.join(workspace_path, "..", "globalStorage", "state.vscdb")) + + def build_composer_id_to_workspace_id(workspace_path: str, workspace_entries: list) -> dict: """Build mapping from composer ID to workspace folder name. 
@@ -212,6 +353,41 @@ def build_composer_id_to_workspace_id(workspace_path: str, workspace_entries: li return mapping +def build_composer_id_to_workspace_id_cached( + workspace_path: str, + workspace_entries: list, + rules: list, + *, + nocache: bool = False, +) -> dict: + """Like :func:`build_composer_id_to_workspace_id` with optional disk cache.""" + from services.summary_cache import ( + fingerprint_workspace_storage, + get_cached_composer_id_to_ws, + nocache_enabled, + set_cached_composer_id_to_ws, + ) + from utils.workspace_path import get_cli_chats_path + + gdb = global_storage_db_path(workspace_path) + cli_path = get_cli_chats_path() + fingerprint = fingerprint_workspace_storage( + workspace_path, + workspace_entries, + global_db_path=gdb if os.path.isfile(gdb) else None, + rules=rules, + cli_chats_path=cli_path if os.path.isdir(cli_path) else None, + ) + if not nocache_enabled(request_nocache=nocache): + cached = get_cached_composer_id_to_ws(fingerprint) + if cached is not None: + return cached + mapping = build_composer_id_to_workspace_id(workspace_path, workspace_entries) + if not nocache_enabled(request_nocache=nocache): + set_cached_composer_id_to_ws(fingerprint, mapping) + return mapping + + @contextmanager def open_global_db(workspace_path: str): """Open Cursor global storage SQLite database read-only. @@ -224,8 +400,7 @@ def open_global_db(workspace_path: str): ``row_factory=sqlite3.Row``, or ``None`` if the database file is missing or cannot be opened. ``path`` is always the resolved global DB path. 
""" - global_db_path = os.path.join(workspace_path, "..", "globalStorage", "state.vscdb") - global_db_path = os.path.normpath(global_db_path) + global_db_path = global_storage_db_path(workspace_path) if not os.path.isfile(global_db_path): yield None, global_db_path return diff --git a/services/workspace_listing.py b/services/workspace_listing.py index 879710c..890b8c1 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -17,16 +17,24 @@ warn_workspace_json_read, ) from utils.workspace_descriptor import read_json_file -from utils.workspace_path import get_cli_chats_path -from models import Composer, ParseWarningCollector, SchemaError +from models import ParseWarningCollector +from services.summary_cache import ( + fingerprint_workspace_storage, + get_cached_projects, + nocache_enabled, + set_cached_projects, +) from services.workspace_db import ( - build_composer_id_to_workspace_id, + COMPOSER_ROWS_WITH_HEADERS_SQL, + build_composer_id_to_workspace_id_cached, collect_invalid_workspace_ids, collect_workspace_entries, - load_bubble_map, + global_storage_db_path, + load_project_layouts_for_composer, load_project_layouts_map, open_global_db, ) +from utils.workspace_path import get_cli_chats_path from services.workspace_resolver import ( create_project_name_to_workspace_id_map, create_workspace_path_to_id_map, @@ -37,12 +45,46 @@ ) -def list_workspace_projects(workspace_path: str, rules: list) -> tuple[list[dict], list[dict]]: +def _composer_valid_for_listing( + cd: dict, + composer_id: str, + parse_warnings: ParseWarningCollector, +) -> bool: + """Lightweight list-path checks aligned with :class:`models.Composer` requirements.""" + if "fullConversationHeadersOnly" not in cd: + return False + created_at = cd.get("createdAt") + if not isinstance(created_at, (int, float)) or isinstance(created_at, bool): + _logger.warning( + "Failed to parse Composer from composerData:%s: expected timestamp number for createdAt, got %s", + composer_id, + 
type(created_at).__name__, + ) + parse_warnings.record_composer_skipped() + return False + headers = cd.get("fullConversationHeadersOnly") + if not isinstance(headers, list): + _logger.warning( + "Failed to parse Composer from composerData:%s: fullConversationHeadersOnly must be a list", + composer_id, + ) + parse_warnings.record_composer_skipped() + return False + return True + + +def list_workspace_projects( + workspace_path: str, + rules: list, + *, + nocache: bool = False, +) -> tuple[list[dict], list[dict]]: """List workspace projects for GET /api/workspaces. Args: workspace_path: Cursor ``workspaceStorage`` root. rules: Exclusion rule token lists from :func:`utils.exclusion_rules.load_rules`. + nocache: When ``True``, skip the mtime-keyed disk cache (Phase 3). Returns: ``(projects, warnings)``. Each project dict has ``id``, ``name``, @@ -52,17 +94,47 @@ def list_workspace_projects(workspace_path: str, rules: list) -> tuple[list[dict parse-error dicts (``type``, ``count``, ``detail``) from :meth:`models.ParseWarningCollector.to_api_list`; empty when no skips. 
""" - parse_warnings = ParseWarningCollector() workspace_entries = collect_workspace_entries(workspace_path) + gdb = global_storage_db_path(workspace_path) + cli_path = get_cli_chats_path() + fingerprint = fingerprint_workspace_storage( + workspace_path, + workspace_entries, + global_db_path=gdb if os.path.isfile(gdb) else None, + rules=rules, + cli_chats_path=cli_path if os.path.isdir(cli_path) else None, + ) + if not nocache_enabled(request_nocache=nocache): + cached = get_cached_projects(fingerprint) + if cached is not None: + return cached + + projects, warnings = _build_workspace_projects_uncached( + workspace_path, rules, workspace_entries, nocache=nocache, + ) + if not nocache_enabled(request_nocache=nocache): + set_cached_projects(fingerprint, projects, warnings) + return projects, warnings + + +def _build_workspace_projects_uncached( + workspace_path: str, + rules: list, + workspace_entries: list[dict], + *, + nocache: bool, +) -> tuple[list[dict], list[dict]]: + parse_warnings = ParseWarningCollector() invalid_workspace_ids = collect_invalid_workspace_ids(workspace_entries) project_name_map = create_project_name_to_workspace_id_map(workspace_entries) workspace_path_map = create_workspace_path_to_id_map(workspace_entries) - composer_id_to_ws = build_composer_id_to_workspace_id(workspace_path, workspace_entries) + composer_id_to_ws = build_composer_id_to_workspace_id_cached( + workspace_path, workspace_entries, rules, nocache=nocache, + ) conversation_map: dict[str, list] = {} - # closing semantics now baked into the context manager (issue #17). 
with open_global_db(workspace_path) as (global_db, _): if global_db: def _safe_fetchall(query: str, params: tuple = ()) -> list: @@ -70,28 +142,32 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: return global_db.execute(query, params).fetchall() except sqlite3.Error: return [] + try: - composer_rows = _safe_fetchall( - "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%' AND LENGTH(value) > 10" - ) + composer_rows = _safe_fetchall(COMPOSER_ROWS_WITH_HEADERS_SQL) + + project_layouts_map: dict[str, list] = {} + if invalid_workspace_ids: + project_layouts_map = load_project_layouts_map(global_db) + + bubble_map: dict[str, dict] = {} + invalid_workspace_aliases: dict[str, str] = {} + if invalid_workspace_ids: + invalid_workspace_aliases = infer_invalid_workspace_aliases( + composer_rows=composer_rows, + project_layouts_map=project_layouts_map, + project_name_map=project_name_map, + workspace_path_map=workspace_path_map, + workspace_entries=workspace_entries, + bubble_map=bubble_map, + composer_id_to_ws=composer_id_to_ws, + invalid_workspace_ids=invalid_workspace_ids, + ) - project_layouts_map: dict[str, list] = load_project_layouts_map(global_db) - bubble_map: dict[str, dict] = load_bubble_map(global_db) - - invalid_workspace_aliases = infer_invalid_workspace_aliases( - composer_rows=composer_rows, - project_layouts_map=project_layouts_map, - project_name_map=project_name_map, - workspace_path_map=workspace_path_map, - workspace_entries=workspace_entries, - bubble_map=bubble_map, - composer_id_to_ws=composer_id_to_ws, - invalid_workspace_ids=invalid_workspace_ids, - ) for row in composer_rows: cid = row["key"].split(":")[1] try: - parsed = json.loads(row["value"]) + cd = json.loads(row["value"]) except (json.JSONDecodeError, TypeError, ValueError) as e: _logger.warning( "Failed to decode Composer from composerData:%s: %s", @@ -100,26 +176,24 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: ) 
parse_warnings.record_composer_skipped() continue - if not isinstance(parsed, dict): + if not isinstance(cd, dict): _logger.warning( "Failed to parse Composer from composerData:%s: expected object, got %s", cid, - type(parsed).__name__, + type(cd).__name__, ) parse_warnings.record_composer_skipped() continue - try: - composer = Composer.from_dict(parsed, composer_id=cid) - except SchemaError as e: - _logger.warning( - "Failed to parse Composer from composerData:%s: %s", - cid, - e, - ) - parse_warnings.record_composer_skipped() + if not _composer_valid_for_listing(cd, cid, parse_warnings): continue - cd = composer.raw try: + if ( + cid not in composer_id_to_ws + and cid not in project_layouts_map + ): + project_layouts_map[cid] = load_project_layouts_for_composer( + global_db, cid, + ) pid = determine_project_for_conversation( cd, cid, project_layouts_map, project_name_map, workspace_path_map, @@ -131,14 +205,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: assigned = pid if pid else "global" headers = cd.get("fullConversationHeadersOnly") or [] - has_bubbles = any( - bubble_map.get(bubble_id) - for h in headers - if isinstance(h, dict) - for bubble_id in [h.get("bubbleId")] - if isinstance(bubble_id, str) - ) - if not has_bubbles: + if not headers: continue conversation_map.setdefault(assigned, []).append({ diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index f11a94b..3cd4e2e 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -21,13 +21,27 @@ from utils.tool_parser import parse_tool_call from utils.workspace_descriptor import read_json_file from models import Bubble, Composer, ParseWarningCollector, SchemaError +from services.summary_cache import ( + fingerprint_workspace_storage, + get_cached_tab_summaries, + nocache_enabled, + set_cached_tab_summaries, +) from services.workspace_db import ( - build_composer_id_to_workspace_id, + COMPOSER_ROWS_WITH_HEADERS_SQL, + 
build_composer_id_to_workspace_id_cached, collect_invalid_workspace_ids, collect_workspace_entries, + global_storage_db_path, + load_bubbles_for_composer, load_code_block_diff_map, + load_code_block_diffs_for_composer, + load_message_request_context_for_composer, + load_project_layouts_for_composer, + load_project_layouts_map, open_global_db, ) +from utils.workspace_path import get_cli_chats_path from services.workspace_resolver import ( create_project_name_to_workspace_id_map, create_workspace_path_to_id_map, @@ -77,6 +91,662 @@ def _kv_payload_log_meta(value: object | None) -> tuple[int, str | None]: return len(payload), hashlib.sha256(payload).hexdigest()[:12] +def _assemble_tab_from_composer_data( + composer_id: str, + cd: dict, + bubble_map: dict[str, dict], + contexts: list[dict], + code_block_diffs: list[dict], + workspace_display_name: str, + rules: list, + parse_warnings: ParseWarningCollector, +) -> dict | None: + """Assemble a single tab dict from an already-parsed composer dict. + + Args: + composer_id: Composer UUID. + cd: Raw ``composerData`` dict (``composer.raw``). + bubble_map: ``{bubble_id: bubble_dict}`` — may be global or scoped. + contexts: ``messageRequestContext`` entries for *this* composer + (list of dicts, each with an injected ``contextId`` key and a + ``bubbleId`` field from the JSON value). + code_block_diffs: ``codeBlockDiff`` entries for *this* composer. + workspace_display_name: Human-readable workspace name for rule matching. + rules: Exclusion rule token lists. + parse_warnings: Collector for skipped-bubble warnings. + + Returns: + A tab dict on success, or ``None`` when the tab should be omitted + (no renderable bubbles or excluded by rules). 
+ """ + headers = cd.get("fullConversationHeadersOnly") or [] + + bubbles: list[dict[str, Any]] = [] + for header in headers: + if not isinstance(header, dict): + continue + bubble_id = header.get("bubbleId") + if not isinstance(bubble_id, str): + continue + bubble = bubble_map.get(bubble_id) + if not bubble: + continue + + is_user = header.get("type") == 1 + msg_type = "user" if is_user else "ai" + text = extract_text_from_bubble(bubble) + + context_text = "" + for ctx in contexts: + if ctx.get("bubbleId") == bubble_id: + if ctx.get("gitStatusRaw"): + context_text += f"\n\n**Git Status:**\n```\n{ctx['gitStatusRaw']}\n```" + tf = ctx.get("terminalFiles") + if isinstance(tf, list) and tf: + context_text += "\n\n**Terminal Files:**" + for f in tf: + if not isinstance(f, dict): + continue + context_text += f"\n- {f.get('path', '')}" + af = ctx.get("attachedFoldersListDirResults") + if isinstance(af, list) and af: + context_text += "\n\n**Attached Folders:**" + for fld in af: + if not isinstance(fld, dict): + continue + files = fld.get("files") + if isinstance(files, list) and files: + context_text += f"\n\n**Folder:** {fld.get('path', 'Unknown')}" + for fi in files: + if not isinstance(fi, dict): + continue + context_text += f"\n- {fi.get('name', '')} ({fi.get('type', '')})" + cr = ctx.get("cursorRules") + if isinstance(cr, list) and cr: + context_text += "\n\n**Cursor Rules:**" + for rule in cr: + if not isinstance(rule, dict): + continue + context_text += f"\n- {rule.get('name') or rule.get('description') or 'Rule'}" + sc = ctx.get("summarizedComposers") + if isinstance(sc, list) and sc: + context_text += "\n\n**Related Conversations:**" + for comp in sc: + if not isinstance(comp, dict): + continue + context_text += f"\n- {comp.get('name') or comp.get('composerId') or 'Conversation'}" + + full_text = text + context_text + raw = bubble + token_count = raw.get("tokenCount") + + tool_calls = None + tfd = raw.get("toolFormerData") + if isinstance(tfd, dict): + tool_call 
= parse_tool_call(tfd) + if isinstance(tool_call, dict): + tool_calls = [tool_call] + + thinking = None + thinking_duration_ms = None + if raw.get("thinking"): + thinking = raw["thinking"] if isinstance(raw["thinking"], str) else (raw["thinking"].get("text") if isinstance(raw["thinking"], dict) else None) + thinking_duration_ms = raw.get("thinkingDurationMs") + + has_content = full_text.strip() or tool_calls or thinking + if not has_content: + continue + + ctx_window = raw.get("contextWindowStatusAtCreation") or {} + ctx_pct = None + if isinstance(ctx_window, dict): + if ctx_window.get("percentageRemainingFloat") is not None: + ctx_pct = ctx_window.get("percentageRemainingFloat") + elif ctx_window.get("percentageRemaining") is not None: + ctx_pct = ctx_window.get("percentageRemaining") + + display_text = full_text.strip() + if not display_text and tool_calls: + tc = tool_calls[0] + if isinstance(tc, dict): + display_text = f"**Tool: {tc.get('name', 'unknown')}**" + if tc.get("status"): + display_text += f" ({tc['status']})" + if not display_text and thinking: + display_text = thinking + + bubble_meta = None + model_info = raw.get("modelInfo") or {} + model_name = model_info.get("modelName") + if model_name == "default": + model_name = None + + if msg_type == "ai": + tc_dict = token_count if isinstance(token_count, dict) else {} + in_tok = tc_dict.get("inputTokens") or 0 + out_tok = tc_dict.get("outputTokens") or 0 + cached_tok = tc_dict.get("cachedTokens") or 0 + bubble_meta = { + "modelName": model_name, + "inputTokens": in_tok if in_tok > 0 else None, + "outputTokens": out_tok if out_tok > 0 else None, + "cachedTokens": cached_tok if cached_tok > 0 else None, + "toolResultsCount": (len(tool_calls) if tool_calls else None) or (len(raw["toolResults"]) if isinstance(raw.get("toolResults"), list) and raw["toolResults"] else None), + "toolResults": raw.get("toolResults") if isinstance(raw.get("toolResults"), list) and raw["toolResults"] else None, + "toolCalls": 
tool_calls, + "thinking": thinking, + "thinkingDurationMs": thinking_duration_ms, + "contextWindowPercent": ctx_pct, + } + elif msg_type == "user": + bubble_meta = { + "modelName": model_name, + "contextWindowPercent": ctx_pct, + } + if ctx_window: + tokens_used = ctx_window.get("tokensUsed", 0) + token_limit = ctx_window.get("tokenLimit", 0) + if tokens_used > 0: + bubble_meta["contextTokensUsed"] = tokens_used + if token_limit > 0: + bubble_meta["contextTokenLimit"] = token_limit + + if bubble_meta: + bubble_meta = {k: v for k, v in bubble_meta.items() if v is not None} + if not bubble_meta: + bubble_meta = None + + b_entry = { + "type": msg_type, + "text": display_text, + "timestamp": to_epoch_ms(bubble.get("createdAt")) or to_epoch_ms(bubble.get("timestamp")) or int(datetime.now().timestamp() * 1000), + } + if bubble_meta: + b_entry["metadata"] = bubble_meta + bubbles.append(b_entry) + + if not bubbles: + return None + + title = cd.get("name") or f"Conversation {composer_id[:8]}" + if not cd.get("name") and bubbles: + first_msg = bubbles[0].get("text", "") + if first_msg: + first_lines = [ln for ln in first_msg.split("\n") if ln.strip()] + if first_lines: + title = first_lines[0][:100] + if len(title) == 100: + title += "..." 
+ + _early_model_config = cd.get("modelConfig") or {} + _early_model_name = _early_model_config.get("modelName") + _early_model_names = [_early_model_name] if _early_model_name and _early_model_name != "default" else None + if is_excluded_by_rules(rules, build_searchable_text( + project_name=workspace_display_name, + chat_title=title, + model_names=_early_model_names, + )): + return None + + bubbles.sort(key=lambda b: b.get("timestamp") or 0) + + last_user_ts = None + for b in bubbles: + if b["type"] == "user": + last_user_ts = b.get("timestamp") + elif b["type"] == "ai" and last_user_ts is not None: + ts = b.get("timestamp") + if ts and ts > last_user_ts: + meta = b.setdefault("metadata", {}) + meta["responseTimeMs"] = ts - last_user_ts + + total_input = 0 + total_output = 0 + total_cached = 0 + total_response_ms = 0 + total_cost = 0.0 + total_tool_calls = 0 + total_thinking_ms = 0 + models_set: set = set() + for b in bubbles: + m = b.get("metadata") or {} + if m.get("inputTokens"): + total_input += m["inputTokens"] + if m.get("outputTokens"): + total_output += m["outputTokens"] + if m.get("cachedTokens"): + total_cached += m["cachedTokens"] + if m.get("responseTimeMs"): + total_response_ms += m["responseTimeMs"] + if m.get("cost") is not None: + total_cost += m["cost"] + if m.get("modelName"): + models_set.add(m["modelName"]) + if m.get("toolCalls"): + total_tool_calls += len(m["toolCalls"]) + if m.get("thinkingDurationMs"): + total_thinking_ms += m["thinkingDurationMs"] + + usage = cd.get("usageData") or {} + composer_cost = usage.get("cost") or usage.get("estimatedCost") + if isinstance(composer_cost, (int, float)) and total_cost == 0: + total_cost = composer_cost + + lines_added = cd.get("totalLinesAdded", 0) + lines_removed = cd.get("totalLinesRemoved", 0) + files_added = cd.get("addedFiles", 0) + files_removed = cd.get("removedFiles", 0) + + max_ctx_tokens = 0 + ctx_token_limit = 0 + for b in bubbles: + m = b.get("metadata") or {} + if 
m.get("contextTokensUsed", 0) > max_ctx_tokens: + max_ctx_tokens = m["contextTokensUsed"] + if m.get("contextTokenLimit", 0) > ctx_token_limit: + ctx_token_limit = m["contextTokenLimit"] + + tab_meta = None + has_any = any([total_input, total_output, total_cached, total_response_ms, + total_cost, models_set, total_tool_calls, total_thinking_ms, + lines_added, lines_removed, files_added, files_removed, + max_ctx_tokens]) + if has_any: + tab_meta_raw = { + "totalInputTokens": total_input or None, + "totalOutputTokens": total_output or None, + "totalCachedTokens": total_cached or None, + "modelsUsed": list(models_set) if models_set else None, + "totalResponseTimeMs": total_response_ms or None, + "totalCost": total_cost if total_cost > 0 else None, + "totalToolCalls": total_tool_calls or None, + "totalThinkingDurationMs": total_thinking_ms or None, + "totalLinesAdded": lines_added if lines_added else None, + "totalLinesRemoved": lines_removed if lines_removed else None, + "totalFilesAdded": files_added if files_added else None, + "totalFilesRemoved": files_removed if files_removed else None, + "maxContextTokensUsed": max_ctx_tokens if max_ctx_tokens else None, + "contextTokenLimit": ctx_token_limit if ctx_token_limit else None, + } + tab_meta = {k: v for k, v in tab_meta_raw.items() if v is not None} + + model_config = cd.get("modelConfig") or {} + model_name_from_config = model_config.get("modelName") + if model_name_from_config and model_name_from_config != "default": + if not tab_meta: + tab_meta = {} + models_used = tab_meta.get("modelsUsed") + if not isinstance(models_used, list): + tab_meta["modelsUsed"] = [model_name_from_config] + elif model_name_from_config not in models_used: + models_used.insert(0, model_name_from_config) + + tab: dict[str, Any] = { + "id": composer_id, + "title": title, + "timestamp": to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(cd.get("createdAt")) or int(datetime.now().timestamp() * 1000), + "bubbles": [{ + "type": b["type"], + 
"text": b.get("text", ""), + "timestamp": b.get("timestamp", 0), + **({"metadata": b["metadata"]} if b.get("metadata") else {}), + } for b in bubbles], + "codeBlockDiffs": code_block_diffs, + } + if tab_meta: + tab["metadata"] = tab_meta + return tab + + +def _build_matching_ws_ids(workspace_id: str, workspace_path: str, workspace_entries: list) -> set[str]: + """Return the set of workspace folder IDs that share the same project folder as *workspace_id*. + + Cursor sometimes creates multiple workspace entries for the same on-disk + project; conversations recorded under any of those IDs belong to the same + project view. + """ + matching: set[str] = {workspace_id} + if workspace_id == "global": + return matching + target_folder = "" + wj_path = os.path.join(workspace_path, workspace_id, "workspace.json") + try: + wd = read_json_file(wj_path) + folders = get_workspace_folder_paths(wd) + first_folder = folders[0] if folders else None + if first_folder: + target_folder = normalize_file_path(first_folder) + except Exception as e: + warn_workspace_json_read(_logger, workspace_id, e) + if target_folder: + for entry in workspace_entries: + try: + wd2 = read_json_file(entry["workspaceJsonPath"]) + folders2 = get_workspace_folder_paths(wd2) + f2 = folders2[0] if folders2 else None + if f2 and normalize_file_path(f2) == target_folder: + matching.add(entry["name"]) + except Exception as e: + warn_workspace_json_read(_logger, entry["name"], e) + return matching + + +def list_workspace_tab_summaries( + workspace_id: str, + workspace_path: str, + rules: list, + *, + nocache: bool = False, +) -> tuple[dict, int]: + """Return summary tab list for GET /api/workspaces//tabs?summary=1. + + Does **not** load the global ``bubbleId:%`` index. Each tab entry contains + only the fields needed by the sidebar: ``id``, ``title``, ``timestamp``, + ``messageCount``, and an optional ``metadata.modelsUsed``. 
Full bubble + bodies are omitted; the UI fetches them on demand via + ``GET /api/workspaces//tabs/``. + + Args: + workspace_id: Workspace folder name, or ``"global"`` for unassigned chats. + workspace_path: Cursor ``workspaceStorage`` root. + rules: Exclusion rule token lists. + + Returns: + ``(payload, status)`` — same envelope as :func:`assemble_workspace_tabs` + but ``tabs`` entries carry no ``bubbles`` field. + """ + workspace_entries = collect_workspace_entries(workspace_path) + gdb = global_storage_db_path(workspace_path) + cli_path = get_cli_chats_path() + fingerprint = fingerprint_workspace_storage( + workspace_path, + workspace_entries, + global_db_path=gdb if os.path.isfile(gdb) else None, + rules=rules, + cli_chats_path=cli_path if os.path.isdir(cli_path) else None, + ) + if not nocache_enabled(request_nocache=nocache): + cached = get_cached_tab_summaries(fingerprint, workspace_id) + if cached is not None: + return cached + + payload, status = _build_workspace_tab_summaries_uncached( + workspace_id, workspace_path, rules, workspace_entries, nocache=nocache, + ) + if status == 200 and not nocache_enabled(request_nocache=nocache): + set_cached_tab_summaries(fingerprint, workspace_id, payload, status) + return payload, status + + +def _build_workspace_tab_summaries_uncached( + workspace_id: str, + workspace_path: str, + rules: list, + workspace_entries: list, + *, + nocache: bool, +) -> tuple[dict, int]: + parse_warnings = ParseWarningCollector() + response: dict = {"tabs": []} + + invalid_workspace_ids = collect_invalid_workspace_ids(workspace_entries) + project_name_map = create_project_name_to_workspace_id_map(workspace_entries) + workspace_path_map = create_workspace_path_to_id_map(workspace_entries) + composer_id_to_ws = build_composer_id_to_workspace_id_cached( + workspace_path, workspace_entries, rules, nocache=nocache, + ) + matching_ws_ids = _build_matching_ws_ids(workspace_id, workspace_path, workspace_entries) + + with 
open_global_db(workspace_path) as (global_db, _): + if global_db is None: + return {"error": "Global storage not found"}, 404 + + workspace_display_name = lookup_workspace_display_name(workspace_path, workspace_id) + + def _safe_fetchall(query: str, params: tuple = ()) -> list: + try: + return global_db.execute(query, params).fetchall() + except sqlite3.Error: + return [] + + project_layouts_map: dict[str, list] = {} + if invalid_workspace_ids: + project_layouts_map = load_project_layouts_map(global_db) + + composer_rows = _safe_fetchall(COMPOSER_ROWS_WITH_HEADERS_SQL) + + invalid_workspace_aliases: dict[str, str] = {} + if invalid_workspace_ids: + invalid_workspace_aliases = infer_invalid_workspace_aliases( + composer_rows=composer_rows, + project_layouts_map=project_layouts_map, + project_name_map=project_name_map, + workspace_path_map=workspace_path_map, + workspace_entries=workspace_entries, + bubble_map={}, + composer_id_to_ws=composer_id_to_ws, + invalid_workspace_ids=invalid_workspace_ids, + ) + + for row in composer_rows: + composer_id = row["key"].split(":")[1] + try: + cd = json.loads(row["value"]) + except (json.JSONDecodeError, TypeError, ValueError) as e: + payload_len, payload_fp = _kv_payload_log_meta(row["value"]) + _logger.warning( + "Failed to decode Composer from composerData:%s: %s (payload_len=%d, payload_sha256=%s)", + composer_id, + e, + payload_len, + payload_fp, + ) + parse_warnings.record_composer_skipped() + continue + if not isinstance(cd, dict): + parse_warnings.record_composer_skipped() + continue + try: + if ( + composer_id not in composer_id_to_ws + and composer_id not in project_layouts_map + ): + project_layouts_map[composer_id] = load_project_layouts_for_composer( + global_db, composer_id, + ) + pid = determine_project_for_conversation( + cd, composer_id, project_layouts_map, + project_name_map, workspace_path_map, + workspace_entries, {}, composer_id_to_ws, invalid_workspace_ids, + ) + mapped_ws = 
composer_id_to_ws.get(composer_id) + if not pid and mapped_ws in invalid_workspace_ids: + pid = invalid_workspace_aliases.get(mapped_ws) + assigned = pid if pid else "global" + + if assigned not in matching_ws_ids: + continue + + headers = cd.get("fullConversationHeadersOnly") or [] + if not headers: + continue + + title = cd.get("name") or f"Conversation {composer_id[:8]}" + + _early_model_config = cd.get("modelConfig") or {} + _early_model_name = _early_model_config.get("modelName") + _early_model_names = [_early_model_name] if _early_model_name and _early_model_name != "default" else None + if is_excluded_by_rules(rules, build_searchable_text( + project_name=workspace_display_name, + chat_title=title, + model_names=_early_model_names, + )): + continue + + tab_meta: dict | None = None + if _early_model_names: + tab_meta = {"modelsUsed": _early_model_names} + + tab_entry: dict = { + "id": composer_id, + "title": title, + "timestamp": to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(cd.get("createdAt")) or int(datetime.now().timestamp() * 1000), + "messageCount": len(headers), + } + if tab_meta: + tab_entry["metadata"] = tab_meta + response["tabs"].append(tab_entry) + + except Exception as e: + _logger.warning( + "Failed to process Composer from composerData:%s: %s", + composer_id, + e, + ) + parse_warnings.record_composer_processing_failure() + + response["tabs"].sort(key=lambda t: t.get("timestamp") or 0, reverse=True) + return parse_warnings.attach_to(response), 200 + + +def assemble_single_tab( + workspace_id: str, + composer_id: str, + workspace_path: str, + rules: list, +) -> tuple[dict, int]: + """Assemble a single conversation tab for GET /api/workspaces//tabs/. + + Loads only the KV rows scoped to *composer_id* (``bubbleId:{id}:%``, + ``messageRequestContext:{id}:%``, ``codeBlockDiff:{id}:%``) instead of + performing a full global scan. + + Args: + workspace_id: Workspace folder name, or ``"global"``. 
+ composer_id: UUID of the composer / conversation to assemble. + workspace_path: Cursor ``workspaceStorage`` root. + rules: Exclusion rule token lists. + + Returns: + ``(payload, status)``. On success (``200``), *payload* is + ``{"tab": {...}}``, optionally with ``"warnings"``. ``404`` when the + global DB is missing, the composer is not found, or it is not assigned + to *workspace_id*. + """ + parse_warnings = ParseWarningCollector() + + workspace_entries = collect_workspace_entries(workspace_path) + invalid_workspace_ids = collect_invalid_workspace_ids(workspace_entries) + project_name_map = create_project_name_to_workspace_id_map(workspace_entries) + workspace_path_map = create_workspace_path_to_id_map(workspace_entries) + composer_id_to_ws = build_composer_id_to_workspace_id_cached( + workspace_path, workspace_entries, rules, + ) + matching_ws_ids = _build_matching_ws_ids(workspace_id, workspace_path, workspace_entries) + + with open_global_db(workspace_path) as (global_db, _): + if global_db is None: + return {"error": "Global storage not found"}, 404 + + workspace_display_name = lookup_workspace_display_name(workspace_path, workspace_id) + + def _safe_fetchall(query: str, params: tuple = ()) -> list: + try: + return global_db.execute(query, params).fetchall() + except sqlite3.Error: + return [] + + rows = _safe_fetchall( + "SELECT key, value FROM cursorDiskKV WHERE key = ?", + (f"composerData:{composer_id}",), + ) + if not rows: + return {"error": "Conversation not found"}, 404 + + row = rows[0] + try: + parsed = json.loads(row["value"]) + except (json.JSONDecodeError, TypeError, ValueError) as e: + payload_len, payload_fp = _kv_payload_log_meta(row["value"]) + _logger.warning( + "Failed to decode Composer from composerData:%s: %s (payload_len=%d, payload_sha256=%s)", + composer_id, + e, + payload_len, + payload_fp, + ) + return {"error": "Failed to parse conversation"}, 500 + try: + composer = Composer.from_dict(parsed, composer_id=composer_id) + except 
SchemaError as e: + _logger.warning( + "Failed to parse Composer from composerData:%s: %s", + composer_id, + e, + ) + return {"error": "Failed to parse conversation"}, 500 + + cd = composer.raw + + # Verify the conversation belongs to the requested workspace. + # Scoped MRC load for this composer only; full map + alias scan only + # when invalid workspace folders need majority-vote reassignment. + project_layouts_map: dict[str, list] = {} + invalid_workspace_aliases: dict[str, str] = {} + if invalid_workspace_ids: + project_layouts_map = load_project_layouts_map(global_db) + composer_rows_for_aliases = _safe_fetchall(COMPOSER_ROWS_WITH_HEADERS_SQL) + invalid_workspace_aliases = infer_invalid_workspace_aliases( + composer_rows=composer_rows_for_aliases, + project_layouts_map=project_layouts_map, + project_name_map=project_name_map, + workspace_path_map=workspace_path_map, + workspace_entries=workspace_entries, + bubble_map={}, + composer_id_to_ws=composer_id_to_ws, + invalid_workspace_ids=invalid_workspace_ids, + ) + else: + project_layouts_map[composer_id] = load_project_layouts_for_composer( + global_db, composer_id, + ) + + pid = determine_project_for_conversation( + cd, composer_id, project_layouts_map, + project_name_map, workspace_path_map, + workspace_entries, {}, composer_id_to_ws, invalid_workspace_ids, + ) + mapped_ws = composer_id_to_ws.get(composer_id) + if not pid and mapped_ws in invalid_workspace_ids: + pid = invalid_workspace_aliases.get(mapped_ws) + assigned = pid if pid else "global" + + if assigned not in matching_ws_ids: + return {"error": "Conversation not found"}, 404 + + # Scoped loads — only rows for this composer_id. 
+ bubble_map = load_bubbles_for_composer(global_db, composer_id) + contexts = load_message_request_context_for_composer(global_db, composer_id) + code_block_diffs = load_code_block_diffs_for_composer(global_db, composer_id) + + tab = _assemble_tab_from_composer_data( + composer_id=composer_id, + cd=cd, + bubble_map=bubble_map, + contexts=contexts, + code_block_diffs=code_block_diffs, + workspace_display_name=workspace_display_name, + rules=rules, + parse_warnings=parse_warnings, + ) + + if tab is None: + return {"error": "Conversation not found"}, 404 + + response: dict = {"tab": tab} + return parse_warnings.attach_to(response), 200 + + def assemble_workspace_tabs( workspace_id: str, workspace_path: str, @@ -103,32 +773,10 @@ def assemble_workspace_tabs( invalid_workspace_ids = collect_invalid_workspace_ids(workspace_entries) project_name_map = create_project_name_to_workspace_id_map(workspace_entries) workspace_path_map = create_workspace_path_to_id_map(workspace_entries) - composer_id_to_ws = build_composer_id_to_workspace_id(workspace_path, workspace_entries) - - # Build set of all workspace IDs that share the same folder as workspace_id - # (handles Cursor creating multiple workspace entries for the same project) - matching_ws_ids = {workspace_id} - if workspace_id != "global": - target_folder = "" - wj_path = os.path.join(workspace_path, workspace_id, "workspace.json") - try: - wd = read_json_file(wj_path) - folders = get_workspace_folder_paths(wd) - first_folder = folders[0] if folders else None - if first_folder: - target_folder = normalize_file_path(first_folder) - except Exception as e: - warn_workspace_json_read(_logger, workspace_id, e) - if target_folder: - for entry in workspace_entries: - try: - wd2 = read_json_file(entry["workspaceJsonPath"]) - folders2 = get_workspace_folder_paths(wd2) - f2 = folders2[0] if folders2 else None - if f2 and normalize_file_path(f2) == target_folder: - matching_ws_ids.add(entry["name"]) - except Exception as e: - 
warn_workspace_json_read(_logger, entry["name"], e) + composer_id_to_ws = build_composer_id_to_workspace_id_cached( + workspace_path, workspace_entries, rules, + ) + matching_ws_ids = _build_matching_ws_ids(workspace_id, workspace_path, workspace_entries) bubble_map: dict[str, dict] = {} code_block_diff_map: dict[str, list] = {} @@ -285,311 +933,18 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: if assigned not in matching_ws_ids: continue - headers = cd.get("fullConversationHeadersOnly") or [] - - # Build bubbles. Annotated as list[dict[str, Any]] so mypy - # treats nested .get("metadata") / m["inputTokens"] etc. as - # accessing dict values rather than `object`. - bubbles: list[dict[str, Any]] = [] - for header in headers: - if not isinstance(header, dict): - continue - bubble_id = header.get("bubbleId") - if not isinstance(bubble_id, str): - continue - bubble = bubble_map.get(bubble_id) - if not bubble: - continue - - is_user = header.get("type") == 1 - msg_type = "user" if is_user else "ai" - text = extract_text_from_bubble(bubble) - - # Append messageRequestContext info - context_text = "" - for ctx in message_request_context_map.get(composer_id, []): - if ctx.get("bubbleId") == bubble_id: - if ctx.get("gitStatusRaw"): - context_text += f"\n\n**Git Status:**\n```\n{ctx['gitStatusRaw']}\n```" - tf = ctx.get("terminalFiles") - if isinstance(tf, list) and tf: - context_text += "\n\n**Terminal Files:**" - for f in tf: - if not isinstance(f, dict): - continue - context_text += f"\n- {f.get('path', '')}" - af = ctx.get("attachedFoldersListDirResults") - if isinstance(af, list) and af: - context_text += "\n\n**Attached Folders:**" - for fld in af: - if not isinstance(fld, dict): - continue - files = fld.get("files") - if isinstance(files, list) and files: - context_text += f"\n\n**Folder:** {fld.get('path', 'Unknown')}" - for fi in files: - if not isinstance(fi, dict): - continue - context_text += f"\n- {fi.get('name', '')} ({fi.get('type', '')})" - cr 
= ctx.get("cursorRules") - if isinstance(cr, list) and cr: - context_text += "\n\n**Cursor Rules:**" - for rule in cr: - if not isinstance(rule, dict): - continue - context_text += f"\n- {rule.get('name') or rule.get('description') or 'Rule'}" - sc = ctx.get("summarizedComposers") - if isinstance(sc, list) and sc: - context_text += "\n\n**Related Conversations:**" - for comp in sc: - if not isinstance(comp, dict): - continue - context_text += f"\n- {comp.get('name') or comp.get('composerId') or 'Conversation'}" - - full_text = text + context_text - - raw = bubble - token_count = raw.get("tokenCount") - - # Tool calls - tool_calls = None - tfd = raw.get("toolFormerData") - if isinstance(tfd, dict): - tool_call = parse_tool_call(tfd) - if isinstance(tool_call, dict): - tool_calls = [tool_call] - - # Thinking - thinking = None - thinking_duration_ms = None - if raw.get("thinking"): - thinking = raw["thinking"] if isinstance(raw["thinking"], str) else (raw["thinking"].get("text") if isinstance(raw["thinking"], dict) else None) - thinking_duration_ms = raw.get("thinkingDurationMs") - - has_content = full_text.strip() or tool_calls or thinking - if not has_content: - continue - - # Context window - ctx_window = raw.get("contextWindowStatusAtCreation") or {} - ctx_pct = None - if isinstance(ctx_window, dict): - if ctx_window.get("percentageRemainingFloat") is not None: - ctx_pct = ctx_window.get("percentageRemainingFloat") - elif ctx_window.get("percentageRemaining") is not None: - ctx_pct = ctx_window.get("percentageRemaining") - - # Display text fallbacks - display_text = full_text.strip() - if not display_text and tool_calls: - tc = tool_calls[0] - if isinstance(tc, dict): - display_text = f"**Tool: {tc.get('name', 'unknown')}**" - if tc.get("status"): - display_text += f" ({tc['status']})" - if not display_text and thinking: - display_text = thinking - - # Build metadata for BOTH user and AI bubbles - bubble_meta = None - if bubble: - model_info = raw.get("modelInfo") 
or {} - model_name = model_info.get("modelName") - if model_name == "default": - model_name = None - - if msg_type == "ai": - tc_dict = token_count if isinstance(token_count, dict) else {} - in_tok = tc_dict.get("inputTokens") or 0 - out_tok = tc_dict.get("outputTokens") or 0 - cached_tok = tc_dict.get("cachedTokens") or 0 - bubble_meta = { - "modelName": model_name, - "inputTokens": in_tok if in_tok > 0 else None, - "outputTokens": out_tok if out_tok > 0 else None, - "cachedTokens": cached_tok if cached_tok > 0 else None, - "toolResultsCount": (len(tool_calls) if tool_calls else None) or (len(raw["toolResults"]) if isinstance(raw.get("toolResults"), list) and raw["toolResults"] else None), - "toolResults": raw.get("toolResults") if isinstance(raw.get("toolResults"), list) and raw["toolResults"] else None, - "toolCalls": tool_calls, - "thinking": thinking, - "thinkingDurationMs": thinking_duration_ms, - "contextWindowPercent": ctx_pct, - } - elif msg_type == "user": - bubble_meta = { - "modelName": model_name, - "contextWindowPercent": ctx_pct, - } - if ctx_window: - tokens_used = ctx_window.get("tokensUsed", 0) - token_limit = ctx_window.get("tokenLimit", 0) - if tokens_used > 0: - bubble_meta["contextTokensUsed"] = tokens_used - if token_limit > 0: - bubble_meta["contextTokenLimit"] = token_limit - - if bubble_meta: - bubble_meta = {k: v for k, v in bubble_meta.items() if v is not None} - if not bubble_meta: - bubble_meta = None - - b_entry = { - "type": msg_type, - "text": display_text, - "timestamp": to_epoch_ms(bubble.get("createdAt")) or to_epoch_ms(bubble.get("timestamp")) or int(datetime.now().timestamp() * 1000), - } - if bubble_meta: - b_entry["metadata"] = bubble_meta - bubbles.append(b_entry) - - if not bubbles: - continue - - # Title - title = cd.get("name") or f"Conversation {composer_id[:8]}" - if not cd.get("name") and bubbles: - first_msg = bubbles[0].get("text", "") - if first_msg: - first_lines = [ln for ln in first_msg.split("\n") if ln.strip()] 
- if first_lines: - title = first_lines[0][:100] - if len(title) == 100: - title += "..." - - # Early exclusion check — before expensive metadata aggregation - _early_model_config = cd.get("modelConfig") or {} - _early_model_name = _early_model_config.get("modelName") - _early_model_names = [_early_model_name] if _early_model_name and _early_model_name != "default" else None - if is_excluded_by_rules(rules, build_searchable_text( - project_name=workspace_display_name, - chat_title=title, - model_names=_early_model_names, - )): - continue - - # codeBlockDiffs are emitted as a structured ``tab.codeBlockDiffs`` - # field below; the dashboard reads them from there (download.js, - # workspace.html). Previously this loop also pushed a synthetic - # ``Tool Action`` AI bubble into ``tab.bubbles``, double-representing - # every diff on the wire and forcing a ``synthetic`` filter in the - # response-time pass. Dropping the synthesis — frontend never read it. - diffs = code_block_diff_map.get(composer_id, []) - - bubbles.sort(key=lambda b: b.get("timestamp") or 0) - - # Response time calculation - last_user_ts = None - for b in bubbles: - if b["type"] == "user": - last_user_ts = b.get("timestamp") - elif b["type"] == "ai" and last_user_ts is not None: - ts = b.get("timestamp") - if ts and ts > last_user_ts: - meta = b.setdefault("metadata", {}) - meta["responseTimeMs"] = ts - last_user_ts - - # Aggregate metadata - total_input = 0 - total_output = 0 - total_cached = 0 - total_response_ms = 0 - total_cost = 0.0 - total_tool_calls = 0 - total_thinking_ms = 0 - models_set: set = set() - for b in bubbles: - m = b.get("metadata") or {} - if m.get("inputTokens"): - total_input += m["inputTokens"] - if m.get("outputTokens"): - total_output += m["outputTokens"] - if m.get("cachedTokens"): - total_cached += m["cachedTokens"] - if m.get("responseTimeMs"): - total_response_ms += m["responseTimeMs"] - if m.get("cost") is not None: - total_cost += m["cost"] - if m.get("modelName"): - 
models_set.add(m["modelName"]) - if m.get("toolCalls"): - total_tool_calls += len(m["toolCalls"]) - if m.get("thinkingDurationMs"): - total_thinking_ms += m["thinkingDurationMs"] - - # Composer-level cost fallback - usage = cd.get("usageData") or {} - composer_cost = usage.get("cost") or usage.get("estimatedCost") - if isinstance(composer_cost, (int, float)) and total_cost == 0: - total_cost = composer_cost - - # Composer-level lines/files changed - lines_added = cd.get("totalLinesAdded", 0) - lines_removed = cd.get("totalLinesRemoved", 0) - files_added = cd.get("addedFiles", 0) - files_removed = cd.get("removedFiles", 0) - - # Context window progression from user bubbles - max_ctx_tokens = 0 - ctx_token_limit = 0 - for b in bubbles: - m = b.get("metadata") or {} - if m.get("contextTokensUsed", 0) > max_ctx_tokens: - max_ctx_tokens = m["contextTokensUsed"] - if m.get("contextTokenLimit", 0) > ctx_token_limit: - ctx_token_limit = m["contextTokenLimit"] - - tab_meta = None - has_any = any([total_input, total_output, total_cached, total_response_ms, - total_cost, models_set, total_tool_calls, total_thinking_ms, - lines_added, lines_removed, files_added, files_removed, - max_ctx_tokens]) - if has_any: - tab_meta_raw = { - "totalInputTokens": total_input or None, - "totalOutputTokens": total_output or None, - "totalCachedTokens": total_cached or None, - "modelsUsed": list(models_set) if models_set else None, - "totalResponseTimeMs": total_response_ms or None, - "totalCost": total_cost if total_cost > 0 else None, - "totalToolCalls": total_tool_calls or None, - "totalThinkingDurationMs": total_thinking_ms or None, - "totalLinesAdded": lines_added if lines_added else None, - "totalLinesRemoved": lines_removed if lines_removed else None, - "totalFilesAdded": files_added if files_added else None, - "totalFilesRemoved": files_removed if files_removed else None, - "maxContextTokensUsed": max_ctx_tokens if max_ctx_tokens else None, - "contextTokenLimit": ctx_token_limit if 
ctx_token_limit else None, - } - tab_meta = {k: v for k, v in tab_meta_raw.items() if v is not None} - - # Model config from composer data - model_config = cd.get("modelConfig") or {} - model_name_from_config = model_config.get("modelName") - if model_name_from_config and model_name_from_config != "default": - if not tab_meta: - tab_meta = {} - models_used = tab_meta.get("modelsUsed") - if not isinstance(models_used, list): - tab_meta["modelsUsed"] = [model_name_from_config] - elif model_name_from_config not in models_used: - models_used.insert(0, model_name_from_config) - - tab = { - "id": composer_id, - "title": title, - "timestamp": to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(cd.get("createdAt")) or int(datetime.now().timestamp() * 1000), - "bubbles": [{ - "type": b["type"], - "text": b.get("text", ""), - "timestamp": b.get("timestamp", 0), - **({"metadata": b["metadata"]} if b.get("metadata") else {}), - } for b in bubbles], - "codeBlockDiffs": diffs, - } - if tab_meta: - tab["metadata"] = tab_meta - - response["tabs"].append(tab) + tab = _assemble_tab_from_composer_data( + composer_id=composer_id, + cd=cd, + bubble_map=bubble_map, + contexts=message_request_context_map.get(composer_id, []), + code_block_diffs=code_block_diff_map.get(composer_id, []), + workspace_display_name=workspace_display_name, + rules=rules, + parse_warnings=parse_warnings, + ) + if tab is not None: + response["tabs"].append(tab) except Exception as e: _logger.warning( diff --git a/static/css/style.css b/static/css/style.css index d911fdb..d6387f0 100644 --- a/static/css/style.css +++ b/static/css/style.css @@ -106,7 +106,10 @@ a { color: var(--link); text-decoration: none; } a:hover { text-decoration: underline; } /* ---------- Layout ---------- */ -.container { max-width: 1200px; margin: 0 auto; padding: 1.5rem 1rem; flex: 1; } +/* width: 100% is required here: body is display:flex + flex-direction:column, + * and auto horizontal margins (margin: 0 auto) suppress 
align-items:stretch, + * so without an explicit width the container collapses to content width. */ +.container { width: 100%; max-width: 1200px; margin: 0 auto; padding: 1.5rem 1rem; flex: 1; } /* ---------- Navbar ---------- */ .navbar { @@ -297,10 +300,24 @@ h3 { font-size: 1.15rem; font-weight: 600; } .dropdown-item:hover { background: var(--bg-hover); text-decoration: none; } /* ---------- Loading ---------- */ -.loading { display: flex; flex-direction: column; align-items: center; padding: 3rem 0; gap: 1rem; } +.loading, +.loading-center { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + width: 100%; + gap: 1rem; + text-align: center; +} +.loading { padding: 3rem 0; } +.loading-center { color: var(--text-muted); font-size: 0.9rem; } .spinner { + display: block; + flex-shrink: 0; width: 2rem; height: 2rem; + margin-inline: auto; border: 3px solid var(--border); border-top-color: var(--spinner); border-radius: 50%; @@ -324,7 +341,17 @@ h3 { font-size: 1.15rem; font-weight: 600; } * URL inside .main-content would push the column wider than 1fr — overflowing * the viewport on the right. min-width: 0 lets the column shrink and lets the * existing overflow-x: auto on .prose pre handle the scroll inside the bubble. */ -.main-content { min-width: 0; } +/* min-height prevents the right column from collapsing during lazy-load (issue #84). */ +.main-content { min-width: 0; min-height: 60vh; } + +/* Loading state: full-width cell; .loading-center centers spinner + label. 
*/ +.main-content.is-loading { + width: 100%; + min-height: 60vh; +} +.main-content.is-loading .loading-center { + min-height: 60vh; +} /* ---------- Sidebar ---------- */ .sidebar { @@ -358,6 +385,13 @@ h3 { font-size: 1.15rem; font-weight: 600; } .sidebar::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; } .sidebar::-webkit-scrollbar-track { background: transparent; } +/* Placeholder shown in the sidebar column while summary tabs are fetching */ +.sidebar-loading { + width: 100%; + padding: 1.5rem 0; + opacity: 0.5; +} + /* Scroll hint at the bottom */ .sidebar-scroll-hint { text-align: center; diff --git a/templates/workspace.html b/templates/workspace.html index 73917d4..9c78b9f 100644 --- a/templates/workspace.html +++ b/templates/workspace.html @@ -39,17 +39,17 @@

Loading...

-
-
-

Loading conversations...

-
- - -