From e9fc1eb56a3ccef23c87037ff7ff85cd89288b64 Mon Sep 17 00:00:00 2001 From: Monsky Date: Sat, 20 Jun 2026 18:42:53 -0400 Subject: [PATCH 01/10] feat: add diagnostic snapshot foundation --- docs/cli-json-schemas.md | 44 +++ src/codex_usage_tracker/cli.py | 8 + src/codex_usage_tracker/cli_parser.py | 12 + .../diagnostic_snapshots.py | 274 ++++++++++++++++++ src/codex_usage_tracker/json_contracts.py | 11 + src/codex_usage_tracker/server.py | 61 ++++ src/codex_usage_tracker/store.py | 91 ++++++ src/codex_usage_tracker/store_schema.py | 29 +- tests/store_dashboard_helpers.py | 18 +- tests/test_cli_lifecycle.py | 36 ++- tests/test_dashboard_server.py | 39 +++ tests/test_diagnostic_snapshots.py | 96 ++++++ tests/test_store_dashboard_mcp.py | 12 +- tests/test_store_migrations.py | 9 +- 14 files changed, 725 insertions(+), 15 deletions(-) create mode 100644 src/codex_usage_tracker/diagnostic_snapshots.py create mode 100644 tests/test_diagnostic_snapshots.py diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md index 84c6ed3..5ec7397 100644 --- a/docs/cli-json-schemas.md +++ b/docs/cli-json-schemas.md @@ -47,6 +47,7 @@ Tracked schema ids: | `codex-usage-tracker-query-v1` | CLI `query`, MCP `usage_query(...)` | | `codex-usage-tracker-recommendations-v1` | CLI `recommendations --json`, MCP `usage_recommendations(response_format="json")` | | `codex-usage-tracker-diagnostics-v1` | CLI `diagnostics ... 
--json`, dashboard server `/api/diagnostics/*` | +| `codex-usage-tracker-diagnostic-overview-v1` | CLI `diagnostics overview --json`, dashboard server `/api/diagnostics/overview` | | `codex-usage-tracker-session-v1` | CLI `session --json`, MCP `session_usage(response_format="json")` | | `codex-usage-tracker-context-v1` | CLI `context`, MCP `usage_call_context` when raw context is explicitly enabled | | `codex-usage-tracker-context-disabled-v1` | MCP `usage_call_context` when raw context is disabled | @@ -281,6 +282,49 @@ Schema: `codex-usage-tracker-diagnostics-v1` Diagnostics payloads report aggregate structured facts such as compaction, tool/function/MCP activity, command families, structured skill labels, search/read loops, and outcome events. They do not include prompts, assistant messages, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments. Token totals are associated with facts observed before a token-count row; they are not causal allocations. +## Diagnostic Overview Snapshot + +Commands: + +```bash +codex-usage-tracker diagnostics overview --json +codex-usage-tracker diagnostics overview --refresh --json +``` + +Dashboard server API: + +- `GET /api/diagnostics/overview` +- `POST /api/diagnostics/overview/refresh` + +Schema: `codex-usage-tracker-diagnostic-overview-v1` + +```json +{ + "schema": "codex-usage-tracker-diagnostic-overview-v1", + "section": "overview", + "status": "ready", + "refreshed": false, + "raw_context_included": false, + "snapshot": { + "computed_at": "2026-06-20T18:00:00+00:00", + "history_scope": "active", + "source_logs_scanned": 3, + "usage_rows_scanned": 10, + "raw_content_included": false + }, + "overview": { + "usage_rows": 10, + "total_tokens": 12345, + "cached_input_tokens": 9000, + "uncached_input_tokens": 2000, + "cache_ratio": 0.75 + }, + "notes": [] +} +``` + +The overview snapshot is recomputed only when explicitly refreshed. 
Ordinary dashboard usage refreshes do not update diagnostic snapshots. + ## Pricing Coverage Command: diff --git a/src/codex_usage_tracker/cli.py b/src/codex_usage_tracker/cli.py index 96b7465..5608c0f 100644 --- a/src/codex_usage_tracker/cli.py +++ b/src/codex_usage_tracker/cli.py @@ -29,6 +29,7 @@ build_diagnostics_facts_report, build_diagnostics_summary_report, ) +from codex_usage_tracker.diagnostic_snapshots import build_diagnostic_overview_report from codex_usage_tracker.diagnostics import run_doctor from codex_usage_tracker.formatting import ( format_doctor, @@ -394,6 +395,7 @@ def _run_recommendations(args: argparse.Namespace) -> int: def _run_diagnostics(args: argparse.Namespace) -> int: command = args.diagnostics_command + report: Any if command == "summary": report = build_diagnostics_summary_report( db_path=args.db, @@ -448,6 +450,12 @@ def _run_diagnostics(args: argparse.Namespace) -> int: direction=args.direction, privacy_mode=args.privacy_mode, ) + elif command == "overview": + report = build_diagnostic_overview_report( + db_path=args.db, + include_archived=args.include_archived, + refresh=args.refresh, + ) else: raise ValueError(f"unknown diagnostics command: {command}") diff --git a/src/codex_usage_tracker/cli_parser.py b/src/codex_usage_tracker/cli_parser.py index 193fe7a..d16b8b9 100644 --- a/src/codex_usage_tracker/cli_parser.py +++ b/src/codex_usage_tracker/cli_parser.py @@ -327,6 +327,18 @@ def _add_diagnostics_parser( _add_diagnostics_base_filters(tools) _add_diagnostics_fact_sort(tools, default_limit=50) + overview = diagnostic_subparsers.add_parser( + "overview", + help="Show the on-demand aggregate diagnostic overview snapshot", + ) + overview.add_argument("--include-archived", action="store_true") + overview.add_argument( + "--refresh", + action="store_true", + help="Recompute and persist the overview snapshot before reading it.", + ) + overview.add_argument("--json", action="store_true", dest="as_json") + fact_calls = 
diagnostic_subparsers.add_parser( "fact-calls", help="List calls associated with one diagnostic fact", diff --git a/src/codex_usage_tracker/diagnostic_snapshots.py b/src/codex_usage_tracker/diagnostic_snapshots.py new file mode 100644 index 0000000..9fe28c2 --- /dev/null +++ b/src/codex_usage_tracker/diagnostic_snapshots.py @@ -0,0 +1,274 @@ +"""On-demand aggregate diagnostic report snapshots.""" + +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from codex_usage_tracker.paths import DEFAULT_DB_PATH +from codex_usage_tracker.store import ( + connect, + query_diagnostic_snapshot, + upsert_diagnostic_snapshot, +) +from codex_usage_tracker.store_schema import init_db + +DIAGNOSTIC_OVERVIEW_SCHEMA = "codex-usage-tracker-diagnostic-overview-v1" +DIAGNOSTIC_OVERVIEW_SECTION = "overview" +DIAGNOSTIC_HISTORY_ACTIVE = "active" +DIAGNOSTIC_HISTORY_ALL = "all" +DIAGNOSTIC_OVERVIEW_NOTES = [ + "Diagnostic snapshots are recomputed only by explicit diagnostic refresh.", + "Overview totals use persisted aggregate usage rows and do not include raw context.", +] + + +@dataclass(frozen=True) +class DiagnosticSnapshotReport: + """Resolved diagnostic snapshot payload for CLI and API surfaces.""" + + payload: dict[str, Any] + + def render(self) -> str: + if self.payload.get("status") != "ready": + return "No diagnostic overview snapshot. Run diagnostics overview --refresh first." 
+ snapshot = self.payload.get("snapshot") or {} + overview = self.payload.get("overview") or {} + return "\n".join( + [ + "Diagnostic overview snapshot", + f"Computed: {snapshot.get('computed_at')}", + f"History scope: {snapshot.get('history_scope')}", + f"Usage rows: {_int_text(overview.get('usage_rows'))}", + f"Total tokens: {_int_text(overview.get('total_tokens'))}", + f"Cached input: {_int_text(overview.get('cached_input_tokens'))}", + f"Uncached input: {_int_text(overview.get('uncached_input_tokens'))}", + f"Cache ratio: {_pct_text(overview.get('cache_ratio'))}", + ] + ) + + +def build_diagnostic_overview_report( + *, + db_path: Path = DEFAULT_DB_PATH, + include_archived: bool = False, + refresh: bool = False, +) -> DiagnosticSnapshotReport: + """Return the latest overview snapshot, optionally recomputing it first.""" + + if refresh: + return DiagnosticSnapshotReport( + refresh_diagnostic_overview_snapshot( + db_path=db_path, + include_archived=include_archived, + ) + ) + return DiagnosticSnapshotReport( + diagnostic_overview_payload( + db_path=db_path, + include_archived=include_archived, + ) + ) + + +def refresh_diagnostic_overview_snapshot( + *, + db_path: Path = DEFAULT_DB_PATH, + include_archived: bool = False, +) -> dict[str, Any]: + """Recompute and persist the aggregate overview diagnostic snapshot.""" + + history_scope = _history_scope(include_archived) + computed_at = _utc_now() + overview, source_logs_scanned = _compute_overview( + db_path=db_path, + include_archived=include_archived, + ) + snapshot = _snapshot_metadata( + computed_at=computed_at, + history_scope=history_scope, + source_logs_scanned=source_logs_scanned, + usage_rows_scanned=int(overview["usage_rows"]), + ) + payload = _ready_payload(snapshot=snapshot, overview=overview, refreshed=True) + upsert_diagnostic_snapshot( + db_path=db_path, + section=DIAGNOSTIC_OVERVIEW_SECTION, + history_scope=history_scope, + payload=payload, + computed_at=computed_at, + 
source_logs_scanned=source_logs_scanned, + usage_rows_scanned=int(overview["usage_rows"]), + raw_content_included=False, + ) + return payload + + +def diagnostic_overview_payload( + *, + db_path: Path = DEFAULT_DB_PATH, + include_archived: bool = False, +) -> dict[str, Any]: + """Return the latest persisted overview snapshot without recomputing it.""" + + history_scope = _history_scope(include_archived) + stored = query_diagnostic_snapshot( + db_path=db_path, + section=DIAGNOSTIC_OVERVIEW_SECTION, + history_scope=history_scope, + ) + if stored is None: + return _missing_payload(history_scope=history_scope) + payload = dict(stored["payload"]) + payload["status"] = "ready" + payload["refreshed"] = False + payload["snapshot"] = _snapshot_metadata( + computed_at=str(stored["computed_at"]), + history_scope=str(stored["history_scope"]), + source_logs_scanned=int(stored["source_logs_scanned"]), + usage_rows_scanned=int(stored["usage_rows_scanned"]), + ) + payload["raw_context_included"] = bool(stored["raw_content_included"]) + return payload + + +def _compute_overview( + *, + db_path: Path, + include_archived: bool, +) -> tuple[dict[str, Any], int]: + usage_where = "" if include_archived else "WHERE is_archived = 0" + source_where = "" if include_archived else "WHERE is_archived = 0" + with connect(db_path) as conn: + init_db(conn) + usage_row = conn.execute( + f""" + SELECT + COUNT(*) AS usage_rows, + COUNT(DISTINCT session_id) AS session_count, + COUNT(DISTINCT thread_key) AS thread_count, + COUNT(DISTINCT model) AS model_count, + MIN(event_timestamp) AS first_event_timestamp, + MAX(event_timestamp) AS latest_event_timestamp, + coalesce(SUM(input_tokens), 0) AS input_tokens, + coalesce(SUM(cached_input_tokens), 0) AS cached_input_tokens, + coalesce(SUM(uncached_input_tokens), 0) AS uncached_input_tokens, + coalesce(SUM(output_tokens), 0) AS output_tokens, + coalesce(SUM(reasoning_output_tokens), 0) AS reasoning_output_tokens, + coalesce(SUM(total_tokens), 0) AS 
total_tokens, + AVG(cache_ratio) AS avg_cache_ratio + FROM usage_events + {usage_where} + """ + ).fetchone() + facts_row = conn.execute( + f""" + SELECT COUNT(*) AS diagnostic_fact_rows + FROM call_diagnostic_facts AS facts + JOIN usage_events ON usage_events.record_id = facts.record_id + {usage_where} + """ + ).fetchone() + source_row = conn.execute( + f"SELECT COUNT(*) AS source_logs_scanned FROM source_files {source_where}" + ).fetchone() + input_tokens = _int_value(usage_row["input_tokens"]) + cached_input_tokens = _int_value(usage_row["cached_input_tokens"]) + overview = { + "usage_rows": _int_value(usage_row["usage_rows"]), + "session_count": _int_value(usage_row["session_count"]), + "thread_count": _int_value(usage_row["thread_count"]), + "model_count": _int_value(usage_row["model_count"]), + "first_event_timestamp": usage_row["first_event_timestamp"], + "latest_event_timestamp": usage_row["latest_event_timestamp"], + "input_tokens": input_tokens, + "cached_input_tokens": cached_input_tokens, + "uncached_input_tokens": _int_value(usage_row["uncached_input_tokens"]), + "output_tokens": _int_value(usage_row["output_tokens"]), + "reasoning_output_tokens": _int_value(usage_row["reasoning_output_tokens"]), + "total_tokens": _int_value(usage_row["total_tokens"]), + "cache_ratio": cached_input_tokens / input_tokens if input_tokens else 0.0, + "avg_call_cache_ratio": float(usage_row["avg_cache_ratio"] or 0), + "diagnostic_fact_rows": _int_value(facts_row["diagnostic_fact_rows"]), + } + return overview, _int_value(source_row["source_logs_scanned"]) + + +def _ready_payload( + *, + snapshot: dict[str, Any], + overview: dict[str, Any], + refreshed: bool, +) -> dict[str, Any]: + return { + "schema": DIAGNOSTIC_OVERVIEW_SCHEMA, + "section": DIAGNOSTIC_OVERVIEW_SECTION, + "status": "ready", + "refreshed": refreshed, + "raw_context_included": False, + "snapshot": snapshot, + "overview": overview, + "notes": list(DIAGNOSTIC_OVERVIEW_NOTES), + } + + +def _missing_payload(*, 
history_scope: str) -> dict[str, Any]: + return { + "schema": DIAGNOSTIC_OVERVIEW_SCHEMA, + "section": DIAGNOSTIC_OVERVIEW_SECTION, + "status": "missing", + "refreshed": False, + "raw_context_included": False, + "snapshot": None, + "overview": None, + "history_scope": history_scope, + "notes": list(DIAGNOSTIC_OVERVIEW_NOTES), + } + + +def _snapshot_metadata( + *, + computed_at: str, + history_scope: str, + source_logs_scanned: int, + usage_rows_scanned: int, +) -> dict[str, Any]: + return { + "computed_at": computed_at, + "history_scope": history_scope, + "source_logs_scanned": int(source_logs_scanned), + "usage_rows_scanned": int(usage_rows_scanned), + "raw_content_included": False, + } + + +def _history_scope(include_archived: bool) -> str: + return DIAGNOSTIC_HISTORY_ALL if include_archived else DIAGNOSTIC_HISTORY_ACTIVE + + +def _utc_now() -> str: + return datetime.now(timezone.utc).replace(microsecond=0).isoformat() + + +def _int_value(value: object) -> int: + if isinstance(value, int): + return value + if isinstance(value, float): + return int(value) + if isinstance(value, str) and value: + return int(value) + return 0 + + +def _int_text(value: object) -> str: + return f"{_int_value(value):,}" + + +def _pct_text(value: object) -> str: + try: + ratio = float(value) if isinstance(value, int | float | str) and value != "" else 0.0 + except (TypeError, ValueError): + ratio = 0.0 + return f"{ratio:.1%}" diff --git a/src/codex_usage_tracker/json_contracts.py b/src/codex_usage_tracker/json_contracts.py index bddcc11..502b565 100644 --- a/src/codex_usage_tracker/json_contracts.py +++ b/src/codex_usage_tracker/json_contracts.py @@ -158,6 +158,17 @@ } }, }, + "codex-usage-tracker-diagnostic-overview-v1": { + "required": { + "section": str, + "status": str, + "refreshed": bool, + "raw_context_included": bool, + "snapshot": (dict, NoneType), + "overview": (dict, NoneType), + "notes": list, + } + }, "codex-usage-tracker-session-v1": { "required": { "requested_session_id": 
(str, NoneType), diff --git a/src/codex_usage_tracker/server.py b/src/codex_usage_tracker/server.py index 42e01ae..562c313 100644 --- a/src/codex_usage_tracker/server.py +++ b/src/codex_usage_tracker/server.py @@ -35,6 +35,7 @@ build_diagnostics_facts_report, build_diagnostics_summary_report, ) +from codex_usage_tracker.diagnostic_snapshots import build_diagnostic_overview_report from codex_usage_tracker.i18n import normalize_language from codex_usage_tracker.paths import ( DEFAULT_ALLOWANCE_PATH, @@ -304,6 +305,9 @@ def do_GET(self) -> None: # noqa: N802 - stdlib hook name if parsed.path == "/api/diagnostics/tools": self._handle_diagnostics_facts(parsed.query, fact_group="tools") return + if parsed.path == "/api/diagnostics/overview": + self._handle_diagnostics_overview(parsed.query) + return if parsed.path == "/api/usage": self._handle_usage(parsed.query) return @@ -315,6 +319,16 @@ def do_GET(self) -> None: # noqa: N802 - stdlib hook name return super().do_GET() + def do_POST(self) -> None: # noqa: N802 - stdlib hook name + parsed = urlparse(self.path) + if not self._request_origin_allowed(): + self._send_json(HTTPStatus.FORBIDDEN, {"error": "Request host or origin is not allowed"}) + return + if parsed.path == "/api/diagnostics/overview/refresh": + self._handle_diagnostics_overview_refresh(parsed.query) + return + self._send_json(HTTPStatus.NOT_FOUND, {"error": "Unknown API endpoint"}) + def end_headers(self) -> None: if self._is_dashboard_html_request(): self.send_header("Cache-Control", "no-store") @@ -943,6 +957,53 @@ def _handle_diagnostics_fact_calls(self, query: str) -> None: return self._send_json(HTTPStatus.OK, payload) + def _handle_diagnostics_overview(self, query: str) -> None: + params = parse_qs(query) + include_archived = _parse_bool( + _first(params.get("include_archived")), + self._include_archived, + ) + try: + payload = build_diagnostic_overview_report( + db_path=self._db_path, + include_archived=include_archived, + refresh=False, + ).payload 
+ except sqlite3.Error as exc: + self._send_json( + HTTPStatus.INTERNAL_SERVER_ERROR, + {"error": f"Database error while reading diagnostic overview: {exc}"}, + ) + return + self._send_json(HTTPStatus.OK, payload) + + def _handle_diagnostics_overview_refresh(self, query: str) -> None: + params = parse_qs(query) + if not self._has_valid_api_token(params): + self._send_json( + HTTPStatus.FORBIDDEN, + {"error": "Valid API token is required for diagnostic refresh"}, + ) + return + include_archived = _parse_bool( + _first(params.get("include_archived")), + self._include_archived, + ) + try: + with self._refresh_lock: + payload = build_diagnostic_overview_report( + db_path=self._db_path, + include_archived=include_archived, + refresh=True, + ).payload + except sqlite3.Error as exc: + self._send_json( + HTTPStatus.INTERNAL_SERVER_ERROR, + {"error": f"Database error while refreshing diagnostic overview: {exc}"}, + ) + return + self._send_json(HTTPStatus.OK, payload) + def _live_query_params( self, params: dict[str, list[str]], diff --git a/src/codex_usage_tracker/store.py b/src/codex_usage_tracker/store.py index 0af764a..13b894c 100644 --- a/src/codex_usage_tracker/store.py +++ b/src/codex_usage_tracker/store.py @@ -3,6 +3,7 @@ from __future__ import annotations import csv +import json import sqlite3 from collections.abc import Iterable, Iterator from contextlib import contextmanager, suppress @@ -125,6 +126,7 @@ def rebuild_usage_index( with connect(db_path) as conn: init_db(conn) conn.execute("DELETE FROM call_diagnostic_facts") + conn.execute("DELETE FROM diagnostic_snapshots") conn.execute("DELETE FROM usage_events") conn.execute("DELETE FROM thread_summaries") conn.execute("DELETE FROM source_files") @@ -144,6 +146,7 @@ def reset_usage_database(db_path: Path = DEFAULT_DB_PATH) -> dict[str, Any]: row = conn.execute("SELECT COUNT(*) AS count FROM usage_events").fetchone() deleted_rows = int(row["count"] if row is not None else 0) conn.execute("DELETE FROM 
call_diagnostic_facts") + conn.execute("DELETE FROM diagnostic_snapshots") conn.execute("DELETE FROM usage_events") conn.execute("DELETE FROM thread_summaries") conn.execute("DELETE FROM source_files") @@ -222,6 +225,94 @@ def refresh_metadata(db_path: Path = DEFAULT_DB_PATH) -> dict[str, str]: return {str(row["key"]): str(row["value"]) for row in rows} +def upsert_diagnostic_snapshot( + db_path: Path = DEFAULT_DB_PATH, + *, + section: str, + history_scope: str, + payload: dict[str, Any], + computed_at: str, + source_logs_scanned: int, + usage_rows_scanned: int, + raw_content_included: bool = False, +) -> None: + """Persist one aggregate diagnostic report snapshot.""" + + payload_json = json.dumps(payload, sort_keys=True, separators=(",", ":")) + with connect(db_path) as conn: + init_db(conn) + conn.execute( + """ + INSERT INTO diagnostic_snapshots ( + section, + history_scope, + payload_json, + computed_at, + source_logs_scanned, + usage_rows_scanned, + raw_content_included + ) + VALUES (?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(section, history_scope) DO UPDATE SET + payload_json = excluded.payload_json, + computed_at = excluded.computed_at, + source_logs_scanned = excluded.source_logs_scanned, + usage_rows_scanned = excluded.usage_rows_scanned, + raw_content_included = excluded.raw_content_included + """, + ( + section, + history_scope, + payload_json, + computed_at, + int(source_logs_scanned), + int(usage_rows_scanned), + 1 if raw_content_included else 0, + ), + ) + + +def query_diagnostic_snapshot( + db_path: Path = DEFAULT_DB_PATH, + *, + section: str, + history_scope: str, +) -> dict[str, Any] | None: + """Return one persisted aggregate diagnostic report snapshot.""" + + if not db_path.exists(): + return None + with connect(db_path) as conn: + init_db(conn) + row = conn.execute( + """ + SELECT + section, + history_scope, + payload_json, + computed_at, + source_logs_scanned, + usage_rows_scanned, + raw_content_included + FROM diagnostic_snapshots + WHERE section = ? 
AND history_scope = ? + """, + (section, history_scope), + ).fetchone() + if row is None: + return None + payload = json.loads(str(row["payload_json"])) + return { + "section": str(row["section"]), + "history_scope": str(row["history_scope"]), + "payload": payload if isinstance(payload, dict) else {}, + "computed_at": str(row["computed_at"]), + "source_logs_scanned": int(row["source_logs_scanned"]), + "usage_rows_scanned": int(row["usage_rows_scanned"]), + "raw_content_included": bool(row["raw_content_included"]), + } + + def schema_state(db_path: Path = DEFAULT_DB_PATH) -> dict[str, Any]: """Return database migration and usage_events checksum state.""" diff --git a/src/codex_usage_tracker/store_schema.py b/src/codex_usage_tracker/store_schema.py index c5e973c..da85a5e 100644 --- a/src/codex_usage_tracker/store_schema.py +++ b/src/codex_usage_tracker/store_schema.py @@ -12,7 +12,7 @@ USAGE_EVENT_SCHEMA_CHECKSUM, ) -SCHEMA_VERSION = 9 +SCHEMA_VERSION = 10 MIGRATION_NAMES = { 1: "create usage_events aggregate fact table", 2: "track schema migration checksum metadata", @@ -23,6 +23,7 @@ 7: "persist source file parser cursors", 8: "persist observed Codex usage snapshots", 9: "persist aggregate diagnostic facts", + 10: "persist on-demand diagnostic report snapshots", } CALL_ORIGIN_REPAIR_COLUMNS = { "call_initiator": "TEXT", @@ -102,6 +103,12 @@ def init_db(conn: sqlite3.Connection) -> None: else: _migrate_v9(conn) _record_migration_if_missing(conn, 9) + if user_version < 10: + _migrate_v10(conn) + _record_migration(conn, 10) + else: + _migrate_v10(conn) + _record_migration_if_missing(conn, 10) _validate_usage_events_schema(conn) conn.execute(f"PRAGMA user_version = {SCHEMA_VERSION}") @@ -280,6 +287,26 @@ def _migrate_v9(conn: sqlite3.Connection) -> None: ) +def _migrate_v10(conn: sqlite3.Connection) -> None: + conn.executescript( + """ + CREATE TABLE IF NOT EXISTS diagnostic_snapshots ( + section TEXT NOT NULL, + history_scope TEXT NOT NULL, + payload_json TEXT NOT 
NULL, + computed_at TEXT NOT NULL, + source_logs_scanned INTEGER NOT NULL DEFAULT 0, + usage_rows_scanned INTEGER NOT NULL DEFAULT 0, + raw_content_included INTEGER NOT NULL DEFAULT 0, + PRIMARY KEY (section, history_scope) + ); + + CREATE INDEX IF NOT EXISTS idx_diagnostic_snapshots_computed_at + ON diagnostic_snapshots(computed_at); + """ + ) + + def _record_migration(conn: sqlite3.Connection, version: int) -> None: conn.execute( """ diff --git a/tests/store_dashboard_helpers.py b/tests/store_dashboard_helpers.py index 92806ab..4fcd2eb 100644 --- a/tests/store_dashboard_helpers.py +++ b/tests/store_dashboard_helpers.py @@ -349,14 +349,24 @@ def _assert_contract(payload: object) -> None: assert validate_json_payload_contract(payload) == [] -def _read_json(url: str, headers: dict[str, str] | None = None) -> dict[str, object]: - request = urllib.request.Request(url, headers=headers or {}) +def _read_json( + url: str, + headers: dict[str, str] | None = None, + data: bytes | None = None, + method: str | None = None, +) -> dict[str, object]: + request = urllib.request.Request(url, data=data, headers=headers or {}, method=method) with urllib.request.urlopen(request, timeout=5) as response: # noqa: S310 - local test server only return json.loads(response.read().decode("utf-8")) -def _http_error_json(url: str, headers: dict[str, str] | None = None) -> dict[str, object]: - request = urllib.request.Request(url, headers=headers or {}) +def _http_error_json( + url: str, + headers: dict[str, str] | None = None, + data: bytes | None = None, + method: str | None = None, +) -> dict[str, object]: + request = urllib.request.Request(url, data=data, headers=headers or {}, method=method) try: urllib.request.urlopen(request, timeout=5) # noqa: S310 - local test server only except urllib.error.HTTPError as exc: diff --git a/tests/test_cli_lifecycle.py b/tests/test_cli_lifecycle.py index 7a02eab..be5a7ee 100644 --- a/tests/test_cli_lifecycle.py +++ b/tests/test_cli_lifecycle.py @@ -383,6 
+383,23 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None: "tools", "--json", ) + overview_missing = _run_cli( + tmp_path, + "--db", + str(db_path), + "diagnostics", + "overview", + "--json", + ) + overview_refresh = _run_cli( + tmp_path, + "--db", + str(db_path), + "diagnostics", + "overview", + "--refresh", + "--json", + ) fact_calls = _run_cli( tmp_path, "--db", @@ -403,17 +420,28 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None: facts_payload = json.loads(facts.stdout) compactions_payload = json.loads(compactions.stdout) tools_payload = json.loads(tools.stdout) + overview_missing_payload = json.loads(overview_missing.stdout) + overview_refresh_payload = json.loads(overview_refresh.stdout) fact_calls_payload = json.loads(fact_calls.stdout) for payload in ( summary_payload, facts_payload, compactions_payload, tools_payload, + overview_missing_payload, + overview_refresh_payload, fact_calls_payload, ): _assert_contract(payload) - assert payload["schema"] == "codex-usage-tracker-diagnostics-v1" assert payload["raw_context_included"] is False + for payload in ( + summary_payload, + facts_payload, + compactions_payload, + tools_payload, + fact_calls_payload, + ): + assert payload["schema"] == "codex-usage-tracker-diagnostics-v1" assert "Associated token totals are not additive" in payload["notes"][0] fact_names = {row["fact_name"] for row in facts_payload["rows"]} @@ -429,6 +457,12 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None: assert tools_payload["filters"]["fact_type"] is None assert tools_payload["filters"]["fact_group"] == "tools" assert {row["fact_type"] for row in tools_payload["rows"]} == {"tool"} + assert overview_missing_payload["schema"] == "codex-usage-tracker-diagnostic-overview-v1" + assert overview_missing_payload["status"] == "missing" + assert overview_refresh_payload["schema"] == "codex-usage-tracker-diagnostic-overview-v1" + assert overview_refresh_payload["status"] == 
"ready" + assert overview_refresh_payload["overview"]["usage_rows"] == 2 + assert overview_refresh_payload["refreshed"] is True assert fact_calls_payload["view"] == "fact-calls" assert fact_calls_payload["filters"]["privacy_mode"] == "strict" assert fact_calls_payload["rows"][0]["cwd"].startswith("[redacted cwd:") diff --git a/tests/test_dashboard_server.py b/tests/test_dashboard_server.py index a174f76..a432bb8 100644 --- a/tests/test_dashboard_server.py +++ b/tests/test_dashboard_server.py @@ -79,6 +79,35 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) -> content_security_policy = response.headers.get("Content-Security-Policy") referrer_policy = response.headers.get("Referrer-Policy") limited_payload = json.loads(response.read().decode("utf-8")) + diagnostic_overview_after_usage_refresh = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview" + ) + diagnostic_refresh_without_token = _http_error_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview/refresh", + data=b"", + method="POST", + ) + diagnostic_refresh_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview/refresh", + headers={"X-Codex-Usage-Token": "test-token"}, + data=b"", + method="POST", + ) + diagnostic_stored_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview" + ) + diagnostic_computed_at = diagnostic_stored_payload["snapshot"]["computed_at"] + with urllib.request.urlopen( # noqa: S310 - local test server only + urllib.request.Request( + f"http://127.0.0.1:{server.server_port}/api/usage?refresh=1&limit=2", + headers={"X-Codex-Usage-Token": "test-token"}, + ), + timeout=5, + ) as response: + second_usage_refresh_payload = json.loads(response.read().decode("utf-8")) + diagnostic_after_second_usage_refresh = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview" + ) with urllib.request.urlopen( # noqa: S310 - local test server only 
f"http://127.0.0.1:{server.server_port}/api/usage?limit=all", timeout=5, @@ -104,6 +133,7 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) -> thread.join(timeout=5) assert refresh_without_token["status"] == 403 + assert diagnostic_refresh_without_token["status"] == 403 assert dashboard_cache_control == "no-store" shell_raw_payload = dashboard_html.split( ' + + diff --git a/tests/playwright/dashboard-diagnostics.spec.mjs b/tests/playwright/dashboard-diagnostics.spec.mjs new file mode 100644 index 0000000..9a6e47e --- /dev/null +++ b/tests/playwright/dashboard-diagnostics.spec.mjs @@ -0,0 +1,28 @@ +import { expect, test } from '@playwright/test'; + +test.describe('diagnostics dashboard smoke', () => { + test('renders diagnostics panels with explicit refresh control', async ({ page }) => { + await page.goto('/dashboard.html?view=diagnostics'); + + await expect(page.getByRole('button', { name: 'Diagnostics', exact: true })).toHaveAttribute( + 'aria-pressed', + 'true', + ); + await expect(page.locator('#diagnosticsPanel')).toBeVisible(); + await expect(page.getByRole('button', { name: 'Refresh diagnostics' })).toBeVisible(); + await expect(page.locator('#diagnosticsPanel')).not.toContainText( + 'Live API required for diagnostics refresh', + ); + + for (const heading of [ + 'Overview', + 'Tool Output', + 'Commands', + 'File Reads', + 'Read Productivity', + 'Concentration', + ]) { + await expect(page.getByRole('heading', { name: heading })).toBeVisible(); + } + }); +}); diff --git a/tests/test_dashboard_payload.py b/tests/test_dashboard_payload.py index 19bb5ba..9dc9b18 100644 --- a/tests/test_dashboard_payload.py +++ b/tests/test_dashboard_payload.py @@ -55,6 +55,12 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: dashboard_diagnostics_js = (asset_dir / "dashboard_diagnostics.js").read_text( encoding="utf-8" ) + dashboard_diagnostics_facts_js = ( + asset_dir / "dashboard_diagnostics_facts.js" + 
).read_text(encoding="utf-8") + dashboard_diagnostics_snapshots_js = ( + asset_dir / "dashboard_diagnostics_snapshots.js" + ).read_text(encoding="utf-8") dashboard_call_diagnostics_js = ( asset_dir / "dashboard_call_diagnostics.js" ).read_text(encoding="utf-8") @@ -94,6 +100,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: dashboard_live_js, dashboard_events_js, dashboard_diagnostics_js, + dashboard_diagnostics_facts_js, + dashboard_diagnostics_snapshots_js, dashboard_call_diagnostics_js, dashboard_call_js, dashboard_js, @@ -119,6 +127,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "SECRET RAW PROMPT" not in dashboard_live_js assert "SECRET RAW PROMPT" not in dashboard_events_js assert "SECRET RAW PROMPT" not in dashboard_diagnostics_js + assert "SECRET RAW PROMPT" not in dashboard_diagnostics_facts_js + assert "SECRET RAW PROMPT" not in dashboard_diagnostics_snapshots_js assert "SECRET RAW PROMPT" not in dashboard_call_diagnostics_js assert "SECRET RAW PROMPT" not in dashboard_call_js assert "SECRET RAW PROMPT" not in dashboard_css @@ -139,6 +149,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_live_js assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_events_js assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_diagnostics_js + assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_diagnostics_facts_js + assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_diagnostics_snapshots_js assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_call_diagnostics_js assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_call_js assert "EVENT MSG COMPACTION SUMMARY" not in dashboard @@ -157,6 +169,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_live_js assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_events_js assert "EVENT MSG 
COMPACTION SUMMARY" not in dashboard_diagnostics_js + assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_diagnostics_facts_js + assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_diagnostics_snapshots_js assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_call_diagnostics_js assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_call_js for stylesheet in dashboard_stylesheets: @@ -177,6 +191,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert 'src="codex-usage-tracker-assets/dashboard_actions.js?v=' in dashboard assert 'src="codex-usage-tracker-assets/dashboard_live.js?v=' in dashboard assert 'src="codex-usage-tracker-assets/dashboard_events.js?v=' in dashboard + assert 'src="codex-usage-tracker-assets/dashboard_diagnostics_snapshots.js?v=' in dashboard + assert 'src="codex-usage-tracker-assets/dashboard_diagnostics_facts.js?v=' in dashboard assert 'src="codex-usage-tracker-assets/dashboard_diagnostics.js?v=' in dashboard assert 'src="codex-usage-tracker-assets/dashboard_call_diagnostics.js?v=' in dashboard assert 'src="codex-usage-tracker-assets/dashboard_call_investigator.js?v=' in dashboard @@ -197,6 +213,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "CodexUsageDashboardActions" in dashboard_actions_js assert "CodexUsageDashboardLive" in dashboard_live_js assert "CodexUsageDashboardEvents" in dashboard_events_js + assert "CodexUsageDashboardDiagnosticSnapshots" in dashboard_diagnostics_snapshots_js + assert "CodexUsageDashboardDiagnosticFacts" in dashboard_diagnostics_facts_js assert "CodexUsageDashboardDiagnostics" in dashboard_diagnostics_js assert "CodexUsageCallDiagnostics" in dashboard_call_diagnostics_js assert "CodexUsageCallInvestigator" in dashboard_call_js @@ -282,22 +300,41 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "/api/diagnostics/tools" in dashboard_diagnostics_js assert "/api/diagnostics/compactions" in dashboard_diagnostics_js 
assert "/api/diagnostics/fact-calls" in dashboard_diagnostics_js + assert "dashboard_diagnostics_snapshots.js" in dashboard + assert "dashboard_diagnostics_facts.js" in dashboard + assert "/api/diagnostics/overview" in dashboard_diagnostics_snapshots_js + assert "/api/diagnostics/tool-output/refresh" in dashboard_diagnostics_snapshots_js + assert "/api/diagnostics/commands/refresh" in dashboard_diagnostics_snapshots_js + assert "/api/diagnostics/file-reads/refresh" in dashboard_diagnostics_snapshots_js + assert "/api/diagnostics/read-productivity/refresh" in dashboard_diagnostics_snapshots_js + assert "/api/diagnostics/concentration/refresh" in dashboard_diagnostics_snapshots_js + assert "Refresh diagnostics" in dashboard_diagnostics_snapshots_js + assert "data-diagnostics-refresh" in dashboard_diagnostics_js + assert "Live API required for diagnostics refresh" in dashboard_diagnostics_js + assert "Overview" in dashboard_diagnostics_snapshots_js + assert "Tool Output" in dashboard_diagnostics_snapshots_js + assert "File Reads" in dashboard_diagnostics_snapshots_js + assert "Read Productivity" in dashboard_diagnostics_snapshots_js + assert "Concentration" in dashboard_diagnostics_snapshots_js assert "Associated token totals" in dashboard_diagnostics_js assert "Raw context remains on-demand" in dashboard_diagnostics_js assert "rowInvestigatorLink" in dashboard_diagnostics_js - assert "diagnostics-drilldown-row" in dashboard_diagnostics_js - assert 'td colspan="11"' in dashboard_diagnostics_js - assert "associated_cached_input_tokens" in dashboard_diagnostics_js - assert "row.cached_input_tokens" in dashboard_diagnostics_js - assert "Occurrences: count of matching diagnostic fact events" in dashboard_diagnostics_js - assert "Associated total tokens for those calls" in dashboard_diagnostics_js - assert "Average cache ratio across associated calls" in dashboard_diagnostics_js - assert "data-diagnostics-fact-sort-key" in dashboard_diagnostics_js - assert 
"data-diagnostics-fact-sort-active" in dashboard_diagnostics_js + assert "diagnostics-drilldown-row" in dashboard_diagnostics_facts_js + assert 'td colspan="11"' in dashboard_diagnostics_facts_js + assert "associated_cached_input_tokens" in dashboard_diagnostics_facts_js + assert "row.cached_input_tokens" in dashboard_diagnostics_facts_js + assert "Occurrences: count of matching diagnostic fact events" in dashboard_diagnostics_facts_js + assert "Associated total tokens for those calls" in dashboard_diagnostics_facts_js + assert "Average cache ratio across associated calls" in dashboard_diagnostics_facts_js + assert "data-diagnostics-fact-sort-key" in dashboard_diagnostics_facts_js + assert "data-diagnostics-fact-sort-active" in dashboard_diagnostics_facts_js assert "sortFactRows" in dashboard_diagnostics_js - assert "diagnosticFactHeader" in dashboard_diagnostics_js + assert "diagnosticFactHeader" in dashboard_diagnostics_facts_js assert "diagnostics-facts-table" in dashboard_surface assert "diagnostics-fact-cell" in dashboard_surface + assert "diagnostics-snapshot-grid" in dashboard_css + assert "diagnostics-toolbar" in dashboard_css + assert "diagnostics-mini-table" in dashboard_css assert "diagnostics-facts-table th:first-child" in dashboard_css assert "td.diagnostics-fact-cell" in dashboard_css assert "captureScrollAnchor" in dashboard_diagnostics_js @@ -306,7 +343,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "offset: String(offset)" in dashboard_diagnostics_js assert "mergeFactCallPayload" in dashboard_diagnostics_js assert "data-diagnostics-call-sort-key" in dashboard_diagnostics_js - assert "data-diagnostics-call-sort-active" in dashboard_diagnostics_js + assert "data-diagnostics-call-sort-active" in dashboard_diagnostics_facts_js assert "sortFactCalls" in dashboard_diagnostics_js assert "defaultFactCallSortDirection" in dashboard_diagnostics_js assert "sort: sortState.sort" in dashboard_diagnostics_js @@ -501,6 +538,7 
@@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert (asset_dir / "dashboard_details.js").exists() assert (asset_dir / "dashboard_insights.js").exists() assert (asset_dir / "dashboard_tables.js").exists() + assert (asset_dir / "dashboard_diagnostics_snapshots.js").exists() assert (asset_dir / "dashboard_filters.js").exists() assert (asset_dir / "dashboard_state.js").exists() assert (asset_dir / "dashboard_payload_cache.js").exists() From 72ff139aa453e7e5105f93547ab4cc49c1c2143c Mon Sep 17 00:00:00 2001 From: Monsky Date: Sat, 20 Jun 2026 19:29:10 -0400 Subject: [PATCH 06/10] docs: document diagnostic dashboard reports --- docs/architecture.md | 6 ++++-- docs/cli-json-schemas.md | 2 ++ docs/cli-reference.md | 7 +++++++ docs/dashboard-guide.md | 2 ++ docs/privacy.md | 4 +++- 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index b58f8eb..4353847 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -13,7 +13,8 @@ Codex Usage Tracker is a local sidecar app. It reads aggregate token counters fr - `costing.py`, `pricing_config.py`, `pricing_openai.py`, `pricing_estimates.py`, and `allowance.py` own cost, credit, rate-card, and allowance annotation. Keep estimate confidence and source metadata attached to rows. - `projects.py`, `threads.py`, and `recommendations.py` annotate aggregate rows with project identity, thread relationships, and actionable signals. Project privacy redaction also belongs in `projects.py` so CLI, MCP, dashboard, CSV, and support-bundle surfaces share the same behavior. - `dashboard.py` builds aggregate-only static dashboard payloads and writes HTML/assets. `server.py` adds localhost refresh, the compatibility `/api/usage` endpoint, SQL-backed live API slices, and explicit lazy context loading. -- `plugin_data/dashboard/dashboard_format.js` owns dashboard formatting primitives. `dashboard_data.js` owns row payload and thread relationship helpers. 
`dashboard_analysis.js` owns scoring, sorting, recommendation, and thread grouping logic. `dashboard_cells.js` owns reusable table/cell HTML helpers. `dashboard_details.js` owns sidebar detail and thread narrative rendering. `dashboard_insights.js` owns insight cards and investigation preset UI. `dashboard_tables.js` owns Calls, Threads, and expanded thread-call table rendering. `dashboard_diagnostics.js` owns the Diagnostics tab that consumes `/api/diagnostics/*` aggregate payloads. `dashboard_filters.js` owns date range parsing and row date matching. `dashboard_state.js` owns URL, CSV, and download state utilities. `dashboard_i18n.js`, `dashboard_payload_cache.js`, and `dashboard_tooltips.js` own localization, session aggregate cache, and fast tooltip helpers. `dashboard_call_investigator.js` owns the dedicated call drilldown surface. `dashboard.js` owns top-level DOM rendering, event handling, and API refresh orchestration. +- `diagnostic_snapshots.py` owns persisted diagnostic snapshot refresh/load orchestration. `diagnostic_snapshot_analysis.py`, `diagnostic_snapshot_events.py`, `diagnostic_snapshot_rows.py`, and `diagnostic_snapshot_concentration.py` own source-log aggregation, safe event parsing, row shaping, and concentration math. `diagnostic_snapshot_report.py` owns CLI rendering. Keep these modules synthetic-testable and aggregate-only. +- `plugin_data/dashboard/dashboard_format.js` owns dashboard formatting primitives. `dashboard_data.js` owns row payload and thread relationship helpers. `dashboard_analysis.js` owns scoring, sorting, recommendation, and thread grouping logic. `dashboard_cells.js` owns reusable table/cell HTML helpers. `dashboard_details.js` owns sidebar detail and thread narrative rendering. `dashboard_insights.js` owns insight cards and investigation preset UI. `dashboard_tables.js` owns Calls, Threads, and expanded thread-call table rendering. 
`dashboard_diagnostics.js` coordinates the Diagnostics tab data flow and events, `dashboard_diagnostics_snapshots.js` renders on-demand snapshot panels, and `dashboard_diagnostics_facts.js` renders the fact tables and drilldowns. `dashboard_filters.js` owns date range parsing and row date matching. `dashboard_state.js` owns URL, CSV, and download state utilities. `dashboard_i18n.js`, `dashboard_payload_cache.js`, and `dashboard_tooltips.js` own localization, session aggregate cache, and fast tooltip helpers. `dashboard_call_investigator.js` owns the dedicated call drilldown surface. `dashboard.js` owns top-level DOM rendering, event handling, and API refresh orchestration. - `context.py` is the only normal path that reads raw log context, and it does so only for one selected record on demand with redaction and size limits. Its default quick mode omits tool output and serialized groups; full serialized JSONL group analysis is explicit. - `plugin_installer.py`, `.mcp.json`, `skills/`, and `scripts/check_release.py` own install and packaging behavior. - `scripts/benchmark_synthetic_history.py` owns generated large-history query timing and threshold enforcement for 10k, 100k, and 500k aggregate-row fixtures. Its optional `--with-source-logs` mode writes synthetic JSONL source logs to time explicit context loading and to guard normal dashboard payload assembly against source-log reads. It must stay synthetic-only and must not read real Codex logs. @@ -26,10 +27,11 @@ Codex Usage Tracker is a local sidecar app. It reads aggregate token counters fr 1. Add new persisted usage-event metrics through `UsageEvent`, `schema.py`, migrations, store queries, dashboard payload tests, and CSV/export checks. Add auxiliary aggregate tables such as `thread_summaries` or `source_files` through `store.py` migrations plus focused migration/privacy tests. 2. Add new report views through `reports.py` first, then wire CLI and MCP wrappers to that shared service. 3. 
Add new machine-readable outputs through `api_payloads.py` or report payload methods with a `schema` value, a `json_contracts.py` entry, and focused tests. -4. Add dashboard-only interactions in `plugin_data/dashboard/dashboard.js` and keep URL state in `dashboard_state.js`. +4. Add dashboard-only interactions in the narrowest dashboard module and keep URL state in `dashboard_state.js`. Diagnostics snapshot panels should stay in `dashboard_diagnostics_snapshots.js`; fact tables should stay in `dashboard_diagnostics_facts.js`. 5. Keep all examples, screenshots, mocks, and tests synthetic. Never derive fixtures from real logs. 6. When editing skill instructions, update both the source `skills/...` file and the bundled `src/codex_usage_tracker/plugin_data/skills/...` copy. `scripts/check_release.py` verifies that installable plugin assets stay complete and synced. 7. When adding fields derived from `cwd`, Git metadata, source paths, or log-event metadata, decide how they behave in `normal`, `redacted`, and `strict` privacy modes before exposing them in dashboard, JSON, CSV, MCP, or support-bundle output. +8. Diagnostic snapshot refresh must remain explicit and on demand. Normal usage refresh paths may load stored snapshots, but they must not rescan source logs for diagnostic sections unless the user calls a diagnostics `--refresh` command or a `/api/diagnostics/<section>/refresh` endpoint. ## Validation diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md index 61219d7..3b1dd2b 100644 --- a/docs/cli-json-schemas.md +++ b/docs/cli-json-schemas.md @@ -287,6 +287,8 @@ Schema: `codex-usage-tracker-diagnostics-v1` Diagnostics payloads report aggregate structured facts such as compaction, tool/function/MCP activity, command families, structured skill labels, search/read loops, and outcome events. They do not include prompts, assistant messages, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments. Token totals are associated with facts observed before a token-count row; they are not causal allocations. +Diagnostic snapshots use separate section endpoints instead of one large payload. `GET` returns the latest stored section snapshot or `status: "missing"`; `POST /api/diagnostics/<section>/refresh` recomputes and replaces only that section. This keeps ordinary dashboard refresh fast and prevents source-log rescans unless a diagnostics refresh is explicit. + ## Diagnostic Overview Snapshot Commands: diff --git a/docs/cli-reference.md b/docs/cli-reference.md index bc69420..341dc9a 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -120,6 +120,9 @@ codex-usage-tracker diagnostics summary codex-usage-tracker diagnostics facts --sort uncached codex-usage-tracker diagnostics compactions codex-usage-tracker diagnostics tools +codex-usage-tracker diagnostics overview --refresh +codex-usage-tracker diagnostics tool-output --refresh +codex-usage-tracker diagnostics commands --refresh codex-usage-tracker diagnostics file-reads --refresh codex-usage-tracker diagnostics read-productivity --refresh codex-usage-tracker diagnostics concentration --refresh @@ -128,6 +131,10 @@ codex-usage-tracker diagnostics fact-calls --fact-type compaction --fact-name po Diagnostics expose structured event patterns and their associated token totals. They can show compactions, tool/function/MCP activity, safe command families, structured skill labels, patch outcomes, task completion, search/read loops, and aborted or rolled-back turns. Associated totals are not causal allocations and are not additive when one model call has multiple diagnostic facts. +Snapshot diagnostics are persisted aggregate reports. Without `--refresh`, snapshot commands return the latest stored payload or a `missing` status. With `--refresh`, they recompute from indexed source logs and replace the stored section snapshot. Ordinary `refresh`, `open-dashboard`, and dashboard `Refresh` update usage rows only; they do not recompute diagnostic snapshots.
+ +The snapshot sections answer different questions: `overview` summarizes usage rows and aggregate token totals, `tool-output` counts functions and terminal `Original token count` coverage, `commands` keeps command roots plus one safe child label, `file-reads` counts reader/path activity and allocated read-output tokens, `read-productivity` reports later-edit correlations for matching path keys, and `concentration` shows top-N token share by source/session, cwd/project, and day. + Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, raw absolute paths, or JSONL fragments. File-read diagnostics use basename-only path labels plus short irreversible hashes, read-productivity percentages are temporal correlations rather than proof that a read caused a later edit, and concentration reports use safe source/session, cwd, and day labels only. ## JSON Queries diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md index 100b1fb..14b6672 100644 --- a/docs/dashboard-guide.md +++ b/docs/dashboard-guide.md @@ -135,6 +135,8 @@ Use `Diagnostics` view when you want to see what structured event patterns are h - Command diagnostics store only a command family such as `pytest`, `git`, or `unknown_command`. Skill and MCP labels are detected only when they are present as structured event metadata. - Newer on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, `read-productivity`, and `concentration`). Heavy recomputation happens only through each section's explicit refresh endpoint. - Click `Refresh diagnostics` when you want to recompute stored diagnostic snapshots. The normal dashboard `Refresh` action updates usage rows only. +- Snapshot panels show their stored status, last computed time, history scope, and logs scanned count. 
Missing or stale panels still render without forcing a source-log scan. +- `Tool Output` totals come from terminal wrapper metadata such as `Original token count`; missing-count rows show coverage gaps where that header was absent. - File-read snapshots use basename-only path labels and short hashes. Read-productivity rates are temporal correlations between earlier reads and later structured patch events, not causation. - Concentration snapshots show top-N share and effective group count by source log/session, cwd/project label, and day without exposing raw source-log or cwd paths. - Click `Calls` on a fact row to load associated model calls. Call links and largest-call links open the Call Investigator, where raw context remains explicit and on demand. diff --git a/docs/privacy.md b/docs/privacy.md index 748c4c6..d868045 100644 --- a/docs/privacy.md +++ b/docs/privacy.md @@ -35,7 +35,9 @@ Call-origin metadata is heuristic and confidence-labeled. It stores categories s Diagnostic facts follow the same aggregate-only rule. They can store safe structured labels such as `patch_applied`, `function_call_output`, `post_compaction`, MCP tool/server labels, structured skill labels, and command families such as `pytest`, `git`, or `unknown_command`, along with event counts and source line ranges. Command text may be classified in memory during parsing, but it is not persisted. Diagnostic facts do not store tool arguments, command text, command output, patch text, prompt or assistant text, file contents, raw JSONL fragments, or raw context evidence. -On-demand diagnostic snapshots follow the same boundary. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. 
Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths. +On-demand diagnostic snapshots follow the same boundary. Tool-output snapshots use terminal wrapper metadata such as `Original token count` when present and persist only counts, coverage gaps, and safe function/command labels. Command snapshots keep command roots plus one conservative child label. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths. + +Diagnostic snapshots are not live recomputed during ordinary dashboard or usage refresh. Stored snapshots can be displayed without rescanning source logs, and recomputation requires an explicit diagnostics `--refresh` command or a localhost `/api/diagnostics/<section>/refresh` request. ## On-Demand Context From 362c9fdad3da322c8e6d77cf6fa5989541832aa5 Mon Sep 17 00:00:00 2001 From: Monsky Date: Sat, 20 Jun 2026 19:47:34 -0400 Subject: [PATCH 07/10] fix: batch diagnostic dashboard refresh --- docs/cli-json-schemas.md | 2 +- docs/dashboard-guide.md | 2 +- docs/privacy.md | 2 +- .../diagnostic_snapshot_analysis.py | 69 ++++++++-------- .../diagnostic_snapshot_constants.py | 1 + .../diagnostic_snapshots.py | 81 +++++++++++++++++++ .../dashboard/dashboard_diagnostics.js | 8 +- src/codex_usage_tracker/server.py | 30 +++++++ tests/test_dashboard_payload.py | 1 + tests/test_dashboard_server.py | 42 ++++------ tests/test_diagnostic_snapshots.py | 42 ++++++++++ 11 files changed, 217 insertions(+), 63 deletions(-) diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md index 3b1dd2b..d604e19 100644 --- a/docs/cli-json-schemas.md +++ b/docs/cli-json-schemas.md @@ -287,7 +287,7 @@ Schema: `codex-usage-tracker-diagnostics-v1` Diagnostics payloads report aggregate structured facts such as compaction, tool/function/MCP activity, command families, structured skill labels, search/read loops, and outcome events. They do not include prompts, assistant messages, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments. Token totals are associated with facts observed before a token-count row; they are not causal allocations. -Diagnostic snapshots use separate section endpoints instead of one large payload. `GET` returns the latest stored section snapshot or `status: "missing"`; `POST /api/diagnostics/<section>/refresh` recomputes and replaces only that section. This keeps ordinary dashboard refresh fast and prevents source-log rescans unless a diagnostics refresh is explicit. +Diagnostic snapshots use separate section endpoints instead of one large read payload. `GET` returns the latest stored section snapshot or `status: "missing"`; `POST /api/diagnostics/<section>/refresh` recomputes and replaces only that section. The dashboard button calls `POST /api/diagnostics/refresh`, which returns a small wrapper with `sections` and recomputes source-log-derived sections with one shared analyzer pass. This keeps ordinary dashboard refresh fast and prevents source-log rescans unless a diagnostics refresh is explicit. ## Diagnostic Overview Snapshot diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md index 14b6672..6d535b1 100644 --- a/docs/dashboard-guide.md +++ b/docs/dashboard-guide.md @@ -133,7 +133,7 @@ Use `Diagnostics` view when you want to see what structured event patterns are h - The tab consumes the localhost `/api/diagnostics/*` endpoints; static file dashboards show a live-API unavailable state. - The first table shows top diagnostic facts by associated uncached input tokens. Tool/function/MCP/command-family and compaction sections expose narrower slices of the same fact data. - Command diagnostics store only a command family such as `pytest`, `git`, or `unknown_command`. Skill and MCP labels are detected only when they are present as structured event metadata. -- Newer on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, `read-productivity`, and `concentration`). Heavy recomputation happens only through each section's explicit refresh endpoint. +- Newer on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, `read-productivity`, and `concentration`). Heavy recomputation happens only through explicit diagnostic refresh endpoints. The dashboard's `Refresh diagnostics` button uses one batched refresh so source-log sections share one scan. - Click `Refresh diagnostics` when you want to recompute stored diagnostic snapshots. The normal dashboard `Refresh` action updates usage rows only. - Snapshot panels show their stored status, last computed time, history scope, and logs scanned count.
Missing or stale panels still render without forcing a source-log scan. - `Tool Output` totals come from terminal wrapper metadata such as `Original token count`; missing-count rows show coverage gaps where that header was absent. diff --git a/docs/privacy.md b/docs/privacy.md index d868045..39faa33 100644 --- a/docs/privacy.md +++ b/docs/privacy.md @@ -37,7 +37,7 @@ Diagnostic facts follow the same aggregate-only rule. They can store safe struct On-demand diagnostic snapshots follow the same boundary. Tool-output snapshots use terminal wrapper metadata such as `Original token count` when present and persist only counts, coverage gaps, and safe function/command labels. Command snapshots keep command roots plus one conservative child label. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths. -Diagnostic snapshots are not live recomputed during ordinary dashboard or usage refresh. Stored snapshots can be displayed without rescanning source logs, and recomputation requires an explicit diagnostics `--refresh` command or a localhost `/api/diagnostics/<section>/refresh` request. +Diagnostic snapshots are not live recomputed during ordinary dashboard or usage refresh. Stored snapshots can be displayed without rescanning source logs, and recomputation requires an explicit diagnostics `--refresh` command, the batched localhost `/api/diagnostics/refresh` request, or a targeted `/api/diagnostics/<section>/refresh` request. ## On-Demand Context diff --git a/src/codex_usage_tracker/diagnostic_snapshot_analysis.py b/src/codex_usage_tracker/diagnostic_snapshot_analysis.py index 3f78227..070857a 100644 --- a/src/codex_usage_tracker/diagnostic_snapshot_analysis.py +++ b/src/codex_usage_tracker/diagnostic_snapshot_analysis.py @@ -111,43 +111,46 @@ def _scan_source_log(source_log: Path, *, counters: dict[str, Any], meta: Counte source_read_events: list[int] = [] modified_orders_by_path: dict[str, list[int]] = defaultdict(list) try: - lines = source_log.read_text(encoding="utf-8").splitlines() + lines = source_log.open(encoding="utf-8") except OSError: meta["read_errors"] += 1 return - for order, line in enumerate(lines): - envelope = _json_envelope(line, meta=meta) - if envelope is None: - continue - payload = envelope.get("payload") - if not isinstance(payload, dict): - continue - if envelope.get("type") == "event_msg": - for path_ref in modified_path_refs(payload): - modified_orders_by_path[path_ref["path_key"]].append(order) - continue - if envelope.get("type") != "response_item": - continue - if payload.get("type") == "function_call": - _record_function_call( - payload, - order=order, - counters=counters, - meta=meta, - call_names=call_names, - call_roots=call_roots, - call_read_events=call_read_events, - source_read_events=source_read_events, - ) - elif payload.get("type") == "function_call_output": - _record_function_output( - payload, - counters=counters, - call_names=call_names, - call_roots=call_roots, - call_read_events=call_read_events, - ) + with lines: + for order, line in enumerate(lines): + if '"response_item"' not in line and '"patch_apply_end"' not in line: + continue + envelope = _json_envelope(line, meta=meta) + if envelope is None: + continue + payload = envelope.get("payload") + if not isinstance(payload, dict): + continue + if envelope.get("type") == "event_msg": + for path_ref in modified_path_refs(payload): +
modified_orders_by_path[path_ref["path_key"]].append(order) + continue + if envelope.get("type") != "response_item": + continue + if payload.get("type") == "function_call": + _record_function_call( + payload, + order=order, + counters=counters, + meta=meta, + call_names=call_names, + call_roots=call_roots, + call_read_events=call_read_events, + source_read_events=source_read_events, + ) + elif payload.get("type") == "function_call_output": + _record_function_output( + payload, + counters=counters, + call_names=call_names, + call_roots=call_roots, + call_read_events=call_read_events, + ) _mark_later_modifications( counters=counters, diff --git a/src/codex_usage_tracker/diagnostic_snapshot_constants.py b/src/codex_usage_tracker/diagnostic_snapshot_constants.py index e98ce92..2852510 100644 --- a/src/codex_usage_tracker/diagnostic_snapshot_constants.py +++ b/src/codex_usage_tracker/diagnostic_snapshot_constants.py @@ -6,6 +6,7 @@ DIAGNOSTIC_FILE_READS_SCHEMA = "codex-usage-tracker-diagnostic-file-reads-v1" DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA = "codex-usage-tracker-diagnostic-read-productivity-v1" DIAGNOSTIC_CONCENTRATION_SCHEMA = "codex-usage-tracker-diagnostic-concentration-v1" +DIAGNOSTIC_BATCH_REFRESH_SCHEMA = "codex-usage-tracker-diagnostic-snapshot-refresh-v1" DIAGNOSTIC_OVERVIEW_SECTION = "overview" DIAGNOSTIC_TOOL_OUTPUT_SECTION = "tool-output" DIAGNOSTIC_COMMANDS_SECTION = "commands" diff --git a/src/codex_usage_tracker/diagnostic_snapshots.py b/src/codex_usage_tracker/diagnostic_snapshots.py index 774db2b..c962da3 100644 --- a/src/codex_usage_tracker/diagnostic_snapshots.py +++ b/src/codex_usage_tracker/diagnostic_snapshots.py @@ -15,6 +15,7 @@ concentration_privacy_metadata, ) from codex_usage_tracker.diagnostic_snapshot_constants import ( + DIAGNOSTIC_BATCH_REFRESH_SCHEMA, DIAGNOSTIC_COMMANDS_SCHEMA, DIAGNOSTIC_COMMANDS_SECTION, DIAGNOSTIC_CONCENTRATION_SCHEMA, @@ -196,6 +197,67 @@ def refresh_diagnostic_overview_snapshot( return payload +def 
refresh_diagnostic_snapshots( + *, + db_path: Path = DEFAULT_DB_PATH, + include_archived: bool = False, +) -> dict[str, Any]: + """Recompute and persist all dashboard diagnostic snapshots. + + Source-log-derived sections share one analyzer pass so the dashboard refresh + button does not rescan the same logs once per panel. + """ + + history_scope = _history_scope(include_archived) + overview_payload = refresh_diagnostic_overview_snapshot( + db_path=db_path, + include_archived=include_archived, + ) + computed_at = _utc_now() + analysis = analyze_indexed_source_logs(db_path=db_path, include_archived=include_archived) + sections = { + DIAGNOSTIC_TOOL_OUTPUT_SECTION: DIAGNOSTIC_TOOL_OUTPUT_SCHEMA, + DIAGNOSTIC_COMMANDS_SECTION: DIAGNOSTIC_COMMANDS_SCHEMA, + DIAGNOSTIC_FILE_READS_SECTION: DIAGNOSTIC_FILE_READS_SCHEMA, + DIAGNOSTIC_READ_PRODUCTIVITY_SECTION: DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA, + } + source_payloads = { + section: _persist_source_log_snapshot( + db_path=db_path, + section=section, + schema=schema, + history_scope=history_scope, + computed_at=computed_at, + analysis=analysis, + ) + for section, schema in sections.items() + } + concentration_payload = _refresh_concentration_snapshot( + db_path=db_path, + include_archived=include_archived, + ) + return { + "schema": DIAGNOSTIC_BATCH_REFRESH_SCHEMA, + "status": "ready", + "refreshed": True, + "raw_context_included": False, + "history_scope": history_scope, + "sections": { + "overview": overview_payload, + "toolOutput": source_payloads[DIAGNOSTIC_TOOL_OUTPUT_SECTION], + "commands": source_payloads[DIAGNOSTIC_COMMANDS_SECTION], + "fileReads": source_payloads[DIAGNOSTIC_FILE_READS_SECTION], + "readProductivity": source_payloads[DIAGNOSTIC_READ_PRODUCTIVITY_SECTION], + "concentration": concentration_payload, + }, + "meta": { + "source_log_analysis_passes": 1, + "source_logs_scanned": analysis["meta"]["source_logs_scanned"], + "usage_rows_scanned": analysis["meta"]["usage_rows_scanned"], + }, + } + + def 
_build_source_log_snapshot_report( *, db_path: Path, @@ -233,6 +295,25 @@ def _refresh_source_log_snapshot( history_scope = _history_scope(include_archived) computed_at = _utc_now() analysis = analyze_indexed_source_logs(db_path=db_path, include_archived=include_archived) + return _persist_source_log_snapshot( + db_path=db_path, + section=section, + schema=schema, + history_scope=history_scope, + computed_at=computed_at, + analysis=analysis, + ) + + +def _persist_source_log_snapshot( + *, + db_path: Path, + section: str, + schema: str, + history_scope: str, + computed_at: str, + analysis: dict[str, Any], +) -> dict[str, Any]: snapshot = _snapshot_metadata( computed_at=computed_at, history_scope=history_scope, diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js index b95b82f..7a37910 100644 --- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js +++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js @@ -162,7 +162,13 @@ renderIfActive(); try { const filters = getDiagnosticFilters(); - const snapshots = await fetchSnapshotPayloads(filters, true); + const snapshotFilters = { include_archived: filters?.include_archived || '0' }; + const refreshPayload = await fetchPayload( + '/api/diagnostics/refresh', + snapshotFilters, + { method: 'POST' }, + ); + const snapshots = refreshPayload.sections || {}; if (signature !== activeSignature) return; payloads = { ...payloads, ...snapshots }; snapshotRefreshStatus = 'ready'; diff --git a/src/codex_usage_tracker/server.py b/src/codex_usage_tracker/server.py index 79abe1f..19c98cb 100644 --- a/src/codex_usage_tracker/server.py +++ b/src/codex_usage_tracker/server.py @@ -42,6 +42,7 @@ build_diagnostic_overview_report, build_diagnostic_read_productivity_report, build_diagnostic_tool_output_report, + refresh_diagnostic_snapshots, ) from codex_usage_tracker.i18n import normalize_language 
from codex_usage_tracker.paths import ( @@ -346,6 +347,9 @@ def do_POST(self) -> None: # noqa: N802 - stdlib hook name if not self._request_origin_allowed(): self._send_json(HTTPStatus.FORBIDDEN, {"error": "Request host or origin is not allowed"}) return + if parsed.path == "/api/diagnostics/refresh": + self._handle_diagnostics_refresh(parsed.query) + return if parsed.path == "/api/diagnostics/overview/refresh": self._handle_diagnostics_overview_refresh(parsed.query) return @@ -1002,6 +1006,32 @@ def _handle_diagnostics_overview(self, query: str) -> None: label="diagnostic overview", ) + def _handle_diagnostics_refresh(self, query: str) -> None: + params = parse_qs(query) + if not self._has_valid_api_token(params): + self._send_json( + HTTPStatus.FORBIDDEN, + {"error": "Valid API token is required for diagnostic refresh"}, + ) + return + include_archived = _parse_bool( + _first(params.get("include_archived")), + self._include_archived, + ) + try: + with self._refresh_lock: + payload = refresh_diagnostic_snapshots( + db_path=self._db_path, + include_archived=include_archived, + ) + except sqlite3.Error as exc: + self._send_json( + HTTPStatus.INTERNAL_SERVER_ERROR, + {"error": f"Database error while refreshing diagnostics: {exc}"}, + ) + return + self._send_json(HTTPStatus.OK, payload) + def _handle_diagnostics_overview_refresh(self, query: str) -> None: self._handle_diagnostic_snapshot( query, diff --git a/tests/test_dashboard_payload.py b/tests/test_dashboard_payload.py index 9dc9b18..5e73de8 100644 --- a/tests/test_dashboard_payload.py +++ b/tests/test_dashboard_payload.py @@ -300,6 +300,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "/api/diagnostics/tools" in dashboard_diagnostics_js assert "/api/diagnostics/compactions" in dashboard_diagnostics_js assert "/api/diagnostics/fact-calls" in dashboard_diagnostics_js + assert "/api/diagnostics/refresh" in dashboard_diagnostics_js assert "dashboard_diagnostics_snapshots.js" in 
dashboard assert "dashboard_diagnostics_facts.js" in dashboard assert "/api/diagnostics/overview" in dashboard_diagnostics_snapshots_js diff --git a/tests/test_dashboard_server.py b/tests/test_dashboard_server.py index 2a9b38f..9dc3036 100644 --- a/tests/test_dashboard_server.py +++ b/tests/test_dashboard_server.py @@ -93,36 +93,21 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) -> data=b"", method="POST", ) - diagnostic_tool_output_refresh_payload = _read_json( - f"http://127.0.0.1:{server.server_port}/api/diagnostics/tool-output/refresh", - headers={"X-Codex-Usage-Token": "test-token"}, - data=b"", - method="POST", - ) - diagnostic_commands_refresh_payload = _read_json( - f"http://127.0.0.1:{server.server_port}/api/diagnostics/commands/refresh", - headers={"X-Codex-Usage-Token": "test-token"}, - data=b"", - method="POST", - ) - diagnostic_file_reads_refresh_payload = _read_json( - f"http://127.0.0.1:{server.server_port}/api/diagnostics/file-reads/refresh", - headers={"X-Codex-Usage-Token": "test-token"}, - data=b"", - method="POST", - ) - diagnostic_read_productivity_refresh_payload = _read_json( - f"http://127.0.0.1:{server.server_port}/api/diagnostics/read-productivity/refresh", - headers={"X-Codex-Usage-Token": "test-token"}, - data=b"", - method="POST", - ) - diagnostic_concentration_refresh_payload = _read_json( - f"http://127.0.0.1:{server.server_port}/api/diagnostics/concentration/refresh", + diagnostic_batch_refresh_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/refresh", headers={"X-Codex-Usage-Token": "test-token"}, data=b"", method="POST", ) + diagnostic_tool_output_refresh_payload = diagnostic_batch_refresh_payload["sections"]["toolOutput"] + diagnostic_commands_refresh_payload = diagnostic_batch_refresh_payload["sections"]["commands"] + diagnostic_file_reads_refresh_payload = diagnostic_batch_refresh_payload["sections"]["fileReads"] + diagnostic_read_productivity_refresh_payload = 
diagnostic_batch_refresh_payload["sections"][ + "readProductivity" + ] + diagnostic_concentration_refresh_payload = diagnostic_batch_refresh_payload["sections"][ + "concentration" + ] diagnostic_stored_payload = _read_json( f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview" ) @@ -197,6 +182,11 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) -> assert diagnostic_refresh_payload["refreshed"] is True assert diagnostic_refresh_payload["overview"]["usage_rows"] == 4 assert diagnostic_refresh_payload["overview"]["total_tokens"] == 400 + assert diagnostic_batch_refresh_payload["schema"] == ( + "codex-usage-tracker-diagnostic-snapshot-refresh-v1" + ) + assert diagnostic_batch_refresh_payload["status"] == "ready" + assert diagnostic_batch_refresh_payload["meta"]["source_log_analysis_passes"] == 1 assert ( diagnostic_tool_output_refresh_payload["schema"] == "codex-usage-tracker-diagnostic-tool-output-v1" diff --git a/tests/test_diagnostic_snapshots.py b/tests/test_diagnostic_snapshots.py index 16adfe8..70a17b9 100644 --- a/tests/test_diagnostic_snapshots.py +++ b/tests/test_diagnostic_snapshots.py @@ -14,6 +14,7 @@ _write_jsonl, ) +from codex_usage_tracker import diagnostic_snapshots as diagnostic_snapshot_module from codex_usage_tracker.diagnostic_snapshots import ( DIAGNOSTIC_OVERVIEW_SECTION, build_diagnostic_commands_report, @@ -22,6 +23,7 @@ build_diagnostic_overview_report, build_diagnostic_read_productivity_report, build_diagnostic_tool_output_report, + refresh_diagnostic_snapshots, ) from codex_usage_tracker.store import ( query_diagnostic_snapshot, @@ -111,6 +113,46 @@ def test_usage_refresh_does_not_recompute_diagnostic_overview_snapshot( assert stored["payload"]["overview"]["total_tokens"] == 7 +def test_batch_diagnostic_refresh_shares_source_log_analysis_pass( + tmp_path: Path, + monkeypatch, +) -> None: + codex_home = _make_codex_home(tmp_path) + db_path = tmp_path / "usage.sqlite3" + 
refresh_usage_index(codex_home=codex_home, db_path=db_path) + calls = 0 + original = diagnostic_snapshot_module.analyze_indexed_source_logs + + def counting_analyzer(*args, **kwargs): + nonlocal calls + calls += 1 + return original(*args, **kwargs) + + monkeypatch.setattr( + diagnostic_snapshot_module, + "analyze_indexed_source_logs", + counting_analyzer, + ) + + refreshed = refresh_diagnostic_snapshots(db_path=db_path) + stored_file_reads = build_diagnostic_file_reads_report(db_path=db_path).payload + stored_read_productivity = build_diagnostic_read_productivity_report(db_path=db_path).payload + + assert calls == 1 + assert refreshed["schema"] == "codex-usage-tracker-diagnostic-snapshot-refresh-v1" + assert refreshed["meta"]["source_log_analysis_passes"] == 1 + assert refreshed["sections"]["overview"]["status"] == "ready" + assert refreshed["sections"]["toolOutput"]["status"] == "ready" + assert refreshed["sections"]["commands"]["status"] == "ready" + assert refreshed["sections"]["fileReads"]["status"] == "ready" + assert refreshed["sections"]["readProductivity"]["status"] == "ready" + assert refreshed["sections"]["concentration"]["status"] == "ready" + assert stored_file_reads["status"] == "ready" + assert stored_file_reads["refreshed"] is False + assert stored_read_productivity["status"] == "ready" + assert stored_read_productivity["refreshed"] is False + + def test_tool_output_and_command_snapshots_use_safe_aggregate_labels( tmp_path: Path, ) -> None: From 2ff52f3a5a3167102ec177be7bd98c4ab37e424f Mon Sep 17 00:00:00 2001 From: Monsky Date: Sat, 20 Jun 2026 20:17:43 -0400 Subject: [PATCH 08/10] fix: isolate diagnostics from live refresh --- .../plugin_data/dashboard/dashboard_live.js | 25 +++--- tests/test_dashboard_live.py | 89 +++++++++++++++++++ 2 files changed, 104 insertions(+), 10 deletions(-) diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js index 932acdd..1d8722e 
100644 --- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js +++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js @@ -44,6 +44,10 @@ let rowHydrationRestartRequested = false; let autoRefreshTimer = null; + function isUsageRefreshView() { + return !['call', 'diagnostics'].includes(activeView()); + } + function loadedRowsDescription() { const data = getData(); const loaded = number.format(data.length); @@ -71,7 +75,7 @@ if (!rowLoadProgressEl) return; const target = rowHydrationTarget(); const loaded = Math.min(getData().length, target || getData().length); - const shouldShow = !['call', 'diagnostics'].includes(activeView()) && liveRefreshSupported && (rowHydrationInFlight || rowsNeedHydration() || rowHydrationError); + const shouldShow = isUsageRefreshView() && liveRefreshSupported && (rowHydrationInFlight || rowsNeedHydration() || rowHydrationError); rowLoadProgressEl.hidden = !shouldShow; if (!shouldShow) return; const totalText = number.format(target || getTotalAvailableRows() || loaded); @@ -119,7 +123,7 @@ } async function hydrateDashboardRows(options = null) { - if (!liveRefreshSupported || ['call', 'diagnostics'].includes(activeView())) return; + if (!liveRefreshSupported || !isUsageRefreshView()) return; const hydrateOptions = options || {}; if (rowHydrationInFlight) { if (hydrateOptions.reset) rowHydrationRestartRequested = true; @@ -150,7 +154,7 @@ updateLiveStatus('status.checking', t('live.loading_rows')); updateRowLoadProgress(); try { - while (getData().length < target && generation === rowHydrationGeneration && !['call', 'diagnostics'].includes(activeView())) { + while (getData().length < target && generation === rowHydrationGeneration && isUsageRefreshView()) { const offset = getData().length; const remaining = target - offset; const chunkSize = Math.min( @@ -174,7 +178,7 @@ if (!response.ok) throw new Error(`HTTP ${response.status}`); const payload = await response.json(); if (payload.error) throw new 
Error(payload.error); - if (generation !== rowHydrationGeneration || ['call', 'diagnostics'].includes(activeView())) break; + if (generation !== rowHydrationGeneration || !isUsageRefreshView()) break; const rows = payloadRows(payload); if (!rows.length) break; applyDashboardPayload(payload, { appendRows: true }); @@ -189,7 +193,7 @@ } finally { rowHydrationInFlight = false; updateRowLoadProgress(); - const shouldRestart = rowHydrationRestartRequested && !['call', 'diagnostics'].includes(activeView()); + const shouldRestart = rowHydrationRestartRequested && isUsageRefreshView(); rowHydrationRestartRequested = false; if (shouldRestart) { hydrateDashboardRows(); @@ -200,7 +204,7 @@ } async function refreshDashboardIfStale() { - if (!liveRefreshSupported || !apiToken() || ['call', 'diagnostics'].includes(activeView())) return; + if (!liveRefreshSupported || !apiToken() || !isUsageRefreshView()) return; try { const params = new URLSearchParams({ include_archived: getIncludeArchived() ? '1' : '0', @@ -229,7 +233,7 @@ } async function refreshDashboardLive() { - if (!liveRefreshSupported || !apiToken() || activeView() === 'call') return; + if (!liveRefreshSupported || !apiToken() || !isUsageRefreshView()) return; if (refreshInFlight) return; const previousTotal = Number(getTotalAvailableRows() || getData().length || 0); refreshInFlight = true; @@ -254,12 +258,13 @@ if (!shellResponse.ok) throw new Error(`HTTP ${shellResponse.status}`); const shellPayload = await shellResponse.json(); if (shellPayload.error) throw new Error(shellPayload.error); + if (!isUsageRefreshView()) return; const nextTotal = Number(shellPayload.total_available_rows || previousTotal); const newRows = Math.max(0, nextTotal - previousTotal); applyDashboardPayload(shellPayload, { preserveRows: true }); - if (activeView() !== 'diagnostics' && newRows > 0) { + if (newRows > 0) { const loadedLimit = getLoadedLimit(); const visibleTarget = loadedLimit === null ? 
nextTotal : Math.min(nextTotal, Number(loadedLimit || nextTotal)); const rowsToFetch = Math.max(0, Math.min(newRows, visibleTarget || newRows)); @@ -285,7 +290,7 @@ } rowHydrationComplete = getData().length >= rowHydrationTarget(); updateRowLoadProgress(); - } else if (activeView() !== 'diagnostics' && rowsNeedHydration()) { + } else if (rowsNeedHydration()) { hydrateDashboardRows(); } @@ -373,7 +378,7 @@ function scheduleAutoRefresh() { if (autoRefreshTimer) window.clearInterval(autoRefreshTimer); autoRefreshTimer = null; - if (!autoRefreshEl.checked || !liveRefreshSupported || activeView() === 'call') return; + if (!autoRefreshEl.checked || !liveRefreshSupported || !isUsageRefreshView()) return; autoRefreshTimer = window.setInterval(() => { if (document.visibilityState === 'visible') refreshDashboardLive(); }, liveRefreshIntervalMs); diff --git a/tests/test_dashboard_live.py b/tests/test_dashboard_live.py index f470434..a25df3c 100644 --- a/tests/test_dashboard_live.py +++ b/tests/test_dashboard_live.py @@ -128,6 +128,95 @@ def test_dashboard_live_allows_diagnostics_bootstrap_refresh() -> None: assert payload["statusKeys"] == ["status.checking", "status.updated"] +def test_dashboard_live_skips_diagnostics_auto_refresh_cycle() -> None: + payload = _run_dashboard_live_script( + """ +(async () => { + const calls = []; + const statusUpdates = []; + const appliedPayloads = []; + let scheduledIntervals = 0; + context.window.setInterval = () => { + scheduledIntervals += 1; + return 1; + }; + context.window.clearInterval = () => {}; + globalThis.__fetch = async (url, options) => { + calls.push({ url, headers: options.headers }); + return { + ok: true, + json: async () => ({ + rows: [], + refreshed_at: '2026-06-19T00:00:00Z', + refresh_result: { + inserted_or_updated_events: 1, + scanned_files: 1, + skipped_events: 0, + }, + total_available_rows: 1, + }), + }; + }; + const refreshDashboardEl = { disabled: false }; + const runtime = factory.create({ + activeView: () => 
'diagnostics', + apiToken: () => 'test-token', + applyDashboardPayload: payload => appliedPayloads.push(payload), + autoRefreshEl: { checked: true }, + backgroundHydrationChunkSize: 2000, + formatTimestamp: value => value, + getArchivedAvailableRows: () => 0, + getData: () => [], + getIncludeArchived: () => false, + getLoadedLimit: () => null, + getTotalAvailableRows: () => 1, + historyScopeEl: { value: 'active', parentElement: {} }, + i18n: { currentLanguage: 'en' }, + initialHydrationChunkSize: 500, + latestRefreshAt: () => '', + limitValue: value => value === null ? 'all' : String(value), + liveRefreshIntervalMs: 10000, + liveRefreshSupported: true, + loadLimitEl: { value: '5000', options: [], lastElementChild: null, insertBefore: () => {} }, + number: new Intl.NumberFormat('en-US'), + payloadRows: payload => payload.rows || [], + rebuildDashboardIndexes: () => {}, + rebuildFilterOptions: () => {}, + refreshDashboardEl, + render: () => {}, + resetRowsForHydration: () => {}, + rowLoadProgressBarEl: { style: {} }, + rowLoadProgressCountEl: { textContent: '' }, + rowLoadProgressEl: { hidden: true }, + rowLoadProgressLabelEl: { textContent: '' }, + setFastTooltip: () => {}, + t: key => key, + tf: (key, values = {}) => `${key}:${JSON.stringify(values)}`, + updateLiveStatus: (key, detail) => statusUpdates.push({ key, detail }), + }); + runtime.scheduleAutoRefresh(); + await runtime.refreshDashboardLive(); + console.log(JSON.stringify({ + fetchCount: calls.length, + appliedCount: appliedPayloads.length, + statusKeys: statusUpdates.map(entry => entry.key), + scheduledIntervals, + refreshDisabled: refreshDashboardEl.disabled, + })); +})().catch(error => { + console.error(error); + process.exit(1); +}); +""" + ) + + assert payload["fetchCount"] == 0 + assert payload["appliedCount"] == 0 + assert payload["statusKeys"] == [] + assert payload["scheduledIntervals"] == 0 + assert payload["refreshDisabled"] is False + + def 
test_dashboard_live_prepends_new_rows_after_cached_index_refresh() -> None: payload = _run_dashboard_live_script( """ From d60fbd421aa1e0f62305904cbfab5d1a82d9b0e1 Mon Sep 17 00:00:00 2001 From: Monsky Date: Sat, 20 Jun 2026 20:47:11 -0400 Subject: [PATCH 09/10] fix: expand diagnostic command children --- docs/cli-json-schemas.md | 4 +- docs/cli-reference.md | 2 +- docs/privacy.md | 2 +- .../dashboard_diagnostics_snapshots.js | 68 +++++++- .../dashboard/dashboard_tables.css | 47 +++++ tests/test_dashboard_diagnostics_snapshots.py | 160 ++++++++++++++++++ 6 files changed, 271 insertions(+), 12 deletions(-) create mode 100644 tests/test_dashboard_diagnostics_snapshots.py diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md index d604e19..6cde68f 100644 --- a/docs/cli-json-schemas.md +++ b/docs/cli-json-schemas.md @@ -412,7 +412,7 @@ Schema: `codex-usage-tracker-diagnostic-commands-v1` } ``` -The commands snapshot keeps only command roots and safe one-level child labels such as `status`, `diff`, or `-m:pytest`. +The commands snapshot keeps only command roots and a bounded list of safe one-level child labels such as `status`, `diff`, or `-m:pytest`. ## Diagnostic File Reads Snapshot @@ -538,7 +538,7 @@ Schema: `codex-usage-tracker-diagnostic-concentration-v1` } ``` -The concentration snapshot computes top-1/top-3/top-5 share and effective group count by source log/session, cwd/project label, and day. Source log labels use session-id prefixes or source hashes, cwd labels use basename-only labels, and raw source paths/cwd paths are not included. +The concentration snapshot computes top-1/top-3/top-5 share and effective group count by source log/session, cwd/project label, and day. Metric ids such as `top_1_source_log_share` are stable JSON contract fields; dashboard views should render them as reader-facing labels. Source log labels use session-id prefixes or source hashes, cwd labels use basename-only labels, and raw source paths/cwd paths are not included. 
## Pricing Coverage diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 341dc9a..8d6c322 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -133,7 +133,7 @@ Diagnostics expose structured event patterns and their associated token totals. Snapshot diagnostics are persisted aggregate reports. Without `--refresh`, snapshot commands return the latest stored payload or a `missing` status. With `--refresh`, they recompute from indexed source logs and replace the stored section snapshot. Ordinary `refresh`, `open-dashboard`, and dashboard `Refresh` update usage rows only; they do not recompute diagnostic snapshots. -The snapshot sections answer different questions: `overview` summarizes usage rows and aggregate token totals, `tool-output` counts functions and terminal `Original token count` coverage, `commands` keeps command roots plus one safe child label, `file-reads` counts reader/path activity and allocated read-output tokens, `read-productivity` reports later-edit correlations for matching path keys, and `concentration` shows top-N token share by source/session, cwd/project, and day. +The snapshot sections answer different questions: `overview` summarizes usage rows and aggregate token totals, `tool-output` counts functions and terminal `Original token count` coverage, `commands` keeps command roots plus bounded safe child labels, `file-reads` counts reader/path activity and allocated read-output tokens, `read-productivity` reports later-edit correlations for matching path keys, and `concentration` shows top-N token share by source/session, cwd/project, and day. Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, raw absolute paths, or JSONL fragments. 
File-read diagnostics use basename-only path labels plus short irreversible hashes, read-productivity percentages are temporal correlations rather than proof that a read caused a later edit, and concentration reports use safe source/session, cwd, and day labels only. diff --git a/docs/privacy.md b/docs/privacy.md index 39faa33..94eea7a 100644 --- a/docs/privacy.md +++ b/docs/privacy.md @@ -35,7 +35,7 @@ Call-origin metadata is heuristic and confidence-labeled. It stores categories s Diagnostic facts follow the same aggregate-only rule. They can store safe structured labels such as `patch_applied`, `function_call_output`, `post_compaction`, MCP tool/server labels, structured skill labels, and command families such as `pytest`, `git`, or `unknown_command`, along with event counts and source line ranges. Command text may be classified in memory during parsing, but it is not persisted. Diagnostic facts do not store tool arguments, command text, command output, patch text, prompt or assistant text, file contents, raw JSONL fragments, or raw context evidence. -On-demand diagnostic snapshots follow the same boundary. Tool-output snapshots use terminal wrapper metadata such as `Original token count` when present and persist only counts, coverage gaps, and safe function/command labels. Command snapshots keep command roots plus one conservative child label. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths. +On-demand diagnostic snapshots follow the same boundary. 
Tool-output snapshots use terminal wrapper metadata such as `Original token count` when present and persist only counts, coverage gaps, and safe function/command labels. Command snapshots keep command roots plus a bounded list of conservative one-level child labels. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths. Diagnostic snapshots are not live recomputed during ordinary dashboard or usage refresh. Stored snapshots can be displayed without rescanning source logs, and recomputation requires an explicit diagnostics `--refresh` command, the batched localhost `/api/diagnostics/refresh` request, or a targeted `/api/diagnostics/<section>
/refresh` request. diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js index 843626c..213c9f0 100644 --- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js +++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js @@ -114,15 +114,38 @@ function renderCommands(payload) { const commands = Array.isArray(payload?.commands) ? payload.commands.slice(0, 10) : []; return renderSimpleTable( - ['Root', 'Total', 'Top child'], - commands.map(row => { - const child = Array.isArray(row.children) && row.children[0] ? row.children[0] : null; - return [row.root, tokenText(row.total), child ? `${child.child} (${tokenText(child.count)})` : '']; - }), + ['Root', 'Total', 'Children'], + commands.map(row => [ + row.root, + tokenText(row.total), + { html: renderCommandChildren(row.children), numeric: false }, + ]), 'No command rows in this snapshot.', ); } + function renderCommandChildren(children) { + const rows = Array.isArray(children) ? children : []; + if (!rows.length) { + return `${escapeHtml('')}`; + } + const childCount = rows.length; + const label = `${tokenText(childCount)} ${childCount === 1 ? 'child' : 'children'}`; + return ` +
+ ${escapeHtml(label)} +
    + ${rows.map(child => ` +
  • + ${escapeHtml(child.child || '')} + ${tokenText(child.count)} +
  • + `).join('')} +
+
+ `; + } + function renderFileReads(payload) { const byReader = Array.isArray(payload?.by_reader) ? payload.by_reader.slice(0, 8) : []; const paths = Array.isArray(payload?.top_paths) ? payload.top_paths.slice(0, 8) : []; @@ -173,13 +196,13 @@ ${renderSimpleTable( ['Metric', 'Share'], metrics.filter(row => row.top_n === 1 || row.top_n === 3 || row.top_n === 5) - .map(row => [row.metric, pct(row.share)]), + .map(row => [concentrationMetricLabel(row), pct(row.share)]), 'No concentration metrics in this snapshot.', )} ${renderSimpleTable( ['Dimension', 'Label', 'Share', 'Largest'], impacts.map(row => [ - row.dimension, + concentrationDimensionLabel(row.dimension), row.label, pct(row.share), row.largest_record_id ? { html: rowInvestigatorLink({ record_id: row.largest_record_id }, tokenText(row.largest_call_tokens), true) } : tokenText(row.largest_call_tokens), @@ -248,7 +271,7 @@ if (!rows.length) return renderState(emptyMessage); const head = headers.map(header => `${escapeHtml(header)}`).join(''); const body = rows.map(row => ` - ${row.map((cell, index) => ` 0 ? ' class="num"' : ''}>${cellHtml(cell)}`).join('')} + ${row.map((cell, index) => `${cellHtml(cell)}`).join('')} `).join(''); return `
@@ -266,12 +289,41 @@ return escapeHtml(String(value)); } + function cellNumeric(value, index) { + if (index === 0) return false; + if (typeof value === 'object' && value && value.numeric === false) return false; + return true; + } + function pathLabel(row) { const label = row.path_label || 'path'; const hash = row.path_hash ? ` · ${String(row.path_hash).slice(0, 6)}` : ''; return `${label}${hash}`; } + function concentrationMetricLabel(row) { + const topN = Number(row?.top_n || 0); + const dimension = concentrationDimensionLabel(row?.dimension); + if (topN > 0 && dimension) return `Top ${topN} ${dimension.toLowerCase()} share`; + return humanizeMetric(row?.metric || 'metric'); + } + + function concentrationDimensionLabel(value) { + return { + source_log: 'Source/session', + cwd: 'Project/cwd', + day: 'Day', + }[value] || humanizeMetric(value || ''); + } + + function humanizeMetric(value) { + return String(value || '') + .split('_') + .filter(Boolean) + .map(part => part.slice(0, 1).toUpperCase() + part.slice(1)) + .join(' '); + } + return { historyScope, latestComputed, diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css index 0ee6a65..c62955d 100644 --- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css +++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css @@ -363,6 +363,53 @@ .diagnostics-mini-table-wrap + .diagnostics-mini-table-wrap { margin-top: 10px; } + .diagnostics-muted { + color: var(--muted); + font-weight: 700; + } + .diagnostics-command-children { + text-align: left; + } + .diagnostics-command-children summary { + display: inline-flex; + align-items: center; + gap: 6px; + min-height: 24px; + color: var(--ink); + font-weight: 760; + cursor: pointer; + } + .diagnostics-command-children summary:focus-visible { + outline: 2px solid var(--blue); + outline-offset: 2px; + border-radius: 4px; + } +
.diagnostics-command-children ul { + display: grid; + gap: 4px; + margin: 6px 0 0; + padding: 0; + list-style: none; + } + .diagnostics-command-children li { + display: flex; + align-items: baseline; + justify-content: space-between; + gap: 12px; + color: var(--muted); + font-size: 12px; + line-height: 1.35; + } + .diagnostics-command-children li span { + min-width: 0; + overflow-wrap: anywhere; + text-align: left; + } + .diagnostics-command-children li b { + flex: 0 0 auto; + color: var(--ink); + font-variant-numeric: tabular-nums; + } .diagnostics-facts-table { min-width: 1320px; } diff --git a/tests/test_dashboard_diagnostics_snapshots.py b/tests/test_dashboard_diagnostics_snapshots.py new file mode 100644 index 0000000..24f5b51 --- /dev/null +++ b/tests/test_dashboard_diagnostics_snapshots.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +import json +import shutil +import subprocess +from pathlib import Path + +import pytest + + +def _run_snapshot_renderer_script(script: str) -> dict[str, object]: + node = shutil.which("node") + if node is None: + pytest.skip("node is required for dashboard diagnostic snapshot renderer tests") + repo_root = Path(__file__).resolve().parents[1] + script_path = ( + repo_root + / "src" + / "codex_usage_tracker" + / "plugin_data" + / "dashboard" + / "dashboard_diagnostics_snapshots.js" + ) + wrapped = f""" +const fs = require('fs'); +const vm = require('vm'); +const code = fs.readFileSync({json.dumps(str(script_path))}, 'utf8'); +const context = {{ + window: {{}}, + console, +}}; +vm.createContext(context); +vm.runInContext(code, context); +const factory = context.window.CodexUsageDashboardDiagnosticSnapshots; +function escapeHtml(value) {{ + return String(value) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); +}} +{script} +""" + result = subprocess.run( + [node, "-e", wrapped], + check=True, + capture_output=True, + text=True, + ) + return json.loads(result.stdout) + + +def 
test_dashboard_commands_snapshot_renders_collapsible_children() -> None: + payload = _run_snapshot_renderer_script( + """ +const renderer = factory.create({ + escapeHtml, + formatTimestamp: value => value, + number: new Intl.NumberFormat('en-US'), + pct: value => `${value}%`, + renderState: message => `
${escapeHtml(message)}
`, + rowInvestigatorLink: () => 'call', + tokenText: value => new Intl.NumberFormat('en-US').format(Number(value || 0)), +}); +const html = renderer.renderPanels({ + loading: false, + payloads: { + commands: { + status: 'ready', + refreshed: false, + snapshot: { + computed_at: '2026-06-20T00:00:00Z', + history_scope: 'active', + source_logs_scanned: 1, + }, + commands: [ + { + root: 'git', + total: 3, + children: [ + { child: 'status', count: 2 }, + { child: 'diff', count: 1 }, + ], + }, + ], + }, + }, +}); +console.log(JSON.stringify({ + hasDetails: html.includes('
'), + hasSummary: html.includes('2 children'), + hasFirstChild: html.includes('status') && html.includes('2'), + hasSecondChild: html.includes('diff') && html.includes('1'), + hasTopChildColumn: html.includes('Top child'), +})); +""" + ) + + assert payload["hasDetails"] is True + assert payload["hasSummary"] is True + assert payload["hasFirstChild"] is True + assert payload["hasSecondChild"] is True + assert payload["hasTopChildColumn"] is False + + +def test_dashboard_concentration_snapshot_renders_reader_facing_labels() -> None: + payload = _run_snapshot_renderer_script( + """ +const renderer = factory.create({ + escapeHtml, + formatTimestamp: value => value, + number: new Intl.NumberFormat('en-US'), + pct: value => `${Math.round(Number(value || 0) * 100)}%`, + renderState: message => `
${escapeHtml(message)}
`, + rowInvestigatorLink: () => '1,000', + tokenText: value => new Intl.NumberFormat('en-US').format(Number(value || 0)), +}); +const html = renderer.renderPanels({ + loading: false, + payloads: { + concentration: { + status: 'ready', + refreshed: false, + snapshot: { + computed_at: '2026-06-20T00:00:00Z', + history_scope: 'active', + source_logs_scanned: 1, + }, + metrics: [ + { metric: 'top_1_source_log_share', dimension: 'source_log', top_n: 1, share: 0.5 }, + { metric: 'top_3_cwd_share', dimension: 'cwd', top_n: 3, share: 0.9 }, + ], + largest_impact_rows: [ + { + dimension: 'source_log', + label: 'session:019e37d3', + share: 0.5, + largest_record_id: 'r1', + largest_call_tokens: 1000, + }, + ], + }, + }, +}); +console.log(JSON.stringify({ + hasSourceMetricLabel: html.includes('Top 1 source/session share'), + hasProjectMetricLabel: html.includes('Top 3 project/cwd share'), + hasDimensionLabel: html.includes('Source/session'), + hasSafeSourceLabel: html.includes('session:019e37d3'), + leaksMetricId: html.includes('top_1_source_log_share'), +})); +""" + ) + + assert payload["hasSourceMetricLabel"] is True + assert payload["hasProjectMetricLabel"] is True + assert payload["hasDimensionLabel"] is True + assert payload["hasSafeSourceLabel"] is True + assert payload["leaksMetricId"] is False From 9361f32fe438bac2bb956553c08cc7f527698011 Mon Sep 17 00:00:00 2001 From: Monsky Date: Sat, 20 Jun 2026 21:07:35 -0400 Subject: [PATCH 10/10] fix: clarify command child expansion --- .../dashboard_diagnostics_snapshots.js | 6 +++- .../dashboard/dashboard_tables.css | 34 +++++++++++++++++++ tests/test_dashboard_diagnostics_snapshots.py | 8 +++-- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js index 213c9f0..f6f99e3 100644 --- 
a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js +++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js @@ -133,7 +133,11 @@ const label = `${tokenText(childCount)} ${childCount === 1 ? 'child' : 'children'}`; return `
- ${escapeHtml(label)} + + + ${escapeHtml(`Show all ${label}`)} + ${escapeHtml(`Hide ${label}`)} +
    ${rows.map(child => `
  • diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css index c62955d..fdc4a10 100644 --- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css +++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css @@ -379,11 +379,45 @@ font-weight: 760; cursor: pointer; } + .diagnostics-command-children summary::marker { + content: ""; + } + .diagnostics-command-children summary::-webkit-details-marker { + display: none; + } .diagnostics-command-children summary:focus-visible { outline: 2px solid var(--blue); outline-offset: 2px; border-radius: 4px; } + .diagnostics-command-toggle-icon { + display: inline-grid; + width: 18px; + height: 18px; + place-items: center; + border: 1px solid var(--line); + border-radius: 4px; + background: #ffffff; + color: var(--blue); + font-size: 13px; + font-weight: 850; + line-height: 1; + } + .diagnostics-command-toggle-icon::before { + content: "+"; + } + .diagnostics-command-children[open] .diagnostics-command-toggle-icon::before { + content: "-"; + } + .diagnostics-command-toggle-open { + display: none; + } + .diagnostics-command-children[open] .diagnostics-command-toggle-closed { + display: none; + } + .diagnostics-command-children[open] .diagnostics-command-toggle-open { + display: inline; + } .diagnostics-command-children ul { display: grid; gap: 4px; diff --git a/tests/test_dashboard_diagnostics_snapshots.py b/tests/test_dashboard_diagnostics_snapshots.py index 24f5b51..b25a74a 100644 --- a/tests/test_dashboard_diagnostics_snapshots.py +++ b/tests/test_dashboard_diagnostics_snapshots.py @@ -89,7 +89,9 @@ def test_dashboard_commands_snapshot_renders_collapsible_children() -> None: }); console.log(JSON.stringify({ hasDetails: html.includes('
    '), - hasSummary: html.includes('2 children'), + hasShowSummary: html.includes('Show all 2 children'), + hasHideSummary: html.includes('Hide 2 children'), + hasToggleIcon: html.includes('diagnostics-command-toggle-icon'), hasFirstChild: html.includes('status') && html.includes('2'), hasSecondChild: html.includes('diff') && html.includes('1'), hasTopChildColumn: html.includes('Top child'), @@ -98,7 +100,9 @@ def test_dashboard_commands_snapshot_renders_collapsible_children() -> None: ) assert payload["hasDetails"] is True - assert payload["hasSummary"] is True + assert payload["hasShowSummary"] is True + assert payload["hasHideSummary"] is True + assert payload["hasToggleIcon"] is True assert payload["hasFirstChild"] is True assert payload["hasSecondChild"] is True assert payload["hasTopChildColumn"] is False