From e9fc1eb56a3ccef23c87037ff7ff85cd89288b64 Mon Sep 17 00:00:00 2001
From: Monsky <douglas.monsky@gmail.com>
Date: Sat, 20 Jun 2026 18:42:53 -0400
Subject: [PATCH 01/10] feat: add diagnostic snapshot foundation

---
 docs/cli-json-schemas.md                      |  44 +++
 src/codex_usage_tracker/cli.py                |   8 +
 src/codex_usage_tracker/cli_parser.py         |  12 +
 .../diagnostic_snapshots.py                   | 274 ++++++++++++++++++
 src/codex_usage_tracker/json_contracts.py     |  11 +
 src/codex_usage_tracker/server.py             |  61 ++++
 src/codex_usage_tracker/store.py              |  91 ++++++
 src/codex_usage_tracker/store_schema.py       |  29 +-
 tests/store_dashboard_helpers.py              |  18 +-
 tests/test_cli_lifecycle.py                   |  36 ++-
 tests/test_dashboard_server.py                |  39 +++
 tests/test_diagnostic_snapshots.py            |  96 ++++++
 tests/test_store_dashboard_mcp.py             |  12 +-
 tests/test_store_migrations.py                |   9 +-
 14 files changed, 725 insertions(+), 15 deletions(-)
 create mode 100644 src/codex_usage_tracker/diagnostic_snapshots.py
 create mode 100644 tests/test_diagnostic_snapshots.py

diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md
index 84c6ed3..5ec7397 100644
--- a/docs/cli-json-schemas.md
+++ b/docs/cli-json-schemas.md
@@ -47,6 +47,7 @@ Tracked schema ids:
 | `codex-usage-tracker-query-v1` | CLI `query`, MCP `usage_query(...)` |
 | `codex-usage-tracker-recommendations-v1` | CLI `recommendations --json`, MCP `usage_recommendations(response_format="json")` |
 | `codex-usage-tracker-diagnostics-v1` | CLI `diagnostics ... --json`, dashboard server `/api/diagnostics/*` |
+| `codex-usage-tracker-diagnostic-overview-v1` | CLI `diagnostics overview --json`, dashboard server `/api/diagnostics/overview` |
 | `codex-usage-tracker-session-v1` | CLI `session --json`, MCP `session_usage(response_format="json")` |
 | `codex-usage-tracker-context-v1` | CLI `context`, MCP `usage_call_context` when raw context is explicitly enabled |
 | `codex-usage-tracker-context-disabled-v1` | MCP `usage_call_context` when raw context is disabled |
@@ -281,6 +282,49 @@ Schema: `codex-usage-tracker-diagnostics-v1`
 
 Diagnostics payloads report aggregate structured facts such as compaction, tool/function/MCP activity, command families, structured skill labels, search/read loops, and outcome events. They do not include prompts, assistant messages, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments. Token totals are associated with facts observed before a token-count row; they are not causal allocations.
 
+## Diagnostic Overview Snapshot
+
+Commands:
+
+```bash
+codex-usage-tracker diagnostics overview --json
+codex-usage-tracker diagnostics overview --refresh --json
+```
+
+Dashboard server API:
+
+- `GET /api/diagnostics/overview`
+- `POST /api/diagnostics/overview/refresh`
+
+Schema: `codex-usage-tracker-diagnostic-overview-v1`
+
+```json
+{
+  "schema": "codex-usage-tracker-diagnostic-overview-v1",
+  "section": "overview",
+  "status": "ready",
+  "refreshed": false,
+  "raw_context_included": false,
+  "snapshot": {
+    "computed_at": "2026-06-20T18:00:00+00:00",
+    "history_scope": "active",
+    "source_logs_scanned": 3,
+    "usage_rows_scanned": 10,
+    "raw_content_included": false
+  },
+  "overview": {
+    "usage_rows": 10,
+    "total_tokens": 12345,
+    "cached_input_tokens": 9000,
+    "uncached_input_tokens": 3000,
+    "cache_ratio": 0.75
+  },
+  "notes": []
+}
+```
+
+The overview snapshot is recomputed only when explicitly refreshed. Ordinary dashboard usage refreshes do not update diagnostic snapshots.
+
 ## Pricing Coverage
 
 Command:
diff --git a/src/codex_usage_tracker/cli.py b/src/codex_usage_tracker/cli.py
index 96b7465..5608c0f 100644
--- a/src/codex_usage_tracker/cli.py
+++ b/src/codex_usage_tracker/cli.py
@@ -29,6 +29,7 @@
     build_diagnostics_facts_report,
     build_diagnostics_summary_report,
 )
+from codex_usage_tracker.diagnostic_snapshots import build_diagnostic_overview_report
 from codex_usage_tracker.diagnostics import run_doctor
 from codex_usage_tracker.formatting import (
     format_doctor,
@@ -394,6 +395,7 @@ def _run_recommendations(args: argparse.Namespace) -> int:
 
 def _run_diagnostics(args: argparse.Namespace) -> int:
     command = args.diagnostics_command
+    report: Any
     if command == "summary":
         report = build_diagnostics_summary_report(
             db_path=args.db,
@@ -448,6 +450,12 @@ def _run_diagnostics(args: argparse.Namespace) -> int:
             direction=args.direction,
             privacy_mode=args.privacy_mode,
         )
+    elif command == "overview":
+        report = build_diagnostic_overview_report(
+            db_path=args.db,
+            include_archived=args.include_archived,
+            refresh=args.refresh,
+        )
     else:
         raise ValueError(f"unknown diagnostics command: {command}")
 
diff --git a/src/codex_usage_tracker/cli_parser.py b/src/codex_usage_tracker/cli_parser.py
index 193fe7a..d16b8b9 100644
--- a/src/codex_usage_tracker/cli_parser.py
+++ b/src/codex_usage_tracker/cli_parser.py
@@ -327,6 +327,18 @@ def _add_diagnostics_parser(
     _add_diagnostics_base_filters(tools)
     _add_diagnostics_fact_sort(tools, default_limit=50)
 
+    overview = diagnostic_subparsers.add_parser(
+        "overview",
+        help="Show the on-demand aggregate diagnostic overview snapshot",
+    )
+    overview.add_argument("--include-archived", action="store_true")
+    overview.add_argument(
+        "--refresh",
+        action="store_true",
+        help="Recompute and persist the overview snapshot before reading it.",
+    )
+    overview.add_argument("--json", action="store_true", dest="as_json")
+
     fact_calls = diagnostic_subparsers.add_parser(
         "fact-calls",
         help="List calls associated with one diagnostic fact",
diff --git a/src/codex_usage_tracker/diagnostic_snapshots.py b/src/codex_usage_tracker/diagnostic_snapshots.py
new file mode 100644
index 0000000..9fe28c2
--- /dev/null
+++ b/src/codex_usage_tracker/diagnostic_snapshots.py
@@ -0,0 +1,274 @@
+"""On-demand aggregate diagnostic report snapshots."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from codex_usage_tracker.paths import DEFAULT_DB_PATH
+from codex_usage_tracker.store import (
+    connect,
+    query_diagnostic_snapshot,
+    upsert_diagnostic_snapshot,
+)
+from codex_usage_tracker.store_schema import init_db
+
+DIAGNOSTIC_OVERVIEW_SCHEMA = "codex-usage-tracker-diagnostic-overview-v1"
+DIAGNOSTIC_OVERVIEW_SECTION = "overview"  # payload "section" and snapshot storage key
+DIAGNOSTIC_HISTORY_ACTIVE = "active"  # history scope when archived rows are excluded
+DIAGNOSTIC_HISTORY_ALL = "all"  # history scope when archived rows are included
+DIAGNOSTIC_OVERVIEW_NOTES = [  # copied into the "notes" list of every overview payload
+    "Diagnostic snapshots are recomputed only by explicit diagnostic refresh.",
+    "Overview totals use persisted aggregate usage rows and do not include raw context.",
+]
+
+
+@dataclass(frozen=True)
+class DiagnosticSnapshotReport:
+    """Wrap an overview snapshot payload and render it as plain text."""
+
+    payload: dict[str, Any]
+
+    def render(self) -> str:
+        """Return a readable summary, or a refresh hint when the status is not ready."""
+        if self.payload.get("status") != "ready":
+            return "No diagnostic overview snapshot. Run diagnostics overview --refresh first."
+        meta = self.payload.get("snapshot") or {}
+        totals = self.payload.get("overview") or {}
+        lines = [
+            "Diagnostic overview snapshot",
+            f"Computed: {meta.get('computed_at')}",
+            f"History scope: {meta.get('history_scope')}",
+            f"Usage rows: {_int_text(totals.get('usage_rows'))}",
+            f"Total tokens: {_int_text(totals.get('total_tokens'))}",
+            f"Cached input: {_int_text(totals.get('cached_input_tokens'))}",
+            f"Uncached input: {_int_text(totals.get('uncached_input_tokens'))}",
+            f"Cache ratio: {_pct_text(totals.get('cache_ratio'))}",
+        ]
+        return "\n".join(lines)
+
+
+def build_diagnostic_overview_report(
+    *,
+    db_path: Path = DEFAULT_DB_PATH,
+    include_archived: bool = False,
+    refresh: bool = False,
+) -> DiagnosticSnapshotReport:
+    """Build the overview report, recomputing the snapshot only when asked to."""
+
+    # Refreshing recomputes and persists the snapshot; a plain read returns
+    # whatever is stored, or a "missing" payload when nothing is.
+    if refresh:
+        payload = refresh_diagnostic_overview_snapshot(
+            db_path=db_path,
+            include_archived=include_archived,
+        )
+    else:
+        payload = diagnostic_overview_payload(
+            db_path=db_path,
+            include_archived=include_archived,
+        )
+    return DiagnosticSnapshotReport(payload)
+
+
+def refresh_diagnostic_overview_snapshot(
+    *,
+    db_path: Path = DEFAULT_DB_PATH,
+    include_archived: bool = False,
+) -> dict[str, Any]:
+    """Recompute the overview aggregates, store them, and return the payload."""
+
+    scope = _history_scope(include_archived)
+    stamp = _utc_now()
+    totals, logs_scanned = _compute_overview(db_path=db_path, include_archived=include_archived)
+    # The usage row count doubles as the "rows scanned" figure for the snapshot.
+    rows_scanned = int(totals["usage_rows"])
+    metadata = _snapshot_metadata(
+        computed_at=stamp,
+        history_scope=scope,
+        source_logs_scanned=logs_scanned,
+        usage_rows_scanned=rows_scanned,
+    )
+    result = _ready_payload(snapshot=metadata, overview=totals, refreshed=True)
+    # Persist the same payload that is handed back to the caller.
+    upsert_diagnostic_snapshot(
+        db_path=db_path,
+        section=DIAGNOSTIC_OVERVIEW_SECTION,
+        history_scope=scope,
+        payload=result,
+        computed_at=stamp,
+        source_logs_scanned=logs_scanned,
+        usage_rows_scanned=rows_scanned,
+        raw_content_included=False,
+    )
+    return result
+
+
+def diagnostic_overview_payload(
+    *,
+    db_path: Path = DEFAULT_DB_PATH,
+    include_archived: bool = False,
+) -> dict[str, Any]:
+    """Read the stored overview snapshot; report "missing" when none exists."""
+
+    scope = _history_scope(include_archived)
+    record = query_diagnostic_snapshot(
+        db_path=db_path,
+        section=DIAGNOSTIC_OVERVIEW_SECTION,
+        history_scope=scope,
+    )
+    if record is None:
+        return _missing_payload(history_scope=scope)
+    # Snapshot metadata is rebuilt from the stored record, not taken from its JSON payload.
+    metadata = _snapshot_metadata(
+        computed_at=str(record["computed_at"]),
+        history_scope=str(record["history_scope"]),
+        source_logs_scanned=int(record["source_logs_scanned"]),
+        usage_rows_scanned=int(record["usage_rows_scanned"]),
+    )
+    overrides = {"status": "ready", "refreshed": False, "snapshot": metadata}
+    overrides["raw_context_included"] = bool(record["raw_content_included"])
+    # Later keys win, so the stored payload only supplies the remaining fields.
+    return {**record["payload"], **overrides}
+
+
+def _compute_overview(
+    *,
+    db_path: Path,
+    include_archived: bool,
+) -> tuple[dict[str, Any], int]:
+    """Aggregate usage totals and count source logs for the requested history scope."""
+    active_only = "" if include_archived else "WHERE is_archived = 0"
+    with connect(db_path) as conn:
+        init_db(conn)
+        usage = conn.execute(
+            f"""
+            SELECT
+                COUNT(*) AS usage_rows,
+                COUNT(DISTINCT session_id) AS session_count,
+                COUNT(DISTINCT thread_key) AS thread_count,
+                COUNT(DISTINCT model) AS model_count,
+                MIN(event_timestamp) AS first_event_timestamp,
+                MAX(event_timestamp) AS latest_event_timestamp,
+                coalesce(SUM(input_tokens), 0) AS input_tokens,
+                coalesce(SUM(cached_input_tokens), 0) AS cached_input_tokens,
+                coalesce(SUM(uncached_input_tokens), 0) AS uncached_input_tokens,
+                coalesce(SUM(output_tokens), 0) AS output_tokens,
+                coalesce(SUM(reasoning_output_tokens), 0) AS reasoning_output_tokens,
+                coalesce(SUM(total_tokens), 0) AS total_tokens,
+                AVG(cache_ratio) AS avg_cache_ratio
+            FROM usage_events
+            {active_only}
+            """
+        ).fetchone()
+        facts = conn.execute(
+            f"""
+            SELECT COUNT(*) AS diagnostic_fact_rows
+            FROM call_diagnostic_facts AS facts
+            JOIN usage_events ON usage_events.record_id = facts.record_id
+            {active_only}
+            """
+        ).fetchone()
+        sources = conn.execute(
+            f"SELECT COUNT(*) AS source_logs_scanned FROM source_files {active_only}"
+        ).fetchone()
+    input_total = _int_value(usage["input_tokens"])
+    cached_total = _int_value(usage["cached_input_tokens"])
+    overview = {
+        "usage_rows": _int_value(usage["usage_rows"]),
+        "session_count": _int_value(usage["session_count"]),
+        "thread_count": _int_value(usage["thread_count"]),
+        "model_count": _int_value(usage["model_count"]),
+        "first_event_timestamp": usage["first_event_timestamp"],
+        "latest_event_timestamp": usage["latest_event_timestamp"],
+        "input_tokens": input_total,
+        "cached_input_tokens": cached_total,
+        "uncached_input_tokens": _int_value(usage["uncached_input_tokens"]),
+        "output_tokens": _int_value(usage["output_tokens"]),
+        "reasoning_output_tokens": _int_value(usage["reasoning_output_tokens"]),
+        "total_tokens": _int_value(usage["total_tokens"]),
+        "cache_ratio": cached_total / input_total if input_total else 0.0,
+        "avg_call_cache_ratio": float(usage["avg_cache_ratio"] or 0),
+        "diagnostic_fact_rows": _int_value(facts["diagnostic_fact_rows"]),
+    }
+    return overview, _int_value(sources["source_logs_scanned"])
+
+
+def _ready_payload(
+    *,
+    snapshot: dict[str, Any],
+    overview: dict[str, Any],
+    refreshed: bool,
+) -> dict[str, Any]:
+    """Assemble the "ready" response envelope around a snapshot and its totals."""
+    envelope: dict[str, Any] = {
+        "schema": DIAGNOSTIC_OVERVIEW_SCHEMA,
+        "section": DIAGNOSTIC_OVERVIEW_SECTION,
+        "status": "ready",
+        "refreshed": refreshed,
+        "raw_context_included": False,
+    }
+    envelope.update(snapshot=snapshot, overview=overview, notes=list(DIAGNOSTIC_OVERVIEW_NOTES))
+    return envelope
+
+
+def _missing_payload(*, history_scope: str) -> dict[str, Any]:
+    """Build the placeholder payload used when no snapshot is stored for the scope."""
+    empty = {"snapshot": None, "overview": None, "history_scope": history_scope}
+    return {
+        "schema": DIAGNOSTIC_OVERVIEW_SCHEMA,
+        "section": DIAGNOSTIC_OVERVIEW_SECTION,
+        "status": "missing",
+        "refreshed": False,
+        "raw_context_included": False,
+        **empty,
+        "notes": list(DIAGNOSTIC_OVERVIEW_NOTES),
+    }
+
+
+def _snapshot_metadata(
+    *,
+    computed_at: str,
+    history_scope: str,
+    source_logs_scanned: int,
+    usage_rows_scanned: int,
+) -> dict[str, Any]:
+    """Describe when a snapshot was computed and how much data it covered."""
+    metadata: dict[str, Any] = {"computed_at": computed_at, "history_scope": history_scope}
+    metadata["source_logs_scanned"] = int(source_logs_scanned)
+    metadata["usage_rows_scanned"] = int(usage_rows_scanned)
+    # This helper always reports that no raw content is included.
+    metadata["raw_content_included"] = False
+    return metadata
+
+
+def _history_scope(include_archived: bool) -> str:  # scope label stored with a snapshot
+    return DIAGNOSTIC_HISTORY_ACTIVE if not include_archived else DIAGNOSTIC_HISTORY_ALL
+
+
+def _utc_now() -> str:  # current UTC time as ISO 8601 text, whole seconds only
+    return datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
+
+
+def _int_value(value: object) -> int:
+    """Coerce ints, floats, and numeric strings to int; everything else becomes 0."""
+    if not isinstance(value, (int, float, str)):
+        return 0
+    try:
+        return int(value)
+    except (ValueError, OverflowError):  # "", "abc", "1.5", nan, inf -> 0, like _pct_text
+        return 0
+
+
+def _int_text(value: object) -> str:  # integer text with thousands separators
+    return format(_int_value(value), ",")
+
+
+def _pct_text(value: object) -> str:  # one-decimal percent; unusable input -> "0.0%"
+    try:
+        ratio = 0.0 if not isinstance(value, (int, float, str)) or value == "" else float(value)
+    except (TypeError, ValueError):
+        ratio = 0.0
+    return f"{ratio:.1%}"
diff --git a/src/codex_usage_tracker/json_contracts.py b/src/codex_usage_tracker/json_contracts.py
index bddcc11..502b565 100644
--- a/src/codex_usage_tracker/json_contracts.py
+++ b/src/codex_usage_tracker/json_contracts.py
@@ -158,6 +158,17 @@
             }
         },
     },
+    "codex-usage-tracker-diagnostic-overview-v1": {
+        "required": {
+            "section": str,
+            "status": str,
+            "refreshed": bool,
+            "raw_context_included": bool,
+            "snapshot": (dict, NoneType),
+            "overview": (dict, NoneType),
+            "notes": list,
+        }
+    },
     "codex-usage-tracker-session-v1": {
         "required": {
             "requested_session_id": (str, NoneType),
diff --git a/src/codex_usage_tracker/server.py b/src/codex_usage_tracker/server.py
index 42e01ae..562c313 100644
--- a/src/codex_usage_tracker/server.py
+++ b/src/codex_usage_tracker/server.py
@@ -35,6 +35,7 @@
     build_diagnostics_facts_report,
     build_diagnostics_summary_report,
 )
+from codex_usage_tracker.diagnostic_snapshots import build_diagnostic_overview_report
 from codex_usage_tracker.i18n import normalize_language
 from codex_usage_tracker.paths import (
     DEFAULT_ALLOWANCE_PATH,
@@ -304,6 +305,9 @@ def do_GET(self) -> None:  # noqa: N802 - stdlib hook name
         if parsed.path == "/api/diagnostics/tools":
             self._handle_diagnostics_facts(parsed.query, fact_group="tools")
             return
+        if parsed.path == "/api/diagnostics/overview":
+            self._handle_diagnostics_overview(parsed.query)
+            return
         if parsed.path == "/api/usage":
             self._handle_usage(parsed.query)
             return
@@ -315,6 +319,16 @@ def do_GET(self) -> None:  # noqa: N802 - stdlib hook name
             return
         super().do_GET()
 
+    def do_POST(self) -> None:  # noqa: N802 - stdlib hook name
+        """Route POST requests; only the diagnostic overview refresh is served."""
+        target = urlparse(self.path)
+        if not self._request_origin_allowed():
+            self._send_json(HTTPStatus.FORBIDDEN, {"error": "Request host or origin is not allowed"})
+        elif target.path == "/api/diagnostics/overview/refresh":
+            self._handle_diagnostics_overview_refresh(target.query)
+        else:
+            self._send_json(HTTPStatus.NOT_FOUND, {"error": "Unknown API endpoint"})
+
     def end_headers(self) -> None:
         if self._is_dashboard_html_request():
             self.send_header("Cache-Control", "no-store")
@@ -943,6 +957,53 @@ def _handle_diagnostics_fact_calls(self, query: str) -> None:
             return
         self._send_json(HTTPStatus.OK, payload)
 
+    def _handle_diagnostics_overview(self, query: str) -> None:
+        """Serve the stored overview snapshot; this path never recomputes it."""
+        params = parse_qs(query)
+        archived_flag = _first(params.get("include_archived"))
+        include_archived = _parse_bool(archived_flag, self._include_archived)
+        try:
+            report = build_diagnostic_overview_report(
+                db_path=self._db_path,
+                include_archived=include_archived,
+                refresh=False,
+            )
+        except sqlite3.Error as exc:
+            self._send_json(
+                HTTPStatus.INTERNAL_SERVER_ERROR,
+                {"error": f"Database error while reading diagnostic overview: {exc}"},
+            )
+        else:
+            # A "missing" snapshot is still a successful response, not an error.
+            self._send_json(HTTPStatus.OK, report.payload)
+
+    def _handle_diagnostics_overview_refresh(self, query: str) -> None:
+        """Recompute the overview snapshot for a token-authorized request."""
+        params = parse_qs(query)
+        if not self._has_valid_api_token(params):
+            self._send_json(
+                HTTPStatus.FORBIDDEN,
+                {"error": "Valid API token is required for diagnostic refresh"},
+            )
+            return
+        archived_flag = _first(params.get("include_archived"))
+        include_archived = _parse_bool(archived_flag, self._include_archived)
+        try:
+            # Hold the handler's refresh lock for the whole recompute-and-store step.
+            with self._refresh_lock:
+                report = build_diagnostic_overview_report(
+                    db_path=self._db_path,
+                    include_archived=include_archived,
+                    refresh=True,
+                )
+        except sqlite3.Error as exc:
+            self._send_json(
+                HTTPStatus.INTERNAL_SERVER_ERROR,
+                {"error": f"Database error while refreshing diagnostic overview: {exc}"},
+            )
+        else:
+            self._send_json(HTTPStatus.OK, report.payload)
+
     def _live_query_params(
         self,
         params: dict[str, list[str]],
diff --git a/src/codex_usage_tracker/store.py b/src/codex_usage_tracker/store.py
index 0af764a..13b894c 100644
--- a/src/codex_usage_tracker/store.py
+++ b/src/codex_usage_tracker/store.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import csv
+import json
 import sqlite3
 from collections.abc import Iterable, Iterator
 from contextlib import contextmanager, suppress
@@ -125,6 +126,7 @@ def rebuild_usage_index(
     with connect(db_path) as conn:
         init_db(conn)
         conn.execute("DELETE FROM call_diagnostic_facts")
+        conn.execute("DELETE FROM diagnostic_snapshots")
         conn.execute("DELETE FROM usage_events")
         conn.execute("DELETE FROM thread_summaries")
         conn.execute("DELETE FROM source_files")
@@ -144,6 +146,7 @@ def reset_usage_database(db_path: Path = DEFAULT_DB_PATH) -> dict[str, Any]:
         row = conn.execute("SELECT COUNT(*) AS count FROM usage_events").fetchone()
         deleted_rows = int(row["count"] if row is not None else 0)
         conn.execute("DELETE FROM call_diagnostic_facts")
+        conn.execute("DELETE FROM diagnostic_snapshots")
         conn.execute("DELETE FROM usage_events")
         conn.execute("DELETE FROM thread_summaries")
         conn.execute("DELETE FROM source_files")
@@ -222,6 +225,94 @@ def refresh_metadata(db_path: Path = DEFAULT_DB_PATH) -> dict[str, str]:
     return {str(row["key"]): str(row["value"]) for row in rows}
 
 
+def upsert_diagnostic_snapshot(
+    db_path: Path = DEFAULT_DB_PATH,
+    *,
+    section: str,
+    history_scope: str,
+    payload: dict[str, Any],
+    computed_at: str,
+    source_logs_scanned: int,
+    usage_rows_scanned: int,
+    raw_content_included: bool = False,
+) -> None:
+    """Insert or replace the snapshot stored for one (section, history_scope) pair."""
+
+    values = (
+        section,
+        history_scope,
+        json.dumps(payload, sort_keys=True, separators=(",", ":")),
+        computed_at,
+        int(source_logs_scanned),
+        int(usage_rows_scanned),
+        int(bool(raw_content_included)),
+    )
+    with connect(db_path) as conn:
+        init_db(conn)
+        conn.execute(
+            """
+            INSERT INTO diagnostic_snapshots (
+                section,
+                history_scope,
+                payload_json,
+                computed_at,
+                source_logs_scanned,
+                usage_rows_scanned,
+                raw_content_included
+            )
+            VALUES (?, ?, ?, ?, ?, ?, ?)
+            ON CONFLICT(section, history_scope) DO UPDATE SET
+                payload_json = excluded.payload_json,
+                computed_at = excluded.computed_at,
+                source_logs_scanned = excluded.source_logs_scanned,
+                usage_rows_scanned = excluded.usage_rows_scanned,
+                raw_content_included = excluded.raw_content_included
+            """,
+            values,
+        )
+
+
+def query_diagnostic_snapshot(
+    db_path: Path = DEFAULT_DB_PATH,
+    *,
+    section: str,
+    history_scope: str,
+) -> dict[str, Any] | None:
+    """Fetch the snapshot stored for (section, history_scope), or None if absent."""
+
+    # No database file yet: nothing can have been stored.
+    if not db_path.exists():
+        return None
+    with connect(db_path) as conn:
+        init_db(conn)
+        found = conn.execute(
+            """
+            SELECT
+                section,
+                history_scope,
+                payload_json,
+                computed_at,
+                source_logs_scanned,
+                usage_rows_scanned,
+                raw_content_included
+            FROM diagnostic_snapshots
+            WHERE section = ? AND history_scope = ?
+            """,
+            (section, history_scope),
+        ).fetchone()
+    if found is None:
+        return None
+    decoded = json.loads(str(found["payload_json"]))
+    # A stored JSON value that is not an object is replaced by an empty payload.
+    record: dict[str, Any] = {name: str(found[name]) for name in ("section", "history_scope")}
+    record["payload"] = decoded if isinstance(decoded, dict) else {}
+    record["computed_at"] = str(found["computed_at"])
+    for name in ("source_logs_scanned", "usage_rows_scanned"):
+        record[name] = int(found[name])
+    record["raw_content_included"] = bool(found["raw_content_included"])
+    return record
+
+
 def schema_state(db_path: Path = DEFAULT_DB_PATH) -> dict[str, Any]:
     """Return database migration and usage_events checksum state."""
 
diff --git a/src/codex_usage_tracker/store_schema.py b/src/codex_usage_tracker/store_schema.py
index c5e973c..da85a5e 100644
--- a/src/codex_usage_tracker/store_schema.py
+++ b/src/codex_usage_tracker/store_schema.py
@@ -12,7 +12,7 @@
     USAGE_EVENT_SCHEMA_CHECKSUM,
 )
 
-SCHEMA_VERSION = 9
+SCHEMA_VERSION = 10
 MIGRATION_NAMES = {
     1: "create usage_events aggregate fact table",
     2: "track schema migration checksum metadata",
@@ -23,6 +23,7 @@
     7: "persist source file parser cursors",
     8: "persist observed Codex usage snapshots",
     9: "persist aggregate diagnostic facts",
+    10: "persist on-demand diagnostic report snapshots",
 }
 CALL_ORIGIN_REPAIR_COLUMNS = {
     "call_initiator": "TEXT",
@@ -102,6 +103,12 @@ def init_db(conn: sqlite3.Connection) -> None:
     else:
         _migrate_v9(conn)
         _record_migration_if_missing(conn, 9)
+    if user_version < 10:
+        _migrate_v10(conn)
+        _record_migration(conn, 10)
+    else:
+        _migrate_v10(conn)
+        _record_migration_if_missing(conn, 10)
     _validate_usage_events_schema(conn)
     conn.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
 
@@ -280,6 +287,26 @@ def _migrate_v9(conn: sqlite3.Connection) -> None:
     )
 
 
+def _migrate_v10(conn: sqlite3.Connection) -> None:  # v10: diagnostic snapshot storage
+    conn.executescript(  # both statements use IF NOT EXISTS
+        """
+        CREATE TABLE IF NOT EXISTS diagnostic_snapshots (
+            section TEXT NOT NULL,
+            history_scope TEXT NOT NULL,
+            payload_json TEXT NOT NULL,
+            computed_at TEXT NOT NULL,
+            source_logs_scanned INTEGER NOT NULL DEFAULT 0,
+            usage_rows_scanned INTEGER NOT NULL DEFAULT 0,
+            raw_content_included INTEGER NOT NULL DEFAULT 0,
+            PRIMARY KEY (section, history_scope)
+        );
+
+        CREATE INDEX IF NOT EXISTS idx_diagnostic_snapshots_computed_at
+            ON diagnostic_snapshots(computed_at);
+        """
+    )
+
+
 def _record_migration(conn: sqlite3.Connection, version: int) -> None:
     conn.execute(
         """
diff --git a/tests/store_dashboard_helpers.py b/tests/store_dashboard_helpers.py
index 92806ab..4fcd2eb 100644
--- a/tests/store_dashboard_helpers.py
+++ b/tests/store_dashboard_helpers.py
@@ -349,14 +349,24 @@ def _assert_contract(payload: object) -> None:
     assert validate_json_payload_contract(payload) == []
 
 
-def _read_json(url: str, headers: dict[str, str] | None = None) -> dict[str, object]:
-    request = urllib.request.Request(url, headers=headers or {})
+def _read_json(
+    url: str,
+    headers: dict[str, str] | None = None,
+    data: bytes | None = None,
+    method: str | None = None,
+) -> dict[str, object]:
+    request = urllib.request.Request(url, data=data, headers=headers or {}, method=method)
     with urllib.request.urlopen(request, timeout=5) as response:  # noqa: S310 - local test server only
         return json.loads(response.read().decode("utf-8"))
 
 
-def _http_error_json(url: str, headers: dict[str, str] | None = None) -> dict[str, object]:
-    request = urllib.request.Request(url, headers=headers or {})
+def _http_error_json(
+    url: str,
+    headers: dict[str, str] | None = None,
+    data: bytes | None = None,
+    method: str | None = None,
+) -> dict[str, object]:
+    request = urllib.request.Request(url, data=data, headers=headers or {}, method=method)
     try:
         urllib.request.urlopen(request, timeout=5)  # noqa: S310 - local test server only
     except urllib.error.HTTPError as exc:
diff --git a/tests/test_cli_lifecycle.py b/tests/test_cli_lifecycle.py
index 7a02eab..be5a7ee 100644
--- a/tests/test_cli_lifecycle.py
+++ b/tests/test_cli_lifecycle.py
@@ -383,6 +383,23 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
         "tools",
         "--json",
     )
+    overview_missing = _run_cli(
+        tmp_path,
+        "--db",
+        str(db_path),
+        "diagnostics",
+        "overview",
+        "--json",
+    )
+    overview_refresh = _run_cli(
+        tmp_path,
+        "--db",
+        str(db_path),
+        "diagnostics",
+        "overview",
+        "--refresh",
+        "--json",
+    )
     fact_calls = _run_cli(
         tmp_path,
         "--db",
@@ -403,17 +420,28 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
     facts_payload = json.loads(facts.stdout)
     compactions_payload = json.loads(compactions.stdout)
     tools_payload = json.loads(tools.stdout)
+    overview_missing_payload = json.loads(overview_missing.stdout)
+    overview_refresh_payload = json.loads(overview_refresh.stdout)
     fact_calls_payload = json.loads(fact_calls.stdout)
     for payload in (
         summary_payload,
         facts_payload,
         compactions_payload,
         tools_payload,
+        overview_missing_payload,
+        overview_refresh_payload,
         fact_calls_payload,
     ):
         _assert_contract(payload)
-        assert payload["schema"] == "codex-usage-tracker-diagnostics-v1"
         assert payload["raw_context_included"] is False
+    for payload in (
+        summary_payload,
+        facts_payload,
+        compactions_payload,
+        tools_payload,
+        fact_calls_payload,
+    ):
+        assert payload["schema"] == "codex-usage-tracker-diagnostics-v1"
         assert "Associated token totals are not additive" in payload["notes"][0]
 
     fact_names = {row["fact_name"] for row in facts_payload["rows"]}
@@ -429,6 +457,12 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
     assert tools_payload["filters"]["fact_type"] is None
     assert tools_payload["filters"]["fact_group"] == "tools"
     assert {row["fact_type"] for row in tools_payload["rows"]} == {"tool"}
+    assert overview_missing_payload["schema"] == "codex-usage-tracker-diagnostic-overview-v1"
+    assert overview_missing_payload["status"] == "missing"
+    assert overview_refresh_payload["schema"] == "codex-usage-tracker-diagnostic-overview-v1"
+    assert overview_refresh_payload["status"] == "ready"
+    assert overview_refresh_payload["overview"]["usage_rows"] == 2
+    assert overview_refresh_payload["refreshed"] is True
     assert fact_calls_payload["view"] == "fact-calls"
     assert fact_calls_payload["filters"]["privacy_mode"] == "strict"
     assert fact_calls_payload["rows"][0]["cwd"].startswith("[redacted cwd:")
diff --git a/tests/test_dashboard_server.py b/tests/test_dashboard_server.py
index a174f76..a432bb8 100644
--- a/tests/test_dashboard_server.py
+++ b/tests/test_dashboard_server.py
@@ -79,6 +79,35 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
             content_security_policy = response.headers.get("Content-Security-Policy")
             referrer_policy = response.headers.get("Referrer-Policy")
             limited_payload = json.loads(response.read().decode("utf-8"))
+        diagnostic_overview_after_usage_refresh = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview"
+        )
+        diagnostic_refresh_without_token = _http_error_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview/refresh",
+            data=b"",
+            method="POST",
+        )
+        diagnostic_refresh_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview/refresh",
+            headers={"X-Codex-Usage-Token": "test-token"},
+            data=b"",
+            method="POST",
+        )
+        diagnostic_stored_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview"
+        )
+        diagnostic_computed_at = diagnostic_stored_payload["snapshot"]["computed_at"]
+        with urllib.request.urlopen(  # noqa: S310 - local test server only
+            urllib.request.Request(
+                f"http://127.0.0.1:{server.server_port}/api/usage?refresh=1&limit=2",
+                headers={"X-Codex-Usage-Token": "test-token"},
+            ),
+            timeout=5,
+        ) as response:
+            second_usage_refresh_payload = json.loads(response.read().decode("utf-8"))
+        diagnostic_after_second_usage_refresh = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview"
+        )
         with urllib.request.urlopen(  # noqa: S310 - local test server only
             f"http://127.0.0.1:{server.server_port}/api/usage?limit=all",
             timeout=5,
@@ -104,6 +133,7 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
         thread.join(timeout=5)
 
     assert refresh_without_token["status"] == 403
+    assert diagnostic_refresh_without_token["status"] == 403
     assert dashboard_cache_control == "no-store"
     shell_raw_payload = dashboard_html.split(
         '<script id="usage-data" type="application/json">',
@@ -117,6 +147,15 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
     assert limited_payload["refresh_result"]["parsed_events"] == 4
     assert limited_payload["refresh_result"]["skipped_events"] == 0
     assert limited_payload["refresh_result"]["parser_diagnostics"] == {}
+    assert diagnostic_overview_after_usage_refresh["status"] == "missing"
+    assert diagnostic_refresh_payload["status"] == "ready"
+    assert diagnostic_refresh_payload["refreshed"] is True
+    assert diagnostic_refresh_payload["overview"]["usage_rows"] == 4
+    assert diagnostic_refresh_payload["overview"]["total_tokens"] == 400
+    assert diagnostic_stored_payload["status"] == "ready"
+    assert diagnostic_stored_payload["refreshed"] is False
+    assert second_usage_refresh_payload["refresh_result"]["parsed_events"] == 0
+    assert diagnostic_after_second_usage_refresh["snapshot"]["computed_at"] == diagnostic_computed_at
     assert len(limited_payload["rows"]) == 2
     assert limited_payload["loaded_row_count"] == 2
     assert limited_payload["total_available_rows"] == 4
diff --git a/tests/test_diagnostic_snapshots.py b/tests/test_diagnostic_snapshots.py
new file mode 100644
index 0000000..ccb4079
--- /dev/null
+++ b/tests/test_diagnostic_snapshots.py
@@ -0,0 +1,96 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from store_dashboard_helpers import _assert_contract, _make_codex_home
+
+from codex_usage_tracker.diagnostic_snapshots import (
+    DIAGNOSTIC_OVERVIEW_SECTION,
+    build_diagnostic_overview_report,
+)
+from codex_usage_tracker.store import (
+    query_diagnostic_snapshot,
+    refresh_usage_index,
+    upsert_diagnostic_snapshot,
+)
+
+
+def test_diagnostic_overview_snapshot_is_explicit_and_aggregate_only(
+    tmp_path: Path,
+) -> None:
+    codex_home = _make_codex_home(tmp_path)
+    db_path = tmp_path / "usage.sqlite3"
+
+    missing_before_refresh = build_diagnostic_overview_report(db_path=db_path).payload
+    refresh_usage_index(codex_home=codex_home, db_path=db_path)
+    missing_after_usage_refresh = build_diagnostic_overview_report(db_path=db_path).payload
+    refreshed = build_diagnostic_overview_report(db_path=db_path, refresh=True).payload
+    stored = build_diagnostic_overview_report(db_path=db_path).payload
+
+    _assert_contract(missing_before_refresh)
+    _assert_contract(missing_after_usage_refresh)
+    _assert_contract(refreshed)
+    _assert_contract(stored)
+    assert missing_before_refresh["status"] == "missing"
+    assert missing_after_usage_refresh["status"] == "missing"
+    assert refreshed["status"] == "ready"
+    assert refreshed["refreshed"] is True
+    assert stored["status"] == "ready"
+    assert stored["refreshed"] is False
+    assert refreshed["overview"]["usage_rows"] == 4
+    assert refreshed["overview"]["total_tokens"] == 400
+    assert refreshed["snapshot"]["history_scope"] == "active"
+    assert refreshed["snapshot"]["raw_content_included"] is False
+
+    serialized = json.dumps(refreshed, sort_keys=True)
+    assert "SECRET RAW PROMPT" not in serialized
+    assert "sk-proj" not in serialized
+    assert "/tmp/codex-usage-tracker" not in serialized
+    assert "AGENTS.md instructions" not in serialized
+
+
+def test_usage_refresh_does_not_recompute_diagnostic_overview_snapshot(
+    tmp_path: Path,
+) -> None:
+    codex_home = _make_codex_home(tmp_path)
+    db_path = tmp_path / "usage.sqlite3"
+    refresh_usage_index(codex_home=codex_home, db_path=db_path)
+    build_diagnostic_overview_report(db_path=db_path, refresh=True)
+
+    stale_payload = {
+        "schema": "codex-usage-tracker-diagnostic-overview-v1",
+        "section": DIAGNOSTIC_OVERVIEW_SECTION,
+        "status": "ready",
+        "refreshed": True,
+        "raw_context_included": False,
+        "snapshot": {
+            "computed_at": "2000-01-01T00:00:00+00:00",
+            "history_scope": "active",
+            "source_logs_scanned": 1,
+            "usage_rows_scanned": 1,
+            "raw_content_included": False,
+        },
+        "overview": {"usage_rows": 1, "total_tokens": 7},
+        "notes": [],
+    }
+    upsert_diagnostic_snapshot(
+        db_path=db_path,
+        section=DIAGNOSTIC_OVERVIEW_SECTION,
+        history_scope="active",
+        payload=stale_payload,
+        computed_at="2000-01-01T00:00:00+00:00",
+        source_logs_scanned=1,
+        usage_rows_scanned=1,
+    )
+
+    refresh_usage_index(codex_home=codex_home, db_path=db_path)
+    stored = query_diagnostic_snapshot(
+        db_path=db_path,
+        section=DIAGNOSTIC_OVERVIEW_SECTION,
+        history_scope="active",
+    )
+
+    assert stored is not None
+    assert stored["computed_at"] == "2000-01-01T00:00:00+00:00"
+    assert stored["payload"]["overview"]["total_tokens"] == 7
diff --git a/tests/test_store_dashboard_mcp.py b/tests/test_store_dashboard_mcp.py
index f0e66e8..f2ab90c 100644
--- a/tests/test_store_dashboard_mcp.py
+++ b/tests/test_store_dashboard_mcp.py
@@ -81,12 +81,12 @@ def test_refresh_is_idempotent_and_summary_works(tmp_path: Path) -> None:
     assert meta["parsed_source_files"] == "0"
     assert meta["skipped_source_files"] == "3"
     assert meta["parser_adapter"] == "codex-jsonl-v2"
-    assert meta["schema_version"] == "9"
+    assert meta["schema_version"] == "10"
     assert meta["parser_skipped_events"] == "0"
     state = schema_state(db_path)
-    assert state["schema_version"] == 9
+    assert state["schema_version"] == 10
     assert state["checksum_matches"] is True
-    assert [row["version"] for row in state["migrations"]] == [1, 2, 3, 4, 5, 6, 7, 8, 9]
+    assert [row["version"] for row in state["migrations"]] == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
     with connect(db_path) as conn:
         init_db(conn)
         source_rows = [
@@ -654,7 +654,7 @@ def test_connect_sets_sqlite_concurrency_pragmas(tmp_path: Path) -> None:
 
     assert busy_timeout == 5000
     assert str(journal_mode).lower() == "wal"
-    assert user_version == 9
+    assert user_version == 10
 
 
 def test_init_db_repairs_version_zero_schema(tmp_path: Path) -> None:
@@ -727,8 +727,8 @@ def test_init_db_repairs_version_zero_schema(tmp_path: Path) -> None:
     assert "rate_limit_plan_type" in columns
     assert "rate_limit_primary_used_percent" in columns
     assert "idx_usage_observed_rate_limit_timestamp" in indexes
-    assert user_version == 9
-    assert [row["version"] for row in migrations] == [1, 2, 3, 4, 5, 6, 7, 8, 9]
+    assert user_version == 10
+    assert [row["version"] for row in migrations] == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
 
 
 def test_latest_observed_usage_prefers_normal_codex_limit_pool(tmp_path: Path) -> None:
diff --git a/tests/test_store_migrations.py b/tests/test_store_migrations.py
index 4a0df29..fb3ae63 100644
--- a/tests/test_store_migrations.py
+++ b/tests/test_store_migrations.py
@@ -56,14 +56,17 @@ def test_init_db_migrates_legacy_aggregate_table_without_data_loss(tmp_path: Pat
     assert rows[0]["rate_limit_secondary_used_percent"] is None
     assert metadata["parsed_events"] == "legacy"
     assert metadata["parser_invalid_integer"] == "2"
-    assert state["schema_version"] == 9
+    assert state["schema_version"] == 10
     assert state["checksum_matches"] is True
-    assert [row["version"] for row in state["migrations"]] == [1, 2, 3, 4, 5, 6, 7, 8, 9]
+    assert [row["version"] for row in state["migrations"]] == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
     with connect(db_path) as conn:
         init_db(conn)
         facts = conn.execute("SELECT COUNT(*) AS count FROM call_diagnostic_facts").fetchone()
+        snapshots = conn.execute("SELECT COUNT(*) AS count FROM diagnostic_snapshots").fetchone()
     assert facts is not None
     assert facts["count"] == 0
+    assert snapshots is not None
+    assert snapshots["count"] == 0
 
 
 def test_refresh_is_idempotent_after_legacy_migration(tmp_path: Path) -> None:
@@ -86,7 +89,7 @@ def test_refresh_is_idempotent_after_legacy_migration(tmp_path: Path) -> None:
     assert second_count == 2
     assert legacy_rows[0]["record_id"] == "legacy-record"
     assert new_rows[0]["thread_name"] == "Synthetic migration thread"
-    assert metadata["schema_version"] == "9"
+    assert metadata["schema_version"] == "10"
     assert metadata["parsed_events"] == "0"
     assert metadata["inserted_or_updated_events"] == "0"
     assert metadata["parsed_source_files"] == "0"

From 86286e401ac6eccb800c1743eb769b24ffe6abd9 Mon Sep 17 00:00:00 2001
From: Monsky <douglas.monsky@gmail.com>
Date: Sat, 20 Jun 2026 18:49:05 -0400
Subject: [PATCH 02/10] feat: add diagnostic tool command reports

---
 docs/cli-json-schemas.md                      |  84 +++
 src/codex_usage_tracker/cli.py                |  18 +-
 src/codex_usage_tracker/cli_parser.py         |  24 +
 .../diagnostic_snapshots.py                   | 574 +++++++++++++++++-
 src/codex_usage_tracker/json_contracts.py     |  26 +
 src/codex_usage_tracker/server.py             | 104 +++-
 tests/test_cli_lifecycle.py                   |  60 +-
 tests/test_dashboard_server.py                |  34 ++
 tests/test_diagnostic_snapshots.py            | 138 ++++-
 9 files changed, 1019 insertions(+), 43 deletions(-)

diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md
index 5ec7397..b64ba63 100644
--- a/docs/cli-json-schemas.md
+++ b/docs/cli-json-schemas.md
@@ -48,6 +48,8 @@ Tracked schema ids:
 | `codex-usage-tracker-recommendations-v1` | CLI `recommendations --json`, MCP `usage_recommendations(response_format="json")` |
 | `codex-usage-tracker-diagnostics-v1` | CLI `diagnostics ... --json`, dashboard server `/api/diagnostics/*` |
 | `codex-usage-tracker-diagnostic-overview-v1` | CLI `diagnostics overview --json`, dashboard server `/api/diagnostics/overview` |
+| `codex-usage-tracker-diagnostic-tool-output-v1` | CLI `diagnostics tool-output --json`, dashboard server `/api/diagnostics/tool-output` |
+| `codex-usage-tracker-diagnostic-commands-v1` | CLI `diagnostics commands --json`, dashboard server `/api/diagnostics/commands` |
 | `codex-usage-tracker-session-v1` | CLI `session --json`, MCP `session_usage(response_format="json")` |
 | `codex-usage-tracker-context-v1` | CLI `context`, MCP `usage_call_context` when raw context is explicitly enabled |
 | `codex-usage-tracker-context-disabled-v1` | MCP `usage_call_context` when raw context is disabled |
@@ -325,6 +327,88 @@ Schema: `codex-usage-tracker-diagnostic-overview-v1`
 
 The overview snapshot is recomputed only when explicitly refreshed. Ordinary dashboard usage refreshes do not update diagnostic snapshots.
 
+## Diagnostic Tool Output Snapshot
+
+Commands:
+
+```bash
+codex-usage-tracker diagnostics tool-output --json
+codex-usage-tracker diagnostics tool-output --refresh --json
+```
+
+Dashboard server API:
+
+- `GET /api/diagnostics/tool-output`
+- `POST /api/diagnostics/tool-output/refresh`
+
+Schema: `codex-usage-tracker-diagnostic-tool-output-v1`
+
+```json
+{
+  "schema": "codex-usage-tracker-diagnostic-tool-output-v1",
+  "section": "tool-output",
+  "status": "ready",
+  "refreshed": false,
+  "raw_context_included": false,
+  "snapshot": {},
+  "summary": {
+    "function_calls": 1,
+    "function_outputs": 1,
+    "outputs_with_original_token_count": 1,
+    "outputs_missing_original_token_count": 0,
+    "original_token_sum": 42
+  },
+  "functions": [],
+  "command_roots": [],
+  "missing_reasons": [],
+  "notes": []
+}
+```
+
+The tool-output snapshot stores function names, conservative command roots, numeric counts, and terminal `Original token count` totals. It does not store raw tool output or command text.
+
+## Diagnostic Commands Snapshot
+
+Commands:
+
+```bash
+codex-usage-tracker diagnostics commands --json
+codex-usage-tracker diagnostics commands --refresh --json
+```
+
+Dashboard server API:
+
+- `GET /api/diagnostics/commands`
+- `POST /api/diagnostics/commands/refresh`
+
+Schema: `codex-usage-tracker-diagnostic-commands-v1`
+
+```json
+{
+  "schema": "codex-usage-tracker-diagnostic-commands-v1",
+  "section": "commands",
+  "status": "ready",
+  "refreshed": false,
+  "raw_context_included": false,
+  "snapshot": {},
+  "summary": {
+    "shell_function_calls": 1,
+    "command_root_count": 1,
+    "missing_command": 0
+  },
+  "commands": [
+    {
+      "root": "git",
+      "total": 1,
+      "children": [{"child": "status", "count": 1}]
+    }
+  ],
+  "notes": []
+}
+```
+
+The commands snapshot keeps only command roots and safe one-level child labels such as `status`, `diff`, or `-m:pytest`.
+
 ## Pricing Coverage
 
 Command:
diff --git a/src/codex_usage_tracker/cli.py b/src/codex_usage_tracker/cli.py
index 5608c0f..788ca85 100644
--- a/src/codex_usage_tracker/cli.py
+++ b/src/codex_usage_tracker/cli.py
@@ -29,7 +29,11 @@
     build_diagnostics_facts_report,
     build_diagnostics_summary_report,
 )
-from codex_usage_tracker.diagnostic_snapshots import build_diagnostic_overview_report
+from codex_usage_tracker.diagnostic_snapshots import (
+    build_diagnostic_commands_report,
+    build_diagnostic_overview_report,
+    build_diagnostic_tool_output_report,
+)
 from codex_usage_tracker.diagnostics import run_doctor
 from codex_usage_tracker.formatting import (
     format_doctor,
@@ -456,6 +460,18 @@ def _run_diagnostics(args: argparse.Namespace) -> int:
             include_archived=args.include_archived,
             refresh=args.refresh,
         )
+    elif command == "tool-output":
+        report = build_diagnostic_tool_output_report(
+            db_path=args.db,
+            include_archived=args.include_archived,
+            refresh=args.refresh,
+        )
+    elif command == "commands":
+        report = build_diagnostic_commands_report(
+            db_path=args.db,
+            include_archived=args.include_archived,
+            refresh=args.refresh,
+        )
     else:
         raise ValueError(f"unknown diagnostics command: {command}")
 
diff --git a/src/codex_usage_tracker/cli_parser.py b/src/codex_usage_tracker/cli_parser.py
index d16b8b9..25d70cc 100644
--- a/src/codex_usage_tracker/cli_parser.py
+++ b/src/codex_usage_tracker/cli_parser.py
@@ -339,6 +339,30 @@ def _add_diagnostics_parser(
     )
     overview.add_argument("--json", action="store_true", dest="as_json")
 
+    tool_output = diagnostic_subparsers.add_parser(
+        "tool-output",
+        help="Show the on-demand aggregate tool-output snapshot",
+    )
+    tool_output.add_argument("--include-archived", action="store_true")
+    tool_output.add_argument(
+        "--refresh",
+        action="store_true",
+        help="Recompute and persist the tool-output snapshot before reading it.",
+    )
+    tool_output.add_argument("--json", action="store_true", dest="as_json")
+
+    commands = diagnostic_subparsers.add_parser(
+        "commands",
+        help="Show the on-demand aggregate command root snapshot",
+    )
+    commands.add_argument("--include-archived", action="store_true")
+    commands.add_argument(
+        "--refresh",
+        action="store_true",
+        help="Recompute and persist the command snapshot before reading it.",
+    )
+    commands.add_argument("--json", action="store_true", dest="as_json")
+
     fact_calls = diagnostic_subparsers.add_parser(
         "fact-calls",
         help="List calls associated with one diagnostic fact",
diff --git a/src/codex_usage_tracker/diagnostic_snapshots.py b/src/codex_usage_tracker/diagnostic_snapshots.py
index 9fe28c2..5ecfded 100644
--- a/src/codex_usage_tracker/diagnostic_snapshots.py
+++ b/src/codex_usage_tracker/diagnostic_snapshots.py
@@ -2,6 +2,10 @@
 
 from __future__ import annotations
 
+import json
+import re
+import shlex
+from collections import Counter
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
@@ -16,13 +20,35 @@
 from codex_usage_tracker.store_schema import init_db
 
 DIAGNOSTIC_OVERVIEW_SCHEMA = "codex-usage-tracker-diagnostic-overview-v1"
+DIAGNOSTIC_TOOL_OUTPUT_SCHEMA = "codex-usage-tracker-diagnostic-tool-output-v1"
+DIAGNOSTIC_COMMANDS_SCHEMA = "codex-usage-tracker-diagnostic-commands-v1"
 DIAGNOSTIC_OVERVIEW_SECTION = "overview"
+DIAGNOSTIC_TOOL_OUTPUT_SECTION = "tool-output"
+DIAGNOSTIC_COMMANDS_SECTION = "commands"
 DIAGNOSTIC_HISTORY_ACTIVE = "active"
 DIAGNOSTIC_HISTORY_ALL = "all"
-DIAGNOSTIC_OVERVIEW_NOTES = [
+DIAGNOSTIC_SNAPSHOT_NOTES = [
     "Diagnostic snapshots are recomputed only by explicit diagnostic refresh.",
-    "Overview totals use persisted aggregate usage rows and do not include raw context.",
+    "Snapshot totals are aggregate-only and do not include raw context.",
 ]
+SAFE_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:-]{1,80}$")
+SENSITIVE_LABEL_PREFIXES = ("sk-", "sk_", "ghp_", "github_pat_", "xox")
+SHELL_TOOL_NAMES = {
+    "bash",
+    "exec_command",
+    "functions.exec_command",
+    "run_command",
+    "shell",
+    "terminal",
+    "write_stdin",
+}
+ORIGINAL_OUTPUT_RE = re.compile(
+    r"^Chunk ID: (?P<chunk>[^\n]+)\n"
+    r"Wall time: (?P<wall>[^\n]+)\n"
+    r"(?:(?P<status>Process exited with code -?\d+|Process running with session ID \d+)\n)?"
+    r"Original token count: (?P<count>\d+)\n",
+    re.S,
+)
 
 
 @dataclass(frozen=True)
@@ -33,7 +59,16 @@ class DiagnosticSnapshotReport:
 
     def render(self) -> str:
         if self.payload.get("status") != "ready":
-            return "No diagnostic overview snapshot. Run diagnostics overview --refresh first."
+            section = str(self.payload.get("section") or "snapshot")
+            return f"No diagnostic {section} snapshot. Run diagnostics {section} --refresh first."
+        section = self.payload.get("section")
+        if section == DIAGNOSTIC_TOOL_OUTPUT_SECTION:
+            return self._render_tool_output()
+        if section == DIAGNOSTIC_COMMANDS_SECTION:
+            return self._render_commands()
+        return self._render_overview()
+
+    def _render_overview(self) -> str:
         snapshot = self.payload.get("snapshot") or {}
         overview = self.payload.get("overview") or {}
         return "\n".join(
@@ -49,6 +84,35 @@ def render(self) -> str:
             ]
         )
 
+    def _render_tool_output(self) -> str:
+        snapshot = self.payload.get("snapshot") or {}
+        summary = self.payload.get("summary") or {}
+        return "\n".join(
+            [
+                "Diagnostic tool-output snapshot",
+                f"Computed: {snapshot.get('computed_at')}",
+                f"History scope: {snapshot.get('history_scope')}",
+                f"Function calls: {_int_text(summary.get('function_calls'))}",
+                f"Function outputs: {_int_text(summary.get('function_outputs'))}",
+                f"Outputs with Original token count: {_int_text(summary.get('outputs_with_original_token_count'))}",
+                f"Terminal output tokens: {_int_text(summary.get('original_token_sum'))}",
+            ]
+        )
+
+    def _render_commands(self) -> str:
+        snapshot = self.payload.get("snapshot") or {}
+        summary = self.payload.get("summary") or {}
+        return "\n".join(
+            [
+                "Diagnostic commands snapshot",
+                f"Computed: {snapshot.get('computed_at')}",
+                f"History scope: {snapshot.get('history_scope')}",
+                f"Shell calls: {_int_text(summary.get('shell_function_calls'))}",
+                f"Command roots: {_int_text(summary.get('command_root_count'))}",
+                f"Missing command text: {_int_text(summary.get('missing_command'))}",
+            ]
+        )
+
 
 def build_diagnostic_overview_report(
     *,
@@ -73,6 +137,40 @@ def build_diagnostic_overview_report(
     )
 
 
+def build_diagnostic_tool_output_report(
+    *,
+    db_path: Path = DEFAULT_DB_PATH,
+    include_archived: bool = False,
+    refresh: bool = False,
+) -> DiagnosticSnapshotReport:
+    """Return the latest tool-output snapshot, optionally recomputing it first."""
+
+    return _build_source_log_snapshot_report(
+        db_path=db_path,
+        include_archived=include_archived,
+        refresh=refresh,
+        section=DIAGNOSTIC_TOOL_OUTPUT_SECTION,
+        schema=DIAGNOSTIC_TOOL_OUTPUT_SCHEMA,
+    )
+
+
+def build_diagnostic_commands_report(
+    *,
+    db_path: Path = DEFAULT_DB_PATH,
+    include_archived: bool = False,
+    refresh: bool = False,
+) -> DiagnosticSnapshotReport:
+    """Return the latest commands snapshot, optionally recomputing it first."""
+
+    return _build_source_log_snapshot_report(
+        db_path=db_path,
+        include_archived=include_archived,
+        refresh=refresh,
+        section=DIAGNOSTIC_COMMANDS_SECTION,
+        schema=DIAGNOSTIC_COMMANDS_SCHEMA,
+    )
+
+
 def refresh_diagnostic_overview_snapshot(
     *,
     db_path: Path = DEFAULT_DB_PATH,
@@ -92,7 +190,13 @@ def refresh_diagnostic_overview_snapshot(
         source_logs_scanned=source_logs_scanned,
         usage_rows_scanned=int(overview["usage_rows"]),
     )
-    payload = _ready_payload(snapshot=snapshot, overview=overview, refreshed=True)
+    payload = _ready_payload(
+        schema=DIAGNOSTIC_OVERVIEW_SCHEMA,
+        section=DIAGNOSTIC_OVERVIEW_SECTION,
+        snapshot=snapshot,
+        refreshed=True,
+        overview=overview,
+    )
     upsert_diagnostic_snapshot(
         db_path=db_path,
         section=DIAGNOSTIC_OVERVIEW_SECTION,
@@ -106,6 +210,84 @@ def refresh_diagnostic_overview_snapshot(
     return payload
 
 
+def _build_source_log_snapshot_report(
+    *,
+    db_path: Path,
+    include_archived: bool,
+    refresh: bool,
+    section: str,
+    schema: str,
+) -> DiagnosticSnapshotReport:
+    if refresh:
+        return DiagnosticSnapshotReport(
+            _refresh_source_log_snapshot(
+                db_path=db_path,
+                include_archived=include_archived,
+                section=section,
+                schema=schema,
+            )
+        )
+    return DiagnosticSnapshotReport(
+        _source_log_snapshot_payload(
+            db_path=db_path,
+            include_archived=include_archived,
+            section=section,
+            schema=schema,
+        )
+    )
+
+
+def _refresh_source_log_snapshot(
+    *,
+    db_path: Path,
+    include_archived: bool,
+    section: str,
+    schema: str,
+) -> dict[str, Any]:
+    history_scope = _history_scope(include_archived)
+    computed_at = _utc_now()
+    analysis = _analyze_indexed_source_logs(db_path=db_path, include_archived=include_archived)
+    snapshot = _snapshot_metadata(
+        computed_at=computed_at,
+        history_scope=history_scope,
+        source_logs_scanned=analysis["meta"]["source_logs_scanned"],
+        usage_rows_scanned=analysis["meta"]["usage_rows_scanned"],
+    )
+    if section == DIAGNOSTIC_TOOL_OUTPUT_SECTION:
+        payload = _ready_payload(
+            schema=schema,
+            section=section,
+            snapshot=snapshot,
+            refreshed=True,
+            summary=analysis["tool_output"]["summary"],
+            functions=analysis["tool_output"]["functions"],
+            command_roots=analysis["tool_output"]["command_roots"],
+            missing_reasons=analysis["tool_output"]["missing_reasons"],
+        )
+    elif section == DIAGNOSTIC_COMMANDS_SECTION:
+        payload = _ready_payload(
+            schema=schema,
+            section=section,
+            snapshot=snapshot,
+            refreshed=True,
+            summary=analysis["commands"]["summary"],
+            commands=analysis["commands"]["commands"],
+        )
+    else:
+        raise ValueError(f"unknown diagnostic snapshot section: {section}")
+    upsert_diagnostic_snapshot(
+        db_path=db_path,
+        section=section,
+        history_scope=history_scope,
+        payload=payload,
+        computed_at=computed_at,
+        source_logs_scanned=analysis["meta"]["source_logs_scanned"],
+        usage_rows_scanned=analysis["meta"]["usage_rows_scanned"],
+        raw_content_included=False,
+    )
+    return payload
+
+
 def diagnostic_overview_payload(
     *,
     db_path: Path = DEFAULT_DB_PATH,
@@ -134,6 +316,34 @@ def diagnostic_overview_payload(
     return payload
 
 
+def _source_log_snapshot_payload(
+    *,
+    db_path: Path,
+    include_archived: bool,
+    section: str,
+    schema: str,
+) -> dict[str, Any]:
+    history_scope = _history_scope(include_archived)
+    stored = query_diagnostic_snapshot(
+        db_path=db_path,
+        section=section,
+        history_scope=history_scope,
+    )
+    if stored is None:
+        return _missing_payload(schema=schema, section=section, history_scope=history_scope)
+    payload = dict(stored["payload"])
+    payload["status"] = "ready"
+    payload["refreshed"] = False
+    payload["snapshot"] = _snapshot_metadata(
+        computed_at=str(stored["computed_at"]),
+        history_scope=str(stored["history_scope"]),
+        source_logs_scanned=int(stored["source_logs_scanned"]),
+        usage_rows_scanned=int(stored["usage_rows_scanned"]),
+    )
+    payload["raw_context_included"] = bool(stored["raw_content_included"])
+    return payload
+
+
 def _compute_overview(
     *,
     db_path: Path,
@@ -198,36 +408,368 @@ def _compute_overview(
 
 def _ready_payload(
     *,
+    schema: str,
+    section: str,
     snapshot: dict[str, Any],
-    overview: dict[str, Any],
     refreshed: bool,
+    **sections: object,
 ) -> dict[str, Any]:
-    return {
-        "schema": DIAGNOSTIC_OVERVIEW_SCHEMA,
-        "section": DIAGNOSTIC_OVERVIEW_SECTION,
+    payload: dict[str, Any] = {
+        "schema": schema,
+        "section": section,
         "status": "ready",
         "refreshed": refreshed,
         "raw_context_included": False,
         "snapshot": snapshot,
-        "overview": overview,
-        "notes": list(DIAGNOSTIC_OVERVIEW_NOTES),
+        "notes": list(DIAGNOSTIC_SNAPSHOT_NOTES),
     }
+    payload.update(sections)
+    return payload
 
 
-def _missing_payload(*, history_scope: str) -> dict[str, Any]:
-    return {
-        "schema": DIAGNOSTIC_OVERVIEW_SCHEMA,
-        "section": DIAGNOSTIC_OVERVIEW_SECTION,
+def _missing_payload(
+    *,
+    history_scope: str,
+    schema: str = DIAGNOSTIC_OVERVIEW_SCHEMA,
+    section: str = DIAGNOSTIC_OVERVIEW_SECTION,
+) -> dict[str, Any]:
+    payload: dict[str, Any] = {
+        "schema": schema,
+        "section": section,
         "status": "missing",
         "refreshed": False,
         "raw_context_included": False,
         "snapshot": None,
-        "overview": None,
         "history_scope": history_scope,
-        "notes": list(DIAGNOSTIC_OVERVIEW_NOTES),
+        "notes": list(DIAGNOSTIC_SNAPSHOT_NOTES),
+    }
+    if section == DIAGNOSTIC_OVERVIEW_SECTION:
+        payload["overview"] = None
+    elif section == DIAGNOSTIC_TOOL_OUTPUT_SECTION:
+        payload["summary"] = None
+        payload["functions"] = []
+        payload["command_roots"] = []
+        payload["missing_reasons"] = []
+    elif section == DIAGNOSTIC_COMMANDS_SECTION:
+        payload["summary"] = None
+        payload["commands"] = []
+    return payload
+
+
+def _analyze_indexed_source_logs(
+    *,
+    db_path: Path,
+    include_archived: bool,
+) -> dict[str, Any]:
+    """Aggregate tool-call, tool-output, and shell-command counts from the indexed logs."""
+    source_logs, usage_rows_scanned = _indexed_source_logs(
+        db_path=db_path, include_archived=include_archived
+    )
+    function_calls: Counter[str] = Counter()
+    function_outputs: Counter[str] = Counter()
+    output_with_count: Counter[str] = Counter()
+    output_missing_count: Counter[str] = Counter()
+    output_token_sum: Counter[str] = Counter()
+    command_calls: Counter[str] = Counter()
+    command_children: dict[str, Counter[str]] = {}
+    command_with_count: Counter[str] = Counter()
+    command_missing_count: Counter[str] = Counter()
+    command_token_sum: Counter[str] = Counter()
+    missing_reasons: Counter[str] = Counter()
+    meta: Counter[str] = Counter()
+    meta["source_logs_scanned"] = len(source_logs)
+    meta["usage_rows_scanned"] = usage_rows_scanned
+
+    for source_log in source_logs:
+        call_names: dict[str, str] = {}  # call_id -> function label, per log file
+        call_roots: dict[str, str] = {}  # call_id -> command root, per log file
+        try:
+            text = source_log.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError):  # unreadable or non-UTF-8 log: count, skip
+            meta["read_errors"] += 1
+            continue
+        for line in text.split("\n"):  # not splitlines(): it also cuts on U+2028/U+0085
+            if not line.strip():
+                continue
+            try:
+                envelope = json.loads(line)
+            except json.JSONDecodeError:
+                meta["invalid_json"] += 1
+                continue
+            if not isinstance(envelope, dict) or envelope.get("type") != "response_item":
+                continue
+            payload = envelope.get("payload")
+            if not isinstance(payload, dict):
+                continue
+            payload_type = payload.get("type")
+            if payload_type == "function_call":
+                call_id = _optional_str(payload.get("call_id") or payload.get("id"))
+                function_name = _safe_label(payload.get("name")) or "unknown_function"
+                function_calls[function_name] += 1
+                if call_id:
+                    call_names[call_id] = function_name
+                command = _shell_command_from_payload(payload, function_name=function_name)
+                if command is None:
+                    if _is_shell_tool(function_name):
+                        meta["missing_command"] += 1
+                    continue
+                root, child = _command_root_and_child(command)
+                command_calls[root] += 1
+                command_children.setdefault(root, Counter())[child] += 1
+                if call_id:
+                    call_roots[call_id] = root
+            elif payload_type == "function_call_output":
+                call_id = _optional_str(payload.get("call_id"))
+                function_name = call_names.get(call_id or "", "unknown_function")
+                function_outputs[function_name] += 1
+                output = payload.get("output")
+                count = _original_output_count(output)
+                root = call_roots.get(call_id or "")  # set only for calls seen with a command
+                if count is None:
+                    output_missing_count[function_name] += 1
+                    missing_reasons["string_no_header" if isinstance(output, str) else "non_string_output"] += 1
+                    if root:
+                        command_missing_count[root] += 1
+                    continue
+                output_with_count[function_name] += 1
+                output_token_sum[function_name] += count
+                if root:
+                    command_with_count[root] += 1
+                    command_token_sum[root] += count
+
+    function_rows = _function_rows(
+        function_calls=function_calls,
+        function_outputs=function_outputs,
+        output_with_count=output_with_count,
+        output_missing_count=output_missing_count,
+        output_token_sum=output_token_sum,
+    )
+    command_output_rows = _command_output_rows(
+        command_calls=command_calls,
+        command_with_count=command_with_count,
+        command_missing_count=command_missing_count,
+        command_token_sum=command_token_sum,
+    )
+    return {
+        "meta": {key: int(value) for key, value in meta.items()},
+        "tool_output": {
+            "summary": {
+                "function_calls": int(sum(function_calls.values())),
+                "function_outputs": int(sum(function_outputs.values())),
+                "outputs_with_original_token_count": int(sum(output_with_count.values())),
+                "outputs_missing_original_token_count": int(sum(output_missing_count.values())),
+                "original_token_sum": int(sum(output_token_sum.values())),
+            },
+            "functions": function_rows,
+            "command_roots": command_output_rows,
+            "missing_reasons": _simple_rows(missing_reasons),
+        },
+        "commands": {
+            "summary": {
+                "shell_function_calls": int(sum(command_calls.values())),
+                "command_root_count": len(command_calls),
+                "missing_command": int(meta["missing_command"]),
+            },
+            "commands": _command_rows(command_calls=command_calls, command_children=command_children),
+        },
     }
 
 
+def _indexed_source_logs(
+    *,
+    db_path: Path,
+    include_archived: bool,
+) -> tuple[list[Path], int]:
+    """Return indexed source-log paths and the usage-event row count for the scope."""
+    where = "" if include_archived else "WHERE is_archived = 0"  # fixed literal, not user input
+    logs_sql = f"SELECT source_file FROM source_files {where} ORDER BY source_file"
+    count_sql = f"SELECT COUNT(*) AS usage_rows FROM usage_events {where}"
+    with connect(db_path) as conn:
+        init_db(conn)
+        log_rows = conn.execute(logs_sql).fetchall()
+        count_row = conn.execute(count_sql).fetchone()
+    paths = [Path(str(entry["source_file"])) for entry in log_rows]
+    return paths, _int_value(count_row["usage_rows"])
+
+
+def _function_rows(
+    *,
+    function_calls: Counter[str],
+    function_outputs: Counter[str],
+    output_with_count: Counter[str],
+    output_missing_count: Counter[str],
+    output_token_sum: Counter[str],
+) -> list[dict[str, Any]]:
+    """Build per-function rows, ordered by token sum desc, calls desc, then name."""
+    labels = {*function_calls, *function_outputs, *output_with_count, *output_token_sum}
+    table: list[dict[str, Any]] = []
+    for label in labels:
+        entry: dict[str, Any] = {"function": label}
+        entry["calls"] = int(function_calls[label])
+        entry["outputs"] = int(function_outputs[label])
+        entry["with_original_token_count"] = int(output_with_count[label])
+        entry["missing_original_token_count"] = int(output_missing_count[label])
+        entry["original_token_sum"] = int(output_token_sum[label])
+        table.append(entry)
+    table.sort(key=lambda row: (-int(row["original_token_sum"]), -int(row["calls"]), row["function"]))
+    return table
+
+
+def _command_output_rows(
+    *,
+    command_calls: Counter[str],
+    command_with_count: Counter[str],
+    command_missing_count: Counter[str],
+    command_token_sum: Counter[str],
+) -> list[dict[str, Any]]:
+    """Build per-command-root output rows, ordered by token sum desc, calls desc, root."""
+    table: list[dict[str, Any]] = []
+    for name in {*command_calls, *command_token_sum}:
+        entry: dict[str, Any] = {"root": name}
+        entry["calls"] = int(command_calls[name])
+        entry["with_original_token_count"] = int(command_with_count[name])
+        entry["missing_original_token_count"] = int(command_missing_count[name])
+        entry["original_token_sum"] = int(command_token_sum[name])
+        table.append(entry)
+    table.sort(key=lambda row: (-int(row["original_token_sum"]), -int(row["calls"]), row["root"]))
+    return table
+
+
+def _command_rows(
+    *,
+    command_calls: Counter[str],
+    command_children: dict[str, Counter[str]],
+) -> list[dict[str, Any]]:
+    """List command roots by call count (desc, then name) with their top child labels."""
+    # Cap each root's child breakdown at its 25 most frequent entries.
+    children = {name: _simple_rows(kids, key_name="child")[:25] for name, kids in command_children.items()}
+    ranked = sorted(command_calls.items(), key=lambda item: (-int(item[1]), item[0]))
+    return [{"root": name, "total": int(n), "children": children.get(name, [])} for name, n in ranked]
+
+
+def _simple_rows(
+    counter: Counter[str],
+    *,
+    key_name: str = "name",
+) -> list[dict[str, Any]]:
+    """Turn *counter* into ``{key_name, "count"}`` rows, highest count first, then by label."""
+    ranked = sorted(counter, key=lambda label: (-counter[label], label))
+    # Each row maps the caller-chosen key to the label and "count" to its tally.
+    return [{key_name: label, "count": int(counter[label])} for label in ranked]
+
+
+def _shell_command_from_payload(payload: dict[str, Any], *, function_name: str) -> str | None:
+    """Return the shell command string carried by a shell tool call, or None."""
+    if not _is_shell_tool(function_name):
+        return None
+    arguments = payload.get("arguments")
+    if isinstance(arguments, str):
+        # String arguments are decoded as JSON; undecodable text is ignored.
+        try:
+            arguments = json.loads(arguments)
+        except json.JSONDecodeError:
+            arguments = None
+    # Prefer the (decoded) arguments mapping, then fall back to the payload itself.
+    for holder in (arguments, payload):
+        if not isinstance(holder, dict):
+            continue
+        candidate = holder.get("cmd") or holder.get("command")
+        if isinstance(candidate, str):
+            return candidate
+    return None
+
+
+def _is_shell_tool(function_name: str) -> bool:
+    """Tell whether the name, or its last "."/"__"-separated part, is a known shell tool."""
+    name = function_name.lower()
+    return any(form in SHELL_TOOL_NAMES for form in (name, name.rsplit(".", 1)[-1].rsplit("__", 1)[-1]))
+
+
+def _command_root_and_child(command: str) -> tuple[str, str]:
+    """Split a raw shell command into ``(root, child)`` aggregate labels."""
+    argv = _strip_command_wrappers(_command_tokens(command))
+    if argv:
+        return (root := _command_root(argv)), _command_child(root, argv)
+    return "unknown_command", "unknown"
+
+
+def _command_tokens(command: str) -> list[str]:  # POSIX-mode shlex tokenization
+    try:
+        return shlex.split(command, posix=True)
+    except ValueError:  # shlex could not parse the text; report no tokens instead
+        return []
+
+
+def _strip_command_wrappers(tokens: list[str]) -> list[str]:
+    """Drop leading wrappers from *tokens* and return the rest as a new list.
+
+    Leading ``VAR=value`` assignments and ``command``/``env``/``sudo`` are removed.
+    """
+    wrappers = {"command", "env", "sudo"}
+    start = 0
+    # Advance past every leading token that is an assignment or a wrapper executable.
+    for token in tokens:
+        if not (_looks_like_assignment(token) or _basename(token) in wrappers):
+            break
+        start += 1
+    return list(tokens[start:])
+
+
+def _command_root(tokens: list[str]) -> str:
+    """Map the executable in ``tokens[0]`` to a root label, folding pytest/python aliases."""
+    executable = _basename(tokens[0])
+    if executable in ("pytest", "py.test"):
+        return "pytest"
+    is_python = executable == "py" or executable.startswith("python")
+    return "python" if is_python else (_safe_label(executable) or "unknown_command")
+
+
+def _command_child(root: str, tokens: list[str]) -> str:
+    """Return the child label for a command: python module/option, or first argument."""
+    if root == "python":
+        for index, token in enumerate(tokens[:-1]):
+            if token == "-m":
+                module = _safe_label(_basename(tokens[index + 1]).split(".", 1)[0])
+                return f"-m:{module}" if module else "-m:unknown"
+        # Keep only the option name: "-cCODE" or "--opt=VALUE" would leak raw content.
+        option = re.match(r"--[A-Za-z][A-Za-z0-9-]*|--?[A-Za-z]?", tokens[1] if len(tokens) > 1 else "")
+        return option.group(0) if option else "<script>"
+    return (_safe_label(_basename(tokens[1])) or "<arg>") if len(tokens) > 1 else "<none>"
+
+
+def _original_output_count(output: object) -> int | None:
+    """Return the integer ``count`` group matched by ORIGINAL_OUTPUT_RE, or None."""
+    if isinstance(output, str):
+        found = ORIGINAL_OUTPUT_RE.match(output)
+        if found:
+            return int(found.group("count"))
+    return None
+
+
+def _optional_str(value: object) -> str | None:  # non-empty str passes through, else None
+    return value if isinstance(value, str) and value else None
+
+
+def _safe_label(value: object) -> str | None:
+    """Return *value* trimmed and lower-cased when it passes every label check, else None.
+
+    Rejected: non-strings, sensitive prefixes, path separators, SAFE_LABEL_RE mismatches.
+    """
+    if not isinstance(value, str):
+        return None
+    text = value.strip()
+    rejected = text.lower().startswith(SENSITIVE_LABEL_PREFIXES) or "/" in text or "\\" in text
+    return text.lower() if not rejected and SAFE_LABEL_RE.fullmatch(text) else None
+
+
+def _looks_like_assignment(token: str) -> bool:  # True for shell-style NAME=value tokens
+    return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*=.*", token))
+
+
+def _basename(token: str) -> str:  # lower-cased text after the last "/" or "\" in token
+    return re.split(r"[\\/]", token)[-1].lower()
+
+
 def _snapshot_metadata(
     *,
     computed_at: str,
diff --git a/src/codex_usage_tracker/json_contracts.py b/src/codex_usage_tracker/json_contracts.py
index 502b565..48c46f0 100644
--- a/src/codex_usage_tracker/json_contracts.py
+++ b/src/codex_usage_tracker/json_contracts.py
@@ -169,6 +169,32 @@
             "notes": list,
         }
     },
+    "codex-usage-tracker-diagnostic-tool-output-v1": {
+        "required": {
+            "section": str,
+            "status": str,
+            "refreshed": bool,
+            "raw_context_included": bool,
+            "snapshot": (dict, NoneType),
+            "summary": (dict, NoneType),
+            "functions": list,
+            "command_roots": list,
+            "missing_reasons": list,
+            "notes": list,
+        }
+    },
+    "codex-usage-tracker-diagnostic-commands-v1": {
+        "required": {
+            "section": str,
+            "status": str,
+            "refreshed": bool,
+            "raw_context_included": bool,
+            "snapshot": (dict, NoneType),
+            "summary": (dict, NoneType),
+            "commands": list,
+            "notes": list,
+        }
+    },
     "codex-usage-tracker-session-v1": {
         "required": {
             "requested_session_id": (str, NoneType),
diff --git a/src/codex_usage_tracker/server.py b/src/codex_usage_tracker/server.py
index 562c313..ed50661 100644
--- a/src/codex_usage_tracker/server.py
+++ b/src/codex_usage_tracker/server.py
@@ -35,7 +35,11 @@
     build_diagnostics_facts_report,
     build_diagnostics_summary_report,
 )
-from codex_usage_tracker.diagnostic_snapshots import build_diagnostic_overview_report
+from codex_usage_tracker.diagnostic_snapshots import (
+    build_diagnostic_commands_report,
+    build_diagnostic_overview_report,
+    build_diagnostic_tool_output_report,
+)
 from codex_usage_tracker.i18n import normalize_language
 from codex_usage_tracker.paths import (
     DEFAULT_ALLOWANCE_PATH,
@@ -308,6 +312,12 @@ def do_GET(self) -> None:  # noqa: N802 - stdlib hook name
         if parsed.path == "/api/diagnostics/overview":
             self._handle_diagnostics_overview(parsed.query)
             return
+        if parsed.path == "/api/diagnostics/tool-output":
+            self._handle_diagnostics_tool_output(parsed.query)
+            return
+        if parsed.path == "/api/diagnostics/commands":
+            self._handle_diagnostics_commands(parsed.query)
+            return
         if parsed.path == "/api/usage":
             self._handle_usage(parsed.query)
             return
@@ -327,6 +337,12 @@ def do_POST(self) -> None:  # noqa: N802 - stdlib hook name
         if parsed.path == "/api/diagnostics/overview/refresh":
             self._handle_diagnostics_overview_refresh(parsed.query)
             return
+        if parsed.path == "/api/diagnostics/tool-output/refresh":
+            self._handle_diagnostics_tool_output_refresh(parsed.query)
+            return
+        if parsed.path == "/api/diagnostics/commands/refresh":
+            self._handle_diagnostics_commands_refresh(parsed.query)
+            return
         self._send_json(HTTPStatus.NOT_FOUND, {"error": "Unknown API endpoint"})
 
     def end_headers(self) -> None:
@@ -958,28 +974,63 @@ def _handle_diagnostics_fact_calls(self, query: str) -> None:
         self._send_json(HTTPStatus.OK, payload)
 
     def _handle_diagnostics_overview(self, query: str) -> None:
-        params = parse_qs(query)
-        include_archived = _parse_bool(
-            _first(params.get("include_archived")),
-            self._include_archived,
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_overview_report,
+            refresh=False,
+            label="diagnostic overview",
         )
-        try:
-            payload = build_diagnostic_overview_report(
-                db_path=self._db_path,
-                include_archived=include_archived,
-                refresh=False,
-            ).payload
-        except sqlite3.Error as exc:
-            self._send_json(
-                HTTPStatus.INTERNAL_SERVER_ERROR,
-                {"error": f"Database error while reading diagnostic overview: {exc}"},
-            )
-            return
-        self._send_json(HTTPStatus.OK, payload)
 
     def _handle_diagnostics_overview_refresh(self, query: str) -> None:
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_overview_report,
+            refresh=True,
+            label="diagnostic overview",
+        )
+
+    def _handle_diagnostics_tool_output(self, query: str) -> None:
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_tool_output_report,
+            refresh=False,
+            label="diagnostic tool output",
+        )
+
+    def _handle_diagnostics_tool_output_refresh(self, query: str) -> None:
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_tool_output_report,
+            refresh=True,
+            label="diagnostic tool output",
+        )
+
+    def _handle_diagnostics_commands(self, query: str) -> None:
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_commands_report,
+            refresh=False,
+            label="diagnostic commands",
+        )
+
+    def _handle_diagnostics_commands_refresh(self, query: str) -> None:
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_commands_report,
+            refresh=True,
+            label="diagnostic commands",
+        )
+
+    def _handle_diagnostic_snapshot(
+        self,
+        query: str,
+        *,
+        build_report: Any,
+        refresh: bool,
+        label: str,
+    ) -> None:
         params = parse_qs(query)
-        if not self._has_valid_api_token(params):
+        if refresh and not self._has_valid_api_token(params):
             self._send_json(
                 HTTPStatus.FORBIDDEN,
                 {"error": "Valid API token is required for diagnostic refresh"},
@@ -990,16 +1041,23 @@ def _handle_diagnostics_overview_refresh(self, query: str) -> None:
             self._include_archived,
         )
         try:
-            with self._refresh_lock:
-                payload = build_diagnostic_overview_report(
+            if refresh:
+                with self._refresh_lock:
+                    payload = build_report(
+                        db_path=self._db_path,
+                        include_archived=include_archived,
+                        refresh=True,
+                    ).payload
+            else:
+                payload = build_report(
                     db_path=self._db_path,
                     include_archived=include_archived,
-                    refresh=True,
+                    refresh=False,
                 ).payload
         except sqlite3.Error as exc:
             self._send_json(
                 HTTPStatus.INTERNAL_SERVER_ERROR,
-                {"error": f"Database error while refreshing diagnostic overview: {exc}"},
+                {"error": f"Database error while reading {label}: {exc}"},
             )
             return
         self._send_json(HTTPStatus.OK, payload)
diff --git a/tests/test_cli_lifecycle.py b/tests/test_cli_lifecycle.py
index be5a7ee..3ace0ba 100644
--- a/tests/test_cli_lifecycle.py
+++ b/tests/test_cli_lifecycle.py
@@ -400,6 +400,24 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
         "--refresh",
         "--json",
     )
+    tool_output_refresh = _run_cli(
+        tmp_path,
+        "--db",
+        str(db_path),
+        "diagnostics",
+        "tool-output",
+        "--refresh",
+        "--json",
+    )
+    commands_refresh = _run_cli(
+        tmp_path,
+        "--db",
+        str(db_path),
+        "diagnostics",
+        "commands",
+        "--refresh",
+        "--json",
+    )
     fact_calls = _run_cli(
         tmp_path,
         "--db",
@@ -422,6 +440,8 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
     tools_payload = json.loads(tools.stdout)
     overview_missing_payload = json.loads(overview_missing.stdout)
     overview_refresh_payload = json.loads(overview_refresh.stdout)
+    tool_output_refresh_payload = json.loads(tool_output_refresh.stdout)
+    commands_refresh_payload = json.loads(commands_refresh.stdout)
     fact_calls_payload = json.loads(fact_calls.stdout)
     for payload in (
         summary_payload,
@@ -430,6 +450,8 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
         tools_payload,
         overview_missing_payload,
         overview_refresh_payload,
+        tool_output_refresh_payload,
+        commands_refresh_payload,
         fact_calls_payload,
     ):
         _assert_contract(payload)
@@ -456,13 +478,27 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
     assert {row["fact_type"] for row in compactions_payload["rows"]} == {"compaction"}
     assert tools_payload["filters"]["fact_type"] is None
     assert tools_payload["filters"]["fact_group"] == "tools"
-    assert {row["fact_type"] for row in tools_payload["rows"]} == {"tool"}
+    assert "tool" in {row["fact_type"] for row in tools_payload["rows"]}
     assert overview_missing_payload["schema"] == "codex-usage-tracker-diagnostic-overview-v1"
     assert overview_missing_payload["status"] == "missing"
     assert overview_refresh_payload["schema"] == "codex-usage-tracker-diagnostic-overview-v1"
     assert overview_refresh_payload["status"] == "ready"
     assert overview_refresh_payload["overview"]["usage_rows"] == 2
     assert overview_refresh_payload["refreshed"] is True
+    assert (
+        tool_output_refresh_payload["schema"]
+        == "codex-usage-tracker-diagnostic-tool-output-v1"
+    )
+    assert tool_output_refresh_payload["summary"]["original_token_sum"] == 9
+    assert (
+        commands_refresh_payload["schema"]
+        == "codex-usage-tracker-diagnostic-commands-v1"
+    )
+    assert commands_refresh_payload["commands"][0]["root"] == "git"
+    assert commands_refresh_payload["commands"][0]["children"][0] == {
+        "child": "status",
+        "count": 1,
+    }
     assert fact_calls_payload["view"] == "fact-calls"
     assert fact_calls_payload["filters"]["privacy_mode"] == "strict"
     assert fact_calls_payload["rows"][0]["cwd"].startswith("[redacted cwd:")
@@ -554,7 +590,27 @@ def _make_diagnostics_codex_home(tmp_path: Path) -> Path:
             ),
             _entry(
                 "response_item",
-                {"type": "function_call_output", "output": "SECRET TOOL OUTPUT"},
+                {
+                    "type": "function_call",
+                    "call_id": "call-git",
+                    "name": "exec_command",
+                    "arguments": json.dumps({"cmd": "git status SECRET_ARG"}),
+                },
+            ),
+            _entry(
+                "response_item",
+                {
+                    "type": "function_call_output",
+                    "call_id": "call-git",
+                    "output": (
+                        "Chunk ID: abc123\n"
+                        "Wall time: 0.0000 seconds\n"
+                        "Process exited with code 0\n"
+                        "Original token count: 9\n"
+                        "Output:\n"
+                        "SECRET TOOL OUTPUT"
+                    ),
+                },
             ),
             _entry(
                 "event_msg",
diff --git a/tests/test_dashboard_server.py b/tests/test_dashboard_server.py
index a432bb8..7da7815 100644
--- a/tests/test_dashboard_server.py
+++ b/tests/test_dashboard_server.py
@@ -93,9 +93,27 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
             data=b"",
             method="POST",
         )
+        diagnostic_tool_output_refresh_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/tool-output/refresh",
+            headers={"X-Codex-Usage-Token": "test-token"},
+            data=b"",
+            method="POST",
+        )
+        diagnostic_commands_refresh_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/commands/refresh",
+            headers={"X-Codex-Usage-Token": "test-token"},
+            data=b"",
+            method="POST",
+        )
         diagnostic_stored_payload = _read_json(
             f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview"
         )
+        diagnostic_tool_output_stored_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/tool-output"
+        )
+        diagnostic_commands_stored_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/commands"
+        )
         diagnostic_computed_at = diagnostic_stored_payload["snapshot"]["computed_at"]
         with urllib.request.urlopen(  # noqa: S310 - local test server only
             urllib.request.Request(
@@ -152,8 +170,24 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
     assert diagnostic_refresh_payload["refreshed"] is True
     assert diagnostic_refresh_payload["overview"]["usage_rows"] == 4
     assert diagnostic_refresh_payload["overview"]["total_tokens"] == 400
+    assert (
+        diagnostic_tool_output_refresh_payload["schema"]
+        == "codex-usage-tracker-diagnostic-tool-output-v1"
+    )
+    assert diagnostic_tool_output_refresh_payload["status"] == "ready"
+    assert diagnostic_tool_output_refresh_payload["summary"]["function_calls"] == 0
+    assert (
+        diagnostic_commands_refresh_payload["schema"]
+        == "codex-usage-tracker-diagnostic-commands-v1"
+    )
+    assert diagnostic_commands_refresh_payload["status"] == "ready"
+    assert diagnostic_commands_refresh_payload["summary"]["shell_function_calls"] == 0
     assert diagnostic_stored_payload["status"] == "ready"
     assert diagnostic_stored_payload["refreshed"] is False
+    assert diagnostic_tool_output_stored_payload["status"] == "ready"
+    assert diagnostic_tool_output_stored_payload["refreshed"] is False
+    assert diagnostic_commands_stored_payload["status"] == "ready"
+    assert diagnostic_commands_stored_payload["refreshed"] is False
     assert second_usage_refresh_payload["refresh_result"]["parsed_events"] == 0
     assert diagnostic_after_second_usage_refresh["snapshot"]["computed_at"] == diagnostic_computed_at
     assert len(limited_payload["rows"]) == 2
diff --git a/tests/test_diagnostic_snapshots.py b/tests/test_diagnostic_snapshots.py
index ccb4079..b8a668a 100644
--- a/tests/test_diagnostic_snapshots.py
+++ b/tests/test_diagnostic_snapshots.py
@@ -3,11 +3,20 @@
 import json
 from pathlib import Path
 
-from store_dashboard_helpers import _assert_contract, _make_codex_home
+from store_dashboard_helpers import (
+    SESSION_ID,
+    _assert_contract,
+    _entry,
+    _make_codex_home,
+    _token_event,
+    _write_jsonl,
+)
 
 from codex_usage_tracker.diagnostic_snapshots import (
     DIAGNOSTIC_OVERVIEW_SECTION,
+    build_diagnostic_commands_report,
     build_diagnostic_overview_report,
+    build_diagnostic_tool_output_report,
 )
 from codex_usage_tracker.store import (
     query_diagnostic_snapshot,
@@ -94,3 +103,130 @@ def test_usage_refresh_does_not_recompute_diagnostic_overview_snapshot(
     assert stored is not None
     assert stored["computed_at"] == "2000-01-01T00:00:00+00:00"
     assert stored["payload"]["overview"]["total_tokens"] == 7
+
+
+def test_tool_output_and_command_snapshots_use_safe_aggregate_labels(
+    tmp_path: Path,
+) -> None:
+    codex_home = tmp_path / ".codex"
+    log_path = (
+        codex_home
+        / "sessions"
+        / "2026"
+        / "05"
+        / "17"
+        / f"rollout-2026-05-17T14-58-23-{SESSION_ID}.jsonl"
+    )
+    _write_jsonl(
+        log_path,
+        [
+            _entry("session_meta", {"id": SESSION_ID}),
+            _entry(
+                "turn_context",
+                {
+                    "turn_id": "turn-a",
+                    "model": "gpt-5.5",
+                    "cwd": "/tmp/private-diagnostics",
+                },
+            ),
+            _entry(
+                "response_item",
+                {
+                    "type": "function_call",
+                    "call_id": "call-git",
+                    "name": "exec_command",
+                    "arguments": json.dumps({"cmd": "git diff --stat SECRET_RAW_ARGUMENT"}),
+                },
+            ),
+            _entry(
+                "response_item",
+                {
+                    "type": "function_call_output",
+                    "call_id": "call-git",
+                    "output": _terminal_output(42),
+                },
+            ),
+            _entry(
+                "response_item",
+                {
+                    "type": "function_call",
+                    "call_id": "call-python",
+                    "name": "exec_command",
+                    "arguments": json.dumps(
+                        {"cmd": "PYTHONPATH=src python -m pytest tests/test_private.py"}
+                    ),
+                },
+            ),
+            _entry(
+                "response_item",
+                {
+                    "type": "function_call_output",
+                    "call_id": "call-python",
+                    "output": "plain output without wrapper header SECRET_OUTPUT",
+                },
+            ),
+            _entry(
+                "response_item",
+                {
+                    "type": "function_call",
+                    "call_id": "call-stdin",
+                    "name": "write_stdin",
+                    "arguments": json.dumps({"chars": "SECRET_STDIN"}),
+                },
+            ),
+            _entry(
+                "response_item",
+                {
+                    "type": "function_call_output",
+                    "call_id": "call-stdin",
+                    "output": _terminal_output(5),
+                },
+            ),
+            _token_event(100, 100),
+        ],
+    )
+    db_path = tmp_path / "usage.sqlite3"
+    refresh_usage_index(codex_home=codex_home, db_path=db_path)
+    # Read once without refresh=True: this test expects that first read to report "missing".
+    missing = build_diagnostic_tool_output_report(db_path=db_path).payload
+    tool_output = build_diagnostic_tool_output_report(db_path=db_path, refresh=True).payload
+    commands = build_diagnostic_commands_report(db_path=db_path, refresh=True).payload
+    # All three payloads go through the shared contract helper before the detailed checks.
+    _assert_contract(missing)
+    _assert_contract(tool_output)
+    _assert_contract(commands)
+    assert missing["status"] == "missing"
+    assert tool_output["status"] == "ready"
+    assert tool_output["summary"]["function_calls"] == 3
+    assert tool_output["summary"]["function_outputs"] == 3
+    assert tool_output["summary"]["outputs_with_original_token_count"] == 2
+    assert tool_output["summary"]["outputs_missing_original_token_count"] == 1
+    assert tool_output["summary"]["original_token_sum"] == 47  # 42 (call-git) + 5 (call-stdin)
+    functions = {row["function"]: row for row in tool_output["functions"]}
+    assert functions["exec_command"]["calls"] == 2
+    assert functions["exec_command"]["with_original_token_count"] == 1
+    assert functions["exec_command"]["missing_original_token_count"] == 1
+    assert functions["write_stdin"]["original_token_sum"] == 5
+    assert tool_output["missing_reasons"] == [{"name": "string_no_header", "count": 1}]
+    # Command rows are keyed by root; children carry one-level labels ("diff", "-m:pytest").
+    command_rows = {row["root"]: row for row in commands["commands"]}
+    assert command_rows["git"]["total"] == 1
+    assert command_rows["git"]["children"][0] == {"child": "diff", "count": 1}
+    assert command_rows["python"]["children"][0] == {"child": "-m:pytest", "count": 1}
+    assert commands["summary"]["missing_command"] == 1  # presumably call-stdin: no "cmd" key
+    # Privacy guard: SECRET_* markers, the test file name and the cwd must not be serialized.
+    serialized = json.dumps([tool_output, commands], sort_keys=True)
+    assert "SECRET" not in serialized
+    assert "test_private.py" not in serialized
+    assert "/tmp/private-diagnostics" not in serialized
+
+
+def _terminal_output(count: int) -> str:
+    """Return a fake terminal tool output whose header reports *count* original tokens."""
+    header = [
+        "Chunk ID: abc123",
+        "Wall time: 0.0000 seconds",
+        "Process exited with code 0",
+        f"Original token count: {count}",
+    ]
+    return "\n".join([*header, "Output:", "redacted by test fixture"])

From 7ac0c013fdd8df4f5153285468d463847a11da3b Mon Sep 17 00:00:00 2001
From: Monsky <douglas.monsky@gmail.com>
Date: Sat, 20 Jun 2026 18:58:35 -0400
Subject: [PATCH 03/10] feat: add diagnostic file read reports

---
 docs/cli-json-schemas.md                      |  86 +++
 docs/cli-reference.md                         |   4 +-
 docs/dashboard-guide.md                       |   2 +
 docs/privacy.md                               |   2 +
 src/codex_usage_tracker/cli.py                |  14 +
 src/codex_usage_tracker/cli_parser.py         |  24 +
 .../diagnostic_snapshots.py                   | 596 +++++++++++++++++-
 src/codex_usage_tracker/json_contracts.py     |  29 +
 src/codex_usage_tracker/server.py             |  46 ++
 tests/test_cli_lifecycle.py                   |  32 +
 tests/test_dashboard_server.py                |  34 +
 tests/test_diagnostic_snapshots.py            | 132 ++++
 12 files changed, 997 insertions(+), 4 deletions(-)

diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md
index b64ba63..fb9a92e 100644
--- a/docs/cli-json-schemas.md
+++ b/docs/cli-json-schemas.md
@@ -50,6 +50,8 @@ Tracked schema ids:
 | `codex-usage-tracker-diagnostic-overview-v1` | CLI `diagnostics overview --json`, dashboard server `/api/diagnostics/overview` |
 | `codex-usage-tracker-diagnostic-tool-output-v1` | CLI `diagnostics tool-output --json`, dashboard server `/api/diagnostics/tool-output` |
 | `codex-usage-tracker-diagnostic-commands-v1` | CLI `diagnostics commands --json`, dashboard server `/api/diagnostics/commands` |
+| `codex-usage-tracker-diagnostic-file-reads-v1` | CLI `diagnostics file-reads --json`, dashboard server `/api/diagnostics/file-reads` |
+| `codex-usage-tracker-diagnostic-read-productivity-v1` | CLI `diagnostics read-productivity --json`, dashboard server `/api/diagnostics/read-productivity` |
 | `codex-usage-tracker-session-v1` | CLI `session --json`, MCP `session_usage(response_format="json")` |
 | `codex-usage-tracker-context-v1` | CLI `context`, MCP `usage_call_context` when raw context is explicitly enabled |
 | `codex-usage-tracker-context-disabled-v1` | MCP `usage_call_context` when raw context is disabled |
@@ -409,6 +411,90 @@ Schema: `codex-usage-tracker-diagnostic-commands-v1`
 
 The commands snapshot keeps only command roots and safe one-level child labels such as `status`, `diff`, or `-m:pytest`.
 
+## Diagnostic File Reads Snapshot
+
+Commands:
+
+```bash
+codex-usage-tracker diagnostics file-reads --json
+codex-usage-tracker diagnostics file-reads --refresh --json
+```
+
+Dashboard server API:
+
+- `GET /api/diagnostics/file-reads`
+- `POST /api/diagnostics/file-reads/refresh`
+
+Schema: `codex-usage-tracker-diagnostic-file-reads-v1`
+
+```json
+{
+  "schema": "codex-usage-tracker-diagnostic-file-reads-v1",
+  "section": "file-reads",
+  "status": "ready",
+  "refreshed": false,
+  "raw_context_included": false,
+  "snapshot": {},
+  "summary": {
+    "read_commands": 1,
+    "read_events": 1,
+    "unique_paths_read": 1,
+    "read_events_with_output_count": 1,
+    "read_events_missing_output_count": 0,
+    "allocated_output_token_sum": 42
+  },
+  "by_reader": [],
+  "top_paths": [],
+  "largest_read_commands": [],
+  "path_privacy": {},
+  "notes": []
+}
+```
+
+The file-reads snapshot classifies common shell readers such as `cat`, `sed`, `nl`, `rg`, and `find`. Path labels are basename-only with a short irreversible hash; raw commands, command arguments, absolute paths, file contents, and tool output are not stored.
+
+## Diagnostic Read Productivity Snapshot
+
+Commands:
+
+```bash
+codex-usage-tracker diagnostics read-productivity --json
+codex-usage-tracker diagnostics read-productivity --refresh --json
+```
+
+Dashboard server API:
+
+- `GET /api/diagnostics/read-productivity`
+- `POST /api/diagnostics/read-productivity/refresh`
+
+Schema: `codex-usage-tracker-diagnostic-read-productivity-v1`
+
+```json
+{
+  "schema": "codex-usage-tracker-diagnostic-read-productivity-v1",
+  "section": "read-productivity",
+  "status": "ready",
+  "refreshed": false,
+  "raw_context_included": false,
+  "snapshot": {},
+  "summary": {
+    "read_events": 1,
+    "read_events_modified_later": 1,
+    "read_events_modified_later_pct": 1.0,
+    "unique_paths_read": 1,
+    "unique_paths_modified_later": 1,
+    "unique_path_modified_later_pct": 1.0,
+    "correlation_note": "Read-to-modify counts are temporal correlations."
+  },
+  "by_reader": [],
+  "top_modified_paths": [],
+  "path_privacy": {},
+  "notes": []
+}
+```
+
+Read productivity is a temporal correlation, not causation. A read is counted as modified later only when the same privacy-preserving path key appears in a later structured patch event in the same source log.
+
 ## Pricing Coverage
 
 Command:
diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index 79c08b4..58d2c62 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -120,12 +120,14 @@ codex-usage-tracker diagnostics summary
 codex-usage-tracker diagnostics facts --sort uncached
 codex-usage-tracker diagnostics compactions
 codex-usage-tracker diagnostics tools
+codex-usage-tracker diagnostics file-reads --refresh
+codex-usage-tracker diagnostics read-productivity --refresh
 codex-usage-tracker diagnostics fact-calls --fact-type compaction --fact-name post_compaction
 ```
 
 Diagnostics expose structured event patterns and their associated token totals. They can show compactions, tool/function/MCP activity, safe command families, structured skill labels, patch outcomes, task completion, search/read loops, and aborted or rolled-back turns. Associated totals are not causal allocations and are not additive when one model call has multiple diagnostic facts.
 
-Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments.
+Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, raw absolute paths, or JSONL fragments. File-read diagnostics use basename-only path labels plus short irreversible hashes, and read-productivity percentages are temporal correlations rather than proof that a read caused a later edit.
 
 ## JSON Queries
 
diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md
index 40beb28..0ec462c 100644
--- a/docs/dashboard-guide.md
+++ b/docs/dashboard-guide.md
@@ -133,6 +133,8 @@ Use `Diagnostics` view when you want to see what structured event patterns are h
 - The tab consumes the localhost `/api/diagnostics/*` endpoints; static file dashboards show a live-API unavailable state.
 - The first table shows top diagnostic facts by associated uncached input tokens. Tool/function/MCP/command-family and compaction sections expose narrower slices of the same fact data.
 - Command diagnostics store only a command family such as `pytest`, `git`, or `unknown_command`. Skill and MCP labels are detected only when they are present as structured event metadata.
+- The on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, and `read-productivity`). Heavy recomputation happens only through each section's explicit refresh endpoint (`POST /api/diagnostics/<section>/refresh`).
+- File-read snapshots use basename-only path labels and short hashes. Read-productivity rates are temporal correlations between earlier reads and later structured patch events, not causation.
 - Click `Calls` on a fact row to load associated model calls. Call links and largest-call links open the Call Investigator, where raw context remains explicit and on demand.
 - Associated token totals are not causal allocations and are not additive when one call has multiple diagnostic facts.
 
diff --git a/docs/privacy.md b/docs/privacy.md
index 50da9e3..80a9c0a 100644
--- a/docs/privacy.md
+++ b/docs/privacy.md
@@ -35,6 +35,8 @@ Call-origin metadata is heuristic and confidence-labeled. It stores categories s
 
 Diagnostic facts follow the same aggregate-only rule. They can store safe structured labels such as `patch_applied`, `function_call_output`, `post_compaction`, MCP tool/server labels, structured skill labels, and command families such as `pytest`, `git`, or `unknown_command`, along with event counts and source line ranges. Command text may be classified in memory during parsing, but it is not persisted. Diagnostic facts do not store tool arguments, command text, command output, patch text, prompt or assistant text, file contents, raw JSONL fragments, or raw context evidence.
 
+On-demand diagnostic snapshots follow the same boundary. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation.
+
 ## On-Demand Context
 
 `usage_call_context`, `codex-usage-tracker context`, and the `serve-dashboard` context endpoint read a single source JSONL file only when explicitly requested. Returned context is redacted for common secret patterns and capped in size by default for CLI/MCP requests. The call investigator uses the same endpoint at runtime and requests quick redacted evidence for the selected call when the local context API is enabled; that still does not persist raw context into SQLite, CSV, support bundles, or generated dashboard HTML.
diff --git a/src/codex_usage_tracker/cli.py b/src/codex_usage_tracker/cli.py
index 788ca85..85b9776 100644
--- a/src/codex_usage_tracker/cli.py
+++ b/src/codex_usage_tracker/cli.py
@@ -31,7 +31,9 @@
 )
 from codex_usage_tracker.diagnostic_snapshots import (
     build_diagnostic_commands_report,
+    build_diagnostic_file_reads_report,
     build_diagnostic_overview_report,
+    build_diagnostic_read_productivity_report,
     build_diagnostic_tool_output_report,
 )
 from codex_usage_tracker.diagnostics import run_doctor
@@ -472,6 +474,18 @@ def _run_diagnostics(args: argparse.Namespace) -> int:
             include_archived=args.include_archived,
             refresh=args.refresh,
         )
+    elif command == "file-reads":
+        report = build_diagnostic_file_reads_report(
+            db_path=args.db,
+            include_archived=args.include_archived,
+            refresh=args.refresh,
+        )
+    elif command == "read-productivity":
+        report = build_diagnostic_read_productivity_report(
+            db_path=args.db,
+            include_archived=args.include_archived,
+            refresh=args.refresh,
+        )
     else:
         raise ValueError(f"unknown diagnostics command: {command}")
 
diff --git a/src/codex_usage_tracker/cli_parser.py b/src/codex_usage_tracker/cli_parser.py
index 25d70cc..9c41051 100644
--- a/src/codex_usage_tracker/cli_parser.py
+++ b/src/codex_usage_tracker/cli_parser.py
@@ -363,6 +363,30 @@ def _add_diagnostics_parser(
     )
     commands.add_argument("--json", action="store_true", dest="as_json")
 
+    file_reads = diagnostic_subparsers.add_parser(
+        "file-reads",
+        help="Show the on-demand aggregate file-read snapshot",
+    )
+    file_reads.add_argument("--include-archived", action="store_true")
+    file_reads.add_argument(
+        "--refresh",
+        action="store_true",
+        help="Recompute and persist the file-read snapshot before reading it.",
+    )
+    file_reads.add_argument("--json", action="store_true", dest="as_json")
+
+    read_productivity = diagnostic_subparsers.add_parser(
+        "read-productivity",
+        help="Show temporal read-to-modify diagnostic correlations",
+    )
+    read_productivity.add_argument("--include-archived", action="store_true")
+    read_productivity.add_argument(
+        "--refresh",
+        action="store_true",
+        help="Recompute and persist the read-productivity snapshot before reading it.",
+    )
+    read_productivity.add_argument("--json", action="store_true", dest="as_json")
+
     fact_calls = diagnostic_subparsers.add_parser(
         "fact-calls",
         help="List calls associated with one diagnostic fact",
diff --git a/src/codex_usage_tracker/diagnostic_snapshots.py b/src/codex_usage_tracker/diagnostic_snapshots.py
index 5ecfded..81c7402 100644
--- a/src/codex_usage_tracker/diagnostic_snapshots.py
+++ b/src/codex_usage_tracker/diagnostic_snapshots.py
@@ -2,10 +2,11 @@
 
 from __future__ import annotations
 
+import hashlib
 import json
 import re
 import shlex
-from collections import Counter
+from collections import Counter, defaultdict
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
@@ -22,9 +23,13 @@
 DIAGNOSTIC_OVERVIEW_SCHEMA = "codex-usage-tracker-diagnostic-overview-v1"
 DIAGNOSTIC_TOOL_OUTPUT_SCHEMA = "codex-usage-tracker-diagnostic-tool-output-v1"
 DIAGNOSTIC_COMMANDS_SCHEMA = "codex-usage-tracker-diagnostic-commands-v1"
+DIAGNOSTIC_FILE_READS_SCHEMA = "codex-usage-tracker-diagnostic-file-reads-v1"
+DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA = "codex-usage-tracker-diagnostic-read-productivity-v1"
 DIAGNOSTIC_OVERVIEW_SECTION = "overview"
 DIAGNOSTIC_TOOL_OUTPUT_SECTION = "tool-output"
 DIAGNOSTIC_COMMANDS_SECTION = "commands"
+DIAGNOSTIC_FILE_READS_SECTION = "file-reads"
+DIAGNOSTIC_READ_PRODUCTIVITY_SECTION = "read-productivity"
 DIAGNOSTIC_HISTORY_ACTIVE = "active"
 DIAGNOSTIC_HISTORY_ALL = "all"
 DIAGNOSTIC_SNAPSHOT_NOTES = [
@@ -32,6 +37,7 @@
     "Snapshot totals are aggregate-only and do not include raw context.",
 ]
 SAFE_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:-]{1,80}$")
+SAFE_PATH_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:@*+-]{1,80}$")
 SENSITIVE_LABEL_PREFIXES = ("sk-", "sk_", "ghp_", "github_pat_", "xox")
 SHELL_TOOL_NAMES = {
     "bash",
@@ -42,6 +48,12 @@
     "terminal",
     "write_stdin",
 }
+READ_COMMAND_ROOTS = {"cat", "find", "grep", "head", "nl", "rg", "sed", "strings", "tail", "wc"}
+SEARCH_READ_ROOTS = {"find", "rg"}
+READ_PRODUCTIVITY_NOTE = (
+    "Read-to-modify counts are temporal correlations: a read is counted when the same "
+    "privacy-preserving path key is modified later in the same source log."
+)
 ORIGINAL_OUTPUT_RE = re.compile(
     r"^Chunk ID: (?P<chunk>[^\n]+)\n"
     r"Wall time: (?P<wall>[^\n]+)\n"
@@ -66,6 +78,10 @@ def render(self) -> str:
             return self._render_tool_output()
         if section == DIAGNOSTIC_COMMANDS_SECTION:
             return self._render_commands()
+        if section == DIAGNOSTIC_FILE_READS_SECTION:
+            return self._render_file_reads()
+        if section == DIAGNOSTIC_READ_PRODUCTIVITY_SECTION:
+            return self._render_read_productivity()
         return self._render_overview()
 
     def _render_overview(self) -> str:
@@ -113,6 +129,36 @@ def _render_commands(self) -> str:
             ]
         )
 
+    def _render_file_reads(self) -> str:
+        """Render the file-reads payload as a plain-text summary, one field per line."""
+        meta = self.payload.get("snapshot") or {}
+        totals = self.payload.get("summary") or {}
+        read_commands = _int_text(totals.get("read_commands"))
+        read_events = _int_text(totals.get("read_events"))
+        allocated = _int_text(totals.get("allocated_output_token_sum"))
+        missing = _int_text(totals.get("read_events_missing_output_count"))
+        lines = ["Diagnostic file-reads snapshot", f"Computed: {meta.get('computed_at')}"]
+        lines.append(f"History scope: {meta.get('history_scope')}")
+        lines += [f"Read commands: {read_commands}", f"Read events: {read_events}"]
+        lines.append(f"Allocated output tokens: {allocated}")
+        lines.append(f"Missing output counts: {missing}")
+        return "\n".join(lines)
+
+    def _render_read_productivity(self) -> str:
+        """Render the read-productivity payload as text, ending with the correlation note."""
+        meta = self.payload.get("snapshot") or {}
+        totals = self.payload.get("summary") or {}
+        read_events = _int_text(totals.get("read_events"))
+        modified_later = _int_text(totals.get("read_events_modified_later"))
+        modified_rate = _pct_text(totals.get("read_events_modified_later_pct"))
+        lines = ["Diagnostic read-productivity snapshot"]
+        lines.append(f"Computed: {meta.get('computed_at')}")
+        lines.append(f"History scope: {meta.get('history_scope')}")
+        lines.append(f"Read events: {read_events}")
+        lines.append(f"Read events modified later: {modified_later}")
+        lines.append(f"Read-to-modify rate: {modified_rate}")
+        return "\n".join([*lines, READ_PRODUCTIVITY_NOTE])
+
 
 def build_diagnostic_overview_report(
     *,
@@ -171,6 +217,40 @@ def build_diagnostic_commands_report(
     )
 
 
+def build_diagnostic_file_reads_report(
+    *,
+    db_path: Path = DEFAULT_DB_PATH,
+    include_archived: bool = False,
+    refresh: bool = False,
+) -> DiagnosticSnapshotReport:
+    """Build the file-reads snapshot report; ``refresh=True`` recomputes it first."""
+    section, schema = DIAGNOSTIC_FILE_READS_SECTION, DIAGNOSTIC_FILE_READS_SCHEMA
+    return _build_source_log_snapshot_report(
+        db_path=db_path,
+        include_archived=include_archived,
+        refresh=refresh,
+        section=section,
+        schema=schema,
+    )
+
+
+def build_diagnostic_read_productivity_report(
+    *,
+    db_path: Path = DEFAULT_DB_PATH,
+    include_archived: bool = False,
+    refresh: bool = False,
+) -> DiagnosticSnapshotReport:
+    """Build the read-productivity snapshot report; ``refresh=True`` recomputes it first."""
+    section, schema = DIAGNOSTIC_READ_PRODUCTIVITY_SECTION, DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA
+    return _build_source_log_snapshot_report(
+        db_path=db_path,
+        include_archived=include_archived,
+        refresh=refresh,
+        section=section,
+        schema=schema,
+    )
+
+
 def refresh_diagnostic_overview_snapshot(
     *,
     db_path: Path = DEFAULT_DB_PATH,
@@ -273,6 +353,29 @@ def _refresh_source_log_snapshot(
             summary=analysis["commands"]["summary"],
             commands=analysis["commands"]["commands"],
         )
+    elif section == DIAGNOSTIC_FILE_READS_SECTION:
+        payload = _ready_payload(
+            schema=schema,
+            section=section,
+            snapshot=snapshot,
+            refreshed=True,
+            summary=analysis["file_reads"]["summary"],
+            by_reader=analysis["file_reads"]["by_reader"],
+            top_paths=analysis["file_reads"]["top_paths"],
+            largest_read_commands=analysis["file_reads"]["largest_read_commands"],
+            path_privacy=analysis["file_reads"]["path_privacy"],
+        )
+    elif section == DIAGNOSTIC_READ_PRODUCTIVITY_SECTION:
+        payload = _ready_payload(
+            schema=schema,
+            section=section,
+            snapshot=snapshot,
+            refreshed=True,
+            summary=analysis["read_productivity"]["summary"],
+            by_reader=analysis["read_productivity"]["by_reader"],
+            top_modified_paths=analysis["read_productivity"]["top_modified_paths"],
+            path_privacy=analysis["read_productivity"]["path_privacy"],
+        )
     else:
         raise ValueError(f"unknown diagnostic snapshot section: {section}")
     upsert_diagnostic_snapshot(
@@ -453,6 +556,17 @@ def _missing_payload(
     elif section == DIAGNOSTIC_COMMANDS_SECTION:
         payload["summary"] = None
         payload["commands"] = []
+    elif section == DIAGNOSTIC_FILE_READS_SECTION:
+        payload["summary"] = None
+        payload["by_reader"] = []
+        payload["top_paths"] = []
+        payload["largest_read_commands"] = []
+        payload["path_privacy"] = _path_privacy_metadata()
+    elif section == DIAGNOSTIC_READ_PRODUCTIVITY_SECTION:
+        payload["summary"] = None
+        payload["by_reader"] = []
+        payload["top_modified_paths"] = []
+        payload["path_privacy"] = _path_privacy_metadata()
     return payload
 
 
@@ -475,6 +589,18 @@ def _analyze_indexed_source_logs(
     command_with_count: Counter[str] = Counter()
     command_missing_count: Counter[str] = Counter()
     command_token_sum: Counter[str] = Counter()
+    read_events: list[dict[str, Any]] = []
+    read_command_count = 0
+    read_events_by_reader: Counter[str] = Counter()
+    read_events_by_path: Counter[str] = Counter()
+    read_events_with_count_by_reader: Counter[str] = Counter()
+    read_events_missing_count_by_reader: Counter[str] = Counter()
+    read_tokens_by_reader: Counter[str] = Counter()
+    read_tokens_by_path: Counter[str] = Counter()
+    read_modified_by_reader: Counter[str] = Counter()
+    read_modified_by_path: Counter[str] = Counter()
+    read_path_refs: dict[str, dict[str, str]] = {}
+    largest_read_commands: list[dict[str, Any]] = []
     missing_reasons: Counter[str] = Counter()
     meta: Counter[str] = Counter()
     meta["source_logs_scanned"] = len(source_logs)
@@ -483,22 +609,31 @@ def _analyze_indexed_source_logs(
     for source_log in source_logs:
         call_names: dict[str, str] = {}
         call_roots: dict[str, str] = {}
+        call_read_events: dict[str, list[int]] = {}
+        source_read_events: list[int] = []
+        modified_orders_by_path: dict[str, list[int]] = defaultdict(list)
         try:
             lines = source_log.read_text(encoding="utf-8").splitlines()
         except OSError:
             meta["read_errors"] += 1
             continue
-        for line in lines:
+        for order, line in enumerate(lines):
             try:
                 envelope = json.loads(line)
             except json.JSONDecodeError:
                 meta["invalid_json"] += 1
                 continue
-            if not isinstance(envelope, dict) or envelope.get("type") != "response_item":
+            if not isinstance(envelope, dict):
                 continue
             payload = envelope.get("payload")
             if not isinstance(payload, dict):
                 continue
+            if envelope.get("type") == "event_msg":
+                for path_ref in _modified_path_refs(payload):
+                    modified_orders_by_path[path_ref["path_key"]].append(order)
+                continue
+            if envelope.get("type") != "response_item":
+                continue
             payload_type = payload.get("type")
             if payload_type == "function_call":
                 call_id = _optional_str(payload.get("call_id") or payload.get("id"))
@@ -516,18 +651,48 @@ def _analyze_indexed_source_logs(
                 command_children.setdefault(root, Counter())[child] += 1
                 if call_id:
                     call_roots[call_id] = root
+                read_refs = _read_path_refs_from_command(command, root=root)
+                if read_refs:
+                    read_command_count += 1
+                    indexes: list[int] = []
+                    reader = _read_reader(root)
+                    for path_ref in read_refs:
+                        path_key = path_ref["path_key"]
+                        read_path_refs[path_key] = path_ref
+                        event_index = len(read_events)
+                        read_events.append(
+                            {
+                                "reader": reader,
+                                "root": root,
+                                "path_key": path_key,
+                                "path_label": path_ref["path_label"],
+                                "path_hash": path_ref["path_hash"],
+                                "order": order,
+                                "modified_later": False,
+                            }
+                        )
+                        source_read_events.append(event_index)
+                        indexes.append(event_index)
+                        read_events_by_reader[reader] += 1
+                        read_events_by_path[path_key] += 1
+                    if call_id:
+                        call_read_events[call_id] = indexes
             elif payload_type == "function_call_output":
                 call_id = _optional_str(payload.get("call_id"))
                 function_name = call_names.get(call_id or "", "unknown_function")
                 function_outputs[function_name] += 1
                 output = payload.get("output")
                 count = _original_output_count(output)
+                read_indexes = call_read_events.get(call_id or "", [])
                 if count is None:
                     output_missing_count[function_name] += 1
                     missing_reasons["string_no_header" if isinstance(output, str) else "non_string_output"] += 1
                     root = call_roots.get(call_id or "")
                     if root:
                         command_missing_count[root] += 1
+                    for event_index in read_indexes:
+                        reader = str(read_events[event_index]["reader"])
+                        read_events_missing_count_by_reader[reader] += 1
                     continue
                 output_with_count[function_name] += 1
                 output_token_sum[function_name] += count
@@ -535,6 +700,40 @@ def _analyze_indexed_source_logs(
                 if root:
                     command_with_count[root] += 1
                     command_token_sum[root] += count
+                if read_indexes:
+                    allocations = _allocate_token_count(count, len(read_indexes))
+                    paths: list[dict[str, str]] = []
+                    readers: Counter[str] = Counter()
+                    for event_index, allocated in zip(read_indexes, allocations, strict=True):
+                        event = read_events[event_index]
+                        reader = str(event["reader"])
+                        path_key = str(event["path_key"])
+                        read_events_with_count_by_reader[reader] += 1
+                        read_tokens_by_reader[reader] += allocated
+                        read_tokens_by_path[path_key] += allocated
+                        readers[reader] += 1
+                        paths.append(
+                            {
+                                "path_label": str(event["path_label"]),
+                                "path_hash": str(event["path_hash"]),
+                            }
+                        )
+                    largest_read_commands.append(
+                        {
+                            "root": root or "unknown_command",
+                            "read_event_count": len(read_indexes),
+                            "original_token_count": int(count),
+                            "readers": _simple_rows(readers, key_name="reader"),
+                            "paths": _unique_path_rows(paths),
+                        }
+                    )
+        for event_index in source_read_events:
+            event = read_events[event_index]
+            path_key = str(event["path_key"])
+            if any(order > int(event["order"]) for order in modified_orders_by_path.get(path_key, [])):
+                event["modified_later"] = True
+                read_modified_by_reader[str(event["reader"])] += 1
+                read_modified_by_path[path_key] += 1
 
     function_rows = _function_rows(
         function_calls=function_calls,
@@ -572,6 +771,53 @@ def _analyze_indexed_source_logs(
             },
             "commands": command_rows,
         },
+        "file_reads": {
+            "summary": {
+                "read_commands": read_command_count,
+                "read_events": len(read_events),
+                "unique_paths_read": len(read_path_refs),
+                "read_events_with_output_count": int(sum(read_events_with_count_by_reader.values())),
+                "read_events_missing_output_count": int(sum(read_events_missing_count_by_reader.values())),
+                "allocated_output_token_sum": int(sum(read_tokens_by_reader.values())),
+            },
+            "by_reader": _read_reader_rows(
+                read_events_by_reader=read_events_by_reader,
+                read_events_with_count_by_reader=read_events_with_count_by_reader,
+                read_events_missing_count_by_reader=read_events_missing_count_by_reader,
+                read_tokens_by_reader=read_tokens_by_reader,
+            ),
+            "top_paths": _read_path_rows(
+                read_path_refs=read_path_refs,
+                read_events_by_path=read_events_by_path,
+                read_tokens_by_path=read_tokens_by_path,
+            ),
+            "largest_read_commands": _largest_read_command_rows(largest_read_commands),
+            "path_privacy": _path_privacy_metadata(),
+        },
+        "read_productivity": {
+            "summary": {
+                "read_events": len(read_events),
+                "read_events_modified_later": int(sum(read_modified_by_reader.values())),
+                "read_events_modified_later_pct": _ratio(
+                    int(sum(read_modified_by_reader.values())),
+                    len(read_events),
+                ),
+                "unique_paths_read": len(read_path_refs),
+                "unique_paths_modified_later": len(read_modified_by_path),
+                "unique_path_modified_later_pct": _ratio(len(read_modified_by_path), len(read_path_refs)),
+                "correlation_note": READ_PRODUCTIVITY_NOTE,
+            },
+            "by_reader": _read_productivity_reader_rows(
+                read_events_by_reader=read_events_by_reader,
+                read_modified_by_reader=read_modified_by_reader,
+            ),
+            "top_modified_paths": _read_productivity_path_rows(
+                read_path_refs=read_path_refs,
+                read_events_by_path=read_events_by_path,
+                read_modified_by_path=read_modified_by_path,
+            ),
+            "path_privacy": _path_privacy_metadata(),
+        },
     }
 
 
@@ -647,6 +893,125 @@ def _command_rows(
     return sorted(rows, key=lambda row: (-int(row["total"]), row["root"]))
 
 
+def _read_reader_rows(
+    *,
+    read_events_by_reader: Counter[str],
+    read_events_with_count_by_reader: Counter[str],
+    read_events_missing_count_by_reader: Counter[str],
+    read_tokens_by_reader: Counter[str],
+) -> list[dict[str, Any]]:
+    """Build one row per reader, ordered by allocated tokens, then read events, then name."""
+    rows: list[dict[str, Any]] = []
+    for reader in {*read_events_by_reader, *read_tokens_by_reader}:
+        rows.append(
+            {
+                "reader": reader,
+                "read_events": int(read_events_by_reader[reader]),
+                "events_with_output_count": int(read_events_with_count_by_reader[reader]),
+                "events_missing_output_count": int(read_events_missing_count_by_reader[reader]),
+                "allocated_output_token_sum": int(read_tokens_by_reader[reader]),
+            }
+        )
+    rows.sort(key=lambda row: (-row["allocated_output_token_sum"], -row["read_events"], row["reader"]))
+    return rows
+
+
+def _read_path_rows(
+    *,
+    read_path_refs: dict[str, dict[str, str]],
+    read_events_by_path: Counter[str],
+    read_tokens_by_path: Counter[str],
+) -> list[dict[str, Any]]:
+    """Return up to 50 per-path rows, largest allocated token totals first."""
+
+    def rank(row: dict[str, Any]) -> tuple[int, int, str, str]:
+        weight = (-int(row["allocated_output_token_sum"]), -int(row["read_events"]))
+        return (*weight, row["path_label"], row["path_hash"])
+
+    rows: list[dict[str, Any]] = []
+    for path_key in {*read_events_by_path, *read_tokens_by_path}:
+        if path_key not in read_path_refs:
+            continue  # counted keys without a recorded label/hash are left out
+        rows.append(
+            {
+                "path_label": read_path_refs[path_key]["path_label"],
+                "path_hash": read_path_refs[path_key]["path_hash"],
+                "read_events": int(read_events_by_path[path_key]),
+                "allocated_output_token_sum": int(read_tokens_by_path[path_key]),
+            }
+        )
+    return sorted(rows, key=rank)[:50]
+
+
+def _largest_read_command_rows(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Return the 25 largest read commands by output tokens, then event count, then root."""
+
+    def rank(row: dict[str, Any]) -> tuple[int, int, str]:
+        return (-int(row["original_token_count"]), -int(row["read_event_count"]), row["root"])
+
+    ordered = list(rows)  # copy so the caller's list keeps its order
+    ordered.sort(key=rank)
+    return ordered[:25]
+
+
+def _read_productivity_reader_rows(
+    *,
+    read_events_by_reader: Counter[str],
+    read_modified_by_reader: Counter[str],
+) -> list[dict[str, Any]]:
+    """Report, per reader, how many of its read events were flagged as modified later.
+
+    Rows are ordered by modified-later count, then read events (both descending), then reader.
+    """
+    rows: list[dict[str, Any]] = []
+    for reader, events in read_events_by_reader.items():
+        modified = int(read_modified_by_reader[reader])
+        rows.append(
+            {
+                "reader": reader,
+                "read_events": int(events),
+                "read_events_modified_later": modified,
+                "read_events_modified_later_pct": _ratio(modified, int(events)),
+            }
+        )
+    # The reader name is the final tie-breaker so the ordering is deterministic.
+    rows.sort(
+        key=lambda row: (-row["read_events_modified_later"], -row["read_events"], row["reader"])
+    )
+    return rows
+
+
+def _read_productivity_path_rows(
+    *,
+    read_path_refs: dict[str, dict[str, str]],
+    read_events_by_path: Counter[str],
+    read_modified_by_path: Counter[str],
+) -> list[dict[str, Any]]:
+    """Return up to 50 paths that were read and later modified, most-modified first."""
+    rows: list[dict[str, Any]] = []
+    for path_key, modified_count in read_modified_by_path.items():
+        if path_key not in read_path_refs:
+            continue  # no label/hash recorded for this key
+        ref = read_path_refs[path_key]
+        events = int(read_events_by_path[path_key])
+        modified = int(modified_count)
+        rows.append(
+            {
+                "path_label": ref["path_label"],
+                "path_hash": ref["path_hash"],
+                "read_events": events,
+                "read_events_modified_later": modified,
+                "read_events_modified_later_pct": _ratio(modified, events),
+            }
+        )
+
+    def rank(row: dict[str, Any]) -> tuple[int, int, str, str]:
+        counts = (-row["read_events_modified_later"], -row["read_events"])
+        return (*counts, row["path_label"], row["path_hash"])
+
+    return sorted(rows, key=rank)[:50]
+
+
 def _simple_rows(
     counter: Counter[str],
     *,
@@ -658,6 +1023,227 @@ def _simple_rows(
     ]
 
 
+def _unique_path_rows(paths: list[dict[str, str]]) -> list[dict[str, str]]:
+    """Drop repeated path hashes (first occurrence wins) and return at most 25 rows."""
+    first_by_hash: dict[str, dict[str, str]] = {}
+    for path in paths:
+        path_hash = path["path_hash"]
+        if path_hash not in first_by_hash:
+            # Copy only the two public fields so extra keys never leak into the rows.
+            first_by_hash[path_hash] = {"path_label": path["path_label"], "path_hash": path_hash}
+    unique_rows = list(first_by_hash.values())
+    return unique_rows[:25]
+
+
+def _allocate_token_count(count: int, bucket_count: int) -> list[int]:
+    """Split count into bucket_count near-equal integers; the first buckets absorb the remainder."""
+    if bucket_count <= 0:
+        return []
+    share, extra = divmod(count, bucket_count)
+    return [share + (1 if index < extra else 0) for index in range(bucket_count)]
+
+
+def _read_path_refs_from_command(command: str, *, root: str) -> list[dict[str, str]]:
+    """Return the unique path references named by a read command, in first-seen order."""
+    # Roots outside READ_COMMAND_ROOTS contribute no read paths.
+    if root not in READ_COMMAND_ROOTS:
+        return []
+    tokens = _strip_command_wrappers(_command_tokens(command))
+    if not tokens:
+        return []
+    refs_by_key: dict[str, dict[str, str]] = {}
+    for token in _read_path_tokens(root=root, tokens=tokens):
+        path_ref = _path_ref_from_token(token)
+        if path_ref is None:
+            continue  # the token was rejected as a path reference
+        # setdefault keeps the first reference seen for each path key.
+        refs_by_key.setdefault(path_ref["path_key"], path_ref)
+    return list(refs_by_key.values())
+
+
+def _read_path_tokens(*, root: str, tokens: list[str]) -> list[str]:
+    """Pick the path operands out of a read command's tokens (the first token is skipped)."""
+    args = tokens[1:]
+    if root == "find":
+        return _find_path_tokens(args)
+    if root == "rg":
+        return _ripgrep_path_tokens(args)
+    operands = _non_option_operands(args, root=root)
+    if root in {"grep", "sed"}:
+        # For grep and sed the leading operand is dropped (presumably the
+        # pattern or script); only the remaining operands are treated as paths.
+        return operands[1:]
+    return operands
+
+
+def _find_path_tokens(args: list[str]) -> list[str]:
+    """Collect the leading path operands of a find command, defaulting to ["."]."""
+    starting_points: list[str] = []
+    for token in args:
+        # The first option-like token or grouping operator ends the path list.
+        is_expression = token != "--" and (token.startswith("-") or token in {"!", "(", ")"})
+        if _is_shell_separator(token) or is_expression:
+            break
+        if token != "--":
+            starting_points.append(token)
+    return starting_points if starting_points else ["."]
+
+
+def _ripgrep_path_tokens(args: list[str]) -> list[str]:
+    """Return rg path operands; without --files the first operand is dropped as the pattern."""
+    operands = _non_option_operands(args, root="rg")
+    lists_files = any(arg.split("=", 1)[0] == "--files" for arg in args)
+    return (operands or ["."]) if lists_files else operands[1:]
+
+
+def _non_option_operands(args: list[str], *, root: str) -> list[str]:
+    """Return the positional operands in args, skipping options and their separate values.
+
+    Scanning stops at the first shell separator or standalone redirection token.
+    """
+    value_options = _option_args_for_root(root)
+    operands: list[str] = []
+    pending_value = False
+    after_double_dash = False
+    for token in args:
+        if pending_value:
+            # This token is the value of the preceding option, not an operand.
+            pending_value = False
+        elif _is_shell_separator(token) or token in {">", ">>", "<", "2>", "2>>"}:
+            break
+        elif after_double_dash:
+            # Everything after "--" is an operand, even if it starts with "-".
+            operands.append(token)
+        elif token == "--":
+            after_double_dash = True
+        elif token.startswith("-"):
+            # "--opt=value" carries its value inline; only bare options consume the next token.
+            pending_value = "=" not in token and token in value_options
+        else:
+            operands.append(token)
+    return operands
+
+
+def _option_args_for_root(root: str) -> set[str]:
+    """Return the options of root that take their value from the following token.
+
+    Roots without an entry yield an empty set.
+    A fresh set is built on every call.
+    """
+    # head and tail accept the same count options.
+    if root in {"head", "tail"}:
+        return {"-c", "-n", "--bytes", "--lines"}
+    # sed: expression and script-file options.
+    if root == "sed":
+        return {"-e", "-f", "--expression", "--file"}
+    if root not in {"grep", "rg"}:
+        return set()
+    # Options that grep and rg share verbatim.
+    shared = {
+        "-A",
+        "-B",
+        "-C",
+        "-e",
+        "-f",
+        "-m",
+        "--after-context",
+        "--before-context",
+        "--context",
+        "--file",
+        "--max-count",
+    }
+    if root == "grep":
+        return shared | {"--regexp"}
+    # Remaining case: rg, which adds glob/type/depth options.
+    return shared | {
+        "-g",
+        "-t",
+        "-T",
+        "--glob",
+        "--max-depth",
+        "--type",
+        "--type-not",
+    }
+
+
+def _read_reader(root: str) -> str:
+    """Return the reader label for root, prefixed by its category (search scan or direct read)."""
+    # Roots listed in SEARCH_READ_ROOTS are search scans; every other root is a direct read.
+    return f"search_path_scan:{root}" if root in SEARCH_READ_ROOTS else f"direct_file_read:{root}"
+
+
+def _modified_path_refs(payload: dict[str, Any]) -> list[dict[str, str]]:
+    """Collect the unique path references a patch_apply_end payload lists as changed.
+
+    Any other payload type yields an empty list.
+    """
+    if payload.get("type") != "patch_apply_end":
+        return []
+    # "changes" is read last, after the plain path-list keys.
+    source_keys = ("changed_paths", "paths", "files", "modified_paths", "changes")
+    refs_by_key: dict[str, dict[str, str]] = {}
+    for key in source_keys:
+        for path in _path_values(payload.get(key)):
+            path_ref = _path_ref_from_token(path)
+            if path_ref is not None:
+                refs_by_key.setdefault(path_ref["path_key"], path_ref)
+    return list(refs_by_key.values())
+
+
+def _path_values(value: object) -> list[str]:
+    """Recursively flatten value into the path strings it contains.
+
+    Strings pass through, lists/tuples are flattened, dicts contribute only path-like keys.
+    """
+    if isinstance(value, str):
+        return [value]
+    if isinstance(value, dict):
+        children = [value.get(k) for k in ("path", "file", "filename", "new_path", "old_path")]
+    elif isinstance(value, (list, tuple)):
+        children = list(value)
+    else:
+        return []
+    return [path for child in children for path in _path_values(child)]
+
+
+def _path_ref_from_token(token: str) -> dict[str, str] | None:
+    """Build a hashed, safely labelled path reference for token, or None if it is rejected."""
+    raw = token.strip()
+    if not raw or raw == "-" or raw.startswith(("$", "`")) or "://" in raw:
+        return None
+    if _is_shell_separator(raw) or _looks_like_assignment(raw):
+        return None
+    if (label := _safe_path_label(raw)) is None:
+        return None
+    digest = hashlib.sha256(raw.encode("utf-8")).hexdigest()[:12]
+    return {"path_key": digest, "path_label": label, "path_hash": digest}
+
+
+def _safe_path_label(token: str) -> str | None:
+    """Return a basename-only label for token, "path" when it looks unsafe, or None if empty."""
+    trimmed = token.rstrip("/")
+    # "." and ".." are kept literally; everything else is reduced to its final component.
+    label = trimmed if trimmed in {".", ".."} else Path(trimmed).name
+    if not label:
+        return None
+    is_sensitive = label.lower().startswith(SENSITIVE_LABEL_PREFIXES)
+    return label if not is_sensitive and SAFE_PATH_LABEL_RE.fullmatch(label) else "path"
+
+
+def _is_shell_separator(token: str) -> bool:  # True when token is exactly &&, ||, ; or |
+    return token in {"&&", "||", ";", "|"}
+
+
+def _path_privacy_metadata() -> dict[str, str]:
+    """Return the static description of how path labels and hashes are exposed in reports."""
+    basename_with_hash = "basename_only_with_hash"
+    metadata = {"label_policy": "basename_only", "hash_policy": "sha256_12"}
+    metadata["normal"] = basename_with_hash
+    metadata["redacted"] = basename_with_hash
+    metadata["strict"] = "hash_available_for_hiding_labels"
+    return metadata
+
+
 def _shell_command_from_payload(payload: dict[str, Any], *, function_name: str) -> str | None:
     if not _is_shell_tool(function_name):
         return None
@@ -804,6 +1390,10 @@ def _int_value(value: object) -> int:
     return 0
 
 
+def _ratio(numerator: int, denominator: int) -> float:  # numerator / denominator as a float
+    return numerator / denominator if denominator else 0.0  # a zero denominator yields 0.0
+
+
 def _int_text(value: object) -> str:
     return f"{_int_value(value):,}"
 
diff --git a/src/codex_usage_tracker/json_contracts.py b/src/codex_usage_tracker/json_contracts.py
index 48c46f0..b6581e0 100644
--- a/src/codex_usage_tracker/json_contracts.py
+++ b/src/codex_usage_tracker/json_contracts.py
@@ -195,6 +195,35 @@
             "notes": list,
         }
     },
+    "codex-usage-tracker-diagnostic-file-reads-v1": {
+        "required": {
+            "section": str,
+            "status": str,
+            "refreshed": bool,
+            "raw_context_included": bool,
+            "snapshot": (dict, NoneType),
+            "summary": (dict, NoneType),
+            "by_reader": list,
+            "top_paths": list,
+            "largest_read_commands": list,
+            "path_privacy": dict,
+            "notes": list,
+        }
+    },
+    "codex-usage-tracker-diagnostic-read-productivity-v1": {
+        "required": {
+            "section": str,
+            "status": str,
+            "refreshed": bool,
+            "raw_context_included": bool,
+            "snapshot": (dict, NoneType),
+            "summary": (dict, NoneType),
+            "by_reader": list,
+            "top_modified_paths": list,
+            "path_privacy": dict,
+            "notes": list,
+        }
+    },
     "codex-usage-tracker-session-v1": {
         "required": {
             "requested_session_id": (str, NoneType),
diff --git a/src/codex_usage_tracker/server.py b/src/codex_usage_tracker/server.py
index ed50661..f3ec522 100644
--- a/src/codex_usage_tracker/server.py
+++ b/src/codex_usage_tracker/server.py
@@ -37,7 +37,9 @@
 )
 from codex_usage_tracker.diagnostic_snapshots import (
     build_diagnostic_commands_report,
+    build_diagnostic_file_reads_report,
     build_diagnostic_overview_report,
+    build_diagnostic_read_productivity_report,
     build_diagnostic_tool_output_report,
 )
 from codex_usage_tracker.i18n import normalize_language
@@ -318,6 +320,12 @@ def do_GET(self) -> None:  # noqa: N802 - stdlib hook name
         if parsed.path == "/api/diagnostics/commands":
             self._handle_diagnostics_commands(parsed.query)
             return
+        if parsed.path == "/api/diagnostics/file-reads":
+            self._handle_diagnostics_file_reads(parsed.query)
+            return
+        if parsed.path == "/api/diagnostics/read-productivity":
+            self._handle_diagnostics_read_productivity(parsed.query)
+            return
         if parsed.path == "/api/usage":
             self._handle_usage(parsed.query)
             return
@@ -343,6 +351,12 @@ def do_POST(self) -> None:  # noqa: N802 - stdlib hook name
         if parsed.path == "/api/diagnostics/commands/refresh":
             self._handle_diagnostics_commands_refresh(parsed.query)
             return
+        if parsed.path == "/api/diagnostics/file-reads/refresh":
+            self._handle_diagnostics_file_reads_refresh(parsed.query)
+            return
+        if parsed.path == "/api/diagnostics/read-productivity/refresh":
+            self._handle_diagnostics_read_productivity_refresh(parsed.query)
+            return
         self._send_json(HTTPStatus.NOT_FOUND, {"error": "Unknown API endpoint"})
 
     def end_headers(self) -> None:
@@ -1021,6 +1035,38 @@ def _handle_diagnostics_commands_refresh(self, query: str) -> None:
             label="diagnostic commands",
         )
 
+    def _handle_diagnostics_file_reads(self, query: str) -> None:  # GET /api/diagnostics/file-reads
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_file_reads_report,
+            refresh=False,  # the GET route does not request a refresh
+            label="diagnostic file reads",
+        )
+
+    def _handle_diagnostics_file_reads_refresh(self, query: str) -> None:  # POST .../file-reads/refresh
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_file_reads_report,
+            refresh=True,  # the POST refresh route asks the builder to refresh
+            label="diagnostic file reads",
+        )
+
+    def _handle_diagnostics_read_productivity(self, query: str) -> None:  # GET .../read-productivity
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_read_productivity_report,
+            refresh=False,  # the GET route does not request a refresh
+            label="diagnostic read productivity",
+        )
+
+    def _handle_diagnostics_read_productivity_refresh(self, query: str) -> None:  # POST .../refresh
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_read_productivity_report,
+            refresh=True,  # the POST refresh route asks the builder to refresh
+            label="diagnostic read productivity",
+        )
+
     def _handle_diagnostic_snapshot(
         self,
         query: str,
diff --git a/tests/test_cli_lifecycle.py b/tests/test_cli_lifecycle.py
index 3ace0ba..63164ea 100644
--- a/tests/test_cli_lifecycle.py
+++ b/tests/test_cli_lifecycle.py
@@ -418,6 +418,24 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
         "--refresh",
         "--json",
     )
+    file_reads_refresh = _run_cli(
+        tmp_path,
+        "--db",
+        str(db_path),
+        "diagnostics",
+        "file-reads",
+        "--refresh",
+        "--json",
+    )
+    read_productivity_refresh = _run_cli(
+        tmp_path,
+        "--db",
+        str(db_path),
+        "diagnostics",
+        "read-productivity",
+        "--refresh",
+        "--json",
+    )
     fact_calls = _run_cli(
         tmp_path,
         "--db",
@@ -442,6 +460,8 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
     overview_refresh_payload = json.loads(overview_refresh.stdout)
     tool_output_refresh_payload = json.loads(tool_output_refresh.stdout)
     commands_refresh_payload = json.loads(commands_refresh.stdout)
+    file_reads_refresh_payload = json.loads(file_reads_refresh.stdout)
+    read_productivity_refresh_payload = json.loads(read_productivity_refresh.stdout)
     fact_calls_payload = json.loads(fact_calls.stdout)
     for payload in (
         summary_payload,
@@ -452,6 +472,8 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
         overview_refresh_payload,
         tool_output_refresh_payload,
         commands_refresh_payload,
+        file_reads_refresh_payload,
+        read_productivity_refresh_payload,
         fact_calls_payload,
     ):
         _assert_contract(payload)
@@ -499,6 +521,16 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
         "child": "status",
         "count": 1,
     }
+    assert (
+        file_reads_refresh_payload["schema"]
+        == "codex-usage-tracker-diagnostic-file-reads-v1"
+    )
+    assert file_reads_refresh_payload["summary"]["read_events"] == 0
+    assert (
+        read_productivity_refresh_payload["schema"]
+        == "codex-usage-tracker-diagnostic-read-productivity-v1"
+    )
+    assert read_productivity_refresh_payload["summary"]["read_events_modified_later"] == 0
     assert fact_calls_payload["view"] == "fact-calls"
     assert fact_calls_payload["filters"]["privacy_mode"] == "strict"
     assert fact_calls_payload["rows"][0]["cwd"].startswith("[redacted cwd:")
diff --git a/tests/test_dashboard_server.py b/tests/test_dashboard_server.py
index 7da7815..7c924b4 100644
--- a/tests/test_dashboard_server.py
+++ b/tests/test_dashboard_server.py
@@ -105,6 +105,18 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
             data=b"",
             method="POST",
         )
+        diagnostic_file_reads_refresh_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/file-reads/refresh",
+            headers={"X-Codex-Usage-Token": "test-token"},
+            data=b"",
+            method="POST",
+        )
+        diagnostic_read_productivity_refresh_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/read-productivity/refresh",
+            headers={"X-Codex-Usage-Token": "test-token"},
+            data=b"",
+            method="POST",
+        )
         diagnostic_stored_payload = _read_json(
             f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview"
         )
@@ -114,6 +126,12 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
         diagnostic_commands_stored_payload = _read_json(
             f"http://127.0.0.1:{server.server_port}/api/diagnostics/commands"
         )
+        diagnostic_file_reads_stored_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/file-reads"
+        )
+        diagnostic_read_productivity_stored_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/read-productivity"
+        )
         diagnostic_computed_at = diagnostic_stored_payload["snapshot"]["computed_at"]
         with urllib.request.urlopen(  # noqa: S310 - local test server only
             urllib.request.Request(
@@ -182,12 +200,28 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
     )
     assert diagnostic_commands_refresh_payload["status"] == "ready"
     assert diagnostic_commands_refresh_payload["summary"]["shell_function_calls"] == 0
+    assert (
+        diagnostic_file_reads_refresh_payload["schema"]
+        == "codex-usage-tracker-diagnostic-file-reads-v1"
+    )
+    assert diagnostic_file_reads_refresh_payload["status"] == "ready"
+    assert diagnostic_file_reads_refresh_payload["summary"]["read_events"] == 0
+    assert (
+        diagnostic_read_productivity_refresh_payload["schema"]
+        == "codex-usage-tracker-diagnostic-read-productivity-v1"
+    )
+    assert diagnostic_read_productivity_refresh_payload["status"] == "ready"
+    assert diagnostic_read_productivity_refresh_payload["summary"]["read_events"] == 0
     assert diagnostic_stored_payload["status"] == "ready"
     assert diagnostic_stored_payload["refreshed"] is False
     assert diagnostic_tool_output_stored_payload["status"] == "ready"
     assert diagnostic_tool_output_stored_payload["refreshed"] is False
     assert diagnostic_commands_stored_payload["status"] == "ready"
     assert diagnostic_commands_stored_payload["refreshed"] is False
+    assert diagnostic_file_reads_stored_payload["status"] == "ready"
+    assert diagnostic_file_reads_stored_payload["refreshed"] is False
+    assert diagnostic_read_productivity_stored_payload["status"] == "ready"
+    assert diagnostic_read_productivity_stored_payload["refreshed"] is False
     assert second_usage_refresh_payload["refresh_result"]["parsed_events"] == 0
     assert diagnostic_after_second_usage_refresh["snapshot"]["computed_at"] == diagnostic_computed_at
     assert len(limited_payload["rows"]) == 2
diff --git a/tests/test_diagnostic_snapshots.py b/tests/test_diagnostic_snapshots.py
index b8a668a..a3cc31e 100644
--- a/tests/test_diagnostic_snapshots.py
+++ b/tests/test_diagnostic_snapshots.py
@@ -15,7 +15,9 @@
 from codex_usage_tracker.diagnostic_snapshots import (
     DIAGNOSTIC_OVERVIEW_SECTION,
     build_diagnostic_commands_report,
+    build_diagnostic_file_reads_report,
     build_diagnostic_overview_report,
+    build_diagnostic_read_productivity_report,
     build_diagnostic_tool_output_report,
 )
 from codex_usage_tracker.store import (
@@ -221,6 +223,136 @@ def test_tool_output_and_command_snapshots_use_safe_aggregate_labels(
     assert "/tmp/private-diagnostics" not in serialized
 
 
+def test_file_read_snapshots_allocate_tokens_and_correlate_later_modifications(
+    tmp_path: Path,
+) -> None:
+    codex_home = tmp_path / ".codex"
+    log_path = (
+        codex_home
+        / "sessions"
+        / "2026"
+        / "05"
+        / "17"
+        / f"rollout-2026-05-17T14-58-23-{SESSION_ID}.jsonl"
+    )
+    _write_jsonl(
+        log_path,
+        [
+            _entry("session_meta", {"id": SESSION_ID}),
+            _entry(
+                "turn_context",
+                {
+                    "turn_id": "turn-a",
+                    "model": "gpt-5.5",
+                    "cwd": "/tmp/private-diagnostics",
+                },
+            ),
+            _function_call("call-cat", "cat src/app.py /tmp/private/readme.md"),
+            _function_output("call-cat", _terminal_output(90)),  # two paths: expected 45 tokens each
+            _function_call("call-sed", "sed -n '1,120p' src/app.py"),
+            _function_output("call-sed", _terminal_output(30)),
+            _function_call("call-nl", "nl -ba src/app.py"),
+            _function_output("call-nl", _terminal_output(10)),
+            _function_call("call-rg", "rg -n SECRET_PATTERN src tests"),
+            _function_output("call-rg", _terminal_output(80)),  # two paths: expected 40 tokens each
+            _function_call("call-find", "find src -name '*.py'"),
+            _function_output("call-find", _terminal_output(20)),
+            _function_call("call-missing", "cat docs/notes.md"),
+            _function_output("call-missing", "plain read output SECRET_OUTPUT"),  # expected to count as missing
+            _entry(
+                "event_msg",
+                {
+                    "type": "patch_apply_end",
+                    "changed_paths": ["src/app.py", "/tmp/private/readme.md"],
+                    "patch": "SECRET PATCH TEXT",
+                },
+            ),
+            _token_event(100, 100),
+        ],
+    )
+    db_path = tmp_path / "usage.sqlite3"
+    refresh_usage_index(codex_home=codex_home, db_path=db_path)
+
+    missing = build_diagnostic_file_reads_report(db_path=db_path).payload  # built before any refresh=True call
+    file_reads = build_diagnostic_file_reads_report(db_path=db_path, refresh=True).payload
+    read_productivity = build_diagnostic_read_productivity_report(
+        db_path=db_path,
+        refresh=True,
+    ).payload
+
+    _assert_contract(missing)
+    _assert_contract(file_reads)
+    _assert_contract(read_productivity)
+    assert missing["status"] == "missing"
+    assert file_reads["status"] == "ready"
+    assert file_reads["summary"]["read_commands"] == 6
+    assert file_reads["summary"]["read_events"] == 8  # cat 2 + sed 1 + nl 1 + rg 2 + find 1 + cat 1
+    assert file_reads["summary"]["unique_paths_read"] == 5
+    assert file_reads["summary"]["read_events_with_output_count"] == 7
+    assert file_reads["summary"]["read_events_missing_output_count"] == 1
+    assert file_reads["summary"]["allocated_output_token_sum"] == 230  # 90 + 30 + 10 + 80 + 20
+
+    by_reader = {row["reader"]: row for row in file_reads["by_reader"]}
+    assert by_reader["direct_file_read:cat"]["read_events"] == 3
+    assert by_reader["direct_file_read:cat"]["events_missing_output_count"] == 1
+    assert by_reader["direct_file_read:cat"]["allocated_output_token_sum"] == 90
+    assert by_reader["search_path_scan:rg"]["allocated_output_token_sum"] == 80
+    assert by_reader["search_path_scan:find"]["allocated_output_token_sum"] == 20
+
+    paths = {row["path_label"]: row for row in file_reads["top_paths"]}
+    assert paths["app.py"]["read_events"] == 3
+    assert paths["app.py"]["allocated_output_token_sum"] == 85  # 45 (cat) + 30 (sed) + 10 (nl)
+    assert paths["readme.md"]["allocated_output_token_sum"] == 45
+    assert paths["src"]["allocated_output_token_sum"] == 60  # 40 (rg) + 20 (find)
+    assert paths["tests"]["allocated_output_token_sum"] == 40
+
+    assert file_reads["largest_read_commands"][0]["root"] == "cat"
+    assert file_reads["largest_read_commands"][0]["original_token_count"] == 90
+
+    assert read_productivity["summary"]["read_events"] == 8
+    assert read_productivity["summary"]["read_events_modified_later"] == 4  # app.py x3 + readme.md x1
+    assert read_productivity["summary"]["read_events_modified_later_pct"] == 0.5
+    assert read_productivity["summary"]["unique_paths_modified_later"] == 2
+    productivity_by_reader = {row["reader"]: row for row in read_productivity["by_reader"]}
+    assert productivity_by_reader["direct_file_read:cat"]["read_events_modified_later"] == 2
+    assert productivity_by_reader["direct_file_read:sed"]["read_events_modified_later"] == 1
+    assert productivity_by_reader["direct_file_read:nl"]["read_events_modified_later"] == 1
+    modified_paths = {row["path_label"]: row for row in read_productivity["top_modified_paths"]}
+    assert modified_paths["app.py"]["read_events_modified_later"] == 3
+    assert modified_paths["readme.md"]["read_events_modified_later"] == 1
+    assert "temporal correlations" in read_productivity["summary"]["correlation_note"]
+
+    serialized = json.dumps([file_reads, read_productivity], sort_keys=True)  # privacy checks on the full payloads
+    assert "SECRET" not in serialized
+    assert "src/app.py" not in serialized
+    assert "/tmp/private" not in serialized
+    assert "1,120p" not in serialized
+    assert "SECRET PATCH TEXT" not in serialized
+
+
+def _function_call(call_id: str, command: str) -> dict[str, object]:
+    """Build a response_item entry recording an exec_command function call for command."""
+    payload = {
+        "type": "function_call",
+        "call_id": call_id,
+        "name": "exec_command",
+        # The command travels as a JSON-encoded arguments string under "cmd".
+        "arguments": json.dumps({"cmd": command}),
+    }
+    return _entry("response_item", payload)
+
+
+def _function_output(call_id: str, output: str) -> dict[str, object]:
+    """Build a response_item entry carrying the output recorded for a function call."""
+    payload = {
+        "type": "function_call_output",
+        # Tests reuse the call_id passed to _function_call for the matching call.
+        "call_id": call_id,
+        "output": output,
+    }
+    return _entry("response_item", payload)
+
+
 def _terminal_output(count: int) -> str:
     return (
         "Chunk ID: abc123\n"

From 49ba0471c39a193dd68c798c011b5c88821084d6 Mon Sep 17 00:00:00 2001
From: Monsky <douglas.monsky@gmail.com>
Date: Sat, 20 Jun 2026 19:03:55 -0400
Subject: [PATCH 04/10] feat: add diagnostic concentration reports

---
 docs/cli-json-schemas.md                      |  43 +++
 docs/cli-reference.md                         |   3 +-
 docs/dashboard-guide.md                       |   3 +-
 docs/privacy.md                               |   2 +-
 src/codex_usage_tracker/cli.py                |   7 +
 src/codex_usage_tracker/cli_parser.py         |  12 +
 .../diagnostic_snapshots.py                   | 354 +++++++++++++++++-
 src/codex_usage_tracker/json_contracts.py     |  15 +
 src/codex_usage_tracker/server.py             |  23 ++
 tests/test_cli_lifecycle.py                   |  17 +
 tests/test_dashboard_server.py                |  17 +
 tests/test_diagnostic_snapshots.py            | 109 ++++++
 12 files changed, 601 insertions(+), 4 deletions(-)

diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md
index fb9a92e..61219d7 100644
--- a/docs/cli-json-schemas.md
+++ b/docs/cli-json-schemas.md
@@ -52,6 +52,7 @@ Tracked schema ids:
 | `codex-usage-tracker-diagnostic-commands-v1` | CLI `diagnostics commands --json`, dashboard server `/api/diagnostics/commands` |
 | `codex-usage-tracker-diagnostic-file-reads-v1` | CLI `diagnostics file-reads --json`, dashboard server `/api/diagnostics/file-reads` |
 | `codex-usage-tracker-diagnostic-read-productivity-v1` | CLI `diagnostics read-productivity --json`, dashboard server `/api/diagnostics/read-productivity` |
+| `codex-usage-tracker-diagnostic-concentration-v1` | CLI `diagnostics concentration --json`, dashboard server `/api/diagnostics/concentration` |
 | `codex-usage-tracker-session-v1` | CLI `session --json`, MCP `session_usage(response_format="json")` |
 | `codex-usage-tracker-context-v1` | CLI `context`, MCP `usage_call_context` when raw context is explicitly enabled |
 | `codex-usage-tracker-context-disabled-v1` | MCP `usage_call_context` when raw context is disabled |
@@ -495,6 +496,48 @@ Schema: `codex-usage-tracker-diagnostic-read-productivity-v1`
 
 Read productivity is a temporal correlation, not causation. A read is counted as modified later only when the same privacy-preserving path key appears in a later structured patch event in the same source log.
 
+## Diagnostic Concentration Snapshot
+
+Commands:
+
+```bash
+codex-usage-tracker diagnostics concentration --json
+codex-usage-tracker diagnostics concentration --refresh --json
+```
+
+Dashboard server API:
+
+- `GET /api/diagnostics/concentration`
+- `POST /api/diagnostics/concentration/refresh`
+
+Schema: `codex-usage-tracker-diagnostic-concentration-v1`
+
+```json
+{
+  "schema": "codex-usage-tracker-diagnostic-concentration-v1",
+  "section": "concentration",
+  "status": "ready",
+  "refreshed": false,
+  "raw_context_included": false,
+  "snapshot": {},
+  "summary": {
+    "usage_rows": 4,
+    "total_tokens": 100,
+    "dimension_count": 3,
+    "history_scope": "active"
+  },
+  "metrics": [
+    {"metric": "top_1_source_log_share", "dimension": "source_log", "top_n": 1, "share": 0.5}
+  ],
+  "dimensions": [],
+  "largest_impact_rows": [],
+  "privacy": {},
+  "notes": []
+}
+```
+
+The concentration snapshot computes top-1/top-3/top-5 share and effective group count by source log/session, cwd/project label, and day. Source log labels use session-id prefixes or source hashes, cwd labels use basename-only labels, and raw source paths/cwd paths are not included.
+
 ## Pricing Coverage
 
 Command:
diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index 58d2c62..bc69420 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -122,12 +122,13 @@ codex-usage-tracker diagnostics compactions
 codex-usage-tracker diagnostics tools
 codex-usage-tracker diagnostics file-reads --refresh
 codex-usage-tracker diagnostics read-productivity --refresh
+codex-usage-tracker diagnostics concentration --refresh
 codex-usage-tracker diagnostics fact-calls --fact-type compaction --fact-name post_compaction
 ```
 
 Diagnostics expose structured event patterns and their associated token totals. They can show compactions, tool/function/MCP activity, safe command families, structured skill labels, patch outcomes, task completion, search/read loops, and aborted or rolled-back turns. Associated totals are not causal allocations and are not additive when one model call has multiple diagnostic facts.
 
-Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, raw absolute paths, or JSONL fragments. File-read diagnostics use basename-only path labels plus short irreversible hashes, and read-productivity percentages are temporal correlations rather than proof that a read caused a later edit.
+Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, raw absolute paths, or JSONL fragments. File-read diagnostics use basename-only path labels plus short irreversible hashes, read-productivity percentages are temporal correlations rather than proof that a read caused a later edit, and concentration reports use safe source/session, cwd, and day labels only.
 
 ## JSON Queries
 
diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md
index 0ec462c..70cb0ad 100644
--- a/docs/dashboard-guide.md
+++ b/docs/dashboard-guide.md
@@ -133,8 +133,9 @@ Use `Diagnostics` view when you want to see what structured event patterns are h
 - The tab consumes the localhost `/api/diagnostics/*` endpoints; static file dashboards show a live-API unavailable state.
 - The first table shows top diagnostic facts by associated uncached input tokens. Tool/function/MCP/command-family and compaction sections expose narrower slices of the same fact data.
 - Command diagnostics store only a command family such as `pytest`, `git`, or `unknown_command`. Skill and MCP labels are detected only when they are present as structured event metadata.
-- Newer on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, and `read-productivity`). Heavy recomputation happens only through each section's explicit refresh endpoint.
+- Newer on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, `read-productivity`, and `concentration`). Heavy recomputation happens only through each section's explicit refresh endpoint.
 - File-read snapshots use basename-only path labels and short hashes. Read-productivity rates are temporal correlations between earlier reads and later structured patch events, not causation.
+- Concentration snapshots show top-N share and effective group count by source log/session, cwd/project label, and day without exposing raw source-log or cwd paths.
 - Click `Calls` on a fact row to load associated model calls. Call links and largest-call links open the Call Investigator, where raw context remains explicit and on demand.
 - Associated token totals are not causal allocations and are not additive when one call has multiple diagnostic facts.
 
diff --git a/docs/privacy.md b/docs/privacy.md
index 80a9c0a..748c4c6 100644
--- a/docs/privacy.md
+++ b/docs/privacy.md
@@ -35,7 +35,7 @@ Call-origin metadata is heuristic and confidence-labeled. It stores categories s
 
 Diagnostic facts follow the same aggregate-only rule. They can store safe structured labels such as `patch_applied`, `function_call_output`, `post_compaction`, MCP tool/server labels, structured skill labels, and command families such as `pytest`, `git`, or `unknown_command`, along with event counts and source line ranges. Command text may be classified in memory during parsing, but it is not persisted. Diagnostic facts do not store tool arguments, command text, command output, patch text, prompt or assistant text, file contents, raw JSONL fragments, or raw context evidence.
 
-On-demand diagnostic snapshots follow the same boundary. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation.
+On-demand diagnostic snapshots follow the same boundary. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths.
 
 ## On-Demand Context
 
diff --git a/src/codex_usage_tracker/cli.py b/src/codex_usage_tracker/cli.py
index 85b9776..a0d06f1 100644
--- a/src/codex_usage_tracker/cli.py
+++ b/src/codex_usage_tracker/cli.py
@@ -31,6 +31,7 @@
 )
 from codex_usage_tracker.diagnostic_snapshots import (
     build_diagnostic_commands_report,
+    build_diagnostic_concentration_report,
     build_diagnostic_file_reads_report,
     build_diagnostic_overview_report,
     build_diagnostic_read_productivity_report,
@@ -486,6 +487,12 @@ def _run_diagnostics(args: argparse.Namespace) -> int:
             include_archived=args.include_archived,
             refresh=args.refresh,
         )
+    elif command == "concentration":
+        report = build_diagnostic_concentration_report(
+            db_path=args.db,
+            include_archived=args.include_archived,
+            refresh=args.refresh,
+        )
     else:
         raise ValueError(f"unknown diagnostics command: {command}")
 
diff --git a/src/codex_usage_tracker/cli_parser.py b/src/codex_usage_tracker/cli_parser.py
index 9c41051..1721dd0 100644
--- a/src/codex_usage_tracker/cli_parser.py
+++ b/src/codex_usage_tracker/cli_parser.py
@@ -387,6 +387,18 @@ def _add_diagnostics_parser(
     )
     read_productivity.add_argument("--json", action="store_true", dest="as_json")
 
+    concentration = diagnostic_subparsers.add_parser(
+        "concentration",
+        help="Show concentration of token impact by source log, cwd, and day",
+    )
+    concentration.add_argument("--include-archived", action="store_true")
+    concentration.add_argument(
+        "--refresh",
+        action="store_true",
+        help="Recompute and persist the concentration snapshot before reading it.",
+    )
+    concentration.add_argument("--json", action="store_true", dest="as_json")
+
     fact_calls = diagnostic_subparsers.add_parser(
         "fact-calls",
         help="List calls associated with one diagnostic fact",
diff --git a/src/codex_usage_tracker/diagnostic_snapshots.py b/src/codex_usage_tracker/diagnostic_snapshots.py
index 81c7402..3af5fd1 100644
--- a/src/codex_usage_tracker/diagnostic_snapshots.py
+++ b/src/codex_usage_tracker/diagnostic_snapshots.py
@@ -25,11 +25,13 @@
 DIAGNOSTIC_COMMANDS_SCHEMA = "codex-usage-tracker-diagnostic-commands-v1"
 DIAGNOSTIC_FILE_READS_SCHEMA = "codex-usage-tracker-diagnostic-file-reads-v1"
 DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA = "codex-usage-tracker-diagnostic-read-productivity-v1"
+DIAGNOSTIC_CONCENTRATION_SCHEMA = "codex-usage-tracker-diagnostic-concentration-v1"
 DIAGNOSTIC_OVERVIEW_SECTION = "overview"
 DIAGNOSTIC_TOOL_OUTPUT_SECTION = "tool-output"
 DIAGNOSTIC_COMMANDS_SECTION = "commands"
 DIAGNOSTIC_FILE_READS_SECTION = "file-reads"
 DIAGNOSTIC_READ_PRODUCTIVITY_SECTION = "read-productivity"
+DIAGNOSTIC_CONCENTRATION_SECTION = "concentration"
 DIAGNOSTIC_HISTORY_ACTIVE = "active"
 DIAGNOSTIC_HISTORY_ALL = "all"
 DIAGNOSTIC_SNAPSHOT_NOTES = [
@@ -82,6 +84,8 @@ def render(self) -> str:
             return self._render_file_reads()
         if section == DIAGNOSTIC_READ_PRODUCTIVITY_SECTION:
             return self._render_read_productivity()
+        if section == DIAGNOSTIC_CONCENTRATION_SECTION:
+            return self._render_concentration()
         return self._render_overview()
 
     def _render_overview(self) -> str:
@@ -159,6 +163,20 @@ def _render_read_productivity(self) -> str:
             ]
         )
 
+    def _render_concentration(self) -> str:
+        snapshot = self.payload.get("snapshot") or {}
+        summary = self.payload.get("summary") or {}
+        return "\n".join(
+            [
+                "Diagnostic concentration snapshot",
+                f"Computed: {snapshot.get('computed_at')}",
+                f"History scope: {snapshot.get('history_scope')}",
+                f"Usage rows: {_int_text(summary.get('usage_rows'))}",
+                f"Total tokens: {_int_text(summary.get('total_tokens'))}",
+                f"Dimensions: {_int_text(summary.get('dimension_count'))}",
+            ]
+        )
+
 
 def build_diagnostic_overview_report(
     *,
@@ -251,6 +269,31 @@ def build_diagnostic_read_productivity_report(
     )
 
 
+def build_diagnostic_concentration_report(
+    *,
+    db_path: Path = DEFAULT_DB_PATH,
+    include_archived: bool = False,
+    refresh: bool = False,
+) -> DiagnosticSnapshotReport:
+    """Return the latest concentration snapshot, optionally recomputing it first."""
+
+    if refresh:
+        return DiagnosticSnapshotReport(
+            _refresh_concentration_snapshot(
+                db_path=db_path,
+                include_archived=include_archived,
+            )
+        )
+    return DiagnosticSnapshotReport(
+        _source_log_snapshot_payload(
+            db_path=db_path,
+            include_archived=include_archived,
+            section=DIAGNOSTIC_CONCENTRATION_SECTION,
+            schema=DIAGNOSTIC_CONCENTRATION_SCHEMA,
+        )
+    )
+
+
 def refresh_diagnostic_overview_snapshot(
     *,
     db_path: Path = DEFAULT_DB_PATH,
@@ -391,6 +434,44 @@ def _refresh_source_log_snapshot(
     return payload
 
 
+def _refresh_concentration_snapshot(
+    *,
+    db_path: Path,
+    include_archived: bool,
+) -> dict[str, Any]:
+    history_scope = _history_scope(include_archived)
+    computed_at = _utc_now()
+    analysis = _compute_concentration(db_path=db_path, include_archived=include_archived)
+    snapshot = _snapshot_metadata(
+        computed_at=computed_at,
+        history_scope=history_scope,
+        source_logs_scanned=analysis["meta"]["source_logs_scanned"],
+        usage_rows_scanned=analysis["summary"]["usage_rows"],
+    )
+    payload = _ready_payload(
+        schema=DIAGNOSTIC_CONCENTRATION_SCHEMA,
+        section=DIAGNOSTIC_CONCENTRATION_SECTION,
+        snapshot=snapshot,
+        refreshed=True,
+        summary=analysis["summary"],
+        metrics=analysis["metrics"],
+        dimensions=analysis["dimensions"],
+        largest_impact_rows=analysis["largest_impact_rows"],
+        privacy=analysis["privacy"],
+    )
+    upsert_diagnostic_snapshot(
+        db_path=db_path,
+        section=DIAGNOSTIC_CONCENTRATION_SECTION,
+        history_scope=history_scope,
+        payload=payload,
+        computed_at=computed_at,
+        source_logs_scanned=analysis["meta"]["source_logs_scanned"],
+        usage_rows_scanned=analysis["summary"]["usage_rows"],
+        raw_content_included=False,
+    )
+    return payload
+
+
 def diagnostic_overview_payload(
     *,
     db_path: Path = DEFAULT_DB_PATH,
@@ -567,6 +648,12 @@ def _missing_payload(
         payload["by_reader"] = []
         payload["top_modified_paths"] = []
         payload["path_privacy"] = _path_privacy_metadata()
+    elif section == DIAGNOSTIC_CONCENTRATION_SECTION:
+        payload["summary"] = None
+        payload["metrics"] = []
+        payload["dimensions"] = []
+        payload["largest_impact_rows"] = []
+        payload["privacy"] = _concentration_privacy_metadata()
     return payload
 
 
@@ -821,6 +908,99 @@ def _analyze_indexed_source_logs(
     }
 
 
+def _compute_concentration(
+    *,
+    db_path: Path,
+    include_archived: bool,
+) -> dict[str, Any]:
+    where = "" if include_archived else "WHERE is_archived = 0"
+    with connect(db_path) as conn:
+        init_db(conn)
+        rows = conn.execute(
+            f"""
+            SELECT
+                record_id,
+                session_id,
+                event_timestamp,
+                source_file,
+                cwd,
+                total_tokens
+            FROM usage_events
+            {where}
+            ORDER BY event_timestamp, record_id
+            """
+        ).fetchall()
+        source_row = conn.execute(
+            f"SELECT COUNT(DISTINCT source_file) AS source_logs_scanned FROM usage_events {where}"
+        ).fetchone()
+
+    source_groups: dict[str, dict[str, Any]] = {}
+    cwd_groups: dict[str, dict[str, Any]] = {}
+    day_groups: dict[str, dict[str, Any]] = {}
+    total_tokens = 0
+    for row in rows:
+        tokens = _int_value(row["total_tokens"])
+        total_tokens += tokens
+        record_id = str(row["record_id"])
+        session_id = _optional_str(row["session_id"])
+        _add_concentration_row(
+            source_groups,
+            key=_source_group_key(row["source_file"]),
+            label=_source_group_label(row["source_file"], session_id=session_id),
+            group_hash=_source_group_hash(row["source_file"]),
+            tokens=tokens,
+            record_id=record_id,
+            session_id=session_id,
+        )
+        cwd_ref = _cwd_group_ref(row["cwd"])
+        _add_concentration_row(
+            cwd_groups,
+            key=cwd_ref["group_hash"],
+            label=cwd_ref["label"],
+            group_hash=cwd_ref["group_hash"],
+            tokens=tokens,
+            record_id=record_id,
+            session_id=session_id,
+        )
+        day = _day_label(row["event_timestamp"])
+        _add_concentration_row(
+            day_groups,
+            key=day,
+            label=day,
+            group_hash=_stable_hash(day),
+            tokens=tokens,
+            record_id=record_id,
+            session_id=session_id,
+        )
+
+    dimensions = [
+        _concentration_dimension(
+            "source_log",
+            "Source Log / Session",
+            source_groups,
+            total_tokens=total_tokens,
+        ),
+        _concentration_dimension("cwd", "Cwd / Project", cwd_groups, total_tokens=total_tokens),
+        _concentration_dimension("day", "Day", day_groups, total_tokens=total_tokens),
+    ]
+    metrics = _concentration_metrics(dimensions)
+    return {
+        "meta": {
+            "source_logs_scanned": _int_value(source_row["source_logs_scanned"]),
+        },
+        "summary": {
+            "usage_rows": len(rows),
+            "total_tokens": total_tokens,
+            "dimension_count": len(dimensions),
+            "history_scope": _history_scope(include_archived),
+        },
+        "metrics": metrics,
+        "dimensions": dimensions,
+        "largest_impact_rows": _largest_impact_rows(dimensions),
+        "privacy": _concentration_privacy_metadata(),
+    }
+
+
 def _indexed_source_logs(
     *,
     db_path: Path,
@@ -893,6 +1073,160 @@ def _command_rows(
     return sorted(rows, key=lambda row: (-int(row["total"]), row["root"]))
 
 
+def _add_concentration_row(
+    groups: dict[str, dict[str, Any]],
+    *,
+    key: str,
+    label: str,
+    group_hash: str,
+    tokens: int,
+    record_id: str,
+    session_id: str | None,
+) -> None:
+    group = groups.setdefault(
+        key,
+        {
+            "label": label,
+            "group_hash": group_hash,
+            "total_tokens": 0,
+            "usage_rows": 0,
+            "largest_record_id": None,
+            "largest_call_tokens": 0,
+            "session_ids": set(),
+        },
+    )
+    group["total_tokens"] = int(group["total_tokens"]) + tokens
+    group["usage_rows"] = int(group["usage_rows"]) + 1
+    if tokens > int(group["largest_call_tokens"]):
+        group["largest_call_tokens"] = tokens
+        group["largest_record_id"] = record_id
+    if session_id:
+        group["session_ids"].add(session_id)
+
+
+def _concentration_dimension(
+    dimension: str,
+    label: str,
+    groups: dict[str, dict[str, Any]],
+    *,
+    total_tokens: int,
+) -> dict[str, Any]:
+    rows = [_concentration_group_row(dimension, group, total_tokens=total_tokens) for group in groups.values()]
+    rows = sorted(
+        rows,
+        key=lambda row: (-int(row["total_tokens"]), -int(row["usage_rows"]), row["label"]),
+    )
+    return {
+        "dimension": dimension,
+        "label": label,
+        "group_count": len(rows),
+        "total_tokens": total_tokens,
+        "top_1_share": _top_share(rows, 1, total_tokens=total_tokens),
+        "top_3_share": _top_share(rows, 3, total_tokens=total_tokens),
+        "top_5_share": _top_share(rows, 5, total_tokens=total_tokens),
+        "effective_group_count": _effective_group_count(rows, total_tokens=total_tokens),
+        "top_rows": rows[:10],
+    }
+
+
+def _concentration_group_row(
+    dimension: str,
+    group: dict[str, Any],
+    *,
+    total_tokens: int,
+) -> dict[str, Any]:
+    session_ids = sorted(group["session_ids"])
+    return {
+        "dimension": dimension,
+        "label": group["label"],
+        "group_hash": group["group_hash"],
+        "usage_rows": int(group["usage_rows"]),
+        "total_tokens": int(group["total_tokens"]),
+        "share": _rounded_ratio(int(group["total_tokens"]), total_tokens),
+        "largest_record_id": group["largest_record_id"],
+        "largest_call_tokens": int(group["largest_call_tokens"]),
+        "session_id": session_ids[0] if len(session_ids) == 1 else None,
+    }
+
+
+def _concentration_metrics(dimensions: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    rows: list[dict[str, Any]] = []
+    for dimension in dimensions:
+        dimension_key = str(dimension["dimension"])
+        for top_n in (1, 3, 5):
+            rows.append(
+                {
+                    "metric": f"top_{top_n}_{dimension_key}_share",
+                    "dimension": dimension_key,
+                    "top_n": top_n,
+                    "share": dimension[f"top_{top_n}_share"],
+                }
+            )
+    return rows
+
+
+def _largest_impact_rows(dimensions: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    rows: list[dict[str, Any]] = []
+    for dimension in dimensions:
+        for row in dimension["top_rows"]:
+            rows.append(dict(row))
+    return sorted(
+        rows,
+        key=lambda row: (-float(row["share"]), -int(row["total_tokens"]), row["dimension"], row["label"]),
+    )[:15]
+
+
+def _top_share(
+    rows: list[dict[str, Any]],
+    top_n: int,
+    *,
+    total_tokens: int,
+) -> float:
+    return _rounded_ratio(sum(int(row["total_tokens"]) for row in rows[:top_n]), total_tokens)
+
+
+def _effective_group_count(
+    rows: list[dict[str, Any]],
+    *,
+    total_tokens: int,
+) -> float:
+    if total_tokens <= 0:
+        return 0.0
+    hhi = sum((int(row["total_tokens"]) / total_tokens) ** 2 for row in rows)
+    return round(1 / hhi, 6) if hhi else 0.0
+
+
+def _source_group_key(value: object) -> str:
+    return _source_group_hash(value)
+
+
+def _source_group_hash(value: object) -> str:
+    source = value if isinstance(value, str) and value else "unknown_source"
+    return _stable_hash(source)
+
+
+def _source_group_label(value: object, *, session_id: str | None) -> str:
+    if session_id:
+        return f"session:{session_id[:8]}"
+    return f"source:{_source_group_hash(value)}"
+
+
+def _cwd_group_ref(value: object) -> dict[str, str]:
+    if isinstance(value, str) and value:
+        path_ref = _path_ref_from_token(value)
+        if path_ref is not None:
+            return {"label": path_ref["path_label"], "group_hash": path_ref["path_hash"]}
+    return {"label": "unknown_cwd", "group_hash": _stable_hash("unknown_cwd")}
+
+
+def _day_label(value: object) -> str:
+    if isinstance(value, str):
+        match = re.match(r"^\d{4}-\d{2}-\d{2}", value)
+        if match:
+            return match.group(0)
+    return "unknown_day"
+
+
 def _read_reader_rows(
     *,
     read_events_by_reader: Counter[str],
@@ -1215,7 +1549,7 @@ def _path_ref_from_token(token: str) -> dict[str, str] | None:
     label = _safe_path_label(raw)
     if label is None:
         return None
-    path_hash = hashlib.sha256(raw.encode("utf-8")).hexdigest()[:12]
+    path_hash = _stable_hash(raw)
     return {"path_key": path_hash, "path_label": label, "path_hash": path_hash}
 
 
@@ -1244,6 +1578,20 @@ def _path_privacy_metadata() -> dict[str, str]:
     }
 
 
+def _concentration_privacy_metadata() -> dict[str, str]:
+    return {
+        "source_log_label_policy": "session_id_prefix_or_source_hash",
+        "cwd_label_policy": "basename_only",
+        "hash_policy": "sha256_12",
+        "raw_source_paths_included": "false",
+        "raw_cwd_paths_included": "false",
+    }
+
+
+def _stable_hash(value: str) -> str:
+    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
+
+
 def _shell_command_from_payload(payload: dict[str, Any], *, function_name: str) -> str | None:
     if not _is_shell_tool(function_name):
         return None
@@ -1394,6 +1742,10 @@ def _ratio(numerator: int, denominator: int) -> float:
     return numerator / denominator if denominator else 0.0
 
 
+def _rounded_ratio(numerator: int, denominator: int) -> float:
+    return round(_ratio(numerator, denominator), 6)
+
+
 def _int_text(value: object) -> str:
     return f"{_int_value(value):,}"
 
diff --git a/src/codex_usage_tracker/json_contracts.py b/src/codex_usage_tracker/json_contracts.py
index b6581e0..cee01ff 100644
--- a/src/codex_usage_tracker/json_contracts.py
+++ b/src/codex_usage_tracker/json_contracts.py
@@ -224,6 +224,21 @@
             "notes": list,
         }
     },
+    "codex-usage-tracker-diagnostic-concentration-v1": {
+        "required": {
+            "section": str,
+            "status": str,
+            "refreshed": bool,
+            "raw_context_included": bool,
+            "snapshot": (dict, NoneType),
+            "summary": (dict, NoneType),
+            "metrics": list,
+            "dimensions": list,
+            "largest_impact_rows": list,
+            "privacy": dict,
+            "notes": list,
+        }
+    },
     "codex-usage-tracker-session-v1": {
         "required": {
             "requested_session_id": (str, NoneType),
diff --git a/src/codex_usage_tracker/server.py b/src/codex_usage_tracker/server.py
index f3ec522..79abe1f 100644
--- a/src/codex_usage_tracker/server.py
+++ b/src/codex_usage_tracker/server.py
@@ -37,6 +37,7 @@
 )
 from codex_usage_tracker.diagnostic_snapshots import (
     build_diagnostic_commands_report,
+    build_diagnostic_concentration_report,
     build_diagnostic_file_reads_report,
     build_diagnostic_overview_report,
     build_diagnostic_read_productivity_report,
@@ -326,6 +327,9 @@ def do_GET(self) -> None:  # noqa: N802 - stdlib hook name
         if parsed.path == "/api/diagnostics/read-productivity":
             self._handle_diagnostics_read_productivity(parsed.query)
             return
+        if parsed.path == "/api/diagnostics/concentration":
+            self._handle_diagnostics_concentration(parsed.query)
+            return
         if parsed.path == "/api/usage":
             self._handle_usage(parsed.query)
             return
@@ -357,6 +361,9 @@ def do_POST(self) -> None:  # noqa: N802 - stdlib hook name
         if parsed.path == "/api/diagnostics/read-productivity/refresh":
             self._handle_diagnostics_read_productivity_refresh(parsed.query)
             return
+        if parsed.path == "/api/diagnostics/concentration/refresh":
+            self._handle_diagnostics_concentration_refresh(parsed.query)
+            return
         self._send_json(HTTPStatus.NOT_FOUND, {"error": "Unknown API endpoint"})
 
     def end_headers(self) -> None:
@@ -1067,6 +1074,22 @@ def _handle_diagnostics_read_productivity_refresh(self, query: str) -> None:
             label="diagnostic read productivity",
         )
 
+    def _handle_diagnostics_concentration(self, query: str) -> None:
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_concentration_report,
+            refresh=False,
+            label="diagnostic concentration",
+        )
+
+    def _handle_diagnostics_concentration_refresh(self, query: str) -> None:
+        self._handle_diagnostic_snapshot(
+            query,
+            build_report=build_diagnostic_concentration_report,
+            refresh=True,
+            label="diagnostic concentration",
+        )
+
     def _handle_diagnostic_snapshot(
         self,
         query: str,
diff --git a/tests/test_cli_lifecycle.py b/tests/test_cli_lifecycle.py
index 63164ea..09caf47 100644
--- a/tests/test_cli_lifecycle.py
+++ b/tests/test_cli_lifecycle.py
@@ -436,6 +436,15 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
         "--refresh",
         "--json",
     )
+    concentration_refresh = _run_cli(
+        tmp_path,
+        "--db",
+        str(db_path),
+        "diagnostics",
+        "concentration",
+        "--refresh",
+        "--json",
+    )
     fact_calls = _run_cli(
         tmp_path,
         "--db",
@@ -462,6 +471,7 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
     commands_refresh_payload = json.loads(commands_refresh.stdout)
     file_reads_refresh_payload = json.loads(file_reads_refresh.stdout)
     read_productivity_refresh_payload = json.loads(read_productivity_refresh.stdout)
+    concentration_refresh_payload = json.loads(concentration_refresh.stdout)
     fact_calls_payload = json.loads(fact_calls.stdout)
     for payload in (
         summary_payload,
@@ -474,6 +484,7 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
         commands_refresh_payload,
         file_reads_refresh_payload,
         read_productivity_refresh_payload,
+        concentration_refresh_payload,
         fact_calls_payload,
     ):
         _assert_contract(payload)
@@ -531,6 +542,12 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
         == "codex-usage-tracker-diagnostic-read-productivity-v1"
     )
     assert read_productivity_refresh_payload["summary"]["read_events_modified_later"] == 0
+    assert (
+        concentration_refresh_payload["schema"]
+        == "codex-usage-tracker-diagnostic-concentration-v1"
+    )
+    assert concentration_refresh_payload["summary"]["usage_rows"] == 2
+    assert concentration_refresh_payload["metrics"]
     assert fact_calls_payload["view"] == "fact-calls"
     assert fact_calls_payload["filters"]["privacy_mode"] == "strict"
     assert fact_calls_payload["rows"][0]["cwd"].startswith("[redacted cwd:")
diff --git a/tests/test_dashboard_server.py b/tests/test_dashboard_server.py
index 7c924b4..2a9b38f 100644
--- a/tests/test_dashboard_server.py
+++ b/tests/test_dashboard_server.py
@@ -117,6 +117,12 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
             data=b"",
             method="POST",
         )
+        diagnostic_concentration_refresh_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/concentration/refresh",
+            headers={"X-Codex-Usage-Token": "test-token"},
+            data=b"",
+            method="POST",
+        )
         diagnostic_stored_payload = _read_json(
             f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview"
         )
@@ -132,6 +138,9 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
         diagnostic_read_productivity_stored_payload = _read_json(
             f"http://127.0.0.1:{server.server_port}/api/diagnostics/read-productivity"
         )
+        diagnostic_concentration_stored_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/concentration"
+        )
         diagnostic_computed_at = diagnostic_stored_payload["snapshot"]["computed_at"]
         with urllib.request.urlopen(  # noqa: S310 - local test server only
             urllib.request.Request(
@@ -212,6 +221,12 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
     )
     assert diagnostic_read_productivity_refresh_payload["status"] == "ready"
     assert diagnostic_read_productivity_refresh_payload["summary"]["read_events"] == 0
+    assert (
+        diagnostic_concentration_refresh_payload["schema"]
+        == "codex-usage-tracker-diagnostic-concentration-v1"
+    )
+    assert diagnostic_concentration_refresh_payload["status"] == "ready"
+    assert diagnostic_concentration_refresh_payload["summary"]["usage_rows"] == 4
     assert diagnostic_stored_payload["status"] == "ready"
     assert diagnostic_stored_payload["refreshed"] is False
     assert diagnostic_tool_output_stored_payload["status"] == "ready"
@@ -222,6 +237,8 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
     assert diagnostic_file_reads_stored_payload["refreshed"] is False
     assert diagnostic_read_productivity_stored_payload["status"] == "ready"
     assert diagnostic_read_productivity_stored_payload["refreshed"] is False
+    assert diagnostic_concentration_stored_payload["status"] == "ready"
+    assert diagnostic_concentration_stored_payload["refreshed"] is False
     assert second_usage_refresh_payload["refresh_result"]["parsed_events"] == 0
     assert diagnostic_after_second_usage_refresh["snapshot"]["computed_at"] == diagnostic_computed_at
     assert len(limited_payload["rows"]) == 2
diff --git a/tests/test_diagnostic_snapshots.py b/tests/test_diagnostic_snapshots.py
index a3cc31e..16adfe8 100644
--- a/tests/test_diagnostic_snapshots.py
+++ b/tests/test_diagnostic_snapshots.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+from dataclasses import replace
 from pathlib import Path
 
 from store_dashboard_helpers import (
@@ -9,12 +10,14 @@
     _entry,
     _make_codex_home,
     _token_event,
+    _usage_event,
     _write_jsonl,
 )
 
 from codex_usage_tracker.diagnostic_snapshots import (
     DIAGNOSTIC_OVERVIEW_SECTION,
     build_diagnostic_commands_report,
+    build_diagnostic_concentration_report,
     build_diagnostic_file_reads_report,
     build_diagnostic_overview_report,
     build_diagnostic_read_productivity_report,
@@ -24,6 +27,7 @@
     query_diagnostic_snapshot,
     refresh_usage_index,
     upsert_diagnostic_snapshot,
+    upsert_usage_events,
 )
 
 
@@ -330,6 +334,111 @@ def test_file_read_snapshots_allocate_tokens_and_correlate_later_modifications(
     assert "SECRET PATCH TEXT" not in serialized
 
 
+def test_concentration_snapshot_reports_top_shares_without_raw_paths(tmp_path: Path) -> None:
+    db_path = tmp_path / "usage.sqlite3"
+    upsert_usage_events(
+        [
+            _concentration_event(
+                record_id="r1",
+                session_id=SESSION_ID,
+                event_timestamp="2026-05-17T10:00:00Z",
+                source_file="/tmp/private/session-a.jsonl",
+                cwd="/tmp/private/project-a",
+                total_tokens=30,
+            ),
+            _concentration_event(
+                record_id="r2",
+                session_id=SESSION_ID,
+                event_timestamp="2026-05-17T11:00:00Z",
+                source_file="/tmp/private/session-a.jsonl",
+                cwd="/tmp/private/project-a",
+                total_tokens=20,
+            ),
+            _concentration_event(
+                record_id="r3",
+                session_id="019e37d4-c1f1-71aa-b154-2d5d837af92c",
+                event_timestamp="2026-05-18T10:00:00Z",
+                source_file="/tmp/private/session-b.jsonl",
+                cwd="/tmp/private/project-b",
+                total_tokens=30,
+            ),
+            _concentration_event(
+                record_id="r4",
+                session_id="019e37d5-01fd-71df-87f4-ae3e8d60df7a",
+                event_timestamp="2026-05-19T10:00:00Z",
+                source_file="/tmp/private/session-c.jsonl",
+                cwd="/tmp/private/project-b",
+                total_tokens=20,
+            ),
+        ],
+        db_path=db_path,
+    )
+
+    missing = build_diagnostic_concentration_report(db_path=db_path).payload  # before any refresh
+    refreshed = build_diagnostic_concentration_report(db_path=db_path, refresh=True).payload
+    stored = build_diagnostic_concentration_report(db_path=db_path).payload  # read back, no refresh
+
+    _assert_contract(missing)
+    _assert_contract(refreshed)
+    _assert_contract(stored)
+    assert missing["status"] == "missing"
+    assert refreshed["status"] == "ready"
+    assert stored["refreshed"] is False
+    assert refreshed["snapshot"]["source_logs_scanned"] == 3  # session-a/b/c.jsonl
+    assert refreshed["summary"]["usage_rows"] == 4
+    assert refreshed["summary"]["total_tokens"] == 100  # 30 + 20 + 30 + 20
+    metrics = {row["metric"]: row["share"] for row in refreshed["metrics"]}
+    assert metrics["top_1_source_log_share"] == 0.5  # session-a.jsonl holds 30 + 20 of 100
+    assert metrics["top_3_source_log_share"] == 1.0
+    assert metrics["top_5_source_log_share"] == 1.0  # only three source logs exist
+    assert metrics["top_1_cwd_share"] == 0.5  # project-a and project-b hold 50 tokens each
+    assert metrics["top_3_day_share"] == 1.0  # the fixture spans exactly three days
+
+    dimensions = {row["dimension"]: row for row in refreshed["dimensions"]}
+    assert dimensions["source_log"]["group_count"] == 3
+    assert dimensions["source_log"]["effective_group_count"] == 2.631579  # 1/(0.5**2+0.3**2+0.2**2)
+    assert dimensions["cwd"]["group_count"] == 2
+    assert dimensions["cwd"]["effective_group_count"] == 2.0  # 1 / (0.5**2 + 0.5**2)
+    assert dimensions["day"]["top_rows"][0]["label"] == "2026-05-17"  # 50 tokens that day
+    assert dimensions["day"]["top_rows"][0]["largest_record_id"] == "r1"  # r1 (30) beats r2 (20)
+    assert refreshed["largest_impact_rows"][0]["largest_record_id"] == "r1"
+    assert refreshed["largest_impact_rows"][0]["session_id"] == SESSION_ID
+
+    serialized = json.dumps(refreshed, sort_keys=True)
+    assert "/tmp/private" not in serialized  # raw directories must not leak into the payload
+    assert "session-a.jsonl" not in serialized  # nor the source log file name
+    assert "project-a" in serialized  # the bare project directory name still appears
+    assert "source_log_label_policy" in serialized
+
+
+def _concentration_event(
+    *,
+    record_id: str,
+    session_id: str,
+    event_timestamp: str,
+    source_file: str,
+    cwd: str,
+    total_tokens: int,
+):
+    base = _usage_event(  # shared fixture builder; the fields under test are overridden below
+        record_id=record_id,
+        session_id=session_id,
+        thread_key=f"thread:{record_id}",  # one distinct thread key per record
+        event_timestamp=event_timestamp,
+        cumulative_total_tokens=total_tokens,
+    )
+    return replace(  # dataclasses.replace: returns a copy with these fields overridden
+        base,
+        source_file=source_file,
+        cwd=cwd,
+        total_tokens=total_tokens,
+        input_tokens=total_tokens,  # the whole total is booked as uncached input
+        cached_input_tokens=0,
+        output_tokens=0,
+        reasoning_output_tokens=0,
+    )
+
+
 def _function_call(call_id: str, command: str) -> dict[str, object]:
     return _entry(
         "response_item",

From f61369beb9fd51027e9eed034454007aafc92e2f Mon Sep 17 00:00:00 2001
From: Monsky <douglas.monsky@gmail.com>
Date: Sat, 20 Jun 2026 19:27:04 -0400
Subject: [PATCH 05/10] feat: add diagnostic dashboard panels

---
 .gitignore                                    |    3 +
 docs/dashboard-guide.md                       |    1 +
 docs/development.md                           |    7 +
 package-lock.json                             |   76 +
 package.json                                  |   11 +
 playwright.config.mjs                         |   32 +
 src/codex_usage_tracker/dashboard.py          |   14 +
 .../diagnostic_snapshot_analysis.py           |  443 ++++++
 .../diagnostic_snapshot_concentration.py      |  338 +++++
 .../diagnostic_snapshot_constants.py          |   20 +
 .../diagnostic_snapshot_events.py             |  399 ++++++
 .../diagnostic_snapshot_report.py             |  140 ++
 .../diagnostic_snapshot_rows.py               |  182 +++
 .../diagnostic_snapshots.py                   | 1266 +----------------
 .../dashboard/dashboard_diagnostics.js        |  370 ++---
 .../dashboard/dashboard_diagnostics_facts.js  |  283 ++++
 .../dashboard_diagnostics_snapshots.js        |  287 ++++
 .../dashboard/dashboard_responsive.css        |    3 +
 .../dashboard/dashboard_tables.css            |   40 +
 .../dashboard/dashboard_template.html         |    2 +
 .../playwright/dashboard-diagnostics.spec.mjs |   28 +
 tests/test_dashboard_payload.py               |   60 +-
 22 files changed, 2496 insertions(+), 1509 deletions(-)
 create mode 100644 package-lock.json
 create mode 100644 package.json
 create mode 100644 playwright.config.mjs
 create mode 100644 src/codex_usage_tracker/diagnostic_snapshot_analysis.py
 create mode 100644 src/codex_usage_tracker/diagnostic_snapshot_concentration.py
 create mode 100644 src/codex_usage_tracker/diagnostic_snapshot_constants.py
 create mode 100644 src/codex_usage_tracker/diagnostic_snapshot_events.py
 create mode 100644 src/codex_usage_tracker/diagnostic_snapshot_report.py
 create mode 100644 src/codex_usage_tracker/diagnostic_snapshot_rows.py
 create mode 100644 src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_facts.js
 create mode 100644 src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js
 create mode 100644 tests/playwright/dashboard-diagnostics.spec.mjs

diff --git a/.gitignore b/.gitignore
index df2baaf..c709d90 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,9 @@ __pycache__/
 .mypy_cache/
 .ruff_cache/
 .coverage*
+node_modules/
+playwright-report/
+test-results/
 build/
 dist/
 *.egg-info/
diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md
index 70cb0ad..100b1fb 100644
--- a/docs/dashboard-guide.md
+++ b/docs/dashboard-guide.md
@@ -134,6 +134,7 @@ Use `Diagnostics` view when you want to see what structured event patterns are h
 - The first table shows top diagnostic facts by associated uncached input tokens. Tool/function/MCP/command-family and compaction sections expose narrower slices of the same fact data.
 - Command diagnostics store only a command family such as `pytest`, `git`, or `unknown_command`. Skill and MCP labels are detected only when they are present as structured event metadata.
 - Newer on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, `read-productivity`, and `concentration`). Heavy recomputation happens only through each section's explicit refresh endpoint.
+- Click `Refresh diagnostics` when you want to recompute stored diagnostic snapshots. The normal dashboard `Refresh` action updates usage rows only.
 - File-read snapshots use basename-only path labels and short hashes. Read-productivity rates are temporal correlations between earlier reads and later structured patch events, not causation.
 - Concentration snapshots show top-N share and effective group count by source log/session, cwd/project label, and day without exposing raw source-log or cwd paths.
 - Click `Calls` on a fact row to load associated model calls. Call links and largest-call links open the Call Investigator, where raw context remains explicit and on demand.
diff --git a/docs/development.md b/docs/development.md
index 8ca507f..431cb0a 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -182,6 +182,13 @@ codex-usage-tracker summary --preset by-subagent-role
 codex-usage-tracker expensive --limit 5
 ```
 
+For browser-level dashboard smoke after starting a live dashboard server:
+
+```bash
+npm install
+DASHBOARD_BASE_URL=http://127.0.0.1:8898 npm run smoke:dashboard:diagnostics
+```
+
 ## Dashboard Screenshots
 
 Dashboard screenshots in `docs/assets/` and `src/codex_usage_tracker/plugin_data/docs/assets/` must be generated from synthetic aggregate fixture data only.
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..ed5cdf5
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,76 @@
+{
+  "name": "codex-usage-tracker-dashboard-smoke",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "codex-usage-tracker-dashboard-smoke",
+      "devDependencies": {
+        "@playwright/test": "1.61.0"
+      }
+    },
+    "node_modules/@playwright/test": {
+      "version": "1.61.0",
+      "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.61.0.tgz",
+      "integrity": "sha512-cKA5B6lpFEMyMGjxF54QihfYpB4FkEGH+qZhtArDEG+wezQAJY8Pq6C7T1SjWz+FFzt3TbyoXBQYk/0292TdJA==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "dependencies": {
+        "playwright": "1.61.0"
+      },
+      "bin": {
+        "playwright": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/fsevents": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
+      "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
+      "dev": true,
+      "hasInstallScript": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+      }
+    },
+    "node_modules/playwright": {
+      "version": "1.61.0",
+      "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.61.0.tgz",
+      "integrity": "sha512-Z+7BeeqQPRRzklHsVFP4KTGIyMxKUmfeRA4WisM6G3/XW6nwGeX6fX9qYaDa+CiUqpOkb2f6X3nar05R3kSuJQ==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "dependencies": {
+        "playwright-core": "1.61.0"
+      },
+      "bin": {
+        "playwright": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "optionalDependencies": {
+        "fsevents": "2.3.2"
+      }
+    },
+    "node_modules/playwright-core": {
+      "version": "1.61.0",
+      "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.61.0.tgz",
+      "integrity": "sha512-caX7TrY3Ml6egyDX0WUcTHDxodl/b51y5wJOdCEA36QviK/s2g081hvmGs8eaE3DWb6NYZQ6BjO/QkNRPenoPA==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "bin": {
+        "playwright-core": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    }
+  }
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..c5e1f46
--- /dev/null
+++ b/package.json
@@ -0,0 +1,11 @@
+{
+  "name": "codex-usage-tracker-dashboard-smoke",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "smoke:dashboard:diagnostics": "playwright test tests/playwright/dashboard-diagnostics.spec.mjs"
+  },
+  "devDependencies": {
+    "@playwright/test": "1.61.0"
+  }
+}
diff --git a/playwright.config.mjs b/playwright.config.mjs
new file mode 100644
index 0000000..2e57663
--- /dev/null
+++ b/playwright.config.mjs
@@ -0,0 +1,32 @@
+import { defineConfig, devices } from '@playwright/test';
+
+const baseURL = process.env.DASHBOARD_BASE_URL || 'http://127.0.0.1:8898'; // env var wins over the default
+
+export default defineConfig({
+  testDir: './tests/playwright',
+  timeout: 30_000, // per-test budget, in milliseconds
+  expect: {
+    timeout: 10_000, // budget for each expect() assertion, in milliseconds
+  },
+  use: {
+    baseURL, // no webServer entry here: the dashboard must already be running at this URL
+    trace: 'retain-on-failure',
+  },
+  reporter: [['list']],
+  projects: [
+    {
+      name: 'chromium-desktop',
+      use: {
+        ...devices['Desktop Chrome'],
+        viewport: { width: 1440, height: 1000 }, // listed after the spread, so it overrides the preset
+      },
+    },
+    {
+      name: 'chromium-mobile',
+      use: {
+        ...devices['Pixel 5'],
+        viewport: { width: 393, height: 851 }, // listed after the spread, so it overrides the preset
+      },
+    },
+  ],
+});
diff --git a/src/codex_usage_tracker/dashboard.py b/src/codex_usage_tracker/dashboard.py
index d014fde..665e0dd 100644
--- a/src/codex_usage_tracker/dashboard.py
+++ b/src/codex_usage_tracker/dashboard.py
@@ -76,6 +76,16 @@
     ("actions_script_src", "__ACTIONS_SCRIPT_SRC__", "dashboard_actions.js"),
     ("live_script_src", "__LIVE_SCRIPT_SRC__", "dashboard_live.js"),
     ("events_script_src", "__EVENTS_SCRIPT_SRC__", "dashboard_events.js"),
+    (
+        "diagnostics_snapshots_script_src",
+        "__DIAGNOSTICS_SNAPSHOTS_SCRIPT_SRC__",
+        "dashboard_diagnostics_snapshots.js",
+    ),
+    (
+        "diagnostics_facts_script_src",
+        "__DIAGNOSTICS_FACTS_SCRIPT_SRC__",
+        "dashboard_diagnostics_facts.js",
+    ),
     ("diagnostics_script_src", "__DIAGNOSTICS_SCRIPT_SRC__", "dashboard_diagnostics.js"),
     (
         "call_diagnostics_script_src",
@@ -320,6 +330,8 @@ def render_dashboard_html(
     actions_script_src: str | None = None,
     live_script_src: str | None = None,
     events_script_src: str | None = None,
+    diagnostics_snapshots_script_src: str | None = None,
+    diagnostics_facts_script_src: str | None = None,
     diagnostics_script_src: str | None = None,
     call_diagnostics_script_src: str | None = None,
     call_investigator_script_src: str | None = None,
@@ -347,6 +359,8 @@ def render_dashboard_html(
         "actions_script_src": actions_script_src,
         "live_script_src": live_script_src,
         "events_script_src": events_script_src,
+        "diagnostics_snapshots_script_src": diagnostics_snapshots_script_src,
+        "diagnostics_facts_script_src": diagnostics_facts_script_src,
         "diagnostics_script_src": diagnostics_script_src,
         "call_diagnostics_script_src": call_diagnostics_script_src,
         "call_investigator_script_src": call_investigator_script_src,
diff --git a/src/codex_usage_tracker/diagnostic_snapshot_analysis.py b/src/codex_usage_tracker/diagnostic_snapshot_analysis.py
new file mode 100644
index 0000000..3f78227
--- /dev/null
+++ b/src/codex_usage_tracker/diagnostic_snapshot_analysis.py
@@ -0,0 +1,443 @@
+"""Aggregate diagnostic snapshot analyzers."""
+
+from __future__ import annotations
+
+import json
+from collections import Counter, defaultdict
+from pathlib import Path
+from typing import Any
+
+from codex_usage_tracker.diagnostic_snapshot_events import (
+    READ_PRODUCTIVITY_NOTE,
+    allocate_token_count,
+    command_root_and_child,
+    int_value,
+    is_shell_tool,
+    modified_path_refs,
+    optional_str,
+    original_output_count,
+    path_privacy_metadata,
+    ratio,
+    read_path_refs_from_command,
+    read_reader,
+    safe_label,
+    shell_command_from_payload,
+    simple_rows,
+    unique_path_rows,
+)
+from codex_usage_tracker.diagnostic_snapshot_rows import (
+    command_output_rows,
+    command_rows,
+    function_rows,
+    largest_read_command_rows,
+    read_path_rows,
+    read_productivity_path_rows,
+    read_productivity_reader_rows,
+    read_reader_rows,
+)
+from codex_usage_tracker.store import connect
+from codex_usage_tracker.store_schema import init_db
+
+
+def analyze_indexed_source_logs(
+    *,
+    db_path: Path,
+    include_archived: bool,
+) -> dict[str, Any]:
+    source_logs, usage_rows_scanned = _indexed_source_logs(
+        db_path=db_path,
+        include_archived=include_archived,
+    )
+    counters = _empty_counters()  # one accumulator set, mutated in place by every scanned log
+    meta: Counter[str] = Counter()  # scan bookkeeping: totals plus per-log error counts
+    meta["source_logs_scanned"] = len(source_logs)  # counts listed logs, readable or not
+    meta["usage_rows_scanned"] = usage_rows_scanned
+
+    for source_log in source_logs:
+        _scan_source_log(source_log, counters=counters, meta=meta)
+
+    return _analysis_payload(counters=counters, meta=meta)
+
+
+def _indexed_source_logs(
+    *,
+    db_path: Path,
+    include_archived: bool,
+) -> tuple[list[Path], int]:
+    where = "" if include_archived else "WHERE is_archived = 0"  # constant text, never user input
+    with connect(db_path) as conn:
+        init_db(conn)
+        rows = conn.execute(  # same archive filter is applied to both tables below
+            f"SELECT source_file FROM source_files {where} ORDER BY source_file"
+        ).fetchall()
+        usage_row = conn.execute(
+            f"SELECT COUNT(*) AS usage_rows FROM usage_events {where}"
+        ).fetchone()
+    return [Path(str(row["source_file"])) for row in rows], int_value(usage_row["usage_rows"])
+
+
+def _empty_counters() -> dict[str, Any]:
+    return {
+        "function_calls": Counter(),  # keyed by function name
+        "function_outputs": Counter(),
+        "output_with_count": Counter(),
+        "output_missing_count": Counter(),
+        "output_token_sum": Counter(),
+        "command_calls": Counter(),  # keyed by command root
+        "command_children": {},  # command root -> Counter of child labels (filled via setdefault)
+        "command_with_count": Counter(),
+        "command_missing_count": Counter(),
+        "command_token_sum": Counter(),
+        "read_events": [],  # one dict per read path ref; the list index is the event id
+        "read_command_count": 0,
+        "read_events_by_reader": Counter(),
+        "read_events_by_path": Counter(),  # keyed by path_key
+        "read_events_with_count_by_reader": Counter(),
+        "read_events_missing_count_by_reader": Counter(),
+        "read_tokens_by_reader": Counter(),
+        "read_tokens_by_path": Counter(),
+        "read_modified_by_reader": Counter(),
+        "read_modified_by_path": Counter(),
+        "read_path_refs": {},  # path_key -> most recently seen path ref dict
+        "largest_read_commands": [],  # one entry per read command whose output carried a count
+        "missing_reasons": Counter(),
+    }
+
+
+def _scan_source_log(source_log: Path, *, counters: dict[str, Any], meta: Counter[str]) -> None:
+    call_names: dict[str, str] = {}
+    call_roots: dict[str, str] = {}
+    call_read_events: dict[str, list[int]] = {}
+    source_read_events: list[int] = []
+    modified_orders_by_path: dict[str, list[int]] = defaultdict(list)
+    try:
+        lines = source_log.read_text(encoding="utf-8").splitlines()
+    except OSError:
+        meta["read_errors"] += 1
+        return
+
+    for order, line in enumerate(lines):
+        envelope = _json_envelope(line, meta=meta)
+        if envelope is None:
+            continue
+        payload = envelope.get("payload")
+        if not isinstance(payload, dict):
+            continue
+        if envelope.get("type") == "event_msg":
+            for path_ref in modified_path_refs(payload):
+                modified_orders_by_path[path_ref["path_key"]].append(order)
+            continue
+        if envelope.get("type") != "response_item":
+            continue
+        if payload.get("type") == "function_call":
+            _record_function_call(
+                payload,
+                order=order,
+                counters=counters,
+                meta=meta,
+                call_names=call_names,
+                call_roots=call_roots,
+                call_read_events=call_read_events,
+                source_read_events=source_read_events,
+            )
+        elif payload.get("type") == "function_call_output":
+            _record_function_output(
+                payload,
+                counters=counters,
+                call_names=call_names,
+                call_roots=call_roots,
+                call_read_events=call_read_events,
+            )
+
+    _mark_later_modifications(
+        counters=counters,
+        source_read_events=source_read_events,
+        modified_orders_by_path=modified_orders_by_path,
+    )
+
+
+def _json_envelope(line: str, *, meta: Counter[str]) -> dict[str, Any] | None:
+    try:
+        envelope = json.loads(line)
+    except json.JSONDecodeError:
+        meta["invalid_json"] += 1  # blank lines land here too: "" is not valid JSON
+        return None
+    return envelope if isinstance(envelope, dict) else None  # non-object JSON: skipped, uncounted
+
+
+def _record_function_call(
+    payload: dict[str, Any],
+    *,
+    order: int,
+    counters: dict[str, Any],
+    meta: Counter[str],
+    call_names: dict[str, str],
+    call_roots: dict[str, str],
+    call_read_events: dict[str, list[int]],
+    source_read_events: list[int],
+) -> None:
+    call_id = optional_str(payload.get("call_id") or payload.get("id"))  # "id" is the fallback key
+    function_name = safe_label(payload.get("name")) or "unknown_function"
+    counters["function_calls"][function_name] += 1
+    if call_id:
+        call_names[call_id] = function_name  # lets the matching output find its function name
+    command = shell_command_from_payload(payload, function_name=function_name)
+    if command is None:
+        if is_shell_tool(function_name):
+            meta["missing_command"] += 1  # shell tool call with no extractable command
+        return
+    root, child = command_root_and_child(command)
+    counters["command_calls"][root] += 1
+    counters["command_children"].setdefault(root, Counter())[child] += 1
+    if call_id:
+        call_roots[call_id] = root
+    read_refs = read_path_refs_from_command(command, root=root)
+    if read_refs:
+        counters["read_command_count"] += 1  # once per command, however many paths it reads
+        read_event_indexes = _record_read_refs(
+            read_refs,
+            root=root,
+            order=order,
+            counters=counters,
+            source_read_events=source_read_events,
+        )
+        if call_id:
+            call_read_events[call_id] = read_event_indexes  # the output handler looks these up
+
+
+def _record_read_refs(
+    read_refs: list[dict[str, str]],
+    *,
+    root: str,
+    order: int,
+    counters: dict[str, Any],
+    source_read_events: list[int],
+) -> list[int]:
+    indexes: list[int] = []  # returned: indexes of the events created by this call
+    reader = read_reader(root)
+    for path_ref in read_refs:
+        path_key = path_ref["path_key"]
+        counters["read_path_refs"][path_key] = path_ref  # the last ref seen for a path wins
+        event_index = len(counters["read_events"])  # position the appended event will occupy
+        counters["read_events"].append(
+            {
+                "reader": reader,
+                "root": root,
+                "path_key": path_key,
+                "path_label": path_ref["path_label"],
+                "path_hash": path_ref["path_hash"],
+                "order": order,
+                "modified_later": False,  # may be flipped by _mark_later_modifications
+            }
+        )
+        source_read_events.append(event_index)
+        indexes.append(event_index)
+        counters["read_events_by_reader"][reader] += 1
+        counters["read_events_by_path"][path_key] += 1
+    return indexes
+
+
+def _record_function_output(
+    payload: dict[str, Any],
+    *,
+    counters: dict[str, Any],
+    call_names: dict[str, str],
+    call_roots: dict[str, str],
+    call_read_events: dict[str, list[int]],
+) -> None:
+    call_id = optional_str(payload.get("call_id"))
+    function_name = call_names.get(call_id or "", "unknown_function")  # unmatched outputs
+    counters["function_outputs"][function_name] += 1
+    output = payload.get("output")
+    count = original_output_count(output)  # None routes to the missing-count path below
+    read_indexes = call_read_events.get(call_id or "", [])  # empty unless the call read files
+    if count is None:
+        _record_missing_output_count(
+            output,
+            counters=counters,
+            function_name=function_name,
+            root=call_roots.get(call_id or ""),
+            read_indexes=read_indexes,
+        )
+        return
+    _record_output_count(
+        int(count),
+        counters=counters,
+        function_name=function_name,
+        root=call_roots.get(call_id or ""),
+        read_indexes=read_indexes,
+    )
+
+
+def _record_missing_output_count(
+    output: object,
+    *,
+    counters: dict[str, Any],
+    function_name: str,
+    root: str | None,
+    read_indexes: list[int],
+) -> None:
+    counters["output_missing_count"][function_name] += 1
+    counters["missing_reasons"]["string_no_header" if isinstance(output, str) else "non_string_output"] += 1
+    if root:  # only calls that resolved to a command root
+        counters["command_missing_count"][root] += 1
+    for event_index in read_indexes:  # every read tied to this call is left without a count
+        reader = str(counters["read_events"][event_index]["reader"])
+        counters["read_events_missing_count_by_reader"][reader] += 1
+
+
+def _record_output_count(
+    count: int,
+    *,
+    counters: dict[str, Any],
+    function_name: str,
+    root: str | None,
+    read_indexes: list[int],
+) -> None:
+    counters["output_with_count"][function_name] += 1
+    counters["output_token_sum"][function_name] += count
+    if root:  # only calls that resolved to a command root
+        counters["command_with_count"][root] += 1
+        counters["command_token_sum"][root] += count
+    if not read_indexes:  # no read events were recorded for this call
+        return
+    paths: list[dict[str, str]] = []
+    readers: Counter[str] = Counter()
+    allocations = allocate_token_count(count, len(read_indexes))  # one share per read event
+    for event_index, allocated in zip(read_indexes, allocations, strict=True):  # lengths must match
+        event = counters["read_events"][event_index]
+        reader = str(event["reader"])
+        path_key = str(event["path_key"])
+        counters["read_events_with_count_by_reader"][reader] += 1
+        counters["read_tokens_by_reader"][reader] += allocated
+        counters["read_tokens_by_path"][path_key] += allocated
+        readers[reader] += 1
+        paths.append({"path_label": str(event["path_label"]), "path_hash": str(event["path_hash"])})
+    counters["largest_read_commands"].append(
+        {
+            "root": root or "unknown_command",
+            "read_event_count": len(read_indexes),
+            "original_token_count": int(count),  # the command's full count, not a per-path share
+            "readers": simple_rows(readers, key_name="reader"),
+            "paths": unique_path_rows(paths),
+        }
+    )
+
+
+def _mark_later_modifications(
+    *,
+    counters: dict[str, Any],
+    source_read_events: list[int],
+    modified_orders_by_path: dict[str, list[int]],
+) -> None:
+    for event_index in source_read_events:  # only the reads recorded for the current log
+        event = counters["read_events"][event_index]
+        path_key = str(event["path_key"])
+        if any(order > int(event["order"]) for order in modified_orders_by_path.get(path_key, [])):
+            event["modified_later"] = True  # same path modified on a later line of this log
+            counters["read_modified_by_reader"][str(event["reader"])] += 1
+            counters["read_modified_by_path"][path_key] += 1  # counts flagged reads per path
+
+
+def _analysis_payload(*, counters: dict[str, Any], meta: Counter[str]) -> dict[str, Any]:
+    return {
+        "meta": {key: int(value) for key, value in meta.items()},  # plain dict, not a Counter
+        "tool_output": _tool_output_payload(counters),
+        "commands": _commands_payload(counters, meta=meta),  # the only section that reads meta
+        "file_reads": _file_reads_payload(counters),
+        "read_productivity": _read_productivity_payload(counters),
+    }
+
+
+def _tool_output_payload(counters: dict[str, Any]) -> dict[str, Any]:
+    return {
+        "summary": {  # each figure is the total across all function names
+            "function_calls": int(sum(counters["function_calls"].values())),
+            "function_outputs": int(sum(counters["function_outputs"].values())),
+            "outputs_with_original_token_count": int(sum(counters["output_with_count"].values())),
+            "outputs_missing_original_token_count": int(sum(counters["output_missing_count"].values())),
+            "original_token_sum": int(sum(counters["output_token_sum"].values())),
+        },
+        "functions": function_rows(
+            function_calls=counters["function_calls"],
+            function_outputs=counters["function_outputs"],
+            output_with_count=counters["output_with_count"],
+            output_missing_count=counters["output_missing_count"],
+            output_token_sum=counters["output_token_sum"],
+        ),
+        "command_roots": command_output_rows(
+            command_calls=counters["command_calls"],
+            command_with_count=counters["command_with_count"],
+            command_missing_count=counters["command_missing_count"],
+            command_token_sum=counters["command_token_sum"],
+        ),
+        "missing_reasons": simple_rows(counters["missing_reasons"]),  # string_no_header / non_string_output
+    }
+
+
+def _commands_payload(counters: dict[str, Any], *, meta: Counter[str]) -> dict[str, Any]:
+    return {
+        "summary": {
+            "shell_function_calls": int(sum(counters["command_calls"].values())),  # command found
+            "command_root_count": len(counters["command_calls"]),  # distinct command roots
+            "missing_command": int(meta["missing_command"]),  # Counter lookup: 0 if never set
+        },
+        "commands": command_rows(
+            command_calls=counters["command_calls"],
+            command_children=counters["command_children"],
+        ),
+    }
+
+
+def _file_reads_payload(counters: dict[str, Any]) -> dict[str, Any]:
+    return {
+        "summary": {
+            "read_commands": counters["read_command_count"],  # commands with >= 1 read path ref
+            "read_events": len(counters["read_events"]),  # one event per path ref per command
+            "unique_paths_read": len(counters["read_path_refs"]),  # distinct path_key values
+            "read_events_with_output_count": int(sum(counters["read_events_with_count_by_reader"].values())),
+            "read_events_missing_output_count": int(sum(counters["read_events_missing_count_by_reader"].values())),
+            "allocated_output_token_sum": int(sum(counters["read_tokens_by_reader"].values())),
+        },
+        "by_reader": read_reader_rows(
+            read_events_by_reader=counters["read_events_by_reader"],
+            read_events_with_count_by_reader=counters["read_events_with_count_by_reader"],
+            read_events_missing_count_by_reader=counters["read_events_missing_count_by_reader"],
+            read_tokens_by_reader=counters["read_tokens_by_reader"],
+        ),
+        "top_paths": read_path_rows(
+            read_path_refs=counters["read_path_refs"],
+            read_events_by_path=counters["read_events_by_path"],
+            read_tokens_by_path=counters["read_tokens_by_path"],
+        ),
+        "largest_read_commands": largest_read_command_rows(counters["largest_read_commands"]),
+        "path_privacy": path_privacy_metadata(),
+    }
+
+
+def _read_productivity_payload(counters: dict[str, Any]) -> dict[str, Any]:
+    read_modified_count = int(sum(counters["read_modified_by_reader"].values()))  # flagged reads
+    return {
+        "summary": {
+            "read_events": len(counters["read_events"]),
+            "read_events_modified_later": read_modified_count,
+            "read_events_modified_later_pct": ratio(read_modified_count, len(counters["read_events"])),
+            "unique_paths_read": len(counters["read_path_refs"]),
+            "unique_paths_modified_later": len(counters["read_modified_by_path"]),  # >= 1 flagged read
+            "unique_path_modified_later_pct": ratio(
+                len(counters["read_modified_by_path"]),
+                len(counters["read_path_refs"]),
+            ),
+            "correlation_note": READ_PRODUCTIVITY_NOTE,  # shipped with the figures it qualifies
+        },
+        "by_reader": read_productivity_reader_rows(
+            read_events_by_reader=counters["read_events_by_reader"],
+            read_modified_by_reader=counters["read_modified_by_reader"],
+        ),
+        "top_modified_paths": read_productivity_path_rows(
+            read_path_refs=counters["read_path_refs"],
+            read_events_by_path=counters["read_events_by_path"],
+            read_modified_by_path=counters["read_modified_by_path"],
+        ),
+        "path_privacy": path_privacy_metadata(),
+    }
diff --git a/src/codex_usage_tracker/diagnostic_snapshot_concentration.py b/src/codex_usage_tracker/diagnostic_snapshot_concentration.py
new file mode 100644
index 0000000..9139409
--- /dev/null
+++ b/src/codex_usage_tracker/diagnostic_snapshot_concentration.py
@@ -0,0 +1,338 @@
+"""Aggregate diagnostic concentration snapshot analysis."""
+
+from __future__ import annotations
+
+import hashlib
+import re
+from pathlib import Path
+from typing import Any
+
+from codex_usage_tracker.store import connect
+from codex_usage_tracker.store_schema import init_db
+
+DIAGNOSTIC_HISTORY_ACTIVE = "active"  # history scope reported when archived rows are excluded
+DIAGNOSTIC_HISTORY_ALL = "all"  # history scope reported when archived rows are included
+SAFE_PATH_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:@*+-]{1,80}$")  # basenames that may be shown verbatim
+SENSITIVE_LABEL_PREFIXES = ("sk-", "sk_", "ghp_", "github_pat_", "xox")  # such labels are masked as "path"
+
+
+def compute_concentration(
+    *,
+    db_path: Path,
+    include_archived: bool,
+) -> dict[str, Any]:
+    """Build the token-concentration snapshot: usage rows bucketed by source log, cwd and day.
+
+    Args:
+        db_path: Database location handed to ``connect``.
+        include_archived: When false, only rows with ``is_archived = 0`` are read.
+
+    Returns:
+        A dict with meta, summary, metrics, dimensions, largest_impact_rows and privacy entries.
+    """
+    # Constant SQL fragment picked from a boolean, so interpolating it below is injection-safe.
+    where = "" if include_archived else "WHERE is_archived = 0"
+    with connect(db_path) as conn:
+        init_db(conn)
+        rows = conn.execute(
+            f"""
+            SELECT
+                record_id,
+                session_id,
+                event_timestamp,
+                source_file,
+                cwd,
+                total_tokens
+            FROM usage_events
+            {where}
+            ORDER BY event_timestamp, record_id
+            """
+        ).fetchall()
+        source_row = conn.execute(
+            f"SELECT COUNT(DISTINCT source_file) AS source_logs_scanned FROM usage_events {where}"
+        ).fetchone()
+
+    source_groups: dict[str, dict[str, Any]] = {}
+    cwd_groups: dict[str, dict[str, Any]] = {}
+    day_groups: dict[str, dict[str, Any]] = {}
+    grand_total = 0
+    for row in rows:
+        tokens = _int_value(row["total_tokens"])
+        grand_total += tokens
+        record_id = str(row["record_id"])
+        session_id = _optional_str(row["session_id"])
+        cwd_ref = _cwd_group_ref(row["cwd"])
+        day = _day_label(row["event_timestamp"])
+        # One (groups, key, label, hash) entry per bucketing this row takes part in.
+        memberships = (
+            (
+                source_groups,
+                _source_group_key(row["source_file"]),
+                _source_group_label(row["source_file"], session_id=session_id),
+                _source_group_hash(row["source_file"]),
+            ),
+            (cwd_groups, cwd_ref["group_hash"], cwd_ref["label"], cwd_ref["group_hash"]),
+            (day_groups, day, day, _stable_hash(day)),
+        )
+        for groups, key, label, group_hash in memberships:
+            _add_concentration_row(
+                groups,
+                key=key,
+                label=label,
+                group_hash=group_hash,
+                tokens=tokens,
+                record_id=record_id,
+                session_id=session_id,
+            )
+
+    dimensions = [
+        _concentration_dimension(name, title, groups, total_tokens=grand_total)
+        for name, title, groups in (
+            ("source_log", "Source Log / Session", source_groups),
+            ("cwd", "Cwd / Project", cwd_groups),
+            ("day", "Day", day_groups),
+        )
+    ]
+    return {
+        "meta": {"source_logs_scanned": _int_value(source_row["source_logs_scanned"])},
+        "summary": {
+            "usage_rows": len(rows),
+            "total_tokens": grand_total,
+            "dimension_count": len(dimensions),
+            "history_scope": _history_scope(include_archived),
+        },
+        "metrics": _concentration_metrics(dimensions),
+        "dimensions": dimensions,
+        "largest_impact_rows": _largest_impact_rows(dimensions),
+        "privacy": concentration_privacy_metadata(),
+    }
+
+
+
+def _add_concentration_row(
+    groups: dict[str, dict[str, Any]],
+    *,
+    key: str,
+    label: str,
+    group_hash: str,
+    tokens: int,
+    record_id: str,
+    session_id: str | None,
+) -> None:
+    """Fold one usage row into ``groups[key]``; the first row of a group fixes its label and hash."""
+    bucket = groups.get(key)
+    if bucket is None:
+        bucket = groups[key] = {
+            "label": label,
+            "group_hash": group_hash,
+            "total_tokens": 0,
+            "usage_rows": 0,
+            "largest_record_id": None,
+            "largest_call_tokens": 0,
+            "session_ids": set(),
+        }
+    bucket["total_tokens"] = int(bucket["total_tokens"]) + tokens
+    bucket["usage_rows"] = int(bucket["usage_rows"]) + 1
+    # Strictly greater: on a tie the earlier record stays the group's largest call.
+    if tokens > int(bucket["largest_call_tokens"]):
+        bucket["largest_call_tokens"], bucket["largest_record_id"] = tokens, record_id
+    if session_id:
+        bucket["session_ids"].add(session_id)
+
+
+def _concentration_dimension(
+    dimension: str,
+    label: str,
+    groups: dict[str, dict[str, Any]],
+    *,
+    total_tokens: int,
+) -> dict[str, Any]:
+    """Summarise one bucketing: its ranked group rows (top 10 kept), top-1/3/5 shares
+    and effective group count.
+    """
+    ranked = [_concentration_group_row(dimension, group, total_tokens=total_tokens) for group in groups.values()]
+    # Largest token totals first; ties fall back to row count, then label, for stable output.
+    ranked.sort(key=lambda row: (-int(row["total_tokens"]), -int(row["usage_rows"]), row["label"]))
+    shares = {f"top_{top_n}_share": _top_share(ranked, top_n, total_tokens=total_tokens) for top_n in (1, 3, 5)}
+    return {
+        "dimension": dimension,
+        "label": label,
+        "group_count": len(ranked),
+        "total_tokens": total_tokens,
+        **shares,
+        "effective_group_count": _effective_group_count(ranked, total_tokens=total_tokens),
+        "top_rows": ranked[:10],
+    }
+
+
+def _concentration_group_row(dimension: str, group: dict[str, Any], *, total_tokens: int) -> dict[str, Any]:
+    """Flatten one accumulated group into its public row, adding its share of ``total_tokens``."""
+    group_tokens = int(group["total_tokens"])
+    sessions = sorted(group["session_ids"])
+    # A session id is only reported when every row of the group belongs to that one session;
+    # groups spanning several sessions (or none) report None.
+    only_session = sessions[0] if len(sessions) == 1 else None
+    return {
+        "dimension": dimension,
+        "label": group["label"],
+        "group_hash": group["group_hash"],
+        "usage_rows": int(group["usage_rows"]),
+        "total_tokens": group_tokens,
+        "share": _rounded_ratio(group_tokens, total_tokens),
+        "largest_record_id": group["largest_record_id"],
+        "largest_call_tokens": int(group["largest_call_tokens"]),
+        "session_id": only_session,
+    }
+
+
+def _concentration_metrics(dimensions: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """List the top-1/3/5 share of every dimension as flat metric rows."""
+    # Rows follow the order of ``dimensions`` and, within a dimension, ascending ``top_n``.
+    # The dimension name is stringified once per dimension, not once per row.
+    return [
+        {
+            "metric": f"top_{top_n}_{name}_share",
+            "dimension": name,
+            "top_n": top_n,
+            "share": dimension[f"top_{top_n}_share"],
+        }
+        for name, dimension in ((str(entry["dimension"]), entry) for entry in dimensions)
+        for top_n in (1, 3, 5)
+    ]
+
+
+def _largest_impact_rows(dimensions: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Return copies of the 15 highest-share rows pooled from every dimension's ``top_rows``."""
+    pooled = [dict(row) for dimension in dimensions for row in dimension["top_rows"]]
+    # Highest share first, then most tokens; dimension and label break any remaining ties.
+    pooled.sort(
+        key=lambda row: (-float(row["share"]), -int(row["total_tokens"]), row["dimension"], row["label"]),
+    )
+    # The inputs are left untouched: only the shallow copies are sorted and truncated.
+    return pooled[:15]
+
+
+def _top_share(rows: list[dict[str, Any]], top_n: int, *, total_tokens: int) -> float:
+    """Return the rounded share of ``total_tokens`` held by the first ``top_n`` rows.
+
+    ``rows`` is taken in the order given; the caller sorts it largest-first beforehand.
+    """
+    leading_tokens = sum(int(row["total_tokens"]) for row in rows[:top_n])
+    return _rounded_ratio(leading_tokens, total_tokens)
+
+
+def _effective_group_count(rows: list[dict[str, Any]], *, total_tokens: int) -> float:
+    """Return the inverse of the summed squared token shares (1 / HHI), rounded to 6 places.
+
+    Yields 0.0 when ``total_tokens`` is not positive or the squared shares sum to zero.
+    """
+    if total_tokens <= 0:
+        return 0.0
+    concentration = sum((int(row["total_tokens"]) / total_tokens) ** 2 for row in rows)
+    return round(1 / concentration, 6) if concentration else 0.0
+
+
+def _source_group_key(value: object) -> str:  # grouping key of a source log: simply its stable hash
+    return _source_group_hash(value)
+
+
+def _source_group_hash(value: object) -> str:
+    """Hash a source-log path; missing or non-string values all hash as ``unknown_source``."""
+    return _stable_hash(value if isinstance(value, str) and value else "unknown_source")
+
+
+def _source_group_label(value: object, *, session_id: str | None) -> str:
+    """Label a source group by the first 8 characters of its session id, else by its source hash."""
+    prefix, ident = ("session", session_id[:8]) if session_id else ("source", _source_group_hash(value))
+    return f"{prefix}:{ident}"
+
+
+def _cwd_group_ref(value: object) -> dict[str, str]:
+    """Map a cwd to its display label and hash; unusable values share the ``unknown_cwd`` group."""
+    path_ref = _path_ref_from_token(value) if isinstance(value, str) and value else None
+    if path_ref is None:
+        return {"label": "unknown_cwd", "group_hash": _stable_hash("unknown_cwd")}
+    return {"label": path_ref["path_label"], "group_hash": path_ref["path_hash"]}
+
+
+def _day_label(value: object) -> str:
+    """Return the leading ``YYYY-MM-DD`` of a timestamp string, or ``unknown_day``."""
+    if not isinstance(value, str):
+        return "unknown_day"
+    leading_date = re.match(r"^\d{4}-\d{2}-\d{2}", value)
+    return leading_date.group(0) if leading_date else "unknown_day"
+
+
+
+
+def concentration_privacy_metadata() -> dict[str, str]:  # static description of the label/hash policies
+    return {
+        "source_log_label_policy": "session_id_prefix_or_source_hash",
+        "cwd_label_policy": "basename_only",
+        "hash_policy": "sha256_12",
+        "raw_source_paths_included": "false",  # flags are the strings "false", not booleans
+        "raw_cwd_paths_included": "false",
+    }
+
+
+def _path_ref_from_token(token: str) -> dict[str, str] | None:
+    """Turn a path-like token into a label/hash reference, or None when it is not usable as one."""
+    candidate = token.strip()
+    # Rejected: blanks, "-", shell expansions ($..., `...`), URLs, separators and NAME=value.
+    if not candidate or candidate == "-" or candidate.startswith(("$", "`")) or "://" in candidate:
+        return None
+    if _is_shell_separator(candidate) or _looks_like_assignment(candidate):
+        return None
+    label = _safe_path_label(candidate)
+    digest = _stable_hash(candidate)
+    return None if label is None else {"path_key": digest, "path_label": label, "path_hash": digest}
+
+
+def _safe_path_label(token: str) -> str | None:
+    """Return a display-safe basename for ``token``: None if empty, "path" if it must be masked."""
+    trimmed = token.rstrip("/")
+    # "." and ".." are kept verbatim; otherwise keep the text after the last "/" or "\\".
+    name = trimmed if trimmed in {".", ".."} else re.split(r"[\\/]", trimmed)[-1]
+    if not name:
+        return None
+    masked = name.lower().startswith(SENSITIVE_LABEL_PREFIXES) or not SAFE_PATH_LABEL_RE.fullmatch(name)
+    return "path" if masked else name
+
+
+def _is_shell_separator(token: str) -> bool:  # true for the shell control operators &&, ||, ; and |
+    return token in {"&&", "||", ";", "|"}
+
+
+def _looks_like_assignment(token: str) -> bool:  # true for NAME=value tokens (value may be empty)
+    return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*=.*", token))
+
+
+def _stable_hash(value: str) -> str:  # first 12 hex characters of the SHA-256 of the UTF-8 bytes
+    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
+
+
+def _optional_str(value: object) -> str | None:  # non-empty strings pass through; all else is None
+    return value if isinstance(value, str) and value else None
+
+
+def _history_scope(include_archived: bool) -> str:  # "all" when archived rows are included, else "active"
+    return DIAGNOSTIC_HISTORY_ALL if include_archived else DIAGNOSTIC_HISTORY_ACTIVE
+
+
+def _int_value(value: object) -> int:
+    """Coerce ints, floats and numeric strings to ``int``; anything else, or unparsable, is 0."""
+    if not isinstance(value, (int, float, str)) or value == "":
+        return 0
+    try:
+        return int(value)
+    except (ValueError, OverflowError):  # non-numeric text, NaN or infinity
+        return 0
+
+
+def _ratio(numerator: int, denominator: int) -> float:  # plain division; 0.0 for a zero denominator
+    return numerator / denominator if denominator else 0.0
+
+
+def _rounded_ratio(numerator: int, denominator: int) -> float:  # _ratio rounded to 6 decimal places
+    return round(_ratio(numerator, denominator), 6)
diff --git a/src/codex_usage_tracker/diagnostic_snapshot_constants.py b/src/codex_usage_tracker/diagnostic_snapshot_constants.py
new file mode 100644
index 0000000..e98ce92
--- /dev/null
+++ b/src/codex_usage_tracker/diagnostic_snapshot_constants.py
@@ -0,0 +1,20 @@
+"""Shared constants for diagnostic snapshot reports."""
+
+DIAGNOSTIC_OVERVIEW_SCHEMA = "codex-usage-tracker-diagnostic-overview-v1"  # one schema id per section
+DIAGNOSTIC_TOOL_OUTPUT_SCHEMA = "codex-usage-tracker-diagnostic-tool-output-v1"
+DIAGNOSTIC_COMMANDS_SCHEMA = "codex-usage-tracker-diagnostic-commands-v1"
+DIAGNOSTIC_FILE_READS_SCHEMA = "codex-usage-tracker-diagnostic-file-reads-v1"
+DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA = "codex-usage-tracker-diagnostic-read-productivity-v1"
+DIAGNOSTIC_CONCENTRATION_SCHEMA = "codex-usage-tracker-diagnostic-concentration-v1"
+DIAGNOSTIC_OVERVIEW_SECTION = "overview"  # section names as they appear in a payload's "section" field
+DIAGNOSTIC_TOOL_OUTPUT_SECTION = "tool-output"
+DIAGNOSTIC_COMMANDS_SECTION = "commands"
+DIAGNOSTIC_FILE_READS_SECTION = "file-reads"
+DIAGNOSTIC_READ_PRODUCTIVITY_SECTION = "read-productivity"
+DIAGNOSTIC_CONCENTRATION_SECTION = "concentration"
+DIAGNOSTIC_HISTORY_ACTIVE = "active"  # history scope values
+DIAGNOSTIC_HISTORY_ALL = "all"
+DIAGNOSTIC_SNAPSHOT_NOTES = [
+    "Diagnostic snapshots are recomputed only by explicit diagnostic refresh.",
+    "Snapshot totals are aggregate-only and do not include raw context.",
+]
diff --git a/src/codex_usage_tracker/diagnostic_snapshot_events.py b/src/codex_usage_tracker/diagnostic_snapshot_events.py
new file mode 100644
index 0000000..5170f6e
--- /dev/null
+++ b/src/codex_usage_tracker/diagnostic_snapshot_events.py
@@ -0,0 +1,399 @@
+"""Safe event parsing helpers for diagnostic snapshot reports."""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import re
+import shlex
+from collections import Counter
+from pathlib import Path
+from typing import Any
+
+SAFE_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:-]{1,80}$")  # shape of labels accepted by safe_label
+SAFE_PATH_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:@*+-]{1,80}$")  # basenames that may be shown verbatim
+SENSITIVE_LABEL_PREFIXES = ("sk-", "sk_", "ghp_", "github_pat_", "xox")  # never echoed as labels
+SHELL_TOOL_NAMES = {  # tool names (or trailing name segments) that is_shell_tool treats as shell calls
+    "bash",
+    "exec_command",
+    "functions.exec_command",
+    "run_command",
+    "shell",
+    "terminal",
+    "write_stdin",
+}
+READ_COMMAND_ROOTS = {"cat", "find", "grep", "head", "nl", "rg", "sed", "strings", "tail", "wc"}  # scanned for paths
+SEARCH_READ_ROOTS = {"find", "rg"}  # reported as search_path_scan instead of direct_file_read
+READ_PRODUCTIVITY_NOTE = (
+    "Read-to-modify counts are temporal correlations: a read is counted when the same "
+    "privacy-preserving path key is modified later in the same source log."
+)
+ORIGINAL_OUTPUT_RE = re.compile(  # output header: chunk id, wall time, optional status, token count
+    r"^Chunk ID: (?P<chunk>[^\n]+)\n"
+    r"Wall time: (?P<wall>[^\n]+)\n"
+    r"(?:(?P<status>Process exited with code -?\d+|Process running with session ID \d+)\n)?"
+    r"Original token count: (?P<count>\d+)\n",
+    re.S,
+)
+
+
+def shell_command_from_payload(payload: dict[str, Any], *, function_name: str) -> str | None:
+    """Return the command string of a shell tool call, or None when there is none.
+
+    ``cmd``/``command`` is looked up in ``arguments`` (dict or JSON text), then on the payload.
+    """
+    if not is_shell_tool(function_name):
+        return None
+    arguments = payload.get("arguments")
+    if isinstance(arguments, str):
+        try:
+            arguments = json.loads(arguments)
+        except json.JSONDecodeError:
+            arguments = None
+    for holder in (arguments, payload):
+        if isinstance(holder, dict):
+            candidate = holder.get("cmd") or holder.get("command")
+            if isinstance(candidate, str):
+                return candidate
+    return None
+
+
+def is_shell_tool(function_name: str) -> bool:
+    """Tell whether a tool name, or its last ``.``/``__``-separated segment, is a known shell tool."""
+    lowered = function_name.lower()
+    return any(name in SHELL_TOOL_NAMES for name in (lowered, lowered.rsplit(".", 1)[-1].rsplit("__", 1)[-1]))
+
+
+def command_root_and_child(command: str) -> tuple[str, str]:
+    """Classify a shell command as ``(root, child)`` labels; with no tokens both are "unknown"."""
+    argv = _strip_command_wrappers(_command_tokens(command))
+    if argv:
+        return (root := _command_root(argv)), _command_child(root, argv)
+    return "unknown_command", "unknown"
+
+
+def read_path_refs_from_command(command: str, *, root: str) -> list[dict[str, str]]:
+    """Return de-duplicated path references for the paths a read command targets.
+
+    Commands whose root is not in ``READ_COMMAND_ROOTS`` yield an empty list.
+    """
+    if root not in READ_COMMAND_ROOTS:
+        return []
+    tokens = _strip_command_wrappers(_command_tokens(command))
+    if not tokens:
+        return []
+    unique: dict[str, dict[str, str]] = {}
+    for token in _read_path_tokens(root=root, tokens=tokens):
+        path_ref = _path_ref_from_token(token)
+        if path_ref is not None:
+            unique.setdefault(path_ref["path_key"], path_ref)  # first reference per key wins
+    return list(unique.values())
+
+
+def read_reader(root: str) -> str:
+    """Name the reader category of a read command root: search scan or direct file read."""
+    kind = "search_path_scan" if root in SEARCH_READ_ROOTS else "direct_file_read"
+    return f"{kind}:{root}"
+
+
+def modified_path_refs(payload: dict[str, Any]) -> list[dict[str, str]]:
+    """Collect de-duplicated path references from a ``patch_apply_end`` event payload.
+
+    Any other event type yields an empty list.
+    """
+    if payload.get("type") != "patch_apply_end":
+        return []
+    unique: dict[str, dict[str, str]] = {}
+    # Fields are visited in this fixed order, which determines the order of the result.
+    for field in ("changed_paths", "paths", "files", "modified_paths", "changes"):
+        for path in _path_values(payload.get(field)):
+            path_ref = _path_ref_from_token(path)
+            if path_ref is not None:
+                # The first occurrence of each path key wins.
+                unique.setdefault(path_ref["path_key"], path_ref)
+    return list(unique.values())
+
+
+def path_privacy_metadata() -> dict[str, str]:  # static description of how path labels/hashes are derived
+    return {
+        "label_policy": "basename_only",
+        "hash_policy": "sha256_12",
+        "normal": "basename_only_with_hash",
+        "redacted": "basename_only_with_hash",
+        "strict": "hash_available_for_hiding_labels",
+    }
+
+
+def original_output_count(output: object) -> int | None:
+    """Parse the ``Original token count`` value from the header at the start of a tool output.
+
+    Returns None for non-string outputs and for text that does not begin with that header.
+    """
+    header = ORIGINAL_OUTPUT_RE.match(output) if isinstance(output, str) else None
+    return int(header.group("count")) if header else None
+
+
+def optional_str(value: object) -> str | None:  # non-empty strings pass through; all else is None
+    return value if isinstance(value, str) and value else None
+
+
+def safe_label(value: object) -> str | None:
+    """Return ``value`` stripped and lower-cased when it is a harmless short label, else None."""
+    if not isinstance(value, str):
+        return None
+    text = value.strip()
+    label = text.lower()
+    # Secret-looking prefixes and anything containing a path separator are never echoed.
+    if label.startswith(SENSITIVE_LABEL_PREFIXES) or "/" in text or "\\" in text:
+        return None
+    return label if SAFE_LABEL_RE.fullmatch(text) else None
+
+
+def simple_rows(
+    counter: Counter[str],
+    *,
+    key_name: str = "name",
+) -> list[dict[str, Any]]:
+    """Render a counter as ``{key_name: name, "count": n}`` rows, most frequent first."""
+    # Equal counts are ordered by name so the output is deterministic.
+    ranked = sorted(counter.items(), key=lambda item: (-item[1], item[0]))
+    return [{key_name: name, "count": int(count)} for name, count in ranked]
+
+
+def unique_path_rows(paths: list[dict[str, str]]) -> list[dict[str, str]]:
+    """Return at most 25 ``path_label``/``path_hash`` rows, one per distinct hash, first seen first."""
+    first_by_hash: dict[str, dict[str, str]] = {}
+    for path in paths:
+        digest = path["path_hash"]
+        # Later duplicates of a hash are ignored, so the first label seen is the one reported.
+        if digest not in first_by_hash:
+            first_by_hash[digest] = {"path_label": path["path_label"], "path_hash": digest}
+    # Dicts keep insertion order, so this is the first-seen order of the hashes.
+    return list(first_by_hash.values())[:25]
+
+
+def allocate_token_count(count: int, bucket_count: int) -> list[int]:
+    """Split ``count`` into ``bucket_count`` near-equal integers; the remainder goes to the first ones."""
+    if bucket_count <= 0:
+        return []
+    share, extra = divmod(count, bucket_count)
+    return [share + 1] * extra + [share] * (bucket_count - extra)
+
+
+def int_value(value: object) -> int:
+    """Coerce ints, floats and numeric strings to ``int``; anything else, or unparsable, is 0."""
+    if not isinstance(value, int | float | str) or value == "":
+        return 0
+    try:
+        return int(value)
+    except (ValueError, OverflowError):  # non-numeric text, NaN or infinity
+        return 0
+
+
+def ratio(numerator: int, denominator: int) -> float:  # plain division; 0.0 for a zero denominator
+    return numerator / denominator if denominator else 0.0
+
+
+def _read_path_tokens(*, root: str, tokens: list[str]) -> list[str]:
+    """Return the operands of a read command that name paths (never its pattern or script)."""
+    args = tokens[1:]
+    if root in {"find", "rg"}:
+        return _find_path_tokens(args) if root == "find" else _ripgrep_path_tokens(args)
+    operands = _non_option_operands(args, root=root)
+    if root not in {"grep", "sed"}:
+        return operands
+    # grep/sed take the pattern/script from the first operand only when no -e/-f style option
+    # supplied it; only this command's own arguments (up to a separator or "--") are checked.
+    own = args[: next((i for i, arg in enumerate(args) if arg == "--" or _is_shell_separator(arg)), len(args))]
+    supplied = any(arg.split("=", 1)[0] in {"-e", "-f", "--regexp", "--expression", "--file"} for arg in own)
+    return operands if supplied else operands[1:]
+
+
+def _find_path_tokens(args: list[str]) -> list[str]:
+    """Return the starting points of a ``find`` invocation, defaulting to ``["."]``."""
+    starts: list[str] = []
+    for token in args:
+        # Options and operators ("-name", "!", "(") begin the expression; "--" is just skipped.
+        is_expression = (token.startswith("-") and token != "--") or token in {"!", "(", ")"}
+        if is_expression or _is_shell_separator(token):
+            break
+        if token != "--":
+            starts.append(token)
+    return starts or ["."]
+
+
+def _ripgrep_path_tokens(args: list[str]) -> list[str]:  # path operands of an `rg` invocation
+    operands = _non_option_operands(args, root="rg")
+    if any(token == "--files" or token.startswith("--files=") for token in args):
+        return operands or ["."]  # with --files every operand is kept; none at all means "."
+    return operands[1:] if len(operands) > 1 else []  # NOTE(review): first operand dropped even with -e/-f
+
+
+def _non_option_operands(args: list[str], *, root: str) -> list[str]:
+    """Return the positional operands of one simple command, skipping options and their values.
+
+    Scanning stops at the first shell separator or redirection, so later pipeline stages and
+    redirect targets are never reported as operands.
+    """
+    option_args = _option_args_for_root(root)
+    operands: list[str] = []
+    skip_next = False
+    passthrough = False
+    for token in args:
+        if skip_next:
+            skip_next = False
+            continue
+        # fd-prefixed redirects reach us fused with their target ("2>/dev/null", "2>&1")
+        # because shlex does not split on ">"; treat them like the bare operators.
+        if _is_shell_separator(token) or token in {">", ">>", "<"} or re.match(r"\d+>", token):
+            break
+        if passthrough or not token.startswith("-"):
+            operands.append(token)
+        elif token == "--":
+            passthrough = True
+        elif token in option_args:
+            # Value-taking option written without "=": its value is the next token.
+            skip_next = True
+    return operands
+
+
+def _option_args_for_root(root: str) -> set[str]:  # options of `root` that consume the following token
+    return {
+        "grep": {
+            "-A",
+            "-B",
+            "-C",
+            "-e",
+            "-f",
+            "-m",
+            "--after-context",
+            "--before-context",
+            "--context",
+            "--file",
+            "--max-count",
+            "--regexp",
+        },
+        "head": {"-c", "-n", "--bytes", "--lines"},
+        "rg": {
+            "-A",
+            "-B",
+            "-C",
+            "-e",
+            "-f",
+            "-g",
+            "-m",
+            "-t",
+            "-T",
+            "--after-context",
+            "--before-context",
+            "--context",
+            "--file",
+            "--glob",
+            "--max-count",
+            "--max-depth",
+            "--type",
+            "--type-not",
+        },
+        "sed": {"-e", "-f", "--expression", "--file"},
+        "tail": {"-c", "-n", "--bytes", "--lines"},
+    }.get(root, set())  # roots without an entry have no value-taking options
+
+
+def _path_values(value: object) -> list[str]:
+    """Recursively collect path strings from a string, a list/tuple, or a dict of path fields.
+
+    Dicts contribute only their path/file/filename/new_path/old_path entries, in that order;
+    any other value type contributes nothing.
+    """
+    if isinstance(value, str):
+        return [value]
+    if isinstance(value, dict):
+        children = [value.get(key) for key in ("path", "file", "filename", "new_path", "old_path")]
+    elif isinstance(value, list | tuple):
+        children = list(value)
+    else:
+        return []
+    return [path for child in children for path in _path_values(child)]
+
+
+def _path_ref_from_token(token: str) -> dict[str, str] | None:
+    """Turn a path-like token into a label/hash reference, or None when it is not usable as one."""
+    candidate = token.strip()
+    # Rejected: blanks, "-", shell expansions ($..., `...`), URLs, separators and NAME=value.
+    if not candidate or candidate == "-" or candidate.startswith(("$", "`")) or "://" in candidate:
+        return None
+    if _is_shell_separator(candidate) or _looks_like_assignment(candidate):
+        return None
+    label = _safe_path_label(candidate)
+    digest = _stable_hash(candidate)
+    return None if label is None else {"path_key": digest, "path_label": label, "path_hash": digest}
+
+
+def _safe_path_label(token: str) -> str | None:
+    """Return a display-safe basename for ``token``: None if empty, "path" if it must be masked."""
+    normalized = token.rstrip("/")
+    # Treat "\\" as a separator too, so Windows-style paths get the same label on every host OS.
+    label = normalized if normalized in {".", ".."} else Path(normalized.replace("\\", "/")).name
+    if not label:
+        return None
+    masked = label.lower().startswith(SENSITIVE_LABEL_PREFIXES) or not SAFE_PATH_LABEL_RE.fullmatch(label)
+    return "path" if masked else label
+
+
+def _stable_hash(value: str) -> str:  # first 12 hex characters of the SHA-256 of the UTF-8 bytes
+    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
+
+
+def _command_tokens(command: str) -> list[str]:  # POSIX-mode shlex tokenisation of a command line
+    try:
+        return shlex.split(command, posix=True)
+    except ValueError:
+        return []  # shlex rejected the text (e.g. an unclosed quote): treated as no tokens
+
+
+def _strip_command_wrappers(tokens: list[str]) -> list[str]:
+    """Drop leading ``NAME=value`` assignments and ``command``/``env``/``sudo`` wrappers.
+
+    Returns a new list that starts at the first token which is neither.
+    """
+    start = 0
+    while start < len(tokens):
+        head = tokens[start]
+        # Stop at the first token that is neither an assignment nor a wrapper program.
+        if not _looks_like_assignment(head) and _basename(head) not in {"command", "env", "sudo"}:
+            break
+        start += 1
+    return tokens[start:]
+
+
+def _command_root(tokens: list[str]) -> str:
+    """Return the normalised command family of ``tokens[0]`` (e.g. ``python3.11`` -> ``python``)."""
+    base = _basename(tokens[0])
+    if base in {"py.test", "pytest"}:
+        return "pytest"
+    is_python = base == "py" or base.startswith("python")
+    return "python" if is_python else safe_label(base) or "unknown_command"
+
+
+def _command_child(root: str, tokens: list[str]) -> str:
+    """Return a privacy-safe label for the sub-command (first argument) of ``tokens``."""
+    if root == "python":
+        for index, token in enumerate(tokens[:-1]):
+            if token == "-m":
+                return f"-m:{safe_label(_basename(tokens[index + 1]).split('.', 1)[0]) or 'unknown'}"
+        if len(tokens) <= 1 or not tokens[1].startswith("-"):
+            return "<script>"
+        # Echo an interpreter option only when it is label-safe; never leak raw argument text.
+        return tokens[1] if SAFE_LABEL_RE.fullmatch(tokens[1]) else "<option>"
+    return (safe_label(_basename(tokens[1])) or "<arg>") if len(tokens) > 1 else "<none>"
+
+
+def _is_shell_separator(token: str) -> bool:  # true for the shell control operators &&, ||, ; and |
+    return token in {"&&", "||", ";", "|"}
+
+
+def _looks_like_assignment(token: str) -> bool:  # true for NAME=value tokens (value may be empty)
+    return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*=.*", token))
+
+
+def _basename(token: str) -> str:  # lower-cased text after the last "/" or "\" in the token
+    return re.split(r"[\\/]", token)[-1].lower()
diff --git a/src/codex_usage_tracker/diagnostic_snapshot_report.py b/src/codex_usage_tracker/diagnostic_snapshot_report.py
new file mode 100644
index 0000000..4f4177e
--- /dev/null
+++ b/src/codex_usage_tracker/diagnostic_snapshot_report.py
@@ -0,0 +1,140 @@
+"""CLI rendering for persisted diagnostic snapshot reports."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from codex_usage_tracker.diagnostic_snapshot_constants import (
+    DIAGNOSTIC_COMMANDS_SECTION,
+    DIAGNOSTIC_CONCENTRATION_SECTION,
+    DIAGNOSTIC_FILE_READS_SECTION,
+    DIAGNOSTIC_READ_PRODUCTIVITY_SECTION,
+    DIAGNOSTIC_TOOL_OUTPUT_SECTION,
+)
+from codex_usage_tracker.diagnostic_snapshot_events import READ_PRODUCTIVITY_NOTE, int_value
+
+
+@dataclass(frozen=True)
+class DiagnosticSnapshotReport:
+    """Resolved diagnostic snapshot payload for CLI and API surfaces."""
+
+    payload: dict[str, Any]
+
+    def render(self) -> str:
+        """Return the plain-text summary, or a refresh hint when the snapshot is not ready."""
+        if self.payload.get("status") != "ready":
+            section = str(self.payload.get("section") or "snapshot")
+            return f"No diagnostic {section} snapshot. Run diagnostics {section} --refresh first."
+        section = self.payload.get("section")
+        renderers = (
+            (DIAGNOSTIC_TOOL_OUTPUT_SECTION, self._render_tool_output),
+            (DIAGNOSTIC_COMMANDS_SECTION, self._render_commands),
+            (DIAGNOSTIC_FILE_READS_SECTION, self._render_file_reads),
+            (DIAGNOSTIC_READ_PRODUCTIVITY_SECTION, self._render_read_productivity),
+            (DIAGNOSTIC_CONCENTRATION_SECTION, self._render_concentration),
+        )
+        # Compared with == rather than a dict lookup so an unhashable section cannot raise.
+        for name, renderer in renderers:
+            if section == name:
+                return renderer()
+        # Every other section value is rendered as the overview.
+        return self._render_overview()
+
+    def _heading(self, title: str) -> list[str]:
+        """Return the title plus the computed-at and history-scope lines shared by all sections."""
+        snapshot = self.payload.get("snapshot") or {}
+        return [
+            title,
+            f"Computed: {snapshot.get('computed_at')}",
+            f"History scope: {snapshot.get('history_scope')}",
+        ]
+
+    def _render_overview(self) -> str:
+        """Render usage and cache totals from the payload's ``overview`` entry."""
+        overview = self.payload.get("overview") or {}
+        return "\n".join(
+            [
+                *self._heading("Diagnostic overview snapshot"),
+                f"Usage rows: {_int_text(overview.get('usage_rows'))}",
+                f"Total tokens: {_int_text(overview.get('total_tokens'))}",
+                f"Cached input: {_int_text(overview.get('cached_input_tokens'))}",
+                f"Uncached input: {_int_text(overview.get('uncached_input_tokens'))}",
+                f"Cache ratio: {_pct_text(overview.get('cache_ratio'))}",
+            ]
+        )
+
+    def _render_tool_output(self) -> str:
+        """Render function call/output counts and output token totals from ``summary``."""
+        summary = self.payload.get("summary") or {}
+        return "\n".join(
+            [
+                *self._heading("Diagnostic tool-output snapshot"),
+                f"Function calls: {_int_text(summary.get('function_calls'))}",
+                f"Function outputs: {_int_text(summary.get('function_outputs'))}",
+                f"Outputs with Original token count: {_int_text(summary.get('outputs_with_original_token_count'))}",
+                f"Terminal output tokens: {_int_text(summary.get('original_token_sum'))}",
+            ]
+        )
+
+    def _render_commands(self) -> str:
+        """Render shell call, command root and missing-command counts from ``summary``."""
+        summary = self.payload.get("summary") or {}
+        return "\n".join(
+            [
+                *self._heading("Diagnostic commands snapshot"),
+                f"Shell calls: {_int_text(summary.get('shell_function_calls'))}",
+                f"Command roots: {_int_text(summary.get('command_root_count'))}",
+                f"Missing command text: {_int_text(summary.get('missing_command'))}",
+            ]
+        )
+
+    def _render_file_reads(self) -> str:
+        """Render read command/event counts and allocated output tokens from ``summary``."""
+        summary = self.payload.get("summary") or {}
+        return "\n".join(
+            [
+                *self._heading("Diagnostic file-reads snapshot"),
+                f"Read commands: {_int_text(summary.get('read_commands'))}",
+                f"Read events: {_int_text(summary.get('read_events'))}",
+                f"Allocated output tokens: {_int_text(summary.get('allocated_output_token_sum'))}",
+                f"Missing output counts: {_int_text(summary.get('read_events_missing_output_count'))}",
+            ]
+        )
+
+    def _render_read_productivity(self) -> str:
+        """Render read-to-modify counts and rate from ``summary``, followed by the correlation note."""
+        summary = self.payload.get("summary") or {}
+        return "\n".join(
+            [
+                *self._heading("Diagnostic read-productivity snapshot"),
+                f"Read events: {_int_text(summary.get('read_events'))}",
+                f"Read events modified later: {_int_text(summary.get('read_events_modified_later'))}",
+                f"Read-to-modify rate: {_pct_text(summary.get('read_events_modified_later_pct'))}",
+                READ_PRODUCTIVITY_NOTE,
+            ]
+        )
+
+    def _render_concentration(self) -> str:
+        """Render usage row, token and dimension counts from ``summary``."""
+        summary = self.payload.get("summary") or {}
+        return "\n".join(
+            [
+                *self._heading("Diagnostic concentration snapshot"),
+                f"Usage rows: {_int_text(summary.get('usage_rows'))}",
+                f"Total tokens: {_int_text(summary.get('total_tokens'))}",
+                f"Dimensions: {_int_text(summary.get('dimension_count'))}",
+            ]
+        )
+
+
+def _int_text(value: object) -> str:  # integer rendering with thousands separators, e.g. "12,345"
+    return f"{int_value(value):,}"
+
+
+def _pct_text(value: object) -> str:  # ratio rendered as a percentage with one decimal, e.g. "12.5%"
+    try:
+        ratio = float(value) if isinstance(value, int | float | str) and value != "" else 0.0
+    except (TypeError, ValueError):
+        ratio = 0.0  # non-numeric text falls back to zero instead of failing the render
+    return f"{ratio:.1%}"
diff --git a/src/codex_usage_tracker/diagnostic_snapshot_rows.py b/src/codex_usage_tracker/diagnostic_snapshot_rows.py
new file mode 100644
index 0000000..55333321
--- /dev/null
+++ b/src/codex_usage_tracker/diagnostic_snapshot_rows.py
@@ -0,0 +1,182 @@
+"""Row shaping helpers for diagnostic snapshot reports."""
+
+from __future__ import annotations
+
+from collections import Counter
+from typing import Any
+
+from codex_usage_tracker.diagnostic_snapshot_events import ratio, simple_rows
+
+
+def function_rows(
+    *,
+    function_calls: Counter[str],
+    function_outputs: Counter[str],
+    output_with_count: Counter[str],
+    output_missing_count: Counter[str],
+    output_token_sum: Counter[str],
+) -> list[dict[str, Any]]:
+    names = set(function_calls) | set(function_outputs) | set(output_with_count) | set(output_token_sum)
+    rows = [
+        {
+            "function": name,
+            "calls": int(function_calls[name]),
+            "outputs": int(function_outputs[name]),
+            "with_original_token_count": int(output_with_count[name]),
+            "missing_original_token_count": int(output_missing_count[name]),
+            "original_token_sum": int(output_token_sum[name]),
+        }
+        for name in names
+    ]
+    return sorted(rows, key=lambda row: (-int(row["original_token_sum"]), -int(row["calls"]), row["function"]))
+
+
+def command_output_rows(
+    *,
+    command_calls: Counter[str],
+    command_with_count: Counter[str],
+    command_missing_count: Counter[str],
+    command_token_sum: Counter[str],
+) -> list[dict[str, Any]]:
+    rows = [
+        {
+            "root": root,
+            "calls": int(command_calls[root]),
+            "with_original_token_count": int(command_with_count[root]),
+            "missing_original_token_count": int(command_missing_count[root]),
+            "original_token_sum": int(command_token_sum[root]),
+        }
+        for root in set(command_calls) | set(command_token_sum)
+    ]
+    return sorted(rows, key=lambda row: (-int(row["original_token_sum"]), -int(row["calls"]), row["root"]))
+
+
+def command_rows(
+    *,
+    command_calls: Counter[str],
+    command_children: dict[str, Counter[str]],
+) -> list[dict[str, Any]]:
+    rows = []
+    for root, total in command_calls.items():
+        children = simple_rows(command_children.get(root, Counter()), key_name="child")
+        rows.append({"root": root, "total": int(total), "children": children[:25]})
+    return sorted(rows, key=lambda row: (-int(row["total"]), row["root"]))
+
+
+def read_reader_rows(
+    *,
+    read_events_by_reader: Counter[str],
+    read_events_with_count_by_reader: Counter[str],
+    read_events_missing_count_by_reader: Counter[str],
+    read_tokens_by_reader: Counter[str],
+) -> list[dict[str, Any]]:
+    rows = [
+        {
+            "reader": reader,
+            "read_events": int(read_events_by_reader[reader]),
+            "events_with_output_count": int(read_events_with_count_by_reader[reader]),
+            "events_missing_output_count": int(read_events_missing_count_by_reader[reader]),
+            "allocated_output_token_sum": int(read_tokens_by_reader[reader]),
+        }
+        for reader in set(read_events_by_reader) | set(read_tokens_by_reader)
+    ]
+    return sorted(
+        rows,
+        key=lambda row: (-int(row["allocated_output_token_sum"]), -int(row["read_events"]), row["reader"]),
+    )
+
+
+def read_path_rows(
+    *,
+    read_path_refs: dict[str, dict[str, str]],
+    read_events_by_path: Counter[str],
+    read_tokens_by_path: Counter[str],
+) -> list[dict[str, Any]]:
+    rows = [
+        {
+            "path_label": read_path_refs[path_key]["path_label"],
+            "path_hash": read_path_refs[path_key]["path_hash"],
+            "read_events": int(read_events_by_path[path_key]),
+            "allocated_output_token_sum": int(read_tokens_by_path[path_key]),
+        }
+        for path_key in set(read_events_by_path) | set(read_tokens_by_path)
+        if path_key in read_path_refs
+    ]
+    return sorted(
+        rows,
+        key=lambda row: (
+            -int(row["allocated_output_token_sum"]),
+            -int(row["read_events"]),
+            row["path_label"],
+            row["path_hash"],
+        ),
+    )[:50]
+
+
+def largest_read_command_rows(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    return sorted(
+        rows,
+        key=lambda row: (
+            -int(row["original_token_count"]),
+            -int(row["read_event_count"]),
+            row["root"],
+        ),
+    )[:25]
+
+
+def read_productivity_reader_rows(
+    *,
+    read_events_by_reader: Counter[str],
+    read_modified_by_reader: Counter[str],
+) -> list[dict[str, Any]]:
+    rows = [
+        {
+            "reader": reader,
+            "read_events": int(read_events_by_reader[reader]),
+            "read_events_modified_later": int(read_modified_by_reader[reader]),
+            "read_events_modified_later_pct": ratio(
+                int(read_modified_by_reader[reader]),
+                int(read_events_by_reader[reader]),
+            ),
+        }
+        for reader in read_events_by_reader
+    ]
+    return sorted(
+        rows,
+        key=lambda row: (
+            -int(row["read_events_modified_later"]),
+            -int(row["read_events"]),
+            row["reader"],
+        ),
+    )
+
+
+def read_productivity_path_rows(
+    *,
+    read_path_refs: dict[str, dict[str, str]],
+    read_events_by_path: Counter[str],
+    read_modified_by_path: Counter[str],
+) -> list[dict[str, Any]]:
+    rows = [
+        {
+            "path_label": read_path_refs[path_key]["path_label"],
+            "path_hash": read_path_refs[path_key]["path_hash"],
+            "read_events": int(read_events_by_path[path_key]),
+            "read_events_modified_later": int(read_modified_by_path[path_key]),
+            "read_events_modified_later_pct": ratio(
+                int(read_modified_by_path[path_key]),
+                int(read_events_by_path[path_key]),
+            ),
+        }
+        for path_key in read_modified_by_path
+        if path_key in read_path_refs
+    ]
+    return sorted(
+        rows,
+        key=lambda row: (
+            -int(row["read_events_modified_later"]),
+            -int(row["read_events"]),
+            row["path_label"],
+            row["path_hash"],
+        ),
+    )[:50]
diff --git a/src/codex_usage_tracker/diagnostic_snapshots.py b/src/codex_usage_tracker/diagnostic_snapshots.py
index 3af5fd1..774db2b 100644
--- a/src/codex_usage_tracker/diagnostic_snapshots.py
+++ b/src/codex_usage_tracker/diagnostic_snapshots.py
@@ -2,16 +2,36 @@
 
 from __future__ import annotations
 
-import hashlib
-import json
-import re
-import shlex
-from collections import Counter, defaultdict
-from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
 
+from codex_usage_tracker.diagnostic_snapshot_analysis import (
+    analyze_indexed_source_logs,
+    path_privacy_metadata,
+)
+from codex_usage_tracker.diagnostic_snapshot_concentration import (
+    compute_concentration,
+    concentration_privacy_metadata,
+)
+from codex_usage_tracker.diagnostic_snapshot_constants import (
+    DIAGNOSTIC_COMMANDS_SCHEMA,
+    DIAGNOSTIC_COMMANDS_SECTION,
+    DIAGNOSTIC_CONCENTRATION_SCHEMA,
+    DIAGNOSTIC_CONCENTRATION_SECTION,
+    DIAGNOSTIC_FILE_READS_SCHEMA,
+    DIAGNOSTIC_FILE_READS_SECTION,
+    DIAGNOSTIC_HISTORY_ACTIVE,
+    DIAGNOSTIC_HISTORY_ALL,
+    DIAGNOSTIC_OVERVIEW_SCHEMA,
+    DIAGNOSTIC_OVERVIEW_SECTION,
+    DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA,
+    DIAGNOSTIC_READ_PRODUCTIVITY_SECTION,
+    DIAGNOSTIC_SNAPSHOT_NOTES,
+    DIAGNOSTIC_TOOL_OUTPUT_SCHEMA,
+    DIAGNOSTIC_TOOL_OUTPUT_SECTION,
+)
+from codex_usage_tracker.diagnostic_snapshot_report import DiagnosticSnapshotReport
 from codex_usage_tracker.paths import DEFAULT_DB_PATH
 from codex_usage_tracker.store import (
     connect,
@@ -20,163 +40,6 @@
 )
 from codex_usage_tracker.store_schema import init_db
 
-DIAGNOSTIC_OVERVIEW_SCHEMA = "codex-usage-tracker-diagnostic-overview-v1"
-DIAGNOSTIC_TOOL_OUTPUT_SCHEMA = "codex-usage-tracker-diagnostic-tool-output-v1"
-DIAGNOSTIC_COMMANDS_SCHEMA = "codex-usage-tracker-diagnostic-commands-v1"
-DIAGNOSTIC_FILE_READS_SCHEMA = "codex-usage-tracker-diagnostic-file-reads-v1"
-DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA = "codex-usage-tracker-diagnostic-read-productivity-v1"
-DIAGNOSTIC_CONCENTRATION_SCHEMA = "codex-usage-tracker-diagnostic-concentration-v1"
-DIAGNOSTIC_OVERVIEW_SECTION = "overview"
-DIAGNOSTIC_TOOL_OUTPUT_SECTION = "tool-output"
-DIAGNOSTIC_COMMANDS_SECTION = "commands"
-DIAGNOSTIC_FILE_READS_SECTION = "file-reads"
-DIAGNOSTIC_READ_PRODUCTIVITY_SECTION = "read-productivity"
-DIAGNOSTIC_CONCENTRATION_SECTION = "concentration"
-DIAGNOSTIC_HISTORY_ACTIVE = "active"
-DIAGNOSTIC_HISTORY_ALL = "all"
-DIAGNOSTIC_SNAPSHOT_NOTES = [
-    "Diagnostic snapshots are recomputed only by explicit diagnostic refresh.",
-    "Snapshot totals are aggregate-only and do not include raw context.",
-]
-SAFE_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:-]{1,80}$")
-SAFE_PATH_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:@*+-]{1,80}$")
-SENSITIVE_LABEL_PREFIXES = ("sk-", "sk_", "ghp_", "github_pat_", "xox")
-SHELL_TOOL_NAMES = {
-    "bash",
-    "exec_command",
-    "functions.exec_command",
-    "run_command",
-    "shell",
-    "terminal",
-    "write_stdin",
-}
-READ_COMMAND_ROOTS = {"cat", "find", "grep", "head", "nl", "rg", "sed", "strings", "tail", "wc"}
-SEARCH_READ_ROOTS = {"find", "rg"}
-READ_PRODUCTIVITY_NOTE = (
-    "Read-to-modify counts are temporal correlations: a read is counted when the same "
-    "privacy-preserving path key is modified later in the same source log."
-)
-ORIGINAL_OUTPUT_RE = re.compile(
-    r"^Chunk ID: (?P<chunk>[^\n]+)\n"
-    r"Wall time: (?P<wall>[^\n]+)\n"
-    r"(?:(?P<status>Process exited with code -?\d+|Process running with session ID \d+)\n)?"
-    r"Original token count: (?P<count>\d+)\n",
-    re.S,
-)
-
-
-@dataclass(frozen=True)
-class DiagnosticSnapshotReport:
-    """Resolved diagnostic snapshot payload for CLI and API surfaces."""
-
-    payload: dict[str, Any]
-
-    def render(self) -> str:
-        if self.payload.get("status") != "ready":
-            section = str(self.payload.get("section") or "snapshot")
-            return f"No diagnostic {section} snapshot. Run diagnostics {section} --refresh first."
-        section = self.payload.get("section")
-        if section == DIAGNOSTIC_TOOL_OUTPUT_SECTION:
-            return self._render_tool_output()
-        if section == DIAGNOSTIC_COMMANDS_SECTION:
-            return self._render_commands()
-        if section == DIAGNOSTIC_FILE_READS_SECTION:
-            return self._render_file_reads()
-        if section == DIAGNOSTIC_READ_PRODUCTIVITY_SECTION:
-            return self._render_read_productivity()
-        if section == DIAGNOSTIC_CONCENTRATION_SECTION:
-            return self._render_concentration()
-        return self._render_overview()
-
-    def _render_overview(self) -> str:
-        snapshot = self.payload.get("snapshot") or {}
-        overview = self.payload.get("overview") or {}
-        return "\n".join(
-            [
-                "Diagnostic overview snapshot",
-                f"Computed: {snapshot.get('computed_at')}",
-                f"History scope: {snapshot.get('history_scope')}",
-                f"Usage rows: {_int_text(overview.get('usage_rows'))}",
-                f"Total tokens: {_int_text(overview.get('total_tokens'))}",
-                f"Cached input: {_int_text(overview.get('cached_input_tokens'))}",
-                f"Uncached input: {_int_text(overview.get('uncached_input_tokens'))}",
-                f"Cache ratio: {_pct_text(overview.get('cache_ratio'))}",
-            ]
-        )
-
-    def _render_tool_output(self) -> str:
-        snapshot = self.payload.get("snapshot") or {}
-        summary = self.payload.get("summary") or {}
-        return "\n".join(
-            [
-                "Diagnostic tool-output snapshot",
-                f"Computed: {snapshot.get('computed_at')}",
-                f"History scope: {snapshot.get('history_scope')}",
-                f"Function calls: {_int_text(summary.get('function_calls'))}",
-                f"Function outputs: {_int_text(summary.get('function_outputs'))}",
-                f"Outputs with Original token count: {_int_text(summary.get('outputs_with_original_token_count'))}",
-                f"Terminal output tokens: {_int_text(summary.get('original_token_sum'))}",
-            ]
-        )
-
-    def _render_commands(self) -> str:
-        snapshot = self.payload.get("snapshot") or {}
-        summary = self.payload.get("summary") or {}
-        return "\n".join(
-            [
-                "Diagnostic commands snapshot",
-                f"Computed: {snapshot.get('computed_at')}",
-                f"History scope: {snapshot.get('history_scope')}",
-                f"Shell calls: {_int_text(summary.get('shell_function_calls'))}",
-                f"Command roots: {_int_text(summary.get('command_root_count'))}",
-                f"Missing command text: {_int_text(summary.get('missing_command'))}",
-            ]
-        )
-
-    def _render_file_reads(self) -> str:
-        snapshot = self.payload.get("snapshot") or {}
-        summary = self.payload.get("summary") or {}
-        return "\n".join(
-            [
-                "Diagnostic file-reads snapshot",
-                f"Computed: {snapshot.get('computed_at')}",
-                f"History scope: {snapshot.get('history_scope')}",
-                f"Read commands: {_int_text(summary.get('read_commands'))}",
-                f"Read events: {_int_text(summary.get('read_events'))}",
-                f"Allocated output tokens: {_int_text(summary.get('allocated_output_token_sum'))}",
-                f"Missing output counts: {_int_text(summary.get('read_events_missing_output_count'))}",
-            ]
-        )
-
-    def _render_read_productivity(self) -> str:
-        snapshot = self.payload.get("snapshot") or {}
-        summary = self.payload.get("summary") or {}
-        return "\n".join(
-            [
-                "Diagnostic read-productivity snapshot",
-                f"Computed: {snapshot.get('computed_at')}",
-                f"History scope: {snapshot.get('history_scope')}",
-                f"Read events: {_int_text(summary.get('read_events'))}",
-                f"Read events modified later: {_int_text(summary.get('read_events_modified_later'))}",
-                f"Read-to-modify rate: {_pct_text(summary.get('read_events_modified_later_pct'))}",
-                READ_PRODUCTIVITY_NOTE,
-            ]
-        )
-
-    def _render_concentration(self) -> str:
-        snapshot = self.payload.get("snapshot") or {}
-        summary = self.payload.get("summary") or {}
-        return "\n".join(
-            [
-                "Diagnostic concentration snapshot",
-                f"Computed: {snapshot.get('computed_at')}",
-                f"History scope: {snapshot.get('history_scope')}",
-                f"Usage rows: {_int_text(summary.get('usage_rows'))}",
-                f"Total tokens: {_int_text(summary.get('total_tokens'))}",
-                f"Dimensions: {_int_text(summary.get('dimension_count'))}",
-            ]
-        )
-
 
 def build_diagnostic_overview_report(
     *,
@@ -369,7 +232,7 @@ def _refresh_source_log_snapshot(
 ) -> dict[str, Any]:
     history_scope = _history_scope(include_archived)
     computed_at = _utc_now()
-    analysis = _analyze_indexed_source_logs(db_path=db_path, include_archived=include_archived)
+    analysis = analyze_indexed_source_logs(db_path=db_path, include_archived=include_archived)
     snapshot = _snapshot_metadata(
         computed_at=computed_at,
         history_scope=history_scope,
@@ -441,7 +304,7 @@ def _refresh_concentration_snapshot(
 ) -> dict[str, Any]:
     history_scope = _history_scope(include_archived)
     computed_at = _utc_now()
-    analysis = _compute_concentration(db_path=db_path, include_archived=include_archived)
+    analysis = compute_concentration(db_path=db_path, include_archived=include_archived)
     snapshot = _snapshot_metadata(
         computed_at=computed_at,
         history_scope=history_scope,
@@ -642,1068 +505,21 @@ def _missing_payload(
         payload["by_reader"] = []
         payload["top_paths"] = []
         payload["largest_read_commands"] = []
-        payload["path_privacy"] = _path_privacy_metadata()
+        payload["path_privacy"] = path_privacy_metadata()
     elif section == DIAGNOSTIC_READ_PRODUCTIVITY_SECTION:
         payload["summary"] = None
         payload["by_reader"] = []
         payload["top_modified_paths"] = []
-        payload["path_privacy"] = _path_privacy_metadata()
+        payload["path_privacy"] = path_privacy_metadata()
     elif section == DIAGNOSTIC_CONCENTRATION_SECTION:
         payload["summary"] = None
         payload["metrics"] = []
         payload["dimensions"] = []
         payload["largest_impact_rows"] = []
-        payload["privacy"] = _concentration_privacy_metadata()
+        payload["privacy"] = concentration_privacy_metadata()
     return payload
 
 
-def _analyze_indexed_source_logs(
-    *,
-    db_path: Path,
-    include_archived: bool,
-) -> dict[str, Any]:
-    source_logs, usage_rows_scanned = _indexed_source_logs(
-        db_path=db_path,
-        include_archived=include_archived,
-    )
-    function_calls: Counter[str] = Counter()
-    function_outputs: Counter[str] = Counter()
-    output_with_count: Counter[str] = Counter()
-    output_missing_count: Counter[str] = Counter()
-    output_token_sum: Counter[str] = Counter()
-    command_calls: Counter[str] = Counter()
-    command_children: dict[str, Counter[str]] = {}
-    command_with_count: Counter[str] = Counter()
-    command_missing_count: Counter[str] = Counter()
-    command_token_sum: Counter[str] = Counter()
-    read_events: list[dict[str, Any]] = []
-    read_command_count = 0
-    read_events_by_reader: Counter[str] = Counter()
-    read_events_by_path: Counter[str] = Counter()
-    read_events_with_count_by_reader: Counter[str] = Counter()
-    read_events_missing_count_by_reader: Counter[str] = Counter()
-    read_tokens_by_reader: Counter[str] = Counter()
-    read_tokens_by_path: Counter[str] = Counter()
-    read_modified_by_reader: Counter[str] = Counter()
-    read_modified_by_path: Counter[str] = Counter()
-    read_path_refs: dict[str, dict[str, str]] = {}
-    largest_read_commands: list[dict[str, Any]] = []
-    missing_reasons: Counter[str] = Counter()
-    meta: Counter[str] = Counter()
-    meta["source_logs_scanned"] = len(source_logs)
-    meta["usage_rows_scanned"] = usage_rows_scanned
-
-    for source_log in source_logs:
-        call_names: dict[str, str] = {}
-        call_roots: dict[str, str] = {}
-        call_read_events: dict[str, list[int]] = {}
-        source_read_events: list[int] = []
-        modified_orders_by_path: dict[str, list[int]] = defaultdict(list)
-        try:
-            lines = source_log.read_text(encoding="utf-8").splitlines()
-        except OSError:
-            meta["read_errors"] += 1
-            continue
-        for order, line in enumerate(lines):
-            try:
-                envelope = json.loads(line)
-            except json.JSONDecodeError:
-                meta["invalid_json"] += 1
-                continue
-            if not isinstance(envelope, dict):
-                continue
-            payload = envelope.get("payload")
-            if not isinstance(payload, dict):
-                continue
-            if envelope.get("type") == "event_msg":
-                for path_ref in _modified_path_refs(payload):
-                    modified_orders_by_path[path_ref["path_key"]].append(order)
-                continue
-            if envelope.get("type") != "response_item":
-                continue
-            payload_type = payload.get("type")
-            if payload_type == "function_call":
-                call_id = _optional_str(payload.get("call_id") or payload.get("id"))
-                function_name = _safe_label(payload.get("name")) or "unknown_function"
-                function_calls[function_name] += 1
-                if call_id:
-                    call_names[call_id] = function_name
-                command = _shell_command_from_payload(payload, function_name=function_name)
-                if command is None:
-                    if _is_shell_tool(function_name):
-                        meta["missing_command"] += 1
-                    continue
-                root, child = _command_root_and_child(command)
-                command_calls[root] += 1
-                command_children.setdefault(root, Counter())[child] += 1
-                if call_id:
-                    call_roots[call_id] = root
-                read_refs = _read_path_refs_from_command(command, root=root)
-                if read_refs:
-                    read_command_count += 1
-                    indexes: list[int] = []
-                    reader = _read_reader(root)
-                    for path_ref in read_refs:
-                        path_key = path_ref["path_key"]
-                        read_path_refs[path_key] = path_ref
-                        event_index = len(read_events)
-                        read_events.append(
-                            {
-                                "reader": reader,
-                                "root": root,
-                                "path_key": path_key,
-                                "path_label": path_ref["path_label"],
-                                "path_hash": path_ref["path_hash"],
-                                "order": order,
-                                "modified_later": False,
-                            }
-                        )
-                        source_read_events.append(event_index)
-                        indexes.append(event_index)
-                        read_events_by_reader[reader] += 1
-                        read_events_by_path[path_key] += 1
-                    if call_id:
-                        call_read_events[call_id] = indexes
-            elif payload_type == "function_call_output":
-                call_id = _optional_str(payload.get("call_id"))
-                function_name = call_names.get(call_id or "", "unknown_function")
-                function_outputs[function_name] += 1
-                output = payload.get("output")
-                count = _original_output_count(output)
-                read_indexes = call_read_events.get(call_id or "", [])
-                if count is None:
-                    output_missing_count[function_name] += 1
-                    missing_reasons["string_no_header" if isinstance(output, str) else "non_string_output"] += 1
-                    root = call_roots.get(call_id or "")
-                    if root:
-                        command_missing_count[root] += 1
-                    for event_index in read_indexes:
-                        reader = str(read_events[event_index]["reader"])
-                        read_events_missing_count_by_reader[reader] += 1
-                    continue
-                output_with_count[function_name] += 1
-                output_token_sum[function_name] += count
-                root = call_roots.get(call_id or "")
-                if root:
-                    command_with_count[root] += 1
-                    command_token_sum[root] += count
-                if read_indexes:
-                    allocations = _allocate_token_count(count, len(read_indexes))
-                    paths: list[dict[str, str]] = []
-                    readers: Counter[str] = Counter()
-                    for event_index, allocated in zip(read_indexes, allocations, strict=True):
-                        event = read_events[event_index]
-                        reader = str(event["reader"])
-                        path_key = str(event["path_key"])
-                        read_events_with_count_by_reader[reader] += 1
-                        read_tokens_by_reader[reader] += allocated
-                        read_tokens_by_path[path_key] += allocated
-                        readers[reader] += 1
-                        paths.append(
-                            {
-                                "path_label": str(event["path_label"]),
-                                "path_hash": str(event["path_hash"]),
-                            }
-                        )
-                    largest_read_commands.append(
-                        {
-                            "root": root or "unknown_command",
-                            "read_event_count": len(read_indexes),
-                            "original_token_count": int(count),
-                            "readers": _simple_rows(readers, key_name="reader"),
-                            "paths": _unique_path_rows(paths),
-                        }
-                    )
-        for event_index in source_read_events:
-            event = read_events[event_index]
-            path_key = str(event["path_key"])
-            if any(order > int(event["order"]) for order in modified_orders_by_path.get(path_key, [])):
-                event["modified_later"] = True
-                read_modified_by_reader[str(event["reader"])] += 1
-                read_modified_by_path[path_key] += 1
-
-    function_rows = _function_rows(
-        function_calls=function_calls,
-        function_outputs=function_outputs,
-        output_with_count=output_with_count,
-        output_missing_count=output_missing_count,
-        output_token_sum=output_token_sum,
-    )
-    command_output_rows = _command_output_rows(
-        command_calls=command_calls,
-        command_with_count=command_with_count,
-        command_missing_count=command_missing_count,
-        command_token_sum=command_token_sum,
-    )
-    command_rows = _command_rows(command_calls=command_calls, command_children=command_children)
-    return {
-        "meta": {key: int(value) for key, value in meta.items()},
-        "tool_output": {
-            "summary": {
-                "function_calls": int(sum(function_calls.values())),
-                "function_outputs": int(sum(function_outputs.values())),
-                "outputs_with_original_token_count": int(sum(output_with_count.values())),
-                "outputs_missing_original_token_count": int(sum(output_missing_count.values())),
-                "original_token_sum": int(sum(output_token_sum.values())),
-            },
-            "functions": function_rows,
-            "command_roots": command_output_rows,
-            "missing_reasons": _simple_rows(missing_reasons),
-        },
-        "commands": {
-            "summary": {
-                "shell_function_calls": int(sum(command_calls.values())),
-                "command_root_count": len(command_calls),
-                "missing_command": int(meta["missing_command"]),
-            },
-            "commands": command_rows,
-        },
-        "file_reads": {
-            "summary": {
-                "read_commands": read_command_count,
-                "read_events": len(read_events),
-                "unique_paths_read": len(read_path_refs),
-                "read_events_with_output_count": int(sum(read_events_with_count_by_reader.values())),
-                "read_events_missing_output_count": int(sum(read_events_missing_count_by_reader.values())),
-                "allocated_output_token_sum": int(sum(read_tokens_by_reader.values())),
-            },
-            "by_reader": _read_reader_rows(
-                read_events_by_reader=read_events_by_reader,
-                read_events_with_count_by_reader=read_events_with_count_by_reader,
-                read_events_missing_count_by_reader=read_events_missing_count_by_reader,
-                read_tokens_by_reader=read_tokens_by_reader,
-            ),
-            "top_paths": _read_path_rows(
-                read_path_refs=read_path_refs,
-                read_events_by_path=read_events_by_path,
-                read_tokens_by_path=read_tokens_by_path,
-            ),
-            "largest_read_commands": _largest_read_command_rows(largest_read_commands),
-            "path_privacy": _path_privacy_metadata(),
-        },
-        "read_productivity": {
-            "summary": {
-                "read_events": len(read_events),
-                "read_events_modified_later": int(sum(read_modified_by_reader.values())),
-                "read_events_modified_later_pct": _ratio(
-                    int(sum(read_modified_by_reader.values())),
-                    len(read_events),
-                ),
-                "unique_paths_read": len(read_path_refs),
-                "unique_paths_modified_later": len(read_modified_by_path),
-                "unique_path_modified_later_pct": _ratio(len(read_modified_by_path), len(read_path_refs)),
-                "correlation_note": READ_PRODUCTIVITY_NOTE,
-            },
-            "by_reader": _read_productivity_reader_rows(
-                read_events_by_reader=read_events_by_reader,
-                read_modified_by_reader=read_modified_by_reader,
-            ),
-            "top_modified_paths": _read_productivity_path_rows(
-                read_path_refs=read_path_refs,
-                read_events_by_path=read_events_by_path,
-                read_modified_by_path=read_modified_by_path,
-            ),
-            "path_privacy": _path_privacy_metadata(),
-        },
-    }
-
-
-def _compute_concentration(
-    *,
-    db_path: Path,
-    include_archived: bool,
-) -> dict[str, Any]:
-    where = "" if include_archived else "WHERE is_archived = 0"
-    with connect(db_path) as conn:
-        init_db(conn)
-        rows = conn.execute(
-            f"""
-            SELECT
-                record_id,
-                session_id,
-                event_timestamp,
-                source_file,
-                cwd,
-                total_tokens
-            FROM usage_events
-            {where}
-            ORDER BY event_timestamp, record_id
-            """
-        ).fetchall()
-        source_row = conn.execute(
-            f"SELECT COUNT(DISTINCT source_file) AS source_logs_scanned FROM usage_events {where}"
-        ).fetchone()
-
-    source_groups: dict[str, dict[str, Any]] = {}
-    cwd_groups: dict[str, dict[str, Any]] = {}
-    day_groups: dict[str, dict[str, Any]] = {}
-    total_tokens = 0
-    for row in rows:
-        tokens = _int_value(row["total_tokens"])
-        total_tokens += tokens
-        record_id = str(row["record_id"])
-        session_id = _optional_str(row["session_id"])
-        _add_concentration_row(
-            source_groups,
-            key=_source_group_key(row["source_file"]),
-            label=_source_group_label(row["source_file"], session_id=session_id),
-            group_hash=_source_group_hash(row["source_file"]),
-            tokens=tokens,
-            record_id=record_id,
-            session_id=session_id,
-        )
-        cwd_ref = _cwd_group_ref(row["cwd"])
-        _add_concentration_row(
-            cwd_groups,
-            key=cwd_ref["group_hash"],
-            label=cwd_ref["label"],
-            group_hash=cwd_ref["group_hash"],
-            tokens=tokens,
-            record_id=record_id,
-            session_id=session_id,
-        )
-        day = _day_label(row["event_timestamp"])
-        _add_concentration_row(
-            day_groups,
-            key=day,
-            label=day,
-            group_hash=_stable_hash(day),
-            tokens=tokens,
-            record_id=record_id,
-            session_id=session_id,
-        )
-
-    dimensions = [
-        _concentration_dimension(
-            "source_log",
-            "Source Log / Session",
-            source_groups,
-            total_tokens=total_tokens,
-        ),
-        _concentration_dimension("cwd", "Cwd / Project", cwd_groups, total_tokens=total_tokens),
-        _concentration_dimension("day", "Day", day_groups, total_tokens=total_tokens),
-    ]
-    metrics = _concentration_metrics(dimensions)
-    return {
-        "meta": {
-            "source_logs_scanned": _int_value(source_row["source_logs_scanned"]),
-        },
-        "summary": {
-            "usage_rows": len(rows),
-            "total_tokens": total_tokens,
-            "dimension_count": len(dimensions),
-            "history_scope": _history_scope(include_archived),
-        },
-        "metrics": metrics,
-        "dimensions": dimensions,
-        "largest_impact_rows": _largest_impact_rows(dimensions),
-        "privacy": _concentration_privacy_metadata(),
-    }
-
-
-def _indexed_source_logs(
-    *,
-    db_path: Path,
-    include_archived: bool,
-) -> tuple[list[Path], int]:
-    where = "" if include_archived else "WHERE is_archived = 0"
-    with connect(db_path) as conn:
-        init_db(conn)
-        rows = conn.execute(
-            f"SELECT source_file FROM source_files {where} ORDER BY source_file"
-        ).fetchall()
-        usage_row = conn.execute(
-            f"SELECT COUNT(*) AS usage_rows FROM usage_events {where}"
-        ).fetchone()
-    return [Path(str(row["source_file"])) for row in rows], _int_value(usage_row["usage_rows"])
-
-
-def _function_rows(
-    *,
-    function_calls: Counter[str],
-    function_outputs: Counter[str],
-    output_with_count: Counter[str],
-    output_missing_count: Counter[str],
-    output_token_sum: Counter[str],
-) -> list[dict[str, Any]]:
-    names = set(function_calls) | set(function_outputs) | set(output_with_count) | set(output_token_sum)
-    rows = [
-        {
-            "function": name,
-            "calls": int(function_calls[name]),
-            "outputs": int(function_outputs[name]),
-            "with_original_token_count": int(output_with_count[name]),
-            "missing_original_token_count": int(output_missing_count[name]),
-            "original_token_sum": int(output_token_sum[name]),
-        }
-        for name in names
-    ]
-    return sorted(rows, key=lambda row: (-int(row["original_token_sum"]), -int(row["calls"]), row["function"]))
-
-
-def _command_output_rows(
-    *,
-    command_calls: Counter[str],
-    command_with_count: Counter[str],
-    command_missing_count: Counter[str],
-    command_token_sum: Counter[str],
-) -> list[dict[str, Any]]:
-    rows = [
-        {
-            "root": root,
-            "calls": int(command_calls[root]),
-            "with_original_token_count": int(command_with_count[root]),
-            "missing_original_token_count": int(command_missing_count[root]),
-            "original_token_sum": int(command_token_sum[root]),
-        }
-        for root in set(command_calls) | set(command_token_sum)
-    ]
-    return sorted(rows, key=lambda row: (-int(row["original_token_sum"]), -int(row["calls"]), row["root"]))
-
-
-def _command_rows(
-    *,
-    command_calls: Counter[str],
-    command_children: dict[str, Counter[str]],
-) -> list[dict[str, Any]]:
-    rows = []
-    for root, total in command_calls.items():
-        children = _simple_rows(command_children.get(root, Counter()), key_name="child")
-        rows.append({"root": root, "total": int(total), "children": children[:25]})
-    return sorted(rows, key=lambda row: (-int(row["total"]), row["root"]))
-
-
-def _add_concentration_row(
-    groups: dict[str, dict[str, Any]],
-    *,
-    key: str,
-    label: str,
-    group_hash: str,
-    tokens: int,
-    record_id: str,
-    session_id: str | None,
-) -> None:
-    group = groups.setdefault(
-        key,
-        {
-            "label": label,
-            "group_hash": group_hash,
-            "total_tokens": 0,
-            "usage_rows": 0,
-            "largest_record_id": None,
-            "largest_call_tokens": 0,
-            "session_ids": set(),
-        },
-    )
-    group["total_tokens"] = int(group["total_tokens"]) + tokens
-    group["usage_rows"] = int(group["usage_rows"]) + 1
-    if tokens > int(group["largest_call_tokens"]):
-        group["largest_call_tokens"] = tokens
-        group["largest_record_id"] = record_id
-    if session_id:
-        group["session_ids"].add(session_id)
-
-
-def _concentration_dimension(
-    dimension: str,
-    label: str,
-    groups: dict[str, dict[str, Any]],
-    *,
-    total_tokens: int,
-) -> dict[str, Any]:
-    rows = [_concentration_group_row(dimension, group, total_tokens=total_tokens) for group in groups.values()]
-    rows = sorted(
-        rows,
-        key=lambda row: (-int(row["total_tokens"]), -int(row["usage_rows"]), row["label"]),
-    )
-    return {
-        "dimension": dimension,
-        "label": label,
-        "group_count": len(rows),
-        "total_tokens": total_tokens,
-        "top_1_share": _top_share(rows, 1, total_tokens=total_tokens),
-        "top_3_share": _top_share(rows, 3, total_tokens=total_tokens),
-        "top_5_share": _top_share(rows, 5, total_tokens=total_tokens),
-        "effective_group_count": _effective_group_count(rows, total_tokens=total_tokens),
-        "top_rows": rows[:10],
-    }
-
-
-def _concentration_group_row(
-    dimension: str,
-    group: dict[str, Any],
-    *,
-    total_tokens: int,
-) -> dict[str, Any]:
-    session_ids = sorted(group["session_ids"])
-    return {
-        "dimension": dimension,
-        "label": group["label"],
-        "group_hash": group["group_hash"],
-        "usage_rows": int(group["usage_rows"]),
-        "total_tokens": int(group["total_tokens"]),
-        "share": _rounded_ratio(int(group["total_tokens"]), total_tokens),
-        "largest_record_id": group["largest_record_id"],
-        "largest_call_tokens": int(group["largest_call_tokens"]),
-        "session_id": session_ids[0] if len(session_ids) == 1 else None,
-    }
-
-
-def _concentration_metrics(dimensions: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    rows: list[dict[str, Any]] = []
-    for dimension in dimensions:
-        dimension_key = str(dimension["dimension"])
-        for top_n in (1, 3, 5):
-            rows.append(
-                {
-                    "metric": f"top_{top_n}_{dimension_key}_share",
-                    "dimension": dimension_key,
-                    "top_n": top_n,
-                    "share": dimension[f"top_{top_n}_share"],
-                }
-            )
-    return rows
-
-
-def _largest_impact_rows(dimensions: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    rows: list[dict[str, Any]] = []
-    for dimension in dimensions:
-        for row in dimension["top_rows"]:
-            rows.append(dict(row))
-    return sorted(
-        rows,
-        key=lambda row: (-float(row["share"]), -int(row["total_tokens"]), row["dimension"], row["label"]),
-    )[:15]
-
-
-def _top_share(
-    rows: list[dict[str, Any]],
-    top_n: int,
-    *,
-    total_tokens: int,
-) -> float:
-    return _rounded_ratio(sum(int(row["total_tokens"]) for row in rows[:top_n]), total_tokens)
-
-
-def _effective_group_count(
-    rows: list[dict[str, Any]],
-    *,
-    total_tokens: int,
-) -> float:
-    if total_tokens <= 0:
-        return 0.0
-    hhi = sum((int(row["total_tokens"]) / total_tokens) ** 2 for row in rows)
-    return round(1 / hhi, 6) if hhi else 0.0
-
-
-def _source_group_key(value: object) -> str:
-    return _source_group_hash(value)
-
-
-def _source_group_hash(value: object) -> str:
-    source = value if isinstance(value, str) and value else "unknown_source"
-    return _stable_hash(source)
-
-
-def _source_group_label(value: object, *, session_id: str | None) -> str:
-    if session_id:
-        return f"session:{session_id[:8]}"
-    return f"source:{_source_group_hash(value)}"
-
-
-def _cwd_group_ref(value: object) -> dict[str, str]:
-    if isinstance(value, str) and value:
-        path_ref = _path_ref_from_token(value)
-        if path_ref is not None:
-            return {"label": path_ref["path_label"], "group_hash": path_ref["path_hash"]}
-    return {"label": "unknown_cwd", "group_hash": _stable_hash("unknown_cwd")}
-
-
-def _day_label(value: object) -> str:
-    if isinstance(value, str):
-        match = re.match(r"^\d{4}-\d{2}-\d{2}", value)
-        if match:
-            return match.group(0)
-    return "unknown_day"
-
-
-def _read_reader_rows(
-    *,
-    read_events_by_reader: Counter[str],
-    read_events_with_count_by_reader: Counter[str],
-    read_events_missing_count_by_reader: Counter[str],
-    read_tokens_by_reader: Counter[str],
-) -> list[dict[str, Any]]:
-    rows = [
-        {
-            "reader": reader,
-            "read_events": int(read_events_by_reader[reader]),
-            "events_with_output_count": int(read_events_with_count_by_reader[reader]),
-            "events_missing_output_count": int(read_events_missing_count_by_reader[reader]),
-            "allocated_output_token_sum": int(read_tokens_by_reader[reader]),
-        }
-        for reader in set(read_events_by_reader) | set(read_tokens_by_reader)
-    ]
-    return sorted(
-        rows,
-        key=lambda row: (-int(row["allocated_output_token_sum"]), -int(row["read_events"]), row["reader"]),
-    )
-
-
-def _read_path_rows(
-    *,
-    read_path_refs: dict[str, dict[str, str]],
-    read_events_by_path: Counter[str],
-    read_tokens_by_path: Counter[str],
-) -> list[dict[str, Any]]:
-    rows = [
-        {
-            "path_label": read_path_refs[path_key]["path_label"],
-            "path_hash": read_path_refs[path_key]["path_hash"],
-            "read_events": int(read_events_by_path[path_key]),
-            "allocated_output_token_sum": int(read_tokens_by_path[path_key]),
-        }
-        for path_key in set(read_events_by_path) | set(read_tokens_by_path)
-        if path_key in read_path_refs
-    ]
-    return sorted(
-        rows,
-        key=lambda row: (
-            -int(row["allocated_output_token_sum"]),
-            -int(row["read_events"]),
-            row["path_label"],
-            row["path_hash"],
-        ),
-    )[:50]
-
-
-def _largest_read_command_rows(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    return sorted(
-        rows,
-        key=lambda row: (
-            -int(row["original_token_count"]),
-            -int(row["read_event_count"]),
-            row["root"],
-        ),
-    )[:25]
-
-
-def _read_productivity_reader_rows(
-    *,
-    read_events_by_reader: Counter[str],
-    read_modified_by_reader: Counter[str],
-) -> list[dict[str, Any]]:
-    rows = [
-        {
-            "reader": reader,
-            "read_events": int(read_events_by_reader[reader]),
-            "read_events_modified_later": int(read_modified_by_reader[reader]),
-            "read_events_modified_later_pct": _ratio(
-                int(read_modified_by_reader[reader]),
-                int(read_events_by_reader[reader]),
-            ),
-        }
-        for reader in read_events_by_reader
-    ]
-    return sorted(
-        rows,
-        key=lambda row: (
-            -int(row["read_events_modified_later"]),
-            -int(row["read_events"]),
-            row["reader"],
-        ),
-    )
-
-
-def _read_productivity_path_rows(
-    *,
-    read_path_refs: dict[str, dict[str, str]],
-    read_events_by_path: Counter[str],
-    read_modified_by_path: Counter[str],
-) -> list[dict[str, Any]]:
-    rows = [
-        {
-            "path_label": read_path_refs[path_key]["path_label"],
-            "path_hash": read_path_refs[path_key]["path_hash"],
-            "read_events": int(read_events_by_path[path_key]),
-            "read_events_modified_later": int(read_modified_by_path[path_key]),
-            "read_events_modified_later_pct": _ratio(
-                int(read_modified_by_path[path_key]),
-                int(read_events_by_path[path_key]),
-            ),
-        }
-        for path_key in read_modified_by_path
-        if path_key in read_path_refs
-    ]
-    return sorted(
-        rows,
-        key=lambda row: (
-            -int(row["read_events_modified_later"]),
-            -int(row["read_events"]),
-            row["path_label"],
-            row["path_hash"],
-        ),
-    )[:50]
-
-
-def _simple_rows(
-    counter: Counter[str],
-    *,
-    key_name: str = "name",
-) -> list[dict[str, Any]]:
-    return [
-        {key_name: name, "count": int(count)}
-        for name, count in sorted(counter.items(), key=lambda item: (-item[1], item[0]))
-    ]
-
-
-def _unique_path_rows(paths: list[dict[str, str]]) -> list[dict[str, str]]:
-    rows: list[dict[str, str]] = []
-    seen: set[str] = set()
-    for path in paths:
-        path_hash = path["path_hash"]
-        if path_hash in seen:
-            continue
-        seen.add(path_hash)
-        rows.append({"path_label": path["path_label"], "path_hash": path_hash})
-    return rows[:25]
-
-
-def _allocate_token_count(count: int, bucket_count: int) -> list[int]:
-    if bucket_count <= 0:
-        return []
-    base = count // bucket_count
-    remainder = count % bucket_count
-    return [base + (1 if index < remainder else 0) for index in range(bucket_count)]
-
-
-def _read_path_refs_from_command(command: str, *, root: str) -> list[dict[str, str]]:
-    if root not in READ_COMMAND_ROOTS:
-        return []
-    tokens = _strip_command_wrappers(_command_tokens(command))
-    if not tokens:
-        return []
-    path_tokens = _read_path_tokens(root=root, tokens=tokens)
-    refs: list[dict[str, str]] = []
-    seen: set[str] = set()
-    for token in path_tokens:
-        path_ref = _path_ref_from_token(token)
-        if path_ref is None or path_ref["path_key"] in seen:
-            continue
-        seen.add(path_ref["path_key"])
-        refs.append(path_ref)
-    return refs
-
-
-def _read_path_tokens(*, root: str, tokens: list[str]) -> list[str]:
-    args = tokens[1:]
-    if root == "find":
-        return _find_path_tokens(args)
-    if root == "rg":
-        return _ripgrep_path_tokens(args)
-    if root == "grep":
-        operands = _non_option_operands(args, root=root)
-        return operands[1:] if len(operands) > 1 else []
-    if root == "sed":
-        operands = _non_option_operands(args, root=root)
-        return operands[1:] if len(operands) > 1 else []
-    return _non_option_operands(args, root=root)
-
-
-def _find_path_tokens(args: list[str]) -> list[str]:
-    paths: list[str] = []
-    for token in args:
-        if _is_shell_separator(token):
-            break
-        if token == "--":
-            continue
-        if token.startswith("-") or token in {"!", "(", ")"}:
-            break
-        paths.append(token)
-    return paths or ["."]
-
-
-def _ripgrep_path_tokens(args: list[str]) -> list[str]:
-    operands = _non_option_operands(args, root="rg")
-    if any(token == "--files" or token.startswith("--files=") for token in args):
-        return operands or ["."]
-    return operands[1:] if len(operands) > 1 else []
-
-
-def _non_option_operands(args: list[str], *, root: str) -> list[str]:
-    option_args = _option_args_for_root(root)
-    operands: list[str] = []
-    skip_next = False
-    passthrough = False
-    for token in args:
-        if skip_next:
-            skip_next = False
-            continue
-        if _is_shell_separator(token):
-            break
-        if token in {">", ">>", "<", "2>", "2>>"}:
-            break
-        if passthrough:
-            operands.append(token)
-            continue
-        if token == "--":
-            passthrough = True
-            continue
-        if token.startswith("-"):
-            option_name = token.split("=", 1)[0]
-            if option_name in option_args and "=" not in token:
-                skip_next = True
-            continue
-        operands.append(token)
-    return operands
-
-
-def _option_args_for_root(root: str) -> set[str]:
-    return {
-        "grep": {
-            "-A",
-            "-B",
-            "-C",
-            "-e",
-            "-f",
-            "-m",
-            "--after-context",
-            "--before-context",
-            "--context",
-            "--file",
-            "--max-count",
-            "--regexp",
-        },
-        "head": {"-c", "-n", "--bytes", "--lines"},
-        "rg": {
-            "-A",
-            "-B",
-            "-C",
-            "-e",
-            "-f",
-            "-g",
-            "-m",
-            "-t",
-            "-T",
-            "--after-context",
-            "--before-context",
-            "--context",
-            "--file",
-            "--glob",
-            "--max-count",
-            "--max-depth",
-            "--type",
-            "--type-not",
-        },
-        "sed": {"-e", "-f", "--expression", "--file"},
-        "tail": {"-c", "-n", "--bytes", "--lines"},
-    }.get(root, set())
-
-
-def _read_reader(root: str) -> str:
-    if root in SEARCH_READ_ROOTS:
-        return f"search_path_scan:{root}"
-    return f"direct_file_read:{root}"
-
-
-def _modified_path_refs(payload: dict[str, Any]) -> list[dict[str, str]]:
-    if payload.get("type") != "patch_apply_end":
-        return []
-    paths: list[str] = []
-    for key in ("changed_paths", "paths", "files", "modified_paths"):
-        paths.extend(_path_values(payload.get(key)))
-    paths.extend(_path_values(payload.get("changes")))
-    refs: list[dict[str, str]] = []
-    seen: set[str] = set()
-    for path in paths:
-        path_ref = _path_ref_from_token(path)
-        if path_ref is None or path_ref["path_key"] in seen:
-            continue
-        seen.add(path_ref["path_key"])
-        refs.append(path_ref)
-    return refs
-
-
-def _path_values(value: object) -> list[str]:
-    if isinstance(value, str):
-        return [value]
-    if isinstance(value, list | tuple):
-        paths: list[str] = []
-        for item in value:
-            paths.extend(_path_values(item))
-        return paths
-    if isinstance(value, dict):
-        paths = []
-        for key in ("path", "file", "filename", "new_path", "old_path"):
-            paths.extend(_path_values(value.get(key)))
-        return paths
-    return []
-
-
-def _path_ref_from_token(token: str) -> dict[str, str] | None:
-    raw = token.strip()
-    if not raw or raw == "-" or _is_shell_separator(raw) or _looks_like_assignment(raw):
-        return None
-    if raw.startswith(("$", "`")) or "://" in raw:
-        return None
-    label = _safe_path_label(raw)
-    if label is None:
-        return None
-    path_hash = _stable_hash(raw)
-    return {"path_key": path_hash, "path_label": label, "path_hash": path_hash}
-
-
-def _safe_path_label(token: str) -> str | None:
-    normalized = token.rstrip("/")
-    label = normalized if normalized in {".", ".."} else Path(normalized).name
-    if not label:
-        return None
-    lowered = label.lower()
-    if lowered.startswith(SENSITIVE_LABEL_PREFIXES):
-        return "path"
-    return label if SAFE_PATH_LABEL_RE.fullmatch(label) else "path"
-
-
-def _is_shell_separator(token: str) -> bool:
-    return token in {"&&", "||", ";", "|"}
-
-
-def _path_privacy_metadata() -> dict[str, str]:
-    return {
-        "label_policy": "basename_only",
-        "hash_policy": "sha256_12",
-        "normal": "basename_only_with_hash",
-        "redacted": "basename_only_with_hash",
-        "strict": "hash_available_for_hiding_labels",
-    }
-
-
-def _concentration_privacy_metadata() -> dict[str, str]:
-    return {
-        "source_log_label_policy": "session_id_prefix_or_source_hash",
-        "cwd_label_policy": "basename_only",
-        "hash_policy": "sha256_12",
-        "raw_source_paths_included": "false",
-        "raw_cwd_paths_included": "false",
-    }
-
-
-def _stable_hash(value: str) -> str:
-    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
-
-
-def _shell_command_from_payload(payload: dict[str, Any], *, function_name: str) -> str | None:
-    if not _is_shell_tool(function_name):
-        return None
-    arguments = payload.get("arguments")
-    if isinstance(arguments, str):
-        try:
-            loaded = json.loads(arguments)
-        except json.JSONDecodeError:
-            loaded = {}
-        if isinstance(loaded, dict):
-            command = loaded.get("cmd") or loaded.get("command")
-            if isinstance(command, str):
-                return command
-    if isinstance(arguments, dict):
-        command = arguments.get("cmd") or arguments.get("command")
-        if isinstance(command, str):
-            return command
-    command = payload.get("cmd") or payload.get("command")
-    return command if isinstance(command, str) else None
-
-
-def _is_shell_tool(function_name: str) -> bool:
-    lowered = function_name.lower()
-    suffix = lowered.rsplit(".", 1)[-1].rsplit("__", 1)[-1]
-    return lowered in SHELL_TOOL_NAMES or suffix in SHELL_TOOL_NAMES
-
-
-def _command_root_and_child(command: str) -> tuple[str, str]:
-    tokens = _strip_command_wrappers(_command_tokens(command))
-    if not tokens:
-        return "unknown_command", "unknown"
-    root = _command_root(tokens)
-    return root, _command_child(root, tokens)
-
-
-def _command_tokens(command: str) -> list[str]:
-    try:
-        return shlex.split(command, posix=True)
-    except ValueError:
-        return []
-
-
-def _strip_command_wrappers(tokens: list[str]) -> list[str]:
-    remaining = list(tokens)
-    while remaining:
-        while remaining and _looks_like_assignment(remaining[0]):
-            remaining.pop(0)
-        if not remaining:
-            break
-        base = _basename(remaining[0])
-        if base in {"command", "env", "sudo"}:
-            remaining.pop(0)
-            continue
-        break
-    return remaining
-
-
-def _command_root(tokens: list[str]) -> str:
-    base = _basename(tokens[0])
-    if base in {"py.test", "pytest"}:
-        return "pytest"
-    if base == "py" or base == "python" or base.startswith("python"):
-        return "python"
-    return _safe_label(base) or "unknown_command"
-
-
-def _command_child(root: str, tokens: list[str]) -> str:
-    if root == "python":
-        for index, token in enumerate(tokens[:-1]):
-            if token == "-m":
-                module = _safe_label(_basename(tokens[index + 1]).split(".", 1)[0])
-                return f"-m:{module}" if module else "-m:unknown"
-        return tokens[1] if len(tokens) > 1 and tokens[1].startswith("-") else "<script>"
-    if len(tokens) <= 1:
-        return "<none>"
-    child = _safe_label(_basename(tokens[1]))
-    return child or "<arg>"
-
-
-def _original_output_count(output: object) -> int | None:
-    if not isinstance(output, str):
-        return None
-    match = ORIGINAL_OUTPUT_RE.match(output)
-    if not match:
-        return None
-    return int(match.group("count"))
-
-
-def _optional_str(value: object) -> str | None:
-    return value if isinstance(value, str) and value else None
-
-
-def _safe_label(value: object) -> str | None:
-    if not isinstance(value, str):
-        return None
-    stripped = value.strip()
-    lowered = stripped.lower()
-    if lowered.startswith(SENSITIVE_LABEL_PREFIXES):
-        return None
-    if "/" in stripped or "\\" in stripped:
-        return None
-    return lowered if SAFE_LABEL_RE.fullmatch(stripped) else None
-
-
-def _looks_like_assignment(token: str) -> bool:
-    return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*=.*", token))
-
-
-def _basename(token: str) -> str:
-    return re.split(r"[\\/]", token)[-1].lower()
-
-
 def _snapshot_metadata(
     *,
     computed_at: str,
@@ -1736,23 +552,3 @@ def _int_value(value: object) -> int:
     if isinstance(value, str) and value:
         return int(value)
     return 0
-
-
-def _ratio(numerator: int, denominator: int) -> float:
-    return numerator / denominator if denominator else 0.0
-
-
-def _rounded_ratio(numerator: int, denominator: int) -> float:
-    return round(_ratio(numerator, denominator), 6)
-
-
-def _int_text(value: object) -> str:
-    return f"{_int_value(value):,}"
-
-
-def _pct_text(value: object) -> str:
-    try:
-        ratio = float(value) if isinstance(value, int | float | str) and value != "" else 0.0
-    except (TypeError, ValueError):
-        ratio = 0.0
-    return f"{ratio:.1%}"
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js
index 7185d58..b95b82f 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js
@@ -27,6 +27,8 @@
     let activeSignature = '';
     let status = 'idle';
     let errorMessage = '';
+    let snapshotRefreshStatus = 'idle';
+    let snapshotRefreshError = '';
     let requestGeneration = 0;
     let payloads = emptyPayloads();
     let selectedFactKey = '';
@@ -35,7 +37,34 @@
     const factCallPayloads = new Map();
     const factCallSorts = new Map();
     const factSorts = new Map();
-
+    const snapshotRenderer = window.CodexUsageDashboardDiagnosticSnapshots.create({
+      escapeHtml,
+      formatTimestamp,
+      number,
+      pct,
+      renderState,
+      rowInvestigatorLink,
+      tokenText,
+    });
+    const factRenderer = window.CodexUsageDashboardDiagnosticFacts.create({
+      escapeHtml,
+      factCallRows,
+      factCallsHasMore,
+      factCallSortState,
+      factKey,
+      factSortState,
+      formatTimestamp,
+      getFactCallEntry: key => factCallPayloads.get(key),
+      getSelectedFactKey: () => selectedFactKey,
+      number,
+      pct,
+      renderState,
+      renderTimeCell,
+      rowInvestigatorLink,
+      t,
+      tokenText,
+      tooltipAttributes,
+    });
     function setActive(active) {
       diagnosticsPanelEl.hidden = !active;
       if (usageTableEl) usageTableEl.hidden = active;
@@ -49,6 +78,8 @@
       activeSignature = '';
       status = 'idle';
       errorMessage = '';
+      snapshotRefreshStatus = 'idle';
+      snapshotRefreshError = '';
       selectedFactKey = '';
       payloads = emptyPayloads();
       factCallPayloads.clear();
@@ -66,7 +97,7 @@
         return;
       }
       if (!liveRefreshSupported) {
-        diagnosticsPanelEl.innerHTML = renderState('Diagnostics require the live localhost dashboard API.');
+        diagnosticsPanelEl.innerHTML = renderState('Live API required for diagnostics refresh.');
         return;
       }
       const filters = getDiagnosticFilters(dateRange);
@@ -75,6 +106,8 @@
         activeSignature = signature;
         status = 'loading';
         errorMessage = '';
+        snapshotRefreshStatus = 'idle';
+        snapshotRefreshError = '';
         selectedFactKey = '';
         payloads = emptyPayloads();
         factCallPayloads.clear();
@@ -91,13 +124,14 @@
         const factsSort = factSortState('facts');
         const toolsSort = factSortState('tools');
         const compactionsSort = factSortState('compactions');
-        const [facts, tools, compactions] = await Promise.all([
+        const [facts, tools, compactions, snapshots] = await Promise.all([
           fetchPayload('/api/diagnostics/facts', { ...filters, limit: '50', sort: factsSort.sort, direction: factsSort.direction }),
           fetchPayload('/api/diagnostics/tools', { ...filters, limit: '25', sort: toolsSort.sort, direction: toolsSort.direction }),
           fetchPayload('/api/diagnostics/compactions', { ...filters, limit: '25', sort: compactionsSort.sort, direction: compactionsSort.direction }),
+          fetchSnapshotPayloads(filters, false),
         ]);
         if (generation !== requestGeneration || signature !== activeSignature) return;
-        payloads = { facts, tools, compactions };
+        payloads = { facts, tools, compactions, ...snapshots };
         status = 'ready';
       } catch (error) {
         if (generation !== requestGeneration || signature !== activeSignature) return;
@@ -107,6 +141,39 @@
       renderIfActive();
     }
 
+    async function fetchSnapshotPayloads(filters, refresh) {
+      const snapshotFilters = { include_archived: filters?.include_archived || '0' };
+      const entries = await Promise.all(snapshotRenderer.sections.map(async section => {
+        const payload = await fetchPayload(
+          refresh ? section.refreshPath : section.path,
+          snapshotFilters,
+          refresh ? { method: 'POST' } : {},
+        );
+        return [section.key, payload];
+      }));
+      return Object.fromEntries(entries);
+    }
+
+    async function refreshDiagnosticSnapshots() {
+      if (snapshotRefreshStatus === 'refreshing') return;
+      const signature = activeSignature;
+      snapshotRefreshStatus = 'refreshing';
+      snapshotRefreshError = '';
+      renderIfActive();
+      try {
+        const filters = getDiagnosticFilters();
+        const snapshots = await fetchSnapshotPayloads(filters, true);
+        if (signature !== activeSignature) return;
+        payloads = { ...payloads, ...snapshots };
+        snapshotRefreshStatus = 'ready';
+      } catch (error) {
+        if (signature !== activeSignature) return;
+        snapshotRefreshStatus = 'error';
+        snapshotRefreshError = error.message || String(error);
+      }
+      renderIfActive();
+    }
+
     async function fetchFactCalls(factType, factName, options = {}) {
       const key = factKey(factType, factName);
       const append = Boolean(options.append);
@@ -170,7 +237,7 @@
     }
 
     function sortFactRows(sectionKey, sortKey) {
-      if (!diagnosticFactSortLabels()[sortKey]) return;
+      if (!factRenderer.factSortLabels()[sortKey]) return;
       const current = factSortState(sectionKey);
       const next = current.sort === sortKey
         ? { sort: sortKey, direction: current.direction === 'asc' ? 'desc' : 'asc' }
@@ -184,7 +251,7 @@
     }
 
     function sortFactCalls(sortKey) {
-      if (!selectedFactKey || !diagnosticCallSortLabels()[sortKey]) return;
+      if (!selectedFactKey || !factRenderer.callSortLabels()[sortKey]) return;
       const current = factCallSortState(selectedFactKey);
       const next = current.sort === sortKey
         ? { sort: sortKey, direction: current.direction === 'asc' ? 'desc' : 'asc' }
@@ -194,7 +261,7 @@
       void fetchFactCalls(factType, factName, { force: true });
     }
 
-    async function fetchPayload(path, params) {
+    async function fetchPayload(path, params, options = {}) {
       const urlParams = new URLSearchParams();
       Object.entries(params || {}).forEach(([key, value]) => {
         if (value === null || value === undefined || value === '') return;
@@ -202,6 +269,7 @@
       });
       urlParams.set('_', String(Date.now()));
       const response = await fetch(`${path}?${urlParams.toString()}`, {
+        method: options.method || 'GET',
         headers: {
           'Accept': 'application/json',
           'X-Codex-Usage-Token': apiToken(),
@@ -219,185 +287,27 @@
       const loading = status === 'loading';
       return `
         <div class="diagnostics-stack">
+          ${snapshotRenderer.renderToolbar({
+            loading,
+            payloads,
+            refreshStatus: snapshotRefreshStatus,
+            refreshError: snapshotRefreshError,
+          })}
           <div class="diagnostics-readout">
-            ${readoutMetric('Fact rows', payloads.facts)}
-            ${readoutMetric('Tool/function rows', payloads.tools)}
-            ${readoutMetric('Compaction rows', payloads.compactions)}
+            ${factRenderer.readoutMetric('Fact rows', payloads.facts)}
+            ${factRenderer.readoutMetric('Tool/function rows', payloads.tools)}
+            ${factRenderer.readoutMetric('Compaction rows', payloads.compactions)}
+            ${snapshotRenderer.readoutMetric('Snapshot sections', snapshotRenderer.readyCount(payloads))}
             <span class="diagnostics-note">Structured labels only. Raw context remains on-demand in the call investigator.</span>
           </div>
-          ${renderFactSection('facts', 'Top Diagnostic Facts', 'Structured facts associated with model calls.', payloads.facts, loading)}
-          ${renderFactSection('tools', 'Tool and Function Activity', 'Tool/function facts associated with model calls.', payloads.tools, loading)}
-          ${renderFactSection('compactions', 'Compaction Activity', 'Compaction facts and post-compaction associated costs.', payloads.compactions, loading)}
-        </div>
-      `;
-    }
-
-    function renderFactSection(sectionKey, title, caption, payload, loading) {
-      const rows = Array.isArray(payload?.rows) ? payload.rows : [];
-      return `
-        <div class="diagnostics-section">
-          <div class="diagnostics-section-header">
-            <div>
-              <h3>${escapeHtml(title)}</h3>
-              <p>${escapeHtml(`${caption} Sorted by ${diagnosticFactSortDescription(sectionKey)}.`)}</p>
-            </div>
-            <span>${escapeHtml(payload ? `${number.format(payload.total_matched_rows || rows.length)} matched` : loading ? 'Loading' : 'No payload')}</span>
-          </div>
-          ${renderFactTable(sectionKey, rows, loading)}
-        </div>
-      `;
-    }
-
-    function renderFactTable(sectionKey, rows, loading) {
-      if (loading && !rows.length) return renderState('Loading diagnostics...');
-      if (!rows.length) return renderState('No diagnostic facts matched the current filters.');
-      const body = rows.map(row => {
-        const key = factKey(row.fact_type, row.fact_name);
-        const selected = key === selectedFactKey;
-        const largest = row.largest_record_id
-          ? rowInvestigatorLink({ record_id: row.largest_record_id }, tokenText(row.largest_call_tokens), true)
-          : tokenText(row.largest_call_tokens);
-        return `
-          <tr class="${selected ? 'selected-row' : ''}">
-            <td class="diagnostics-fact-cell">
-              <div class="diagnostic-fact">
-                <strong>${escapeHtml(row.fact_type || 'unknown')}/${escapeHtml(row.fact_name || 'unknown')}</strong>
-                <span>${escapeHtml(row.fact_category || 'uncategorized')}</span>
-              </div>
-            </td>
-            <td class="num">${number.format(Number(row.occurrences || 0))}</td>
-            <td class="num">${number.format(Number(row.associated_calls || 0))}</td>
-            <td class="num token-cell">${tokenText(row.associated_total_tokens)}</td>
-            <td class="num token-cell">${tokenText(row.associated_cached_input_tokens)}</td>
-            <td class="num token-cell">${tokenText(row.associated_uncached_input_tokens)}</td>
-            <td class="num token-cell">${tokenText(row.associated_output_tokens)}</td>
-            <td class="num">${pct(row.avg_cache_ratio)}</td>
-            <td class="num">${largest}</td>
-            <td>${escapeHtml(formatTimestamp(row.latest_event_timestamp || ''))}</td>
-            <td><button class="toolbar-button diagnostics-expand-button" type="button" aria-expanded="${selected ? 'true' : 'false'}" aria-label="${selected ? 'Hide associated calls' : 'Show associated calls'}" data-diagnostics-fact-type="${escapeHtml(row.fact_type || '')}" data-diagnostics-fact-name="${escapeHtml(row.fact_name || '')}">${selected ? '-' : '+'}</button></td>
-          </tr>
-          ${selected ? `
-            <tr class="diagnostics-drilldown-row">
-              <td colspan="11">${renderFactCallsPanel()}</td>
-            </tr>
-          ` : ''}
-        `;
-      }).join('');
-      return `
-        <div class="diagnostics-table-wrap">
-          <table class="diagnostics-table diagnostics-facts-table">
-            <colgroup>
-              <col class="diagnostics-fact-col">
-              <col class="diagnostics-count-col">
-              <col class="diagnostics-count-col">
-              <col class="diagnostics-token-col">
-              <col class="diagnostics-token-col">
-              <col class="diagnostics-token-col">
-              <col class="diagnostics-token-col">
-              <col class="diagnostics-ratio-col">
-              <col class="diagnostics-token-col">
-              <col class="diagnostics-latest-col">
-              <col class="diagnostics-action-col">
-            </colgroup>
-            <thead><tr>
-              ${diagnosticFactHeader(sectionKey, 'fact', 'Fact', false, 'Diagnostic fact type and name derived from structured local log metadata. Raw prompts, assistant text, and tool output are not persisted.')}
-              ${diagnosticFactHeader(sectionKey, 'occurrences', 'Occ', true, 'Occurrences: count of matching diagnostic fact events. One model call can contribute more than one occurrence.')}
-              ${diagnosticFactHeader(sectionKey, 'calls', 'Calls', true, 'Distinct model calls associated with this diagnostic fact.')}
-              ${diagnosticFactHeader(sectionKey, 'tokens', 'Assoc total', true, 'Associated total tokens for those calls. Totals are not additive across facts because one call can have multiple facts.')}
-              ${diagnosticFactHeader(sectionKey, 'cached', 'Cached', true, 'Associated cached input tokens for those calls.')}
-              ${diagnosticFactHeader(sectionKey, 'uncached', 'Uncached', true, 'Associated uncached input tokens for those calls.')}
-              ${diagnosticFactHeader(sectionKey, 'output', 'Output', true, 'Associated output tokens for those calls.')}
-              ${diagnosticFactHeader(sectionKey, 'cache', 'Cache %', true, 'Average cache ratio across associated calls.')}
-              ${diagnosticFactHeader(sectionKey, 'largest', 'Largest', true, 'Largest associated call by total tokens.')}
-              ${diagnosticFactHeader(sectionKey, 'time', 'Latest', false, 'Latest associated call timestamp.')}
-              ${columnHeader('Action', 'Expand or collapse the associated calls.')}
-            </tr></thead>
-            <tbody>${body}</tbody>
-          </table>
-        </div>
-      `;
-    }
-
-    function renderFactCallsPanel() {
-      const entry = factCallPayloads.get(selectedFactKey);
-      const label = selectedFactKey.replace('\u0000', '/');
-      if (!entry || (entry.status === 'loading' && !entry.payload)) {
-        return `<div class="diagnostics-drilldown">${renderState(`Loading calls for ${label}...`)}</div>`;
-      }
-      if (entry.status === 'error' && !entry.payload) {
-        return `<div class="diagnostics-drilldown">${renderState(`Could not load calls for ${label}: ${entry.error}`)}</div>`;
-      }
-      const rows = factCallRows(entry.payload);
-      if (!rows.length) {
-        return `<div class="diagnostics-drilldown">${renderState(`No calls found for ${label}.`)}</div>`;
-      }
-      const total = Number(entry.payload?.total_matched_rows || rows.length);
-      const loadingMore = entry.status === 'appending';
-      const body = rows.map(row => `
-        <tr class="thread-call-row" data-record-id="${escapeHtml(row.record_id || '')}">
-          <td>${rowInvestigatorLink(row, renderTimeCell(row.event_timestamp), true)}</td>
-          <td>${rowInvestigatorLink(row, escapeHtml(row.thread_name || row.parent_thread_name || row.session_id || 'Unknown'))}</td>
-          <td>${rowInvestigatorLink(row, `<span class="pill model-pill" data-full-label="${escapeHtml(row.model || 'Unknown')}">${escapeHtml(row.model || 'Unknown')}</span>`)}</td>
-          <td>${rowInvestigatorLink(row, escapeHtml(row.effort || 'unknown'))}</td>
-          <td class="num token-cell">${rowInvestigatorLink(row, tokenText(row.total_tokens))}</td>
-          <td class="num token-cell">${rowInvestigatorLink(row, tokenText(row.cached_input_tokens))}</td>
-          <td class="num token-cell">${rowInvestigatorLink(row, tokenText(row.uncached_input_tokens))}</td>
-          <td class="num token-cell">${rowInvestigatorLink(row, tokenText(row.output_tokens))}</td>
-          <td class="num token-cell">${rowInvestigatorLink(row, tokenText(row.reasoning_output_tokens))}</td>
-          <td class="num">${rowInvestigatorLink(row, pct(row.cache_ratio))}</td>
-        </tr>
-      `).join('');
-      return `
-        <div class="diagnostics-drilldown">
-          <div class="diagnostics-section-header">
-            <div>
-              <h3>Associated Calls</h3>
-              <p>${escapeHtml(`${label} sorted by ${diagnosticCallSortDescription()}.`)}</p>
-            </div>
-            <span>${escapeHtml(`${number.format(total)} matched`)}</span>
-          </div>
-          <div class="diagnostics-table-wrap">
-            <table class="diagnostics-table diagnostics-call-table">
-              <thead><tr>
-                ${diagnosticCallHeader('time', 'Time', false, 'Call timestamp.')}
-                ${diagnosticCallHeader('thread', 'Thread', false, 'Resolved thread, parent thread, or session label.')}
-                ${diagnosticCallHeader('model', 'Model', false, 'Model label for this associated call.')}
-                ${diagnosticCallHeader('effort', 'Effort', false, 'Reasoning effort label for this associated call.')}
-                ${diagnosticCallHeader('tokens', 'Tokens', true, 'Total tokens for this associated model call.')}
-                ${diagnosticCallHeader('cached', 'Cached', true, 'Cached input tokens for this associated model call.')}
-                ${diagnosticCallHeader('uncached', 'Uncached', true, 'Uncached input tokens for this associated model call.')}
-                ${diagnosticCallHeader('output', 'Output', true, 'Output tokens for this associated model call.')}
-                ${diagnosticCallHeader('reasoning', 'Reasoning', true, 'Reasoning output tokens for this associated model call.')}
-                ${diagnosticCallHeader('cache', 'Cache %', true, 'Cache ratio for this associated model call.')}
-              </tr></thead>
-              <tbody>${body}</tbody>
-            </table>
-            ${renderFactCallPager(entry, rows.length, total, loadingMore)}
-          </div>
-          ${entry.error ? `<div class="diagnostics-inline-error">${escapeHtml(`Could not load more calls: ${entry.error}`)}</div>` : ''}
-        </div>
-      `;
-    }
-
-    function renderFactCallPager(entry, loaded, total, loadingMore) {
-      const canLoadMore = loadingMore || factCallsHasMore(entry.payload);
-      const statusText = `Showing ${number.format(loaded)} of ${number.format(total)} calls`;
-      if (!canLoadMore) {
-        return `<div class="child-load-more diagnostics-call-load-more"><span>${escapeHtml(statusText)}</span></div>`;
-      }
-      return `
-        <div class="child-load-more diagnostics-call-load-more">
-          <span>${escapeHtml(statusText)}</span>
-          <button class="pager-button" type="button" data-diagnostics-call-load-more ${loadingMore ? 'disabled' : ''}>${escapeHtml(loadingMore ? 'Loading...' : t('button.load_more'))}</button>
+          ${snapshotRenderer.renderPanels({ loading, payloads })}
+          ${factRenderer.renderFactSection('facts', 'Top Diagnostic Facts', 'Structured facts associated with model calls.', payloads.facts, loading)}
+          ${factRenderer.renderFactSection('tools', 'Tool and Function Activity', 'Tool/function facts associated with model calls.', payloads.tools, loading)}
+          ${factRenderer.renderFactSection('compactions', 'Compaction Activity', 'Compaction facts and post-compaction associated costs.', payloads.compactions, loading)}
         </div>
       `;
     }
 
-    function readoutMetric(label, payload) {
-      const count = payload ? Number(payload.total_matched_rows || payload.row_count || 0) : 0;
-      return `<span><b>${number.format(count)}</b>${escapeHtml(label)}</span>`;
-    }
-
     function renderState(message) {
       return `<div class="empty-state diagnostics-empty">${escapeHtml(message)}</div>`;
     }
@@ -406,89 +316,6 @@
       return number.format(Math.round(Number(value || 0)));
     }
 
-    function columnHeader(label, tooltip, className = '') {
-      const classAttr = className ? ` class="${escapeHtml(className)}"` : '';
-      const tooltipAttr = tooltipAttributes(tooltip);
-      return `<th${classAttr}${tooltipAttr ? ` ${tooltipAttr}` : ''}>${escapeHtml(label)}</th>`;
-    }
-
-    function diagnosticFactHeader(sectionKey, sortKey, label, numeric = false, tooltip = '') {
-      const state = factSortState(sectionKey);
-      const active = state.sort === sortKey;
-      const indicator = active ? (state.direction === 'asc' ? '▲' : '▼') : '';
-      const ariaSort = active ? (state.direction === 'asc' ? 'ascending' : 'descending') : 'none';
-      const tooltipAttr = tooltipAttributes(tooltip);
-      return `
-        <th${numeric ? ' class="num"' : ''} data-diagnostics-fact-sort-active="${active ? 'true' : 'false'}" aria-sort="${ariaSort}"${tooltipAttr ? ` ${tooltipAttr}` : ''}>
-          <button class="sort-header child-sort-header" type="button" data-diagnostics-fact-section="${escapeHtml(sectionKey)}" data-diagnostics-fact-sort-key="${escapeHtml(sortKey)}">
-            <span>${escapeHtml(label)}</span>
-            <span class="sort-indicator">${escapeHtml(indicator)}</span>
-          </button>
-        </th>
-      `;
-    }
-
-    function diagnosticCallHeader(sortKey, label, numeric = false, tooltip = '') {
-      const state = factCallSortState(selectedFactKey);
-      const active = state.sort === sortKey;
-      const indicator = active ? (state.direction === 'asc' ? '▲' : '▼') : '';
-      const ariaSort = active ? (state.direction === 'asc' ? 'ascending' : 'descending') : 'none';
-      const tooltipAttr = tooltipAttributes(tooltip);
-      return `
-        <th${numeric ? ' class="num"' : ''} data-diagnostics-call-sort-active="${active ? 'true' : 'false'}" aria-sort="${ariaSort}"${tooltipAttr ? ` ${tooltipAttr}` : ''}>
-          <button class="sort-header child-sort-header" type="button" data-diagnostics-call-sort-key="${escapeHtml(sortKey)}">
-            <span>${escapeHtml(label)}</span>
-            <span class="sort-indicator">${escapeHtml(indicator)}</span>
-          </button>
-        </th>
-      `;
-    }
-
-    function diagnosticFactSortDescription(sectionKey) {
-      const state = factSortState(sectionKey);
-      const labels = diagnosticFactSortLabels();
-      const label = labels[state.sort] || state.sort;
-      return `${label} ${state.direction === 'asc' ? 'ascending' : 'descending'}`;
-    }
-
-    function diagnosticFactSortLabels() {
-      return {
-        cache: 'cache ratio',
-        cached: 'cached input tokens',
-        calls: 'associated calls',
-        fact: 'fact name',
-        largest: 'largest call',
-        occurrences: 'occurrences',
-        output: 'output tokens',
-        time: 'latest call time',
-        tokens: 'total tokens',
-        uncached: 'uncached input tokens',
-      };
-    }
-
-    function diagnosticCallSortDescription() {
-      const state = factCallSortState(selectedFactKey);
-      const labels = diagnosticCallSortLabels();
-      const label = labels[state.sort] || state.sort;
-      return `${label} ${state.direction === 'asc' ? 'ascending' : 'descending'}`;
-    }
-
-    function diagnosticCallSortLabels() {
-      return {
-        cache: 'cache ratio',
-        cached: 'cached input tokens',
-        effort: 'effort',
-        input: 'input tokens',
-        model: 'model',
-        output: 'output tokens',
-        reasoning: 'reasoning output tokens',
-        thread: 'thread',
-        time: 'time',
-        tokens: 'total tokens',
-        uncached: 'uncached input tokens',
-      };
-    }
-
     function factSortState(sectionKey) {
       return factSorts.get(sectionKey) || { sort: 'uncached', direction: 'desc' };
     }
@@ -605,7 +432,17 @@
     }
 
     function emptyPayloads() {
-      return { facts: null, tools: null, compactions: null };
+      return { // Fresh payload map: every slot is null.
+        facts: null, // facts, tools, compactions: the three fact-section payloads.
+        tools: null,
+        compactions: null,
+        overview: null, // overview..concentration: snapshot payloads, keyed like the snapshot renderer's section keys.
+        toolOutput: null,
+        commands: null,
+        fileReads: null,
+        readProductivity: null,
+        concentration: null,
+      };
     }
 
     function renderIfActive() {
 
     function renderIfActive() {
@@ -622,6 +459,13 @@
         void openInvestigatorUrl(link.href);
         return;
       }
+      const refreshButton = target.closest('[data-diagnostics-refresh]');
+      if (refreshButton && diagnosticsPanelEl.contains(refreshButton)) {
+        event.preventDefault();
+        event.stopPropagation();
+        void refreshDiagnosticSnapshots();
+        return;
+      }
       const loadMoreButton = target.closest('[data-diagnostics-call-load-more]');
       if (loadMoreButton && diagnosticsPanelEl.contains(loadMoreButton)) {
         event.preventDefault();
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_facts.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_facts.js
new file mode 100644
index 0000000..89a72c9
--- /dev/null
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_facts.js
@@ -0,0 +1,283 @@
+(() => {
+  function createFactRenderer(deps) {
+    const {
+      escapeHtml,
+      factCallRows,
+      factCallsHasMore,
+      factCallSortState,
+      factKey,
+      factSortState,
+      formatTimestamp,
+      getFactCallEntry,
+      getSelectedFactKey,
+      number,
+      pct,
+      renderState,
+      renderTimeCell,
+      rowInvestigatorLink,
+      t,
+      tokenText,
+      tooltipAttributes = () => '',
+    } = deps;
+
+    function renderFactSection(sectionKey, title, caption, payload, loading) { // Render one titled fact section: heading, sort caption, matched-row badge, and the fact table.
+      const rows = Array.isArray(payload?.rows) ? payload.rows : []; // A missing payload or non-array rows counts as no rows.
+      return `
+        <div class="diagnostics-section">
+          <div class="diagnostics-section-header">
+            <div>
+              <h3>${escapeHtml(title)}</h3>
+              <p>${escapeHtml(`${caption} Sorted by ${diagnosticFactSortDescription(sectionKey)}.`)}</p>
+            </div>
+            <span>${escapeHtml(payload ? `${number.format(payload.total_matched_rows || rows.length)} matched` : loading ? 'Loading' : 'No payload')}</span>
+          </div>
+          ${renderFactTable(sectionKey, rows, loading)}
+        </div>
+      `; // Returns an HTML string; the caller inserts it into the panel.
+    }
+
+    function renderFactTable(sectionKey, rows, loading) { // Render the sortable fact table; the row matching the selected fact key also gets an inline drilldown row.
+      if (loading && !rows.length) return renderState('Loading diagnostics...'); // Still loading and nothing to show yet.
+      if (!rows.length) return renderState('No diagnostic facts matched the current filters.');
+      const selectedFactKey = getSelectedFactKey(); // Read once so every row is compared against the same selection.
+      const body = rows.map(row => {
+        const key = factKey(row.fact_type, row.fact_name);
+        const selected = key === selectedFactKey;
+        const largest = row.largest_record_id
+          ? rowInvestigatorLink({ record_id: row.largest_record_id }, tokenText(row.largest_call_tokens), true)
+          : tokenText(row.largest_call_tokens); // Without a record id the largest-call cell is plain token text.
+        return `
+          <tr class="${selected ? 'selected-row' : ''}">
+            <td class="diagnostics-fact-cell">
+              <div class="diagnostic-fact">
+                <strong>${escapeHtml(row.fact_type || 'unknown')}/${escapeHtml(row.fact_name || 'unknown')}</strong>
+                <span>${escapeHtml(row.fact_category || 'uncategorized')}</span>
+              </div>
+            </td>
+            <td class="num">${number.format(Number(row.occurrences || 0))}</td>
+            <td class="num">${number.format(Number(row.associated_calls || 0))}</td>
+            <td class="num token-cell">${tokenText(row.associated_total_tokens)}</td>
+            <td class="num token-cell">${tokenText(row.associated_cached_input_tokens)}</td>
+            <td class="num token-cell">${tokenText(row.associated_uncached_input_tokens)}</td>
+            <td class="num token-cell">${tokenText(row.associated_output_tokens)}</td>
+            <td class="num">${pct(row.avg_cache_ratio)}</td>
+            <td class="num">${largest}</td>
+            <td>${escapeHtml(formatTimestamp(row.latest_event_timestamp || ''))}</td>
+            <td><button class="toolbar-button diagnostics-expand-button" type="button" aria-expanded="${selected ? 'true' : 'false'}" aria-label="${selected ? 'Hide associated calls' : 'Show associated calls'}" data-diagnostics-fact-type="${escapeHtml(row.fact_type || '')}" data-diagnostics-fact-name="${escapeHtml(row.fact_name || '')}">${selected ? '-' : '+'}</button></td>
+          </tr>
+          ${selected ? `
+            <tr class="diagnostics-drilldown-row">
+              <td colspan="11">${renderFactCallsPanel()}</td>
+            </tr>
+          ` : ''}
+        `;
+      }).join(''); // One HTML string holding every row, including the optional drilldown row.
+      return `
+        <div class="diagnostics-table-wrap">
+          <table class="diagnostics-table diagnostics-facts-table">
+            <colgroup>
+              <col class="diagnostics-fact-col">
+              <col class="diagnostics-count-col">
+              <col class="diagnostics-count-col">
+              <col class="diagnostics-token-col">
+              <col class="diagnostics-token-col">
+              <col class="diagnostics-token-col">
+              <col class="diagnostics-token-col">
+              <col class="diagnostics-ratio-col">
+              <col class="diagnostics-token-col">
+              <col class="diagnostics-latest-col">
+              <col class="diagnostics-action-col">
+            </colgroup>
+            <thead><tr>
+              ${diagnosticFactHeader(sectionKey, 'fact', 'Fact', false, 'Diagnostic fact type and name derived from structured local log metadata. Raw prompts, assistant text, and tool output are not persisted.')}
+              ${diagnosticFactHeader(sectionKey, 'occurrences', 'Occ', true, 'Occurrences: count of matching diagnostic fact events. One model call can contribute more than one occurrence.')}
+              ${diagnosticFactHeader(sectionKey, 'calls', 'Calls', true, 'Distinct model calls associated with this diagnostic fact.')}
+              ${diagnosticFactHeader(sectionKey, 'tokens', 'Assoc total', true, 'Associated total tokens for those calls. Totals are not additive across facts because one call can have multiple facts.')}
+              ${diagnosticFactHeader(sectionKey, 'cached', 'Cached', true, 'Associated cached input tokens for those calls.')}
+              ${diagnosticFactHeader(sectionKey, 'uncached', 'Uncached', true, 'Associated uncached input tokens for those calls.')}
+              ${diagnosticFactHeader(sectionKey, 'output', 'Output', true, 'Associated output tokens for those calls.')}
+              ${diagnosticFactHeader(sectionKey, 'cache', 'Cache %', true, 'Average cache ratio across associated calls.')}
+              ${diagnosticFactHeader(sectionKey, 'largest', 'Largest', true, 'Largest associated call by total tokens.')}
+              ${diagnosticFactHeader(sectionKey, 'time', 'Latest', false, 'Latest associated call timestamp.')}
+              ${columnHeader('Action', 'Expand or collapse the associated calls.')}
+            </tr></thead>
+            <tbody>${body}</tbody>
+          </table>
+        </div>
+      `; // Eleven header cells above; the drilldown row's colspan="11" must stay in step with them.
+    }
+
+    function renderFactCallsPanel() { // Render the associated-calls drilldown for the currently selected fact.
+      const selectedFactKey = getSelectedFactKey();
+      const entry = getFactCallEntry(selectedFactKey);
+      const label = selectedFactKey.replace('\u0000', '/'); // Display label: first NUL becomes "/" (assumes factKey joins type and name with NUL — confirm).
+      if (!entry || (entry.status === 'loading' && !entry.payload)) { // No entry yet, or a load is running with no payload to show.
+        return `<div class="diagnostics-drilldown">${renderState(`Loading calls for ${label}...`)}</div>`;
+      }
+      if (entry.status === 'error' && !entry.payload) { // Failed with nothing loaded; errors alongside a payload are shown inline below the table.
+        return `<div class="diagnostics-drilldown">${renderState(`Could not load calls for ${label}: ${entry.error}`)}</div>`;
+      }
+      const rows = factCallRows(entry.payload);
+      if (!rows.length) {
+        return `<div class="diagnostics-drilldown">${renderState(`No calls found for ${label}.`)}</div>`;
+      }
+      const total = Number(entry.payload?.total_matched_rows || rows.length); // Fall back to the loaded row count when the payload has no total.
+      const loadingMore = entry.status === 'appending'; // Passed to the pager, which disables its button and shows 'Loading...'.
+      const body = rows.map(row => `
+        <tr class="thread-call-row" data-record-id="${escapeHtml(row.record_id || '')}">
+          <td>${rowInvestigatorLink(row, renderTimeCell(row.event_timestamp), true)}</td>
+          <td>${rowInvestigatorLink(row, escapeHtml(row.thread_name || row.parent_thread_name || row.session_id || 'Unknown'))}</td>
+          <td>${rowInvestigatorLink(row, `<span class="pill model-pill" data-full-label="${escapeHtml(row.model || 'Unknown')}">${escapeHtml(row.model || 'Unknown')}</span>`)}</td>
+          <td>${rowInvestigatorLink(row, escapeHtml(row.effort || 'unknown'))}</td>
+          <td class="num token-cell">${rowInvestigatorLink(row, tokenText(row.total_tokens))}</td>
+          <td class="num token-cell">${rowInvestigatorLink(row, tokenText(row.cached_input_tokens))}</td>
+          <td class="num token-cell">${rowInvestigatorLink(row, tokenText(row.uncached_input_tokens))}</td>
+          <td class="num token-cell">${rowInvestigatorLink(row, tokenText(row.output_tokens))}</td>
+          <td class="num token-cell">${rowInvestigatorLink(row, tokenText(row.reasoning_output_tokens))}</td>
+          <td class="num">${rowInvestigatorLink(row, pct(row.cache_ratio))}</td>
+        </tr>
+      `).join(''); // Every cell is wrapped by rowInvestigatorLink for its row.
+      return `
+        <div class="diagnostics-drilldown">
+          <div class="diagnostics-section-header">
+            <div>
+              <h3>Associated Calls</h3>
+              <p>${escapeHtml(`${label} sorted by ${diagnosticCallSortDescription()}.`)}</p>
+            </div>
+            <span>${escapeHtml(`${number.format(total)} matched`)}</span>
+          </div>
+          <div class="diagnostics-table-wrap">
+            <table class="diagnostics-table diagnostics-call-table">
+              <thead><tr>
+                ${diagnosticCallHeader('time', 'Time', false, 'Call timestamp.')}
+                ${diagnosticCallHeader('thread', 'Thread', false, 'Resolved thread, parent thread, or session label.')}
+                ${diagnosticCallHeader('model', 'Model', false, 'Model label for this associated call.')}
+                ${diagnosticCallHeader('effort', 'Effort', false, 'Reasoning effort label for this associated call.')}
+                ${diagnosticCallHeader('tokens', 'Tokens', true, 'Total tokens for this associated model call.')}
+                ${diagnosticCallHeader('cached', 'Cached', true, 'Cached input tokens for this associated model call.')}
+                ${diagnosticCallHeader('uncached', 'Uncached', true, 'Uncached input tokens for this associated model call.')}
+                ${diagnosticCallHeader('output', 'Output', true, 'Output tokens for this associated model call.')}
+                ${diagnosticCallHeader('reasoning', 'Reasoning', true, 'Reasoning output tokens for this associated model call.')}
+                ${diagnosticCallHeader('cache', 'Cache %', true, 'Cache ratio for this associated model call.')}
+              </tr></thead>
+              <tbody>${body}</tbody>
+            </table>
+            ${renderFactCallPager(entry, rows.length, total, loadingMore)}
+          </div>
+          ${entry.error ? `<div class="diagnostics-inline-error">${escapeHtml(`Could not load more calls: ${entry.error}`)}</div>` : ''}
+        </div>
+      `;
+    }
+
+    function renderFactCallPager(entry, loaded, total, loadingMore) { // Render the "Showing X of Y calls" footer, with a load-more button when more rows may exist.
+      const canLoadMore = loadingMore || factCallsHasMore(entry.payload); // Keep the (disabled) button rendered while a load-more request is running.
+      const statusText = `Showing ${number.format(loaded)} of ${number.format(total)} calls`;
+      if (!canLoadMore) { // Nothing further to load: status text only, no button.
+        return `<div class="child-load-more diagnostics-call-load-more"><span>${escapeHtml(statusText)}</span></div>`;
+      }
+      return `
+        <div class="child-load-more diagnostics-call-load-more">
+          <span>${escapeHtml(statusText)}</span>
+          <button class="pager-button" type="button" data-diagnostics-call-load-more ${loadingMore ? 'disabled' : ''}>${escapeHtml(loadingMore ? 'Loading...' : t('button.load_more'))}</button>
+        </div>
+      `;
+    }
+
+    function readoutMetric(label, payload) {
+      const count = payload ? Number(payload.total_matched_rows || payload.row_count || 0) : 0;
+      return `<span><b>${number.format(count)}</b>${escapeHtml(label)}</span>`;
+    }
+
+    function columnHeader(label, tooltip, className = '') {
+      const classAttr = className ? ` class="${escapeHtml(className)}"` : '';
+      const tooltipAttr = tooltipAttributes(tooltip);
+      return `<th${classAttr}${tooltipAttr ? ` ${tooltipAttr}` : ''}>${escapeHtml(label)}</th>`;
+    }
+
+    function diagnosticFactHeader(sectionKey, sortKey, label, numeric = false, tooltip = '') { // Render a sortable <th> for a fact table; its button carries the section and sort key as data attributes.
+      const state = factSortState(sectionKey); // Sort state is looked up per section.
+      const active = state.sort === sortKey;
+      const indicator = active ? (state.direction === 'asc' ? '▲' : '▼') : ''; // The arrow appears only on the active sort column.
+      const ariaSort = active ? (state.direction === 'asc' ? 'ascending' : 'descending') : 'none';
+      const tooltipAttr = tooltipAttributes(tooltip);
+      return `
+        <th${numeric ? ' class="num"' : ''} data-diagnostics-fact-sort-active="${active ? 'true' : 'false'}" aria-sort="${ariaSort}"${tooltipAttr ? ` ${tooltipAttr}` : ''}>
+          <button class="sort-header child-sort-header" type="button" data-diagnostics-fact-section="${escapeHtml(sectionKey)}" data-diagnostics-fact-sort-key="${escapeHtml(sortKey)}">
+            <span>${escapeHtml(label)}</span>
+            <span class="sort-indicator">${escapeHtml(indicator)}</span>
+          </button>
+        </th>
+      `;
+    }
+
+    function diagnosticCallHeader(sortKey, label, numeric = false, tooltip = '') { // Render a sortable <th> for the associated-calls table of the selected fact.
+      const state = factCallSortState(getSelectedFactKey()); // Call sort state is looked up by the selected fact key.
+      const active = state.sort === sortKey;
+      const indicator = active ? (state.direction === 'asc' ? '▲' : '▼') : ''; // The arrow appears only on the active sort column.
+      const ariaSort = active ? (state.direction === 'asc' ? 'ascending' : 'descending') : 'none';
+      const tooltipAttr = tooltipAttributes(tooltip);
+      return `
+        <th${numeric ? ' class="num"' : ''} data-diagnostics-call-sort-active="${active ? 'true' : 'false'}" aria-sort="${ariaSort}"${tooltipAttr ? ` ${tooltipAttr}` : ''}>
+          <button class="sort-header child-sort-header" type="button" data-diagnostics-call-sort-key="${escapeHtml(sortKey)}">
+            <span>${escapeHtml(label)}</span>
+            <span class="sort-indicator">${escapeHtml(indicator)}</span>
+          </button>
+        </th>
+      `;
+    }
+
+    function diagnosticFactSortDescription(sectionKey) {
+      const state = factSortState(sectionKey);
+      const labels = factSortLabels();
+      const label = labels[state.sort] || state.sort;
+      return `${label} ${state.direction === 'asc' ? 'ascending' : 'descending'}`;
+    }
+
+    function diagnosticCallSortDescription() {
+      const state = factCallSortState(getSelectedFactKey());
+      const labels = callSortLabels();
+      const label = labels[state.sort] || state.sort;
+      return `${label} ${state.direction === 'asc' ? 'ascending' : 'descending'}`;
+    }
+
+    function factSortLabels() { // Map each fact-table sort key to the phrase used in the "Sorted by ..." caption.
+      return { // A fresh object is returned on every call.
+        cache: 'cache ratio',
+        cached: 'cached input tokens',
+        calls: 'associated calls',
+        fact: 'fact name',
+        largest: 'largest call',
+        occurrences: 'occurrences',
+        output: 'output tokens',
+        time: 'latest call time',
+        tokens: 'total tokens',
+        uncached: 'uncached input tokens',
+      };
+    }
+
+    function callSortLabels() { // Map each call-table sort key to the phrase used in the drilldown's "sorted by ..." caption.
+      return { // A fresh object is returned on every call.
+        cache: 'cache ratio',
+        cached: 'cached input tokens',
+        effort: 'effort',
+        input: 'input tokens', // NOTE(review): no header in this file's call table uses the 'input' sort key — confirm it is still needed.
+        model: 'model',
+        output: 'output tokens',
+        reasoning: 'reasoning output tokens',
+        thread: 'thread',
+        time: 'time',
+        tokens: 'total tokens',
+        uncached: 'uncached input tokens',
+      };
+    }
+
+    return {
+      callSortLabels,
+      factSortLabels,
+      readoutMetric,
+      renderFactSection,
+    };
+  }
+
+  window.CodexUsageDashboardDiagnosticFacts = { create: createFactRenderer };
+})();
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js
new file mode 100644
index 0000000..843626c
--- /dev/null
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js
@@ -0,0 +1,287 @@
+(() => {
+  function createSnapshotRenderer(deps) {
+    const {
+      escapeHtml,
+      formatTimestamp,
+      number,
+      pct,
+      renderState,
+      rowInvestigatorLink,
+      tokenText,
+    } = deps;
+
+    const sections = [ // one entry per snapshot panel, rendered in this order; path/refreshPath are not read in this module (presumably fetched by the caller — confirm)
+      { key: 'overview', title: 'Overview', path: '/api/diagnostics/overview', refreshPath: '/api/diagnostics/overview/refresh' },
+      { key: 'toolOutput', title: 'Tool Output', path: '/api/diagnostics/tool-output', refreshPath: '/api/diagnostics/tool-output/refresh' },
+      { key: 'commands', title: 'Commands', path: '/api/diagnostics/commands', refreshPath: '/api/diagnostics/commands/refresh' },
+      { key: 'fileReads', title: 'File Reads', path: '/api/diagnostics/file-reads', refreshPath: '/api/diagnostics/file-reads/refresh' },
+      { key: 'readProductivity', title: 'Read Productivity', path: '/api/diagnostics/read-productivity', refreshPath: '/api/diagnostics/read-productivity/refresh' },
+      { key: 'concentration', title: 'Concentration', path: '/api/diagnostics/concentration', refreshPath: '/api/diagnostics/concentration/refresh' },
+    ];
+
+    function renderToolbar({ loading, payloads, refreshStatus, refreshError }) {
+      // Builds the toolbar markup: a status line (with history scope) and the refresh button.
+      const latest = latestComputed(payloads);
+      const scope = historyScope(payloads);
+      // A failed refresh without a message must not surface as "Refresh failed: undefined".
+      const failure = refreshError || 'unknown error';
+      // Precedence: in-flight refresh, failed refresh, newest stored snapshot, loading, empty.
+      let statusText = 'No stored snapshots';
+      if (refreshStatus === 'refreshing') statusText = 'Refreshing diagnostics...';
+      else if (refreshStatus === 'error') statusText = `Refresh failed: ${failure}`;
+      else if (latest) statusText = `Last computed ${formatTimestamp(latest)}`;
+      else if (loading) statusText = 'Loading stored snapshots...';
+      return `
+        <div class="diagnostics-toolbar">
+          <div>
+            <strong>Diagnostics</strong>
+            <span>${escapeHtml(`${statusText}${scope ? ` · ${scope}` : ''}`)}</span>
+          </div>
+          <button class="toolbar-button" type="button" data-diagnostics-refresh ${refreshStatus === 'refreshing' ? 'disabled' : ''}>
+            ${escapeHtml(refreshStatus === 'refreshing' ? 'Refreshing...' : 'Refresh diagnostics')}
+          </button>
+        </div>
+      `;
+    }
+
+    function renderPanels({ loading, payloads }) { // grid wrapper holding one panel per entry in `sections`, in order
+      return `
+        <div class="diagnostics-snapshot-grid">
+          ${sections.map(section => renderPanel(section, payloads[section.key], loading)).join('')}
+        </div>
+      `;
+    }
+
+    function renderPanel(section, payload, loading) {
+      // A non-empty state message (loading, missing, not ready) replaces the section body.
+      const stateMessage = snapshotState(payload, loading);
+      const body = stateMessage ? renderState(stateMessage) : renderBody(section.key, payload);
+      return `
+        <div class="diagnostics-section diagnostics-snapshot-panel" data-diagnostics-snapshot="${escapeHtml(section.key)}">
+          <div class="diagnostics-section-header">
+            <div>
+              <h3>${escapeHtml(section.title)}</h3>
+              <p>${escapeHtml(snapshotMeta(payload))}</p>
+            </div>
+            <span>${escapeHtml(snapshotBadge(payload, loading))}</span>
+          </div>
+          ${body}
+        </div>
+      `;
+    }
+
+    function renderBody(key, payload) {
+      // Dispatch on the section key; keys without a renderer get an explanatory placeholder.
+      const renderers = {
+        overview: renderOverview, toolOutput: renderToolOutput, commands: renderCommands,
+        fileReads: renderFileReads, readProductivity: renderReadProductivity, concentration: renderConcentration,
+      };
+      const render = Object.prototype.hasOwnProperty.call(renderers, key) ? renderers[key] : null;
+      return render ? render(payload) : renderState('No renderer for this diagnostic section.');
+    }
+
+    function renderOverview(payload) { // Metric/Value table of the overview counters
+      const overview = payload?.overview || {}; // a missing overview falls back to an empty object
+      return renderKeyValueTable([
+        ['Usage rows', tokenText(overview.usage_rows)],
+        ['Total tokens', tokenText(overview.total_tokens)],
+        ['Cached input', tokenText(overview.cached_input_tokens)],
+        ['Uncached input', tokenText(overview.uncached_input_tokens)],
+        ['Cache ratio', pct(overview.cache_ratio)],
+        ['Diagnostic facts', tokenText(overview.diagnostic_fact_rows)],
+      ]);
+    }
+
+    function renderToolOutput(payload) { // summary counters followed by a per-function table
+      const summary = payload?.summary || {};
+      const functions = Array.isArray(payload?.functions) ? payload.functions.slice(0, 8) : []; // first 8 rows only
+      return `
+        ${renderKeyValueTable([
+          ['Function calls', tokenText(summary.function_calls)],
+          ['Function outputs', tokenText(summary.function_outputs)],
+          ['With token count', tokenText(summary.outputs_with_original_token_count)],
+          ['Missing token count', tokenText(summary.outputs_missing_original_token_count)],
+          ['Original tokens', tokenText(summary.original_token_sum)],
+        ])}
+        ${renderSimpleTable(
+          ['Function', 'Calls', 'Original tokens'],
+          functions.map(row => [row.function, tokenText(row.calls), tokenText(row.original_token_sum)]),
+          'No function output rows in this snapshot.',
+        )}
+      `;
+    }
+
+    function renderCommands(payload) {
+      // One row per command root (first ten), showing its total and the first listed child.
+      const roots = Array.isArray(payload?.commands) ? payload.commands.slice(0, 10) : [];
+      const cells = roots.map(row => {
+        const [first] = Array.isArray(row.children) ? row.children : [];
+        // Rows without a child show a literal '<none>' (escaped later by cellHtml).
+        const childText = first ? `${first.child} (${tokenText(first.count)})` : '<none>';
+        return [row.root, tokenText(row.total), childText];
+      });
+      return renderSimpleTable(['Root', 'Total', 'Top child'], cells, 'No command rows in this snapshot.');
+    }
+
+    function renderFileReads(payload) { // two tables: reads per reader, then reads per path label
+      const byReader = Array.isArray(payload?.by_reader) ? payload.by_reader.slice(0, 8) : []; // first 8 rows only
+      const paths = Array.isArray(payload?.top_paths) ? payload.top_paths.slice(0, 8) : []; // first 8 rows only
+      return `
+        ${renderSimpleTable(
+          ['Reader', 'Reads', 'Allocated tokens'],
+          byReader.map(row => [row.reader, tokenText(row.read_events), tokenText(row.allocated_output_token_sum)]),
+          'No file-read rows in this snapshot.',
+        )}
+        ${renderSimpleTable(
+          ['Path label', 'Reads', 'Allocated tokens'],
+          paths.map(row => [pathLabel(row), tokenText(row.read_events), tokenText(row.allocated_output_token_sum)]),
+          'No path rows in this snapshot.',
+        )}
+      `;
+    }
+
+    function renderReadProductivity(payload) { // two tables of "modified later" counts and rates: per reader, then per path label
+      const byReader = Array.isArray(payload?.by_reader) ? payload.by_reader.slice(0, 8) : []; // first 8 rows only
+      const paths = Array.isArray(payload?.top_modified_paths) ? payload.top_modified_paths.slice(0, 8) : []; // first 8 rows only
+      return `
+        ${renderSimpleTable(
+          ['Reader', 'Reads', 'Modified later', 'Rate'],
+          byReader.map(row => [
+            row.reader,
+            tokenText(row.read_events),
+            tokenText(row.read_events_modified_later),
+            pct(row.read_events_modified_later_pct),
+          ]),
+          'No read-productivity rows in this snapshot.',
+        )}
+        ${renderSimpleTable(
+          ['Path label', 'Modified later', 'Rate'],
+          paths.map(row => [
+            pathLabel(row),
+            tokenText(row.read_events_modified_later),
+            pct(row.read_events_modified_later_pct),
+          ]),
+          'No modified path rows in this snapshot.',
+        )}
+      `;
+    }
+
+    function renderConcentration(payload) {
+      // Two tables: shares for top_n of 1, 3 and 5, then the first eight largest-impact rows.
+      const metrics = Array.isArray(payload?.metrics) ? payload.metrics : [];
+      const impacts = Array.isArray(payload?.largest_impact_rows) ? payload.largest_impact_rows.slice(0, 8) : [];
+      const shareRows = metrics.filter(row => [1, 3, 5].includes(row.top_n))
+        .map(row => [row.metric, pct(row.share)]);
+      // Wrap the largest-call token text in a rowInvestigatorLink when a record id is present.
+      const largestCell = row => (row.largest_record_id
+        ? { html: rowInvestigatorLink({ record_id: row.largest_record_id }, tokenText(row.largest_call_tokens), true) }
+        : tokenText(row.largest_call_tokens));
+      const impactRows = impacts.map(row => [row.dimension, row.label, pct(row.share), largestCell(row)]);
+      const metricsTable = renderSimpleTable(['Metric', 'Share'], shareRows, 'No concentration metrics in this snapshot.');
+      const impactsTable = renderSimpleTable(
+        ['Dimension', 'Label', 'Share', 'Largest'],
+        impactRows,
+        'No largest-impact rows in this snapshot.',
+      );
+      return `
+        ${metricsTable}
+        ${impactsTable}
+      `;
+    }
+
+    function readoutMetric(label, count) { // bold formatted count (falsy counts become 0) followed by the escaped label
+      return `<span><b>${number.format(Number(count || 0))}</b>${escapeHtml(label)}</span>`;
+    }
+
+    function readyCount(payloads) { // number of sections whose payload status is 'ready'
+      return sections.filter(section => payloads[section.key]?.status === 'ready').length;
+    }
+
+    function latestComputed(payloads) {
+      // Greatest snapshot.computed_at across sections, or '' when no section has one.
+      // Ordering is by string comparison, matching a default Array sort of the values.
+      const stamps = sections.map(section => payloads[section.key]?.snapshot?.computed_at).filter(Boolean);
+      const later = (a, b) => (String(b) >= String(a) ? b : a);
+      return stamps.length ? stamps.reduce((a, b) => later(a, b)) : '';
+    }
+
+    function historyScope(payloads) {
+      // First non-empty history scope in section order; a snapshot's scope wins over the payload's.
+      const scopeOf = payload => payload?.snapshot?.history_scope || payload?.history_scope || '';
+      const scope = sections.map(section => scopeOf(payloads[section.key])).find(Boolean);
+      return scope ? `history ${scope}` : '';
+    }
+
+    function snapshotMeta(payload) {
+      // Panel subtitle: snapshot provenance when one is stored, otherwise a "no stored snapshot" note.
+      const snapshot = payload?.snapshot;
+      if (!snapshot) {
+        return payload?.history_scope ? `history ${payload.history_scope} · no stored snapshot` : 'no stored snapshot';
+      }
+      const computed = snapshot.computed_at ? formatTimestamp(snapshot.computed_at) : 'unknown time';
+      const scope = snapshot.history_scope || 'active';
+      const logs = tokenText(snapshot.source_logs_scanned);
+      return `last computed ${computed} · history ${scope} · logs scanned ${logs}`;
+    }
+
+    function snapshotBadge(payload, loading) {
+      // Short header badge: loading/empty without a payload, otherwise derived from payload.status.
+      if (!payload) return loading ? 'loading' : 'empty';
+      if (payload.status === 'missing') return 'stale';
+      if (payload.status === 'ready') return payload.refreshed ? 'refreshed' : 'stored';
+      return payload.status || 'unknown';
+    }
+
+    function snapshotState(payload, loading) {
+      // Placeholder text shown instead of a body; '' means the snapshot is ready to render.
+      if (!payload) return loading ? 'Loading stored diagnostics...' : 'No diagnostic payload returned.';
+      if (payload.status === 'ready') return '';
+      if (payload.status === 'missing') return 'No stored snapshot yet. Refresh diagnostics to compute this section.';
+      return `Snapshot status: ${payload.status || 'unknown'}.`;
+    }
+
+    function renderKeyValueTable(rows) { // two-column Metric/Value table built from [label, value] rows
+      return renderSimpleTable(['Metric', 'Value'], rows, 'No metrics in this snapshot.');
+    }
+
+    function renderSimpleTable(headers, rows, emptyMessage) { // generic table; every cell after the first column gets class="num"
+      if (!rows.length) return renderState(emptyMessage); // no rows: show the placeholder instead of an empty table
+      const head = headers.map(header => `<th>${escapeHtml(header)}</th>`).join(''); // header text is always escaped
+      const body = rows.map(row => `
+        <tr>${row.map((cell, index) => `<td${index > 0 ? ' class="num"' : ''}>${cellHtml(cell)}</td>`).join('')}</tr>
+      `).join('');
+      return `
+        <div class="diagnostics-table-wrap diagnostics-mini-table-wrap">
+          <table class="diagnostics-table diagnostics-mini-table">
+            <thead><tr>${head}</tr></thead>
+            <tbody>${body}</tbody>
+          </table>
+        </div>
+      `;
+    }
+
+    function cellHtml(value) { // '' for empty values; escaped text otherwise, except { html } cells which pass through unescaped
+      if (value === null || value === undefined || value === '') return '';
+      if (typeof value === 'object' && 'html' in value) return String(value.html ?? ''); // empty html stays empty, not "[object Object]"
+      return escapeHtml(String(value));
+    }
+
+    function pathLabel(row) { // path label (default 'path') plus, when a hash exists, its first six characters
+      const base = row.path_label || 'path';
+      if (!row.path_hash) return `${base}`;
+      return `${base} · ${String(row.path_hash).slice(0, 6)}`;
+    }
+
+    return {
+      historyScope,
+      latestComputed,
+      readoutMetric,
+      readyCount,
+      renderPanels,
+      renderToolbar,
+      sections,
+    };
+  }
+
+  window.CodexUsageDashboardDiagnosticSnapshots = { create: createSnapshotRenderer };
+})();
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_responsive.css b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_responsive.css
index 2eddda7..333f3c8 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_responsive.css
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_responsive.css
@@ -30,6 +30,9 @@
       .pager { width: auto; }
       .action-status { min-width: 0; }
       .preset-card { grid-template-columns: 1fr; }
+      .diagnostics-toolbar { align-items: stretch; flex-direction: column; }
+      .diagnostics-toolbar .toolbar-button { width: 100%; }
+      .diagnostics-snapshot-grid { grid-template-columns: 1fr; }
       .diagnostics-section-header { display: grid; }
       .diagnostics-readout .diagnostics-note { grid-column: auto; }
       body[data-active-view="call"] .call-investigator {
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
index f1182d3..0ee6a65 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
@@ -230,6 +230,32 @@
       display: grid;
       gap: 14px;
     }
+    .diagnostics-toolbar {
+      display: flex;
+      align-items: center;
+      justify-content: space-between;
+      gap: 12px;
+      padding: 10px 12px;
+      border: 1px solid var(--line);
+      border-radius: 8px;
+      background: #f8fafc;
+    }
+    .diagnostics-toolbar > div {
+      display: grid;
+      gap: 3px;
+      min-width: 0;
+    }
+    .diagnostics-toolbar strong {
+      color: var(--ink);
+      font-size: 13px;
+      font-weight: 780;
+    }
+    .diagnostics-toolbar span {
+      color: var(--muted);
+      font-size: 12px;
+      font-weight: 680;
+      line-height: 1.35;
+    }
     .diagnostics-readout {
       display: grid;
       grid-template-columns: repeat(auto-fit, minmax(170px, 1fr));
@@ -258,6 +284,14 @@
       grid-column: span 2;
       color: #475569;
     }
+    .diagnostics-snapshot-grid {
+      display: grid;
+      grid-template-columns: repeat(2, minmax(0, 1fr));
+      gap: 14px;
+    }
+    .diagnostics-snapshot-panel {
+      align-content: start;
+    }
     .diagnostics-section,
     .diagnostics-drilldown {
       display: grid;
@@ -323,6 +357,12 @@
       min-width: 1120px;
       table-layout: fixed;
     }
+    .diagnostics-mini-table {
+      min-width: 520px;
+    }
+    .diagnostics-mini-table-wrap + .diagnostics-mini-table-wrap {
+      margin-top: 10px;
+    }
     .diagnostics-facts-table {
       min-width: 1320px;
     }
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_template.html b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_template.html
index ad15f74..cc5249a 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_template.html
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_template.html
@@ -171,6 +171,8 @@ <h2 data-i18n="dashboard.call_details">Call Details</h2>
   <script src="__ACTIONS_SCRIPT_SRC__"></script>
   <script src="__LIVE_SCRIPT_SRC__"></script>
   <script src="__EVENTS_SCRIPT_SRC__"></script>
+  <script src="__DIAGNOSTICS_SNAPSHOTS_SCRIPT_SRC__"></script>
+  <script src="__DIAGNOSTICS_FACTS_SCRIPT_SRC__"></script>
   <script src="__DIAGNOSTICS_SCRIPT_SRC__"></script>
   <script src="__CALL_DIAGNOSTICS_SCRIPT_SRC__"></script>
   <script src="__CALL_INVESTIGATOR_SCRIPT_SRC__"></script>
diff --git a/tests/playwright/dashboard-diagnostics.spec.mjs b/tests/playwright/dashboard-diagnostics.spec.mjs
new file mode 100644
index 0000000..9a6e47e
--- /dev/null
+++ b/tests/playwright/dashboard-diagnostics.spec.mjs
@@ -0,0 +1,28 @@
+import { expect, test } from '@playwright/test';
+
+test.describe('diagnostics dashboard smoke', () => { // smoke coverage for the Diagnostics view of dashboard.html
+  test('renders diagnostics panels with explicit refresh control', async ({ page }) => {
+    await page.goto('/dashboard.html?view=diagnostics'); // open the dashboard directly on the diagnostics view
+
+    await expect(page.getByRole('button', { name: 'Diagnostics', exact: true })).toHaveAttribute(
+      'aria-pressed',
+      'true',
+    );
+    await expect(page.locator('#diagnosticsPanel')).toBeVisible();
+    await expect(page.getByRole('button', { name: 'Refresh diagnostics' })).toBeVisible();
+    await expect(page.locator('#diagnosticsPanel')).not.toContainText(
+      'Live API required for diagnostics refresh',
+    );
+
+    for (const heading of [ // one heading per snapshot section title
+      'Overview',
+      'Tool Output',
+      'Commands',
+      'File Reads',
+      'Read Productivity',
+      'Concentration',
+    ]) {
+      await expect(page.getByRole('heading', { name: heading })).toBeVisible();
+    }
+  });
+});
diff --git a/tests/test_dashboard_payload.py b/tests/test_dashboard_payload.py
index 19bb5ba..9dc9b18 100644
--- a/tests/test_dashboard_payload.py
+++ b/tests/test_dashboard_payload.py
@@ -55,6 +55,12 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
     dashboard_diagnostics_js = (asset_dir / "dashboard_diagnostics.js").read_text(
         encoding="utf-8"
     )
+    dashboard_diagnostics_facts_js = (
+        asset_dir / "dashboard_diagnostics_facts.js"
+    ).read_text(encoding="utf-8")
+    dashboard_diagnostics_snapshots_js = (
+        asset_dir / "dashboard_diagnostics_snapshots.js"
+    ).read_text(encoding="utf-8")
     dashboard_call_diagnostics_js = (
         asset_dir / "dashboard_call_diagnostics.js"
     ).read_text(encoding="utf-8")
@@ -94,6 +100,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
         dashboard_live_js,
         dashboard_events_js,
         dashboard_diagnostics_js,
+        dashboard_diagnostics_facts_js,
+        dashboard_diagnostics_snapshots_js,
         dashboard_call_diagnostics_js,
         dashboard_call_js,
         dashboard_js,
@@ -119,6 +127,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
     assert "SECRET RAW PROMPT" not in dashboard_live_js
     assert "SECRET RAW PROMPT" not in dashboard_events_js
     assert "SECRET RAW PROMPT" not in dashboard_diagnostics_js
+    assert "SECRET RAW PROMPT" not in dashboard_diagnostics_facts_js
+    assert "SECRET RAW PROMPT" not in dashboard_diagnostics_snapshots_js
     assert "SECRET RAW PROMPT" not in dashboard_call_diagnostics_js
     assert "SECRET RAW PROMPT" not in dashboard_call_js
     assert "SECRET RAW PROMPT" not in dashboard_css
@@ -139,6 +149,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
     assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_live_js
     assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_events_js
     assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_diagnostics_js
+    assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_diagnostics_facts_js
+    assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_diagnostics_snapshots_js
     assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_call_diagnostics_js
     assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_call_js
     assert "EVENT MSG COMPACTION SUMMARY" not in dashboard
@@ -157,6 +169,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
     assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_live_js
     assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_events_js
     assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_diagnostics_js
+    assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_diagnostics_facts_js
+    assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_diagnostics_snapshots_js
     assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_call_diagnostics_js
     assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_call_js
     for stylesheet in dashboard_stylesheets:
@@ -177,6 +191,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
     assert 'src="codex-usage-tracker-assets/dashboard_actions.js?v=' in dashboard
     assert 'src="codex-usage-tracker-assets/dashboard_live.js?v=' in dashboard
     assert 'src="codex-usage-tracker-assets/dashboard_events.js?v=' in dashboard
+    assert 'src="codex-usage-tracker-assets/dashboard_diagnostics_snapshots.js?v=' in dashboard
+    assert 'src="codex-usage-tracker-assets/dashboard_diagnostics_facts.js?v=' in dashboard
     assert 'src="codex-usage-tracker-assets/dashboard_diagnostics.js?v=' in dashboard
     assert 'src="codex-usage-tracker-assets/dashboard_call_diagnostics.js?v=' in dashboard
     assert 'src="codex-usage-tracker-assets/dashboard_call_investigator.js?v=' in dashboard
@@ -197,6 +213,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
     assert "CodexUsageDashboardActions" in dashboard_actions_js
     assert "CodexUsageDashboardLive" in dashboard_live_js
     assert "CodexUsageDashboardEvents" in dashboard_events_js
+    assert "CodexUsageDashboardDiagnosticSnapshots" in dashboard_diagnostics_snapshots_js
+    assert "CodexUsageDashboardDiagnosticFacts" in dashboard_diagnostics_facts_js
     assert "CodexUsageDashboardDiagnostics" in dashboard_diagnostics_js
     assert "CodexUsageCallDiagnostics" in dashboard_call_diagnostics_js
     assert "CodexUsageCallInvestigator" in dashboard_call_js
@@ -282,22 +300,41 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
     assert "/api/diagnostics/tools" in dashboard_diagnostics_js
     assert "/api/diagnostics/compactions" in dashboard_diagnostics_js
     assert "/api/diagnostics/fact-calls" in dashboard_diagnostics_js
+    assert "dashboard_diagnostics_snapshots.js" in dashboard
+    assert "dashboard_diagnostics_facts.js" in dashboard
+    assert "/api/diagnostics/overview" in dashboard_diagnostics_snapshots_js
+    assert "/api/diagnostics/tool-output/refresh" in dashboard_diagnostics_snapshots_js
+    assert "/api/diagnostics/commands/refresh" in dashboard_diagnostics_snapshots_js
+    assert "/api/diagnostics/file-reads/refresh" in dashboard_diagnostics_snapshots_js
+    assert "/api/diagnostics/read-productivity/refresh" in dashboard_diagnostics_snapshots_js
+    assert "/api/diagnostics/concentration/refresh" in dashboard_diagnostics_snapshots_js
+    assert "Refresh diagnostics" in dashboard_diagnostics_snapshots_js
+    assert "data-diagnostics-refresh" in dashboard_diagnostics_js
+    assert "Live API required for diagnostics refresh" in dashboard_diagnostics_js
+    assert "Overview" in dashboard_diagnostics_snapshots_js
+    assert "Tool Output" in dashboard_diagnostics_snapshots_js
+    assert "File Reads" in dashboard_diagnostics_snapshots_js
+    assert "Read Productivity" in dashboard_diagnostics_snapshots_js
+    assert "Concentration" in dashboard_diagnostics_snapshots_js
     assert "Associated token totals" in dashboard_diagnostics_js
     assert "Raw context remains on-demand" in dashboard_diagnostics_js
     assert "rowInvestigatorLink" in dashboard_diagnostics_js
-    assert "diagnostics-drilldown-row" in dashboard_diagnostics_js
-    assert 'td colspan="11"' in dashboard_diagnostics_js
-    assert "associated_cached_input_tokens" in dashboard_diagnostics_js
-    assert "row.cached_input_tokens" in dashboard_diagnostics_js
-    assert "Occurrences: count of matching diagnostic fact events" in dashboard_diagnostics_js
-    assert "Associated total tokens for those calls" in dashboard_diagnostics_js
-    assert "Average cache ratio across associated calls" in dashboard_diagnostics_js
-    assert "data-diagnostics-fact-sort-key" in dashboard_diagnostics_js
-    assert "data-diagnostics-fact-sort-active" in dashboard_diagnostics_js
+    assert "diagnostics-drilldown-row" in dashboard_diagnostics_facts_js
+    assert 'td colspan="11"' in dashboard_diagnostics_facts_js
+    assert "associated_cached_input_tokens" in dashboard_diagnostics_facts_js
+    assert "row.cached_input_tokens" in dashboard_diagnostics_facts_js
+    assert "Occurrences: count of matching diagnostic fact events" in dashboard_diagnostics_facts_js
+    assert "Associated total tokens for those calls" in dashboard_diagnostics_facts_js
+    assert "Average cache ratio across associated calls" in dashboard_diagnostics_facts_js
+    assert "data-diagnostics-fact-sort-key" in dashboard_diagnostics_facts_js
+    assert "data-diagnostics-fact-sort-active" in dashboard_diagnostics_facts_js
     assert "sortFactRows" in dashboard_diagnostics_js
-    assert "diagnosticFactHeader" in dashboard_diagnostics_js
+    assert "diagnosticFactHeader" in dashboard_diagnostics_facts_js
     assert "diagnostics-facts-table" in dashboard_surface
     assert "diagnostics-fact-cell" in dashboard_surface
+    assert "diagnostics-snapshot-grid" in dashboard_css
+    assert "diagnostics-toolbar" in dashboard_css
+    assert "diagnostics-mini-table" in dashboard_css
     assert "diagnostics-facts-table th:first-child" in dashboard_css
     assert "td.diagnostics-fact-cell" in dashboard_css
     assert "captureScrollAnchor" in dashboard_diagnostics_js
@@ -306,7 +343,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
     assert "offset: String(offset)" in dashboard_diagnostics_js
     assert "mergeFactCallPayload" in dashboard_diagnostics_js
     assert "data-diagnostics-call-sort-key" in dashboard_diagnostics_js
-    assert "data-diagnostics-call-sort-active" in dashboard_diagnostics_js
+    assert "data-diagnostics-call-sort-active" in dashboard_diagnostics_facts_js
     assert "sortFactCalls" in dashboard_diagnostics_js
     assert "defaultFactCallSortDirection" in dashboard_diagnostics_js
     assert "sort: sortState.sort" in dashboard_diagnostics_js
@@ -501,6 +538,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
     assert (asset_dir / "dashboard_details.js").exists()
     assert (asset_dir / "dashboard_insights.js").exists()
     assert (asset_dir / "dashboard_tables.js").exists()
+    assert (asset_dir / "dashboard_diagnostics_snapshots.js").exists()
     assert (asset_dir / "dashboard_filters.js").exists()
     assert (asset_dir / "dashboard_state.js").exists()
     assert (asset_dir / "dashboard_payload_cache.js").exists()

From 72ff139aa453e7e5105f93547ab4cc49c1c2143c Mon Sep 17 00:00:00 2001
From: Monsky <douglas.monsky@gmail.com>
Date: Sat, 20 Jun 2026 19:29:10 -0400
Subject: [PATCH 06/10] docs: document diagnostic dashboard reports

---
 docs/architecture.md     | 6 ++++--
 docs/cli-json-schemas.md | 2 ++
 docs/cli-reference.md    | 7 +++++++
 docs/dashboard-guide.md  | 2 ++
 docs/privacy.md          | 4 +++-
 5 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/docs/architecture.md b/docs/architecture.md
index b58f8eb..4353847 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -13,7 +13,8 @@ Codex Usage Tracker is a local sidecar app. It reads aggregate token counters fr
 - `costing.py`, `pricing_config.py`, `pricing_openai.py`, `pricing_estimates.py`, and `allowance.py` own cost, credit, rate-card, and allowance annotation. Keep estimate confidence and source metadata attached to rows.
 - `projects.py`, `threads.py`, and `recommendations.py` annotate aggregate rows with project identity, thread relationships, and actionable signals. Project privacy redaction also belongs in `projects.py` so CLI, MCP, dashboard, CSV, and support-bundle surfaces share the same behavior.
 - `dashboard.py` builds aggregate-only static dashboard payloads and writes HTML/assets. `server.py` adds localhost refresh, the compatibility `/api/usage` endpoint, SQL-backed live API slices, and explicit lazy context loading.
-- `plugin_data/dashboard/dashboard_format.js` owns dashboard formatting primitives. `dashboard_data.js` owns row payload and thread relationship helpers. `dashboard_analysis.js` owns scoring, sorting, recommendation, and thread grouping logic. `dashboard_cells.js` owns reusable table/cell HTML helpers. `dashboard_details.js` owns sidebar detail and thread narrative rendering. `dashboard_insights.js` owns insight cards and investigation preset UI. `dashboard_tables.js` owns Calls, Threads, and expanded thread-call table rendering. `dashboard_diagnostics.js` owns the Diagnostics tab that consumes `/api/diagnostics/*` aggregate payloads. `dashboard_filters.js` owns date range parsing and row date matching. `dashboard_state.js` owns URL, CSV, and download state utilities. `dashboard_i18n.js`, `dashboard_payload_cache.js`, and `dashboard_tooltips.js` own localization, session aggregate cache, and fast tooltip helpers. `dashboard_call_investigator.js` owns the dedicated call drilldown surface. `dashboard.js` owns top-level DOM rendering, event handling, and API refresh orchestration.
+- `diagnostic_snapshots.py` owns persisted diagnostic snapshot refresh/load orchestration. `diagnostic_snapshot_analysis.py`, `diagnostic_snapshot_events.py`, `diagnostic_snapshot_rows.py`, and `diagnostic_snapshot_concentration.py` own source-log aggregation, safe event parsing, row shaping, and concentration math. `diagnostic_snapshot_report.py` owns CLI rendering. Keep these modules synthetic-testable and aggregate-only.
+- `plugin_data/dashboard/dashboard_format.js` owns dashboard formatting primitives. `dashboard_data.js` owns row payload and thread relationship helpers. `dashboard_analysis.js` owns scoring, sorting, recommendation, and thread grouping logic. `dashboard_cells.js` owns reusable table/cell HTML helpers. `dashboard_details.js` owns sidebar detail and thread narrative rendering. `dashboard_insights.js` owns insight cards and investigation preset UI. `dashboard_tables.js` owns Calls, Threads, and expanded thread-call table rendering. `dashboard_diagnostics.js` coordinates the Diagnostics tab data flow and events, `dashboard_diagnostics_snapshots.js` renders on-demand snapshot panels, and `dashboard_diagnostics_facts.js` renders the fact tables and drilldowns. `dashboard_filters.js` owns date range parsing and row date matching. `dashboard_state.js` owns URL, CSV, and download state utilities. `dashboard_i18n.js`, `dashboard_payload_cache.js`, and `dashboard_tooltips.js` own localization, session aggregate cache, and fast tooltip helpers. `dashboard_call_investigator.js` owns the dedicated call drilldown surface. `dashboard.js` owns top-level DOM rendering, event handling, and API refresh orchestration.
 - `context.py` is the only normal path that reads raw log context, and it does so only for one selected record on demand with redaction and size limits. Its default quick mode omits tool output and serialized groups; full serialized JSONL group analysis is explicit.
 - `plugin_installer.py`, `.mcp.json`, `skills/`, and `scripts/check_release.py` own install and packaging behavior.
 - `scripts/benchmark_synthetic_history.py` owns generated large-history query timing and threshold enforcement for 10k, 100k, and 500k aggregate-row fixtures. Its optional `--with-source-logs` mode writes synthetic JSONL source logs to time explicit context loading and to guard normal dashboard payload assembly against source-log reads. It must stay synthetic-only and must not read real Codex logs.
@@ -26,10 +27,11 @@ Codex Usage Tracker is a local sidecar app. It reads aggregate token counters fr
 1. Add new persisted usage-event metrics through `UsageEvent`, `schema.py`, migrations, store queries, dashboard payload tests, and CSV/export checks. Add auxiliary aggregate tables such as `thread_summaries` or `source_files` through `store.py` migrations plus focused migration/privacy tests.
 2. Add new report views through `reports.py` first, then wire CLI and MCP wrappers to that shared service.
 3. Add new machine-readable outputs through `api_payloads.py` or report payload methods with a `schema` value, a `json_contracts.py` entry, and focused tests.
-4. Add dashboard-only interactions in `plugin_data/dashboard/dashboard.js` and keep URL state in `dashboard_state.js`.
+4. Add dashboard-only interactions in the narrowest dashboard module and keep URL state in `dashboard_state.js`. Diagnostics snapshot panels should stay in `dashboard_diagnostics_snapshots.js`; fact tables should stay in `dashboard_diagnostics_facts.js`.
 5. Keep all examples, screenshots, mocks, and tests synthetic. Never derive fixtures from real logs.
 6. When editing skill instructions, update both the source `skills/...` file and the bundled `src/codex_usage_tracker/plugin_data/skills/...` copy. `scripts/check_release.py` verifies that installable plugin assets stay complete and synced.
 7. When adding fields derived from `cwd`, Git metadata, source paths, or log-event metadata, decide how they behave in `normal`, `redacted`, and `strict` privacy modes before exposing them in dashboard, JSON, CSV, MCP, or support-bundle output.
+8. Diagnostic snapshot refresh must remain explicit and on demand. Normal usage refresh paths may load stored snapshots, but they must not rescan source logs for diagnostic sections unless the user calls a diagnostics `--refresh` command or a `/api/diagnostics/<section>/refresh` endpoint.
 
 ## Validation
 
diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md
index 61219d7..3b1dd2b 100644
--- a/docs/cli-json-schemas.md
+++ b/docs/cli-json-schemas.md
@@ -287,6 +287,8 @@ Schema: `codex-usage-tracker-diagnostics-v1`
 
 Diagnostics payloads report aggregate structured facts such as compaction, tool/function/MCP activity, command families, structured skill labels, search/read loops, and outcome events. They do not include prompts, assistant messages, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments. Token totals are associated with facts observed before a token-count row; they are not causal allocations.
 
+Diagnostic snapshots use separate section endpoints instead of one large payload. `GET` returns the latest stored section snapshot or `status: "missing"`; `POST /api/diagnostics/<section>/refresh` recomputes and replaces only that section. This keeps ordinary dashboard refresh fast and prevents source-log rescans unless a diagnostics refresh is explicit.
+
 ## Diagnostic Overview Snapshot
 
 Commands:
diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index bc69420..341dc9a 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -120,6 +120,9 @@ codex-usage-tracker diagnostics summary
 codex-usage-tracker diagnostics facts --sort uncached
 codex-usage-tracker diagnostics compactions
 codex-usage-tracker diagnostics tools
+codex-usage-tracker diagnostics overview --refresh
+codex-usage-tracker diagnostics tool-output --refresh
+codex-usage-tracker diagnostics commands --refresh
 codex-usage-tracker diagnostics file-reads --refresh
 codex-usage-tracker diagnostics read-productivity --refresh
 codex-usage-tracker diagnostics concentration --refresh
@@ -128,6 +131,10 @@ codex-usage-tracker diagnostics fact-calls --fact-type compaction --fact-name po
 
 Diagnostics expose structured event patterns and their associated token totals. They can show compactions, tool/function/MCP activity, safe command families, structured skill labels, patch outcomes, task completion, search/read loops, and aborted or rolled-back turns. Associated totals are not causal allocations and are not additive when one model call has multiple diagnostic facts.
 
+Snapshot diagnostics are persisted aggregate reports. Without `--refresh`, snapshot commands return the latest stored payload or a `missing` status. With `--refresh`, they recompute from indexed source logs and replace the stored section snapshot. Ordinary `refresh`, `open-dashboard`, and dashboard `Refresh` update usage rows only; they do not recompute diagnostic snapshots.
+
+The snapshot sections answer different questions: `overview` summarizes usage rows and aggregate token totals, `tool-output` counts functions and terminal `Original token count` coverage, `commands` keeps command roots plus one safe child label, `file-reads` counts reader/path activity and allocated read-output tokens, `read-productivity` reports later-edit correlations for matching path keys, and `concentration` shows top-N token share by source/session, cwd/project, and day.
+
 Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, raw absolute paths, or JSONL fragments. File-read diagnostics use basename-only path labels plus short irreversible hashes, read-productivity percentages are temporal correlations rather than proof that a read caused a later edit, and concentration reports use safe source/session, cwd, and day labels only.
 
 ## JSON Queries
diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md
index 100b1fb..14b6672 100644
--- a/docs/dashboard-guide.md
+++ b/docs/dashboard-guide.md
@@ -135,6 +135,8 @@ Use `Diagnostics` view when you want to see what structured event patterns are h
 - Command diagnostics store only a command family such as `pytest`, `git`, or `unknown_command`. Skill and MCP labels are detected only when they are present as structured event metadata.
 - Newer on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, `read-productivity`, and `concentration`). Heavy recomputation happens only through each section's explicit refresh endpoint.
 - Click `Refresh diagnostics` when you want to recompute stored diagnostic snapshots. The normal dashboard `Refresh` action updates usage rows only.
+- Snapshot panels show their stored status, last computed time, history scope, and logs scanned count. Missing or stale panels still render without forcing a source-log scan.
+- `Tool Output` totals come from terminal wrapper metadata such as `Original token count`; missing-count rows show coverage gaps where that header was absent.
 - File-read snapshots use basename-only path labels and short hashes. Read-productivity rates are temporal correlations between earlier reads and later structured patch events, not causation.
 - Concentration snapshots show top-N share and effective group count by source log/session, cwd/project label, and day without exposing raw source-log or cwd paths.
 - Click `Calls` on a fact row to load associated model calls. Call links and largest-call links open the Call Investigator, where raw context remains explicit and on demand.
diff --git a/docs/privacy.md b/docs/privacy.md
index 748c4c6..d868045 100644
--- a/docs/privacy.md
+++ b/docs/privacy.md
@@ -35,7 +35,9 @@ Call-origin metadata is heuristic and confidence-labeled. It stores categories s
 
 Diagnostic facts follow the same aggregate-only rule. They can store safe structured labels such as `patch_applied`, `function_call_output`, `post_compaction`, MCP tool/server labels, structured skill labels, and command families such as `pytest`, `git`, or `unknown_command`, along with event counts and source line ranges. Command text may be classified in memory during parsing, but it is not persisted. Diagnostic facts do not store tool arguments, command text, command output, patch text, prompt or assistant text, file contents, raw JSONL fragments, or raw context evidence.
 
-On-demand diagnostic snapshots follow the same boundary. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths.
+On-demand diagnostic snapshots follow the same boundary. Tool-output snapshots use terminal wrapper metadata such as `Original token count` when present and persist only counts, coverage gaps, and safe function/command labels. Command snapshots keep command roots plus one conservative child label. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths.
+
+Diagnostic snapshots are not live recomputed during ordinary dashboard or usage refresh. Stored snapshots can be displayed without rescanning source logs, and recomputation requires an explicit diagnostics `--refresh` command or a localhost `/api/diagnostics/<section>/refresh` request.
 
 ## On-Demand Context
 

From 362c9fdad3da322c8e6d77cf6fa5989541832aa5 Mon Sep 17 00:00:00 2001
From: Monsky <douglas.monsky@gmail.com>
Date: Sat, 20 Jun 2026 19:47:34 -0400
Subject: [PATCH 07/10] fix: batch diagnostic dashboard refresh

---
 docs/cli-json-schemas.md                      |  2 +-
 docs/dashboard-guide.md                       |  2 +-
 docs/privacy.md                               |  2 +-
 .../diagnostic_snapshot_analysis.py           | 69 ++++++++--------
 .../diagnostic_snapshot_constants.py          |  1 +
 .../diagnostic_snapshots.py                   | 81 +++++++++++++++++++
 .../dashboard/dashboard_diagnostics.js        |  8 +-
 src/codex_usage_tracker/server.py             | 30 +++++++
 tests/test_dashboard_payload.py               |  1 +
 tests/test_dashboard_server.py                | 42 ++++------
 tests/test_diagnostic_snapshots.py            | 42 ++++++++++
 11 files changed, 217 insertions(+), 63 deletions(-)

diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md
index 3b1dd2b..d604e19 100644
--- a/docs/cli-json-schemas.md
+++ b/docs/cli-json-schemas.md
@@ -287,7 +287,7 @@ Schema: `codex-usage-tracker-diagnostics-v1`
 
 Diagnostics payloads report aggregate structured facts such as compaction, tool/function/MCP activity, command families, structured skill labels, search/read loops, and outcome events. They do not include prompts, assistant messages, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments. Token totals are associated with facts observed before a token-count row; they are not causal allocations.
 
-Diagnostic snapshots use separate section endpoints instead of one large payload. `GET` returns the latest stored section snapshot or `status: "missing"`; `POST /api/diagnostics/<section>/refresh` recomputes and replaces only that section. This keeps ordinary dashboard refresh fast and prevents source-log rescans unless a diagnostics refresh is explicit.
+Diagnostic snapshots use separate section endpoints instead of one large read payload. `GET` returns the latest stored section snapshot or `status: "missing"`; `POST /api/diagnostics/<section>/refresh` recomputes and replaces only that section. The dashboard button calls `POST /api/diagnostics/refresh`, which returns a small wrapper with `sections` and recomputes source-log-derived sections with one shared analyzer pass. This keeps ordinary dashboard refresh fast and prevents source-log rescans unless a diagnostics refresh is explicit.
 
 ## Diagnostic Overview Snapshot
 
diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md
index 14b6672..6d535b1 100644
--- a/docs/dashboard-guide.md
+++ b/docs/dashboard-guide.md
@@ -133,7 +133,7 @@ Use `Diagnostics` view when you want to see what structured event patterns are h
 - The tab consumes the localhost `/api/diagnostics/*` endpoints; static file dashboards show a live-API unavailable state.
 - The first table shows top diagnostic facts by associated uncached input tokens. Tool/function/MCP/command-family and compaction sections expose narrower slices of the same fact data.
 - Command diagnostics store only a command family such as `pytest`, `git`, or `unknown_command`. Skill and MCP labels are detected only when they are present as structured event metadata.
-- Newer on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, `read-productivity`, and `concentration`). Heavy recomputation happens only through each section's explicit refresh endpoint.
+- Newer on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, `read-productivity`, and `concentration`). Heavy recomputation happens only through explicit diagnostic refresh endpoints. The dashboard's `Refresh diagnostics` button uses one batched refresh so source-log sections share one scan.
 - Click `Refresh diagnostics` when you want to recompute stored diagnostic snapshots. The normal dashboard `Refresh` action updates usage rows only.
 - Snapshot panels show their stored status, last computed time, history scope, and logs scanned count. Missing or stale panels still render without forcing a source-log scan.
 - `Tool Output` totals come from terminal wrapper metadata such as `Original token count`; missing-count rows show coverage gaps where that header was absent.
diff --git a/docs/privacy.md b/docs/privacy.md
index d868045..39faa33 100644
--- a/docs/privacy.md
+++ b/docs/privacy.md
@@ -37,7 +37,7 @@ Diagnostic facts follow the same aggregate-only rule. They can store safe struct
 
 On-demand diagnostic snapshots follow the same boundary. Tool-output snapshots use terminal wrapper metadata such as `Original token count` when present and persist only counts, coverage gaps, and safe function/command labels. Command snapshots keep command roots plus one conservative child label. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths.
 
-Diagnostic snapshots are not live recomputed during ordinary dashboard or usage refresh. Stored snapshots can be displayed without rescanning source logs, and recomputation requires an explicit diagnostics `--refresh` command or a localhost `/api/diagnostics/<section>/refresh` request.
+Diagnostic snapshots are not recomputed live during ordinary dashboard or usage refreshes. Stored snapshots can be displayed without rescanning source logs; recomputation requires an explicit diagnostics `--refresh` command, the batched localhost `/api/diagnostics/refresh` request, or a targeted `/api/diagnostics/<section>/refresh` request.
 
 ## On-Demand Context
 
diff --git a/src/codex_usage_tracker/diagnostic_snapshot_analysis.py b/src/codex_usage_tracker/diagnostic_snapshot_analysis.py
index 3f78227..070857a 100644
--- a/src/codex_usage_tracker/diagnostic_snapshot_analysis.py
+++ b/src/codex_usage_tracker/diagnostic_snapshot_analysis.py
@@ -111,43 +111,46 @@ def _scan_source_log(source_log: Path, *, counters: dict[str, Any], meta: Counte
     source_read_events: list[int] = []
     modified_orders_by_path: dict[str, list[int]] = defaultdict(list)
     try:
-        lines = source_log.read_text(encoding="utf-8").splitlines()
+        lines = source_log.open(encoding="utf-8")
     except OSError:
         meta["read_errors"] += 1
         return
 
-    for order, line in enumerate(lines):
-        envelope = _json_envelope(line, meta=meta)
-        if envelope is None:
-            continue
-        payload = envelope.get("payload")
-        if not isinstance(payload, dict):
-            continue
-        if envelope.get("type") == "event_msg":
-            for path_ref in modified_path_refs(payload):
-                modified_orders_by_path[path_ref["path_key"]].append(order)
-            continue
-        if envelope.get("type") != "response_item":
-            continue
-        if payload.get("type") == "function_call":
-            _record_function_call(
-                payload,
-                order=order,
-                counters=counters,
-                meta=meta,
-                call_names=call_names,
-                call_roots=call_roots,
-                call_read_events=call_read_events,
-                source_read_events=source_read_events,
-            )
-        elif payload.get("type") == "function_call_output":
-            _record_function_output(
-                payload,
-                counters=counters,
-                call_names=call_names,
-                call_roots=call_roots,
-                call_read_events=call_read_events,
-            )
+    with lines:
+        for order, line in enumerate(lines):
+            if '"response_item"' not in line and '"patch_apply_end"' not in line:
+                continue
+            envelope = _json_envelope(line, meta=meta)
+            if envelope is None:
+                continue
+            payload = envelope.get("payload")
+            if not isinstance(payload, dict):
+                continue
+            if envelope.get("type") == "event_msg":
+                for path_ref in modified_path_refs(payload):
+                    modified_orders_by_path[path_ref["path_key"]].append(order)
+                continue
+            if envelope.get("type") != "response_item":
+                continue
+            if payload.get("type") == "function_call":
+                _record_function_call(
+                    payload,
+                    order=order,
+                    counters=counters,
+                    meta=meta,
+                    call_names=call_names,
+                    call_roots=call_roots,
+                    call_read_events=call_read_events,
+                    source_read_events=source_read_events,
+                )
+            elif payload.get("type") == "function_call_output":
+                _record_function_output(
+                    payload,
+                    counters=counters,
+                    call_names=call_names,
+                    call_roots=call_roots,
+                    call_read_events=call_read_events,
+                )
 
     _mark_later_modifications(
         counters=counters,
diff --git a/src/codex_usage_tracker/diagnostic_snapshot_constants.py b/src/codex_usage_tracker/diagnostic_snapshot_constants.py
index e98ce92..2852510 100644
--- a/src/codex_usage_tracker/diagnostic_snapshot_constants.py
+++ b/src/codex_usage_tracker/diagnostic_snapshot_constants.py
@@ -6,6 +6,7 @@
 DIAGNOSTIC_FILE_READS_SCHEMA = "codex-usage-tracker-diagnostic-file-reads-v1"
 DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA = "codex-usage-tracker-diagnostic-read-productivity-v1"
 DIAGNOSTIC_CONCENTRATION_SCHEMA = "codex-usage-tracker-diagnostic-concentration-v1"
+DIAGNOSTIC_BATCH_REFRESH_SCHEMA = "codex-usage-tracker-diagnostic-snapshot-refresh-v1"
 DIAGNOSTIC_OVERVIEW_SECTION = "overview"
 DIAGNOSTIC_TOOL_OUTPUT_SECTION = "tool-output"
 DIAGNOSTIC_COMMANDS_SECTION = "commands"
diff --git a/src/codex_usage_tracker/diagnostic_snapshots.py b/src/codex_usage_tracker/diagnostic_snapshots.py
index 774db2b..c962da3 100644
--- a/src/codex_usage_tracker/diagnostic_snapshots.py
+++ b/src/codex_usage_tracker/diagnostic_snapshots.py
@@ -15,6 +15,7 @@
     concentration_privacy_metadata,
 )
 from codex_usage_tracker.diagnostic_snapshot_constants import (
+    DIAGNOSTIC_BATCH_REFRESH_SCHEMA,
     DIAGNOSTIC_COMMANDS_SCHEMA,
     DIAGNOSTIC_COMMANDS_SECTION,
     DIAGNOSTIC_CONCENTRATION_SCHEMA,
@@ -196,6 +197,67 @@ def refresh_diagnostic_overview_snapshot(
     return payload
 
 
+def refresh_diagnostic_snapshots(
+    *,
+    db_path: Path = DEFAULT_DB_PATH,
+    include_archived: bool = False,
+) -> dict[str, Any]:
+    """Recompute and persist all dashboard diagnostic snapshots.
+
+    Source-log-derived sections share one analyzer pass so the dashboard refresh
+    button does not rescan the same logs once per panel.
+    """
+
+    history_scope = _history_scope(include_archived)
+    overview_payload = refresh_diagnostic_overview_snapshot(
+        db_path=db_path,
+        include_archived=include_archived,
+    )
+    computed_at = _utc_now()
+    analysis = analyze_indexed_source_logs(db_path=db_path, include_archived=include_archived)
+    sections = {
+        DIAGNOSTIC_TOOL_OUTPUT_SECTION: DIAGNOSTIC_TOOL_OUTPUT_SCHEMA,
+        DIAGNOSTIC_COMMANDS_SECTION: DIAGNOSTIC_COMMANDS_SCHEMA,
+        DIAGNOSTIC_FILE_READS_SECTION: DIAGNOSTIC_FILE_READS_SCHEMA,
+        DIAGNOSTIC_READ_PRODUCTIVITY_SECTION: DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA,
+    }
+    source_payloads = {
+        section: _persist_source_log_snapshot(
+            db_path=db_path,
+            section=section,
+            schema=schema,
+            history_scope=history_scope,
+            computed_at=computed_at,
+            analysis=analysis,
+        )
+        for section, schema in sections.items()
+    }
+    concentration_payload = _refresh_concentration_snapshot(
+        db_path=db_path,
+        include_archived=include_archived,
+    )
+    return {
+        "schema": DIAGNOSTIC_BATCH_REFRESH_SCHEMA,
+        "status": "ready",
+        "refreshed": True,
+        "raw_context_included": False,
+        "history_scope": history_scope,
+        "sections": {
+            "overview": overview_payload,
+            "toolOutput": source_payloads[DIAGNOSTIC_TOOL_OUTPUT_SECTION],
+            "commands": source_payloads[DIAGNOSTIC_COMMANDS_SECTION],
+            "fileReads": source_payloads[DIAGNOSTIC_FILE_READS_SECTION],
+            "readProductivity": source_payloads[DIAGNOSTIC_READ_PRODUCTIVITY_SECTION],
+            "concentration": concentration_payload,
+        },
+        "meta": {
+            "source_log_analysis_passes": 1,
+            "source_logs_scanned": analysis["meta"]["source_logs_scanned"],
+            "usage_rows_scanned": analysis["meta"]["usage_rows_scanned"],
+        },
+    }
+
+
 def _build_source_log_snapshot_report(
     *,
     db_path: Path,
@@ -233,6 +295,25 @@ def _refresh_source_log_snapshot(
     history_scope = _history_scope(include_archived)
     computed_at = _utc_now()
     analysis = analyze_indexed_source_logs(db_path=db_path, include_archived=include_archived)
+    return _persist_source_log_snapshot(
+        db_path=db_path,
+        section=section,
+        schema=schema,
+        history_scope=history_scope,
+        computed_at=computed_at,
+        analysis=analysis,
+    )
+
+
+def _persist_source_log_snapshot(
+    *,
+    db_path: Path,
+    section: str,
+    schema: str,
+    history_scope: str,
+    computed_at: str,
+    analysis: dict[str, Any],
+) -> dict[str, Any]:
     snapshot = _snapshot_metadata(
         computed_at=computed_at,
         history_scope=history_scope,
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js
index b95b82f..7a37910 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js
@@ -162,7 +162,13 @@
       renderIfActive();
       try {
         const filters = getDiagnosticFilters();
-        const snapshots = await fetchSnapshotPayloads(filters, true);
+        const snapshotFilters = { include_archived: filters?.include_archived || '0' };
+        const refreshPayload = await fetchPayload(
+          '/api/diagnostics/refresh',
+          snapshotFilters,
+          { method: 'POST' },
+        );
+        const snapshots = refreshPayload.sections || {};
         if (signature !== activeSignature) return;
         payloads = { ...payloads, ...snapshots };
         snapshotRefreshStatus = 'ready';
diff --git a/src/codex_usage_tracker/server.py b/src/codex_usage_tracker/server.py
index 79abe1f..19c98cb 100644
--- a/src/codex_usage_tracker/server.py
+++ b/src/codex_usage_tracker/server.py
@@ -42,6 +42,7 @@
     build_diagnostic_overview_report,
     build_diagnostic_read_productivity_report,
     build_diagnostic_tool_output_report,
+    refresh_diagnostic_snapshots,
 )
 from codex_usage_tracker.i18n import normalize_language
 from codex_usage_tracker.paths import (
@@ -346,6 +347,9 @@ def do_POST(self) -> None:  # noqa: N802 - stdlib hook name
         if not self._request_origin_allowed():
             self._send_json(HTTPStatus.FORBIDDEN, {"error": "Request host or origin is not allowed"})
             return
+        if parsed.path == "/api/diagnostics/refresh":
+            self._handle_diagnostics_refresh(parsed.query)
+            return
         if parsed.path == "/api/diagnostics/overview/refresh":
             self._handle_diagnostics_overview_refresh(parsed.query)
             return
@@ -1002,6 +1006,32 @@ def _handle_diagnostics_overview(self, query: str) -> None:
             label="diagnostic overview",
         )
 
+    def _handle_diagnostics_refresh(self, query: str) -> None:
+        params = parse_qs(query)
+        if not self._has_valid_api_token(params):
+            self._send_json(
+                HTTPStatus.FORBIDDEN,
+                {"error": "Valid API token is required for diagnostic refresh"},
+            )
+            return
+        include_archived = _parse_bool(
+            _first(params.get("include_archived")),
+            self._include_archived,
+        )
+        try:
+            with self._refresh_lock:
+                payload = refresh_diagnostic_snapshots(
+                    db_path=self._db_path,
+                    include_archived=include_archived,
+                )
+        except sqlite3.Error as exc:
+            self._send_json(
+                HTTPStatus.INTERNAL_SERVER_ERROR,
+                {"error": f"Database error while refreshing diagnostics: {exc}"},
+            )
+            return
+        self._send_json(HTTPStatus.OK, payload)
+
     def _handle_diagnostics_overview_refresh(self, query: str) -> None:
         self._handle_diagnostic_snapshot(
             query,
diff --git a/tests/test_dashboard_payload.py b/tests/test_dashboard_payload.py
index 9dc9b18..5e73de8 100644
--- a/tests/test_dashboard_payload.py
+++ b/tests/test_dashboard_payload.py
@@ -300,6 +300,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
     assert "/api/diagnostics/tools" in dashboard_diagnostics_js
     assert "/api/diagnostics/compactions" in dashboard_diagnostics_js
     assert "/api/diagnostics/fact-calls" in dashboard_diagnostics_js
+    assert "/api/diagnostics/refresh" in dashboard_diagnostics_js
     assert "dashboard_diagnostics_snapshots.js" in dashboard
     assert "dashboard_diagnostics_facts.js" in dashboard
     assert "/api/diagnostics/overview" in dashboard_diagnostics_snapshots_js
diff --git a/tests/test_dashboard_server.py b/tests/test_dashboard_server.py
index 2a9b38f..9dc3036 100644
--- a/tests/test_dashboard_server.py
+++ b/tests/test_dashboard_server.py
@@ -93,36 +93,21 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
             data=b"",
             method="POST",
         )
-        diagnostic_tool_output_refresh_payload = _read_json(
-            f"http://127.0.0.1:{server.server_port}/api/diagnostics/tool-output/refresh",
-            headers={"X-Codex-Usage-Token": "test-token"},
-            data=b"",
-            method="POST",
-        )
-        diagnostic_commands_refresh_payload = _read_json(
-            f"http://127.0.0.1:{server.server_port}/api/diagnostics/commands/refresh",
-            headers={"X-Codex-Usage-Token": "test-token"},
-            data=b"",
-            method="POST",
-        )
-        diagnostic_file_reads_refresh_payload = _read_json(
-            f"http://127.0.0.1:{server.server_port}/api/diagnostics/file-reads/refresh",
-            headers={"X-Codex-Usage-Token": "test-token"},
-            data=b"",
-            method="POST",
-        )
-        diagnostic_read_productivity_refresh_payload = _read_json(
-            f"http://127.0.0.1:{server.server_port}/api/diagnostics/read-productivity/refresh",
-            headers={"X-Codex-Usage-Token": "test-token"},
-            data=b"",
-            method="POST",
-        )
-        diagnostic_concentration_refresh_payload = _read_json(
-            f"http://127.0.0.1:{server.server_port}/api/diagnostics/concentration/refresh",
+        diagnostic_batch_refresh_payload = _read_json(
+            f"http://127.0.0.1:{server.server_port}/api/diagnostics/refresh",
             headers={"X-Codex-Usage-Token": "test-token"},
             data=b"",
             method="POST",
         )
+        diagnostic_tool_output_refresh_payload = diagnostic_batch_refresh_payload["sections"]["toolOutput"]
+        diagnostic_commands_refresh_payload = diagnostic_batch_refresh_payload["sections"]["commands"]
+        diagnostic_file_reads_refresh_payload = diagnostic_batch_refresh_payload["sections"]["fileReads"]
+        diagnostic_read_productivity_refresh_payload = diagnostic_batch_refresh_payload["sections"][
+            "readProductivity"
+        ]
+        diagnostic_concentration_refresh_payload = diagnostic_batch_refresh_payload["sections"][
+            "concentration"
+        ]
         diagnostic_stored_payload = _read_json(
             f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview"
         )
@@ -197,6 +182,11 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) ->
     assert diagnostic_refresh_payload["refreshed"] is True
     assert diagnostic_refresh_payload["overview"]["usage_rows"] == 4
     assert diagnostic_refresh_payload["overview"]["total_tokens"] == 400
+    assert diagnostic_batch_refresh_payload["schema"] == (
+        "codex-usage-tracker-diagnostic-snapshot-refresh-v1"
+    )
+    assert diagnostic_batch_refresh_payload["status"] == "ready"
+    assert diagnostic_batch_refresh_payload["meta"]["source_log_analysis_passes"] == 1
     assert (
         diagnostic_tool_output_refresh_payload["schema"]
         == "codex-usage-tracker-diagnostic-tool-output-v1"
diff --git a/tests/test_diagnostic_snapshots.py b/tests/test_diagnostic_snapshots.py
index 16adfe8..70a17b9 100644
--- a/tests/test_diagnostic_snapshots.py
+++ b/tests/test_diagnostic_snapshots.py
@@ -14,6 +14,7 @@
     _write_jsonl,
 )
 
+from codex_usage_tracker import diagnostic_snapshots as diagnostic_snapshot_module
 from codex_usage_tracker.diagnostic_snapshots import (
     DIAGNOSTIC_OVERVIEW_SECTION,
     build_diagnostic_commands_report,
@@ -22,6 +23,7 @@
     build_diagnostic_overview_report,
     build_diagnostic_read_productivity_report,
     build_diagnostic_tool_output_report,
+    refresh_diagnostic_snapshots,
 )
 from codex_usage_tracker.store import (
     query_diagnostic_snapshot,
@@ -111,6 +113,46 @@ def test_usage_refresh_does_not_recompute_diagnostic_overview_snapshot(
     assert stored["payload"]["overview"]["total_tokens"] == 7
 
 
+def test_batch_diagnostic_refresh_shares_source_log_analysis_pass(
+    tmp_path: Path,
+    monkeypatch,
+) -> None:
+    codex_home = _make_codex_home(tmp_path)
+    db_path = tmp_path / "usage.sqlite3"
+    refresh_usage_index(codex_home=codex_home, db_path=db_path)
+    calls = 0
+    original = diagnostic_snapshot_module.analyze_indexed_source_logs
+
+    def counting_analyzer(*args, **kwargs):
+        nonlocal calls
+        calls += 1
+        return original(*args, **kwargs)
+
+    monkeypatch.setattr(
+        diagnostic_snapshot_module,
+        "analyze_indexed_source_logs",
+        counting_analyzer,
+    )
+
+    refreshed = refresh_diagnostic_snapshots(db_path=db_path)
+    stored_file_reads = build_diagnostic_file_reads_report(db_path=db_path).payload
+    stored_read_productivity = build_diagnostic_read_productivity_report(db_path=db_path).payload
+
+    assert calls == 1
+    assert refreshed["schema"] == "codex-usage-tracker-diagnostic-snapshot-refresh-v1"
+    assert refreshed["meta"]["source_log_analysis_passes"] == 1
+    assert refreshed["sections"]["overview"]["status"] == "ready"
+    assert refreshed["sections"]["toolOutput"]["status"] == "ready"
+    assert refreshed["sections"]["commands"]["status"] == "ready"
+    assert refreshed["sections"]["fileReads"]["status"] == "ready"
+    assert refreshed["sections"]["readProductivity"]["status"] == "ready"
+    assert refreshed["sections"]["concentration"]["status"] == "ready"
+    assert stored_file_reads["status"] == "ready"
+    assert stored_file_reads["refreshed"] is False
+    assert stored_read_productivity["status"] == "ready"
+    assert stored_read_productivity["refreshed"] is False
+
+
 def test_tool_output_and_command_snapshots_use_safe_aggregate_labels(
     tmp_path: Path,
 ) -> None:

From 2ff52f3a5a3167102ec177be7bd98c4ab37e424f Mon Sep 17 00:00:00 2001
From: Monsky <douglas.monsky@gmail.com>
Date: Sat, 20 Jun 2026 20:17:43 -0400
Subject: [PATCH 08/10] fix: isolate diagnostics from live refresh

---
 .../plugin_data/dashboard/dashboard_live.js   | 25 +++---
 tests/test_dashboard_live.py                  | 89 +++++++++++++++++++
 2 files changed, 104 insertions(+), 10 deletions(-)

diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js
index 932acdd..1d8722e 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js
@@ -44,6 +44,10 @@
     let rowHydrationRestartRequested = false;
     let autoRefreshTimer = null;
 
+    function isUsageRefreshView() {
+      return !['call', 'diagnostics'].includes(activeView());
+    }
+
     function loadedRowsDescription() {
       const data = getData();
       const loaded = number.format(data.length);
@@ -71,7 +75,7 @@
       if (!rowLoadProgressEl) return;
       const target = rowHydrationTarget();
       const loaded = Math.min(getData().length, target || getData().length);
-      const shouldShow = !['call', 'diagnostics'].includes(activeView()) && liveRefreshSupported && (rowHydrationInFlight || rowsNeedHydration() || rowHydrationError);
+      const shouldShow = isUsageRefreshView() && liveRefreshSupported && (rowHydrationInFlight || rowsNeedHydration() || rowHydrationError);
       rowLoadProgressEl.hidden = !shouldShow;
       if (!shouldShow) return;
       const totalText = number.format(target || getTotalAvailableRows() || loaded);
@@ -119,7 +123,7 @@
     }
 
     async function hydrateDashboardRows(options = null) {
-      if (!liveRefreshSupported || ['call', 'diagnostics'].includes(activeView())) return;
+      if (!liveRefreshSupported || !isUsageRefreshView()) return;
       const hydrateOptions = options || {};
       if (rowHydrationInFlight) {
         if (hydrateOptions.reset) rowHydrationRestartRequested = true;
@@ -150,7 +154,7 @@
       updateLiveStatus('status.checking', t('live.loading_rows'));
       updateRowLoadProgress();
       try {
-        while (getData().length < target && generation === rowHydrationGeneration && !['call', 'diagnostics'].includes(activeView())) {
+        while (getData().length < target && generation === rowHydrationGeneration && isUsageRefreshView()) {
           const offset = getData().length;
           const remaining = target - offset;
           const chunkSize = Math.min(
@@ -174,7 +178,7 @@
           if (!response.ok) throw new Error(`HTTP ${response.status}`);
           const payload = await response.json();
           if (payload.error) throw new Error(payload.error);
-          if (generation !== rowHydrationGeneration || ['call', 'diagnostics'].includes(activeView())) break;
+          if (generation !== rowHydrationGeneration || !isUsageRefreshView()) break;
           const rows = payloadRows(payload);
           if (!rows.length) break;
           applyDashboardPayload(payload, { appendRows: true });
@@ -189,7 +193,7 @@
       } finally {
         rowHydrationInFlight = false;
         updateRowLoadProgress();
-        const shouldRestart = rowHydrationRestartRequested && !['call', 'diagnostics'].includes(activeView());
+        const shouldRestart = rowHydrationRestartRequested && isUsageRefreshView();
         rowHydrationRestartRequested = false;
         if (shouldRestart) {
           hydrateDashboardRows();
@@ -200,7 +204,7 @@
     }
 
     async function refreshDashboardIfStale() {
-      if (!liveRefreshSupported || !apiToken() || ['call', 'diagnostics'].includes(activeView())) return;
+      if (!liveRefreshSupported || !apiToken() || !isUsageRefreshView()) return;
       try {
         const params = new URLSearchParams({
           include_archived: getIncludeArchived() ? '1' : '0',
@@ -229,7 +233,7 @@
     }
 
     async function refreshDashboardLive() {
-      if (!liveRefreshSupported || !apiToken() || activeView() === 'call') return;
+      if (!liveRefreshSupported || !apiToken() || !isUsageRefreshView()) return;
       if (refreshInFlight) return;
       const previousTotal = Number(getTotalAvailableRows() || getData().length || 0);
       refreshInFlight = true;
@@ -254,12 +258,13 @@
         if (!shellResponse.ok) throw new Error(`HTTP ${shellResponse.status}`);
         const shellPayload = await shellResponse.json();
         if (shellPayload.error) throw new Error(shellPayload.error);
+        if (!isUsageRefreshView()) return;
 
         const nextTotal = Number(shellPayload.total_available_rows || previousTotal);
         const newRows = Math.max(0, nextTotal - previousTotal);
         applyDashboardPayload(shellPayload, { preserveRows: true });
 
-        if (activeView() !== 'diagnostics' && newRows > 0) {
+        if (newRows > 0) {
           const loadedLimit = getLoadedLimit();
           const visibleTarget = loadedLimit === null ? nextTotal : Math.min(nextTotal, Number(loadedLimit || nextTotal));
           const rowsToFetch = Math.max(0, Math.min(newRows, visibleTarget || newRows));
@@ -285,7 +290,7 @@
           }
           rowHydrationComplete = getData().length >= rowHydrationTarget();
           updateRowLoadProgress();
-        } else if (activeView() !== 'diagnostics' && rowsNeedHydration()) {
+        } else if (rowsNeedHydration()) {
           hydrateDashboardRows();
         }
 
@@ -373,7 +378,7 @@
     function scheduleAutoRefresh() {
       if (autoRefreshTimer) window.clearInterval(autoRefreshTimer);
       autoRefreshTimer = null;
-      if (!autoRefreshEl.checked || !liveRefreshSupported || activeView() === 'call') return;
+      if (!autoRefreshEl.checked || !liveRefreshSupported || !isUsageRefreshView()) return;
       autoRefreshTimer = window.setInterval(() => {
         if (document.visibilityState === 'visible') refreshDashboardLive();
       }, liveRefreshIntervalMs);
diff --git a/tests/test_dashboard_live.py b/tests/test_dashboard_live.py
index f470434..a25df3c 100644
--- a/tests/test_dashboard_live.py
+++ b/tests/test_dashboard_live.py
@@ -128,6 +128,95 @@ def test_dashboard_live_allows_diagnostics_bootstrap_refresh() -> None:
     assert payload["statusKeys"] == ["status.checking", "status.updated"]
 
 
+def test_dashboard_live_skips_diagnostics_auto_refresh_cycle() -> None:
+    payload = _run_dashboard_live_script(
+        """
+(async () => {
+  const calls = [];
+  const statusUpdates = [];
+  const appliedPayloads = [];
+  let scheduledIntervals = 0;
+  context.window.setInterval = () => {
+    scheduledIntervals += 1;
+    return 1;
+  };
+  context.window.clearInterval = () => {};
+  globalThis.__fetch = async (url, options) => {
+    calls.push({ url, headers: options.headers });
+    return {
+      ok: true,
+      json: async () => ({
+        rows: [],
+        refreshed_at: '2026-06-19T00:00:00Z',
+        refresh_result: {
+          inserted_or_updated_events: 1,
+          scanned_files: 1,
+          skipped_events: 0,
+        },
+        total_available_rows: 1,
+      }),
+    };
+  };
+  const refreshDashboardEl = { disabled: false };
+  const runtime = factory.create({
+    activeView: () => 'diagnostics',
+    apiToken: () => 'test-token',
+    applyDashboardPayload: payload => appliedPayloads.push(payload),
+    autoRefreshEl: { checked: true },
+    backgroundHydrationChunkSize: 2000,
+    formatTimestamp: value => value,
+    getArchivedAvailableRows: () => 0,
+    getData: () => [],
+    getIncludeArchived: () => false,
+    getLoadedLimit: () => null,
+    getTotalAvailableRows: () => 1,
+    historyScopeEl: { value: 'active', parentElement: {} },
+    i18n: { currentLanguage: 'en' },
+    initialHydrationChunkSize: 500,
+    latestRefreshAt: () => '',
+    limitValue: value => value === null ? 'all' : String(value),
+    liveRefreshIntervalMs: 10000,
+    liveRefreshSupported: true,
+    loadLimitEl: { value: '5000', options: [], lastElementChild: null, insertBefore: () => {} },
+    number: new Intl.NumberFormat('en-US'),
+    payloadRows: payload => payload.rows || [],
+    rebuildDashboardIndexes: () => {},
+    rebuildFilterOptions: () => {},
+    refreshDashboardEl,
+    render: () => {},
+    resetRowsForHydration: () => {},
+    rowLoadProgressBarEl: { style: {} },
+    rowLoadProgressCountEl: { textContent: '' },
+    rowLoadProgressEl: { hidden: true },
+    rowLoadProgressLabelEl: { textContent: '' },
+    setFastTooltip: () => {},
+    t: key => key,
+    tf: (key, values = {}) => `${key}:${JSON.stringify(values)}`,
+    updateLiveStatus: (key, detail) => statusUpdates.push({ key, detail }),
+  });
+  runtime.scheduleAutoRefresh();
+  await runtime.refreshDashboardLive();
+  console.log(JSON.stringify({
+    fetchCount: calls.length,
+    appliedCount: appliedPayloads.length,
+    statusKeys: statusUpdates.map(entry => entry.key),
+    scheduledIntervals,
+    refreshDisabled: refreshDashboardEl.disabled,
+  }));
+})().catch(error => {
+  console.error(error);
+  process.exit(1);
+});
+"""
+    )
+
+    assert payload["fetchCount"] == 0
+    assert payload["appliedCount"] == 0
+    assert payload["statusKeys"] == []
+    assert payload["scheduledIntervals"] == 0
+    assert payload["refreshDisabled"] is False
+
+
 def test_dashboard_live_prepends_new_rows_after_cached_index_refresh() -> None:
     payload = _run_dashboard_live_script(
         """

From d60fbd421aa1e0f62305904cbfab5d1a82d9b0e1 Mon Sep 17 00:00:00 2001
From: Monsky <douglas.monsky@gmail.com>
Date: Sat, 20 Jun 2026 20:47:11 -0400
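Subject: [PATCH 09/10] fix: expand diagnostic command children

Render every child label of a command root inside a collapsible
details element instead of showing only the top child, and update the
docs to describe the bounded child list. Also render concentration
metric and dimension ids as reader-facing labels rather than raw ids,
and let table cells opt out of numeric alignment.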
---
 docs/cli-json-schemas.md                      |   4 +-
 docs/cli-reference.md                         |   2 +-
 docs/privacy.md                               |   2 +-
 .../dashboard_diagnostics_snapshots.js        |  68 +++++++-
 .../dashboard/dashboard_tables.css            |  47 +++++
 tests/test_dashboard_diagnostics_snapshots.py | 160 ++++++++++++++++++
 6 files changed, 271 insertions(+), 12 deletions(-)
 create mode 100644 tests/test_dashboard_diagnostics_snapshots.py

diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md
index d604e19..6cde68f 100644
--- a/docs/cli-json-schemas.md
+++ b/docs/cli-json-schemas.md
@@ -412,7 +412,7 @@ Schema: `codex-usage-tracker-diagnostic-commands-v1`
 }
 ```
 
-The commands snapshot keeps only command roots and safe one-level child labels such as `status`, `diff`, or `-m:pytest`.
+The commands snapshot keeps only command roots and a bounded list of safe one-level child labels such as `status`, `diff`, or `-m:pytest`.
 
 ## Diagnostic File Reads Snapshot
 
@@ -538,7 +538,7 @@ Schema: `codex-usage-tracker-diagnostic-concentration-v1`
 }
 ```
 
-The concentration snapshot computes top-1/top-3/top-5 share and effective group count by source log/session, cwd/project label, and day. Source log labels use session-id prefixes or source hashes, cwd labels use basename-only labels, and raw source paths/cwd paths are not included.
+The concentration snapshot computes top-1/top-3/top-5 share and effective group count by source log/session, cwd/project label, and day. Metric ids such as `top_1_source_log_share` are stable JSON contract fields; dashboard views should render them as reader-facing labels. Source log labels use session-id prefixes or source hashes, cwd labels use basename-only labels, and raw source paths/cwd paths are not included.
 
 ## Pricing Coverage
 
diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index 341dc9a..8d6c322 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -133,7 +133,7 @@ Diagnostics expose structured event patterns and their associated token totals.
 
 Snapshot diagnostics are persisted aggregate reports. Without `--refresh`, snapshot commands return the latest stored payload or a `missing` status. With `--refresh`, they recompute from indexed source logs and replace the stored section snapshot. Ordinary `refresh`, `open-dashboard`, and dashboard `Refresh` update usage rows only; they do not recompute diagnostic snapshots.
 
-The snapshot sections answer different questions: `overview` summarizes usage rows and aggregate token totals, `tool-output` counts functions and terminal `Original token count` coverage, `commands` keeps command roots plus one safe child label, `file-reads` counts reader/path activity and allocated read-output tokens, `read-productivity` reports later-edit correlations for matching path keys, and `concentration` shows top-N token share by source/session, cwd/project, and day.
+The snapshot sections answer different questions: `overview` summarizes usage rows and aggregate token totals, `tool-output` counts functions and terminal `Original token count` coverage, `commands` keeps command roots plus bounded safe child labels, `file-reads` counts reader/path activity and allocated read-output tokens, `read-productivity` reports later-edit correlations for matching path keys, and `concentration` shows top-N token share by source/session, cwd/project, and day.
 
 Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, raw absolute paths, or JSONL fragments. File-read diagnostics use basename-only path labels plus short irreversible hashes, read-productivity percentages are temporal correlations rather than proof that a read caused a later edit, and concentration reports use safe source/session, cwd, and day labels only.
 
diff --git a/docs/privacy.md b/docs/privacy.md
index 39faa33..94eea7a 100644
--- a/docs/privacy.md
+++ b/docs/privacy.md
@@ -35,7 +35,7 @@ Call-origin metadata is heuristic and confidence-labeled. It stores categories s
 
 Diagnostic facts follow the same aggregate-only rule. They can store safe structured labels such as `patch_applied`, `function_call_output`, `post_compaction`, MCP tool/server labels, structured skill labels, and command families such as `pytest`, `git`, or `unknown_command`, along with event counts and source line ranges. Command text may be classified in memory during parsing, but it is not persisted. Diagnostic facts do not store tool arguments, command text, command output, patch text, prompt or assistant text, file contents, raw JSONL fragments, or raw context evidence.
 
-On-demand diagnostic snapshots follow the same boundary. Tool-output snapshots use terminal wrapper metadata such as `Original token count` when present and persist only counts, coverage gaps, and safe function/command labels. Command snapshots keep command roots plus one conservative child label. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths.
+On-demand diagnostic snapshots follow the same boundary. Tool-output snapshots use terminal wrapper metadata such as `Original token count` when present and persist only counts, coverage gaps, and safe function/command labels. Command snapshots keep command roots plus a bounded list of conservative one-level child labels. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths.
 
 Diagnostic snapshots are not live recomputed during ordinary dashboard or usage refresh. Stored snapshots can be displayed without rescanning source logs, and recomputation requires an explicit diagnostics `--refresh` command, the batched localhost `/api/diagnostics/refresh` request, or a targeted `/api/diagnostics/<section>/refresh` request.
 
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js
index 843626c..213c9f0 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js
@@ -114,15 +114,38 @@
     function renderCommands(payload) {
       const commands = Array.isArray(payload?.commands) ? payload.commands.slice(0, 10) : [];
       return renderSimpleTable(
-        ['Root', 'Total', 'Top child'],
-        commands.map(row => {
-          const child = Array.isArray(row.children) && row.children[0] ? row.children[0] : null;
-          return [row.root, tokenText(row.total), child ? `${child.child} (${tokenText(child.count)})` : '<none>'];
-        }),
+        ['Root', 'Total', 'Children'],
+        commands.map(row => [
+          row.root,
+          tokenText(row.total),
+          { html: renderCommandChildren(row.children), numeric: false },
+        ]),
         'No command rows in this snapshot.',
       );
     }
 
+    function renderCommandChildren(children) {
+      const rows = Array.isArray(children) ? children : [];
+      if (!rows.length) {
+        return `<span class="diagnostics-muted">${escapeHtml('<none>')}</span>`;
+      }
+      const childCount = rows.length;
+      const label = `${tokenText(childCount)} ${childCount === 1 ? 'child' : 'children'}`;
+      return `
+        <details class="diagnostics-command-children">
+          <summary>${escapeHtml(label)}</summary>
+          <ul>
+            ${rows.map(child => `
+              <li>
+                <span>${escapeHtml(child.child || '<child>')}</span>
+                <b>${tokenText(child.count)}</b>
+              </li>
+            `).join('')}
+          </ul>
+        </details>
+      `;
+    }
+
     function renderFileReads(payload) {
       const byReader = Array.isArray(payload?.by_reader) ? payload.by_reader.slice(0, 8) : [];
       const paths = Array.isArray(payload?.top_paths) ? payload.top_paths.slice(0, 8) : [];
@@ -173,13 +196,13 @@
         ${renderSimpleTable(
           ['Metric', 'Share'],
           metrics.filter(row => row.top_n === 1 || row.top_n === 3 || row.top_n === 5)
-            .map(row => [row.metric, pct(row.share)]),
+            .map(row => [concentrationMetricLabel(row), pct(row.share)]),
           'No concentration metrics in this snapshot.',
         )}
         ${renderSimpleTable(
           ['Dimension', 'Label', 'Share', 'Largest'],
           impacts.map(row => [
-            row.dimension,
+            concentrationDimensionLabel(row.dimension),
             row.label,
             pct(row.share),
             row.largest_record_id ? { html: rowInvestigatorLink({ record_id: row.largest_record_id }, tokenText(row.largest_call_tokens), true) } : tokenText(row.largest_call_tokens),
@@ -248,7 +271,7 @@
       if (!rows.length) return renderState(emptyMessage);
       const head = headers.map(header => `<th>${escapeHtml(header)}</th>`).join('');
       const body = rows.map(row => `
-        <tr>${row.map((cell, index) => `<td${index > 0 ? ' class="num"' : ''}>${cellHtml(cell)}</td>`).join('')}</tr>
+        <tr>${row.map((cell, index) => `<td${cellNumeric(cell, index) ? ' class="num"' : ''}>${cellHtml(cell)}</td>`).join('')}</tr>
       `).join('');
       return `
         <div class="diagnostics-table-wrap diagnostics-mini-table-wrap">
@@ -266,12 +289,41 @@
       return escapeHtml(String(value));
     }
 
+    function cellNumeric(value, index) {
+      if (index === 0) return false;
+      if (typeof value === 'object' && value && value.numeric === false) return false;
+      return true;
+    }
+
     function pathLabel(row) {
       const label = row.path_label || 'path';
       const hash = row.path_hash ? ` · ${String(row.path_hash).slice(0, 6)}` : '';
       return `${label}${hash}`;
     }
 
+    function concentrationMetricLabel(row) {
+      const topN = Number(row?.top_n || 0);
+      const dimension = concentrationDimensionLabel(row?.dimension);
+      if (topN > 0 && dimension) return `Top ${topN} ${dimension.toLowerCase()} share`;
+      return humanizeMetric(row?.metric || 'metric');
+    }
+
+    function concentrationDimensionLabel(value) {
+      return {
+        source_log: 'Source/session',
+        cwd: 'Project/cwd',
+        day: 'Day',
+      }[value] || humanizeMetric(value || '');
+    }
+
+    function humanizeMetric(value) {
+      return String(value || '')
+        .split('_')
+        .filter(Boolean)
+        .map(part => part.slice(0, 1).toUpperCase() + part.slice(1))
+        .join(' ');
+    }
+
     return {
       historyScope,
       latestComputed,
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
index 0ee6a65..c62955d 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
@@ -363,6 +363,53 @@
     .diagnostics-mini-table-wrap + .diagnostics-mini-table-wrap {
       margin-top: 10px;
     }
+    .diagnostics-muted {
+      color: var(--muted);
+      font-weight: 700;
+    }
+    .diagnostics-command-children {
+      text-align: left;
+    }
+    .diagnostics-command-children summary {
+      display: inline-flex;
+      align-items: center;
+      gap: 6px;
+      min-height: 24px;
+      color: var(--ink);
+      font-weight: 760;
+      cursor: pointer;
+    }
+    .diagnostics-command-children summary:focus-visible {
+      outline: 2px solid var(--blue);
+      outline-offset: 2px;
+      border-radius: 4px;
+    }
+    .diagnostics-command-children ul {
+      display: grid;
+      gap: 4px;
+      margin: 6px 0 0;
+      padding: 0;
+      list-style: none;
+    }
+    .diagnostics-command-children li {
+      display: flex;
+      align-items: baseline;
+      justify-content: space-between;
+      gap: 12px;
+      color: var(--muted);
+      font-size: 12px;
+      line-height: 1.35;
+    }
+    .diagnostics-command-children li span {
+      min-width: 0;
+      overflow-wrap: anywhere;
+      text-align: left;
+    }
+    .diagnostics-command-children li b {
+      flex: 0 0 auto;
+      color: var(--ink);
+      font-variant-numeric: tabular-nums;
+    }
     .diagnostics-facts-table {
       min-width: 1320px;
     }
diff --git a/tests/test_dashboard_diagnostics_snapshots.py b/tests/test_dashboard_diagnostics_snapshots.py
new file mode 100644
index 0000000..24f5b51
--- /dev/null
+++ b/tests/test_dashboard_diagnostics_snapshots.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+
+import json
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+
+def _run_snapshot_renderer_script(script: str) -> dict[str, object]:
+    node = shutil.which("node")
+    if node is None:
+        pytest.skip("node is required for dashboard diagnostic snapshot renderer tests")
+    repo_root = Path(__file__).resolve().parents[1]
+    script_path = (
+        repo_root
+        / "src"
+        / "codex_usage_tracker"
+        / "plugin_data"
+        / "dashboard"
+        / "dashboard_diagnostics_snapshots.js"
+    )
+    wrapped = f"""
+const fs = require('fs');
+const vm = require('vm');
+const code = fs.readFileSync({json.dumps(str(script_path))}, 'utf8');
+const context = {{
+  window: {{}},
+  console,
+}};
+vm.createContext(context);
+vm.runInContext(code, context);
+const factory = context.window.CodexUsageDashboardDiagnosticSnapshots;
+function escapeHtml(value) {{
+  return String(value)
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#39;');
+}}
+{script}
+"""
+    result = subprocess.run(
+        [node, "-e", wrapped],
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+    return json.loads(result.stdout)
+
+
+def test_dashboard_commands_snapshot_renders_collapsible_children() -> None:
+    payload = _run_snapshot_renderer_script(
+        """
+const renderer = factory.create({
+  escapeHtml,
+  formatTimestamp: value => value,
+  number: new Intl.NumberFormat('en-US'),
+  pct: value => `${value}%`,
+  renderState: message => `<div>${escapeHtml(message)}</div>`,
+  rowInvestigatorLink: () => '<a>call</a>',
+  tokenText: value => new Intl.NumberFormat('en-US').format(Number(value || 0)),
+});
+const html = renderer.renderPanels({
+  loading: false,
+  payloads: {
+    commands: {
+      status: 'ready',
+      refreshed: false,
+      snapshot: {
+        computed_at: '2026-06-20T00:00:00Z',
+        history_scope: 'active',
+        source_logs_scanned: 1,
+      },
+      commands: [
+        {
+          root: 'git',
+          total: 3,
+          children: [
+            { child: 'status', count: 2 },
+            { child: 'diff', count: 1 },
+          ],
+        },
+      ],
+    },
+  },
+});
+console.log(JSON.stringify({
+  hasDetails: html.includes('<details class="diagnostics-command-children">'),
+  hasSummary: html.includes('2 children'),
+  hasFirstChild: html.includes('status') && html.includes('<b>2</b>'),
+  hasSecondChild: html.includes('diff') && html.includes('<b>1</b>'),
+  hasTopChildColumn: html.includes('Top child'),
+}));
+"""
+    )
+
+    assert payload["hasDetails"] is True
+    assert payload["hasSummary"] is True
+    assert payload["hasFirstChild"] is True
+    assert payload["hasSecondChild"] is True
+    assert payload["hasTopChildColumn"] is False
+
+
+def test_dashboard_concentration_snapshot_renders_reader_facing_labels() -> None:
+    payload = _run_snapshot_renderer_script(
+        """
+const renderer = factory.create({
+  escapeHtml,
+  formatTimestamp: value => value,
+  number: new Intl.NumberFormat('en-US'),
+  pct: value => `${Math.round(Number(value || 0) * 100)}%`,
+  renderState: message => `<div>${escapeHtml(message)}</div>`,
+  rowInvestigatorLink: () => '<a>1,000</a>',
+  tokenText: value => new Intl.NumberFormat('en-US').format(Number(value || 0)),
+});
+const html = renderer.renderPanels({
+  loading: false,
+  payloads: {
+    concentration: {
+      status: 'ready',
+      refreshed: false,
+      snapshot: {
+        computed_at: '2026-06-20T00:00:00Z',
+        history_scope: 'active',
+        source_logs_scanned: 1,
+      },
+      metrics: [
+        { metric: 'top_1_source_log_share', dimension: 'source_log', top_n: 1, share: 0.5 },
+        { metric: 'top_3_cwd_share', dimension: 'cwd', top_n: 3, share: 0.9 },
+      ],
+      largest_impact_rows: [
+        {
+          dimension: 'source_log',
+          label: 'session:019e37d3',
+          share: 0.5,
+          largest_record_id: 'r1',
+          largest_call_tokens: 1000,
+        },
+      ],
+    },
+  },
+});
+console.log(JSON.stringify({
+  hasSourceMetricLabel: html.includes('Top 1 source/session share'),
+  hasProjectMetricLabel: html.includes('Top 3 project/cwd share'),
+  hasDimensionLabel: html.includes('Source/session'),
+  hasSafeSourceLabel: html.includes('session:019e37d3'),
+  leaksMetricId: html.includes('top_1_source_log_share'),
+}));
+"""
+    )
+
+    assert payload["hasSourceMetricLabel"] is True
+    assert payload["hasProjectMetricLabel"] is True
+    assert payload["hasDimensionLabel"] is True
+    assert payload["hasSafeSourceLabel"] is True
+    assert payload["leaksMetricId"] is False

From 9361f32fe438bac2bb956553c08cc7f527698011 Mon Sep 17 00:00:00 2001
From: Monsky <douglas.monsky@gmail.com>
Date: Sat, 20 Jun 2026 21:07:35 -0400
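Subject: [PATCH 10/10] fix: clarify command child expansion

Replace the bare child-count summary with explicit "Show all N
children" / "Hide N children" labels and a +/- toggle icon, and hide
the native disclosure marker.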
---
 .../dashboard_diagnostics_snapshots.js        |  6 +++-
 .../dashboard/dashboard_tables.css            | 34 +++++++++++++++++++
 tests/test_dashboard_diagnostics_snapshots.py |  8 +++--
 3 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js
index 213c9f0..f6f99e3 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics_snapshots.js
@@ -133,7 +133,11 @@
       const label = `${tokenText(childCount)} ${childCount === 1 ? 'child' : 'children'}`;
       return `
         <details class="diagnostics-command-children">
-          <summary>${escapeHtml(label)}</summary>
+          <summary>
+            <span class="diagnostics-command-toggle-icon" aria-hidden="true"></span>
+            <span class="diagnostics-command-toggle-closed">${escapeHtml(`Show all ${label}`)}</span>
+            <span class="diagnostics-command-toggle-open">${escapeHtml(`Hide ${label}`)}</span>
+          </summary>
           <ul>
             ${rows.map(child => `
               <li>
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
index c62955d..fdc4a10 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
@@ -379,11 +379,45 @@
       font-weight: 760;
       cursor: pointer;
     }
+    .diagnostics-command-children summary::marker {
+      content: "";
+    }
+    .diagnostics-command-children summary::-webkit-details-marker {
+      display: none;
+    }
     .diagnostics-command-children summary:focus-visible {
       outline: 2px solid var(--blue);
       outline-offset: 2px;
       border-radius: 4px;
     }
+    .diagnostics-command-toggle-icon {
+      display: inline-grid;
+      width: 18px;
+      height: 18px;
+      place-items: center;
+      border: 1px solid var(--line);
+      border-radius: 4px;
+      background: #ffffff;
+      color: var(--blue);
+      font-size: 13px;
+      font-weight: 850;
+      line-height: 1;
+    }
+    .diagnostics-command-toggle-icon::before {
+      content: "+";
+    }
+    .diagnostics-command-children[open] .diagnostics-command-toggle-icon::before {
+      content: "-";
+    }
+    .diagnostics-command-toggle-open {
+      display: none;
+    }
+    .diagnostics-command-children[open] .diagnostics-command-toggle-closed {
+      display: none;
+    }
+    .diagnostics-command-children[open] .diagnostics-command-toggle-open {
+      display: inline;
+    }
     .diagnostics-command-children ul {
       display: grid;
       gap: 4px;
diff --git a/tests/test_dashboard_diagnostics_snapshots.py b/tests/test_dashboard_diagnostics_snapshots.py
index 24f5b51..b25a74a 100644
--- a/tests/test_dashboard_diagnostics_snapshots.py
+++ b/tests/test_dashboard_diagnostics_snapshots.py
@@ -89,7 +89,9 @@ def test_dashboard_commands_snapshot_renders_collapsible_children() -> None:
 });
 console.log(JSON.stringify({
   hasDetails: html.includes('<details class="diagnostics-command-children">'),
-  hasSummary: html.includes('2 children'),
+  hasShowSummary: html.includes('Show all 2 children'),
+  hasHideSummary: html.includes('Hide 2 children'),
+  hasToggleIcon: html.includes('diagnostics-command-toggle-icon'),
   hasFirstChild: html.includes('status') && html.includes('<b>2</b>'),
   hasSecondChild: html.includes('diff') && html.includes('<b>1</b>'),
   hasTopChildColumn: html.includes('Top child'),
@@ -98,7 +100,9 @@ def test_dashboard_commands_snapshot_renders_collapsible_children() -> None:
     )
 
     assert payload["hasDetails"] is True
-    assert payload["hasSummary"] is True
+    assert payload["hasShowSummary"] is True
+    assert payload["hasHideSummary"] is True
+    assert payload["hasToggleIcon"] is True
     assert payload["hasFirstChild"] is True
     assert payload["hasSecondChild"] is True
     assert payload["hasTopChildColumn"] is False