douglasmonsky · douglasmonsky · Jun 20, 2026 · Jun 18, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/README.md b/README.md
@@ -156,7 +156,7 @@ The tracker cannot read your logged-in ChatGPT plan or live remaining usage auto
 
 - Local SQLite index at `~/.codex-usage-tracker/usage.sqlite3`.
 - Static dashboard generation plus localhost live refresh.
-- `Insights`, `Calls`, and `Threads` dashboard views.
+- `Insights`, `Calls`, `Threads`, and `Diagnostics` dashboard views.
 - Active-only dashboards by default, with an explicit `All history` toggle for archived sessions.
 - CLI summaries, queries, CSV export, dashboard generation, doctor checks, and support bundles.
 - MCP tools for Codex sessions that want to query local usage data.

diff --git a/docs/architecture.md b/docs/architecture.md
@@ -4,7 +4,7 @@ Codex Usage Tracker is a local sidecar app. It reads aggregate token counters fr
 
 ## Boundaries
 
-- `parser.py` converts local JSONL events into aggregate `UsageEvent` records. It also attaches metadata-only call-origin categories, archived-session flags, and conservative thread keys. It must not persist prompts, assistant text, tool output, or transcript snippets.
+- `parser.py` converts local JSONL events into aggregate `UsageEvent` records. It also attaches metadata-only call-origin categories, diagnostic facts from `diagnostic_facts.py`, archived-session flags, and conservative thread keys. It must not persist prompts, assistant text, tool output, command text, patch text, or transcript snippets.
 - `call_origin.py` owns the pure call-origin classifier and migrated-row fallback. It must not open source JSONL files; source-log reads belong in parser refresh or explicit context loading only.
 - `schema.py` owns persisted `usage_events` columns. Add columns there before changing SQLite migrations or export behavior.
 - `store.py` owns SQLite setup, refresh, rebuild, query access, persisted per-thread previous/next call links, materialized thread summaries, source-file refresh cursors, and SQL-backed live dashboard API slices. Keep filesystem scanning, database writes, SQL prefilters, counts, limits, offsets, and incremental refresh decisions here.
@@ -13,7 +13,7 @@ Codex Usage Tracker is a local sidecar app. It reads aggregate token counters fr
 - `costing.py`, `pricing_config.py`, `pricing_openai.py`, `pricing_estimates.py`, and `allowance.py` own cost, credit, rate-card, and allowance annotation. Keep estimate confidence and source metadata attached to rows.
 - `projects.py`, `threads.py`, and `recommendations.py` annotate aggregate rows with project identity, thread relationships, and actionable signals. Project privacy redaction also belongs in `projects.py` so CLI, MCP, dashboard, CSV, and support-bundle surfaces share the same behavior.
 - `dashboard.py` builds aggregate-only static dashboard payloads and writes HTML/assets. `server.py` adds localhost refresh, the compatibility `/api/usage` endpoint, SQL-backed live API slices, and explicit lazy context loading.
-- `plugin_data/dashboard/dashboard_format.js` owns dashboard formatting primitives. `dashboard_data.js` owns row payload and thread relationship helpers. `dashboard_analysis.js` owns scoring, sorting, recommendation, and thread grouping logic. `dashboard_cells.js` owns reusable table/cell HTML helpers. `dashboard_details.js` owns sidebar detail and thread narrative rendering. `dashboard_insights.js` owns insight cards and investigation preset UI. `dashboard_tables.js` owns Calls, Threads, and expanded thread-call table rendering. `dashboard_filters.js` owns date range parsing and row date matching. `dashboard_state.js` owns URL, CSV, and download state utilities. `dashboard_i18n.js`, `dashboard_payload_cache.js`, and `dashboard_tooltips.js` own localization, session aggregate cache, and fast tooltip helpers. `dashboard_call_investigator.js` owns the dedicated call drilldown surface. `dashboard.js` owns top-level DOM rendering, event handling, and API refresh orchestration.
+- `plugin_data/dashboard/dashboard_format.js` owns dashboard formatting primitives. `dashboard_data.js` owns row payload and thread relationship helpers. `dashboard_analysis.js` owns scoring, sorting, recommendation, and thread grouping logic. `dashboard_cells.js` owns reusable table/cell HTML helpers. `dashboard_details.js` owns sidebar detail and thread narrative rendering. `dashboard_insights.js` owns insight cards and investigation preset UI. `dashboard_tables.js` owns Calls, Threads, and expanded thread-call table rendering. `dashboard_diagnostics.js` owns the Diagnostics tab that consumes `/api/diagnostics/*` aggregate payloads. `dashboard_filters.js` owns date range parsing and row date matching. `dashboard_state.js` owns URL, CSV, and download state utilities. `dashboard_i18n.js`, `dashboard_payload_cache.js`, and `dashboard_tooltips.js` own localization, session aggregate cache, and fast tooltip helpers. `dashboard_call_investigator.js` owns the dedicated call drilldown surface. `dashboard.js` owns top-level DOM rendering, event handling, and API refresh orchestration.
 - `context.py` is the only normal path that reads raw log context, and it does so only for one selected record on demand with redaction and size limits. Its default quick mode omits tool output and serialized groups; full serialized JSONL group analysis is explicit.
 - `plugin_installer.py`, `.mcp.json`, `skills/`, and `scripts/check_release.py` own install and packaging behavior.
 - `scripts/benchmark_synthetic_history.py` owns generated large-history query timing and threshold enforcement for 10k, 100k, and 500k aggregate-row fixtures. Its optional `--with-source-logs` mode writes synthetic JSONL source logs to time explicit context loading and to guard normal dashboard payload assembly against source-log reads. It must stay synthetic-only and must not read real Codex logs.

diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md
@@ -46,6 +46,7 @@ Tracked schema ids:
 | `codex-usage-tracker-summary-v1` | CLI `summary --json`, CLI `expensive --json`, MCP summary/expensive JSON |
 | `codex-usage-tracker-query-v1` | CLI `query`, MCP `usage_query(...)` |
 | `codex-usage-tracker-recommendations-v1` | CLI `recommendations --json`, MCP `usage_recommendations(response_format="json")` |
+| `codex-usage-tracker-diagnostics-v1` | CLI `diagnostics ... --json`, dashboard server `/api/diagnostics/*` |
 | `codex-usage-tracker-session-v1` | CLI `session --json`, MCP `session_usage(response_format="json")` |
 | `codex-usage-tracker-context-v1` | CLI `context`, MCP `usage_call_context` when raw context is explicitly enabled |
 | `codex-usage-tracker-context-disabled-v1` | MCP `usage_call_context` when raw context is disabled |
@@ -225,6 +226,61 @@ Schema: `codex-usage-tracker-session-v1`
 }
 ```
 
+## Diagnostics
+
+Commands:
+
+```bash
+codex-usage-tracker diagnostics summary --json
+codex-usage-tracker diagnostics facts --sort uncached --json
+codex-usage-tracker diagnostics fact-calls --fact-type compaction --fact-name post_compaction --json
+```
+
+Dashboard server API:
+
+- `/api/diagnostics/summary`
+- `/api/diagnostics/facts`
+- `/api/diagnostics/fact-calls?fact_type=compaction&fact_name=post_compaction`
+- `/api/diagnostics/compactions`
+- `/api/diagnostics/tools`
+
+Schema: `codex-usage-tracker-diagnostics-v1`
+
+```json
+{
+  "schema": "codex-usage-tracker-diagnostics-v1",
+  "view": "facts",
+  "filters": {
+    "since": null,
+    "until": null,
+    "model": null,
+    "effort": null,
+    "thread": null,
+    "min_tokens": null,
+    "fact_type": null,
+    "fact_name": null,
+    "fact_category": null,
+    "fact_group": null,
+    "include_archived": false,
+    "sort": "uncached",
+    "direction": "desc",
+    "limit": 50,
+    "offset": 0,
+    "privacy_mode": "normal"
+  },
+  "row_count": 0,
+  "total_matched_rows": 0,
+  "truncated": false,
+  "raw_context_included": false,
+  "rows": [],
+  "notes": [
+    "Associated token totals are not additive when one call has multiple diagnostic facts."
+  ]
+}
+```
+
+Diagnostics payloads report aggregate structured facts such as compaction, tool/function/MCP activity, command families, structured skill labels, search/read loops, and outcome events. They do not include prompts, assistant messages, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments. Token totals are associated with facts observed before a token-count row; they are not causal allocations.
+
 ## Pricing Coverage
 
 Command:

diff --git a/docs/cli-reference.md b/docs/cli-reference.md
@@ -113,6 +113,20 @@ Useful investigations:
 - Use `expensive --limit 10` for a quick list of the highest-cost calls.
 - Use `recommendations --json` for ranked action rows and thread rollups with severity score, primary recommendation, and secondary signals.
 
+## Diagnostics
+
+```bash
+codex-usage-tracker diagnostics summary
+codex-usage-tracker diagnostics facts --sort uncached
+codex-usage-tracker diagnostics compactions
+codex-usage-tracker diagnostics tools
+codex-usage-tracker diagnostics fact-calls --fact-type compaction --fact-name post_compaction
+```
+
+Diagnostics expose structured event patterns and their associated token totals. They can show compactions, tool/function/MCP activity, safe command families, structured skill labels, patch outcomes, task completion, search/read loops, and aborted or rolled-back turns. Associated totals are not causal allocations and are not additive when one model call has multiple diagnostic facts.
+
+Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments.
+
 ## JSON Queries
 
 ```bash

diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md
@@ -65,7 +65,7 @@ The localhost server uses a random per-server token for refresh and context API
 
 ![Insights view with ranked attention cards, investigation presets, and top threads by attention score.](assets/dashboard-insights.png)
 
-The dashboard opens in `Insights` view. This view is designed to answer "what needs attention?" before you start sorting tables.
+Open the `Insights` view when you want to answer "what needs attention?" before you start sorting tables.
 
 - `Needs Attention` cards rank costly threads, Codex allowance usage, low cache reuse, context bloat, unpriced usage, estimated pricing, and reasoning-output spikes from aggregate fields only.
 - `Investigation Presets` apply a view, derived filter, sort order, and explanatory caption together.
@@ -126,7 +126,17 @@ Use `Threads` view when you want to understand a work session as a group instead
 - Expanded calls default to newest first. Click an expanded-call header such as `Time`, `Tokens`, `Cost`, or `Cache` to sort that thread's visible calls without changing the top-level Threads ranking.
 - Subagents with logged parent session ids are shown under the parent thread. Auto-review sessions without explicit parent ids may be attached by cwd and nearby activity and are marked as attached or inferred in the details.
 
-The same search, time range, confidence status, load limit, cards, and sort controls apply in `Insights`, `Calls`, and `Threads` views.
+## Diagnostics View
+
+Use `Diagnostics` view when you want to see what structured event patterns are happening and what token totals are associated with those patterns.
+
+- The tab consumes the localhost `/api/diagnostics/*` endpoints; static file dashboards cannot reach them and show a "live API unavailable" state instead.
+- The first table shows top diagnostic facts by associated uncached input tokens. Tool/function/MCP/command-family and compaction sections expose narrower slices of the same fact data.
+- Command diagnostics store only a command family such as `pytest`, `git`, or `unknown_command`. Skill and MCP labels are detected only when they are present as structured event metadata.
+- Click `Calls` on a fact row to load associated model calls. Call links and largest-call links open the Call Investigator, where raw context remains explicit and on demand.
+- Associated token totals are not causal allocations and are not additive when one call has multiple diagnostic facts.
+
+The same time range, model, reasoning, history scope, cards, and load controls apply in `Insights`, `Calls`, `Threads`, and `Diagnostics` views. Search, confidence, and sort controls currently scope the call/thread tables, not Diagnostics fact totals.
 
 ## Call Investigator
 

diff --git a/docs/privacy.md b/docs/privacy.md
@@ -15,6 +15,7 @@ The local SQLite database is stored at `~/.codex-usage-tracker/usage.sqlite3` by
 - materialized thread-level aggregate summaries for active and all-history scopes
 - source-file refresh metadata such as path, path hash, size, mtime, indexed line/byte offsets, latest aggregate record id, parser diagnostics, and last indexed time
 - observed Codex rate-limit snapshot metadata from local token-count logs, such as plan type, limit id, 5-hour/weekly used percentages, window lengths, and reset times
+- diagnostic fact labels tied to aggregate call records, such as safe event categories, payload type labels, counts, timestamps, and line ranges
 - pricing, credit, allowance, recommendation, and project metadata derived from aggregate fields
 
 ## Not Stored
@@ -32,6 +33,8 @@ Those fields are not written to SQLite, CSV exports, generated dashboard HTML, o
 
 Call-origin metadata is heuristic and confidence-labeled. It stores categories such as `user`, `codex`, or `unknown` plus a reason such as `user_message`, `tool_result`, `post_compaction`, or `agent_continuation`. It does not store the message text, tool output, compaction replacement text, or raw JSONL fragment that produced the category.
 
+Diagnostic facts follow the same aggregate-only rule. They can store safe structured labels such as `patch_applied`, `function_call_output`, `post_compaction`, MCP tool/server labels, structured skill labels, and command families such as `pytest`, `git`, or `unknown_command`, along with event counts and source line ranges. Command text may be classified in memory during parsing, but it is not persisted. Diagnostic facts do not store tool arguments, command text, command output, patch text, prompt or assistant text, file contents, raw JSONL fragments, or raw context evidence.
+
 ## On-Demand Context
 
 `usage_call_context`, `codex-usage-tracker context`, and the `serve-dashboard` context endpoint read a single source JSONL file only when explicitly requested. Returned context is redacted for common secret patterns and capped in size by default for CLI/MCP requests. The call investigator uses the same endpoint at runtime and requests quick redacted evidence for the selected call when the local context API is enabled; that still does not persist raw context into SQLite, CSV, support bundles, or generated dashboard HTML.

diff --git a/scripts/check_release.py b/scripts/check_release.py
@@ -120,6 +120,7 @@
     "src/codex_usage_tracker/plugin_data/dashboard/dashboard_tooltips.js",
     "src/codex_usage_tracker/plugin_data/dashboard/dashboard_status.js",
     "src/codex_usage_tracker/plugin_data/dashboard/dashboard_events.js",
+    "src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js",
     "src/codex_usage_tracker/plugin_data/dashboard/dashboard_call_diagnostics.js",
     "src/codex_usage_tracker/plugin_data/dashboard/dashboard.js",
     "src/codex_usage_tracker/plugin_data/dashboard/dashboard_state.js",
@@ -163,6 +164,7 @@
     "codex_usage_tracker/plugin_data/dashboard/dashboard_tooltips.js",
     "codex_usage_tracker/plugin_data/dashboard/dashboard_status.js",
     "codex_usage_tracker/plugin_data/dashboard/dashboard_events.js",
+    "codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js",
     "codex_usage_tracker/plugin_data/dashboard/dashboard_call_diagnostics.js",
     "codex_usage_tracker/plugin_data/dashboard/dashboard.js",
     "codex_usage_tracker/plugin_data/dashboard/dashboard_state.js",

diff --git a/scripts/smoke_installed_package.py b/scripts/smoke_installed_package.py
@@ -46,6 +46,7 @@
     "summary",
     "query",
     "recommendations",
+    "diagnostics",
     "session",
     "context",
     "dashboard",
@@ -86,6 +87,7 @@
     "dashboard/dashboard_events.js",
     "dashboard/dashboard_actions.js",
     "dashboard/dashboard_live.js",
+    "dashboard/dashboard_diagnostics.js",
     "dashboard/dashboard_call_diagnostics.js",
     "dashboard/dashboard.js",
     "dashboard/dashboard_state.js",

diff --git a/src/codex_usage_tracker/cli.py b/src/codex_usage_tracker/cli.py
@@ -24,6 +24,11 @@
 from codex_usage_tracker.cli_parser import build_parser
 from codex_usage_tracker.context import load_call_context
 from codex_usage_tracker.dashboard import generate_dashboard
+from codex_usage_tracker.diagnostic_reports import (
+    build_diagnostics_fact_calls_report,
+    build_diagnostics_facts_report,
+    build_diagnostics_summary_report,
+)
 from codex_usage_tracker.diagnostics import run_doctor
 from codex_usage_tracker.formatting import (
     format_doctor,
@@ -387,6 +392,79 @@ def _run_recommendations(args: argparse.Namespace) -> int:
     return 0
 
 
+def _run_diagnostics(args: argparse.Namespace) -> int:  # Run the `diagnostics` subcommand named by args.diagnostics_command and print its report; returns 0, raises ValueError on an unknown subcommand.
+    command = args.diagnostics_command
+    if command == "summary":  # summary view: forwards the shared filters plus fact_type/fact_name/fact_category read directly from args
+        report = build_diagnostics_summary_report(
+            db_path=args.db,
+            limit=args.limit,
+            since=args.since,
+            until=args.until,
+            model=args.model,
+            effort=args.effort,
+            thread=args.thread,
+            min_tokens=args.min_tokens,
+            fact_type=args.fact_type,
+            fact_name=args.fact_name,
+            fact_category=args.fact_category,
+            include_archived=args.include_archived,
+            sort=args.sort,
+            direction=args.direction,
+        )
+    elif command in {"facts", "compactions", "tools"}:  # these three subcommands share one facts report builder; only the fact filters and the view value differ
+        report = build_diagnostics_facts_report(
+            db_path=args.db,
+            limit=args.limit,
+            since=args.since,
+            until=args.until,
+            model=args.model,
+            effort=args.effort,
+            thread=args.thread,
+            min_tokens=args.min_tokens,
+            fact_type=_diagnostic_fact_type_filter(args),  # "compactions" pins this to "compaction"
+            fact_name=getattr(args, "fact_name", None),  # getattr with a None default: presumably not every subparser defines this option - verify in cli_parser
+            fact_category=getattr(args, "fact_category", None),  # same defensive lookup as fact_name
+            include_archived=args.include_archived,
+            sort=args.sort,
+            direction=args.direction,
+            fact_group="tools" if command == "tools" else None,  # only the tools subcommand narrows by fact group
+            view=command,  # the subcommand name is passed through as the report view
+        )
+    elif command == "fact-calls":  # calls for one fact_type/fact_name; the only branch that forwards offset and privacy_mode
+        report = build_diagnostics_fact_calls_report(
+            db_path=args.db,
+            fact_type=args.fact_type,
+            fact_name=args.fact_name,
+            limit=args.limit,
+            offset=args.offset,
+            since=args.since,
+            until=args.until,
+            model=args.model,
+            effort=args.effort,
+            thread=args.thread,
+            min_tokens=args.min_tokens,
+            include_archived=args.include_archived,
+            sort=args.sort,
+            direction=args.direction,
+            privacy_mode=args.privacy_mode,
+        )
+    else:  # defensive guard: presumably argparse already restricts the subcommand choices - TODO confirm
+        raise ValueError(f"unknown diagnostics command: {command}")
+
+    if args.as_json:  # JSON mode: emit report.payload via _print_json instead of the rendered text
+        _print_json(report.payload)
+        return 0
+    print(report.render())  # default mode: print the report's own text rendering
+    return 0
+
+
+def _diagnostic_fact_type_filter(args: argparse.Namespace) -> str | None:  # Resolve the fact_type value that _run_diagnostics forwards to the facts report.
+    command = args.diagnostics_command
+    if command == "compactions":  # the compactions subcommand always uses this fixed fact type, ignoring any fact_type on args
+        return "compaction"
+    return getattr(args, "fact_type", None)  # otherwise the parsed fact_type if the attribute exists, else None
+
+
 def _run_session(args: argparse.Namespace) -> int:
     rows = query_session_usage(args.db, args.session_id, args.limit)
     rows = apply_project_privacy_to_rows(rows, privacy_mode=args.privacy_mode)
@@ -805,6 +883,7 @@ def _run_support_bundle(args: argparse.Namespace) -> int:
     "summary": _run_summary,
     "query": _run_query,
     "recommendations": _run_recommendations,
+    "diagnostics": _run_diagnostics,
     "session": _run_session,
     "context": _run_context,
     "dashboard": _run_dashboard,