diff --git a/README.md b/README.md
index 01830e3..e890187 100644
--- a/README.md
+++ b/README.md
@@ -156,7 +156,7 @@ The tracker cannot read your logged-in ChatGPT plan or live remaining usage auto
- Local SQLite index at `~/.codex-usage-tracker/usage.sqlite3`.
- Static dashboard generation plus localhost live refresh.
-- `Insights`, `Calls`, and `Threads` dashboard views.
+- `Insights`, `Calls`, `Threads`, and `Diagnostics` dashboard views.
- Active-only dashboards by default, with an explicit `All history` toggle for archived sessions.
- CLI summaries, queries, CSV export, dashboard generation, doctor checks, and support bundles.
- MCP tools for Codex sessions that want to query local usage data.
diff --git a/docs/architecture.md b/docs/architecture.md
index 71c7d5c..b58f8eb 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -4,7 +4,7 @@ Codex Usage Tracker is a local sidecar app. It reads aggregate token counters fr
## Boundaries
-- `parser.py` converts local JSONL events into aggregate `UsageEvent` records. It also attaches metadata-only call-origin categories, archived-session flags, and conservative thread keys. It must not persist prompts, assistant text, tool output, or transcript snippets.
+- `parser.py` converts local JSONL events into aggregate `UsageEvent` records. It also attaches metadata-only call-origin categories, diagnostic facts from `diagnostic_facts.py`, archived-session flags, and conservative thread keys. It must not persist prompts, assistant text, tool output, command text, patch text, or transcript snippets.
- `call_origin.py` owns the pure call-origin classifier and migrated-row fallback. It must not open source JSONL files; source-log reads belong in parser refresh or explicit context loading only.
- `schema.py` owns persisted `usage_events` columns. Add columns there before changing SQLite migrations or export behavior.
- `store.py` owns SQLite setup, refresh, rebuild, query access, persisted per-thread previous/next call links, materialized thread summaries, source-file refresh cursors, and SQL-backed live dashboard API slices. Keep filesystem scanning, database writes, SQL prefilters, counts, limits, offsets, and incremental refresh decisions here.
@@ -13,7 +13,7 @@ Codex Usage Tracker is a local sidecar app. It reads aggregate token counters fr
- `costing.py`, `pricing_config.py`, `pricing_openai.py`, `pricing_estimates.py`, and `allowance.py` own cost, credit, rate-card, and allowance annotation. Keep estimate confidence and source metadata attached to rows.
- `projects.py`, `threads.py`, and `recommendations.py` annotate aggregate rows with project identity, thread relationships, and actionable signals. Project privacy redaction also belongs in `projects.py` so CLI, MCP, dashboard, CSV, and support-bundle surfaces share the same behavior.
- `dashboard.py` builds aggregate-only static dashboard payloads and writes HTML/assets. `server.py` adds localhost refresh, the compatibility `/api/usage` endpoint, SQL-backed live API slices, and explicit lazy context loading.
-- `plugin_data/dashboard/dashboard_format.js` owns dashboard formatting primitives. `dashboard_data.js` owns row payload and thread relationship helpers. `dashboard_analysis.js` owns scoring, sorting, recommendation, and thread grouping logic. `dashboard_cells.js` owns reusable table/cell HTML helpers. `dashboard_details.js` owns sidebar detail and thread narrative rendering. `dashboard_insights.js` owns insight cards and investigation preset UI. `dashboard_tables.js` owns Calls, Threads, and expanded thread-call table rendering. `dashboard_filters.js` owns date range parsing and row date matching. `dashboard_state.js` owns URL, CSV, and download state utilities. `dashboard_i18n.js`, `dashboard_payload_cache.js`, and `dashboard_tooltips.js` own localization, session aggregate cache, and fast tooltip helpers. `dashboard_call_investigator.js` owns the dedicated call drilldown surface. `dashboard.js` owns top-level DOM rendering, event handling, and API refresh orchestration.
+- `plugin_data/dashboard/dashboard_format.js` owns dashboard formatting primitives. `dashboard_data.js` owns row payload and thread relationship helpers. `dashboard_analysis.js` owns scoring, sorting, recommendation, and thread grouping logic. `dashboard_cells.js` owns reusable table/cell HTML helpers. `dashboard_details.js` owns sidebar detail and thread narrative rendering. `dashboard_insights.js` owns insight cards and investigation preset UI. `dashboard_tables.js` owns Calls, Threads, and expanded thread-call table rendering. `dashboard_diagnostics.js` owns the Diagnostics tab that consumes `/api/diagnostics/*` aggregate payloads. `dashboard_filters.js` owns date range parsing and row date matching. `dashboard_state.js` owns URL, CSV, and download state utilities. `dashboard_i18n.js`, `dashboard_payload_cache.js`, and `dashboard_tooltips.js` own localization, session aggregate cache, and fast tooltip helpers. `dashboard_call_investigator.js` owns the dedicated call drilldown surface. `dashboard.js` owns top-level DOM rendering, event handling, and API refresh orchestration.
- `context.py` is the only normal path that reads raw log context, and it does so only for one selected record on demand with redaction and size limits. Its default quick mode omits tool output and serialized groups; full serialized JSONL group analysis is explicit.
- `plugin_installer.py`, `.mcp.json`, `skills/`, and `scripts/check_release.py` own install and packaging behavior.
- `scripts/benchmark_synthetic_history.py` owns generated large-history query timing and threshold enforcement for 10k, 100k, and 500k aggregate-row fixtures. Its optional `--with-source-logs` mode writes synthetic JSONL source logs to time explicit context loading and to guard normal dashboard payload assembly against source-log reads. It must stay synthetic-only and must not read real Codex logs.
diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md
index cc3d2a6..84c6ed3 100644
--- a/docs/cli-json-schemas.md
+++ b/docs/cli-json-schemas.md
@@ -46,6 +46,7 @@ Tracked schema ids:
| `codex-usage-tracker-summary-v1` | CLI `summary --json`, CLI `expensive --json`, MCP summary/expensive JSON |
| `codex-usage-tracker-query-v1` | CLI `query`, MCP `usage_query(...)` |
| `codex-usage-tracker-recommendations-v1` | CLI `recommendations --json`, MCP `usage_recommendations(response_format="json")` |
+| `codex-usage-tracker-diagnostics-v1` | CLI `diagnostics ... --json`, dashboard server `/api/diagnostics/*` |
| `codex-usage-tracker-session-v1` | CLI `session --json`, MCP `session_usage(response_format="json")` |
| `codex-usage-tracker-context-v1` | CLI `context`, MCP `usage_call_context` when raw context is explicitly enabled |
| `codex-usage-tracker-context-disabled-v1` | MCP `usage_call_context` when raw context is disabled |
@@ -225,6 +226,61 @@ Schema: `codex-usage-tracker-session-v1`
}
```
+## Diagnostics
+
+Commands:
+
+```bash
+codex-usage-tracker diagnostics summary --json
+codex-usage-tracker diagnostics facts --sort uncached --json
+codex-usage-tracker diagnostics fact-calls --fact-type compaction --fact-name post_compaction --json
+```
+
+Dashboard server API:
+
+- `/api/diagnostics/summary`
+- `/api/diagnostics/facts`
+- `/api/diagnostics/fact-calls?fact_type=compaction&fact_name=post_compaction`
+- `/api/diagnostics/compactions`
+- `/api/diagnostics/tools`
+
+Schema: `codex-usage-tracker-diagnostics-v1`
+
+```json
+{
+ "schema": "codex-usage-tracker-diagnostics-v1",
+ "view": "facts",
+ "filters": {
+ "since": null,
+ "until": null,
+ "model": null,
+ "effort": null,
+ "thread": null,
+ "min_tokens": null,
+ "fact_type": null,
+ "fact_name": null,
+ "fact_category": null,
+ "fact_group": null,
+ "include_archived": false,
+ "sort": "uncached",
+ "direction": "desc",
+ "limit": 50,
+ "offset": 0,
+ "privacy_mode": "normal"
+ },
+ "row_count": 1,
+ "total_matched_rows": 1,
+ "truncated": false,
+ "raw_context_included": false,
+ "rows": [],
+ "notes": [
+ "Associated token totals are not additive when one call has multiple diagnostic facts."
+ ]
+}
+```
+
+Diagnostics payloads report aggregate structured facts such as compaction, tool/function/MCP activity, command families, structured skill labels, search/read loops, and outcome events. They do not include prompts, assistant messages, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments. Token totals are associated with facts observed before a token-count row; they are not causal allocations.
+
## Pricing Coverage
Command:
diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index a26eb16..79c08b4 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -113,6 +113,20 @@ Useful investigations:
- Use `expensive --limit 10` for a quick list of the highest-cost calls.
- Use `recommendations --json` for ranked action rows and thread rollups with severity score, primary recommendation, and secondary signals.
+## Diagnostics
+
+```bash
+codex-usage-tracker diagnostics summary
+codex-usage-tracker diagnostics facts --sort uncached
+codex-usage-tracker diagnostics compactions
+codex-usage-tracker diagnostics tools
+codex-usage-tracker diagnostics fact-calls --fact-type compaction --fact-name post_compaction
+```
+
+Diagnostics expose structured event patterns and their associated token totals. They can show compactions, tool/function/MCP activity, safe command families, structured skill labels, patch outcomes, task completion, search/read loops, and aborted or rolled-back turns. Associated totals are not causal allocations and are not additive when one model call has multiple diagnostic facts.
+
+Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments.
+
## JSON Queries
```bash
diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md
index cbd4c55..40beb28 100644
--- a/docs/dashboard-guide.md
+++ b/docs/dashboard-guide.md
@@ -65,7 +65,7 @@ The localhost server uses a random per-server token for refresh and context API

-The dashboard opens in `Insights` view. This view is designed to answer "what needs attention?" before you start sorting tables.
+Open the `Insights` view when you want to answer "what needs attention?" before you start sorting tables.
- `Needs Attention` cards rank costly threads, Codex allowance usage, low cache reuse, context bloat, unpriced usage, estimated pricing, and reasoning-output spikes from aggregate fields only.
- `Investigation Presets` apply a view, derived filter, sort order, and explanatory caption together.
@@ -126,7 +126,17 @@ Use `Threads` view when you want to understand a work session as a group instead
- Expanded calls default to newest first. Click an expanded-call header such as `Time`, `Tokens`, `Cost`, or `Cache` to sort that thread's visible calls without changing the top-level Threads ranking.
- Subagents with logged parent session ids are shown under the parent thread. Auto-review sessions without explicit parent ids may be attached by cwd and nearby activity and are marked as attached or inferred in the details.
-The same search, time range, confidence status, load limit, cards, and sort controls apply in `Insights`, `Calls`, and `Threads` views.
+## Diagnostics View
+
+Use `Diagnostics` view when you want to see what structured event patterns are happening and what token totals are associated with those patterns.
+
+- The tab consumes the localhost `/api/diagnostics/*` endpoints; statically generated dashboard files show a "live API unavailable" state instead.
+- The first table shows top diagnostic facts by associated uncached input tokens. Tool/function/MCP/command-family and compaction sections expose narrower slices of the same fact data.
+- Command diagnostics store only a command family such as `pytest`, `git`, or `unknown_command`. Skill and MCP labels are detected only when they are present as structured event metadata.
+- Click `Calls` on a fact row to load associated model calls. Call links and largest-call links open the Call Investigator, where raw context remains explicit and on demand.
+- Associated token totals are not causal allocations and are not additive when one call has multiple diagnostic facts.
+
+The same time range, model, reasoning, history scope, cards, and load controls apply in `Insights`, `Calls`, `Threads`, and `Diagnostics` views. Search, confidence, and sort controls currently scope the call/thread tables, not Diagnostics fact totals.
## Call Investigator
diff --git a/docs/privacy.md b/docs/privacy.md
index c5dad80..50da9e3 100644
--- a/docs/privacy.md
+++ b/docs/privacy.md
@@ -15,6 +15,7 @@ The local SQLite database is stored at `~/.codex-usage-tracker/usage.sqlite3` by
- materialized thread-level aggregate summaries for active and all-history scopes
- source-file refresh metadata such as path, path hash, size, mtime, indexed line/byte offsets, latest aggregate record id, parser diagnostics, and last indexed time
- observed Codex rate-limit snapshot metadata from local token-count logs, such as plan type, limit id, 5-hour/weekly used percentages, window lengths, and reset times
+- diagnostic fact labels tied to aggregate call records, such as safe event categories, payload type labels, counts, timestamps, and line ranges
- pricing, credit, allowance, recommendation, and project metadata derived from aggregate fields
## Not Stored
@@ -32,6 +33,8 @@ Those fields are not written to SQLite, CSV exports, generated dashboard HTML, o
Call-origin metadata is heuristic and confidence-labeled. It stores categories such as `user`, `codex`, or `unknown` plus a reason such as `user_message`, `tool_result`, `post_compaction`, or `agent_continuation`. It does not store the message text, tool output, compaction replacement text, or raw JSONL fragment that produced the category.
+Diagnostic facts follow the same aggregate-only rule. They can store safe structured labels such as `patch_applied`, `function_call_output`, `post_compaction`, MCP tool/server labels, structured skill labels, and command families such as `pytest`, `git`, or `unknown_command`, along with event counts and source line ranges. Command text may be classified in memory during parsing, but it is not persisted. Diagnostic facts do not store tool arguments, command text, command output, patch text, prompt or assistant text, file contents, raw JSONL fragments, or raw context evidence.
+
## On-Demand Context
`usage_call_context`, `codex-usage-tracker context`, and the `serve-dashboard` context endpoint read a single source JSONL file only when explicitly requested. Returned context is redacted for common secret patterns and capped in size by default for CLI/MCP requests. The call investigator uses the same endpoint at runtime and requests quick redacted evidence for the selected call when the local context API is enabled; that still does not persist raw context into SQLite, CSV, support bundles, or generated dashboard HTML.
diff --git a/scripts/check_release.py b/scripts/check_release.py
index aee8e59..4619fb7 100755
--- a/scripts/check_release.py
+++ b/scripts/check_release.py
@@ -120,6 +120,7 @@
"src/codex_usage_tracker/plugin_data/dashboard/dashboard_tooltips.js",
"src/codex_usage_tracker/plugin_data/dashboard/dashboard_status.js",
"src/codex_usage_tracker/plugin_data/dashboard/dashboard_events.js",
+ "src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js",
"src/codex_usage_tracker/plugin_data/dashboard/dashboard_call_diagnostics.js",
"src/codex_usage_tracker/plugin_data/dashboard/dashboard.js",
"src/codex_usage_tracker/plugin_data/dashboard/dashboard_state.js",
@@ -163,6 +164,7 @@
"codex_usage_tracker/plugin_data/dashboard/dashboard_tooltips.js",
"codex_usage_tracker/plugin_data/dashboard/dashboard_status.js",
"codex_usage_tracker/plugin_data/dashboard/dashboard_events.js",
+ "codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js",
"codex_usage_tracker/plugin_data/dashboard/dashboard_call_diagnostics.js",
"codex_usage_tracker/plugin_data/dashboard/dashboard.js",
"codex_usage_tracker/plugin_data/dashboard/dashboard_state.js",
diff --git a/scripts/smoke_installed_package.py b/scripts/smoke_installed_package.py
index b685623..75b0088 100644
--- a/scripts/smoke_installed_package.py
+++ b/scripts/smoke_installed_package.py
@@ -46,6 +46,7 @@
"summary",
"query",
"recommendations",
+ "diagnostics",
"session",
"context",
"dashboard",
@@ -86,6 +87,7 @@
"dashboard/dashboard_events.js",
"dashboard/dashboard_actions.js",
"dashboard/dashboard_live.js",
+ "dashboard/dashboard_diagnostics.js",
"dashboard/dashboard_call_diagnostics.js",
"dashboard/dashboard.js",
"dashboard/dashboard_state.js",
diff --git a/src/codex_usage_tracker/cli.py b/src/codex_usage_tracker/cli.py
index 4f927b9..96b7465 100644
--- a/src/codex_usage_tracker/cli.py
+++ b/src/codex_usage_tracker/cli.py
@@ -24,6 +24,11 @@
from codex_usage_tracker.cli_parser import build_parser
from codex_usage_tracker.context import load_call_context
from codex_usage_tracker.dashboard import generate_dashboard
+from codex_usage_tracker.diagnostic_reports import (
+ build_diagnostics_fact_calls_report,
+ build_diagnostics_facts_report,
+ build_diagnostics_summary_report,
+)
from codex_usage_tracker.diagnostics import run_doctor
from codex_usage_tracker.formatting import (
format_doctor,
@@ -387,6 +392,79 @@ def _run_recommendations(args: argparse.Namespace) -> int:
return 0
+def _run_diagnostics(args: argparse.Namespace) -> int:  # Build the report for the selected `diagnostics` subcommand, print it, and return 0.
+ command = args.diagnostics_command  # set by the subparser `dest="diagnostics_command"` in cli_parser.py
+ if command == "summary":
+ report = build_diagnostics_summary_report(
+ db_path=args.db,
+ limit=args.limit,
+ since=args.since,
+ until=args.until,
+ model=args.model,
+ effort=args.effort,
+ thread=args.thread,
+ min_tokens=args.min_tokens,
+ fact_type=args.fact_type,
+ fact_name=args.fact_name,
+ fact_category=args.fact_category,
+ include_archived=args.include_archived,
+ sort=args.sort,
+ direction=args.direction,
+ )
+ elif command in {"facts", "compactions", "tools"}:  # these three subcommands share one report builder
+ report = build_diagnostics_facts_report(
+ db_path=args.db,
+ limit=args.limit,
+ since=args.since,
+ until=args.until,
+ model=args.model,
+ effort=args.effort,
+ thread=args.thread,
+ min_tokens=args.min_tokens,
+ fact_type=_diagnostic_fact_type_filter(args),  # "compaction" for `compactions`, otherwise the optional --fact-type
+ fact_name=getattr(args, "fact_name", None),  # getattr: `compactions`/`tools` parsers do not define --fact-name
+ fact_category=getattr(args, "fact_category", None),  # getattr: `compactions`/`tools` parsers do not define --fact-category
+ include_archived=args.include_archived,
+ sort=args.sort,
+ direction=args.direction,
+ fact_group="tools" if command == "tools" else None,  # only the `tools` subcommand narrows by fact group
+ view=command,  # the subcommand name is passed through as the report view
+ )
+ elif command == "fact-calls":
+ report = build_diagnostics_fact_calls_report(
+ db_path=args.db,
+ fact_type=args.fact_type,  # required by the `fact-calls` parser
+ fact_name=args.fact_name,  # required by the `fact-calls` parser
+ limit=args.limit,
+ offset=args.offset,
+ since=args.since,
+ until=args.until,
+ model=args.model,
+ effort=args.effort,
+ thread=args.thread,
+ min_tokens=args.min_tokens,
+ include_archived=args.include_archived,
+ sort=args.sort,
+ direction=args.direction,
+ privacy_mode=args.privacy_mode,  # NOTE(review): --privacy-mode is not added by the diagnostics parser helpers in this patch; presumably defined on a shared parser — confirm
+ )
+ else:
+ raise ValueError(f"unknown diagnostics command: {command}")  # a subcommand with no branch above
+
+ if args.as_json:  # --json prints report.payload; otherwise the text from report.render()
+ _print_json(report.payload)
+ return 0
+ print(report.render())
+ return 0
+
+
+def _diagnostic_fact_type_filter(args: argparse.Namespace) -> str | None:  # Resolve the fact-type filter for the facts-style subcommands.
+ command = args.diagnostics_command
+ if command == "compactions":
+ return "compaction"  # the `compactions` subcommand always pins the fact type
+ return getattr(args, "fact_type", None)  # None when the subparser has no --fact-type or it was not given
+
+
def _run_session(args: argparse.Namespace) -> int:
rows = query_session_usage(args.db, args.session_id, args.limit)
rows = apply_project_privacy_to_rows(rows, privacy_mode=args.privacy_mode)
@@ -805,6 +883,7 @@ def _run_support_bundle(args: argparse.Namespace) -> int:
"summary": _run_summary,
"query": _run_query,
"recommendations": _run_recommendations,
+ "diagnostics": _run_diagnostics,
"session": _run_session,
"context": _run_context,
"dashboard": _run_dashboard,
diff --git a/src/codex_usage_tracker/cli_parser.py b/src/codex_usage_tracker/cli_parser.py
index 2af5e09..193fe7a 100644
--- a/src/codex_usage_tracker/cli_parser.py
+++ b/src/codex_usage_tracker/cli_parser.py
@@ -7,6 +7,11 @@
from codex_usage_tracker import __version__
from codex_usage_tracker.context import DEFAULT_CONTEXT_CHARS, DEFAULT_CONTEXT_ENTRIES
+from codex_usage_tracker.diagnostic_reports import (
+ DIAGNOSTIC_CALL_SORT_CHOICES,
+ DIAGNOSTIC_DIRECTION_CHOICES,
+ DIAGNOSTIC_FACT_SORT_CHOICES,
+)
from codex_usage_tracker.paths import (
DEFAULT_ALLOWANCE_PATH,
DEFAULT_CODEX_HOME,
@@ -70,6 +75,7 @@ def build_parser() -> argparse.ArgumentParser:
_add_summary_parser(subparsers)
_add_query_parser(subparsers)
_add_recommendations_parser(subparsers)
+ _add_diagnostics_parser(subparsers)
_add_session_parser(subparsers)
_add_context_parser(subparsers)
_add_dashboard_parsers(subparsers)
@@ -281,6 +287,88 @@ def _add_recommendations_parser(
recommendations.add_argument("--json", action="store_true", dest="as_json")
+def _add_diagnostics_parser(
+ subparsers: argparse._SubParsersAction[argparse.ArgumentParser],
+) -> None:  # Register the `diagnostics` command and its summary/facts/compactions/tools/fact-calls subcommands.
+ diagnostics = subparsers.add_parser(
+ "diagnostics",
+ help="Inspect aggregate diagnostic facts and their associated token costs",
+ )
+ diagnostic_subparsers = diagnostics.add_subparsers(
+ dest="diagnostics_command",  # read back in cli.py to choose the report builder
+ required=True,  # `diagnostics` without a subcommand is a usage error
+ )
+
+ summary = diagnostic_subparsers.add_parser(
+ "summary",
+ help="Summarize diagnostic facts by fact type",
+ )
+ _add_diagnostics_fact_filters(summary)
+ _add_diagnostics_fact_sort(summary, default_limit=20)  # summary defaults to 20 rows; the other subcommands to 50
+
+ facts = diagnostic_subparsers.add_parser(
+ "facts",
+ help="List diagnostic facts with associated token totals",
+ )
+ _add_diagnostics_fact_filters(facts)
+ _add_diagnostics_fact_sort(facts, default_limit=50)
+
+ compactions = diagnostic_subparsers.add_parser(
+ "compactions",
+ help="List compaction diagnostic facts",
+ )
+ _add_diagnostics_base_filters(compactions)  # base filters only: no --fact-type/--fact-name/--fact-category
+ _add_diagnostics_fact_sort(compactions, default_limit=50)
+
+ tools = diagnostic_subparsers.add_parser(
+ "tools",
+ help="List tool/function diagnostic facts",
+ )
+ _add_diagnostics_base_filters(tools)  # base filters only: no --fact-type/--fact-name/--fact-category
+ _add_diagnostics_fact_sort(tools, default_limit=50)
+
+ fact_calls = diagnostic_subparsers.add_parser(
+ "fact-calls",
+ help="List calls associated with one diagnostic fact",
+ )
+ fact_calls.add_argument("--fact-type", required=True)  # both fact identifiers are mandatory for this subcommand
+ fact_calls.add_argument("--fact-name", required=True)
+ _add_diagnostics_base_filters(fact_calls)
+ fact_calls.add_argument("--offset", type=int, default=0)  # only `fact-calls` exposes --offset
+ fact_calls.add_argument("--limit", type=int, default=50, help="Maximum rows; use 0 for all")
+ fact_calls.add_argument("--sort", choices=DIAGNOSTIC_CALL_SORT_CHOICES, default="tokens")  # call sort keys, not the fact sort keys
+ fact_calls.add_argument("--direction", choices=DIAGNOSTIC_DIRECTION_CHOICES, default="desc")
+ fact_calls.add_argument("--json", action="store_true", dest="as_json")
+
+
+def _add_diagnostics_base_filters(parser: argparse.ArgumentParser) -> None:  # Add the call-level filters shared by every diagnostics subcommand.
+ parser.add_argument("--since", help="Only include calls at or after this ISO date/time")
+ parser.add_argument("--until", help="Only include calls at or before this ISO date/time")
+ parser.add_argument("--model")  # optional; defaults to None when omitted
+ parser.add_argument("--effort")
+ parser.add_argument("--thread")
+ parser.add_argument("--min-tokens", type=int)  # stored as args.min_tokens
+ parser.add_argument("--include-archived", action="store_true")  # False unless the flag is given
+
+
+def _add_diagnostics_fact_filters(parser: argparse.ArgumentParser) -> None:  # Add the base filters plus the optional fact-level filters.
+ _add_diagnostics_base_filters(parser)
+ parser.add_argument("--fact-type")  # optional here; `fact-calls` declares its own required version
+ parser.add_argument("--fact-name")
+ parser.add_argument("--fact-category")
+
+
+def _add_diagnostics_fact_sort(
+ parser: argparse.ArgumentParser,
+ *,
+ default_limit: int,  # keyword-only default for --limit
+) -> None:  # Add the --limit/--sort/--direction/--json options used by fact-level subcommands.
+ parser.add_argument("--limit", type=int, default=default_limit, help="Maximum rows; use 0 for all")
+ parser.add_argument("--sort", choices=DIAGNOSTIC_FACT_SORT_CHOICES, default="uncached")
+ parser.add_argument("--direction", choices=DIAGNOSTIC_DIRECTION_CHOICES, default="desc")
+ parser.add_argument("--json", action="store_true", dest="as_json")  # stored as args.as_json
+
+
def _add_session_parser(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
session = subparsers.add_parser("session", help="Show one session's usage")
session.add_argument("session_id", nargs="?")
diff --git a/src/codex_usage_tracker/context.py b/src/codex_usage_tracker/context.py
index 759a0e6..4fe3de3 100644
--- a/src/codex_usage_tracker/context.py
+++ b/src/codex_usage_tracker/context.py
@@ -23,6 +23,37 @@
"Tool output hidden for this request. Reload with include_tool_output=true to inspect "
"redacted, size-limited output."
)
+_SAFE_STRUCTURED_EVENT_TYPES = frozenset(  # event types summarized through the field allow-list below
+ {
+ "image_generation_end",
+ "mcp_tool_call_end",
+ "patch_apply_end",
+ "skill_completed",
+ "skill_invoked",
+ "skill_selected",
+ "skill_started",
+ "skill_used",
+ "task_complete",
+ "thread_rolled_back",
+ "turn_aborted",
+ "web_search_end",
+ }
+)
+_SAFE_STRUCTURED_EVENT_FIELDS = (  # payload keys copied into a summary, and only when the value is a scalar
+ "type",
+ "call_id",
+ "turn_id",
+ "phase",
+ "status",
+ "duration_ms",
+ "num_turns",
+ "started_at",
+ "completed_at",
+ "time_to_first_token_ms",
+ "tool_name",
+ "server_name",
+ "skill_name",
+)
def load_call_context(
@@ -166,6 +197,7 @@ def _read_context_entries(
current_turn_id: str | None = None
collecting = target_turn_id is None
pending_compactions: list[dict[str, Any]] = []
+ pending_diagnostic_events: list[dict[str, Any]] = []
full_serialized_analysis = context_mode == CONTEXT_MODE_FULL
encoding, estimator = (
_context_encoding(model or "")
@@ -225,8 +257,10 @@ def _read_context_entries(
)
)
candidates.extend(pending_compactions)
+ candidates.extend(pending_diagnostic_events)
candidates.extend(carried_compactions)
pending_compactions = []
+ pending_diagnostic_events = []
continue
if collecting:
@@ -261,6 +295,22 @@ def _read_context_entries(
]
continue
+ if (
+ not collecting
+ and summarized is not None
+ and summarized.get("carry_into_next_turn") is True
+ ):
+ pending_diagnostic_events = [
+ *pending_diagnostic_events,
+ _summarized_context_entry(
+ line_number,
+ timestamp,
+ entry_type,
+ summarized,
+ ),
+ ][-8:]
+ continue
+
if not collecting:
continue
@@ -590,6 +640,10 @@ def _summarize_event_msg(
"token_usage": token_usage,
}
+ safe_structured = _summarize_safe_structured_event(event_type, payload)
+ if safe_structured is not None:
+ return safe_structured
+
if "message" in payload:
return {"label": event_type, "text": _optional_str(payload.get("message")) or ""}
@@ -608,6 +662,30 @@ def _summarize_event_msg(
return {"label": event_type, "text": _jsonish(compact)} if compact else None
+def _summarize_safe_structured_event(
+ event_type: str,
+ payload: dict[str, Any],
+) -> dict[str, Any] | None:  # Summarize an allow-listed event from allow-listed scalar fields; None for any other event type.
+ if event_type not in _SAFE_STRUCTURED_EVENT_TYPES:
+ return None  # caller falls through to its generic summarization
+ compact: dict[str, Any] = {"type": event_type}
+ for key in _SAFE_STRUCTURED_EVENT_FIELDS:
+ if key == "type" or key not in payload:  # "type" is already set from event_type
+ continue
+ value = payload.get(key)
+ if _is_safe_structured_scalar(value):  # non-scalar values (e.g. dicts, lists) are dropped
+ compact[key] = value
+ return {
+ "label": event_type,
+ "text": _jsonish(compact),
+ "carry_into_next_turn": True,  # lets _read_context_entries buffer this entry while it is not yet collecting
+ }
+
+
+def _is_safe_structured_scalar(value: object) -> bool:  # True for None, str, int, float or bool; False for any container or other object.
+ return value is None or isinstance(value, (str, int, float, bool))
+
+
def _token_count_summary(info: dict[str, Any]) -> dict[str, Any]:
return {
"last_token_usage": _token_usage_summary(info.get("last_token_usage")),
diff --git a/src/codex_usage_tracker/dashboard.py b/src/codex_usage_tracker/dashboard.py
index 1b07037..d014fde 100644
--- a/src/codex_usage_tracker/dashboard.py
+++ b/src/codex_usage_tracker/dashboard.py
@@ -76,6 +76,7 @@
("actions_script_src", "__ACTIONS_SCRIPT_SRC__", "dashboard_actions.js"),
("live_script_src", "__LIVE_SCRIPT_SRC__", "dashboard_live.js"),
("events_script_src", "__EVENTS_SCRIPT_SRC__", "dashboard_events.js"),
+ ("diagnostics_script_src", "__DIAGNOSTICS_SCRIPT_SRC__", "dashboard_diagnostics.js"),
(
"call_diagnostics_script_src",
"__CALL_DIAGNOSTICS_SCRIPT_SRC__",
@@ -319,6 +320,7 @@ def render_dashboard_html(
actions_script_src: str | None = None,
live_script_src: str | None = None,
events_script_src: str | None = None,
+ diagnostics_script_src: str | None = None,
call_diagnostics_script_src: str | None = None,
call_investigator_script_src: str | None = None,
script_src: str | None = None,
@@ -345,6 +347,7 @@ def render_dashboard_html(
"actions_script_src": actions_script_src,
"live_script_src": live_script_src,
"events_script_src": events_script_src,
+ "diagnostics_script_src": diagnostics_script_src,
"call_diagnostics_script_src": call_diagnostics_script_src,
"call_investigator_script_src": call_investigator_script_src,
"script_src": script_src,
diff --git a/src/codex_usage_tracker/diagnostic_facts.py b/src/codex_usage_tracker/diagnostic_facts.py
new file mode 100644
index 0000000..52f2160
--- /dev/null
+++ b/src/codex_usage_tracker/diagnostic_facts.py
@@ -0,0 +1,636 @@
+"""Aggregate-only diagnostic fact classification for Codex JSONL events."""
+
+from __future__ import annotations
+
+import json
+import re
+import shlex
+from dataclasses import asdict, replace
+from typing import Any
+
+from codex_usage_tracker.models import DiagnosticFact
+
# Evidence scope recorded on every fact built in this module.
EVIDENCE_SCOPE_BETWEEN_TOKEN_COUNTS = "between_token_counts"
# A label is only kept when the whole stripped string matches this pattern.
SAFE_STRUCTURED_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:-]{1,80}$")
# Payload types treated as explicit skill events.
SKILL_EVENT_TYPES = frozenset(
    (
        "skill_completed",
        "skill_invoked",
        "skill_selected",
        "skill_started",
        "skill_used",
    )
)
# Tool labels (or label suffixes) whose payload is inspected for a command.
SHELL_TOOL_NAMES = frozenset(
    (
        "bash",
        "exec_command",
        "functions.exec_command",
        "run_command",
        "shell",
        "terminal",
    )
)
# Executable basenames counted as search/read activity.
SEARCH_READ_COMMANDS = frozenset(
    (
        "cat",
        "fd",
        "find",
        "grep",
        "head",
        "ls",
        "nl",
        "rg",
        "sed",
        "tail",
        "wc",
    )
)

# Rank of each confidence label; a higher rank wins when facts are merged.
CONFIDENCE_ORDER = {"unknown": 0, "low": 1, "medium": 2, "high": 3}
+
+
def diagnostic_facts_from_envelope(
    envelope: object,
    *,
    line_number: int,
) -> tuple[DiagnosticFact, ...]:
    """Return safe diagnostic facts from one JSONL envelope.

    Only ``event_msg`` and ``response_item`` envelopes whose payload carries a
    string ``type`` produce facts; every other input yields an empty tuple.
    A recognised payload type contributes one fixed fact, followed by any
    structured tool/skill/command facts derived from the payload.
    """

    if not isinstance(envelope, dict):
        return ()
    raw_payload = envelope.get("payload")
    payload = raw_payload if isinstance(raw_payload, dict) else {}
    entry_type = envelope.get("type")
    payload_type = _optional_str(payload.get("type"))
    timestamp = _optional_str(envelope.get("timestamp"))
    if payload_type is None:
        return ()

    # Each table maps payload type -> (fact_type, fact_name, category, confidence).
    if entry_type == "event_msg":
        classifications = {
            "context_compacted": ("compaction", "post_compaction", "context", "high"),
            "patch_apply_end": ("outcome", "patch_applied", "patch", "high"),
            "task_complete": ("outcome", "task_complete", "task", "high"),
            "thread_rolled_back": ("outcome", "thread_rolled_back", "failure", "high"),
            "turn_aborted": ("outcome", "turn_aborted", "turn", "high"),
            "mcp_tool_call_end": ("tool", "mcp_tool_call_end", "mcp", "medium"),
            "web_search_end": ("tool", "web_search_end", "search", "medium"),
            "image_generation_end": ("tool", "image_generation_end", "media", "medium"),
        }
    elif entry_type == "response_item":
        classifications = {
            "function_call": ("tool", "function_call", "function", "low"),
            "function_call_output": ("tool", "function_call_output", "function", "medium"),
            "tool_search_call": ("tool", "tool_search_call", "search", "low"),
            "tool_search_output": ("tool", "tool_search_output", "search", "medium"),
        }
    else:
        return ()

    collected: list[DiagnosticFact] = []
    matched = classifications.get(payload_type)
    if matched is not None:
        fact_type, fact_name, category, confidence = matched
        collected.append(
            _fact(
                fact_type=fact_type,
                fact_name=fact_name,
                category=category,
                confidence=confidence,
                timestamp=timestamp,
                line_number=line_number,
            )
        )
    collected.extend(
        _structured_tool_and_skill_facts(
            entry_type=entry_type,
            payload=payload,
            payload_type=payload_type,
            timestamp=timestamp,
            line_number=line_number,
        )
    )
    return tuple(collected)
+
+
def _first_safe_label(*candidates: object) -> str | None:
    """Return the first candidate that normalizes to a safe label, else ``None``."""
    for candidate in candidates:
        label = _safe_structured_label(candidate)
        if label:
            return label
    return None


def _structured_tool_and_skill_facts(
    *,
    entry_type: object,
    payload: dict[str, Any],
    payload_type: str,
    timestamp: str | None,
    line_number: int,
) -> tuple[DiagnosticFact, ...]:
    """Classify safe structured labels without persisting args or outputs.

    Facts are emitted in a fixed order: function / MCP tool facts for the tool
    label, MCP server, skill, then shell command family and search/read
    activity. The shell command string is inspected to pick a family, but only
    the family name is placed on a fact.

    Args:
        entry_type: The envelope ``type`` value.
        payload: The envelope payload dictionary.
        payload_type: The payload ``type`` string.
        timestamp: Envelope timestamp copied onto every emitted fact.
        line_number: Source line copied onto every emitted fact.

    Returns:
        The emitted facts, possibly empty.
    """

    facts: list[DiagnosticFact] = []

    def emit(fact_type: str, fact_name: str, category: str, confidence: str) -> None:
        facts.append(
            _fact(
                fact_type=fact_type,
                fact_name=fact_name,
                category=category,
                confidence=confidence,
                timestamp=timestamp,
                line_number=line_number,
            )
        )

    tool_label: str | None = None
    if entry_type == "response_item" and payload_type in {
        "function_call",
        "function_call_output",
    }:
        tool_label = _safe_structured_label(payload.get("name"))
        if tool_label:
            emit(
                "function",
                tool_label,
                "function",
                "medium" if payload_type == "function_call_output" else "low",
            )
            if _looks_like_mcp_tool_label(tool_label):
                emit("mcp_tool", tool_label, "mcp", "medium")
    if entry_type == "event_msg" and payload_type in {
        "mcp_tool_call_begin",
        "mcp_tool_call_end",
    }:
        mcp_confidence = "high" if payload_type == "mcp_tool_call_end" else "medium"
        # Try every key in turn: an unsafe or non-string value under an earlier
        # key must not hide a usable label stored under a later key.
        tool_label = _first_safe_label(
            payload.get("tool_name"),
            payload.get("name"),
            payload.get("tool"),
        )
        if tool_label:
            emit("mcp_tool", tool_label, "mcp", mcp_confidence)
        server_label = _first_safe_label(
            payload.get("server_name"),
            payload.get("server"),
            payload.get("mcp_server"),
        )
        if server_label:
            emit("mcp_server", server_label, "mcp", mcp_confidence)
    skill_label = _skill_label(payload)
    if skill_label and (payload_type in SKILL_EVENT_TYPES or "skill" in payload):
        emit(
            "skill",
            skill_label,
            "skill",
            "high" if payload_type in SKILL_EVENT_TYPES else "medium",
        )
    command = _shell_command_from_payload(payload, tool_label=tool_label)
    if command is not None:
        family = _command_family(command)
        emit(
            "command_family",
            family,
            "command",
            "medium" if family != "unknown_command" else "low",
        )
        if _is_search_read_command(command):
            emit("activity", "search_read_command", "read", "medium")
    return tuple(facts)
+
+
def _skill_label(payload: dict[str, Any]) -> str | None:
    """Return the first safe skill label found in ``payload``, else ``None``.

    Candidates are checked in order: ``skill_name``, ``skill_id``, ``skill``
    and, when ``skill`` is a dict, its ``name`` then ``id``. Each candidate is
    validated on its own, so an unsafe or non-string value under an earlier
    key no longer hides a usable label under a later key.
    """
    skill = payload.get("skill")
    candidates: list[object] = [
        payload.get("skill_name"),
        payload.get("skill_id"),
        skill,
    ]
    if isinstance(skill, dict):
        candidates.extend((skill.get("name"), skill.get("id")))
    for candidate in candidates:
        label = _safe_structured_label(candidate)
        if label:
            return label
    return None
+
+
+def _shell_command_from_payload(
+ payload: dict[str, Any],
+ *,
+ tool_label: str | None,
+) -> str | None:
+ if not tool_label or not _is_shell_tool_label(tool_label):
+ return None
+ for key in ("cmd", "command"):
+ value = payload.get(key)
+ if isinstance(value, str):
+ return value
+ arguments = _arguments_dict(payload.get("arguments"))
+ for key in ("cmd", "command"):
+ value = arguments.get(key)
+ if isinstance(value, str):
+ return value
+ return None
+
+
+def _arguments_dict(value: object) -> dict[str, Any]:
+ if isinstance(value, dict):
+ return value
+ if isinstance(value, str):
+ try:
+ loaded = json.loads(value)
+ except json.JSONDecodeError:
+ return {}
+ if isinstance(loaded, dict):
+ return loaded
+ return {}
+
+
def _command_family(command: str) -> str:
    """Map a shell command string to a coarse family name.

    Leading wrappers and assignments are stripped first. Python invocations
    report the ``-m`` module family when it is recognised, else ``python``.
    Commands outside the known set report ``unknown_command``.
    """
    argv = _strip_command_wrappers(_command_tokens(command))
    if not argv:
        return "unknown_command"
    executable = _command_basename(argv[0])
    if executable in ("py.test", "pytest"):
        return "pytest"
    if _is_python_command(executable):
        return _python_module_family(argv) or "python"
    if executable in ("git", "mypy", "node", "npm", "pnpm", "ruff"):
        return executable
    return "unknown_command"
+
+
+def _command_tokens(command: str) -> list[str]:
+ try:
+ return shlex.split(command, posix=True)
+ except ValueError:
+ return []
+
+
+def _strip_command_wrappers(tokens: list[str]) -> list[str]:
+ remaining = list(tokens)
+ while remaining:
+ while remaining and _looks_like_assignment(remaining[0]):
+ remaining.pop(0)
+ if not remaining:
+ break
+ base = _command_basename(remaining[0])
+ if base in {"command", "env", "sudo"}:
+ remaining.pop(0)
+ continue
+ break
+ return remaining
+
+
+def _looks_like_assignment(token: str) -> bool:
+ return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*=.*", token))
+
+
+def _python_module_family(tokens: list[str]) -> str | None:
+ for index, token in enumerate(tokens[:-1]):
+ if token != "-m":
+ continue
+ module = _command_basename(tokens[index + 1]).split(".", 1)[0]
+ if module in {"mypy", "pytest", "ruff"}:
+ return module
+ return None
+ return None
+
+
def _is_search_read_command(command: str) -> bool:
    """Report whether the unwrapped command runs a known search/read tool."""
    argv = _strip_command_wrappers(_command_tokens(command))
    if not argv:
        return False
    return _command_basename(argv[0]) in SEARCH_READ_COMMANDS
+
+
+def _is_python_command(base: str) -> bool:
+ return base == "py" or base == "python" or base.startswith("python")
+
+
+def _command_basename(token: str) -> str:
+ return re.split(r"[\\/]", token)[-1].lower()
+
+
def _is_shell_tool_label(label: str) -> bool:
    """Report whether ``label`` names a shell tool.

    The lower-cased label matches either in full or by the part that follows
    its last ``.`` and then its last ``__``.
    """
    normalized = label.lower()
    if normalized in SHELL_TOOL_NAMES:
        return True
    after_dot = normalized.rsplit(".", 1)[-1]
    return after_dot.rsplit("__", 1)[-1] in SHELL_TOOL_NAMES
+
+
+def _looks_like_mcp_tool_label(label: str) -> bool:
+ return label.startswith("mcp__")
+
+
+def _safe_structured_label(value: object) -> str | None:
+ if not isinstance(value, str):
+ return None
+ stripped = value.strip()
+ if not SAFE_STRUCTURED_LABEL_RE.fullmatch(stripped):
+ return None
+ return stripped.lower()
+
+
+def _with_derived_loop_facts(
+ segment: tuple[DiagnosticFact, ...],
+) -> tuple[DiagnosticFact, ...]:
+ enriched = tuple(segment)
+ search_read_facts = [
+ fact
+ for fact in segment
+ if fact.fact_category in {"read", "search"}
+ or (fact.fact_type == "activity" and fact.fact_name == "search_read_command")
+ ]
+ search_read_count = sum(fact.event_count for fact in search_read_facts)
+ if search_read_count >= 3:
+ enriched = add_diagnostic_fact(
+ enriched,
+ _derived_loop_fact(
+ fact_name="search_read_loop",
+ category="search",
+ event_count=search_read_count,
+ source_facts=search_read_facts,
+ ),
+ )
+ retry_facts = [
+ fact
+ for fact in segment
+ if fact.fact_category in {"failure", "retry"}
+ or fact.fact_name in {"thread_rolled_back", "turn_aborted"}
+ ]
+ retry_count = sum(fact.event_count for fact in retry_facts)
+ if retry_count >= 2:
+ enriched = add_diagnostic_fact(
+ enriched,
+ _derived_loop_fact(
+ fact_name="retry_or_abort_loop",
+ category="failure",
+ event_count=retry_count,
+ source_facts=retry_facts,
+ ),
+ )
+ return enriched
+
+
def _derived_loop_fact(
    *,
    fact_name: str,
    category: str,
    event_count: int,
    source_facts: list[DiagnosticFact],
) -> DiagnosticFact:
    """Build a medium-confidence ``loop`` fact spanning ``source_facts``.

    The fact covers the lowest first line and highest last line among the
    sources, with timestamps taken from the sources sitting on those lines.
    """
    earliest_line = _min_optional_int(
        *(fact.first_source_line for fact in source_facts)
    )
    latest_line = _max_optional_int(
        *(fact.last_source_line for fact in source_facts)
    )
    first_seen = _timestamp_for_source_line(
        source_facts,
        source_line=earliest_line,
        first=True,
    )
    last_seen = _timestamp_for_source_line(
        source_facts,
        source_line=latest_line,
        first=False,
    )
    return DiagnosticFact(
        record_id=None,
        fact_type="loop",
        fact_name=fact_name,
        fact_category=category,
        event_count=event_count,
        confidence="medium",
        first_event_timestamp=first_seen,
        last_event_timestamp=last_seen,
        first_source_line=earliest_line,
        last_source_line=latest_line,
        evidence_scope=EVIDENCE_SCOPE_BETWEEN_TOKEN_COUNTS,
        raw_content_included=0,
    )
+
+
+def _timestamp_for_source_line(
+ facts: list[DiagnosticFact],
+ *,
+ source_line: int | None,
+ first: bool,
+) -> str | None:
+ for fact in sorted(
+ facts,
+ key=lambda item: _source_line_sort_key(
+ item.first_source_line if first else item.last_source_line
+ ),
+ ):
+ candidate_line = fact.first_source_line if first else fact.last_source_line
+ if candidate_line == source_line:
+ return fact.first_event_timestamp if first else fact.last_event_timestamp
+ return None
+
+
def add_diagnostic_fact(
    segment: tuple[DiagnosticFact, ...],
    fact: DiagnosticFact,
) -> tuple[DiagnosticFact, ...]:
    """Merge one fact into the pending between-token-count segment.

    Facts are keyed by ``(fact_type, fact_name)``. A fact whose key already
    exists is merged into the stored entry; otherwise it is appended.
    """

    merged: dict[tuple[str, str], DiagnosticFact] = {}
    for item in segment:
        merged[(item.fact_type, item.fact_name)] = item
    identity = (fact.fact_type, fact.fact_name)
    current = merged.get(identity)
    if current is None:
        merged[identity] = fact
    else:
        merged[identity] = merge_diagnostic_facts(current, fact)
    return tuple(merged.values())
+
+
def merge_diagnostic_facts(
    existing: DiagnosticFact,
    incoming: DiagnosticFact,
) -> DiagnosticFact:
    """Combine repeated facts without storing raw evidence.

    Returns a copy of ``existing`` with summed event counts, the stronger
    confidence, the widened source-line range with matching timestamps, and
    the larger ``raw_content_included`` flag. Other fields keep the values
    from ``existing``.
    """

    combined = {
        "event_count": existing.event_count + incoming.event_count,
        "confidence": strongest_confidence([existing.confidence, incoming.confidence]),
        "first_event_timestamp": _earliest_event_timestamp(existing, incoming),
        "last_event_timestamp": _latest_event_timestamp(existing, incoming),
        "first_source_line": _min_optional_int(
            existing.first_source_line,
            incoming.first_source_line,
        ),
        "last_source_line": _max_optional_int(
            existing.last_source_line,
            incoming.last_source_line,
        ),
        "raw_content_included": max(
            existing.raw_content_included,
            incoming.raw_content_included,
        ),
    }
    return replace(existing, **combined)
+
+
def assign_record_id_to_diagnostic_facts(
    segment: tuple[DiagnosticFact, ...],
    *,
    record_id: str,
) -> tuple[DiagnosticFact, ...]:
    """Attach pending segment facts to the token-count row they describe.

    Derived loop facts are added first; the result is ordered by
    ``(fact_type, fact_name)`` and every fact is copied with ``record_id``.
    """

    def ordering(item: DiagnosticFact) -> tuple[str, str]:
        return (item.fact_type, item.fact_name)

    ordered = sorted(_with_derived_loop_facts(segment), key=ordering)
    return tuple(replace(fact, record_id=record_id) for fact in ordered)
+
+
def diagnostic_fact_to_json(fact: DiagnosticFact) -> dict[str, Any]:
    """Encode a pending diagnostic fact for parser-state persistence.

    The result holds every dataclass field except ``record_id``.
    """

    return {
        field_name: field_value
        for field_name, field_value in asdict(fact).items()
        if field_name != "record_id"
    }
+
+
def diagnostic_fact_from_json(value: object) -> DiagnosticFact | None:
    """Decode a pending diagnostic fact from aggregate-only parser state.

    Returns ``None`` unless ``value`` is a dict with non-empty string
    ``fact_type`` and ``fact_name``. Missing or invalid optional fields fall
    back to defaults: count 1, confidence ``medium``, the between-token-counts
    scope, and a raw-content flag of 0.
    """

    if not isinstance(value, dict):
        return None

    def text(key: str) -> str | None:
        return _optional_str(value.get(key))

    def line(key: str) -> int | None:
        return _positive_int(value.get(key))

    fact_type = text("fact_type")
    fact_name = text("fact_name")
    if not (fact_type and fact_name):
        return None
    return DiagnosticFact(
        record_id=None,
        fact_type=fact_type,
        fact_name=fact_name,
        fact_category=text("fact_category"),
        event_count=_positive_int(value.get("event_count")) or 1,
        confidence=text("confidence") or "medium",
        first_event_timestamp=text("first_event_timestamp"),
        last_event_timestamp=text("last_event_timestamp"),
        first_source_line=line("first_source_line"),
        last_source_line=line("last_source_line"),
        evidence_scope=text("evidence_scope") or EVIDENCE_SCOPE_BETWEEN_TOKEN_COUNTS,
        raw_content_included=1 if value.get("raw_content_included") == 1 else 0,
    )
+
+
def strongest_confidence(values: list[str]) -> str:
    """Return the strongest confidence label in a stable order.

    An empty list yields ``unknown``. Labels missing from
    ``CONFIDENCE_ORDER`` rank as 0, and ties keep the earliest label.
    """

    def rank(label: str) -> int:
        return CONFIDENCE_ORDER.get(label, 0)

    if values:
        return max(values, key=rank)
    return "unknown"
+
+
def _fact(
    *,
    fact_type: str,
    fact_name: str,
    category: str,
    confidence: str,
    timestamp: str | None,
    line_number: int,
) -> DiagnosticFact:
    """Build a single-event fact anchored at one timestamp and source line.

    The fact has no record id yet, counts one event, and marks raw content
    as not included.
    """
    fields = {
        "record_id": None,
        "fact_type": fact_type,
        "fact_name": fact_name,
        "fact_category": category,
        "event_count": 1,
        "confidence": confidence,
        # One event: first and last share the same timestamp and line.
        "first_event_timestamp": timestamp,
        "last_event_timestamp": timestamp,
        "first_source_line": line_number,
        "last_source_line": line_number,
        "evidence_scope": EVIDENCE_SCOPE_BETWEEN_TOKEN_COUNTS,
        "raw_content_included": 0,
    }
    return DiagnosticFact(**fields)
+
+
def _earliest_event_timestamp(
    existing: DiagnosticFact,
    incoming: DiagnosticFact,
) -> str | None:
    """Return the first-event timestamp of the fact that starts earlier.

    ``incoming`` wins only when its first source line sorts strictly before
    ``existing``'s; ties keep ``existing``.
    """
    incoming_key = _source_line_sort_key(incoming.first_source_line)
    existing_key = _source_line_sort_key(existing.first_source_line)
    chosen = incoming if incoming_key < existing_key else existing
    return chosen.first_event_timestamp
+
+
def _latest_event_timestamp(
    existing: DiagnosticFact,
    incoming: DiagnosticFact,
) -> str | None:
    """Return the last-event timestamp of the fact that ends later.

    ``incoming`` wins when its last source line sorts at or after
    ``existing``'s, so ties go to ``incoming``.
    """
    incoming_key = _source_line_sort_key(incoming.last_source_line)
    existing_key = _source_line_sort_key(existing.last_source_line)
    chosen = incoming if incoming_key >= existing_key else existing
    return chosen.last_event_timestamp
+
+
+def _source_line_sort_key(value: int | None) -> int:
+ return value if value is not None else -1
+
+
+def _min_optional_int(*items: int | None) -> int | None:
+ values = [value for value in items if value is not None]
+ return min(values) if values else None
+
+
+def _max_optional_int(*items: int | None) -> int | None:
+ values = [value for value in items if value is not None]
+ return max(values) if values else None
+
+
+def _optional_str(value: object) -> str | None:
+ return value if isinstance(value, str) else None
+
+
+def _positive_int(value: object) -> int | None:
+ if isinstance(value, int) and not isinstance(value, bool) and value > 0:
+ return value
+ return None
diff --git a/src/codex_usage_tracker/diagnostic_reports.py b/src/codex_usage_tracker/diagnostic_reports.py
new file mode 100644
index 0000000..eab2344
--- /dev/null
+++ b/src/codex_usage_tracker/diagnostic_reports.py
@@ -0,0 +1,503 @@
+"""Shared diagnostics report builders for CLI and localhost API surfaces."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from codex_usage_tracker.projects import apply_project_privacy_to_rows, validate_privacy_mode
+from codex_usage_tracker.store import (
+ query_diagnostic_fact_call_count,
+ query_diagnostic_fact_calls,
+ query_diagnostic_facts,
+ query_diagnostic_summary,
+)
+
# Schema identifier stamped on every diagnostics payload.
DIAGNOSTICS_SCHEMA = "codex-usage-tracker-diagnostics-v1"
# Caveats copied into every diagnostics payload.
DIAGNOSTICS_NOTES = [
    "Associated token totals are not additive when one call has multiple diagnostic facts.",
    "Diagnostics use structured event metadata only; raw context remains explicit and on-demand.",
]
# Accepted ``sort`` values for the summary and facts reports.
DIAGNOSTIC_FACT_SORT_CHOICES = ("uncached", "tokens", "calls", "occurrences", "time", "fact")
# Accepted ``sort`` values for the fact-calls report.
DIAGNOSTIC_CALL_SORT_CHOICES = (
    "tokens",
    "time",
    "uncached",
    "input",
    "cached",
    "output",
    "reasoning",
    "cache",
    "model",
    "effort",
    "thread",
)
DIAGNOSTIC_DIRECTION_CHOICES = ("asc", "desc")
# Fact types kept by the ``tools`` fact group.
DIAGNOSTIC_TOOL_FACT_TYPES = {
    "activity", "command_family", "function", "mcp_server", "mcp_tool", "skill", "tool",
}
+
+
@dataclass(frozen=True)
class DiagnosticsReport:
    """Resolved diagnostics payload for one display surface."""

    payload: dict[str, Any]

    def render(self) -> str:
        """Render the payload rows as plain text for the payload's view.

        A missing, non-list, or empty ``rows`` value yields a fixed
        "no match" message. Views other than ``fact-calls`` and ``summary``
        are rendered as fact rows.
        """
        rows = self.payload.get("rows")
        if not (isinstance(rows, list) and rows):
            return "No diagnostic facts matched the requested filters."
        view = self.payload.get("view")
        if view == "summary":
            return _render_summary(rows)
        if view == "fact-calls":
            return _render_fact_calls(rows)
        return _render_facts(rows)
+
+
def build_diagnostics_summary_report(
    *,
    db_path: Path,
    limit: int = 20,
    since: str | None = None,
    until: str | None = None,
    model: str | None = None,
    effort: str | None = None,
    thread: str | None = None,
    min_tokens: int | None = None,
    fact_type: str | None = None,
    fact_name: str | None = None,
    fact_category: str | None = None,
    include_archived: bool = False,
    sort: str = "uncached",
    direction: str = "desc",
) -> DiagnosticsReport:
    """Build diagnostic summaries grouped by fact type.

    The store is queried with ``limit=0`` and the limit is applied here, so
    ``total_matched_rows`` counts every matching row. Each returned row gets
    an ``action_hint`` based on its fact type and top fact name.

    Raises:
        ValueError: If ``sort`` or ``direction`` is not an accepted choice.
    """

    _validate_fact_sort(sort)
    _validate_direction(direction)
    normalized_limit = _normalize_limit(limit)
    # Criteria passed to both the store query and the echoed filters.
    criteria = dict(
        since=since,
        until=until,
        model=model,
        effort=effort,
        thread=thread,
        min_tokens=min_tokens,
        fact_type=fact_type,
        fact_name=fact_name,
        fact_category=fact_category,
        include_archived=include_archived,
        sort=sort,
        direction=direction,
    )
    all_rows = query_diagnostic_summary(db_path=db_path, limit=0, **criteria)
    rows = _limit_rows(all_rows, normalized_limit)
    for row in rows:
        row["action_hint"] = _action_hint(
            fact_type=str(row.get("fact_type") or ""),
            fact_name=str(row.get("top_fact_name") or ""),
        )
    payload = _diagnostics_payload(
        view="summary",
        rows=rows,
        total_matched_rows=len(all_rows),
        filters=_filters(
            fact_group=None,
            limit=normalized_limit,
            offset=0,
            **criteria,
        ),
    )
    return DiagnosticsReport(payload)
+
+
def build_diagnostics_facts_report(
    *,
    db_path: Path,
    limit: int = 50,
    since: str | None = None,
    until: str | None = None,
    model: str | None = None,
    effort: str | None = None,
    thread: str | None = None,
    min_tokens: int | None = None,
    fact_type: str | None = None,
    fact_name: str | None = None,
    fact_category: str | None = None,
    include_archived: bool = False,
    sort: str = "uncached",
    direction: str = "desc",
    fact_group: str | None = None,
    view: str = "facts",
) -> DiagnosticsReport:
    """Build diagnostic fact rows with associated token totals.

    The store is queried with ``limit=0``; the optional fact-group filter and
    then the limit are applied here, so ``total_matched_rows`` counts the rows
    left after group filtering. Each returned row gets an ``action_hint``.

    Raises:
        ValueError: If ``sort``, ``direction`` or ``fact_group`` is not an
            accepted value.
    """

    _validate_fact_sort(sort)
    _validate_direction(direction)
    _validate_fact_group(fact_group)
    normalized_limit = _normalize_limit(limit)
    # Criteria passed to both the store query and the echoed filters.
    criteria = dict(
        since=since,
        until=until,
        model=model,
        effort=effort,
        thread=thread,
        min_tokens=min_tokens,
        fact_type=fact_type,
        fact_name=fact_name,
        fact_category=fact_category,
        include_archived=include_archived,
        sort=sort,
        direction=direction,
    )
    all_rows = query_diagnostic_facts(db_path=db_path, limit=0, **criteria)
    grouped_rows = _filter_fact_group(all_rows, fact_group)
    rows = _limit_rows(grouped_rows, normalized_limit)
    for row in rows:
        row["action_hint"] = _action_hint(
            fact_type=str(row.get("fact_type") or ""),
            fact_name=str(row.get("fact_name") or ""),
        )
    payload = _diagnostics_payload(
        view=view,
        rows=rows,
        total_matched_rows=len(grouped_rows),
        filters=_filters(
            fact_group=fact_group,
            limit=normalized_limit,
            offset=0,
            **criteria,
        ),
    )
    return DiagnosticsReport(payload)
+
+
def build_diagnostics_fact_calls_report(
    *,
    db_path: Path,
    fact_type: str,
    fact_name: str,
    limit: int = 50,
    offset: int = 0,
    since: str | None = None,
    until: str | None = None,
    model: str | None = None,
    effort: str | None = None,
    thread: str | None = None,
    min_tokens: int | None = None,
    include_archived: bool = False,
    sort: str = "tokens",
    direction: str = "desc",
    privacy_mode: str = "normal",
) -> DiagnosticsReport:
    """Build calls associated with one diagnostic fact.

    Limit and offset are passed to the store query, project privacy is
    applied to the returned rows, and a separate count query provides
    ``total_matched_rows``. Negative offsets are treated as 0.

    Raises:
        ValueError: If ``fact_type`` or ``fact_name`` is empty, or ``sort`` /
            ``direction`` is not an accepted choice.
    """

    if not fact_type:
        raise ValueError("fact_type is required")
    if not fact_name:
        raise ValueError("fact_name is required")
    _validate_call_sort(sort)
    _validate_direction(direction)
    privacy_mode = validate_privacy_mode(privacy_mode)
    normalized_limit = _normalize_limit(limit)
    normalized_offset = max(offset, 0)
    # Selection criteria shared by the page query and the count query.
    selection = dict(
        db_path=db_path,
        fact_type=fact_type,
        fact_name=fact_name,
        since=since,
        until=until,
        model=model,
        effort=effort,
        thread=thread,
        min_tokens=min_tokens,
        include_archived=include_archived,
    )
    page = query_diagnostic_fact_calls(
        limit=normalized_limit,
        offset=normalized_offset,
        sort=sort,
        direction=direction,
        **selection,
    )
    rows = apply_project_privacy_to_rows(page, privacy_mode=privacy_mode)
    total_matched = query_diagnostic_fact_call_count(**selection)
    truncated = (
        normalized_limit is not None
        and normalized_offset + len(rows) < total_matched
    )
    filters = _filters(
        since=since,
        until=until,
        model=model,
        effort=effort,
        thread=thread,
        min_tokens=min_tokens,
        fact_type=fact_type,
        fact_name=fact_name,
        fact_category=None,
        fact_group=None,
        include_archived=include_archived,
        sort=sort,
        direction=direction,
        limit=normalized_limit,
        offset=normalized_offset,
        privacy_mode=privacy_mode,
    )
    return DiagnosticsReport(
        _diagnostics_payload(
            view="fact-calls",
            rows=rows,
            total_matched_rows=total_matched,
            filters=filters,
            truncated=truncated,
        )
    )
+
+
def _diagnostics_payload(
    *,
    view: str,
    rows: list[dict[str, Any]],
    total_matched_rows: int,
    filters: dict[str, Any],
    truncated: bool | None = None,
) -> dict[str, Any]:
    """Assemble the diagnostics payload dictionary.

    When ``truncated`` is not supplied it is derived from ``filters``: the
    payload is truncated if an integer limit is set and the offset plus the
    number of rows shown falls short of ``total_matched_rows``.
    """
    shown = len(rows)
    if truncated is None:
        start = int(filters.get("offset") or 0)
        has_limit = isinstance(filters.get("limit"), int)
        truncated = has_limit and start + shown < total_matched_rows
    return dict(
        schema=DIAGNOSTICS_SCHEMA,
        view=view,
        filters=filters,
        row_count=shown,
        total_matched_rows=total_matched_rows,
        truncated=truncated,
        raw_context_included=False,
        rows=rows,
        # Copy so callers cannot mutate the shared module-level notes list.
        notes=list(DIAGNOSTICS_NOTES),
    )
+
+
+def _filters(
+ *,
+ since: str | None,
+ until: str | None,
+ model: str | None,
+ effort: str | None,
+ thread: str | None,
+ min_tokens: int | None,
+ fact_type: str | None,
+ fact_name: str | None,
+ fact_category: str | None,
+ fact_group: str | None,
+ include_archived: bool,
+ sort: str,
+ direction: str,
+ limit: int | None,
+ offset: int,
+ privacy_mode: str = "normal",
+) -> dict[str, Any]:
+ return {
+ "since": since,
+ "until": until,
+ "model": model,
+ "effort": effort,
+ "thread": thread,
+ "min_tokens": min_tokens,
+ "fact_type": fact_type,
+ "fact_name": fact_name,
+ "fact_category": fact_category,
+ "fact_group": fact_group,
+ "include_archived": include_archived,
+ "sort": sort,
+ "direction": direction,
+ "limit": limit,
+ "offset": offset,
+ "privacy_mode": privacy_mode,
+ }
+
+
+def _action_hint(*, fact_type: str, fact_name: str) -> str:
+ if fact_type == "compaction" or fact_name == "post_compaction":
+ return "Review associated calls to see whether compaction reduced context or a fresh handoff would be cleaner."
+ if fact_type == "command_family":
+ if fact_name == "unknown_command":
+ return "Open associated calls when shell activity is high; command text is intentionally not stored."
+ return "Review repeated validation or command loops when associated uncached input is high."
+ if fact_type in {"mcp_server", "mcp_tool"}:
+ return "Inspect repeated MCP activity and narrow tool result scope when associated costs are high."
+ if fact_type == "skill":
+ return "Skill use is detected only from structured events; inspect associated calls for repeated workflow cost."
+ if fact_name in {"search_read_command", "search_read_loop"}:
+ return "Inspect repeated search/read loops or narrow the task before loading more source context."
+ if fact_name == "retry_or_abort_loop":
+ return "Inspect associated calls for interrupted work, rollback, or retry loops."
+ if fact_name == "function_call_output":
+ return "Inspect repeated large tool results when associated uncached input is high."
+ if fact_type == "function":
+ return "Check whether repeated function calls are carrying more context forward than needed."
+ if fact_type == "tool":
+ return "Check whether repeated tool activity is carrying forward more context than needed."
+ if fact_name == "patch_applied":
+ return "Likely productive work; verify tests or commit state captured the change."
+ if fact_name == "task_complete":
+ return "Consider archiving or writing a handoff before reviving the thread later."
+ if fact_name == "turn_aborted":
+ return "Inspect associated calls for interrupted work or retry loops."
+ return "Open associated high-cost calls when the pattern needs more context."
+
+
def _render_facts(rows: list[dict[str, Any]]) -> str:
    """Render fact rows as text: a header line, then one line per fact."""
    rendered = [_header("Fact", "Occ", "Calls", "Uncached", "Total")]
    rendered.extend(
        _line(
            f"{row.get('fact_type')}/{row.get('fact_name')}",
            _int(row.get("occurrences")),
            _int(row.get("associated_calls")),
            _int(row.get("associated_uncached_input_tokens")),
            _int(row.get("associated_total_tokens")),
        )
        for row in rows
    )
    return "\n".join(rendered)
+
+
def _render_summary(rows: list[dict[str, Any]]) -> str:
    """Render summary rows as text: a header line, then one line per type."""
    rendered = [_header("Type", "Occ", "Calls", "Uncached", "Top fact")]
    rendered.extend(
        _line(
            str(row.get("fact_type") or ""),
            _int(row.get("occurrences")),
            _int(row.get("associated_calls")),
            _int(row.get("associated_uncached_input_tokens")),
            str(row.get("top_fact_name") or ""),
        )
        for row in rows
    )
    return "\n".join(rendered)
+
+
def _render_fact_calls(rows: list[dict[str, Any]]) -> str:
    """Render call rows as text: a header line, then one line per call.

    Record ids are shortened to their first 12 characters.
    """
    rendered = [_header("Record", "Time", "Model", "Tokens", "Uncached")]
    rendered.extend(
        _line(
            str(row.get("record_id") or "")[:12],
            str(row.get("event_timestamp") or ""),
            str(row.get("model") or ""),
            _int(row.get("total_tokens")),
            _int(row.get("uncached_input_tokens")),
        )
        for row in rows
    )
    return "\n".join(rendered)
+
+
def _header(*columns: str) -> str:
    """Format the heading row exactly like a data row."""
    heading = _line(*columns)
    return heading
+
+
+def _line(*columns: str) -> str:
+ return " ".join(str(column) for column in columns)
+
+
+def _int(value: object) -> str:
+ if isinstance(value, int) and not isinstance(value, bool):
+ return f"{value:,}"
+ if isinstance(value, float):
+ return f"{value:,.0f}"
+ return "0"
+
+
+def _normalize_limit(limit: int) -> int | None:
+ return None if limit <= 0 else int(limit)
+
+
+def _limit_rows(rows: list[dict[str, Any]], limit: int | None) -> list[dict[str, Any]]:
+ return rows if limit is None else rows[:limit]
+
+
+def _filter_fact_group(
+ rows: list[dict[str, Any]],
+ fact_group: str | None,
+) -> list[dict[str, Any]]:
+ if fact_group is None:
+ return rows
+ if fact_group == "tools":
+ return [
+ row
+ for row in rows
+ if str(row.get("fact_type") or "") in DIAGNOSTIC_TOOL_FACT_TYPES
+ ]
+ raise ValueError(f"unknown diagnostic fact group: {fact_group}")
+
+
+def _validate_fact_group(fact_group: str | None) -> None:
+ if fact_group not in {None, "tools"}:
+ raise ValueError("fact_group must be one of: tools")
+
+
def _validate_fact_sort(sort: str) -> None:
    """Raise ``ValueError`` unless ``sort`` is an accepted fact sort key."""
    if sort in DIAGNOSTIC_FACT_SORT_CHOICES:
        return
    choices = ", ".join(DIAGNOSTIC_FACT_SORT_CHOICES)
    raise ValueError(f"sort must be one of: {choices}")
+
+
def _validate_call_sort(sort: str) -> None:
    """Raise ``ValueError`` unless ``sort`` is an accepted call sort key."""
    if sort in DIAGNOSTIC_CALL_SORT_CHOICES:
        return
    choices = ", ".join(DIAGNOSTIC_CALL_SORT_CHOICES)
    raise ValueError(f"sort must be one of: {choices}")
+
+
def _validate_direction(direction: str) -> None:
    """Raise ``ValueError`` unless ``direction`` is an accepted sort direction."""
    if direction in DIAGNOSTIC_DIRECTION_CHOICES:
        return
    choices = ", ".join(DIAGNOSTIC_DIRECTION_CHOICES)
    raise ValueError(f"direction must be one of: {choices}")
diff --git a/src/codex_usage_tracker/json_contracts.py b/src/codex_usage_tracker/json_contracts.py
index 08973b3..bddcc11 100644
--- a/src/codex_usage_tracker/json_contracts.py
+++ b/src/codex_usage_tracker/json_contracts.py
@@ -126,6 +126,38 @@
}
},
},
+ "codex-usage-tracker-diagnostics-v1": {
+ "required": {
+ "view": str,
+ "filters": dict,
+ "row_count": int,
+ "total_matched_rows": int,
+ "truncated": bool,
+ "raw_context_included": bool,
+ "rows": list,
+ "notes": list,
+ },
+ "nested": {
+ "filters": {
+ "since": (str, NoneType),
+ "until": (str, NoneType),
+ "model": (str, NoneType),
+ "effort": (str, NoneType),
+ "thread": (str, NoneType),
+ "min_tokens": (int, NoneType),
+ "fact_type": (str, NoneType),
+ "fact_name": (str, NoneType),
+ "fact_category": (str, NoneType),
+ "fact_group": (str, NoneType),
+ "include_archived": bool,
+ "sort": str,
+ "direction": str,
+ "limit": (int, NoneType),
+ "offset": int,
+ "privacy_mode": str,
+ }
+ },
+ },
"codex-usage-tracker-session-v1": {
"required": {
"requested_session_id": (str, NoneType),
diff --git a/src/codex_usage_tracker/models.py b/src/codex_usage_tracker/models.py
index 364f8b7..0648427 100644
--- a/src/codex_usage_tracker/models.py
+++ b/src/codex_usage_tracker/models.py
@@ -98,6 +98,29 @@ def to_row(self) -> dict[str, object]:
return row
+@dataclass(frozen=True)
+class DiagnosticFact:
+ """One aggregate diagnostic fact associated with a usage-event record."""
+
+ # record_id may be None at construction time, but to_row() refuses to
+ # serialize a fact whose record_id is still None or empty.
+ record_id: str | None
+ fact_type: str
+ fact_name: str
+ fact_category: str | None
+ event_count: int = 1
+ confidence: str = "medium"
+ first_event_timestamp: str | None = None
+ last_event_timestamp: str | None = None
+ first_source_line: int | None = None
+ last_source_line: int | None = None
+ evidence_scope: str = "between_token_counts"
+ # NOTE(review): stored as an int rather than a bool; presumably a 0/1 flag
+ # for the persisted row. Confirm against the schema.
+ raw_content_included: int = 0
+
+ def to_row(self) -> dict[str, object]:
+ """Return all fields as a plain dict (via ``dataclasses.asdict``).
+
+ Raises:
+ ValueError: if ``record_id`` is None or empty.
+ """
+ if not self.record_id:
+ raise ValueError("diagnostic facts must have a record_id before persistence")
+ return asdict(self)
+
+
@dataclass(frozen=True)
class RefreshResult:
scanned_files: int
diff --git a/src/codex_usage_tracker/parser.py b/src/codex_usage_tracker/parser.py
index 7bde6a0..0a188c1 100644
--- a/src/codex_usage_tracker/parser.py
+++ b/src/codex_usage_tracker/parser.py
@@ -15,14 +15,21 @@
classify_call_origin,
event_flags_from_envelope,
)
-from codex_usage_tracker.models import SessionInfo, UsageEvent
+from codex_usage_tracker.diagnostic_facts import (
+ add_diagnostic_fact,
+ assign_record_id_to_diagnostic_facts,
+ diagnostic_fact_from_json,
+ diagnostic_fact_to_json,
+ diagnostic_facts_from_envelope,
+)
+from codex_usage_tracker.models import DiagnosticFact, SessionInfo, UsageEvent
from codex_usage_tracker.paths import DEFAULT_CODEX_HOME
SESSION_ID_RE = re.compile(
r"rollout-[^-]+-[0-9T:-]+-([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.jsonl$"
)
-PARSER_ADAPTER_VERSION = "codex-jsonl-v1"
+PARSER_ADAPTER_VERSION = "codex-jsonl-v2"
PARSER_DIAGNOSTIC_KEYS = (
"invalid_json",
"missing_payload",
@@ -44,8 +51,14 @@
"context_compacted",
"image_generation_end",
"item_completed",
+ "mcp_tool_call_begin",
"mcp_tool_call_end",
"patch_apply_end",
+ "skill_completed",
+ "skill_invoked",
+ "skill_selected",
+ "skill_started",
+ "skill_used",
"task_complete",
"task_started",
"thread_goal_updated",
@@ -106,6 +119,7 @@ class ParserState:
current_turn: dict[str, Any] = field(default_factory=dict)
last_cumulative_total: int = -1
call_origin_segment: tuple[CallOriginFlags, ...] = ()
+ diagnostic_facts_segment: tuple[DiagnosticFact, ...] = ()
latest_record_id: str | None = None
latest_event_timestamp: str | None = None
@@ -115,6 +129,7 @@ class ParsedUsageFile:
"""Parsed aggregate usage events plus the final parser cursor."""
events: list[UsageEvent]
+ diagnostic_facts: list[DiagnosticFact]
state: ParserState
@@ -213,12 +228,22 @@ def parser_state_from_json(raw: str | None) -> ParserState | None:
segment = payload.get("call_origin_segment")
if not isinstance(segment, list):
segment = []
+ diagnostic_segment = payload.get("diagnostic_facts_segment")
+ if not isinstance(diagnostic_segment, list):
+ diagnostic_segment = []
return ParserState(
session_id=_optional_str(payload.get("session_id")),
session_meta=_string_dict(payload.get("session_meta")),
current_turn=_string_dict(payload.get("current_turn")),
last_cumulative_total=_json_int(payload.get("last_cumulative_total"), -1),
call_origin_segment=tuple(_call_origin_flags_from_json(item) for item in segment),
+ diagnostic_facts_segment=tuple(
+ fact
+ for fact in (
+ diagnostic_fact_from_json(item) for item in diagnostic_segment
+ )
+ if fact is not None
+ ),
latest_record_id=_optional_str(payload.get("latest_record_id")),
latest_event_timestamp=_optional_str(payload.get("latest_event_timestamp")),
)
@@ -243,6 +268,9 @@ def parser_state_to_json(state: ParserState) -> str:
}
for flags in state.call_origin_segment
],
+ "diagnostic_facts_segment": [
+ diagnostic_fact_to_json(fact) for fact in state.diagnostic_facts_segment
+ ],
"latest_record_id": state.latest_record_id,
"latest_event_timestamp": state.latest_event_timestamp,
},
@@ -336,7 +364,9 @@ def _parse_codex_jsonl_v1(
)
last_cumulative_total = previous_state.last_cumulative_total
events: list[UsageEvent] = []
+ diagnostic_facts: list[DiagnosticFact] = []
call_origin_segment: list[CallOriginFlags] = list(previous_state.call_origin_segment)
+ diagnostic_facts_segment = previous_state.diagnostic_facts_segment
latest_record_id = previous_state.latest_record_id
latest_event_timestamp = previous_state.latest_event_timestamp
@@ -383,6 +413,14 @@ def _parse_codex_jsonl_v1(
flags = event_flags_from_envelope(envelope)
if flags.has_signal:
call_origin_segment.append(flags)
+ for fact in diagnostic_facts_from_envelope(
+ envelope,
+ line_number=line_number,
+ ):
+ diagnostic_facts_segment = add_diagnostic_fact(
+ diagnostic_facts_segment,
+ fact,
+ )
if entry_type == "event_msg" and payload_type not in KNOWN_NON_TOKEN_EVENT_MSG_TYPES:
_increment_stat(stats, "unknown_event_shape")
continue
@@ -448,15 +486,24 @@ def _parse_codex_jsonl_v1(
latest_record_id = event.record_id
latest_event_timestamp = event.event_timestamp
events.append(event)
+ diagnostic_facts.extend(
+ assign_record_id_to_diagnostic_facts(
+ diagnostic_facts_segment,
+ record_id=event.record_id,
+ )
+ )
+ diagnostic_facts_segment = ()
return ParsedUsageFile(
events=events,
+ diagnostic_facts=diagnostic_facts,
state=ParserState(
session_id=session_id,
session_meta=session_meta,
current_turn=current_turn,
last_cumulative_total=last_cumulative_total,
call_origin_segment=tuple(call_origin_segment),
+ diagnostic_facts_segment=diagnostic_facts_segment,
latest_record_id=latest_record_id,
latest_event_timestamp=latest_event_timestamp,
),
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard.js
index ba03780..1d5eeed 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard.js
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard.js
@@ -12,6 +12,7 @@
const dashboardActionsFactory = window.CodexUsageDashboardActions;
const dashboardEventsFactory = window.CodexUsageDashboardEvents;
const dashboardLiveFactory = window.CodexUsageDashboardLive;
+ const dashboardDiagnosticsFactory = window.CodexUsageDashboardDiagnostics;
const {
number,
money,
@@ -134,6 +135,7 @@
const insightsViewEl = document.getElementById('insightsView');
const callsViewEl = document.getElementById('callsView');
const threadsViewEl = document.getElementById('threadsView');
+ const diagnosticsViewEl = document.getElementById('diagnosticsView');
const insightsPanelEl = document.getElementById('insightsPanel');
const insightCardsEl = document.getElementById('insightCards');
const presetListEl = document.getElementById('presetList');
@@ -155,6 +157,8 @@
const rowLoadProgressLabelEl = document.getElementById('rowLoadProgressLabel');
const rowLoadProgressCountEl = document.getElementById('rowLoadProgressCount');
const rowLoadProgressBarEl = document.getElementById('rowLoadProgressBar');
+ const diagnosticsPanelEl = document.getElementById('diagnosticsPanel');
+ const usageTableEl = document.getElementById('usageTable');
const toTopEl = document.getElementById('toTop');
let rowByRecordId = new Map();
let threadAttachmentByRecordId = new Map();
@@ -184,7 +188,7 @@
const allowedDatePresets = new Set(Object.keys(datePresetLabels));
const defaultDashboardView = 'calls';
const defaultDashboardSort = 'time';
- let activeView = ['calls', 'threads', 'insights', 'call'].includes(initialState.view) ? initialState.view : defaultDashboardView;
+ let activeView = ['calls', 'threads', 'insights', 'diagnostics', 'call'].includes(initialState.view) ? initialState.view : defaultDashboardView;
document.body.dataset.activeView = activeView;
let sortKey = optionValueExists(sortEl, initialState.sort) ? initialState.sort : sortEl.value || defaultDashboardSort;
let sortDirection = ['asc', 'desc'].includes(initialState.direction) ? initialState.direction : defaultSortDirection(sortKey);
@@ -684,6 +688,19 @@
function dateCaptionPrefix(range = currentDateRange()) {
return range.active || range.invalid ? `${range.label}. ` : '';
}
+ // Build the query-filter object sent to the diagnostics API from the
+ // current dashboard controls. `include_archived` is always present as
+ // '1'/'0'; model, effort, since and until are only added when set.
+ function diagnosticApiFilters(dateRange = currentDateRange()) {
+ // A default parameter only covers `undefined`; this also covers null.
+ const range = dateRange || currentDateRange();
+ const filters = {
+ include_archived: includeArchived ? '1' : '0',
+ };
+ if (modelEl.value) filters.model = modelEl.value;
+ if (effortEl.value) filters.effort = effortEl.value;
+ // Date bounds are only sent for an active, valid range.
+ if (range.active && !range.invalid) {
+ if (range.start) filters.since = localDateKey(range.start);
+ // The range end is exclusive, so step back one day before formatting.
+ // NOTE(review): presumably the API treats `until` as inclusive; confirm.
+ if (range.endExclusive) filters.until = localDateKey(addDays(range.endExclusive, -1));
+ }
+ return filters;
+ }
function filtered(dateRange = currentDateRange()) {
const term = searchEl.value.trim().toLowerCase();
const model = modelEl.value;
@@ -854,6 +871,29 @@
syncUrlState,
tableUrlForRow,
} = dashboardActions;
+ const dashboardDiagnostics = dashboardDiagnosticsFactory.create({
+ apiToken: () => apiToken,
+ diagnosticsPanelEl,
+ escapeHtml,
+ formatTimestamp,
+ getDiagnosticFilters: diagnosticApiFilters,
+ isActive: () => activeView === 'diagnostics',
+ liveRefreshSupported,
+ number,
+ openInvestigatorUrl,
+ pagerEl,
+ pct,
+ renderDashboard: () => render(),
+ renderTimeCell,
+ rowInvestigatorLink,
+ rowLoadProgressEl,
+ rowsEl,
+ tableCaptionEl,
+ tableTitleEl,
+ t,
+ tooltipAttributes,
+ usageTableEl,
+ });
const dashboardAnalysis = dashboardAnalysisFactory.create({
cachedInputTokens,
callInitiatorText,
@@ -1094,11 +1134,14 @@
function render() {
rowsEl.textContent = '';
document.body.dataset.activeView = activeView;
+ dashboardDiagnostics.setActive(activeView === 'diagnostics');
+ exportVisibleEl.hidden = activeView === 'diagnostics';
updateSortControls();
if (activeView === 'call') {
insightsViewEl.setAttribute('aria-pressed', 'false');
callsViewEl.setAttribute('aria-pressed', 'false');
threadsViewEl.setAttribute('aria-pressed', 'false');
+ diagnosticsViewEl.setAttribute('aria-pressed', 'false');
callInvestigator.renderCallInvestigator(Array.from(rowByRecordId.values()));
fitModelPills();
syncUrlState();
@@ -1124,9 +1167,12 @@
insightsViewEl.setAttribute('aria-pressed', activeView === 'insights' ? 'true' : 'false');
callsViewEl.setAttribute('aria-pressed', activeView === 'calls' ? 'true' : 'false');
threadsViewEl.setAttribute('aria-pressed', activeView === 'threads' ? 'true' : 'false');
+ diagnosticsViewEl.setAttribute('aria-pressed', activeView === 'diagnostics' ? 'true' : 'false');
renderInsightPanel(rows);
if (activeView === 'call') {
callInvestigator.renderCallInvestigator(rows);
+ } else if (activeView === 'diagnostics') {
+ dashboardDiagnostics.renderDiagnostics(dateRange);
} else if (activeView === 'threads') {
renderThreads(rows);
} else if (activeView === 'insights') {
@@ -1274,6 +1320,7 @@
}
rebuildDashboardIndexes();
rebuildFilterOptions();
+ dashboardDiagnostics.invalidate();
updatePricingSourceLine();
updateAllowanceSourceLine();
updatePrivacyModeLine();
@@ -1398,6 +1445,7 @@
dateStartEl,
defaultSortDirection,
detailToggleEl,
+ diagnosticsViewEl,
effortEl,
exportCurrentRows,
exportVisibleEl,
@@ -1474,6 +1522,13 @@
} else if (activeView === 'call') {
autoRefreshEl.checked = false;
updateLiveStatus('badge.live', `${t('dashboard.view.call')}. ${loadedRowsDescription()}. ${historyRowsDescription()}`);
+ } else if (activeView === 'diagnostics') {
+ updateLiveStatus('badge.live', `${t('dashboard.view.diagnostics')}. ${loadedRowsDescription()}. ${historyRowsDescription()}`);
+ refreshDashboardData(false, {
+ refreshLogs: false,
+ resetRows: true,
+ allowDiagnosticsBootstrap: true,
+ });
} else {
updateLiveStatus('badge.live', `${tf('live.every', { seconds: liveRefreshIntervalMs / 1000 })}. ${loadedRowsDescription()}. ${historyRowsDescription()}`);
scheduleAutoRefresh();
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js
new file mode 100644
index 0000000..0522b62
--- /dev/null
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_diagnostics.js
@@ -0,0 +1,568 @@
+(() => {
+ function createDiagnosticsRuntime(deps) {
+ const {
+ apiToken,
+ diagnosticsPanelEl,
+ escapeHtml,
+ formatTimestamp,
+ getDiagnosticFilters,
+ isActive,
+ liveRefreshSupported,
+ number,
+ openInvestigatorUrl,
+ pagerEl,
+ pct,
+ renderDashboard,
+ renderTimeCell,
+ rowInvestigatorLink,
+ rowLoadProgressEl,
+ rowsEl,
+ tableCaptionEl,
+ tableTitleEl,
+ t,
+ tooltipAttributes = () => '',
+ usageTableEl,
+ } = deps;
+
+ let activeSignature = '';
+ let status = 'idle';
+ let errorMessage = '';
+ let requestGeneration = 0;
+ let payloads = emptyPayloads();
+ let selectedFactKey = '';
+ let pendingScrollAnchor = null;
+ const factCallPageSize = 25;
+ const factCallPayloads = new Map();
+ const factCallSorts = new Map();
+
+ // Show or hide the diagnostics panel and swap it with the main usage table.
+ // When activating, the pager and row-load progress are also hidden;
+ // deactivating does not re-show them here.
+ function setActive(active) {
+ diagnosticsPanelEl.hidden = !active;
+ if (usageTableEl) usageTableEl.hidden = active;
+ if (active) {
+ pagerEl.hidden = true;
+ rowLoadProgressEl.hidden = true;
+ }
+ }
+
+ // Drop all cached diagnostics state, including per-fact call pages and
+ // sort choices. Clearing `activeSignature` makes the next
+ // renderDiagnostics() see a signature mismatch and refetch.
+ function invalidate() {
+ activeSignature = '';
+ status = 'idle';
+ errorMessage = '';
+ selectedFactKey = '';
+ payloads = emptyPayloads();
+ factCallPayloads.clear();
+ factCallSorts.clear();
+ }
+
+ // Render the diagnostics view for the given date range.
+ // Sets the table title and caption, then either shows a state message
+ // (invalid date range, or no live API) or renders the panel from the
+ // cached payloads. A background fetch starts whenever the serialized
+ // filters differ from the last ones rendered.
+ function renderDiagnostics(dateRange) {
+ setActive(true);
+ rowsEl.textContent = '';
+ tableTitleEl.textContent = t('dashboard.view.diagnostics');
+ tableCaptionEl.textContent = 'Associated token totals by structured diagnostic facts. Totals are not additive when one call has multiple facts.';
+ if (dateRange && dateRange.invalid) {
+ diagnosticsPanelEl.innerHTML = renderState(t('date.invalid_range'));
+ return;
+ }
+ if (!liveRefreshSupported) {
+ diagnosticsPanelEl.innerHTML = renderState('Diagnostics require the live localhost dashboard API.');
+ return;
+ }
+ const filters = getDiagnosticFilters(dateRange);
+ // The serialized filters identify which request the cached data belongs to.
+ const signature = JSON.stringify(filters);
+ if (signature !== activeSignature) {
+ activeSignature = signature;
+ status = 'loading';
+ errorMessage = '';
+ selectedFactKey = '';
+ payloads = emptyPayloads();
+ factCallPayloads.clear();
+ // Fire and forget: fetchDiagnostics re-renders when it settles.
+ void fetchDiagnostics(signature, filters);
+ }
+ diagnosticsPanelEl.innerHTML = renderPanel();
+ restoreScrollAnchor();
+ }
+
+ // Fetch the facts, tools and compactions summaries in parallel for one
+ // filter signature. The result (or error) is dropped when a newer request
+ // has started or the active signature changed while awaiting; otherwise
+ // it is stored and the dashboard re-renders if diagnostics is active.
+ async function fetchDiagnostics(signature, filters) {
+ const generation = requestGeneration + 1;
+ requestGeneration = generation;
+ try {
+ const [facts, tools, compactions] = await Promise.all([
+ fetchPayload('/api/diagnostics/facts', { ...filters, limit: '50', sort: 'uncached', direction: 'desc' }),
+ fetchPayload('/api/diagnostics/tools', { ...filters, limit: '25', sort: 'uncached', direction: 'desc' }),
+ fetchPayload('/api/diagnostics/compactions', { ...filters, limit: '25', sort: 'uncached', direction: 'desc' }),
+ ]);
+ // Stale response: a later request or a filter change superseded this one.
+ if (generation !== requestGeneration || signature !== activeSignature) return;
+ payloads = { facts, tools, compactions };
+ status = 'ready';
+ } catch (error) {
+ if (generation !== requestGeneration || signature !== activeSignature) return;
+ errorMessage = error.message || String(error);
+ status = 'error';
+ }
+ renderIfActive();
+ }
+
+ async function fetchFactCalls(factType, factName, options = {}) {
+ const key = factKey(factType, factName);
+ const append = Boolean(options.append);
+ const force = Boolean(options.force);
+ const signature = activeSignature;
+ if (!append && !force && selectedFactKey === key) {
+ selectedFactKey = '';
+ renderIfActive();
+ return;
+ }
+ selectedFactKey = key;
+ const cached = factCallPayloads.get(key);
+ const sortState = factCallSortState(key);
+ if (!append && !force && cached && cached.status === 'ready' && cached.sort === sortState.sort && cached.direction === sortState.direction) {
+ renderIfActive();
+ return;
+ }
+ if (append && (!cached || cached.status === 'appending' || !factCallsHasMore(cached.payload))) {
+ return;
+ }
+ const previousPayload = cached && cached.payload ? cached.payload : null;
+ const offset = append ? factCallRows(previousPayload).length : 0;
+ factCallPayloads.set(key, {
+ status: append ? 'appending' : 'loading',
+ payload: previousPayload,
+ error: '',
+ sort: sortState.sort,
+ direction: sortState.direction,
+ });
+ renderIfActive();
+ try {
+ const filters = getDiagnosticFilters();
+ const payload = await fetchPayload('/api/diagnostics/fact-calls', {
+ ...filters,
+ fact_type: factType,
+ fact_name: factName,
+ limit: String(factCallPageSize),
+ offset: String(offset),
+ sort: sortState.sort,
+ direction: sortState.direction,
+ });
+ if (signature !== activeSignature) return;
+ factCallPayloads.set(key, {
+ status: 'ready',
+ payload: append ? mergeFactCallPayload(previousPayload, payload) : payload,
+ error: '',
+ sort: sortState.sort,
+ direction: sortState.direction,
+ });
+ } catch (error) {
+ if (signature !== activeSignature) return;
+ factCallPayloads.set(key, {
+ status: append && previousPayload ? 'ready' : 'error',
+ payload: append ? previousPayload : null,
+ error: error.message || String(error),
+ sort: sortState.sort,
+ direction: sortState.direction,
+ });
+ }
+ renderIfActive();
+ }
+
+ // Change the sort of the expanded fact's call list and refetch it.
+ // Clicking the current sort key flips its direction; a new key starts at
+ // its default direction. Does nothing when no fact is selected or the key
+ // is not in diagnosticCallSortLabels().
+ function sortFactCalls(sortKey) {
+ if (!selectedFactKey || !diagnosticCallSortLabels()[sortKey]) return;
+ const current = factCallSortState(selectedFactKey);
+ const next = current.sort === sortKey
+ ? { sort: sortKey, direction: current.direction === 'asc' ? 'desc' : 'asc' }
+ : { sort: sortKey, direction: defaultFactCallSortDirection(sortKey) };
+ factCallSorts.set(selectedFactKey, next);
+ const [factType, factName] = splitFactKey(selectedFactKey);
+ // force: bypass the collapse toggle and the cached-page shortcut.
+ void fetchFactCalls(factType, factName, { force: true });
+ }
+
+ // GET `path` with `params` as the query string and return the parsed JSON.
+ // Null, undefined and empty-string params are omitted. Throws an Error on
+ // a non-OK HTTP status or when the payload carries an `error` field.
+ async function fetchPayload(path, params) {
+ const urlParams = new URLSearchParams();
+ Object.entries(params || {}).forEach(([key, value]) => {
+ if (value === null || value === undefined || value === '') return;
+ urlParams.set(key, String(value));
+ });
+ // NOTE(review): the timestamp param looks like a cache-buster on top of
+ // `cache: 'no-store'`; confirm the server ignores it.
+ urlParams.set('_', String(Date.now()));
+ const response = await fetch(`${path}?${urlParams.toString()}`, {
+ headers: {
+ 'Accept': 'application/json',
+ 'X-Codex-Usage-Token': apiToken(),
+ },
+ cache: 'no-store',
+ });
+ if (!response.ok) throw new Error(`HTTP ${response.status}`);
+ const payload = await response.json();
+ if (payload.error) throw new Error(payload.error);
+ return payload;
+ }
+
+ function renderPanel() {
+ if (status === 'error') return renderState(`Diagnostics unavailable: ${errorMessage}`);
+ const loading = status === 'loading';
+ return `
+
+
+ ${readoutMetric('Fact rows', payloads.facts)}
+ ${readoutMetric('Tool/function rows', payloads.tools)}
+ ${readoutMetric('Compaction rows', payloads.compactions)}
+ Structured labels only. Raw context remains on-demand in the call investigator.
+
+ ${renderFactSection('Top Diagnostic Facts', 'Ranked by associated uncached input tokens.', payloads.facts, loading)}
+ ${renderFactSection('Tool and Function Activity', 'Tool/function facts associated with model calls.', payloads.tools, loading)}
+ ${renderFactSection('Compaction Activity', 'Compaction facts and post-compaction associated costs.', payloads.compactions, loading)}
+
+ `;
+ }
+
+ function renderFactSection(title, caption, payload, loading) {
+ const rows = Array.isArray(payload?.rows) ? payload.rows : [];
+ return `
+
+
+ ${renderFactTable(rows, loading)}
+
+ `;
+ }
+
+ function renderFactTable(rows, loading) {
+ if (loading && !rows.length) return renderState('Loading diagnostics...');
+ if (!rows.length) return renderState('No diagnostic facts matched the current filters.');
+ const body = rows.map(row => {
+ const key = factKey(row.fact_type, row.fact_name);
+ const selected = key === selectedFactKey;
+ const largest = row.largest_record_id
+ ? rowInvestigatorLink({ record_id: row.largest_record_id }, tokenText(row.largest_call_tokens), true)
+ : tokenText(row.largest_call_tokens);
+ return `
+
+
+
+ ${escapeHtml(row.fact_type || 'unknown')}/${escapeHtml(row.fact_name || 'unknown')}
+ ${escapeHtml(row.fact_category || 'uncategorized')}
+
+
+ ${number.format(Number(row.occurrences || 0))}
+ ${number.format(Number(row.associated_calls || 0))}
+ ${tokenText(row.associated_total_tokens)}
+ ${tokenText(row.associated_cached_input_tokens)}
+ ${tokenText(row.associated_uncached_input_tokens)}
+ ${tokenText(row.associated_output_tokens)}
+ ${pct(row.avg_cache_ratio)}
+ ${largest}
+ ${escapeHtml(formatTimestamp(row.latest_event_timestamp || ''))}
+ ${selected ? '-' : '+'}
+
+ ${selected ? `
+
+ ${renderFactCallsPanel()}
+
+ ` : ''}
+ `;
+ }).join('');
+ return `
+
+
+
+ ${columnHeader('Fact', 'Diagnostic fact type and name derived from structured local log metadata. Raw prompts, assistant text, and tool output are not persisted.')}
+ ${columnHeader('Occ', 'Occurrences: count of matching diagnostic fact events. One model call can contribute more than one occurrence.', 'num')}
+ ${columnHeader('Calls', 'Distinct model calls associated with this diagnostic fact.', 'num')}
+ ${columnHeader('Assoc total', 'Associated total tokens for those calls. Totals are not additive across facts because one call can have multiple facts.', 'num')}
+ ${columnHeader('Cached', 'Associated cached input tokens for those calls.', 'num')}
+ ${columnHeader('Uncached', 'Associated uncached input tokens for those calls.', 'num')}
+ ${columnHeader('Output', 'Associated output tokens for those calls.', 'num')}
+ ${columnHeader('Cache %', 'Average cache ratio across associated calls.', 'num')}
+ ${columnHeader('Largest', 'Largest associated call by total tokens.', 'num')}
+ ${columnHeader('Latest', 'Latest associated call timestamp.')}
+ ${columnHeader('Action', 'Expand or collapse the associated calls.')}
+
+ ${body}
+
+
+ `;
+ }
+
+ function renderFactCallsPanel() {
+ const entry = factCallPayloads.get(selectedFactKey);
+ const label = selectedFactKey.replace('\u0000', '/');
+ if (!entry || (entry.status === 'loading' && !entry.payload)) {
+ return `${renderState(`Loading calls for ${label}...`)}
`;
+ }
+ if (entry.status === 'error' && !entry.payload) {
+ return `${renderState(`Could not load calls for ${label}: ${entry.error}`)}
`;
+ }
+ const rows = factCallRows(entry.payload);
+ if (!rows.length) {
+ return `${renderState(`No calls found for ${label}.`)}
`;
+ }
+ const total = Number(entry.payload?.total_matched_rows || rows.length);
+ const loadingMore = entry.status === 'appending';
+ const body = rows.map(row => `
+
+ ${rowInvestigatorLink(row, renderTimeCell(row.event_timestamp), true)}
+ ${rowInvestigatorLink(row, escapeHtml(row.thread_name || row.parent_thread_name || row.session_id || 'Unknown'))}
+ ${rowInvestigatorLink(row, `${escapeHtml(row.model || 'Unknown')} `)}
+ ${rowInvestigatorLink(row, escapeHtml(row.effort || 'unknown'))}
+ ${rowInvestigatorLink(row, tokenText(row.total_tokens))}
+ ${rowInvestigatorLink(row, tokenText(row.cached_input_tokens))}
+ ${rowInvestigatorLink(row, tokenText(row.uncached_input_tokens))}
+ ${rowInvestigatorLink(row, tokenText(row.output_tokens))}
+ ${rowInvestigatorLink(row, tokenText(row.reasoning_output_tokens))}
+ ${rowInvestigatorLink(row, pct(row.cache_ratio))}
+
+ `).join('');
+ return `
+
+
+
+
+
+ ${diagnosticCallHeader('time', 'Time', false, 'Call timestamp.')}
+ ${diagnosticCallHeader('thread', 'Thread', false, 'Resolved thread, parent thread, or session label.')}
+ ${diagnosticCallHeader('model', 'Model', false, 'Model label for this associated call.')}
+ ${diagnosticCallHeader('effort', 'Effort', false, 'Reasoning effort label for this associated call.')}
+ ${diagnosticCallHeader('tokens', 'Tokens', true, 'Total tokens for this associated model call.')}
+ ${diagnosticCallHeader('cached', 'Cached', true, 'Cached input tokens for this associated model call.')}
+ ${diagnosticCallHeader('uncached', 'Uncached', true, 'Uncached input tokens for this associated model call.')}
+ ${diagnosticCallHeader('output', 'Output', true, 'Output tokens for this associated model call.')}
+ ${diagnosticCallHeader('reasoning', 'Reasoning', true, 'Reasoning output tokens for this associated model call.')}
+ ${diagnosticCallHeader('cache', 'Cache %', true, 'Cache ratio for this associated model call.')}
+
+ ${body}
+
+ ${renderFactCallPager(entry, rows.length, total, loadingMore)}
+
+ ${entry.error ? `
${escapeHtml(`Could not load more calls: ${entry.error}`)}
` : ''}
+
+ `;
+ }
+
+ function renderFactCallPager(entry, loaded, total, loadingMore) {
+ const canLoadMore = loadingMore || factCallsHasMore(entry.payload);
+ const statusText = `Showing ${number.format(loaded)} of ${number.format(total)} calls`;
+ if (!canLoadMore) {
+ return `${escapeHtml(statusText)}
`;
+ }
+ return `
+
+ ${escapeHtml(statusText)}
+
+
+ `;
+ }
+
+ function readoutMetric(label, payload) {
+ const count = payload ? Number(payload.total_matched_rows || payload.row_count || 0) : 0;
+ return `${number.format(count)} ${escapeHtml(label)} `;
+ }
+
+ function renderState(message) {
+ return `${escapeHtml(message)}
`;
+ }
+
+ // Format a token count: falsy values become 0, the number is rounded to
+ // an integer, then formatted with the shared `number` formatter.
+ function tokenText(value) {
+ return number.format(Math.round(Number(value || 0)));
+ }
+
+ function columnHeader(label, tooltip, className = '') {
+ const classAttr = className ? ` class="${escapeHtml(className)}"` : '';
+ const tooltipAttr = tooltipAttributes(tooltip);
+ return `${escapeHtml(label)} `;
+ }
+
+ function diagnosticCallHeader(sortKey, label, numeric = false, tooltip = '') {
+ const state = factCallSortState(selectedFactKey);
+ const active = state.sort === sortKey;
+ const indicator = active ? (state.direction === 'asc' ? 'â–²' : 'â–¼') : '';
+ const ariaSort = active ? (state.direction === 'asc' ? 'ascending' : 'descending') : 'none';
+ const tooltipAttr = tooltipAttributes(tooltip);
+ return `
+
+
+
+ `;
+ }
+
+ // Describe the selected fact's current call sort as text, for example
+ // "total tokens descending". Falls back to the raw sort key when it has
+ // no label.
+ function diagnosticCallSortDescription() {
+ const state = factCallSortState(selectedFactKey);
+ const labels = diagnosticCallSortLabels();
+ const label = labels[state.sort] || state.sort;
+ return `${label} ${state.direction === 'asc' ? 'ascending' : 'descending'}`;
+ }
+
+ // Map each fact-call sort key to its human-readable label.
+ // sortFactCalls() also uses the keys as the set of sort keys it accepts.
+ function diagnosticCallSortLabels() {
+ return {
+ cache: 'cache ratio',
+ cached: 'cached input tokens',
+ effort: 'effort',
+ input: 'input tokens',
+ model: 'model',
+ output: 'output tokens',
+ reasoning: 'reasoning output tokens',
+ thread: 'thread',
+ time: 'time',
+ tokens: 'total tokens',
+ uncached: 'uncached input tokens',
+ };
+ }
+
+ // Return the stored sort for a fact key, defaulting to total tokens
+ // descending when none has been chosen.
+ function factCallSortState(key) {
+ return factCallSorts.get(key) || { sort: 'tokens', direction: 'desc' };
+ }
+
+ // Initial direction for a newly chosen sort key: ascending for the
+ // effort, model and thread columns, descending for everything else.
+ function defaultFactCallSortDirection(sortKey) {
+ return ['effort', 'model', 'thread'].includes(sortKey) ? 'asc' : 'desc';
+ }
+
+ // Return payload.rows when it is an array, otherwise an empty array
+ // (this also covers a null or undefined payload).
+ function factCallRows(payload) {
+ return Array.isArray(payload?.rows) ? payload.rows : [];
+ }
+
+ // True when another page can be requested: the payload is flagged
+ // `truncated` and fewer rows are loaded than `total_matched_rows`.
+ // A missing or zero total falls back to the loaded count, which yields
+ // false.
+ function factCallsHasMore(payload) {
+ if (!payload) return false;
+ const loaded = factCallRows(payload).length;
+ const total = Number(payload.total_matched_rows || loaded);
+ return Boolean(payload.truncated) && loaded < total;
+ }
+
+ // Merge a newly fetched page into the previously loaded payload.
+ // Rows whose record_id is already present are skipped; rows without a
+ // record_id are always appended. The result takes its other fields from
+ // the newest payload, with rows, row_count, total_matched_rows, truncated
+ // and filters.offset recomputed for the merged list.
+ function mergeFactCallPayload(previousPayload, nextPayload) {
+ const previousRows = factCallRows(previousPayload);
+ const mergedRows = previousRows.slice();
+ const seenRecordIds = new Set(previousRows.map(row => row.record_id).filter(Boolean));
+ factCallRows(nextPayload).forEach(row => {
+ const recordId = row.record_id || '';
+ if (recordId && seenRecordIds.has(recordId)) return;
+ if (recordId) seenRecordIds.add(recordId);
+ mergedRows.push(row);
+ });
+ const total = Number(nextPayload.total_matched_rows || previousPayload?.total_matched_rows || mergedRows.length);
+ // If the page added nothing new, report not-truncated so factCallsHasMore
+ // stops offering load-more.
+ const madeProgress = mergedRows.length > previousRows.length;
+ return {
+ ...nextPayload,
+ rows: mergedRows,
+ row_count: mergedRows.length,
+ total_matched_rows: total,
+ truncated: madeProgress && mergedRows.length < total,
+ filters: {
+ ...(nextPayload.filters || {}),
+ // The merged list always starts at the first row.
+ offset: 0,
+ },
+ };
+ }
+
+ // Build the map key for a fact: type and name joined by a NUL character.
+ // Missing parts become empty strings. splitFactKey() is the inverse.
+ function factKey(factType, factName) {
+ return `${factType || ''}\u0000${factName || ''}`;
+ }
+
+ // Split a factKey() string back into [factType, factName] at the first
+ // NUL character. A key without a NUL is returned as [key, ''].
+ function splitFactKey(key) {
+ const delimiter = key.indexOf('\u0000');
+ if (delimiter < 0) return [key, ''];
+ return [key.slice(0, delimiter), key.slice(delimiter + 1)];
+ }
+
+ // Record where `element` sits in the viewport (and the page scroll
+ // offset) before a re-render, so restoreScrollAnchor() can put the
+ // matching element back in the same place. `type` is 'fact' or
+ // 'load-more' and selects which element is looked up afterwards.
+ function captureScrollAnchor(element, key, type = 'fact') {
+ if (!element || !element.getBoundingClientRect) return;
+ pendingScrollAnchor = {
+ key,
+ type,
+ top: element.getBoundingClientRect().top,
+ scrollY: window.scrollY,
+ };
+ }
+
+ // Consume the pending scroll anchor, if any. On the next animation frame
+ // it scrolls so the anchored element returns to the viewport position it
+ // had when captured. A 'load-more' anchor targets the load-more button
+ // and falls back to the fact's button; with no element found, the saved
+ // page scroll offset is restored instead.
+ function restoreScrollAnchor() {
+ if (!pendingScrollAnchor) return;
+ const anchor = pendingScrollAnchor;
+ // Clear first so each captured anchor is applied at most once.
+ pendingScrollAnchor = null;
+ window.requestAnimationFrame(() => {
+ const target = anchor.type === 'load-more'
+ ? diagnosticsPanelEl.querySelector('[data-diagnostics-call-load-more]')
+ : findFactButton(anchor.key);
+ const fallback = findFactButton(anchor.key);
+ const element = target || fallback;
+ if (!element || !element.getBoundingClientRect) {
+ window.scrollTo({ top: anchor.scrollY, behavior: 'auto' });
+ return;
+ }
+ // Positive delta: the element moved down, so scroll down by that much.
+ const delta = element.getBoundingClientRect().top - anchor.top;
+ // Sub-pixel differences are ignored.
+ if (Math.abs(delta) > 1) {
+ window.scrollBy({ top: delta, behavior: 'auto' });
+ }
+ });
+ }
+
+ // Find the element in the panel whose fact type/name data attributes
+ // match the given fact key, or null. Matching compares dataset values
+ // directly instead of embedding the names in a selector.
+ function findFactButton(key) {
+ const [factType, factName] = splitFactKey(key);
+ return Array.from(diagnosticsPanelEl.querySelectorAll('[data-diagnostics-fact-type][data-diagnostics-fact-name]')).find(button => {
+ return button.dataset.diagnosticsFactType === factType && button.dataset.diagnosticsFactName === factName;
+ }) || null;
+ }
+
+ // Fresh "nothing loaded yet" value for the three summary payloads.
+ function emptyPayloads() {
+ return { facts: null, tools: null, compactions: null };
+ }
+
+ // Re-render the whole dashboard, but only while the diagnostics view is
+ // the active one, so background fetches do not repaint other views.
+ function renderIfActive() {
+ if (isActive()) renderDashboard();
+ }
+
+ // Single delegated click handler for the whole panel; the panel's
+ // innerHTML is replaced on every render, so listeners are not attached to
+ // individual elements. Targets are checked in order: investigator links,
+ // the load-more button, sort headers, then fact expand/collapse buttons.
+ diagnosticsPanelEl.addEventListener('click', event => {
+ const target = event.target;
+ if (!target || !target.closest) return;
+ // Investigator links are intercepted only when the live API is available;
+ // otherwise the default link navigation is left alone.
+ const link = target.closest('a.row-investigator-link');
+ if (link && diagnosticsPanelEl.contains(link) && liveRefreshSupported) {
+ event.preventDefault();
+ event.stopPropagation();
+ void openInvestigatorUrl(link.href);
+ return;
+ }
+ const loadMoreButton = target.closest('[data-diagnostics-call-load-more]');
+ if (loadMoreButton && diagnosticsPanelEl.contains(loadMoreButton)) {
+ event.preventDefault();
+ event.stopPropagation();
+ if (!selectedFactKey) return;
+ captureScrollAnchor(loadMoreButton, selectedFactKey, 'load-more');
+ const [factType, factName] = splitFactKey(selectedFactKey);
+ void fetchFactCalls(factType, factName, { append: true });
+ return;
+ }
+ const sortButton = target.closest('[data-diagnostics-call-sort-key]');
+ if (sortButton && diagnosticsPanelEl.contains(sortButton)) {
+ event.preventDefault();
+ event.stopPropagation();
+ if (!selectedFactKey) return;
+ captureScrollAnchor(sortButton, selectedFactKey, 'fact');
+ sortFactCalls(sortButton.dataset.diagnosticsCallSortKey || '');
+ return;
+ }
+ // Anything else that sits inside a fact button toggles that fact's calls.
+ const button = target.closest('[data-diagnostics-fact-type][data-diagnostics-fact-name]');
+ if (!button || !diagnosticsPanelEl.contains(button)) return;
+ event.preventDefault();
+ event.stopPropagation();
+ const key = factKey(button.dataset.diagnosticsFactType || '', button.dataset.diagnosticsFactName || '');
+ captureScrollAnchor(button, key);
+ void fetchFactCalls(button.dataset.diagnosticsFactType || '', button.dataset.diagnosticsFactName || '');
+ });
+
+ return {
+ invalidate,
+ renderDiagnostics,
+ setActive,
+ };
+ }
+
+ window.CodexUsageDashboardDiagnostics = { create: createDiagnosticsRuntime };
+})();
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_events.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_events.js
index 0687fd2..43be17d 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_events.js
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_events.js
@@ -15,6 +15,7 @@
dateStartEl,
defaultSortDirection,
detailToggleEl,
+ diagnosticsViewEl,
effortEl,
exportCurrentRows,
exportVisibleEl,
@@ -69,6 +70,7 @@
insightsViewEl.addEventListener('click', () => setView('insights'));
callsViewEl.addEventListener('click', () => setView('calls'));
threadsViewEl.addEventListener('click', () => setView('threads'));
+ diagnosticsViewEl.addEventListener('click', () => setView('diagnostics'));
clearPresetEl.addEventListener('click', clearPreset);
copyViewLinkEl.addEventListener('click', copyCurrentViewLink);
exportVisibleEl.addEventListener('click', exportCurrentRows);
@@ -115,6 +117,7 @@
if (event.key === '1') setView('insights');
if (event.key === '2') setView('calls');
if (event.key === '3') setView('threads');
+ if (event.key === '4') setView('diagnostics');
});
window.addEventListener('scroll', updateToTopVisibility, { passive: true });
toTopEl.addEventListener('click', () => window.scrollTo({ top: 0, behavior: 'smooth' }));
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_i18n.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_i18n.js
index a2fc6e0..33ef1f9 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_i18n.js
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_i18n.js
@@ -6,6 +6,7 @@
'dashboard.view.insights': 'Insights',
'dashboard.view.calls': 'Calls',
'dashboard.view.threads': 'Threads',
+ 'dashboard.view.diagnostics': 'Diagnostics',
'dashboard.view.call': 'Call Investigator',
'dashboard.model_calls': 'Model Calls',
'dashboard.call_details': 'Call Details',
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_layout.css b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_layout.css
index 81d4128..cf33edd 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_layout.css
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_layout.css
@@ -24,6 +24,12 @@
overflow-y: clip;
}
}
+ body[data-active-view="diagnostics"] .grid {
+ display: block;
+ }
+ body[data-active-view="diagnostics"] .detail-section {
+ display: none;
+ }
.detail-section {
position: sticky;
top: 14px;
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js
index 4b03206..a792b3a 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_live.js
@@ -71,7 +71,7 @@
if (!rowLoadProgressEl) return;
const target = rowHydrationTarget();
const loaded = Math.min(getData().length, target || getData().length);
- const shouldShow = activeView() !== 'call' && liveRefreshSupported && (rowHydrationInFlight || rowsNeedHydration() || rowHydrationError);
+ const shouldShow = !['call', 'diagnostics'].includes(activeView()) && liveRefreshSupported && (rowHydrationInFlight || rowsNeedHydration() || rowHydrationError);
rowLoadProgressEl.hidden = !shouldShow;
if (!shouldShow) return;
const totalText = number.format(target || getTotalAvailableRows() || loaded);
@@ -119,7 +119,7 @@
}
async function hydrateDashboardRows(options = null) {
- if (!liveRefreshSupported || activeView() === 'call') return;
+ if (!liveRefreshSupported || ['call', 'diagnostics'].includes(activeView())) return;
const hydrateOptions = options || {};
if (rowHydrationInFlight) {
if (hydrateOptions.reset) rowHydrationRestartRequested = true;
@@ -150,7 +150,7 @@
updateLiveStatus('status.checking', t('live.loading_rows'));
updateRowLoadProgress();
try {
- while (getData().length < target && generation === rowHydrationGeneration && activeView() !== 'call') {
+ while (getData().length < target && generation === rowHydrationGeneration && !['call', 'diagnostics'].includes(activeView())) {
const offset = getData().length;
const remaining = target - offset;
const chunkSize = Math.min(
@@ -174,7 +174,7 @@
if (!response.ok) throw new Error(`HTTP ${response.status}`);
const payload = await response.json();
if (payload.error) throw new Error(payload.error);
- if (generation !== rowHydrationGeneration || activeView() === 'call') break;
+ if (generation !== rowHydrationGeneration || ['call', 'diagnostics'].includes(activeView())) break;
const rows = payloadRows(payload);
if (!rows.length) break;
applyDashboardPayload(payload, { appendRows: true });
@@ -189,7 +189,7 @@
} finally {
rowHydrationInFlight = false;
updateRowLoadProgress();
- const shouldRestart = rowHydrationRestartRequested && activeView() !== 'call';
+ const shouldRestart = rowHydrationRestartRequested && !['call', 'diagnostics'].includes(activeView());
rowHydrationRestartRequested = false;
if (shouldRestart) {
hydrateDashboardRows();
@@ -200,7 +200,7 @@
}
async function refreshDashboardIfStale() {
- if (!liveRefreshSupported || !apiToken() || activeView() === 'call') return;
+ if (!liveRefreshSupported || !apiToken() || ['call', 'diagnostics'].includes(activeView())) return;
try {
const params = new URLSearchParams({
include_archived: getIncludeArchived() ? '1' : '0',
@@ -234,9 +234,11 @@
window.location.reload();
return;
}
+ const refreshOptions = options || {};
+ const allowDiagnosticsBootstrap = Boolean(refreshOptions.allowDiagnosticsBootstrap);
if (activeView() === 'call' && !manual) return;
+ if (activeView() === 'diagnostics' && !manual && !allowDiagnosticsBootstrap) return;
if (refreshInFlight) return;
- const refreshOptions = options || {};
const refreshLogs = refreshOptions.refreshLogs === undefined ? manual : Boolean(refreshOptions.refreshLogs);
const resetRows = refreshOptions.resetRows !== undefined
? Boolean(refreshOptions.resetRows)
@@ -271,7 +273,7 @@
rowHydrationComplete = false;
}
applyDashboardPayload(nextPayload);
- if (activeView() !== 'call') hydrateDashboardRows({ reset: resetRows });
+ if (!['call', 'diagnostics'].includes(activeView())) hydrateDashboardRows({ reset: resetRows });
const result = nextPayload.refresh_result || {};
const indexed = result.inserted_or_updated_events === undefined
? ''
@@ -293,7 +295,7 @@
function scheduleAutoRefresh() {
if (autoRefreshTimer) window.clearInterval(autoRefreshTimer);
autoRefreshTimer = null;
- if (!autoRefreshEl.checked || !liveRefreshSupported || activeView() === 'call') return;
+ if (!autoRefreshEl.checked || !liveRefreshSupported || ['call', 'diagnostics'].includes(activeView())) return;
autoRefreshTimer = window.setInterval(() => {
if (document.visibilityState === 'visible') refreshDashboardIfStale();
}, liveRefreshIntervalMs);
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_responsive.css b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_responsive.css
index 6d2afb1..2eddda7 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_responsive.css
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_responsive.css
@@ -30,6 +30,8 @@
.pager { width: auto; }
.action-status { min-width: 0; }
.preset-card { grid-template-columns: 1fr; }
+ .diagnostics-section-header { display: grid; }
+ .diagnostics-readout .diagnostics-note { grid-column: auto; }
body[data-active-view="call"] .call-investigator {
grid-template-columns: 1fr;
}
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_state.js b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_state.js
index 5f2fe6f..6ed78cd 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_state.js
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_state.js
@@ -1,5 +1,5 @@
(function () {
- const ALLOWED_VIEWS = new Set(['insights', 'calls', 'threads', 'call']);
+ const ALLOWED_VIEWS = new Set(['insights', 'calls', 'threads', 'diagnostics', 'call']);
const ALLOWED_DIRECTIONS = new Set(['asc', 'desc']);
const STATE_KEYS = [
'view',
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
index b5a6255..a8364d5 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_tables.css
@@ -108,6 +108,9 @@
th[data-thread-call-sort-active="true"] .sort-header {
color: var(--ink);
}
+ th[data-diagnostics-call-sort-active="true"] .sort-header {
+ color: var(--ink);
+ }
.sort-indicator {
display: inline-block;
min-width: 12px;
@@ -219,6 +222,149 @@
font-weight: 760;
cursor: pointer;
}
+ .diagnostics-panel {
+ padding: 16px;
+ background: #ffffff;
+ }
+ .diagnostics-stack {
+ display: grid;
+ gap: 14px;
+ }
+ .diagnostics-readout {
+ display: grid;
+ grid-template-columns: repeat(auto-fit, minmax(170px, 1fr));
+ gap: 10px;
+ align-items: stretch;
+ }
+ .diagnostics-readout span {
+ min-width: 0;
+ padding: 10px 12px;
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ background: #f8fafc;
+ color: var(--muted);
+ font-size: 12px;
+ font-weight: 720;
+ line-height: 1.35;
+ }
+ .diagnostics-readout b {
+ display: block;
+ margin-bottom: 3px;
+ color: var(--ink);
+ font-size: 18px;
+ font-weight: 780;
+ }
+ .diagnostics-readout .diagnostics-note {
+ grid-column: span 2;
+ color: #475569;
+ }
+ .diagnostics-section,
+ .diagnostics-drilldown {
+ display: grid;
+ gap: 10px;
+ min-width: 0;
+ padding: 12px;
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ background: #fbfcfe;
+ }
+ .diagnostics-drilldown {
+ border-color: #bfdbfe;
+ background: #eff6ff;
+ }
+ .diagnostics-drilldown-row > td {
+ padding: 0 10px 12px 10px;
+ background: #f8fbff;
+ }
+ .diagnostics-drilldown-row .diagnostics-drilldown {
+ margin-top: 2px;
+ }
+ .diagnostics-expand-button {
+ min-width: 32px;
+ font-size: 16px;
+ line-height: 1;
+ }
+ .diagnostics-section-header {
+ display: flex;
+ align-items: flex-start;
+ justify-content: space-between;
+ gap: 12px;
+ }
+ .diagnostics-section-header h3 {
+ margin: 0;
+ font-size: 14px;
+ font-weight: 780;
+ }
+ .diagnostics-section-header p {
+ margin: 4px 0 0;
+ color: var(--muted);
+ font-size: 12px;
+ font-weight: 680;
+ line-height: 1.35;
+ }
+ .diagnostics-section-header > span {
+ flex: 0 0 auto;
+ padding: 3px 8px;
+ border: 1px solid var(--line);
+ border-radius: 999px;
+ background: #ffffff;
+ color: var(--muted);
+ font-size: 11px;
+ font-weight: 780;
+ white-space: nowrap;
+ }
+ .diagnostics-table-wrap {
+ overflow-x: auto;
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ background: #ffffff;
+ }
+ .diagnostics-table {
+ min-width: 1120px;
+ table-layout: fixed;
+ }
+ .diagnostics-call-table {
+ min-width: 1060px;
+ }
+ .diagnostics-table th {
+ position: static;
+ }
+ .diagnostics-table td,
+ .diagnostics-table th {
+ padding: 8px 10px;
+ }
+ .diagnostics-table tr:last-child td {
+ border-bottom: 0;
+ }
+ .diagnostic-fact {
+ display: grid;
+ gap: 3px;
+ min-width: 0;
+ }
+ .diagnostic-fact strong {
+ overflow-wrap: anywhere;
+ font-size: 12px;
+ line-height: 1.25;
+ }
+ .diagnostic-fact span {
+ color: var(--muted);
+ font-size: 11px;
+ font-weight: 700;
+ }
+ .diagnostics-empty {
+ border: 1px dashed var(--line);
+ border-radius: 8px;
+ background: #ffffff;
+ }
+ .diagnostics-inline-error {
+ padding: 8px 10px;
+ border: 1px solid #fecaca;
+ border-radius: 8px;
+ background: #fef2f2;
+ color: #991b1b;
+ font-size: 12px;
+ font-weight: 720;
+ }
.pager-button:disabled {
cursor: not-allowed;
color: var(--muted);
diff --git a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_template.html b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_template.html
index a322ba6..ad15f74 100644
--- a/src/codex_usage_tracker/plugin_data/dashboard/dashboard_template.html
+++ b/src/codex_usage_tracker/plugin_data/dashboard/dashboard_template.html
@@ -101,6 +101,7 @@ Model Calls
Insights
Calls
Threads
+ Diagnostics
Showing individual model calls.
@@ -118,7 +119,8 @@
Model Calls
-
+
+
@@ -169,6 +171,7 @@ Call Details
+
diff --git a/src/codex_usage_tracker/plugin_data/docs/dashboard-guide.html b/src/codex_usage_tracker/plugin_data/docs/dashboard-guide.html
index 97c0eae..97843b5 100644
--- a/src/codex_usage_tracker/plugin_data/docs/dashboard-guide.html
+++ b/src/codex_usage_tracker/plugin_data/docs/dashboard-guide.html
@@ -120,6 +120,16 @@ Threads View
Use Threads view to understand a work session as a group. Expand a thread to see calls newest first by default, then click an expanded-call header to change the per-thread sort. Subagents with logged parent session ids are shown under their parent thread; inferred auto-review attachments are marked in the details panel. Selected threads also show lifecycle signals such as first expensive turn, largest cumulative jump, cache trend, context trend, and whether subagent or auto-review work appeared before a usage spike.
+ Diagnostics View
+ Use Diagnostics view to inspect structured event patterns and the token totals associated with those patterns. The first table ranks top diagnostic facts by associated uncached input tokens. Tool/function/MCP/command-family and compaction sections expose narrower slices of the same fact data.
+
+ The tab consumes localhost /api/diagnostics/* endpoints; static file dashboards show a live-API unavailable state.
+ Command diagnostics store only a command family such as pytest, git, or unknown_command, never command text or arguments.
+ Skill and MCP labels are detected only when they are present as structured event metadata.
+ Associated token totals are not causal allocations and are not additive when one model call has multiple diagnostic facts.
+ Click Calls on a fact row to load associated model calls; call links and largest-call links open the Call Investigator, where raw context remains explicit and on demand.
+
+
Call Investigator
Clicking a Calls row opens dashboard.html?view=call&record=<record_id> for one model call. It separates exact callback counts, derived previous/next deltas, visible-context estimates, serialized local JSONL upper bounds, candidate serialized-overhead buckets, and redacted evidence loaded at runtime for the selected call. When the localhost context API is enabled, the investigator automatically loads a bounded turn-log evidence window with tool output included; use Hide tool output for a quieter evidence stream, and expand or load older surrounding evidence explicitly when needed. Visible evidence token estimates are calculated from the full selected-turn evidence set before display limiting, using tiktoken when available and a conservative character fallback only when the tokenizer is unavailable. The serialized upper bound tokenizes a redacted raw-JSON representation of the same selected-turn log slice. It can explain why visible text is much smaller than exact uncached input, but it can overcount because local JSONL includes client metadata that may not be prompt text. Bucket labels such as encrypted reasoning/state, local goal metadata, token callback metadata, and rate-limit metadata are counts only; raw text is not returned. encrypted_content is an opaque encrypted field found on some reasoning response items. The tracker cannot decrypt it and treats it as serialized state, not readable prompt, assistant, or tool text. Previous and next buttons move chronologically within the same resolved thread. Cache diagnostics label warm cache reuse, cold resume or stale cache, partial cache miss, uncached spike, and post-compaction without claiming exact cached text spans.
diff --git a/src/codex_usage_tracker/schema.py b/src/codex_usage_tracker/schema.py
index 575a390..77cbdba 100644
--- a/src/codex_usage_tracker/schema.py
+++ b/src/codex_usage_tracker/schema.py
@@ -90,3 +90,18 @@ class UsageColumn:
for column in USAGE_EVENT_COLUMNS
if column.repairable
}
+
+DIAGNOSTIC_FACT_COLUMN_NAMES = (  # Column names for diagnostic-fact rows; store.py copies this tuple into DIAGNOSTIC_FACT_COLUMNS and uses its order for the INSERT column list and the bound values.
+ "record_id",  # record_id, fact_type and fact_name together form the ON CONFLICT key of the store upsert
+ "fact_type",
+ "fact_name",
+ "fact_category",
+ "event_count",
+ "confidence",
+ "first_event_timestamp",
+ "last_event_timestamp",
+ "first_source_line",
+ "last_source_line",
+ "evidence_scope",
+ "raw_content_included",
+)
diff --git a/src/codex_usage_tracker/server.py b/src/codex_usage_tracker/server.py
index 803044d..42e01ae 100644
--- a/src/codex_usage_tracker/server.py
+++ b/src/codex_usage_tracker/server.py
@@ -30,6 +30,11 @@
generate_dashboard,
render_dashboard_html,
)
+from codex_usage_tracker.diagnostic_reports import (
+ build_diagnostics_fact_calls_report,
+ build_diagnostics_facts_report,
+ build_diagnostics_summary_report,
+)
from codex_usage_tracker.i18n import normalize_language
from codex_usage_tracker.paths import (
DEFAULT_ALLOWANCE_PATH,
@@ -93,6 +98,11 @@
_validate_loopback_host = server_utils.validate_loopback_host
+def _optional_int_query(params: dict[str, list[str]], key: str) -> int | None:  # Read an optional integer query parameter: None when `_first` yields no value for `key`, otherwise that value passed through `_safe_int`.
+ value = _first(params.get(key))  # `_first` receives the list of values for `key`, or None when the key is absent from the parsed query
+ return None if value is None else _safe_int(value)  # only a None value maps to None; NOTE(review): how `_safe_int` treats non-numeric text is defined elsewhere — confirm
+
+
class _ContextApiState:
def __init__(self, enabled: bool) -> None:
self._enabled = enabled
@@ -279,6 +289,21 @@ def do_GET(self) -> None: # noqa: N802 - stdlib hook name
if parsed.path == "/api/recommendations":
self._handle_recommendations(parsed.query)
return
+ if parsed.path == "/api/diagnostics/summary":
+ self._handle_diagnostics_summary(parsed.query)
+ return
+ if parsed.path == "/api/diagnostics/facts":
+ self._handle_diagnostics_facts(parsed.query)
+ return
+ if parsed.path == "/api/diagnostics/fact-calls":
+ self._handle_diagnostics_fact_calls(parsed.query)
+ return
+ if parsed.path == "/api/diagnostics/compactions":
+ self._handle_diagnostics_facts(parsed.query, fact_type="compaction")
+ return
+ if parsed.path == "/api/diagnostics/tools":
+ self._handle_diagnostics_facts(parsed.query, fact_group="tools")
+ return
if parsed.path == "/api/usage":
self._handle_usage(parsed.query)
return
@@ -802,6 +827,122 @@ def _handle_recommendations(self, query: str) -> None:
payload["raw_context_included"] = False
self._send_json(HTTPStatus.OK, payload)
+ def _handle_diagnostics_summary(self, query: str) -> None:  # Serve the diagnostics summary: parse the query string, build the summary report, and send its payload as JSON.
+ params = parse_qs(query)
+ try:
+ payload = build_diagnostics_summary_report(
+ db_path=self._db_path,
+ limit=_parse_report_limit(_first(params.get("limit")), 20),  # 20 is the second argument to _parse_report_limit — presumably the fallback when "limit" is absent; confirm
+ since=_first(params.get("since")),
+ until=_first(params.get("until")),
+ model=_first(params.get("model")),
+ effort=_first(params.get("effort")),
+ thread=_first(params.get("thread")),
+ min_tokens=_optional_int_query(params, "min_tokens"),
+ fact_type=_first(params.get("fact_type")),
+ fact_name=_first(params.get("fact_name")),
+ fact_category=_first(params.get("fact_category")),
+ include_archived=_parse_bool(
+ _first(params.get("include_archived")),
+ self._include_archived,  # presumably the server-level fallback used when the parameter is absent; confirm against _parse_bool
+ ),
+ sort=_first(params.get("sort")) or "uncached",  # a missing or empty "sort" falls back to "uncached"
+ direction=_first(params.get("direction")) or "desc",  # a missing or empty "direction" falls back to "desc"
+ ).payload  # only the report's `payload` attribute is returned to the client
+ except ValueError as exc:  # a ValueError raised while parsing or building the report is answered with 400 and its message
+ self._send_json(HTTPStatus.BAD_REQUEST, {"error": str(exc)})
+ return
+ except sqlite3.Error as exc:  # database failures are answered with 500; the exception text is included in the JSON body
+ self._send_json(
+ HTTPStatus.INTERNAL_SERVER_ERROR,
+ {"error": f"Database error while reading diagnostics: {exc}"},
+ )
+ return
+ self._send_json(HTTPStatus.OK, payload)
+
+ def _handle_diagnostics_facts(  # Serve a diagnostic-facts listing; do_GET also reuses it for /api/diagnostics/compactions (fact_type="compaction") and /api/diagnostics/tools (fact_group="tools").
+ self,
+ query: str,
+ *,
+ fact_type: str | None = None,  # when truthy, overrides any "fact_type" query parameter
+ fact_group: str | None = None,  # forwarded unchanged to the report builder
+ ) -> None:
+ params = parse_qs(query)
+ try:
+ payload = build_diagnostics_facts_report(
+ db_path=self._db_path,
+ limit=_parse_report_limit(_first(params.get("limit")), 50),  # 50 is the second argument to _parse_report_limit — presumably the fallback when "limit" is absent; confirm
+ since=_first(params.get("since")),
+ until=_first(params.get("until")),
+ model=_first(params.get("model")),
+ effort=_first(params.get("effort")),
+ thread=_first(params.get("thread")),
+ min_tokens=_optional_int_query(params, "min_tokens"),
+ fact_type=fact_type or _first(params.get("fact_type")),  # the keyword argument wins; the query-string value is used only when it is None or empty
+ fact_name=_first(params.get("fact_name")),
+ fact_category=_first(params.get("fact_category")),
+ include_archived=_parse_bool(
+ _first(params.get("include_archived")),
+ self._include_archived,  # presumably the server-level fallback used when the parameter is absent; confirm against _parse_bool
+ ),
+ sort=_first(params.get("sort")) or "uncached",  # a missing or empty "sort" falls back to "uncached"
+ direction=_first(params.get("direction")) or "desc",  # a missing or empty "direction" falls back to "desc"
+ fact_group=fact_group,
+ view=urlparse(self.path).path.rsplit("/", 1)[-1],  # text after the final "/" of the request path, so the builder is told which route was requested
+ ).payload  # only the report's `payload` attribute is returned to the client
+ except ValueError as exc:  # a ValueError raised while parsing or building the report is answered with 400 and its message
+ self._send_json(HTTPStatus.BAD_REQUEST, {"error": str(exc)})
+ return
+ except sqlite3.Error as exc:  # database failures are answered with 500; the exception text is included in the JSON body
+ self._send_json(
+ HTTPStatus.INTERNAL_SERVER_ERROR,
+ {"error": f"Database error while reading diagnostics: {exc}"},
+ )
+ return
+ self._send_json(HTTPStatus.OK, payload)
+
+ def _handle_diagnostics_fact_calls(self, query: str) -> None:  # Serve the calls associated with one diagnostic fact, identified by the required fact_type and fact_name query parameters.
+ params = parse_qs(query)
+ fact_type = _first(params.get("fact_type"))
+ fact_name = _first(params.get("fact_name"))
+ if not fact_type or not fact_name:  # missing or empty identifiers are rejected with 400 before any report is built
+ self._send_json(
+ HTTPStatus.BAD_REQUEST,
+ {"error": "fact_type and fact_name are required"},
+ )
+ return
+ try:
+ payload = build_diagnostics_fact_calls_report(
+ db_path=self._db_path,
+ fact_type=fact_type,
+ fact_name=fact_name,
+ limit=_parse_report_limit(_first(params.get("limit")), 50),  # 50 is the second argument to _parse_report_limit — presumably the fallback when "limit" is absent; confirm
+ offset=_parse_api_offset(_first(params.get("offset"))),  # paging offset; validation lives in _parse_api_offset (not shown here)
+ since=_first(params.get("since")),
+ until=_first(params.get("until")),
+ model=_first(params.get("model")),
+ effort=_first(params.get("effort")),
+ thread=_first(params.get("thread")),
+ min_tokens=_optional_int_query(params, "min_tokens"),
+ include_archived=_parse_bool(
+ _first(params.get("include_archived")),
+ self._include_archived,  # presumably the server-level fallback used when the parameter is absent; confirm against _parse_bool
+ ),
+ sort=_first(params.get("sort")) or "tokens",  # a missing or empty "sort" falls back to "tokens" (the facts endpoints fall back to "uncached")
+ direction=_first(params.get("direction")) or "desc",  # a missing or empty "direction" falls back to "desc"
+ privacy_mode=self._privacy_mode,  # the handler's privacy mode is forwarded to the report builder
+ ).payload  # only the report's `payload` attribute is returned to the client
+ except ValueError as exc:  # a ValueError raised while parsing or building the report is answered with 400 and its message
+ self._send_json(HTTPStatus.BAD_REQUEST, {"error": str(exc)})
+ return
+ except sqlite3.Error as exc:  # database failures are answered with 500; the exception text is included in the JSON body
+ self._send_json(
+ HTTPStatus.INTERNAL_SERVER_ERROR,
+ {"error": f"Database error while reading diagnostic calls: {exc}"},
+ )
+ return
+ self._send_json(HTTPStatus.OK, payload)
+
def _live_query_params(
self,
params: dict[str, list[str]],
diff --git a/src/codex_usage_tracker/store.py b/src/codex_usage_tracker/store.py
index fc2b8b3..4455bb7 100644
--- a/src/codex_usage_tracker/store.py
+++ b/src/codex_usage_tracker/store.py
@@ -10,8 +10,9 @@
from pathlib import Path
from typing import Any
-from codex_usage_tracker.models import RefreshResult, UsageEvent
+from codex_usage_tracker.models import DiagnosticFact, RefreshResult, UsageEvent
from codex_usage_tracker.parser import (
+ PARSER_ADAPTER_VERSION,
PARSER_DIAGNOSTIC_KEYS,
compact_parser_diagnostics,
find_session_logs,
@@ -21,6 +22,7 @@
from codex_usage_tracker.paths import DEFAULT_CODEX_HOME, DEFAULT_DB_PATH
from codex_usage_tracker.projects import apply_project_privacy_to_rows, validate_privacy_mode
from codex_usage_tracker.schema import (
+ DIAGNOSTIC_FACT_COLUMN_NAMES,
USAGE_EVENT_COLUMN_NAMES,
USAGE_EVENT_SCHEMA_CHECKSUM,
)
@@ -47,6 +49,7 @@
from codex_usage_tracker.store_thread_summaries import rebuild_thread_summaries
EVENT_COLUMNS = list(USAGE_EVENT_COLUMN_NAMES)
+DIAGNOSTIC_FACT_COLUMNS = list(DIAGNOSTIC_FACT_COLUMN_NAMES)
__all__ = ["EVENT_COLUMNS", "SCHEMA_VERSION", "SchemaMigrationError", "init_db"]
OBSERVED_USAGE_RECONCILIATION_THRESHOLD = 3
@@ -65,6 +68,7 @@ def refresh_usage_index(
parse_plans = source_logs_requiring_parse(conn, logs)
stats: dict[str, int] = {}
events: list[UsageEvent] = []
+ diagnostic_facts: list[DiagnosticFact] = []
parsed_files: list[ParsedSourceFile] = []
for plan in parse_plans:
file_stats: dict[str, int] = {}
@@ -78,6 +82,7 @@ def refresh_usage_index(
)
file_events = parsed_file.events
events.extend(file_events)
+ diagnostic_facts.extend(parsed_file.diagnostic_facts)
parsed_files.append((plan.path, file_events, file_stats, parsed_file.state))
for key, value in file_stats.items():
stats[key] = stats.get(key, 0) + int(value)
@@ -85,6 +90,7 @@ def refresh_usage_index(
events,
db_path=db_path,
replace_source_files=(plan.path for plan in parse_plans if plan.replace_existing),
+ diagnostic_facts=diagnostic_facts,
)
record_source_file_metadata(db_path=db_path, parsed_files=parsed_files)
skipped_events = stats.get("skipped_events", 0)
@@ -118,6 +124,7 @@ def rebuild_usage_index(
with connect(db_path) as conn:
init_db(conn)
+ conn.execute("DELETE FROM call_diagnostic_facts")
conn.execute("DELETE FROM usage_events")
conn.execute("DELETE FROM thread_summaries")
conn.execute("DELETE FROM source_files")
@@ -136,6 +143,7 @@ def reset_usage_database(db_path: Path = DEFAULT_DB_PATH) -> dict[str, Any]:
init_db(conn)
row = conn.execute("SELECT COUNT(*) AS count FROM usage_events").fetchone()
deleted_rows = int(row["count"] if row is not None else 0)
+ conn.execute("DELETE FROM call_diagnostic_facts")
conn.execute("DELETE FROM usage_events")
conn.execute("DELETE FROM thread_summaries")
conn.execute("DELETE FROM source_files")
@@ -180,7 +188,7 @@ def record_refresh_metadata(
"parsed_events": str(parsed_events),
"skipped_events": str(skipped_events),
"inserted_or_updated_events": str(inserted_or_updated_events),
- "parser_adapter": "codex-jsonl-v1",
+ "parser_adapter": PARSER_ADAPTER_VERSION,
"schema_version": str(SCHEMA_VERSION),
"usage_events_schema_checksum": USAGE_EVENT_SCHEMA_CHECKSUM,
}
@@ -270,13 +278,26 @@ def upsert_usage_events(
*,
refresh_links: bool = True,
replace_source_files: Iterable[Path] | None = None,
+ diagnostic_facts: Iterable[DiagnosticFact] | None = None,
) -> int:
rows = [event.to_row() for event in events]
+ fact_rows = [fact.to_row() for fact in diagnostic_facts or []]
source_files_to_replace = [str(path) for path in replace_source_files or []]
with connect(db_path) as conn:
init_db(conn)
if source_files_to_replace:
placeholders = ", ".join("?" for _source in source_files_to_replace)
+ conn.execute(
+ f"""
+ DELETE FROM call_diagnostic_facts
+ WHERE record_id IN (
+ SELECT record_id
+ FROM usage_events
+ WHERE source_file IN ({placeholders})
+ )
+ """,
+ source_files_to_replace,
+ )
conn.execute(
f"DELETE FROM usage_events WHERE source_file IN ({placeholders})",
source_files_to_replace,
@@ -297,13 +318,54 @@ def upsert_usage_events(
f"VALUES ({placeholders}) "
f"ON CONFLICT(record_id) DO UPDATE SET {update_clause}"
)
+ _delete_diagnostic_facts_for_record_ids(
+ conn,
+ [str(row["record_id"]) for row in rows],
+ )
conn.executemany(sql, [[row[column] for column in EVENT_COLUMNS] for row in rows])
+ _insert_diagnostic_facts(conn, fact_rows)
if refresh_links:
_refresh_usage_event_links(conn)
rebuild_thread_summaries(conn)
return len(rows)
+def _delete_diagnostic_facts_for_record_ids(  # Delete every call_diagnostic_facts row whose record_id appears in `record_ids`, using the caller's connection.
+ conn: sqlite3.Connection,
+ record_ids: list[str],
+) -> None:
+ if not record_ids:  # nothing to delete; also avoids generating an empty "IN ()" clause
+ return
+ placeholders = ", ".join("?" for _record_id in record_ids)  # one "?" per id, so ids are bound as parameters rather than interpolated into the SQL text
+ conn.execute(
+ f"DELETE FROM call_diagnostic_facts WHERE record_id IN ({placeholders})",
+ record_ids,  # NOTE(review): binds len(record_ids) parameters in one statement; SQLite caps bound parameters per statement (SQLITE_MAX_VARIABLE_NUMBER), so very large batches may need chunking — confirm caller batch sizes
+ )
+
+
+def _insert_diagnostic_facts(  # Upsert diagnostic-fact rows into call_diagnostic_facts on the caller's connection.
+ conn: sqlite3.Connection,
+ rows: list[dict[str, object]],  # each row is indexed by every name in DIAGNOSTIC_FACT_COLUMNS below
+) -> None:
+ if not rows:  # no rows: skip building and executing the statement
+ return
+ placeholders = ", ".join("?" for _column in DIAGNOSTIC_FACT_COLUMNS)  # one "?" per column
+ update_clause = ", ".join(
+ f"{column}=excluded.{column}"  # on conflict, take the value from the row that was being inserted
+ for column in DIAGNOSTIC_FACT_COLUMNS
+ if column not in {"record_id", "fact_type", "fact_name"}  # the conflict-key columns are left out of the SET list
+ )
+ sql = (
+ f"INSERT INTO call_diagnostic_facts ({', '.join(DIAGNOSTIC_FACT_COLUMNS)}) "
+ f"VALUES ({placeholders}) "
+ f"ON CONFLICT(record_id, fact_type, fact_name) DO UPDATE SET {update_clause}"  # NOTE(review): requires a matching unique/primary-key constraint on these three columns — confirm in the table DDL
+ )
+ conn.executemany(
+ sql,
+ [[row[column] for column in DIAGNOSTIC_FACT_COLUMNS] for row in rows],  # values follow the column order of the INSERT; a row missing a column raises KeyError
+ )
+
+
def refresh_usage_event_links(db_path: Path = DEFAULT_DB_PATH) -> int:
"""Recompute per-thread chronological adjacency for aggregate usage rows."""
@@ -469,6 +531,480 @@ def query_usage_record(
return _row_to_dict(row) if row is not None else None
+def query_diagnostic_facts(
+ db_path: Path = DEFAULT_DB_PATH,
+ *,
+ limit: int | None = 50,
+ since: str | None = None,
+ until: str | None = None,
+ model: str | None = None,
+ effort: str | None = None,
+ thread: str | None = None,
+ min_tokens: int | None = None,
+ fact_type: str | None = None,
+ fact_name: str | None = None,
+ fact_category: str | None = None,
+ include_archived: bool = False,
+ sort: str = "uncached",
+ direction: str = "desc",
+) -> list[dict[str, Any]]:
+ """Return aggregate diagnostic fact summaries joined to usage events.
+
+ One row is produced per (fact_type, fact_name, fact_category) group with
+ occurrence counts, token sums over the joined usage rows, and
+ ``largest_record_id`` chosen by a correlated sub-query that matches on
+ fact_type and fact_name under the same filters.
+
+ Raises:
+ ValueError: if ``sort`` is not one of the keys of ``sort_map``.
+ """
+
+ # Public sort keys mapped to the SELECT aliases/columns used in ORDER BY.
+ sort_map = {
+ "uncached": "associated_uncached_input_tokens",
+ "tokens": "associated_total_tokens",
+ "calls": "associated_calls",
+ "occurrences": "occurrences",
+ "time": "latest_event_timestamp",
+ "fact": "f.fact_name",
+ }
+ if sort not in sort_map:
+ allowed = ", ".join(sorted(sort_map))
+ raise ValueError(f"sort must be one of: {allowed}")
+ direction_sql = _normalize_sort_direction(direction)
+ # Outer query filters: usage-level filters first, then fact-level filters on alias f.
+ where_clause, params = _usage_where_clause(
+ since=since,
+ until=until,
+ model=model,
+ effort=effort,
+ thread=thread,
+ min_tokens=min_tokens,
+ table_alias="usage_events",
+ include_archived=include_archived,
+ )
+ where_clause, params = _append_diagnostic_fact_filters(
+ where_clause,
+ params,
+ fact_type=fact_type,
+ fact_name=fact_name,
+ fact_category=fact_category,
+ table_alias="f",
+ )
+ # The same filters are rebuilt against the u2/f2 aliases for the
+ # correlated sub-query that selects largest_record_id.
+ sub_where_clause, sub_params = _usage_where_clause(
+ since=since,
+ until=until,
+ model=model,
+ effort=effort,
+ thread=thread,
+ min_tokens=min_tokens,
+ table_alias="u2",
+ include_archived=include_archived,
+ )
+ sub_where_clause, sub_params = _append_diagnostic_fact_filters(
+ sub_where_clause,
+ sub_params,
+ fact_type=fact_type,
+ fact_name=fact_name,
+ fact_category=fact_category,
+ table_alias="f2",
+ )
+ # Correlation conditions carry no placeholders, so sub_params stays aligned.
+ sub_conditions = [
+ "f2.fact_type = f.fact_type",
+ "f2.fact_name = f.fact_name",
+ ]
+ if sub_where_clause:
+ sub_conditions.append(sub_where_clause.removeprefix("WHERE "))
+ sub_where_sql = "WHERE " + " AND ".join(f"({condition})" for condition in sub_conditions)
+ normalized_limit = _normalize_limit(limit)
+ limit_clause = ""
+ # Sub-query placeholders come first: the sub-query sits in the SELECT list,
+ # ahead of the outer WHERE clause in the statement text.
+ query_params: list[Any] = [*sub_params, *params]
+ if normalized_limit is not None:
+ limit_clause = "LIMIT ?"
+ query_params.append(normalized_limit)
+ with connect(db_path) as conn:
+ init_db(conn)
+ rows = conn.execute(
+ f"""
+ SELECT
+ f.fact_type,
+ f.fact_name,
+ f.fact_category,
+ coalesce(SUM(f.event_count), 0) AS occurrences,
+ COUNT(DISTINCT usage_events.record_id) AS associated_calls,
+ coalesce(SUM(usage_events.input_tokens), 0) AS associated_input_tokens,
+ coalesce(SUM(usage_events.cached_input_tokens), 0)
+ AS associated_cached_input_tokens,
+ coalesce(SUM(usage_events.uncached_input_tokens), 0)
+ AS associated_uncached_input_tokens,
+ coalesce(SUM(usage_events.output_tokens), 0) AS associated_output_tokens,
+ coalesce(SUM(usage_events.reasoning_output_tokens), 0)
+ AS associated_reasoning_output_tokens,
+ coalesce(SUM(usage_events.total_tokens), 0) AS associated_total_tokens,
+ AVG(usage_events.cache_ratio) AS avg_cache_ratio,
+ MAX(usage_events.total_tokens) AS largest_call_tokens,
+ MAX(usage_events.event_timestamp) AS latest_event_timestamp,
+ MIN(f.first_source_line) AS first_source_line,
+ MAX(f.last_source_line) AS last_source_line,
+ MAX(f.raw_content_included) AS raw_content_included,
+ (
+ SELECT u2.record_id
+ FROM call_diagnostic_facts AS f2
+ JOIN usage_events AS u2 ON u2.record_id = f2.record_id
+ {sub_where_sql}
+ ORDER BY u2.total_tokens DESC, u2.event_timestamp DESC, u2.record_id
+ LIMIT 1
+ ) AS largest_record_id
+ FROM call_diagnostic_facts AS f
+ JOIN usage_events ON usage_events.record_id = f.record_id
+ {where_clause}
+ GROUP BY f.fact_type, f.fact_name, f.fact_category
+ ORDER BY {sort_map[sort]} {direction_sql},
+ associated_total_tokens DESC,
+ f.fact_type,
+ f.fact_name
+ {limit_clause}
+ """,
+ query_params,
+ )
+ return [_row_to_dict(row) for row in rows]
+
+
+def query_diagnostic_summary(
+ db_path: Path = DEFAULT_DB_PATH,
+ *,
+ limit: int | None = 20,
+ since: str | None = None,
+ until: str | None = None,
+ model: str | None = None,
+ effort: str | None = None,
+ thread: str | None = None,
+ min_tokens: int | None = None,
+ fact_type: str | None = None,
+ fact_name: str | None = None,
+ fact_category: str | None = None,
+ include_archived: bool = False,
+ sort: str = "uncached",
+ direction: str = "desc",
+) -> list[dict[str, Any]]:
+ """Return aggregate diagnostic summaries grouped by fact type.
+
+ The statement is built from four CTEs: ``scoped`` is the filtered
+ fact/usage join, ``type_counts`` counts occurrences, calls and fact names
+ per fact_type, ``distinct_calls`` collapses ``scoped`` to one row per
+ (fact_type, record_id) so a call with several fact names of one type
+ contributes its tokens once, and ``call_sums`` totals those rows.
+
+ Raises:
+ ValueError: if ``sort`` is not one of the keys of ``sort_map``.
+ """
+
+ # Public sort keys mapped to the final SELECT columns used in ORDER BY.
+ sort_map = {
+ "uncached": "associated_uncached_input_tokens",
+ "tokens": "associated_total_tokens",
+ "calls": "associated_calls",
+ "occurrences": "occurrences",
+ "time": "latest_event_timestamp",
+ "fact": "type_counts.fact_type",
+ }
+ if sort not in sort_map:
+ allowed = ", ".join(sorted(sort_map))
+ raise ValueError(f"sort must be one of: {allowed}")
+ direction_sql = _normalize_sort_direction(direction)
+ # Usage-level filters first, then fact-level filters on alias f; both end
+ # up inside the scoped CTE.
+ where_clause, params = _usage_where_clause(
+ since=since,
+ until=until,
+ model=model,
+ effort=effort,
+ thread=thread,
+ min_tokens=min_tokens,
+ table_alias="usage_events",
+ include_archived=include_archived,
+ )
+ where_clause, params = _append_diagnostic_fact_filters(
+ where_clause,
+ params,
+ fact_type=fact_type,
+ fact_name=fact_name,
+ fact_category=fact_category,
+ table_alias="f",
+ )
+ normalized_limit = _normalize_limit(limit)
+ limit_clause = ""
+ query_params: list[Any] = list(params)
+ # The LIMIT placeholder is the last one in the statement text.
+ if normalized_limit is not None:
+ limit_clause = "LIMIT ?"
+ query_params.append(normalized_limit)
+ # NOTE(review): the top_fact_name sub-query orders by two sums with no
+ # final fact_name tiebreak, so ties may resolve in an unspecified order.
+ with connect(db_path) as conn:
+ init_db(conn)
+ rows = conn.execute(
+ f"""
+ WITH scoped AS (
+ SELECT
+ f.fact_type,
+ f.fact_name,
+ f.event_count,
+ usage_events.record_id,
+ usage_events.input_tokens,
+ usage_events.cached_input_tokens,
+ usage_events.uncached_input_tokens,
+ usage_events.output_tokens,
+ usage_events.reasoning_output_tokens,
+ usage_events.total_tokens,
+ usage_events.cache_ratio,
+ usage_events.event_timestamp
+ FROM call_diagnostic_facts AS f
+ JOIN usage_events ON usage_events.record_id = f.record_id
+ {where_clause}
+ ),
+ type_counts AS (
+ SELECT
+ fact_type,
+ coalesce(SUM(event_count), 0) AS occurrences,
+ COUNT(DISTINCT record_id) AS associated_calls,
+ COUNT(DISTINCT fact_name) AS fact_names
+ FROM scoped
+ GROUP BY fact_type
+ ),
+ distinct_calls AS (
+ SELECT
+ fact_type,
+ record_id,
+ MAX(input_tokens) AS input_tokens,
+ MAX(cached_input_tokens) AS cached_input_tokens,
+ MAX(uncached_input_tokens) AS uncached_input_tokens,
+ MAX(output_tokens) AS output_tokens,
+ MAX(reasoning_output_tokens) AS reasoning_output_tokens,
+ MAX(total_tokens) AS total_tokens,
+ MAX(cache_ratio) AS cache_ratio,
+ MAX(event_timestamp) AS event_timestamp
+ FROM scoped
+ GROUP BY fact_type, record_id
+ ),
+ call_sums AS (
+ SELECT
+ fact_type,
+ coalesce(SUM(input_tokens), 0) AS associated_input_tokens,
+ coalesce(SUM(cached_input_tokens), 0) AS associated_cached_input_tokens,
+ coalesce(SUM(uncached_input_tokens), 0)
+ AS associated_uncached_input_tokens,
+ coalesce(SUM(output_tokens), 0) AS associated_output_tokens,
+ coalesce(SUM(reasoning_output_tokens), 0)
+ AS associated_reasoning_output_tokens,
+ coalesce(SUM(total_tokens), 0) AS associated_total_tokens,
+ AVG(cache_ratio) AS avg_cache_ratio,
+ MAX(total_tokens) AS largest_call_tokens,
+ MAX(event_timestamp) AS latest_event_timestamp
+ FROM distinct_calls
+ GROUP BY fact_type
+ )
+ SELECT
+ type_counts.fact_type,
+ type_counts.occurrences,
+ type_counts.associated_calls,
+ type_counts.fact_names,
+ call_sums.associated_input_tokens,
+ call_sums.associated_cached_input_tokens,
+ call_sums.associated_uncached_input_tokens,
+ call_sums.associated_output_tokens,
+ call_sums.associated_reasoning_output_tokens,
+ call_sums.associated_total_tokens,
+ call_sums.avg_cache_ratio,
+ call_sums.largest_call_tokens,
+ call_sums.latest_event_timestamp,
+ (
+ SELECT s2.fact_name
+ FROM scoped AS s2
+ WHERE s2.fact_type = type_counts.fact_type
+ GROUP BY s2.fact_name
+ ORDER BY SUM(s2.event_count) DESC, SUM(s2.uncached_input_tokens) DESC
+ LIMIT 1
+ ) AS top_fact_name
+ FROM type_counts
+ JOIN call_sums ON call_sums.fact_type = type_counts.fact_type
+ ORDER BY {sort_map[sort]} {direction_sql},
+ associated_total_tokens DESC,
+ type_counts.fact_type
+ {limit_clause}
+ """,
+ query_params,
+ )
+ return [_row_to_dict(row) for row in rows]
+
+
+def query_diagnostic_fact_calls(
+    db_path: Path = DEFAULT_DB_PATH,
+    *,
+    fact_type: str,
+    fact_name: str,
+    limit: int | None = 50,
+    offset: int = 0,
+    since: str | None = None,
+    until: str | None = None,
+    model: str | None = None,
+    effort: str | None = None,
+    thread: str | None = None,
+    min_tokens: int | None = None,
+    include_archived: bool = False,
+    sort: str = "tokens",
+    direction: str = "desc",
+) -> list[dict[str, Any]]:
+    """Return usage calls associated with one diagnostic fact.
+
+    Each row holds every ``usage_events`` column plus the matching fact's
+    columns (several of them aliased with a ``diagnostic_`` prefix).
+
+    The ordering ends on ``usage_events.record_id`` so rows that tie on the
+    requested sort key, timestamp and cumulative total keep a fixed relative
+    order; without it, LIMIT/OFFSET pages could repeat or skip tied rows.
+
+    Raises:
+        ValueError: if ``sort`` is rejected by
+            ``_diagnostic_fact_call_order_expression``.
+    """
+
+    where_clause, params = _diagnostic_fact_call_where(
+        fact_type=fact_type,
+        fact_name=fact_name,
+        since=since,
+        until=until,
+        model=model,
+        effort=effort,
+        thread=thread,
+        min_tokens=min_tokens,
+        include_archived=include_archived,
+    )
+    order_expr = _diagnostic_fact_call_order_expression(sort)
+    direction_sql = _normalize_sort_direction(direction)
+    normalized_limit = _normalize_limit(limit)
+    normalized_offset = _normalize_offset(offset)
+    limit_clause = ""
+    query_params = list(params)
+    if normalized_limit is not None:
+        limit_clause = "LIMIT ?"
+        query_params.append(normalized_limit)
+        if normalized_offset:
+            limit_clause += " OFFSET ?"
+            query_params.append(normalized_offset)
+    elif normalized_offset:
+        # SQLite needs a LIMIT before OFFSET; -1 means "no upper bound".
+        limit_clause = "LIMIT -1 OFFSET ?"
+        query_params.append(normalized_offset)
+    with connect(db_path) as conn:
+        init_db(conn)
+        rows = conn.execute(
+            f"""
+            SELECT
+                usage_events.*,
+                f.fact_type,
+                f.fact_name,
+                f.fact_category,
+                f.event_count AS diagnostic_event_count,
+                f.confidence AS diagnostic_confidence,
+                f.first_event_timestamp AS diagnostic_first_event_timestamp,
+                f.last_event_timestamp AS diagnostic_last_event_timestamp,
+                f.first_source_line AS diagnostic_first_source_line,
+                f.last_source_line AS diagnostic_last_source_line,
+                f.evidence_scope AS diagnostic_evidence_scope,
+                f.raw_content_included AS raw_content_included
+            FROM call_diagnostic_facts AS f
+            JOIN usage_events ON usage_events.record_id = f.record_id
+            {where_clause}
+            ORDER BY {order_expr} {direction_sql},
+                usage_events.event_timestamp DESC,
+                usage_events.cumulative_total_tokens DESC,
+                usage_events.record_id
+            {limit_clause}
+            """,
+            query_params,
+        )
+        # Materialize while the connection is still open.
+        return [_row_to_dict(row) for row in rows]
+
+
+def query_diagnostic_fact_call_count(
+ db_path: Path = DEFAULT_DB_PATH,
+ *,
+ fact_type: str,
+ fact_name: str,
+ since: str | None = None,
+ until: str | None = None,
+ model: str | None = None,
+ effort: str | None = None,
+ thread: str | None = None,
+ min_tokens: int | None = None,
+ include_archived: bool = False,
+) -> int:
+ """Return the number of calls associated with one diagnostic fact.
+
+ Counts distinct ``usage_events.record_id`` values using the WHERE clause
+ built by ``_diagnostic_fact_call_where``. Returns 0 when the query yields
+ no row.
+ """
+
+ where_clause, params = _diagnostic_fact_call_where(
+ fact_type=fact_type,
+ fact_name=fact_name,
+ since=since,
+ until=until,
+ model=model,
+ effort=effort,
+ thread=thread,
+ min_tokens=min_tokens,
+ include_archived=include_archived,
+ )
+ with connect(db_path) as conn:
+ init_db(conn)
+ row = conn.execute(
+ f"""
+ SELECT COUNT(DISTINCT usage_events.record_id) AS row_count
+ FROM call_diagnostic_facts AS f
+ JOIN usage_events ON usage_events.record_id = f.record_id
+ {where_clause}
+ """,
+ params,
+ ).fetchone()
+ # Guard against a missing row before reading the aliased count column.
+ return int(row["row_count"] if row is not None else 0)
+
+
+def _diagnostic_fact_call_where(
+    *,
+    fact_type: str,
+    fact_name: str,
+    since: str | None,
+    until: str | None,
+    model: str | None,
+    effort: str | None,
+    thread: str | None,
+    min_tokens: int | None,
+    include_archived: bool,
+) -> tuple[str, list[Any]]:
+    """Build the WHERE clause and bound values for single-fact call queries.
+
+    Usage-level filters target the ``usage_events`` table; the fact type and
+    name filters are then appended against the ``f`` alias. No category
+    filter is applied.
+    """
+    usage_clause, usage_params = _usage_where_clause(
+        since=since,
+        until=until,
+        model=model,
+        effort=effort,
+        thread=thread,
+        min_tokens=min_tokens,
+        table_alias="usage_events",
+        include_archived=include_archived,
+    )
+    scoped = _append_diagnostic_fact_filters(
+        usage_clause,
+        usage_params,
+        fact_type=fact_type,
+        fact_name=fact_name,
+        fact_category=None,
+        table_alias="f",
+    )
+    return scoped
+
+
+def _append_diagnostic_fact_filters(
+    where_clause: str,
+    params: list[Any],
+    *,
+    fact_type: str | None,
+    fact_name: str | None,
+    fact_category: str | None,
+    table_alias: str,
+) -> tuple[str, list[Any]]:
+    """Extend a WHERE clause with equality filters on diagnostic fact columns.
+
+    Falsy filter values are skipped. Every condition, including the incoming
+    clause (minus its ``WHERE `` prefix), is wrapped in parentheses and joined
+    with ``AND``. The input ``params`` list is copied, never mutated.
+
+    Returns:
+        ``(clause, values)``; ``clause`` is an empty string when there is
+        nothing to filter on.
+    """
+    conditions: list[str] = []
+    if where_clause:
+        conditions.append(where_clause.removeprefix("WHERE "))
+    bound = list(params)
+    requested = (
+        ("fact_type", fact_type),
+        ("fact_name", fact_name),
+        ("fact_category", fact_category),
+    )
+    for column, value in requested:
+        if value:
+            conditions.append(f"{table_alias}.{column} = ?")
+            bound.append(value)
+    if not conditions:
+        return "", bound
+    joined = " AND ".join(f"({condition})" for condition in conditions)
+    return "WHERE " + joined, bound
+
+
+def _diagnostic_fact_call_order_expression(sort: str) -> str:
+    """Translate a public sort key into the SQL expression used in ORDER BY.
+
+    Raises:
+        ValueError: if ``sort`` is not a known key; the message lists the
+            accepted keys alphabetically.
+    """
+    plain_columns = {
+        "time": "event_timestamp",
+        "tokens": "total_tokens",
+        "input": "input_tokens",
+        "cached": "cached_input_tokens",
+        "uncached": "uncached_input_tokens",
+        "output": "output_tokens",
+        "reasoning": "reasoning_output_tokens",
+        "cache": "cache_ratio",
+        "model": "model",
+        "effort": "effort",
+    }
+    expressions = {
+        key: f"usage_events.{column}" for key, column in plain_columns.items()
+    }
+    # Thread ordering falls back from thread name to parent name to session id.
+    expressions["thread"] = (
+        "coalesce(usage_events.thread_name, usage_events.parent_thread_name, usage_events.session_id)"
+    )
+    try:
+        return expressions[sort]
+    except KeyError as exc:
+        allowed = ", ".join(sorted(expressions))
+        raise ValueError(f"sort must be one of: {allowed}") from exc
+
+
def query_dashboard_events(
db_path: Path = DEFAULT_DB_PATH,
limit: int | None = 5000,
diff --git a/src/codex_usage_tracker/store_schema.py b/src/codex_usage_tracker/store_schema.py
index 27a4a1c..c5e973c 100644
--- a/src/codex_usage_tracker/store_schema.py
+++ b/src/codex_usage_tracker/store_schema.py
@@ -12,7 +12,7 @@
USAGE_EVENT_SCHEMA_CHECKSUM,
)
-SCHEMA_VERSION = 8
+SCHEMA_VERSION = 9
MIGRATION_NAMES = {
1: "create usage_events aggregate fact table",
2: "track schema migration checksum metadata",
@@ -22,6 +22,7 @@
6: "track source file refresh metadata",
7: "persist source file parser cursors",
8: "persist observed Codex usage snapshots",
+ 9: "persist aggregate diagnostic facts",
}
CALL_ORIGIN_REPAIR_COLUMNS = {
"call_initiator": "TEXT",
@@ -95,6 +96,12 @@ def init_db(conn: sqlite3.Connection) -> None:
else:
_migrate_v8(conn)
_record_migration_if_missing(conn, 8)
+ if user_version < 9:
+ _migrate_v9(conn)
+ _record_migration(conn, 9)
+ else:
+ _migrate_v9(conn)
+ _record_migration_if_missing(conn, 9)
_validate_usage_events_schema(conn)
conn.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
@@ -245,6 +252,34 @@ def _migrate_v8(conn: sqlite3.Connection) -> None:
)
+def _migrate_v9(conn: sqlite3.Connection) -> None:
+ """Create the call_diagnostic_facts table and its two lookup indexes.
+
+ Every statement uses IF NOT EXISTS, so running the script on a database
+ that already has these objects leaves them in place.
+ """
+ # NOTE(review): the ON DELETE CASCADE below only takes effect when SQLite
+ # foreign-key enforcement is enabled on this connection — confirm that
+ # connect() turns it on.
+ conn.executescript(
+ """
+ CREATE TABLE IF NOT EXISTS call_diagnostic_facts (
+ record_id TEXT NOT NULL,
+ fact_type TEXT NOT NULL,
+ fact_name TEXT NOT NULL,
+ fact_category TEXT,
+ event_count INTEGER NOT NULL DEFAULT 1,
+ confidence TEXT NOT NULL DEFAULT 'medium',
+ first_event_timestamp TEXT,
+ last_event_timestamp TEXT,
+ first_source_line INTEGER,
+ last_source_line INTEGER,
+ evidence_scope TEXT NOT NULL DEFAULT 'between_token_counts',
+ raw_content_included INTEGER NOT NULL DEFAULT 0,
+ PRIMARY KEY (record_id, fact_type, fact_name),
+ FOREIGN KEY (record_id) REFERENCES usage_events(record_id) ON DELETE CASCADE
+ );
+
+ CREATE INDEX IF NOT EXISTS idx_call_diagnostic_facts_type_name
+ ON call_diagnostic_facts(fact_type, fact_name);
+ CREATE INDEX IF NOT EXISTS idx_call_diagnostic_facts_record
+ ON call_diagnostic_facts(record_id);
+ """
+ )
+
+
def _record_migration(conn: sqlite3.Connection, version: int) -> None:
conn.execute(
"""
diff --git a/src/codex_usage_tracker/store_sources.py b/src/codex_usage_tracker/store_sources.py
index f6a3abf..89029a2 100644
--- a/src/codex_usage_tracker/store_sources.py
+++ b/src/codex_usage_tracker/store_sources.py
@@ -13,6 +13,7 @@
from codex_usage_tracker.models import UsageEvent
from codex_usage_tracker.parser import (
+ PARSER_ADAPTER_VERSION,
ParserState,
compact_parser_diagnostics,
parser_state_from_json,
@@ -47,7 +48,7 @@ def source_logs_requiring_parse(
row = conn.execute(
"""
SELECT size_bytes, mtime_ns, parsed_until_line
- , parsed_until_byte, parser_state_json
+ , parsed_until_byte, parser_adapter, parser_state_json
FROM source_files
WHERE source_file = ?
""",
@@ -61,6 +62,10 @@ def source_logs_requiring_parse(
previous_byte = int(row["parsed_until_byte"])
previous_line = int(row["parsed_until_line"])
previous_state = parser_state_from_json(row["parser_state_json"])
+ previous_adapter = str(row["parser_adapter"] or "")
+ if previous_adapter != PARSER_ADAPTER_VERSION:
+ changed.append(SourceParsePlan(path=path))
+ continue
if previous_state is None:
changed.append(SourceParsePlan(path=path))
continue
@@ -130,7 +135,7 @@ def upsert_source_file_metadata(
if latest_event
else parser_state.latest_event_timestamp
),
- "parser_adapter": "codex-jsonl-v1",
+ "parser_adapter": PARSER_ADAPTER_VERSION,
"parser_diagnostics_json": json.dumps(
compact_parser_diagnostics(diagnostics),
sort_keys=True,
diff --git a/tests/test_cli_lifecycle.py b/tests/test_cli_lifecycle.py
index 0c4923d..7a02eab 100644
--- a/tests/test_cli_lifecycle.py
+++ b/tests/test_cli_lifecycle.py
@@ -336,6 +336,115 @@ def test_report_json_and_query_cli(tmp_path: Path) -> None:
assert "[redacted cwd:" in csv_text
+def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None:
+    """Each diagnostics CLI view exits cleanly and emits aggregate-only JSON.
+
+    Exit codes are asserted for every invocation before stdout is parsed, so
+    a failing subcommand is reported as such rather than as a JSON decode
+    error.
+    """
+    codex_home = _make_diagnostics_codex_home(tmp_path)
+    db_path = tmp_path / "usage.sqlite3"
+
+    def run(*args: str):
+        # Every invocation shares the same working directory and database.
+        return _run_cli(tmp_path, "--db", str(db_path), *args)
+
+    refresh = run("refresh", "--codex-home", str(codex_home), "--json")
+    results = {
+        "summary": run("diagnostics", "summary", "--json"),
+        "facts": run("diagnostics", "facts", "--limit", "0", "--json"),
+        "compactions": run("diagnostics", "compactions", "--json"),
+        "tools": run("diagnostics", "tools", "--json"),
+        "fact-calls": run(
+            "--privacy-mode",
+            "strict",
+            "diagnostics",
+            "fact-calls",
+            "--fact-type",
+            "compaction",
+            "--fact-name",
+            "post_compaction",
+            "--json",
+        ),
+    }
+
+    assert refresh.returncode == 0
+    for view, result in results.items():
+        assert result.returncode == 0, f"diagnostics {view} exited {result.returncode}"
+    summary_payload = json.loads(results["summary"].stdout)
+    facts_payload = json.loads(results["facts"].stdout)
+    compactions_payload = json.loads(results["compactions"].stdout)
+    tools_payload = json.loads(results["tools"].stdout)
+    fact_calls_payload = json.loads(results["fact-calls"].stdout)
+    for payload in (
+        summary_payload,
+        facts_payload,
+        compactions_payload,
+        tools_payload,
+        fact_calls_payload,
+    ):
+        _assert_contract(payload)
+        assert payload["schema"] == "codex-usage-tracker-diagnostics-v1"
+        assert payload["raw_context_included"] is False
+        assert "Associated token totals are not additive" in payload["notes"][0]
+
+    fact_names = {row["fact_name"] for row in facts_payload["rows"]}
+    assert {"function_call_output", "patch_applied", "post_compaction"} <= fact_names
+    assert summary_payload["view"] == "summary"
+    assert {row["fact_type"] for row in summary_payload["rows"]} >= {
+        "compaction",
+        "outcome",
+        "tool",
+    }
+    assert compactions_payload["filters"]["fact_type"] == "compaction"
+    assert {row["fact_type"] for row in compactions_payload["rows"]} == {"compaction"}
+    assert tools_payload["filters"]["fact_type"] is None
+    assert tools_payload["filters"]["fact_group"] == "tools"
+    assert {row["fact_type"] for row in tools_payload["rows"]} == {"tool"}
+    assert fact_calls_payload["view"] == "fact-calls"
+    assert fact_calls_payload["filters"]["privacy_mode"] == "strict"
+    assert fact_calls_payload["rows"][0]["cwd"].startswith("[redacted cwd:")
+    # None of the SECRET markers written into the fixture log may surface.
+    combined = json.dumps(
+        [
+            summary_payload,
+            facts_payload,
+            compactions_payload,
+            tools_payload,
+            fact_calls_payload,
+        ]
+    )
+    assert "SECRET" not in combined
+    assert "/tmp/private-diagnostics" not in json.dumps(fact_calls_payload)
+
+
def _assert_contract(payload: object) -> None:
assert validate_json_payload_contract(payload) == []
@@ -383,6 +492,62 @@ def _make_codex_home(tmp_path: Path) -> Path:
return codex_home
+def _make_diagnostics_codex_home(tmp_path: Path) -> Path:
+    """Write a synthetic Codex home with one rollout log and return its path.
+
+    The log contains a tool output, a patch event and a compaction event,
+    each carrying a ``SECRET`` marker, separated by two token events.
+    """
+    codex_home = tmp_path / ".codex"
+    rollout_name = f"rollout-2026-05-17T14-58-23-{SESSION_ID}.jsonl"
+    log_path = codex_home / "sessions" / "2026" / "05" / "17" / rollout_name
+
+    index_rows = [
+        {
+            "id": SESSION_ID,
+            "thread_name": "Synthetic diagnostics test",
+            "updated_at": "2026-05-17T18:58:27Z",
+        }
+    ]
+    _write_jsonl(codex_home / "session_index.jsonl", index_rows)
+
+    turn_context = {
+        "turn_id": "turn-a",
+        "model": "gpt-5.5",
+        "cwd": "/tmp/private-diagnostics",
+    }
+    replacement_history = [
+        {
+            "type": "message",
+            "role": "assistant",
+            "content": [{"type": "output_text", "text": "SECRET COMPACTION TEXT"}],
+        }
+    ]
+    log_rows = [
+        _entry("session_meta", {"id": SESSION_ID}),
+        _entry("turn_context", turn_context),
+        _entry(
+            "response_item",
+            {"type": "function_call_output", "output": "SECRET TOOL OUTPUT"},
+        ),
+        _entry(
+            "event_msg",
+            {"type": "patch_apply_end", "patch": "SECRET PATCH TEXT"},
+        ),
+        _token_event(120, 120),
+        _entry(
+            "event_msg",
+            {"type": "context_compacted", "replacement_history": replacement_history},
+        ),
+        _token_event(220, 100),
+    ]
+    _write_jsonl(log_path, log_rows)
+    return codex_home
+
+
def _token_event(cumulative_total: int, last_total: int) -> dict[str, object]:
return _entry(
"event_msg",
diff --git a/tests/test_cli_release.py b/tests/test_cli_release.py
index e70109f..32a1d73 100644
--- a/tests/test_cli_release.py
+++ b/tests/test_cli_release.py
@@ -26,6 +26,7 @@
"summary",
"query",
"recommendations",
+ "diagnostics",
"session",
"context",
"dashboard",
diff --git a/tests/test_context_evidence.py b/tests/test_context_evidence.py
index 4f29119..eca9a5d 100644
--- a/tests/test_context_evidence.py
+++ b/tests/test_context_evidence.py
@@ -14,6 +14,7 @@
from codex_usage_tracker.context import load_call_context
from codex_usage_tracker.store import (
+ query_diagnostic_fact_calls,
query_session_usage,
refresh_usage_index,
)
@@ -251,6 +252,72 @@ def test_context_carries_incoming_compaction_history_into_selected_turn(tmp_path
assert "[REDACTED_OPENAI_KEY]" in context_text
+def test_context_carries_safe_diagnostic_events_into_selected_turn(tmp_path: Path) -> None:
+    """Rollback/abort events before a turn reach its context without raw reasons.
+
+    The rollback fact must be attributed to the call that follows it, and the
+    loaded context must expose both events by label and line number while
+    omitting the ``SECRET`` reason strings.
+    """
+    session_id = "019e37d5-f19f-7e4d-84cb-508941431112"
+    codex_home = tmp_path / ".codex"
+    day_dir = codex_home / "sessions" / "2026" / "06" / "11"
+    log_path = day_dir / f"rollout-2026-06-11T22-25-00-{session_id}.jsonl"
+
+    index_rows = [
+        {
+            "id": session_id,
+            "thread_name": "Diagnostic boundary",
+            "updated_at": "2026-06-11T22:35:00Z",
+        }
+    ]
+    _write_jsonl(codex_home / "session_index.jsonl", index_rows)
+
+    rollback_payload = {
+        "type": "thread_rolled_back",
+        "num_turns": 1,
+        "reason": "SECRET ROLLBACK",
+    }
+    log_rows = [
+        _entry("session_meta", {"id": session_id}),
+        _entry("turn_context", {"turn_id": "before-diagnostic", "model": "gpt-5.5"}),
+        _token_event(100, 100),
+        _entry("event_msg", rollback_payload),
+        _entry("event_msg", {"type": "turn_aborted", "reason": "SECRET ABORT"}),
+        _entry("turn_context", {"turn_id": "after-diagnostic", "model": "gpt-5.5"}),
+        _token_event(300, 200),
+    ]
+    _write_jsonl(log_path, log_rows)
+
+    db_path = tmp_path / "usage.sqlite3"
+    refresh_usage_index(codex_home=codex_home, db_path=db_path)
+    fact_calls = query_diagnostic_fact_calls(
+        db_path=db_path,
+        fact_type="outcome",
+        fact_name="thread_rolled_back",
+        limit=0,
+    )
+    session_rows = query_session_usage(db_path=db_path, session_id=session_id)
+    target = next(row for row in session_rows if row["turn_id"] == "after-diagnostic")
+
+    context = load_call_context(target["record_id"], db_path=db_path)
+    entries_by_label = {}
+    for entry in context["entries"]:
+        entries_by_label[entry["label"]] = entry
+    context_text = json.dumps(context)
+    rollback_entry = entries_by_label["thread_rolled_back"]
+
+    assert [row["record_id"] for row in fact_calls] == [target["record_id"]]
+    assert rollback_entry["line_number"] == 4
+    assert '"num_turns": 1' in rollback_entry["text"]
+    assert entries_by_label["turn_aborted"]["line_number"] == 5
+    assert "SECRET ROLLBACK" not in context_text
+    assert "SECRET ABORT" not in context_text
+
+
def test_context_dedupes_adjacent_chat_message_echoes(tmp_path: Path) -> None:
session_id = "019e37d5-f19f-7e4d-84cb-508941432222"
codex_home = tmp_path / ".codex"
diff --git a/tests/test_dashboard_live.py b/tests/test_dashboard_live.py
new file mode 100644
index 0000000..1d1a585
--- /dev/null
+++ b/tests/test_dashboard_live.py
@@ -0,0 +1,142 @@
+from __future__ import annotations
+
+import json
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+
+def _run_dashboard_live_script(script: str) -> dict[str, object]:
+ """Run ``script`` under Node against dashboard_live.js and return its JSON output.
+
+ The calling test is skipped when no ``node`` executable is on PATH. The
+ Node process must exit with status 0 (``check=True``) and its stdout must
+ be a single JSON document.
+ """
+ node = shutil.which("node")
+ if node is None:
+ pytest.skip("node is required for dashboard live helper tests")
+ repo_root = Path(__file__).resolve().parents[1]
+ script_path = (
+ repo_root
+ / "src"
+ / "codex_usage_tracker"
+ / "plugin_data"
+ / "dashboard"
+ / "dashboard_live.js"
+ )
+ # The wrapper loads dashboard_live.js into a vm context with stubbed
+ # browser globals, exposes the exported factory as `factory`, then runs
+ # the caller's script. fetch is routed to globalThis.__fetch so the
+ # script can install its own stub.
+ wrapped = f"""
+const fs = require('fs');
+const vm = require('vm');
+const code = fs.readFileSync({json.dumps(str(script_path))}, 'utf8');
+const context = {{
+ window: {{ clearInterval, setInterval }},
+ URLSearchParams,
+ fetch: async (url, options) => globalThis.__fetch(url, options),
+ console,
+}};
+vm.createContext(context);
+vm.runInContext(code, context);
+const factory = context.window.CodexUsageDashboardLive;
+{script}
+"""
+ result = subprocess.run(
+ [node, "-e", wrapped],
+ check=True,
+ capture_output=True,
+ text=True,
+ )
+ return json.loads(result.stdout)
+
+
+def test_dashboard_live_allows_diagnostics_bootstrap_refresh() -> None:
+ """A diagnostics-view refresh with allowDiagnosticsBootstrap issues one fetch.
+
+ The embedded script creates the live runtime with the active view set to
+ 'diagnostics', stubs fetch to record calls, triggers one refresh, and
+ prints what was requested and applied.
+ """
+ payload = _run_dashboard_live_script(
+ """
+(async () => {
+ const calls = [];
+ globalThis.__fetch = async (url, options) => {
+ calls.push({ url, headers: options.headers });
+ return {
+ ok: true,
+ json: async () => ({
+ rows: [],
+ refreshed_at: '2026-06-19T00:00:00Z',
+ refresh_result: null,
+ total_available_rows: 0,
+ }),
+ };
+ };
+ const statusUpdates = [];
+ const appliedPayloads = [];
+ const runtime = factory.create({
+ activeView: () => 'diagnostics',
+ apiToken: () => 'test-token',
+ applyDashboardPayload: payload => appliedPayloads.push(payload),
+ autoRefreshEl: { checked: false },
+ backgroundHydrationChunkSize: 2000,
+ formatTimestamp: value => value,
+ getArchivedAvailableRows: () => 0,
+ getData: () => [],
+ getIncludeArchived: () => false,
+ getLoadedLimit: () => null,
+ getTotalAvailableRows: () => 0,
+ historyScopeEl: { value: 'active', parentElement: {} },
+ i18n: { currentLanguage: 'en' },
+ initialHydrationChunkSize: 500,
+ latestRefreshAt: () => '',
+ limitValue: value => value === null ? 'all' : String(value),
+ liveRefreshIntervalMs: 10000,
+ liveRefreshSupported: true,
+ loadLimitEl: { value: '5000', options: [], lastElementChild: null, insertBefore: () => {} },
+ number: new Intl.NumberFormat('en-US'),
+ payloadRows: payload => payload.rows || [],
+ rebuildDashboardIndexes: () => {},
+ rebuildFilterOptions: () => {},
+ refreshDashboardEl: { disabled: false },
+ render: () => {},
+ resetRowsForHydration: () => {},
+ rowLoadProgressBarEl: { style: {} },
+ rowLoadProgressCountEl: { textContent: '' },
+ rowLoadProgressEl: { hidden: true },
+ rowLoadProgressLabelEl: { textContent: '' },
+ setFastTooltip: () => {},
+ t: key => key,
+ tf: (key, values = {}) => `${key}:${JSON.stringify(values)}`,
+ updateLiveStatus: (key, detail) => statusUpdates.push({ key, detail }),
+ });
+ await runtime.refreshDashboardData(false, {
+ refreshLogs: false,
+ resetRows: true,
+ allowDiagnosticsBootstrap: true,
+ });
+ console.log(JSON.stringify({
+ fetchCount: calls.length,
+ firstUrl: calls[0] ? calls[0].url : '',
+ token: calls[0] ? calls[0].headers['X-Codex-Usage-Token'] : '',
+ appliedCount: appliedPayloads.length,
+ statusKeys: statusUpdates.map(entry => entry.key),
+ }));
+})().catch(error => {
+ console.error(error);
+ process.exit(1);
+});
+"""
+ )
+
+ # Exactly one shell request to /api/usage, without a log refresh, carrying
+ # the API token, whose payload is applied once.
+ assert payload["fetchCount"] == 1
+ assert payload["firstUrl"].startswith("/api/usage?")
+ assert "shell=1" in payload["firstUrl"]
+ assert "refresh=1" not in payload["firstUrl"]
+ assert payload["token"] == "test-token"
+ assert payload["appliedCount"] == 1
+ assert payload["statusKeys"] == ["status.checking", "status.updated"]
+
+
+def test_dashboard_bootstraps_direct_diagnostics_view() -> None:
+    """dashboard.js must contain the ``allowDiagnosticsBootstrap: true`` option."""
+    repo_root = Path(__file__).resolve().parents[1]
+    dashboard_dir = repo_root.joinpath(
+        "src", "codex_usage_tracker", "plugin_data", "dashboard"
+    )
+    source_text = (dashboard_dir / "dashboard.js").read_text(encoding="utf-8")
+
+    assert "allowDiagnosticsBootstrap: true" in source_text
diff --git a/tests/test_dashboard_payload.py b/tests/test_dashboard_payload.py
index 14e01a2..045b804 100644
--- a/tests/test_dashboard_payload.py
+++ b/tests/test_dashboard_payload.py
@@ -52,6 +52,9 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
dashboard_actions_js = (asset_dir / "dashboard_actions.js").read_text(encoding="utf-8")
dashboard_live_js = (asset_dir / "dashboard_live.js").read_text(encoding="utf-8")
dashboard_events_js = (asset_dir / "dashboard_events.js").read_text(encoding="utf-8")
+ dashboard_diagnostics_js = (asset_dir / "dashboard_diagnostics.js").read_text(
+ encoding="utf-8"
+ )
dashboard_call_diagnostics_js = (
asset_dir / "dashboard_call_diagnostics.js"
).read_text(encoding="utf-8")
@@ -90,6 +93,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
dashboard_actions_js,
dashboard_live_js,
dashboard_events_js,
+ dashboard_diagnostics_js,
dashboard_call_diagnostics_js,
dashboard_call_js,
dashboard_js,
@@ -114,6 +118,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
assert "SECRET RAW PROMPT" not in dashboard_actions_js
assert "SECRET RAW PROMPT" not in dashboard_live_js
assert "SECRET RAW PROMPT" not in dashboard_events_js
+ assert "SECRET RAW PROMPT" not in dashboard_diagnostics_js
assert "SECRET RAW PROMPT" not in dashboard_call_diagnostics_js
assert "SECRET RAW PROMPT" not in dashboard_call_js
assert "SECRET RAW PROMPT" not in dashboard_css
@@ -133,6 +138,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_actions_js
assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_live_js
assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_events_js
+ assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_diagnostics_js
assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_call_diagnostics_js
assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_call_js
assert "EVENT MSG COMPACTION SUMMARY" not in dashboard
@@ -150,6 +156,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_actions_js
assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_live_js
assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_events_js
+ assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_diagnostics_js
assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_call_diagnostics_js
assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_call_js
for stylesheet in dashboard_stylesheets:
@@ -170,6 +177,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
assert 'src="codex-usage-tracker-assets/dashboard_actions.js?v=' in dashboard
assert 'src="codex-usage-tracker-assets/dashboard_live.js?v=' in dashboard
assert 'src="codex-usage-tracker-assets/dashboard_events.js?v=' in dashboard
+ assert 'src="codex-usage-tracker-assets/dashboard_diagnostics.js?v=' in dashboard
assert 'src="codex-usage-tracker-assets/dashboard_call_diagnostics.js?v=' in dashboard
assert 'src="codex-usage-tracker-assets/dashboard_call_investigator.js?v=' in dashboard
assert 'src="codex-usage-tracker-assets/dashboard.js?v=' in dashboard
@@ -189,6 +197,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
assert "CodexUsageDashboardActions" in dashboard_actions_js
assert "CodexUsageDashboardLive" in dashboard_live_js
assert "CodexUsageDashboardEvents" in dashboard_events_js
+ assert "CodexUsageDashboardDiagnostics" in dashboard_diagnostics_js
assert "CodexUsageCallDiagnostics" in dashboard_call_diagnostics_js
assert "CodexUsageCallInvestigator" in dashboard_call_js
assert "copyViewLink" in dashboard
@@ -267,6 +276,35 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
assert "insightsView" in dashboard
assert "callsView" in dashboard
assert "threadsView" in dashboard
+ assert "diagnosticsView" in dashboard
+ assert "diagnosticsPanel" in dashboard
+ assert "/api/diagnostics/facts" in dashboard_diagnostics_js
+ assert "/api/diagnostics/tools" in dashboard_diagnostics_js
+ assert "/api/diagnostics/compactions" in dashboard_diagnostics_js
+ assert "/api/diagnostics/fact-calls" in dashboard_diagnostics_js
+ assert "Associated token totals" in dashboard_diagnostics_js
+ assert "Raw context remains on-demand" in dashboard_diagnostics_js
+ assert "rowInvestigatorLink" in dashboard_diagnostics_js
+ assert "diagnostics-drilldown-row" in dashboard_diagnostics_js
+ assert 'td colspan="11"' in dashboard_diagnostics_js
+ assert "associated_cached_input_tokens" in dashboard_diagnostics_js
+ assert "row.cached_input_tokens" in dashboard_diagnostics_js
+ assert "Occurrences: count of matching diagnostic fact events" in dashboard_diagnostics_js
+ assert "Associated total tokens for those calls" in dashboard_diagnostics_js
+ assert "Average cache ratio across associated calls" in dashboard_diagnostics_js
+ assert "captureScrollAnchor" in dashboard_diagnostics_js
+ assert "restoreScrollAnchor" in dashboard_diagnostics_js
+ assert "data-diagnostics-call-load-more" in dashboard_diagnostics_js
+ assert "offset: String(offset)" in dashboard_diagnostics_js
+ assert "mergeFactCallPayload" in dashboard_diagnostics_js
+ assert "data-diagnostics-call-sort-key" in dashboard_diagnostics_js
+ assert "data-diagnostics-call-sort-active" in dashboard_diagnostics_js
+ assert "sortFactCalls" in dashboard_diagnostics_js
+ assert "defaultFactCallSortDirection" in dashboard_diagnostics_js
+ assert "sort: sortState.sort" in dashboard_diagnostics_js
+ assert "direction: sortState.direction" in dashboard_diagnostics_js
+ assert "diagnostics-expand-button" in dashboard_surface
+ assert "selectedFactKey === key" in dashboard_diagnostics_js
assert "Needs Attention" in dashboard
assert "Investigation Presets" in dashboard
assert "presetDefinitions" in dashboard_insights_js
@@ -461,6 +499,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
assert (asset_dir / "dashboard_actions.js").exists()
assert (asset_dir / "dashboard_live.js").exists()
assert (asset_dir / "dashboard_events.js").exists()
+ assert (asset_dir / "dashboard_diagnostics.js").exists()
assert (asset_dir / "dashboard_call_diagnostics.js").exists()
for stylesheet in dashboard_stylesheets:
assert (asset_dir / stylesheet).exists()
@@ -513,6 +552,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None:
assert 'Highest Codex credits ' in dashboard
assert 'id="insightsView" type="button" aria-pressed="false"' in dashboard
assert 'id="callsView" type="button" aria-pressed="true"' in dashboard
+ assert 'id="diagnosticsView" type="button" aria-pressed="false"' in dashboard
pricing_path.write_text(
json.dumps(
diff --git a/tests/test_dashboard_server.py b/tests/test_dashboard_server.py
index 0d8b36d..6ee9a25 100644
--- a/tests/test_dashboard_server.py
+++ b/tests/test_dashboard_server.py
@@ -460,6 +460,18 @@ def test_dashboard_server_live_sql_api_slices_are_aggregate_only(tmp_path: Path)
)
summary_payload = _read_json(f"{base_url}/api/summary?group_by=model&limit=5")
recommendations_payload = _read_json(f"{base_url}/api/recommendations?limit=5")
+ diagnostics_summary_payload = _read_json(f"{base_url}/api/diagnostics/summary?limit=5")
+ diagnostics_facts_payload = _read_json(f"{base_url}/api/diagnostics/facts?limit=5")
+ diagnostics_compactions_payload = _read_json(
+ f"{base_url}/api/diagnostics/compactions?limit=5"
+ )
+ diagnostics_tools_payload = _read_json(f"{base_url}/api/diagnostics/tools?limit=5")
+ diagnostics_fact_calls_payload = _read_json(
+ f"{base_url}/api/diagnostics/fact-calls"
+ "?fact_type=compaction&fact_name=post_compaction&limit=5"
+ )
+ invalid_diagnostics = _http_error_json(f"{base_url}/api/diagnostics/facts?sort=bad")
+ missing_fact_calls = _http_error_json(f"{base_url}/api/diagnostics/fact-calls")
invalid_sort = _http_error_json(f"{base_url}/api/calls?sort=not-a-sort")
finally:
server.shutdown()
@@ -504,6 +516,30 @@ def test_dashboard_server_live_sql_api_slices_are_aggregate_only(tmp_path: Path)
assert summary_payload["group_by"] == "model"
assert recommendations_payload["schema"] == "codex-usage-tracker-recommendations-v1"
_assert_contract(recommendations_payload)
+ assert diagnostics_summary_payload["schema"] == "codex-usage-tracker-diagnostics-v1"
+ _assert_contract(diagnostics_summary_payload)
+ assert diagnostics_summary_payload["view"] == "summary"
+ assert diagnostics_summary_payload["raw_context_included"] is False
+ assert diagnostics_facts_payload["schema"] == "codex-usage-tracker-diagnostics-v1"
+ _assert_contract(diagnostics_facts_payload)
+ assert diagnostics_facts_payload["view"] == "facts"
+ assert {row["fact_name"] for row in diagnostics_facts_payload["rows"]} >= {
+ "post_compaction"
+ }
+ assert diagnostics_compactions_payload["filters"]["fact_type"] == "compaction"
+ _assert_contract(diagnostics_compactions_payload)
+ assert {row["fact_type"] for row in diagnostics_compactions_payload["rows"]} == {
+ "compaction"
+ }
+ assert diagnostics_tools_payload["filters"]["fact_type"] is None
+ assert diagnostics_tools_payload["filters"]["fact_group"] == "tools"
+ _assert_contract(diagnostics_tools_payload)
+ assert diagnostics_fact_calls_payload["view"] == "fact-calls"
+ _assert_contract(diagnostics_fact_calls_payload)
+ assert diagnostics_fact_calls_payload["filters"]["fact_name"] == "post_compaction"
+ assert diagnostics_fact_calls_payload["rows"][0]["fact_name"] == "post_compaction"
+ assert invalid_diagnostics["status"] == 400
+ assert missing_fact_calls["status"] == 400
assert invalid_sort["status"] == 400
assert "sort must be one of" in invalid_sort["payload"]["error"]
@@ -516,6 +552,11 @@ def test_dashboard_server_live_sql_api_slices_are_aggregate_only(tmp_path: Path)
thread_calls_payload,
summary_payload,
recommendations_payload,
+ diagnostics_summary_payload,
+ diagnostics_facts_payload,
+ diagnostics_compactions_payload,
+ diagnostics_tools_payload,
+ diagnostics_fact_calls_payload,
]
)
assert "SECRET RAW PROMPT" not in combined_payload
diff --git a/tests/test_dashboard_state.py b/tests/test_dashboard_state.py
index 965bb23..07bfc65 100644
--- a/tests/test_dashboard_state.py
+++ b/tests/test_dashboard_state.py
@@ -37,7 +37,7 @@ def test_dashboard_url_state_round_trips() -> None:
const manager = context.window.CodexUsageDashboardState;
const expected = {{
- view: 'threads',
+ view: 'diagnostics',
search: 'Thread Alpha',
model: 'gpt-5.5',
effort: 'high',
@@ -93,7 +93,7 @@ def test_dashboard_url_state_round_trips() -> None:
assert payload["hash"] == "#details"
assert payload["defaultSearch"] == "?external=keep"
assert payload["actual"] == {
- "view": "threads",
+ "view": "diagnostics",
"search": "Thread Alpha",
"model": "gpt-5.5",
"effort": "high",
diff --git a/tests/test_json_contracts.py b/tests/test_json_contracts.py
index 5b1c76d..1353db9 100644
--- a/tests/test_json_contracts.py
+++ b/tests/test_json_contracts.py
@@ -17,6 +17,7 @@
REPO_ROOT / "src" / "codex_usage_tracker" / "cli.py",
REPO_ROOT / "src" / "codex_usage_tracker" / "context.py",
REPO_ROOT / "src" / "codex_usage_tracker" / "costing.py",
+ REPO_ROOT / "src" / "codex_usage_tracker" / "diagnostic_reports.py",
REPO_ROOT / "src" / "codex_usage_tracker" / "diagnostics.py",
REPO_ROOT / "src" / "codex_usage_tracker" / "mcp_server.py",
REPO_ROOT / "src" / "codex_usage_tracker" / "reports.py",
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 16c63f1..74f6d8c 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -12,6 +12,7 @@
inspect_log,
load_session_index,
parse_usage_events_from_file,
+ parse_usage_events_from_file_with_state,
)
SESSION_ID = "019e374d-c19f-7da3-a44f-8de043a7a64e"
@@ -290,6 +291,153 @@ def test_parser_persists_call_origin_from_metadata_segments(tmp_path: Path) -> N
assert "SECRET" not in json.dumps([event.to_row() for event in events])
+def test_parser_collects_diagnostic_facts_between_token_counts(tmp_path: Path) -> None:
+ log_path = tmp_path / f"rollout-2026-05-17T14-58-23-{SESSION_ID}.jsonl"
+ _write_jsonl(
+ log_path,
+ [
+ _entry("session_meta", {"id": SESSION_ID}),
+ _entry("turn_context", {"turn_id": "turn-a", "model": "gpt-5.5"}),
+ _entry(
+ "response_item",
+ {
+ "type": "function_call_output",
+ "output": "SECRET TOOL OUTPUT",
+ },
+ ),
+ _entry(
+ "response_item",
+ {
+ "type": "function_call_output",
+ "output": "SECRET SECOND TOOL OUTPUT",
+ },
+ ),
+ _entry(
+ "event_msg",
+ {
+ "type": "patch_apply_end",
+ "patch": "SECRET PATCH TEXT",
+ },
+ ),
+ _token_event(100, 100),
+ _entry(
+ "event_msg",
+ {
+ "type": "context_compacted",
+ "replacement_history": [
+ {
+ "type": "message",
+ "role": "assistant",
+ "content": [
+ {"type": "output_text", "text": "SECRET COMPACTION TEXT"}
+ ],
+ }
+ ],
+ },
+ ),
+ _token_event(150, 50),
+ ],
+ )
+
+ parsed = parse_usage_events_from_file_with_state(log_path)
+
+ assert [event.cumulative_total_tokens for event in parsed.events] == [100, 150]
+ facts = {(fact.fact_type, fact.fact_name): fact for fact in parsed.diagnostic_facts}
+ assert set(facts) == {
+ ("compaction", "post_compaction"),
+ ("outcome", "patch_applied"),
+ ("tool", "function_call_output"),
+ }
+ assert facts[("tool", "function_call_output")].record_id == parsed.events[0].record_id
+ assert facts[("tool", "function_call_output")].event_count == 2
+ assert facts[("outcome", "patch_applied")].record_id == parsed.events[0].record_id
+ assert facts[("compaction", "post_compaction")].record_id == parsed.events[1].record_id
+ assert all(fact.raw_content_included == 0 for fact in parsed.diagnostic_facts)
+ assert "SECRET" not in json.dumps(
+ [fact.to_row() for fact in parsed.diagnostic_facts],
+ sort_keys=True,
+ )
+
+
+def test_parser_classifies_richer_diagnostic_detectors_without_raw_content(
+ tmp_path: Path,
+) -> None:
+ log_path = tmp_path / f"rollout-2026-05-17T14-58-23-{SESSION_ID}.jsonl"
+ _write_jsonl(
+ log_path,
+ [
+ _entry("session_meta", {"id": SESSION_ID}),
+ _entry("turn_context", {"turn_id": "turn-a", "model": "gpt-5.5"}),
+ _entry(
+ "response_item",
+ {
+ "type": "function_call",
+ "name": "functions.exec_command",
+ "arguments": json.dumps(
+ {"cmd": "pytest tests/test_private_customer.py -k SECRET_CUSTOMER"}
+ ),
+ },
+ ),
+ _entry(
+ "response_item",
+ {
+ "type": "function_call",
+ "name": "functions.exec_command",
+ "arguments": json.dumps({"cmd": "rg -n SECRET_CUSTOMER private"}),
+ },
+ ),
+ _entry("response_item", {"type": "tool_search_call"}),
+ _entry("response_item", {"type": "tool_search_output", "output": "SECRET SEARCH"}),
+ _entry("event_msg", {"type": "web_search_end", "query": "SECRET SEARCH"}),
+ _entry(
+ "event_msg",
+ {
+ "type": "mcp_tool_call_end",
+ "tool_name": "mcp__github__search_issues",
+ "server_name": "github",
+ "arguments": {"query": "SECRET MCP ARGUMENT"},
+ },
+ ),
+ _entry(
+ "event_msg",
+ {"type": "skill_started", "skill_name": "codex-usage-tracker"},
+ ),
+ _entry("event_msg", {"type": "turn_aborted", "reason": "SECRET ABORT"}),
+ _entry("event_msg", {"type": "thread_rolled_back", "reason": "SECRET ROLLBACK"}),
+ _token_event(100, 100),
+ ],
+ )
+
+ parsed = parse_usage_events_from_file_with_state(log_path)
+
+ assert len(parsed.events) == 1
+ facts = {(fact.fact_type, fact.fact_name): fact for fact in parsed.diagnostic_facts}
+ assert {
+ ("activity", "search_read_command"),
+ ("command_family", "pytest"),
+ ("command_family", "unknown_command"),
+ ("function", "functions.exec_command"),
+ ("loop", "retry_or_abort_loop"),
+ ("loop", "search_read_loop"),
+ ("mcp_server", "github"),
+ ("mcp_tool", "mcp__github__search_issues"),
+ ("outcome", "thread_rolled_back"),
+ ("outcome", "turn_aborted"),
+ ("skill", "codex-usage-tracker"),
+ } <= set(facts)
+ assert facts[("function", "functions.exec_command")].event_count == 2
+ assert facts[("loop", "search_read_loop")].event_count >= 3
+ assert facts[("loop", "retry_or_abort_loop")].event_count == 2
+ serialized = json.dumps(
+ [fact.to_row() for fact in parsed.diagnostic_facts],
+ sort_keys=True,
+ )
+ assert "SECRET" not in serialized
+ assert "test_private_customer" not in serialized
+ assert "rg -n" not in serialized
+ assert all(fact.raw_content_included == 0 for fact in parsed.diagnostic_facts)
+
+
def test_parser_persists_dashboard_helper_metadata(tmp_path: Path) -> None:
log_path = (
tmp_path
@@ -341,7 +489,7 @@ def test_inspect_log_reports_aggregate_diagnostics_without_db_writes(tmp_path: P
payload = inspect_log(log_path)
- assert payload["adapter"] == "codex-jsonl-v1"
+ assert payload["adapter"] == "codex-jsonl-v2"
assert payload["file_session_id"] is None
assert payload["event_count"] == 1
assert payload["session_ids"] == [SESSION_ID]
@@ -366,7 +514,7 @@ def test_cli_inspect_log_outputs_parser_summary(tmp_path: Path) -> None:
env=_subprocess_env(),
)
- assert "Adapter: codex-jsonl-v1" in result.stdout
+ assert "Adapter: codex-jsonl-v2" in result.stdout
assert "Parsed events: 1" in result.stdout
assert "Diagnostics: none" in result.stdout
diff --git a/tests/test_privacy.py b/tests/test_privacy.py
index 93ee2dc..50fe5f4 100644
--- a/tests/test_privacy.py
+++ b/tests/test_privacy.py
@@ -20,6 +20,7 @@
from codex_usage_tracker.store import (
export_usage_csv,
query_dashboard_events,
+ query_diagnostic_facts,
query_session_usage,
refresh_usage_index,
)
@@ -53,6 +54,7 @@ def test_aggregate_outputs_exclude_raw_transcript_content(tmp_path: Path) -> Non
refresh_usage_index(codex_home=fixture.codex_home, db_path=db_path)
raw_rows = query_dashboard_events(db_path=db_path, limit=0)
+ diagnostic_facts = query_diagnostic_facts(db_path=db_path, limit=0)
strict_payload = dashboard_payload(
db_path=db_path,
limit=0,
@@ -78,6 +80,7 @@ def test_aggregate_outputs_exclude_raw_transcript_content(tmp_path: Path) -> Non
aggregate_outputs = [
db_path.read_bytes().decode("utf-8", errors="ignore"),
json.dumps(raw_rows),
+ json.dumps(diagnostic_facts),
json.dumps(strict_payload),
dashboard_path.read_text(encoding="utf-8"),
csv_path.read_text(encoding="utf-8"),
@@ -113,6 +116,7 @@ def test_aggregate_outputs_exclude_raw_transcript_content(tmp_path: Path) -> Non
assert PRIVATE_TAG not in json.dumps(strict_payload)
assert PRIVATE_BRANCH not in csv_path.read_text(encoding="utf-8")
assert PRIVATE_TAG not in csv_path.read_text(encoding="utf-8")
+ assert all(row["raw_content_included"] == 0 for row in diagnostic_facts)
def test_privacy_modes_cover_dashboard_query_session_and_csv(tmp_path: Path) -> None:
diff --git a/tests/test_store_dashboard_mcp.py b/tests/test_store_dashboard_mcp.py
index 3ed7d54..f0e66e8 100644
--- a/tests/test_store_dashboard_mcp.py
+++ b/tests/test_store_dashboard_mcp.py
@@ -18,12 +18,14 @@
)
from codex_usage_tracker import store as store_module
+from codex_usage_tracker.diagnostic_reports import build_diagnostics_facts_report
from codex_usage_tracker.models import UsageEvent
from codex_usage_tracker.store import (
connect,
init_db,
query_dashboard_event_count,
query_dashboard_events,
+ query_diagnostic_facts,
query_latest_observed_usage,
query_most_expensive_calls,
query_session_usage,
@@ -78,13 +80,13 @@ def test_refresh_is_idempotent_and_summary_works(tmp_path: Path) -> None:
assert meta["inserted_or_updated_events"] == "0"
assert meta["parsed_source_files"] == "0"
assert meta["skipped_source_files"] == "3"
- assert meta["parser_adapter"] == "codex-jsonl-v1"
- assert meta["schema_version"] == "8"
+ assert meta["parser_adapter"] == "codex-jsonl-v2"
+ assert meta["schema_version"] == "9"
assert meta["parser_skipped_events"] == "0"
state = schema_state(db_path)
- assert state["schema_version"] == 8
+ assert state["schema_version"] == 9
assert state["checksum_matches"] is True
- assert [row["version"] for row in state["migrations"]] == [1, 2, 3, 4, 5, 6, 7, 8]
+ assert [row["version"] for row in state["migrations"]] == [1, 2, 3, 4, 5, 6, 7, 8, 9]
with connect(db_path) as conn:
init_db(conn)
source_rows = [
@@ -191,6 +193,96 @@ def tracking_parse(*args: Any, **kwargs: Any):
assert metadata["skipped_source_files"] == "3"
+def test_refresh_reparses_source_when_parser_adapter_changes(
+ tmp_path: Path,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ session_id = "019e37d5-f19f-7e4d-84cb-50894143c000"
+ codex_home = tmp_path / ".codex"
+ db_path = tmp_path / "usage.sqlite3"
+ log_path = (
+ codex_home
+ / "sessions"
+ / "2026"
+ / "05"
+ / "17"
+ / f"rollout-2026-05-17T18-58-27-{session_id}.jsonl"
+ )
+ _write_jsonl(
+ codex_home / "session_index.jsonl",
+ [
+ {
+ "id": session_id,
+ "thread_name": "Parser adapter diagnostics",
+ "updated_at": "2026-05-17T19:00:00Z",
+ }
+ ],
+ )
+ _write_jsonl(
+ log_path,
+ [
+ _entry("session_meta", {"id": session_id}),
+ _entry("turn_context", {"turn_id": "turn-a", "model": "gpt-5.5"}),
+ _entry("event_msg", {"type": "patch_apply_end", "patch": "SECRET PATCH"}),
+ _token_event(100, 100),
+ ],
+ )
+ first = refresh_usage_index(codex_home=codex_home, db_path=db_path)
+ with connect(db_path) as conn:
+ init_db(conn)
+ source_before = conn.execute(
+ """
+ SELECT parsed_until_byte, parser_adapter
+ FROM source_files
+ WHERE source_file = ?
+ """,
+ (str(log_path),),
+ ).fetchone()
+ conn.execute("DELETE FROM call_diagnostic_facts")
+ conn.execute(
+ "UPDATE source_files SET parser_adapter = ? WHERE source_file = ?",
+ ("codex-jsonl-v0", str(log_path)),
+ )
+ parse_calls: list[dict[str, Any]] = []
+ original_parse = store_module.parse_usage_events_from_file_with_state
+
+ def tracking_parse(*args: Any, **kwargs: Any):
+ parse_calls.append(
+ {
+ "path": args[0],
+ "start_byte": kwargs.get("start_byte"),
+ "start_line": kwargs.get("start_line"),
+ "initial_state": kwargs.get("initial_state"),
+ }
+ )
+ return original_parse(*args, **kwargs)
+
+ monkeypatch.setattr(
+ store_module,
+ "parse_usage_events_from_file_with_state",
+ tracking_parse,
+ )
+
+ second = refresh_usage_index(codex_home=codex_home, db_path=db_path)
+ facts = query_diagnostic_facts(db_path=db_path, limit=0)
+
+ assert first.parsed_events == 1
+ assert source_before is not None
+ assert source_before["parsed_until_byte"] > 0
+ assert source_before["parser_adapter"] == "codex-jsonl-v2"
+ assert len(parse_calls) == 1
+ assert parse_calls[0] == {
+ "path": log_path,
+ "start_byte": 0,
+ "start_line": 0,
+ "initial_state": None,
+ }
+ assert second.parsed_events == 1
+ assert second.inserted_or_updated_events == 1
+ assert [row["fact_name"] for row in facts] == ["patch_applied"]
+ assert "SECRET PATCH" not in json.dumps(facts)
+
+
def test_append_cursor_preserves_pending_call_origin_between_refreshes(
tmp_path: Path,
) -> None:
@@ -258,6 +350,300 @@ def test_append_cursor_preserves_pending_call_origin_between_refreshes(
assert "SECRET PENDING USER TEXT" not in source_rows_text
+def test_append_cursor_preserves_pending_diagnostic_facts_between_refreshes(
+ tmp_path: Path,
+) -> None:
+ session_id = "019e37d5-f19f-7e4d-84cb-50894143c002"
+ codex_home = tmp_path / ".codex"
+ log_path = (
+ codex_home
+ / "sessions"
+ / "2026"
+ / "05"
+ / "17"
+ / f"rollout-2026-05-17T18-58-27-{session_id}.jsonl"
+ )
+ _write_jsonl(
+ codex_home / "session_index.jsonl",
+ [
+ {
+ "id": session_id,
+ "thread_name": "Append cursor diagnostics",
+ "updated_at": "2026-05-17T19:00:00Z",
+ }
+ ],
+ )
+ _write_jsonl(
+ log_path,
+ [
+ _entry("session_meta", {"id": session_id}),
+ _entry("turn_context", {"turn_id": "turn-a", "model": "gpt-5.5"}),
+ _token_event(100, 100),
+ _entry(
+ "response_item",
+ {
+ "type": "function_call_output",
+ "output": "SECRET PENDING TOOL OUTPUT",
+ },
+ ),
+ ],
+ )
+ db_path = tmp_path / "usage.sqlite3"
+
+ first = refresh_usage_index(codex_home=codex_home, db_path=db_path)
+ with log_path.open("a", encoding="utf-8") as handle:
+ handle.write(json.dumps(_token_event(150, 50)) + "\n")
+ second = refresh_usage_index(codex_home=codex_home, db_path=db_path)
+ rows = query_session_usage(db_path=db_path, session_id=session_id)
+ facts = query_diagnostic_facts(db_path=db_path, limit=0)
+ with connect(db_path) as conn:
+ init_db(conn)
+ source_rows_text = json.dumps(
+ [
+ dict(row)
+ for row in conn.execute(
+ "SELECT parser_state_json FROM source_files WHERE source_file = ?",
+ (str(log_path),),
+ ).fetchall()
+ ]
+ )
+
+ assert first.parsed_events == 1
+ assert second.parsed_events == 1
+ assert [row["cumulative_total_tokens"] for row in rows] == [100, 150]
+ assert len(facts) == 1
+ assert facts[0]["fact_name"] == "function_call_output"
+ assert facts[0]["associated_total_tokens"] == 50
+ assert facts[0]["largest_record_id"] == rows[-1]["record_id"]
+ assert facts[0]["raw_content_included"] == 0
+ assert "SECRET PENDING TOOL OUTPUT" not in source_rows_text
+ assert "SECRET PENDING TOOL OUTPUT" not in json.dumps(facts)
+
+
+def test_refresh_persists_diagnostic_facts_without_raw_content(tmp_path: Path) -> None:
+ session_id = "019e37d5-f19f-7e4d-84cb-50894143c003"
+ codex_home = tmp_path / ".codex"
+ db_path = tmp_path / "usage.sqlite3"
+ log_path = (
+ codex_home
+ / "sessions"
+ / "2026"
+ / "05"
+ / "17"
+ / f"rollout-2026-05-17T18-58-27-{session_id}.jsonl"
+ )
+ _write_jsonl(
+ codex_home / "session_index.jsonl",
+ [
+ {
+ "id": session_id,
+ "thread_name": "Diagnostic facts",
+ "updated_at": "2026-05-17T19:00:00Z",
+ }
+ ],
+ )
+ _write_jsonl(
+ log_path,
+ [
+ _entry("session_meta", {"id": session_id}),
+ _entry("turn_context", {"turn_id": "turn-a", "model": "gpt-5.5"}),
+ _entry(
+ "response_item",
+ {"type": "function_call_output", "output": "SECRET TOOL OUTPUT"},
+ ),
+ _entry(
+ "event_msg",
+ {"type": "patch_apply_end", "patch": "SECRET PATCH TEXT"},
+ ),
+ _token_event(120, 120),
+ _entry(
+ "event_msg",
+ {
+ "type": "context_compacted",
+ "replacement_history": [
+ {
+ "type": "message",
+ "role": "assistant",
+ "content": [
+ {"type": "output_text", "text": "SECRET COMPACTION TEXT"}
+ ],
+ }
+ ],
+ },
+ ),
+ _token_event(200, 80),
+ ],
+ )
+
+ result = refresh_usage_index(codex_home=codex_home, db_path=db_path)
+ facts = query_diagnostic_facts(db_path=db_path, limit=0, sort="fact", direction="asc")
+ with connect(db_path) as conn:
+ init_db(conn)
+ persisted = [
+ dict(row)
+ for row in conn.execute(
+ "SELECT * FROM call_diagnostic_facts ORDER BY fact_type, fact_name"
+ ).fetchall()
+ ]
+
+ by_name = {row["fact_name"]: row for row in facts}
+ assert result.parsed_events == 2
+ assert set(by_name) == {"function_call_output", "patch_applied", "post_compaction"}
+ assert by_name["function_call_output"]["associated_total_tokens"] == 120
+ assert by_name["patch_applied"]["associated_total_tokens"] == 120
+ assert by_name["post_compaction"]["associated_total_tokens"] == 80
+ assert all(row["raw_content_included"] == 0 for row in persisted)
+ assert "SECRET" not in json.dumps(persisted, sort_keys=True)
+ assert "SECRET" not in json.dumps(facts, sort_keys=True)
+
+
+def test_refresh_persists_richer_diagnostic_detectors_without_command_text(
+ tmp_path: Path,
+) -> None:
+ session_id = "019e37d5-f19f-7e4d-84cb-50894143c005"
+ codex_home = tmp_path / ".codex"
+ db_path = tmp_path / "usage.sqlite3"
+ log_path = (
+ codex_home
+ / "sessions"
+ / "2026"
+ / "05"
+ / "17"
+ / f"rollout-2026-05-17T18-58-27-{session_id}.jsonl"
+ )
+ _write_jsonl(
+ codex_home / "session_index.jsonl",
+ [
+ {
+ "id": session_id,
+ "thread_name": "Diagnostic detectors",
+ "updated_at": "2026-05-17T19:00:00Z",
+ }
+ ],
+ )
+ _write_jsonl(
+ log_path,
+ [
+ _entry("session_meta", {"id": session_id}),
+ _entry("turn_context", {"turn_id": "turn-a", "model": "gpt-5.5"}),
+ _entry(
+ "response_item",
+ {
+ "type": "function_call",
+ "name": "functions.exec_command",
+ "arguments": json.dumps(
+ {"cmd": "python -m pytest tests/test_secret_customer.py"}
+ ),
+ },
+ ),
+ _entry(
+ "event_msg",
+ {
+ "type": "mcp_tool_call_end",
+ "tool_name": "mcp__calendar__search_events",
+ "server_name": "google-calendar",
+ "arguments": {"calendar": "SECRET CALENDAR"},
+ },
+ ),
+ _entry("event_msg", {"type": "skill_started", "skill_name": "brooks-test"}),
+ _token_event(120, 120),
+ ],
+ )
+
+ result = refresh_usage_index(codex_home=codex_home, db_path=db_path)
+ facts = query_diagnostic_facts(db_path=db_path, limit=0, sort="fact", direction="asc")
+ tools_payload = build_diagnostics_facts_report(
+ db_path=db_path,
+ fact_group="tools",
+ view="tools",
+ ).payload
+
+ by_key = {(row["fact_type"], row["fact_name"]): row for row in facts}
+ tool_types = {row["fact_type"] for row in tools_payload["rows"]}
+ assert result.parsed_events == 1
+ assert {"command_family", "function", "mcp_server", "mcp_tool", "skill", "tool"} <= tool_types
+ assert tools_payload["filters"]["fact_group"] == "tools"
+ assert by_key[("command_family", "pytest")]["associated_total_tokens"] == 120
+ assert by_key[("function", "functions.exec_command")]["associated_total_tokens"] == 120
+ assert by_key[("mcp_tool", "mcp__calendar__search_events")][
+ "associated_total_tokens"
+ ] == 120
+ assert by_key[("mcp_server", "google-calendar")]["associated_total_tokens"] == 120
+ assert by_key[("skill", "brooks-test")]["associated_total_tokens"] == 120
+ serialized = json.dumps(facts, sort_keys=True)
+ assert "SECRET" not in serialized
+ assert "test_secret_customer" not in serialized
+ assert "python -m pytest" not in serialized
+
+
+def test_full_reparse_replaces_stale_diagnostic_facts(tmp_path: Path) -> None:
+ session_id = "019e37d5-f19f-7e4d-84cb-50894143c004"
+ codex_home = tmp_path / ".codex"
+ db_path = tmp_path / "usage.sqlite3"
+ log_path = (
+ codex_home
+ / "sessions"
+ / "2026"
+ / "05"
+ / "17"
+ / f"rollout-2026-05-17T18-58-27-{session_id}.jsonl"
+ )
+ _write_jsonl(
+ codex_home / "session_index.jsonl",
+ [
+ {
+ "id": session_id,
+ "thread_name": "Diagnostic facts replace",
+ "updated_at": "2026-05-17T19:00:00Z",
+ }
+ ],
+ )
+ _write_jsonl(
+ log_path,
+ [
+ _entry("session_meta", {"id": session_id}),
+ _entry("turn_context", {"turn_id": "turn-a", "model": "gpt-5.5"}),
+ _entry("event_msg", {"type": "patch_apply_end", "patch": "SECRET PATCH"}),
+ _token_event(100, 100),
+ ],
+ )
+ refresh_usage_index(codex_home=codex_home, db_path=db_path)
+ assert [row["fact_name"] for row in query_diagnostic_facts(db_path=db_path)] == [
+ "patch_applied"
+ ]
+
+ _write_jsonl(
+ log_path,
+ [
+ _entry("session_meta", {"id": session_id}),
+ _entry("turn_context", {"turn_id": "turn-a", "model": "gpt-5.5"}),
+ _token_event(100, 100),
+ ],
+ )
+ with connect(db_path) as conn:
+ init_db(conn)
+ conn.execute(
+ """
+ UPDATE source_files
+ SET size_bytes = ?, mtime_ns = 0
+ WHERE source_file = ?
+ """,
+ (999_999, str(log_path)),
+ )
+
+ second = refresh_usage_index(codex_home=codex_home, db_path=db_path)
+ with connect(db_path) as conn:
+ init_db(conn)
+ persisted_count = conn.execute(
+ "SELECT COUNT(*) AS count FROM call_diagnostic_facts"
+ ).fetchone()
+
+ assert second.parsed_events == 1
+ assert query_diagnostic_facts(db_path=db_path) == []
+ assert persisted_count is not None
+ assert persisted_count["count"] == 0
+
+
def test_connect_sets_sqlite_concurrency_pragmas(tmp_path: Path) -> None:
db_path = tmp_path / "usage.sqlite3"
with connect(db_path) as conn:
@@ -268,7 +654,7 @@ def test_connect_sets_sqlite_concurrency_pragmas(tmp_path: Path) -> None:
assert busy_timeout == 5000
assert str(journal_mode).lower() == "wal"
- assert user_version == 8
+ assert user_version == 9
def test_init_db_repairs_version_zero_schema(tmp_path: Path) -> None:
@@ -341,8 +727,8 @@ def test_init_db_repairs_version_zero_schema(tmp_path: Path) -> None:
assert "rate_limit_plan_type" in columns
assert "rate_limit_primary_used_percent" in columns
assert "idx_usage_observed_rate_limit_timestamp" in indexes
- assert user_version == 8
- assert [row["version"] for row in migrations] == [1, 2, 3, 4, 5, 6, 7, 8]
+ assert user_version == 9
+ assert [row["version"] for row in migrations] == [1, 2, 3, 4, 5, 6, 7, 8, 9]
def test_latest_observed_usage_prefers_normal_codex_limit_pool(tmp_path: Path) -> None:
diff --git a/tests/test_store_migrations.py b/tests/test_store_migrations.py
index a28c0c9..4a0df29 100644
--- a/tests/test_store_migrations.py
+++ b/tests/test_store_migrations.py
@@ -56,9 +56,14 @@ def test_init_db_migrates_legacy_aggregate_table_without_data_loss(tmp_path: Pat
assert rows[0]["rate_limit_secondary_used_percent"] is None
assert metadata["parsed_events"] == "legacy"
assert metadata["parser_invalid_integer"] == "2"
- assert state["schema_version"] == 8
+ assert state["schema_version"] == 9
assert state["checksum_matches"] is True
- assert [row["version"] for row in state["migrations"]] == [1, 2, 3, 4, 5, 6, 7, 8]
+ assert [row["version"] for row in state["migrations"]] == [1, 2, 3, 4, 5, 6, 7, 8, 9]
+ with connect(db_path) as conn:
+ init_db(conn)
+ facts = conn.execute("SELECT COUNT(*) AS count FROM call_diagnostic_facts").fetchone()
+ assert facts is not None
+ assert facts["count"] == 0
def test_refresh_is_idempotent_after_legacy_migration(tmp_path: Path) -> None:
@@ -81,7 +86,7 @@ def test_refresh_is_idempotent_after_legacy_migration(tmp_path: Path) -> None:
assert second_count == 2
assert legacy_rows[0]["record_id"] == "legacy-record"
assert new_rows[0]["thread_name"] == "Synthetic migration thread"
- assert metadata["schema_version"] == "8"
+ assert metadata["schema_version"] == "9"
assert metadata["parsed_events"] == "0"
assert metadata["inserted_or_updated_events"] == "0"
assert metadata["parsed_source_files"] == "0"