diff --git a/.gitignore b/.gitignore index df2baaf..c709d90 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,9 @@ __pycache__/ .mypy_cache/ .ruff_cache/ .coverage* +node_modules/ +playwright-report/ +test-results/ build/ dist/ *.egg-info/ diff --git a/docs/architecture.md b/docs/architecture.md index b58f8eb..4353847 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -13,7 +13,8 @@ Codex Usage Tracker is a local sidecar app. It reads aggregate token counters fr - `costing.py`, `pricing_config.py`, `pricing_openai.py`, `pricing_estimates.py`, and `allowance.py` own cost, credit, rate-card, and allowance annotation. Keep estimate confidence and source metadata attached to rows. - `projects.py`, `threads.py`, and `recommendations.py` annotate aggregate rows with project identity, thread relationships, and actionable signals. Project privacy redaction also belongs in `projects.py` so CLI, MCP, dashboard, CSV, and support-bundle surfaces share the same behavior. - `dashboard.py` builds aggregate-only static dashboard payloads and writes HTML/assets. `server.py` adds localhost refresh, the compatibility `/api/usage` endpoint, SQL-backed live API slices, and explicit lazy context loading. -- `plugin_data/dashboard/dashboard_format.js` owns dashboard formatting primitives. `dashboard_data.js` owns row payload and thread relationship helpers. `dashboard_analysis.js` owns scoring, sorting, recommendation, and thread grouping logic. `dashboard_cells.js` owns reusable table/cell HTML helpers. `dashboard_details.js` owns sidebar detail and thread narrative rendering. `dashboard_insights.js` owns insight cards and investigation preset UI. `dashboard_tables.js` owns Calls, Threads, and expanded thread-call table rendering. `dashboard_diagnostics.js` owns the Diagnostics tab that consumes `/api/diagnostics/*` aggregate payloads. `dashboard_filters.js` owns date range parsing and row date matching. `dashboard_state.js` owns URL, CSV, and download state utilities. 
`dashboard_i18n.js`, `dashboard_payload_cache.js`, and `dashboard_tooltips.js` own localization, session aggregate cache, and fast tooltip helpers. `dashboard_call_investigator.js` owns the dedicated call drilldown surface. `dashboard.js` owns top-level DOM rendering, event handling, and API refresh orchestration. +- `diagnostic_snapshots.py` owns persisted diagnostic snapshot refresh/load orchestration. `diagnostic_snapshot_analysis.py`, `diagnostic_snapshot_events.py`, `diagnostic_snapshot_rows.py`, and `diagnostic_snapshot_concentration.py` own source-log aggregation, safe event parsing, row shaping, and concentration math. `diagnostic_snapshot_report.py` owns CLI rendering. Keep these modules synthetic-testable and aggregate-only. +- `plugin_data/dashboard/dashboard_format.js` owns dashboard formatting primitives. `dashboard_data.js` owns row payload and thread relationship helpers. `dashboard_analysis.js` owns scoring, sorting, recommendation, and thread grouping logic. `dashboard_cells.js` owns reusable table/cell HTML helpers. `dashboard_details.js` owns sidebar detail and thread narrative rendering. `dashboard_insights.js` owns insight cards and investigation preset UI. `dashboard_tables.js` owns Calls, Threads, and expanded thread-call table rendering. `dashboard_diagnostics.js` coordinates the Diagnostics tab data flow and events, `dashboard_diagnostics_snapshots.js` renders on-demand snapshot panels, and `dashboard_diagnostics_facts.js` renders the fact tables and drilldowns. `dashboard_filters.js` owns date range parsing and row date matching. `dashboard_state.js` owns URL, CSV, and download state utilities. `dashboard_i18n.js`, `dashboard_payload_cache.js`, and `dashboard_tooltips.js` own localization, session aggregate cache, and fast tooltip helpers. `dashboard_call_investigator.js` owns the dedicated call drilldown surface. `dashboard.js` owns top-level DOM rendering, event handling, and API refresh orchestration. 
- `context.py` is the only normal path that reads raw log context, and it does so only for one selected record on demand with redaction and size limits. Its default quick mode omits tool output and serialized groups; full serialized JSONL group analysis is explicit. - `plugin_installer.py`, `.mcp.json`, `skills/`, and `scripts/check_release.py` own install and packaging behavior. - `scripts/benchmark_synthetic_history.py` owns generated large-history query timing and threshold enforcement for 10k, 100k, and 500k aggregate-row fixtures. Its optional `--with-source-logs` mode writes synthetic JSONL source logs to time explicit context loading and to guard normal dashboard payload assembly against source-log reads. It must stay synthetic-only and must not read real Codex logs. @@ -26,10 +27,11 @@ Codex Usage Tracker is a local sidecar app. It reads aggregate token counters fr 1. Add new persisted usage-event metrics through `UsageEvent`, `schema.py`, migrations, store queries, dashboard payload tests, and CSV/export checks. Add auxiliary aggregate tables such as `thread_summaries` or `source_files` through `store.py` migrations plus focused migration/privacy tests. 2. Add new report views through `reports.py` first, then wire CLI and MCP wrappers to that shared service. 3. Add new machine-readable outputs through `api_payloads.py` or report payload methods with a `schema` value, a `json_contracts.py` entry, and focused tests. -4. Add dashboard-only interactions in `plugin_data/dashboard/dashboard.js` and keep URL state in `dashboard_state.js`. +4. Add dashboard-only interactions in the narrowest dashboard module and keep URL state in `dashboard_state.js`. Diagnostics snapshot panels should stay in `dashboard_diagnostics_snapshots.js`; fact tables should stay in `dashboard_diagnostics_facts.js`. 5. Keep all examples, screenshots, mocks, and tests synthetic. Never derive fixtures from real logs. 6. 
When editing skill instructions, update both the source `skills/...` file and the bundled `src/codex_usage_tracker/plugin_data/skills/...` copy. `scripts/check_release.py` verifies that installable plugin assets stay complete and synced. 7. When adding fields derived from `cwd`, Git metadata, source paths, or log-event metadata, decide how they behave in `normal`, `redacted`, and `strict` privacy modes before exposing them in dashboard, JSON, CSV, MCP, or support-bundle output. +8. Diagnostic snapshot refresh must remain explicit and on demand. Normal usage refresh paths may load stored snapshots, but they must not rescan source logs for diagnostic sections unless the user calls a diagnostics `--refresh` command or a `/api/diagnostics/<section>
/refresh` endpoint. ## Validation diff --git a/docs/cli-json-schemas.md b/docs/cli-json-schemas.md index 84c6ed3..6cde68f 100644 --- a/docs/cli-json-schemas.md +++ b/docs/cli-json-schemas.md @@ -47,6 +47,12 @@ Tracked schema ids: | `codex-usage-tracker-query-v1` | CLI `query`, MCP `usage_query(...)` | | `codex-usage-tracker-recommendations-v1` | CLI `recommendations --json`, MCP `usage_recommendations(response_format="json")` | | `codex-usage-tracker-diagnostics-v1` | CLI `diagnostics ... --json`, dashboard server `/api/diagnostics/*` | +| `codex-usage-tracker-diagnostic-overview-v1` | CLI `diagnostics overview --json`, dashboard server `/api/diagnostics/overview` | +| `codex-usage-tracker-diagnostic-tool-output-v1` | CLI `diagnostics tool-output --json`, dashboard server `/api/diagnostics/tool-output` | +| `codex-usage-tracker-diagnostic-commands-v1` | CLI `diagnostics commands --json`, dashboard server `/api/diagnostics/commands` | +| `codex-usage-tracker-diagnostic-file-reads-v1` | CLI `diagnostics file-reads --json`, dashboard server `/api/diagnostics/file-reads` | +| `codex-usage-tracker-diagnostic-read-productivity-v1` | CLI `diagnostics read-productivity --json`, dashboard server `/api/diagnostics/read-productivity` | +| `codex-usage-tracker-diagnostic-concentration-v1` | CLI `diagnostics concentration --json`, dashboard server `/api/diagnostics/concentration` | | `codex-usage-tracker-session-v1` | CLI `session --json`, MCP `session_usage(response_format="json")` | | `codex-usage-tracker-context-v1` | CLI `context`, MCP `usage_call_context` when raw context is explicitly enabled | | `codex-usage-tracker-context-disabled-v1` | MCP `usage_call_context` when raw context is disabled | @@ -281,6 +287,259 @@ Schema: `codex-usage-tracker-diagnostics-v1` Diagnostics payloads report aggregate structured facts such as compaction, tool/function/MCP activity, command families, structured skill labels, search/read loops, and outcome events. 
They do not include prompts, assistant messages, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments. Token totals are associated with facts observed before a token-count row; they are not causal allocations. +Diagnostic snapshots use separate section endpoints instead of one large read payload. `GET` returns the latest stored section snapshot or `status: "missing"`; `POST /api/diagnostics/<section>
/refresh` recomputes and replaces only that section. The dashboard button calls `POST /api/diagnostics/refresh`, which returns a small wrapper with `sections` and recomputes source-log-derived sections with one shared analyzer pass. This keeps ordinary dashboard refresh fast and prevents source-log rescans unless a diagnostics refresh is explicit. + +## Diagnostic Overview Snapshot + +Commands: + +```bash +codex-usage-tracker diagnostics overview --json +codex-usage-tracker diagnostics overview --refresh --json +``` + +Dashboard server API: + +- `GET /api/diagnostics/overview` +- `POST /api/diagnostics/overview/refresh` + +Schema: `codex-usage-tracker-diagnostic-overview-v1` + +```json +{ + "schema": "codex-usage-tracker-diagnostic-overview-v1", + "section": "overview", + "status": "ready", + "refreshed": false, + "raw_context_included": false, + "snapshot": { + "computed_at": "2026-06-20T18:00:00+00:00", + "history_scope": "active", + "source_logs_scanned": 3, + "usage_rows_scanned": 10, + "raw_content_included": false + }, + "overview": { + "usage_rows": 10, + "total_tokens": 12345, + "cached_input_tokens": 9000, + "uncached_input_tokens": 2000, + "cache_ratio": 0.75 + }, + "notes": [] +} +``` + +The overview snapshot is recomputed only when explicitly refreshed. Ordinary dashboard usage refreshes do not update diagnostic snapshots. 
+ +## Diagnostic Tool Output Snapshot + +Commands: + +```bash +codex-usage-tracker diagnostics tool-output --json +codex-usage-tracker diagnostics tool-output --refresh --json +``` + +Dashboard server API: + +- `GET /api/diagnostics/tool-output` +- `POST /api/diagnostics/tool-output/refresh` + +Schema: `codex-usage-tracker-diagnostic-tool-output-v1` + +```json +{ + "schema": "codex-usage-tracker-diagnostic-tool-output-v1", + "section": "tool-output", + "status": "ready", + "refreshed": false, + "raw_context_included": false, + "snapshot": {}, + "summary": { + "function_calls": 1, + "function_outputs": 1, + "outputs_with_original_token_count": 1, + "outputs_missing_original_token_count": 0, + "original_token_sum": 42 + }, + "functions": [], + "command_roots": [], + "missing_reasons": [], + "notes": [] +} +``` + +The tool-output snapshot stores function names, conservative command roots, numeric counts, and terminal `Original token count` totals. It does not store raw tool output or command text. + +## Diagnostic Commands Snapshot + +Commands: + +```bash +codex-usage-tracker diagnostics commands --json +codex-usage-tracker diagnostics commands --refresh --json +``` + +Dashboard server API: + +- `GET /api/diagnostics/commands` +- `POST /api/diagnostics/commands/refresh` + +Schema: `codex-usage-tracker-diagnostic-commands-v1` + +```json +{ + "schema": "codex-usage-tracker-diagnostic-commands-v1", + "section": "commands", + "status": "ready", + "refreshed": false, + "raw_context_included": false, + "snapshot": {}, + "summary": { + "shell_function_calls": 1, + "command_root_count": 1, + "missing_command": 0 + }, + "commands": [ + { + "root": "git", + "total": 1, + "children": [{"child": "status", "count": 1}] + } + ], + "notes": [] +} +``` + +The commands snapshot keeps only command roots and a bounded list of safe one-level child labels such as `status`, `diff`, or `-m:pytest`. 
+ +## Diagnostic File Reads Snapshot + +Commands: + +```bash +codex-usage-tracker diagnostics file-reads --json +codex-usage-tracker diagnostics file-reads --refresh --json +``` + +Dashboard server API: + +- `GET /api/diagnostics/file-reads` +- `POST /api/diagnostics/file-reads/refresh` + +Schema: `codex-usage-tracker-diagnostic-file-reads-v1` + +```json +{ + "schema": "codex-usage-tracker-diagnostic-file-reads-v1", + "section": "file-reads", + "status": "ready", + "refreshed": false, + "raw_context_included": false, + "snapshot": {}, + "summary": { + "read_commands": 1, + "read_events": 1, + "unique_paths_read": 1, + "read_events_with_output_count": 1, + "read_events_missing_output_count": 0, + "allocated_output_token_sum": 42 + }, + "by_reader": [], + "top_paths": [], + "largest_read_commands": [], + "path_privacy": {}, + "notes": [] +} +``` + +The file-reads snapshot classifies common shell readers such as `cat`, `sed`, `nl`, `rg`, and `find`. Path labels are basename-only with a short irreversible hash; raw commands, command arguments, absolute paths, file contents, and tool output are not stored. 
+ +## Diagnostic Read Productivity Snapshot + +Commands: + +```bash +codex-usage-tracker diagnostics read-productivity --json +codex-usage-tracker diagnostics read-productivity --refresh --json +``` + +Dashboard server API: + +- `GET /api/diagnostics/read-productivity` +- `POST /api/diagnostics/read-productivity/refresh` + +Schema: `codex-usage-tracker-diagnostic-read-productivity-v1` + +```json +{ + "schema": "codex-usage-tracker-diagnostic-read-productivity-v1", + "section": "read-productivity", + "status": "ready", + "refreshed": false, + "raw_context_included": false, + "snapshot": {}, + "summary": { + "read_events": 1, + "read_events_modified_later": 1, + "read_events_modified_later_pct": 1.0, + "unique_paths_read": 1, + "unique_paths_modified_later": 1, + "unique_path_modified_later_pct": 1.0, + "correlation_note": "Read-to-modify counts are temporal correlations." + }, + "by_reader": [], + "top_modified_paths": [], + "path_privacy": {}, + "notes": [] +} +``` + +Read productivity is a temporal correlation, not causation. A read is counted as modified later only when the same privacy-preserving path key appears in a later structured patch event in the same source log. 
+ +## Diagnostic Concentration Snapshot + +Commands: + +```bash +codex-usage-tracker diagnostics concentration --json +codex-usage-tracker diagnostics concentration --refresh --json +``` + +Dashboard server API: + +- `GET /api/diagnostics/concentration` +- `POST /api/diagnostics/concentration/refresh` + +Schema: `codex-usage-tracker-diagnostic-concentration-v1` + +```json +{ + "schema": "codex-usage-tracker-diagnostic-concentration-v1", + "section": "concentration", + "status": "ready", + "refreshed": false, + "raw_context_included": false, + "snapshot": {}, + "summary": { + "usage_rows": 4, + "total_tokens": 100, + "dimension_count": 3, + "history_scope": "active" + }, + "metrics": [ + {"metric": "top_1_source_log_share", "dimension": "source_log", "top_n": 1, "share": 0.5} + ], + "dimensions": [], + "largest_impact_rows": [], + "privacy": {}, + "notes": [] +} +``` + +The concentration snapshot computes top-1/top-3/top-5 share and effective group count by source log/session, cwd/project label, and day. Metric ids such as `top_1_source_log_share` are stable JSON contract fields; dashboard views should render them as reader-facing labels. Source log labels use session-id prefixes or source hashes, cwd labels use basename-only labels, and raw source paths/cwd paths are not included. 
+ ## Pricing Coverage Command: diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 79c08b4..8d6c322 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -120,12 +120,22 @@ codex-usage-tracker diagnostics summary codex-usage-tracker diagnostics facts --sort uncached codex-usage-tracker diagnostics compactions codex-usage-tracker diagnostics tools +codex-usage-tracker diagnostics overview --refresh +codex-usage-tracker diagnostics tool-output --refresh +codex-usage-tracker diagnostics commands --refresh +codex-usage-tracker diagnostics file-reads --refresh +codex-usage-tracker diagnostics read-productivity --refresh +codex-usage-tracker diagnostics concentration --refresh codex-usage-tracker diagnostics fact-calls --fact-type compaction --fact-name post_compaction ``` Diagnostics expose structured event patterns and their associated token totals. They can show compactions, tool/function/MCP activity, safe command families, structured skill labels, patch outcomes, task completion, search/read loops, and aborted or rolled-back turns. Associated totals are not causal allocations and are not additive when one model call has multiple diagnostic facts. -Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, or JSONL fragments. +Snapshot diagnostics are persisted aggregate reports. Without `--refresh`, snapshot commands return the latest stored payload or a `missing` status. With `--refresh`, they recompute from indexed source logs and replace the stored section snapshot. Ordinary `refresh`, `open-dashboard`, and dashboard `Refresh` update usage rows only; they do not recompute diagnostic snapshots. 
+ +The snapshot sections answer different questions: `overview` summarizes usage rows and aggregate token totals, `tool-output` counts functions and terminal `Original token count` coverage, `commands` keeps command roots plus bounded safe child labels, `file-reads` counts reader/path activity and allocated read-output tokens, `read-productivity` reports later-edit correlations for matching path keys, and `concentration` shows top-N token share by source/session, cwd/project, and day. + +Diagnostic payloads are aggregate-only. They do not include prompts, assistant text, tool arguments, tool output, patch text, raw commands, command arguments, file contents, raw absolute paths, or JSONL fragments. File-read diagnostics use basename-only path labels plus short irreversible hashes, read-productivity percentages are temporal correlations rather than proof that a read caused a later edit, and concentration reports use safe source/session, cwd, and day labels only. ## JSON Queries diff --git a/docs/dashboard-guide.md b/docs/dashboard-guide.md index 40beb28..6d535b1 100644 --- a/docs/dashboard-guide.md +++ b/docs/dashboard-guide.md @@ -133,6 +133,12 @@ Use `Diagnostics` view when you want to see what structured event patterns are h - The tab consumes the localhost `/api/diagnostics/*` endpoints; static file dashboards show a live-API unavailable state. - The first table shows top diagnostic facts by associated uncached input tokens. Tool/function/MCP/command-family and compaction sections expose narrower slices of the same fact data. - Command diagnostics store only a command family such as `pytest`, `git`, or `unknown_command`. Skill and MCP labels are detected only when they are present as structured event metadata. +- Newer on-demand diagnostic snapshot endpoints are section-specific (`overview`, `tool-output`, `commands`, `file-reads`, `read-productivity`, and `concentration`). Heavy recomputation happens only through explicit diagnostic refresh endpoints. 
The dashboard's `Refresh diagnostics` button uses one batched refresh so source-log sections share one scan. +- Click `Refresh diagnostics` when you want to recompute stored diagnostic snapshots. The normal dashboard `Refresh` action updates usage rows only. +- Snapshot panels show their stored status, last computed time, history scope, and logs scanned count. Missing or stale panels still render without forcing a source-log scan. +- `Tool Output` totals come from terminal wrapper metadata such as `Original token count`; missing-count rows show coverage gaps where that header was absent. +- File-read snapshots use basename-only path labels and short hashes. Read-productivity rates are temporal correlations between earlier reads and later structured patch events, not causation. +- Concentration snapshots show top-N share and effective group count by source log/session, cwd/project label, and day without exposing raw source-log or cwd paths. - Click `Calls` on a fact row to load associated model calls. Call links and largest-call links open the Call Investigator, where raw context remains explicit and on demand. - Associated token totals are not causal allocations and are not additive when one call has multiple diagnostic facts. diff --git a/docs/development.md b/docs/development.md index 8ca507f..431cb0a 100644 --- a/docs/development.md +++ b/docs/development.md @@ -182,6 +182,13 @@ codex-usage-tracker summary --preset by-subagent-role codex-usage-tracker expensive --limit 5 ``` +For browser-level dashboard smoke after starting a live dashboard server: + +```bash +npm install +DASHBOARD_BASE_URL=http://127.0.0.1:8898 npm run smoke:dashboard:diagnostics +``` + ## Dashboard Screenshots Dashboard screenshots in `docs/assets/` and `src/codex_usage_tracker/plugin_data/docs/assets/` must be generated from synthetic aggregate fixture data only. 
diff --git a/docs/privacy.md b/docs/privacy.md index 50da9e3..94eea7a 100644 --- a/docs/privacy.md +++ b/docs/privacy.md @@ -35,6 +35,10 @@ Call-origin metadata is heuristic and confidence-labeled. It stores categories s Diagnostic facts follow the same aggregate-only rule. They can store safe structured labels such as `patch_applied`, `function_call_output`, `post_compaction`, MCP tool/server labels, structured skill labels, and command families such as `pytest`, `git`, or `unknown_command`, along with event counts and source line ranges. Command text may be classified in memory during parsing, but it is not persisted. Diagnostic facts do not store tool arguments, command text, command output, patch text, prompt or assistant text, file contents, raw JSONL fragments, or raw context evidence. +On-demand diagnostic snapshots follow the same boundary. Tool-output snapshots use terminal wrapper metadata such as `Original token count` when present and persist only counts, coverage gaps, and safe function/command labels. Command snapshots keep command roots plus a bounded list of conservative one-level child labels. File-read snapshots classify common read commands and path scans, but persist only counters, reader families, basename-only path labels, and short irreversible path hashes. They do not persist raw absolute paths, raw command strings, command arguments, file contents, tool output, or patch text. Read-productivity snapshots report only temporal read-to-modify correlations for matching path keys in the same source log; they do not claim causation. Concentration snapshots group by safe source/session, cwd, and day labels and do not expose raw source-log or cwd paths. + +Diagnostic snapshots are not live recomputed during ordinary dashboard or usage refresh. 
Stored snapshots can be displayed without rescanning source logs, and recomputation requires an explicit diagnostics `--refresh` command, the batched localhost `/api/diagnostics/refresh` request, or a targeted `/api/diagnostics/<section>
/refresh` request. + ## On-Demand Context `usage_call_context`, `codex-usage-tracker context`, and the `serve-dashboard` context endpoint read a single source JSONL file only when explicitly requested. Returned context is redacted for common secret patterns and capped in size by default for CLI/MCP requests. The call investigator uses the same endpoint at runtime and requests quick redacted evidence for the selected call when the local context API is enabled; that still does not persist raw context into SQLite, CSV, support bundles, or generated dashboard HTML. diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..ed5cdf5 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,76 @@ +{ + "name": "codex-usage-tracker-dashboard-smoke", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "codex-usage-tracker-dashboard-smoke", + "devDependencies": { + "@playwright/test": "1.61.0" + } + }, + "node_modules/@playwright/test": { + "version": "1.61.0", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.61.0.tgz", + "integrity": "sha512-cKA5B6lpFEMyMGjxF54QihfYpB4FkEGH+qZhtArDEG+wezQAJY8Pq6C7T1SjWz+FFzt3TbyoXBQYk/0292TdJA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright": "1.61.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/playwright": { + "version": "1.61.0", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.61.0.tgz", + "integrity": 
"sha512-Z+7BeeqQPRRzklHsVFP4KTGIyMxKUmfeRA4WisM6G3/XW6nwGeX6fX9qYaDa+CiUqpOkb2f6X3nar05R3kSuJQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.61.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.61.0", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.61.0.tgz", + "integrity": "sha512-caX7TrY3Ml6egyDX0WUcTHDxodl/b51y5wJOdCEA36QviK/s2g081hvmGs8eaE3DWb6NYZQ6BjO/QkNRPenoPA==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..c5e1f46 --- /dev/null +++ b/package.json @@ -0,0 +1,11 @@ +{ + "name": "codex-usage-tracker-dashboard-smoke", + "private": true, + "type": "module", + "scripts": { + "smoke:dashboard:diagnostics": "playwright test tests/playwright/dashboard-diagnostics.spec.mjs" + }, + "devDependencies": { + "@playwright/test": "1.61.0" + } +} diff --git a/playwright.config.mjs b/playwright.config.mjs new file mode 100644 index 0000000..2e57663 --- /dev/null +++ b/playwright.config.mjs @@ -0,0 +1,32 @@ +import { defineConfig, devices } from '@playwright/test'; + +const baseURL = process.env.DASHBOARD_BASE_URL || 'http://127.0.0.1:8898'; + +export default defineConfig({ + testDir: './tests/playwright', + timeout: 30_000, + expect: { + timeout: 10_000, + }, + use: { + baseURL, + trace: 'retain-on-failure', + }, + reporter: [['list']], + projects: [ + { + name: 'chromium-desktop', + use: { + ...devices['Desktop Chrome'], + viewport: { width: 1440, height: 1000 }, + }, + }, + { + name: 'chromium-mobile', + use: { + ...devices['Pixel 5'], + viewport: { width: 393, height: 851 }, + }, + }, + ], +}); diff --git a/src/codex_usage_tracker/cli.py b/src/codex_usage_tracker/cli.py index 
96b7465..a0d06f1 100644 --- a/src/codex_usage_tracker/cli.py +++ b/src/codex_usage_tracker/cli.py @@ -29,6 +29,14 @@ build_diagnostics_facts_report, build_diagnostics_summary_report, ) +from codex_usage_tracker.diagnostic_snapshots import ( + build_diagnostic_commands_report, + build_diagnostic_concentration_report, + build_diagnostic_file_reads_report, + build_diagnostic_overview_report, + build_diagnostic_read_productivity_report, + build_diagnostic_tool_output_report, +) from codex_usage_tracker.diagnostics import run_doctor from codex_usage_tracker.formatting import ( format_doctor, @@ -394,6 +402,7 @@ def _run_recommendations(args: argparse.Namespace) -> int: def _run_diagnostics(args: argparse.Namespace) -> int: command = args.diagnostics_command + report: Any if command == "summary": report = build_diagnostics_summary_report( db_path=args.db, @@ -448,6 +457,42 @@ def _run_diagnostics(args: argparse.Namespace) -> int: direction=args.direction, privacy_mode=args.privacy_mode, ) + elif command == "overview": + report = build_diagnostic_overview_report( + db_path=args.db, + include_archived=args.include_archived, + refresh=args.refresh, + ) + elif command == "tool-output": + report = build_diagnostic_tool_output_report( + db_path=args.db, + include_archived=args.include_archived, + refresh=args.refresh, + ) + elif command == "commands": + report = build_diagnostic_commands_report( + db_path=args.db, + include_archived=args.include_archived, + refresh=args.refresh, + ) + elif command == "file-reads": + report = build_diagnostic_file_reads_report( + db_path=args.db, + include_archived=args.include_archived, + refresh=args.refresh, + ) + elif command == "read-productivity": + report = build_diagnostic_read_productivity_report( + db_path=args.db, + include_archived=args.include_archived, + refresh=args.refresh, + ) + elif command == "concentration": + report = build_diagnostic_concentration_report( + db_path=args.db, + include_archived=args.include_archived, + 
refresh=args.refresh, + ) else: raise ValueError(f"unknown diagnostics command: {command}") diff --git a/src/codex_usage_tracker/cli_parser.py b/src/codex_usage_tracker/cli_parser.py index 193fe7a..1721dd0 100644 --- a/src/codex_usage_tracker/cli_parser.py +++ b/src/codex_usage_tracker/cli_parser.py @@ -327,6 +327,78 @@ def _add_diagnostics_parser( _add_diagnostics_base_filters(tools) _add_diagnostics_fact_sort(tools, default_limit=50) + overview = diagnostic_subparsers.add_parser( + "overview", + help="Show the on-demand aggregate diagnostic overview snapshot", + ) + overview.add_argument("--include-archived", action="store_true") + overview.add_argument( + "--refresh", + action="store_true", + help="Recompute and persist the overview snapshot before reading it.", + ) + overview.add_argument("--json", action="store_true", dest="as_json") + + tool_output = diagnostic_subparsers.add_parser( + "tool-output", + help="Show the on-demand aggregate tool-output snapshot", + ) + tool_output.add_argument("--include-archived", action="store_true") + tool_output.add_argument( + "--refresh", + action="store_true", + help="Recompute and persist the tool-output snapshot before reading it.", + ) + tool_output.add_argument("--json", action="store_true", dest="as_json") + + commands = diagnostic_subparsers.add_parser( + "commands", + help="Show the on-demand aggregate command root snapshot", + ) + commands.add_argument("--include-archived", action="store_true") + commands.add_argument( + "--refresh", + action="store_true", + help="Recompute and persist the command snapshot before reading it.", + ) + commands.add_argument("--json", action="store_true", dest="as_json") + + file_reads = diagnostic_subparsers.add_parser( + "file-reads", + help="Show the on-demand aggregate file-read snapshot", + ) + file_reads.add_argument("--include-archived", action="store_true") + file_reads.add_argument( + "--refresh", + action="store_true", + help="Recompute and persist the file-read snapshot 
before reading it.", + ) + file_reads.add_argument("--json", action="store_true", dest="as_json") + + read_productivity = diagnostic_subparsers.add_parser( + "read-productivity", + help="Show temporal read-to-modify diagnostic correlations", + ) + read_productivity.add_argument("--include-archived", action="store_true") + read_productivity.add_argument( + "--refresh", + action="store_true", + help="Recompute and persist the read-productivity snapshot before reading it.", + ) + read_productivity.add_argument("--json", action="store_true", dest="as_json") + + concentration = diagnostic_subparsers.add_parser( + "concentration", + help="Show concentration of token impact by source log, cwd, and day", + ) + concentration.add_argument("--include-archived", action="store_true") + concentration.add_argument( + "--refresh", + action="store_true", + help="Recompute and persist the concentration snapshot before reading it.", + ) + concentration.add_argument("--json", action="store_true", dest="as_json") + fact_calls = diagnostic_subparsers.add_parser( "fact-calls", help="List calls associated with one diagnostic fact", diff --git a/src/codex_usage_tracker/dashboard.py b/src/codex_usage_tracker/dashboard.py index d014fde..665e0dd 100644 --- a/src/codex_usage_tracker/dashboard.py +++ b/src/codex_usage_tracker/dashboard.py @@ -76,6 +76,16 @@ ("actions_script_src", "__ACTIONS_SCRIPT_SRC__", "dashboard_actions.js"), ("live_script_src", "__LIVE_SCRIPT_SRC__", "dashboard_live.js"), ("events_script_src", "__EVENTS_SCRIPT_SRC__", "dashboard_events.js"), + ( + "diagnostics_snapshots_script_src", + "__DIAGNOSTICS_SNAPSHOTS_SCRIPT_SRC__", + "dashboard_diagnostics_snapshots.js", + ), + ( + "diagnostics_facts_script_src", + "__DIAGNOSTICS_FACTS_SCRIPT_SRC__", + "dashboard_diagnostics_facts.js", + ), ("diagnostics_script_src", "__DIAGNOSTICS_SCRIPT_SRC__", "dashboard_diagnostics.js"), ( "call_diagnostics_script_src", @@ -320,6 +330,8 @@ def render_dashboard_html( actions_script_src: str | 
None = None, live_script_src: str | None = None, events_script_src: str | None = None, + diagnostics_snapshots_script_src: str | None = None, + diagnostics_facts_script_src: str | None = None, diagnostics_script_src: str | None = None, call_diagnostics_script_src: str | None = None, call_investigator_script_src: str | None = None, @@ -347,6 +359,8 @@ def render_dashboard_html( "actions_script_src": actions_script_src, "live_script_src": live_script_src, "events_script_src": events_script_src, + "diagnostics_snapshots_script_src": diagnostics_snapshots_script_src, + "diagnostics_facts_script_src": diagnostics_facts_script_src, "diagnostics_script_src": diagnostics_script_src, "call_diagnostics_script_src": call_diagnostics_script_src, "call_investigator_script_src": call_investigator_script_src, diff --git a/src/codex_usage_tracker/diagnostic_snapshot_analysis.py b/src/codex_usage_tracker/diagnostic_snapshot_analysis.py new file mode 100644 index 0000000..070857a --- /dev/null +++ b/src/codex_usage_tracker/diagnostic_snapshot_analysis.py @@ -0,0 +1,446 @@ +"""Aggregate diagnostic snapshot analyzers.""" + +from __future__ import annotations + +import json +from collections import Counter, defaultdict +from pathlib import Path +from typing import Any + +from codex_usage_tracker.diagnostic_snapshot_events import ( + READ_PRODUCTIVITY_NOTE, + allocate_token_count, + command_root_and_child, + int_value, + is_shell_tool, + modified_path_refs, + optional_str, + original_output_count, + path_privacy_metadata, + ratio, + read_path_refs_from_command, + read_reader, + safe_label, + shell_command_from_payload, + simple_rows, + unique_path_rows, +) +from codex_usage_tracker.diagnostic_snapshot_rows import ( + command_output_rows, + command_rows, + function_rows, + largest_read_command_rows, + read_path_rows, + read_productivity_path_rows, + read_productivity_reader_rows, + read_reader_rows, +) +from codex_usage_tracker.store import connect +from codex_usage_tracker.store_schema 
import init_db + + +def analyze_indexed_source_logs( + *, + db_path: Path, + include_archived: bool, +) -> dict[str, Any]: + source_logs, usage_rows_scanned = _indexed_source_logs( + db_path=db_path, + include_archived=include_archived, + ) + counters = _empty_counters() + meta: Counter[str] = Counter() + meta["source_logs_scanned"] = len(source_logs) + meta["usage_rows_scanned"] = usage_rows_scanned + + for source_log in source_logs: + _scan_source_log(source_log, counters=counters, meta=meta) + + return _analysis_payload(counters=counters, meta=meta) + + +def _indexed_source_logs( + *, + db_path: Path, + include_archived: bool, +) -> tuple[list[Path], int]: + where = "" if include_archived else "WHERE is_archived = 0" + with connect(db_path) as conn: + init_db(conn) + rows = conn.execute( + f"SELECT source_file FROM source_files {where} ORDER BY source_file" + ).fetchall() + usage_row = conn.execute( + f"SELECT COUNT(*) AS usage_rows FROM usage_events {where}" + ).fetchone() + return [Path(str(row["source_file"])) for row in rows], int_value(usage_row["usage_rows"]) + + +def _empty_counters() -> dict[str, Any]: + return { + "function_calls": Counter(), + "function_outputs": Counter(), + "output_with_count": Counter(), + "output_missing_count": Counter(), + "output_token_sum": Counter(), + "command_calls": Counter(), + "command_children": {}, + "command_with_count": Counter(), + "command_missing_count": Counter(), + "command_token_sum": Counter(), + "read_events": [], + "read_command_count": 0, + "read_events_by_reader": Counter(), + "read_events_by_path": Counter(), + "read_events_with_count_by_reader": Counter(), + "read_events_missing_count_by_reader": Counter(), + "read_tokens_by_reader": Counter(), + "read_tokens_by_path": Counter(), + "read_modified_by_reader": Counter(), + "read_modified_by_path": Counter(), + "read_path_refs": {}, + "largest_read_commands": [], + "missing_reasons": Counter(), + } + + +def _scan_source_log(source_log: Path, *, counters: 
dict[str, Any], meta: Counter[str]) -> None: + call_names: dict[str, str] = {} + call_roots: dict[str, str] = {} + call_read_events: dict[str, list[int]] = {} + source_read_events: list[int] = [] + modified_orders_by_path: dict[str, list[int]] = defaultdict(list) + try: + lines = source_log.open(encoding="utf-8") + except OSError: + meta["read_errors"] += 1 + return + + with lines: + for order, line in enumerate(lines): + if '"response_item"' not in line and '"patch_apply_end"' not in line: + continue + envelope = _json_envelope(line, meta=meta) + if envelope is None: + continue + payload = envelope.get("payload") + if not isinstance(payload, dict): + continue + if envelope.get("type") == "event_msg": + for path_ref in modified_path_refs(payload): + modified_orders_by_path[path_ref["path_key"]].append(order) + continue + if envelope.get("type") != "response_item": + continue + if payload.get("type") == "function_call": + _record_function_call( + payload, + order=order, + counters=counters, + meta=meta, + call_names=call_names, + call_roots=call_roots, + call_read_events=call_read_events, + source_read_events=source_read_events, + ) + elif payload.get("type") == "function_call_output": + _record_function_output( + payload, + counters=counters, + call_names=call_names, + call_roots=call_roots, + call_read_events=call_read_events, + ) + + _mark_later_modifications( + counters=counters, + source_read_events=source_read_events, + modified_orders_by_path=modified_orders_by_path, + ) + + +def _json_envelope(line: str, *, meta: Counter[str]) -> dict[str, Any] | None: + try: + envelope = json.loads(line) + except json.JSONDecodeError: + meta["invalid_json"] += 1 + return None + return envelope if isinstance(envelope, dict) else None + + +def _record_function_call( + payload: dict[str, Any], + *, + order: int, + counters: dict[str, Any], + meta: Counter[str], + call_names: dict[str, str], + call_roots: dict[str, str], + call_read_events: dict[str, list[int]], + 
source_read_events: list[int], +) -> None: + call_id = optional_str(payload.get("call_id") or payload.get("id")) + function_name = safe_label(payload.get("name")) or "unknown_function" + counters["function_calls"][function_name] += 1 + if call_id: + call_names[call_id] = function_name + command = shell_command_from_payload(payload, function_name=function_name) + if command is None: + if is_shell_tool(function_name): + meta["missing_command"] += 1 + return + root, child = command_root_and_child(command) + counters["command_calls"][root] += 1 + counters["command_children"].setdefault(root, Counter())[child] += 1 + if call_id: + call_roots[call_id] = root + read_refs = read_path_refs_from_command(command, root=root) + if read_refs: + counters["read_command_count"] += 1 + read_event_indexes = _record_read_refs( + read_refs, + root=root, + order=order, + counters=counters, + source_read_events=source_read_events, + ) + if call_id: + call_read_events[call_id] = read_event_indexes + + +def _record_read_refs( + read_refs: list[dict[str, str]], + *, + root: str, + order: int, + counters: dict[str, Any], + source_read_events: list[int], +) -> list[int]: + indexes: list[int] = [] + reader = read_reader(root) + for path_ref in read_refs: + path_key = path_ref["path_key"] + counters["read_path_refs"][path_key] = path_ref + event_index = len(counters["read_events"]) + counters["read_events"].append( + { + "reader": reader, + "root": root, + "path_key": path_key, + "path_label": path_ref["path_label"], + "path_hash": path_ref["path_hash"], + "order": order, + "modified_later": False, + } + ) + source_read_events.append(event_index) + indexes.append(event_index) + counters["read_events_by_reader"][reader] += 1 + counters["read_events_by_path"][path_key] += 1 + return indexes + + +def _record_function_output( + payload: dict[str, Any], + *, + counters: dict[str, Any], + call_names: dict[str, str], + call_roots: dict[str, str], + call_read_events: dict[str, list[int]], +) -> None: + 
call_id = optional_str(payload.get("call_id")) + function_name = call_names.get(call_id or "", "unknown_function") + counters["function_outputs"][function_name] += 1 + output = payload.get("output") + count = original_output_count(output) + read_indexes = call_read_events.get(call_id or "", []) + if count is None: + _record_missing_output_count( + output, + counters=counters, + function_name=function_name, + root=call_roots.get(call_id or ""), + read_indexes=read_indexes, + ) + return + _record_output_count( + int(count), + counters=counters, + function_name=function_name, + root=call_roots.get(call_id or ""), + read_indexes=read_indexes, + ) + + +def _record_missing_output_count( + output: object, + *, + counters: dict[str, Any], + function_name: str, + root: str | None, + read_indexes: list[int], +) -> None: + counters["output_missing_count"][function_name] += 1 + counters["missing_reasons"]["string_no_header" if isinstance(output, str) else "non_string_output"] += 1 + if root: + counters["command_missing_count"][root] += 1 + for event_index in read_indexes: + reader = str(counters["read_events"][event_index]["reader"]) + counters["read_events_missing_count_by_reader"][reader] += 1 + + +def _record_output_count( + count: int, + *, + counters: dict[str, Any], + function_name: str, + root: str | None, + read_indexes: list[int], +) -> None: + counters["output_with_count"][function_name] += 1 + counters["output_token_sum"][function_name] += count + if root: + counters["command_with_count"][root] += 1 + counters["command_token_sum"][root] += count + if not read_indexes: + return + paths: list[dict[str, str]] = [] + readers: Counter[str] = Counter() + allocations = allocate_token_count(count, len(read_indexes)) + for event_index, allocated in zip(read_indexes, allocations, strict=True): + event = counters["read_events"][event_index] + reader = str(event["reader"]) + path_key = str(event["path_key"]) + counters["read_events_with_count_by_reader"][reader] += 1 + 
counters["read_tokens_by_reader"][reader] += allocated + counters["read_tokens_by_path"][path_key] += allocated + readers[reader] += 1 + paths.append({"path_label": str(event["path_label"]), "path_hash": str(event["path_hash"])}) + counters["largest_read_commands"].append( + { + "root": root or "unknown_command", + "read_event_count": len(read_indexes), + "original_token_count": int(count), + "readers": simple_rows(readers, key_name="reader"), + "paths": unique_path_rows(paths), + } + ) + + +def _mark_later_modifications( + *, + counters: dict[str, Any], + source_read_events: list[int], + modified_orders_by_path: dict[str, list[int]], +) -> None: + for event_index in source_read_events: + event = counters["read_events"][event_index] + path_key = str(event["path_key"]) + if any(order > int(event["order"]) for order in modified_orders_by_path.get(path_key, [])): + event["modified_later"] = True + counters["read_modified_by_reader"][str(event["reader"])] += 1 + counters["read_modified_by_path"][path_key] += 1 + + +def _analysis_payload(*, counters: dict[str, Any], meta: Counter[str]) -> dict[str, Any]: + return { + "meta": {key: int(value) for key, value in meta.items()}, + "tool_output": _tool_output_payload(counters), + "commands": _commands_payload(counters, meta=meta), + "file_reads": _file_reads_payload(counters), + "read_productivity": _read_productivity_payload(counters), + } + + +def _tool_output_payload(counters: dict[str, Any]) -> dict[str, Any]: + return { + "summary": { + "function_calls": int(sum(counters["function_calls"].values())), + "function_outputs": int(sum(counters["function_outputs"].values())), + "outputs_with_original_token_count": int(sum(counters["output_with_count"].values())), + "outputs_missing_original_token_count": int(sum(counters["output_missing_count"].values())), + "original_token_sum": int(sum(counters["output_token_sum"].values())), + }, + "functions": function_rows( + function_calls=counters["function_calls"], + 
function_outputs=counters["function_outputs"], + output_with_count=counters["output_with_count"], + output_missing_count=counters["output_missing_count"], + output_token_sum=counters["output_token_sum"], + ), + "command_roots": command_output_rows( + command_calls=counters["command_calls"], + command_with_count=counters["command_with_count"], + command_missing_count=counters["command_missing_count"], + command_token_sum=counters["command_token_sum"], + ), + "missing_reasons": simple_rows(counters["missing_reasons"]), + } + + +def _commands_payload(counters: dict[str, Any], *, meta: Counter[str]) -> dict[str, Any]: + return { + "summary": { + "shell_function_calls": int(sum(counters["command_calls"].values())), + "command_root_count": len(counters["command_calls"]), + "missing_command": int(meta["missing_command"]), + }, + "commands": command_rows( + command_calls=counters["command_calls"], + command_children=counters["command_children"], + ), + } + + +def _file_reads_payload(counters: dict[str, Any]) -> dict[str, Any]: + return { + "summary": { + "read_commands": counters["read_command_count"], + "read_events": len(counters["read_events"]), + "unique_paths_read": len(counters["read_path_refs"]), + "read_events_with_output_count": int(sum(counters["read_events_with_count_by_reader"].values())), + "read_events_missing_output_count": int(sum(counters["read_events_missing_count_by_reader"].values())), + "allocated_output_token_sum": int(sum(counters["read_tokens_by_reader"].values())), + }, + "by_reader": read_reader_rows( + read_events_by_reader=counters["read_events_by_reader"], + read_events_with_count_by_reader=counters["read_events_with_count_by_reader"], + read_events_missing_count_by_reader=counters["read_events_missing_count_by_reader"], + read_tokens_by_reader=counters["read_tokens_by_reader"], + ), + "top_paths": read_path_rows( + read_path_refs=counters["read_path_refs"], + read_events_by_path=counters["read_events_by_path"], + 
read_tokens_by_path=counters["read_tokens_by_path"], + ), + "largest_read_commands": largest_read_command_rows(counters["largest_read_commands"]), + "path_privacy": path_privacy_metadata(), + } + + +def _read_productivity_payload(counters: dict[str, Any]) -> dict[str, Any]: + read_modified_count = int(sum(counters["read_modified_by_reader"].values())) + return { + "summary": { + "read_events": len(counters["read_events"]), + "read_events_modified_later": read_modified_count, + "read_events_modified_later_pct": ratio(read_modified_count, len(counters["read_events"])), + "unique_paths_read": len(counters["read_path_refs"]), + "unique_paths_modified_later": len(counters["read_modified_by_path"]), + "unique_path_modified_later_pct": ratio( + len(counters["read_modified_by_path"]), + len(counters["read_path_refs"]), + ), + "correlation_note": READ_PRODUCTIVITY_NOTE, + }, + "by_reader": read_productivity_reader_rows( + read_events_by_reader=counters["read_events_by_reader"], + read_modified_by_reader=counters["read_modified_by_reader"], + ), + "top_modified_paths": read_productivity_path_rows( + read_path_refs=counters["read_path_refs"], + read_events_by_path=counters["read_events_by_path"], + read_modified_by_path=counters["read_modified_by_path"], + ), + "path_privacy": path_privacy_metadata(), + } diff --git a/src/codex_usage_tracker/diagnostic_snapshot_concentration.py b/src/codex_usage_tracker/diagnostic_snapshot_concentration.py new file mode 100644 index 0000000..9139409 --- /dev/null +++ b/src/codex_usage_tracker/diagnostic_snapshot_concentration.py @@ -0,0 +1,338 @@ +"""Aggregate diagnostic concentration snapshot analysis.""" + +from __future__ import annotations + +import hashlib +import re +from pathlib import Path +from typing import Any + +from codex_usage_tracker.store import connect +from codex_usage_tracker.store_schema import init_db + +DIAGNOSTIC_HISTORY_ACTIVE = "active" +DIAGNOSTIC_HISTORY_ALL = "all" +SAFE_PATH_LABEL_RE = 
re.compile(r"^[A-Za-z0-9_.:@*+-]{1,80}$") +SENSITIVE_LABEL_PREFIXES = ("sk-", "sk_", "ghp_", "github_pat_", "xox") + + +def compute_concentration( + *, + db_path: Path, + include_archived: bool, +) -> dict[str, Any]: + where = "" if include_archived else "WHERE is_archived = 0" + with connect(db_path) as conn: + init_db(conn) + rows = conn.execute( + f""" + SELECT + record_id, + session_id, + event_timestamp, + source_file, + cwd, + total_tokens + FROM usage_events + {where} + ORDER BY event_timestamp, record_id + """ + ).fetchall() + source_row = conn.execute( + f"SELECT COUNT(DISTINCT source_file) AS source_logs_scanned FROM usage_events {where}" + ).fetchone() + + source_groups: dict[str, dict[str, Any]] = {} + cwd_groups: dict[str, dict[str, Any]] = {} + day_groups: dict[str, dict[str, Any]] = {} + total_tokens = 0 + for row in rows: + tokens = _int_value(row["total_tokens"]) + total_tokens += tokens + record_id = str(row["record_id"]) + session_id = _optional_str(row["session_id"]) + _add_concentration_row( + source_groups, + key=_source_group_key(row["source_file"]), + label=_source_group_label(row["source_file"], session_id=session_id), + group_hash=_source_group_hash(row["source_file"]), + tokens=tokens, + record_id=record_id, + session_id=session_id, + ) + cwd_ref = _cwd_group_ref(row["cwd"]) + _add_concentration_row( + cwd_groups, + key=cwd_ref["group_hash"], + label=cwd_ref["label"], + group_hash=cwd_ref["group_hash"], + tokens=tokens, + record_id=record_id, + session_id=session_id, + ) + day = _day_label(row["event_timestamp"]) + _add_concentration_row( + day_groups, + key=day, + label=day, + group_hash=_stable_hash(day), + tokens=tokens, + record_id=record_id, + session_id=session_id, + ) + + dimensions = [ + _concentration_dimension( + "source_log", + "Source Log / Session", + source_groups, + total_tokens=total_tokens, + ), + _concentration_dimension("cwd", "Cwd / Project", cwd_groups, total_tokens=total_tokens), + _concentration_dimension("day", 
"Day", day_groups, total_tokens=total_tokens), + ] + metrics = _concentration_metrics(dimensions) + return { + "meta": { + "source_logs_scanned": _int_value(source_row["source_logs_scanned"]), + }, + "summary": { + "usage_rows": len(rows), + "total_tokens": total_tokens, + "dimension_count": len(dimensions), + "history_scope": _history_scope(include_archived), + }, + "metrics": metrics, + "dimensions": dimensions, + "largest_impact_rows": _largest_impact_rows(dimensions), + "privacy": concentration_privacy_metadata(), + } + + + +def _add_concentration_row( + groups: dict[str, dict[str, Any]], + *, + key: str, + label: str, + group_hash: str, + tokens: int, + record_id: str, + session_id: str | None, +) -> None: + group = groups.setdefault( + key, + { + "label": label, + "group_hash": group_hash, + "total_tokens": 0, + "usage_rows": 0, + "largest_record_id": None, + "largest_call_tokens": 0, + "session_ids": set(), + }, + ) + group["total_tokens"] = int(group["total_tokens"]) + tokens + group["usage_rows"] = int(group["usage_rows"]) + 1 + if tokens > int(group["largest_call_tokens"]): + group["largest_call_tokens"] = tokens + group["largest_record_id"] = record_id + if session_id: + group["session_ids"].add(session_id) + + +def _concentration_dimension( + dimension: str, + label: str, + groups: dict[str, dict[str, Any]], + *, + total_tokens: int, +) -> dict[str, Any]: + rows = [_concentration_group_row(dimension, group, total_tokens=total_tokens) for group in groups.values()] + rows = sorted( + rows, + key=lambda row: (-int(row["total_tokens"]), -int(row["usage_rows"]), row["label"]), + ) + return { + "dimension": dimension, + "label": label, + "group_count": len(rows), + "total_tokens": total_tokens, + "top_1_share": _top_share(rows, 1, total_tokens=total_tokens), + "top_3_share": _top_share(rows, 3, total_tokens=total_tokens), + "top_5_share": _top_share(rows, 5, total_tokens=total_tokens), + "effective_group_count": _effective_group_count(rows, 
total_tokens=total_tokens), + "top_rows": rows[:10], + } + + +def _concentration_group_row( + dimension: str, + group: dict[str, Any], + *, + total_tokens: int, +) -> dict[str, Any]: + session_ids = sorted(group["session_ids"]) + return { + "dimension": dimension, + "label": group["label"], + "group_hash": group["group_hash"], + "usage_rows": int(group["usage_rows"]), + "total_tokens": int(group["total_tokens"]), + "share": _rounded_ratio(int(group["total_tokens"]), total_tokens), + "largest_record_id": group["largest_record_id"], + "largest_call_tokens": int(group["largest_call_tokens"]), + "session_id": session_ids[0] if len(session_ids) == 1 else None, + } + + +def _concentration_metrics(dimensions: list[dict[str, Any]]) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + for dimension in dimensions: + dimension_key = str(dimension["dimension"]) + for top_n in (1, 3, 5): + rows.append( + { + "metric": f"top_{top_n}_{dimension_key}_share", + "dimension": dimension_key, + "top_n": top_n, + "share": dimension[f"top_{top_n}_share"], + } + ) + return rows + + +def _largest_impact_rows(dimensions: list[dict[str, Any]]) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + for dimension in dimensions: + for row in dimension["top_rows"]: + rows.append(dict(row)) + return sorted( + rows, + key=lambda row: (-float(row["share"]), -int(row["total_tokens"]), row["dimension"], row["label"]), + )[:15] + + +def _top_share( + rows: list[dict[str, Any]], + top_n: int, + *, + total_tokens: int, +) -> float: + return _rounded_ratio(sum(int(row["total_tokens"]) for row in rows[:top_n]), total_tokens) + + +def _effective_group_count( + rows: list[dict[str, Any]], + *, + total_tokens: int, +) -> float: + if total_tokens <= 0: + return 0.0 + hhi = sum((int(row["total_tokens"]) / total_tokens) ** 2 for row in rows) + return round(1 / hhi, 6) if hhi else 0.0 + + +def _source_group_key(value: object) -> str: + return _source_group_hash(value) + + +def 
_source_group_hash(value: object) -> str: + source = value if isinstance(value, str) and value else "unknown_source" + return _stable_hash(source) + + +def _source_group_label(value: object, *, session_id: str | None) -> str: + if session_id: + return f"session:{session_id[:8]}" + return f"source:{_source_group_hash(value)}" + + +def _cwd_group_ref(value: object) -> dict[str, str]: + if isinstance(value, str) and value: + path_ref = _path_ref_from_token(value) + if path_ref is not None: + return {"label": path_ref["path_label"], "group_hash": path_ref["path_hash"]} + return {"label": "unknown_cwd", "group_hash": _stable_hash("unknown_cwd")} + + +def _day_label(value: object) -> str: + if isinstance(value, str): + match = re.match(r"^\d{4}-\d{2}-\d{2}", value) + if match: + return match.group(0) + return "unknown_day" + + + + +def concentration_privacy_metadata() -> dict[str, str]: + return { + "source_log_label_policy": "session_id_prefix_or_source_hash", + "cwd_label_policy": "basename_only", + "hash_policy": "sha256_12", + "raw_source_paths_included": "false", + "raw_cwd_paths_included": "false", + } + + +def _path_ref_from_token(token: str) -> dict[str, str] | None: + raw = token.strip() + if not raw or raw == "-" or _is_shell_separator(raw) or _looks_like_assignment(raw): + return None + if raw.startswith(("$", "`")) or "://" in raw: + return None + label = _safe_path_label(raw) + if label is None: + return None + path_hash = _stable_hash(raw) + return {"path_key": path_hash, "path_label": label, "path_hash": path_hash} + + +def _safe_path_label(token: str) -> str | None: + normalized = token.rstrip("/") + label = normalized if normalized in {".", ".."} else normalized.rsplit("/", 1)[-1].rsplit("\\", 1)[-1] + if not label: + return None + lowered = label.lower() + if lowered.startswith(SENSITIVE_LABEL_PREFIXES): + return "path" + return label if SAFE_PATH_LABEL_RE.fullmatch(label) else "path" + + +def _is_shell_separator(token: str) -> bool: + return token in 
{"&&", "||", ";", "|"} + + +def _looks_like_assignment(token: str) -> bool: + return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*=.*", token)) + + +def _stable_hash(value: str) -> str: + return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12] + + +def _optional_str(value: object) -> str | None: + return value if isinstance(value, str) and value else None + + +def _history_scope(include_archived: bool) -> str: + return DIAGNOSTIC_HISTORY_ALL if include_archived else DIAGNOSTIC_HISTORY_ACTIVE + + +def _int_value(value: object) -> int: + if isinstance(value, int): + return value + if isinstance(value, float): + return int(value) + if isinstance(value, str) and value: + return int(value) + return 0 + + +def _ratio(numerator: int, denominator: int) -> float: + return numerator / denominator if denominator else 0.0 + + +def _rounded_ratio(numerator: int, denominator: int) -> float: + return round(_ratio(numerator, denominator), 6) diff --git a/src/codex_usage_tracker/diagnostic_snapshot_constants.py b/src/codex_usage_tracker/diagnostic_snapshot_constants.py new file mode 100644 index 0000000..2852510 --- /dev/null +++ b/src/codex_usage_tracker/diagnostic_snapshot_constants.py @@ -0,0 +1,21 @@ +"""Shared constants for diagnostic snapshot reports.""" + +DIAGNOSTIC_OVERVIEW_SCHEMA = "codex-usage-tracker-diagnostic-overview-v1" +DIAGNOSTIC_TOOL_OUTPUT_SCHEMA = "codex-usage-tracker-diagnostic-tool-output-v1" +DIAGNOSTIC_COMMANDS_SCHEMA = "codex-usage-tracker-diagnostic-commands-v1" +DIAGNOSTIC_FILE_READS_SCHEMA = "codex-usage-tracker-diagnostic-file-reads-v1" +DIAGNOSTIC_READ_PRODUCTIVITY_SCHEMA = "codex-usage-tracker-diagnostic-read-productivity-v1" +DIAGNOSTIC_CONCENTRATION_SCHEMA = "codex-usage-tracker-diagnostic-concentration-v1" +DIAGNOSTIC_BATCH_REFRESH_SCHEMA = "codex-usage-tracker-diagnostic-snapshot-refresh-v1" +DIAGNOSTIC_OVERVIEW_SECTION = "overview" +DIAGNOSTIC_TOOL_OUTPUT_SECTION = "tool-output" +DIAGNOSTIC_COMMANDS_SECTION = "commands" 
+DIAGNOSTIC_FILE_READS_SECTION = "file-reads" +DIAGNOSTIC_READ_PRODUCTIVITY_SECTION = "read-productivity" +DIAGNOSTIC_CONCENTRATION_SECTION = "concentration" +DIAGNOSTIC_HISTORY_ACTIVE = "active" +DIAGNOSTIC_HISTORY_ALL = "all" +DIAGNOSTIC_SNAPSHOT_NOTES = [ + "Diagnostic snapshots are recomputed only by explicit diagnostic refresh.", + "Snapshot totals are aggregate-only and do not include raw context.", +] diff --git a/src/codex_usage_tracker/diagnostic_snapshot_events.py b/src/codex_usage_tracker/diagnostic_snapshot_events.py new file mode 100644 index 0000000..5170f6e --- /dev/null +++ b/src/codex_usage_tracker/diagnostic_snapshot_events.py @@ -0,0 +1,399 @@ +"""Safe event parsing helpers for diagnostic snapshot reports.""" + +from __future__ import annotations + +import hashlib +import json +import re +import shlex +from collections import Counter +from pathlib import Path +from typing import Any + +SAFE_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:-]{1,80}$") +SAFE_PATH_LABEL_RE = re.compile(r"^[A-Za-z0-9_.:@*+-]{1,80}$") +SENSITIVE_LABEL_PREFIXES = ("sk-", "sk_", "ghp_", "github_pat_", "xox") +SHELL_TOOL_NAMES = { + "bash", + "exec_command", + "functions.exec_command", + "run_command", + "shell", + "terminal", + "write_stdin", +} +READ_COMMAND_ROOTS = {"cat", "find", "grep", "head", "nl", "rg", "sed", "strings", "tail", "wc"} +SEARCH_READ_ROOTS = {"find", "rg"} +READ_PRODUCTIVITY_NOTE = ( + "Read-to-modify counts are temporal correlations: a read is counted when the same " + "privacy-preserving path key is modified later in the same source log." +) +ORIGINAL_OUTPUT_RE = re.compile( + r"^Chunk ID: (?P[^\n]+)\n" + r"Wall time: (?P[^\n]+)\n" + r"(?:(?PProcess exited with code -?\d+|Process running with session ID \d+)\n)?" 
+ r"Original token count: (?P\d+)\n", + re.S, +) + + +def shell_command_from_payload(payload: dict[str, Any], *, function_name: str) -> str | None: + if not is_shell_tool(function_name): + return None + arguments = payload.get("arguments") + if isinstance(arguments, str): + try: + loaded = json.loads(arguments) + except json.JSONDecodeError: + loaded = {} + if isinstance(loaded, dict): + command = loaded.get("cmd") or loaded.get("command") + if isinstance(command, str): + return command + if isinstance(arguments, dict): + command = arguments.get("cmd") or arguments.get("command") + if isinstance(command, str): + return command + command = payload.get("cmd") or payload.get("command") + return command if isinstance(command, str) else None + + +def is_shell_tool(function_name: str) -> bool: + lowered = function_name.lower() + suffix = lowered.rsplit(".", 1)[-1].rsplit("__", 1)[-1] + return lowered in SHELL_TOOL_NAMES or suffix in SHELL_TOOL_NAMES + + +def command_root_and_child(command: str) -> tuple[str, str]: + tokens = _strip_command_wrappers(_command_tokens(command)) + if not tokens: + return "unknown_command", "unknown" + root = _command_root(tokens) + return root, _command_child(root, tokens) + + +def read_path_refs_from_command(command: str, *, root: str) -> list[dict[str, str]]: + if root not in READ_COMMAND_ROOTS: + return [] + tokens = _strip_command_wrappers(_command_tokens(command)) + if not tokens: + return [] + path_tokens = _read_path_tokens(root=root, tokens=tokens) + refs: list[dict[str, str]] = [] + seen: set[str] = set() + for token in path_tokens: + path_ref = _path_ref_from_token(token) + if path_ref is None or path_ref["path_key"] in seen: + continue + seen.add(path_ref["path_key"]) + refs.append(path_ref) + return refs + + +def read_reader(root: str) -> str: + if root in SEARCH_READ_ROOTS: + return f"search_path_scan:{root}" + return f"direct_file_read:{root}" + + +def modified_path_refs(payload: dict[str, Any]) -> list[dict[str, str]]: + if 
payload.get("type") != "patch_apply_end": + return [] + paths: list[str] = [] + for key in ("changed_paths", "paths", "files", "modified_paths"): + paths.extend(_path_values(payload.get(key))) + paths.extend(_path_values(payload.get("changes"))) + refs: list[dict[str, str]] = [] + seen: set[str] = set() + for path in paths: + path_ref = _path_ref_from_token(path) + if path_ref is None or path_ref["path_key"] in seen: + continue + seen.add(path_ref["path_key"]) + refs.append(path_ref) + return refs + + +def path_privacy_metadata() -> dict[str, str]: + return { + "label_policy": "basename_only", + "hash_policy": "sha256_12", + "normal": "basename_only_with_hash", + "redacted": "basename_only_with_hash", + "strict": "hash_available_for_hiding_labels", + } + + +def original_output_count(output: object) -> int | None: + if not isinstance(output, str): + return None + match = ORIGINAL_OUTPUT_RE.match(output) + if not match: + return None + return int(match.group("count")) + + +def optional_str(value: object) -> str | None: + return value if isinstance(value, str) and value else None + + +def safe_label(value: object) -> str | None: + if not isinstance(value, str): + return None + stripped = value.strip() + lowered = stripped.lower() + if lowered.startswith(SENSITIVE_LABEL_PREFIXES): + return None + if "/" in stripped or "\\" in stripped: + return None + return lowered if SAFE_LABEL_RE.fullmatch(stripped) else None + + +def simple_rows( + counter: Counter[str], + *, + key_name: str = "name", +) -> list[dict[str, Any]]: + return [ + {key_name: name, "count": int(count)} + for name, count in sorted(counter.items(), key=lambda item: (-item[1], item[0])) + ] + + +def unique_path_rows(paths: list[dict[str, str]]) -> list[dict[str, str]]: + rows: list[dict[str, str]] = [] + seen: set[str] = set() + for path in paths: + path_hash = path["path_hash"] + if path_hash in seen: + continue + seen.add(path_hash) + rows.append({"path_label": path["path_label"], "path_hash": path_hash}) 
+ return rows[:25] + + +def allocate_token_count(count: int, bucket_count: int) -> list[int]: + if bucket_count <= 0: + return [] + base = count // bucket_count + remainder = count % bucket_count + return [base + (1 if index < remainder else 0) for index in range(bucket_count)] + + +def int_value(value: object) -> int: + if isinstance(value, int): + return value + if isinstance(value, float): + return int(value) + if isinstance(value, str) and value: + return int(value) + return 0 + + +def ratio(numerator: int, denominator: int) -> float: + return numerator / denominator if denominator else 0.0 + + +def _read_path_tokens(*, root: str, tokens: list[str]) -> list[str]: + args = tokens[1:] + if root == "find": + return _find_path_tokens(args) + if root == "rg": + return _ripgrep_path_tokens(args) + if root == "grep": + operands = _non_option_operands(args, root=root) + return operands[1:] if len(operands) > 1 else [] + if root == "sed": + operands = _non_option_operands(args, root=root) + return operands[1:] if len(operands) > 1 else [] + return _non_option_operands(args, root=root) + + +def _find_path_tokens(args: list[str]) -> list[str]: + paths: list[str] = [] + for token in args: + if _is_shell_separator(token): + break + if token == "--": + continue + if token.startswith("-") or token in {"!", "(", ")"}: + break + paths.append(token) + return paths or ["."] + + +def _ripgrep_path_tokens(args: list[str]) -> list[str]: + operands = _non_option_operands(args, root="rg") + if any(token == "--files" or token.startswith("--files=") for token in args): + return operands or ["."] + return operands[1:] if len(operands) > 1 else [] + + +def _non_option_operands(args: list[str], *, root: str) -> list[str]: + option_args = _option_args_for_root(root) + operands: list[str] = [] + skip_next = False + passthrough = False + for token in args: + if skip_next: + skip_next = False + continue + if _is_shell_separator(token): + break + if token in {">", ">>", "<", "2>", "2>>"}: + 
break + if passthrough: + operands.append(token) + continue + if token == "--": + passthrough = True + continue + if token.startswith("-"): + option_name = token.split("=", 1)[0] + if option_name in option_args and "=" not in token: + skip_next = True + continue + operands.append(token) + return operands + + +def _option_args_for_root(root: str) -> set[str]: + return { + "grep": { + "-A", + "-B", + "-C", + "-e", + "-f", + "-m", + "--after-context", + "--before-context", + "--context", + "--file", + "--max-count", + "--regexp", + }, + "head": {"-c", "-n", "--bytes", "--lines"}, + "rg": { + "-A", + "-B", + "-C", + "-e", + "-f", + "-g", + "-m", + "-t", + "-T", + "--after-context", + "--before-context", + "--context", + "--file", + "--glob", + "--max-count", + "--max-depth", + "--type", + "--type-not", + }, + "sed": {"-e", "-f", "--expression", "--file"}, + "tail": {"-c", "-n", "--bytes", "--lines"}, + }.get(root, set()) + + +def _path_values(value: object) -> list[str]: + if isinstance(value, str): + return [value] + if isinstance(value, list | tuple): + paths: list[str] = [] + for item in value: + paths.extend(_path_values(item)) + return paths + if isinstance(value, dict): + paths = [] + for key in ("path", "file", "filename", "new_path", "old_path"): + paths.extend(_path_values(value.get(key))) + return paths + return [] + + +def _path_ref_from_token(token: str) -> dict[str, str] | None: + raw = token.strip() + if not raw or raw == "-" or _is_shell_separator(raw) or _looks_like_assignment(raw): + return None + if raw.startswith(("$", "`")) or "://" in raw: + return None + label = _safe_path_label(raw) + if label is None: + return None + path_hash = _stable_hash(raw) + return {"path_key": path_hash, "path_label": label, "path_hash": path_hash} + + +def _safe_path_label(token: str) -> str | None: + normalized = token.rstrip("/") + label = normalized if normalized in {".", ".."} else Path(normalized).name + if not label: + return None + lowered = label.lower() + if 
lowered.startswith(SENSITIVE_LABEL_PREFIXES): + return "path" + return label if SAFE_PATH_LABEL_RE.fullmatch(label) else "path" + + +def _stable_hash(value: str) -> str: + return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12] + + +def _command_tokens(command: str) -> list[str]: + try: + return shlex.split(command, posix=True) + except ValueError: + return [] + + +def _strip_command_wrappers(tokens: list[str]) -> list[str]: + remaining = list(tokens) + while remaining: + while remaining and _looks_like_assignment(remaining[0]): + remaining.pop(0) + if not remaining: + break + base = _basename(remaining[0]) + if base in {"command", "env", "sudo"}: + remaining.pop(0) + continue + break + return remaining + + +def _command_root(tokens: list[str]) -> str: + base = _basename(tokens[0]) + if base in {"py.test", "pytest"}: + return "pytest" + if base == "py" or base == "python" or base.startswith("python"): + return "python" + return safe_label(base) or "unknown_command" + + +def _command_child(root: str, tokens: list[str]) -> str: + if root == "python": + for index, token in enumerate(tokens[:-1]): + if token == "-m": + module = safe_label(_basename(tokens[index + 1]).split(".", 1)[0]) + return f"-m:{module}" if module else "-m:unknown" + return tokens[1] if len(tokens) > 1 and tokens[1].startswith("-") else " + + diff --git a/src/codex_usage_tracker/server.py b/src/codex_usage_tracker/server.py index 42e01ae..19c98cb 100644 --- a/src/codex_usage_tracker/server.py +++ b/src/codex_usage_tracker/server.py @@ -35,6 +35,15 @@ build_diagnostics_facts_report, build_diagnostics_summary_report, ) +from codex_usage_tracker.diagnostic_snapshots import ( + build_diagnostic_commands_report, + build_diagnostic_concentration_report, + build_diagnostic_file_reads_report, + build_diagnostic_overview_report, + build_diagnostic_read_productivity_report, + build_diagnostic_tool_output_report, + refresh_diagnostic_snapshots, +) from codex_usage_tracker.i18n import normalize_language 
from codex_usage_tracker.paths import ( DEFAULT_ALLOWANCE_PATH, @@ -304,6 +313,24 @@ def do_GET(self) -> None: # noqa: N802 - stdlib hook name if parsed.path == "/api/diagnostics/tools": self._handle_diagnostics_facts(parsed.query, fact_group="tools") return + if parsed.path == "/api/diagnostics/overview": + self._handle_diagnostics_overview(parsed.query) + return + if parsed.path == "/api/diagnostics/tool-output": + self._handle_diagnostics_tool_output(parsed.query) + return + if parsed.path == "/api/diagnostics/commands": + self._handle_diagnostics_commands(parsed.query) + return + if parsed.path == "/api/diagnostics/file-reads": + self._handle_diagnostics_file_reads(parsed.query) + return + if parsed.path == "/api/diagnostics/read-productivity": + self._handle_diagnostics_read_productivity(parsed.query) + return + if parsed.path == "/api/diagnostics/concentration": + self._handle_diagnostics_concentration(parsed.query) + return if parsed.path == "/api/usage": self._handle_usage(parsed.query) return @@ -315,6 +342,34 @@ def do_GET(self) -> None: # noqa: N802 - stdlib hook name return super().do_GET() + def do_POST(self) -> None: # noqa: N802 - stdlib hook name + parsed = urlparse(self.path) + if not self._request_origin_allowed(): + self._send_json(HTTPStatus.FORBIDDEN, {"error": "Request host or origin is not allowed"}) + return + if parsed.path == "/api/diagnostics/refresh": + self._handle_diagnostics_refresh(parsed.query) + return + if parsed.path == "/api/diagnostics/overview/refresh": + self._handle_diagnostics_overview_refresh(parsed.query) + return + if parsed.path == "/api/diagnostics/tool-output/refresh": + self._handle_diagnostics_tool_output_refresh(parsed.query) + return + if parsed.path == "/api/diagnostics/commands/refresh": + self._handle_diagnostics_commands_refresh(parsed.query) + return + if parsed.path == "/api/diagnostics/file-reads/refresh": + self._handle_diagnostics_file_reads_refresh(parsed.query) + return + if parsed.path == 
"/api/diagnostics/read-productivity/refresh": + self._handle_diagnostics_read_productivity_refresh(parsed.query) + return + if parsed.path == "/api/diagnostics/concentration/refresh": + self._handle_diagnostics_concentration_refresh(parsed.query) + return + self._send_json(HTTPStatus.NOT_FOUND, {"error": "Unknown API endpoint"}) + def end_headers(self) -> None: if self._is_dashboard_html_request(): self.send_header("Cache-Control", "no-store") @@ -943,6 +998,169 @@ def _handle_diagnostics_fact_calls(self, query: str) -> None: return self._send_json(HTTPStatus.OK, payload) + def _handle_diagnostics_overview(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_overview_report, + refresh=False, + label="diagnostic overview", + ) + + def _handle_diagnostics_refresh(self, query: str) -> None: + params = parse_qs(query) + if not self._has_valid_api_token(params): + self._send_json( + HTTPStatus.FORBIDDEN, + {"error": "Valid API token is required for diagnostic refresh"}, + ) + return + include_archived = _parse_bool( + _first(params.get("include_archived")), + self._include_archived, + ) + try: + with self._refresh_lock: + payload = refresh_diagnostic_snapshots( + db_path=self._db_path, + include_archived=include_archived, + ) + except sqlite3.Error as exc: + self._send_json( + HTTPStatus.INTERNAL_SERVER_ERROR, + {"error": f"Database error while refreshing diagnostics: {exc}"}, + ) + return + self._send_json(HTTPStatus.OK, payload) + + def _handle_diagnostics_overview_refresh(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_overview_report, + refresh=True, + label="diagnostic overview", + ) + + def _handle_diagnostics_tool_output(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_tool_output_report, + refresh=False, + label="diagnostic tool output", + ) + + def _handle_diagnostics_tool_output_refresh(self, query: 
str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_tool_output_report, + refresh=True, + label="diagnostic tool output", + ) + + def _handle_diagnostics_commands(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_commands_report, + refresh=False, + label="diagnostic commands", + ) + + def _handle_diagnostics_commands_refresh(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_commands_report, + refresh=True, + label="diagnostic commands", + ) + + def _handle_diagnostics_file_reads(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_file_reads_report, + refresh=False, + label="diagnostic file reads", + ) + + def _handle_diagnostics_file_reads_refresh(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_file_reads_report, + refresh=True, + label="diagnostic file reads", + ) + + def _handle_diagnostics_read_productivity(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_read_productivity_report, + refresh=False, + label="diagnostic read productivity", + ) + + def _handle_diagnostics_read_productivity_refresh(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_read_productivity_report, + refresh=True, + label="diagnostic read productivity", + ) + + def _handle_diagnostics_concentration(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_concentration_report, + refresh=False, + label="diagnostic concentration", + ) + + def _handle_diagnostics_concentration_refresh(self, query: str) -> None: + self._handle_diagnostic_snapshot( + query, + build_report=build_diagnostic_concentration_report, + refresh=True, + label="diagnostic concentration", + ) + + def 
_handle_diagnostic_snapshot( + self, + query: str, + *, + build_report: Any, + refresh: bool, + label: str, + ) -> None: + params = parse_qs(query) + if refresh and not self._has_valid_api_token(params): + self._send_json( + HTTPStatus.FORBIDDEN, + {"error": "Valid API token is required for diagnostic refresh"}, + ) + return + include_archived = _parse_bool( + _first(params.get("include_archived")), + self._include_archived, + ) + try: + if refresh: + with self._refresh_lock: + payload = build_report( + db_path=self._db_path, + include_archived=include_archived, + refresh=True, + ).payload + else: + payload = build_report( + db_path=self._db_path, + include_archived=include_archived, + refresh=False, + ).payload + except sqlite3.Error as exc: + self._send_json( + HTTPStatus.INTERNAL_SERVER_ERROR, + {"error": f"Database error while reading {label}: {exc}"}, + ) + return + self._send_json(HTTPStatus.OK, payload) + def _live_query_params( self, params: dict[str, list[str]], diff --git a/src/codex_usage_tracker/store.py b/src/codex_usage_tracker/store.py index 0af764a..13b894c 100644 --- a/src/codex_usage_tracker/store.py +++ b/src/codex_usage_tracker/store.py @@ -3,6 +3,7 @@ from __future__ import annotations import csv +import json import sqlite3 from collections.abc import Iterable, Iterator from contextlib import contextmanager, suppress @@ -125,6 +126,7 @@ def rebuild_usage_index( with connect(db_path) as conn: init_db(conn) conn.execute("DELETE FROM call_diagnostic_facts") + conn.execute("DELETE FROM diagnostic_snapshots") conn.execute("DELETE FROM usage_events") conn.execute("DELETE FROM thread_summaries") conn.execute("DELETE FROM source_files") @@ -144,6 +146,7 @@ def reset_usage_database(db_path: Path = DEFAULT_DB_PATH) -> dict[str, Any]: row = conn.execute("SELECT COUNT(*) AS count FROM usage_events").fetchone() deleted_rows = int(row["count"] if row is not None else 0) conn.execute("DELETE FROM call_diagnostic_facts") + conn.execute("DELETE FROM 
diagnostic_snapshots") conn.execute("DELETE FROM usage_events") conn.execute("DELETE FROM thread_summaries") conn.execute("DELETE FROM source_files") @@ -222,6 +225,94 @@ def refresh_metadata(db_path: Path = DEFAULT_DB_PATH) -> dict[str, str]: return {str(row["key"]): str(row["value"]) for row in rows} +def upsert_diagnostic_snapshot( + db_path: Path = DEFAULT_DB_PATH, + *, + section: str, + history_scope: str, + payload: dict[str, Any], + computed_at: str, + source_logs_scanned: int, + usage_rows_scanned: int, + raw_content_included: bool = False, +) -> None: + """Persist one aggregate diagnostic report snapshot.""" + + payload_json = json.dumps(payload, sort_keys=True, separators=(",", ":")) + with connect(db_path) as conn: + init_db(conn) + conn.execute( + """ + INSERT INTO diagnostic_snapshots ( + section, + history_scope, + payload_json, + computed_at, + source_logs_scanned, + usage_rows_scanned, + raw_content_included + ) + VALUES (?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(section, history_scope) DO UPDATE SET + payload_json = excluded.payload_json, + computed_at = excluded.computed_at, + source_logs_scanned = excluded.source_logs_scanned, + usage_rows_scanned = excluded.usage_rows_scanned, + raw_content_included = excluded.raw_content_included + """, + ( + section, + history_scope, + payload_json, + computed_at, + int(source_logs_scanned), + int(usage_rows_scanned), + 1 if raw_content_included else 0, + ), + ) + + +def query_diagnostic_snapshot( + db_path: Path = DEFAULT_DB_PATH, + *, + section: str, + history_scope: str, +) -> dict[str, Any] | None: + """Return one persisted aggregate diagnostic report snapshot.""" + + if not db_path.exists(): + return None + with connect(db_path) as conn: + init_db(conn) + row = conn.execute( + """ + SELECT + section, + history_scope, + payload_json, + computed_at, + source_logs_scanned, + usage_rows_scanned, + raw_content_included + FROM diagnostic_snapshots + WHERE section = ? AND history_scope = ? 
+ """, + (section, history_scope), + ).fetchone() + if row is None: + return None + payload = json.loads(str(row["payload_json"])) + return { + "section": str(row["section"]), + "history_scope": str(row["history_scope"]), + "payload": payload if isinstance(payload, dict) else {}, + "computed_at": str(row["computed_at"]), + "source_logs_scanned": int(row["source_logs_scanned"]), + "usage_rows_scanned": int(row["usage_rows_scanned"]), + "raw_content_included": bool(row["raw_content_included"]), + } + + def schema_state(db_path: Path = DEFAULT_DB_PATH) -> dict[str, Any]: """Return database migration and usage_events checksum state.""" diff --git a/src/codex_usage_tracker/store_schema.py b/src/codex_usage_tracker/store_schema.py index c5e973c..da85a5e 100644 --- a/src/codex_usage_tracker/store_schema.py +++ b/src/codex_usage_tracker/store_schema.py @@ -12,7 +12,7 @@ USAGE_EVENT_SCHEMA_CHECKSUM, ) -SCHEMA_VERSION = 9 +SCHEMA_VERSION = 10 MIGRATION_NAMES = { 1: "create usage_events aggregate fact table", 2: "track schema migration checksum metadata", @@ -23,6 +23,7 @@ 7: "persist source file parser cursors", 8: "persist observed Codex usage snapshots", 9: "persist aggregate diagnostic facts", + 10: "persist on-demand diagnostic report snapshots", } CALL_ORIGIN_REPAIR_COLUMNS = { "call_initiator": "TEXT", @@ -102,6 +103,12 @@ def init_db(conn: sqlite3.Connection) -> None: else: _migrate_v9(conn) _record_migration_if_missing(conn, 9) + if user_version < 10: + _migrate_v10(conn) + _record_migration(conn, 10) + else: + _migrate_v10(conn) + _record_migration_if_missing(conn, 10) _validate_usage_events_schema(conn) conn.execute(f"PRAGMA user_version = {SCHEMA_VERSION}") @@ -280,6 +287,26 @@ def _migrate_v9(conn: sqlite3.Connection) -> None: ) +def _migrate_v10(conn: sqlite3.Connection) -> None: + conn.executescript( + """ + CREATE TABLE IF NOT EXISTS diagnostic_snapshots ( + section TEXT NOT NULL, + history_scope TEXT NOT NULL, + payload_json TEXT NOT NULL, + computed_at TEXT 
NOT NULL, + source_logs_scanned INTEGER NOT NULL DEFAULT 0, + usage_rows_scanned INTEGER NOT NULL DEFAULT 0, + raw_content_included INTEGER NOT NULL DEFAULT 0, + PRIMARY KEY (section, history_scope) + ); + + CREATE INDEX IF NOT EXISTS idx_diagnostic_snapshots_computed_at + ON diagnostic_snapshots(computed_at); + """ + ) + + def _record_migration(conn: sqlite3.Connection, version: int) -> None: conn.execute( """ diff --git a/tests/playwright/dashboard-diagnostics.spec.mjs b/tests/playwright/dashboard-diagnostics.spec.mjs new file mode 100644 index 0000000..9a6e47e --- /dev/null +++ b/tests/playwright/dashboard-diagnostics.spec.mjs @@ -0,0 +1,28 @@ +import { expect, test } from '@playwright/test'; + +test.describe('diagnostics dashboard smoke', () => { + test('renders diagnostics panels with explicit refresh control', async ({ page }) => { + await page.goto('/dashboard.html?view=diagnostics'); + + await expect(page.getByRole('button', { name: 'Diagnostics', exact: true })).toHaveAttribute( + 'aria-pressed', + 'true', + ); + await expect(page.locator('#diagnosticsPanel')).toBeVisible(); + await expect(page.getByRole('button', { name: 'Refresh diagnostics' })).toBeVisible(); + await expect(page.locator('#diagnosticsPanel')).not.toContainText( + 'Live API required for diagnostics refresh', + ); + + for (const heading of [ + 'Overview', + 'Tool Output', + 'Commands', + 'File Reads', + 'Read Productivity', + 'Concentration', + ]) { + await expect(page.getByRole('heading', { name: heading })).toBeVisible(); + } + }); +}); diff --git a/tests/store_dashboard_helpers.py b/tests/store_dashboard_helpers.py index 92806ab..4fcd2eb 100644 --- a/tests/store_dashboard_helpers.py +++ b/tests/store_dashboard_helpers.py @@ -349,14 +349,24 @@ def _assert_contract(payload: object) -> None: assert validate_json_payload_contract(payload) == [] -def _read_json(url: str, headers: dict[str, str] | None = None) -> dict[str, object]: - request = urllib.request.Request(url, headers=headers or 
{}) +def _read_json( + url: str, + headers: dict[str, str] | None = None, + data: bytes | None = None, + method: str | None = None, +) -> dict[str, object]: + request = urllib.request.Request(url, data=data, headers=headers or {}, method=method) with urllib.request.urlopen(request, timeout=5) as response: # noqa: S310 - local test server only return json.loads(response.read().decode("utf-8")) -def _http_error_json(url: str, headers: dict[str, str] | None = None) -> dict[str, object]: - request = urllib.request.Request(url, headers=headers or {}) +def _http_error_json( + url: str, + headers: dict[str, str] | None = None, + data: bytes | None = None, + method: str | None = None, +) -> dict[str, object]: + request = urllib.request.Request(url, data=data, headers=headers or {}, method=method) try: urllib.request.urlopen(request, timeout=5) # noqa: S310 - local test server only except urllib.error.HTTPError as exc: diff --git a/tests/test_cli_lifecycle.py b/tests/test_cli_lifecycle.py index 7a02eab..09caf47 100644 --- a/tests/test_cli_lifecycle.py +++ b/tests/test_cli_lifecycle.py @@ -383,6 +383,68 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None: "tools", "--json", ) + overview_missing = _run_cli( + tmp_path, + "--db", + str(db_path), + "diagnostics", + "overview", + "--json", + ) + overview_refresh = _run_cli( + tmp_path, + "--db", + str(db_path), + "diagnostics", + "overview", + "--refresh", + "--json", + ) + tool_output_refresh = _run_cli( + tmp_path, + "--db", + str(db_path), + "diagnostics", + "tool-output", + "--refresh", + "--json", + ) + commands_refresh = _run_cli( + tmp_path, + "--db", + str(db_path), + "diagnostics", + "commands", + "--refresh", + "--json", + ) + file_reads_refresh = _run_cli( + tmp_path, + "--db", + str(db_path), + "diagnostics", + "file-reads", + "--refresh", + "--json", + ) + read_productivity_refresh = _run_cli( + tmp_path, + "--db", + str(db_path), + "diagnostics", + "read-productivity", + "--refresh", + 
"--json", + ) + concentration_refresh = _run_cli( + tmp_path, + "--db", + str(db_path), + "diagnostics", + "concentration", + "--refresh", + "--json", + ) fact_calls = _run_cli( tmp_path, "--db", @@ -403,17 +465,38 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None: facts_payload = json.loads(facts.stdout) compactions_payload = json.loads(compactions.stdout) tools_payload = json.loads(tools.stdout) + overview_missing_payload = json.loads(overview_missing.stdout) + overview_refresh_payload = json.loads(overview_refresh.stdout) + tool_output_refresh_payload = json.loads(tool_output_refresh.stdout) + commands_refresh_payload = json.loads(commands_refresh.stdout) + file_reads_refresh_payload = json.loads(file_reads_refresh.stdout) + read_productivity_refresh_payload = json.loads(read_productivity_refresh.stdout) + concentration_refresh_payload = json.loads(concentration_refresh.stdout) fact_calls_payload = json.loads(fact_calls.stdout) for payload in ( summary_payload, facts_payload, compactions_payload, tools_payload, + overview_missing_payload, + overview_refresh_payload, + tool_output_refresh_payload, + commands_refresh_payload, + file_reads_refresh_payload, + read_productivity_refresh_payload, + concentration_refresh_payload, fact_calls_payload, ): _assert_contract(payload) - assert payload["schema"] == "codex-usage-tracker-diagnostics-v1" assert payload["raw_context_included"] is False + for payload in ( + summary_payload, + facts_payload, + compactions_payload, + tools_payload, + fact_calls_payload, + ): + assert payload["schema"] == "codex-usage-tracker-diagnostics-v1" assert "Associated token totals are not additive" in payload["notes"][0] fact_names = {row["fact_name"] for row in facts_payload["rows"]} @@ -428,7 +511,43 @@ def test_diagnostics_cli_returns_aggregate_json(tmp_path: Path) -> None: assert {row["fact_type"] for row in compactions_payload["rows"]} == {"compaction"} assert tools_payload["filters"]["fact_type"] is None assert 
tools_payload["filters"]["fact_group"] == "tools" - assert {row["fact_type"] for row in tools_payload["rows"]} == {"tool"} + assert "tool" in {row["fact_type"] for row in tools_payload["rows"]} + assert overview_missing_payload["schema"] == "codex-usage-tracker-diagnostic-overview-v1" + assert overview_missing_payload["status"] == "missing" + assert overview_refresh_payload["schema"] == "codex-usage-tracker-diagnostic-overview-v1" + assert overview_refresh_payload["status"] == "ready" + assert overview_refresh_payload["overview"]["usage_rows"] == 2 + assert overview_refresh_payload["refreshed"] is True + assert ( + tool_output_refresh_payload["schema"] + == "codex-usage-tracker-diagnostic-tool-output-v1" + ) + assert tool_output_refresh_payload["summary"]["original_token_sum"] == 9 + assert ( + commands_refresh_payload["schema"] + == "codex-usage-tracker-diagnostic-commands-v1" + ) + assert commands_refresh_payload["commands"][0]["root"] == "git" + assert commands_refresh_payload["commands"][0]["children"][0] == { + "child": "status", + "count": 1, + } + assert ( + file_reads_refresh_payload["schema"] + == "codex-usage-tracker-diagnostic-file-reads-v1" + ) + assert file_reads_refresh_payload["summary"]["read_events"] == 0 + assert ( + read_productivity_refresh_payload["schema"] + == "codex-usage-tracker-diagnostic-read-productivity-v1" + ) + assert read_productivity_refresh_payload["summary"]["read_events_modified_later"] == 0 + assert ( + concentration_refresh_payload["schema"] + == "codex-usage-tracker-diagnostic-concentration-v1" + ) + assert concentration_refresh_payload["summary"]["usage_rows"] == 2 + assert concentration_refresh_payload["metrics"] assert fact_calls_payload["view"] == "fact-calls" assert fact_calls_payload["filters"]["privacy_mode"] == "strict" assert fact_calls_payload["rows"][0]["cwd"].startswith("[redacted cwd:") @@ -520,7 +639,27 @@ def _make_diagnostics_codex_home(tmp_path: Path) -> Path: ), _entry( "response_item", - {"type": 
"function_call_output", "output": "SECRET TOOL OUTPUT"}, + { + "type": "function_call", + "call_id": "call-git", + "name": "exec_command", + "arguments": json.dumps({"cmd": "git status SECRET_ARG"}), + }, + ), + _entry( + "response_item", + { + "type": "function_call_output", + "call_id": "call-git", + "output": ( + "Chunk ID: abc123\n" + "Wall time: 0.0000 seconds\n" + "Process exited with code 0\n" + "Original token count: 9\n" + "Output:\n" + "SECRET TOOL OUTPUT" + ), + }, ), _entry( "event_msg", diff --git a/tests/test_dashboard_diagnostics_snapshots.py b/tests/test_dashboard_diagnostics_snapshots.py new file mode 100644 index 0000000..b25a74a --- /dev/null +++ b/tests/test_dashboard_diagnostics_snapshots.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +import json +import shutil +import subprocess +from pathlib import Path + +import pytest + + +def _run_snapshot_renderer_script(script: str) -> dict[str, object]: + node = shutil.which("node") + if node is None: + pytest.skip("node is required for dashboard diagnostic snapshot renderer tests") + repo_root = Path(__file__).resolve().parents[1] + script_path = ( + repo_root + / "src" + / "codex_usage_tracker" + / "plugin_data" + / "dashboard" + / "dashboard_diagnostics_snapshots.js" + ) + wrapped = f""" +const fs = require('fs'); +const vm = require('vm'); +const code = fs.readFileSync({json.dumps(str(script_path))}, 'utf8'); +const context = {{ + window: {{}}, + console, +}}; +vm.createContext(context); +vm.runInContext(code, context); +const factory = context.window.CodexUsageDashboardDiagnosticSnapshots; +function escapeHtml(value) {{ + return String(value) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); +}} +{script} +""" + result = subprocess.run( + [node, "-e", wrapped], + check=True, + capture_output=True, + text=True, + ) + return json.loads(result.stdout) + + +def test_dashboard_commands_snapshot_renders_collapsible_children() -> None: + payload = 
_run_snapshot_renderer_script( + """ +const renderer = factory.create({ + escapeHtml, + formatTimestamp: value => value, + number: new Intl.NumberFormat('en-US'), + pct: value => `${value}%`, + renderState: message => `
${escapeHtml(message)}
`, + rowInvestigatorLink: () => 'call', + tokenText: value => new Intl.NumberFormat('en-US').format(Number(value || 0)), +}); +const html = renderer.renderPanels({ + loading: false, + payloads: { + commands: { + status: 'ready', + refreshed: false, + snapshot: { + computed_at: '2026-06-20T00:00:00Z', + history_scope: 'active', + source_logs_scanned: 1, + }, + commands: [ + { + root: 'git', + total: 3, + children: [ + { child: 'status', count: 2 }, + { child: 'diff', count: 1 }, + ], + }, + ], + }, + }, +}); +console.log(JSON.stringify({ + hasDetails: html.includes('
'), + hasShowSummary: html.includes('Show all 2 children'), + hasHideSummary: html.includes('Hide 2 children'), + hasToggleIcon: html.includes('diagnostics-command-toggle-icon'), + hasFirstChild: html.includes('status') && html.includes('2'), + hasSecondChild: html.includes('diff') && html.includes('1'), + hasTopChildColumn: html.includes('Top child'), +})); +""" + ) + + assert payload["hasDetails"] is True + assert payload["hasShowSummary"] is True + assert payload["hasHideSummary"] is True + assert payload["hasToggleIcon"] is True + assert payload["hasFirstChild"] is True + assert payload["hasSecondChild"] is True + assert payload["hasTopChildColumn"] is False + + +def test_dashboard_concentration_snapshot_renders_reader_facing_labels() -> None: + payload = _run_snapshot_renderer_script( + """ +const renderer = factory.create({ + escapeHtml, + formatTimestamp: value => value, + number: new Intl.NumberFormat('en-US'), + pct: value => `${Math.round(Number(value || 0) * 100)}%`, + renderState: message => `
${escapeHtml(message)}
`, + rowInvestigatorLink: () => '1,000', + tokenText: value => new Intl.NumberFormat('en-US').format(Number(value || 0)), +}); +const html = renderer.renderPanels({ + loading: false, + payloads: { + concentration: { + status: 'ready', + refreshed: false, + snapshot: { + computed_at: '2026-06-20T00:00:00Z', + history_scope: 'active', + source_logs_scanned: 1, + }, + metrics: [ + { metric: 'top_1_source_log_share', dimension: 'source_log', top_n: 1, share: 0.5 }, + { metric: 'top_3_cwd_share', dimension: 'cwd', top_n: 3, share: 0.9 }, + ], + largest_impact_rows: [ + { + dimension: 'source_log', + label: 'session:019e37d3', + share: 0.5, + largest_record_id: 'r1', + largest_call_tokens: 1000, + }, + ], + }, + }, +}); +console.log(JSON.stringify({ + hasSourceMetricLabel: html.includes('Top 1 source/session share'), + hasProjectMetricLabel: html.includes('Top 3 project/cwd share'), + hasDimensionLabel: html.includes('Source/session'), + hasSafeSourceLabel: html.includes('session:019e37d3'), + leaksMetricId: html.includes('top_1_source_log_share'), +})); +""" + ) + + assert payload["hasSourceMetricLabel"] is True + assert payload["hasProjectMetricLabel"] is True + assert payload["hasDimensionLabel"] is True + assert payload["hasSafeSourceLabel"] is True + assert payload["leaksMetricId"] is False diff --git a/tests/test_dashboard_live.py b/tests/test_dashboard_live.py index f470434..a25df3c 100644 --- a/tests/test_dashboard_live.py +++ b/tests/test_dashboard_live.py @@ -128,6 +128,95 @@ def test_dashboard_live_allows_diagnostics_bootstrap_refresh() -> None: assert payload["statusKeys"] == ["status.checking", "status.updated"] +def test_dashboard_live_skips_diagnostics_auto_refresh_cycle() -> None: + payload = _run_dashboard_live_script( + """ +(async () => { + const calls = []; + const statusUpdates = []; + const appliedPayloads = []; + let scheduledIntervals = 0; + context.window.setInterval = () => { + scheduledIntervals += 1; + return 1; + }; + 
context.window.clearInterval = () => {}; + globalThis.__fetch = async (url, options) => { + calls.push({ url, headers: options.headers }); + return { + ok: true, + json: async () => ({ + rows: [], + refreshed_at: '2026-06-19T00:00:00Z', + refresh_result: { + inserted_or_updated_events: 1, + scanned_files: 1, + skipped_events: 0, + }, + total_available_rows: 1, + }), + }; + }; + const refreshDashboardEl = { disabled: false }; + const runtime = factory.create({ + activeView: () => 'diagnostics', + apiToken: () => 'test-token', + applyDashboardPayload: payload => appliedPayloads.push(payload), + autoRefreshEl: { checked: true }, + backgroundHydrationChunkSize: 2000, + formatTimestamp: value => value, + getArchivedAvailableRows: () => 0, + getData: () => [], + getIncludeArchived: () => false, + getLoadedLimit: () => null, + getTotalAvailableRows: () => 1, + historyScopeEl: { value: 'active', parentElement: {} }, + i18n: { currentLanguage: 'en' }, + initialHydrationChunkSize: 500, + latestRefreshAt: () => '', + limitValue: value => value === null ? 
'all' : String(value), + liveRefreshIntervalMs: 10000, + liveRefreshSupported: true, + loadLimitEl: { value: '5000', options: [], lastElementChild: null, insertBefore: () => {} }, + number: new Intl.NumberFormat('en-US'), + payloadRows: payload => payload.rows || [], + rebuildDashboardIndexes: () => {}, + rebuildFilterOptions: () => {}, + refreshDashboardEl, + render: () => {}, + resetRowsForHydration: () => {}, + rowLoadProgressBarEl: { style: {} }, + rowLoadProgressCountEl: { textContent: '' }, + rowLoadProgressEl: { hidden: true }, + rowLoadProgressLabelEl: { textContent: '' }, + setFastTooltip: () => {}, + t: key => key, + tf: (key, values = {}) => `${key}:${JSON.stringify(values)}`, + updateLiveStatus: (key, detail) => statusUpdates.push({ key, detail }), + }); + runtime.scheduleAutoRefresh(); + await runtime.refreshDashboardLive(); + console.log(JSON.stringify({ + fetchCount: calls.length, + appliedCount: appliedPayloads.length, + statusKeys: statusUpdates.map(entry => entry.key), + scheduledIntervals, + refreshDisabled: refreshDashboardEl.disabled, + })); +})().catch(error => { + console.error(error); + process.exit(1); +}); +""" + ) + + assert payload["fetchCount"] == 0 + assert payload["appliedCount"] == 0 + assert payload["statusKeys"] == [] + assert payload["scheduledIntervals"] == 0 + assert payload["refreshDisabled"] is False + + def test_dashboard_live_prepends_new_rows_after_cached_index_refresh() -> None: payload = _run_dashboard_live_script( """ diff --git a/tests/test_dashboard_payload.py b/tests/test_dashboard_payload.py index 19bb5ba..5e73de8 100644 --- a/tests/test_dashboard_payload.py +++ b/tests/test_dashboard_payload.py @@ -55,6 +55,12 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: dashboard_diagnostics_js = (asset_dir / "dashboard_diagnostics.js").read_text( encoding="utf-8" ) + dashboard_diagnostics_facts_js = ( + asset_dir / "dashboard_diagnostics_facts.js" + ).read_text(encoding="utf-8") + 
dashboard_diagnostics_snapshots_js = ( + asset_dir / "dashboard_diagnostics_snapshots.js" + ).read_text(encoding="utf-8") dashboard_call_diagnostics_js = ( asset_dir / "dashboard_call_diagnostics.js" ).read_text(encoding="utf-8") @@ -94,6 +100,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: dashboard_live_js, dashboard_events_js, dashboard_diagnostics_js, + dashboard_diagnostics_facts_js, + dashboard_diagnostics_snapshots_js, dashboard_call_diagnostics_js, dashboard_call_js, dashboard_js, @@ -119,6 +127,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "SECRET RAW PROMPT" not in dashboard_live_js assert "SECRET RAW PROMPT" not in dashboard_events_js assert "SECRET RAW PROMPT" not in dashboard_diagnostics_js + assert "SECRET RAW PROMPT" not in dashboard_diagnostics_facts_js + assert "SECRET RAW PROMPT" not in dashboard_diagnostics_snapshots_js assert "SECRET RAW PROMPT" not in dashboard_call_diagnostics_js assert "SECRET RAW PROMPT" not in dashboard_call_js assert "SECRET RAW PROMPT" not in dashboard_css @@ -139,6 +149,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_live_js assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_events_js assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_diagnostics_js + assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_diagnostics_facts_js + assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_diagnostics_snapshots_js assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_call_diagnostics_js assert "COMPACTED REPLACEMENT SUMMARY" not in dashboard_call_js assert "EVENT MSG COMPACTION SUMMARY" not in dashboard @@ -157,6 +169,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_live_js assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_events_js assert "EVENT MSG COMPACTION SUMMARY" not in 
dashboard_diagnostics_js + assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_diagnostics_facts_js + assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_diagnostics_snapshots_js assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_call_diagnostics_js assert "EVENT MSG COMPACTION SUMMARY" not in dashboard_call_js for stylesheet in dashboard_stylesheets: @@ -177,6 +191,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert 'src="codex-usage-tracker-assets/dashboard_actions.js?v=' in dashboard assert 'src="codex-usage-tracker-assets/dashboard_live.js?v=' in dashboard assert 'src="codex-usage-tracker-assets/dashboard_events.js?v=' in dashboard + assert 'src="codex-usage-tracker-assets/dashboard_diagnostics_snapshots.js?v=' in dashboard + assert 'src="codex-usage-tracker-assets/dashboard_diagnostics_facts.js?v=' in dashboard assert 'src="codex-usage-tracker-assets/dashboard_diagnostics.js?v=' in dashboard assert 'src="codex-usage-tracker-assets/dashboard_call_diagnostics.js?v=' in dashboard assert 'src="codex-usage-tracker-assets/dashboard_call_investigator.js?v=' in dashboard @@ -197,6 +213,8 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "CodexUsageDashboardActions" in dashboard_actions_js assert "CodexUsageDashboardLive" in dashboard_live_js assert "CodexUsageDashboardEvents" in dashboard_events_js + assert "CodexUsageDashboardDiagnosticSnapshots" in dashboard_diagnostics_snapshots_js + assert "CodexUsageDashboardDiagnosticFacts" in dashboard_diagnostics_facts_js assert "CodexUsageDashboardDiagnostics" in dashboard_diagnostics_js assert "CodexUsageCallDiagnostics" in dashboard_call_diagnostics_js assert "CodexUsageCallInvestigator" in dashboard_call_js @@ -282,22 +300,42 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "/api/diagnostics/tools" in dashboard_diagnostics_js assert "/api/diagnostics/compactions" in dashboard_diagnostics_js assert 
"/api/diagnostics/fact-calls" in dashboard_diagnostics_js + assert "/api/diagnostics/refresh" in dashboard_diagnostics_js + assert "dashboard_diagnostics_snapshots.js" in dashboard + assert "dashboard_diagnostics_facts.js" in dashboard + assert "/api/diagnostics/overview" in dashboard_diagnostics_snapshots_js + assert "/api/diagnostics/tool-output/refresh" in dashboard_diagnostics_snapshots_js + assert "/api/diagnostics/commands/refresh" in dashboard_diagnostics_snapshots_js + assert "/api/diagnostics/file-reads/refresh" in dashboard_diagnostics_snapshots_js + assert "/api/diagnostics/read-productivity/refresh" in dashboard_diagnostics_snapshots_js + assert "/api/diagnostics/concentration/refresh" in dashboard_diagnostics_snapshots_js + assert "Refresh diagnostics" in dashboard_diagnostics_snapshots_js + assert "data-diagnostics-refresh" in dashboard_diagnostics_js + assert "Live API required for diagnostics refresh" in dashboard_diagnostics_js + assert "Overview" in dashboard_diagnostics_snapshots_js + assert "Tool Output" in dashboard_diagnostics_snapshots_js + assert "File Reads" in dashboard_diagnostics_snapshots_js + assert "Read Productivity" in dashboard_diagnostics_snapshots_js + assert "Concentration" in dashboard_diagnostics_snapshots_js assert "Associated token totals" in dashboard_diagnostics_js assert "Raw context remains on-demand" in dashboard_diagnostics_js assert "rowInvestigatorLink" in dashboard_diagnostics_js - assert "diagnostics-drilldown-row" in dashboard_diagnostics_js - assert 'td colspan="11"' in dashboard_diagnostics_js - assert "associated_cached_input_tokens" in dashboard_diagnostics_js - assert "row.cached_input_tokens" in dashboard_diagnostics_js - assert "Occurrences: count of matching diagnostic fact events" in dashboard_diagnostics_js - assert "Associated total tokens for those calls" in dashboard_diagnostics_js - assert "Average cache ratio across associated calls" in dashboard_diagnostics_js - assert 
"data-diagnostics-fact-sort-key" in dashboard_diagnostics_js - assert "data-diagnostics-fact-sort-active" in dashboard_diagnostics_js + assert "diagnostics-drilldown-row" in dashboard_diagnostics_facts_js + assert 'td colspan="11"' in dashboard_diagnostics_facts_js + assert "associated_cached_input_tokens" in dashboard_diagnostics_facts_js + assert "row.cached_input_tokens" in dashboard_diagnostics_facts_js + assert "Occurrences: count of matching diagnostic fact events" in dashboard_diagnostics_facts_js + assert "Associated total tokens for those calls" in dashboard_diagnostics_facts_js + assert "Average cache ratio across associated calls" in dashboard_diagnostics_facts_js + assert "data-diagnostics-fact-sort-key" in dashboard_diagnostics_facts_js + assert "data-diagnostics-fact-sort-active" in dashboard_diagnostics_facts_js assert "sortFactRows" in dashboard_diagnostics_js - assert "diagnosticFactHeader" in dashboard_diagnostics_js + assert "diagnosticFactHeader" in dashboard_diagnostics_facts_js assert "diagnostics-facts-table" in dashboard_surface assert "diagnostics-fact-cell" in dashboard_surface + assert "diagnostics-snapshot-grid" in dashboard_css + assert "diagnostics-toolbar" in dashboard_css + assert "diagnostics-mini-table" in dashboard_css assert "diagnostics-facts-table th:first-child" in dashboard_css assert "td.diagnostics-fact-cell" in dashboard_css assert "captureScrollAnchor" in dashboard_diagnostics_js @@ -306,7 +344,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert "offset: String(offset)" in dashboard_diagnostics_js assert "mergeFactCallPayload" in dashboard_diagnostics_js assert "data-diagnostics-call-sort-key" in dashboard_diagnostics_js - assert "data-diagnostics-call-sort-active" in dashboard_diagnostics_js + assert "data-diagnostics-call-sort-active" in dashboard_diagnostics_facts_js assert "sortFactCalls" in dashboard_diagnostics_js assert "defaultFactCallSortDirection" in dashboard_diagnostics_js assert 
"sort: sortState.sort" in dashboard_diagnostics_js @@ -501,6 +539,7 @@ def test_dashboard_and_csv_are_aggregate_only(tmp_path: Path) -> None: assert (asset_dir / "dashboard_details.js").exists() assert (asset_dir / "dashboard_insights.js").exists() assert (asset_dir / "dashboard_tables.js").exists() + assert (asset_dir / "dashboard_diagnostics_snapshots.js").exists() assert (asset_dir / "dashboard_filters.js").exists() assert (asset_dir / "dashboard_state.js").exists() assert (asset_dir / "dashboard_payload_cache.js").exists() diff --git a/tests/test_dashboard_server.py b/tests/test_dashboard_server.py index a174f76..9dc3036 100644 --- a/tests/test_dashboard_server.py +++ b/tests/test_dashboard_server.py @@ -79,6 +79,65 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) -> content_security_policy = response.headers.get("Content-Security-Policy") referrer_policy = response.headers.get("Referrer-Policy") limited_payload = json.loads(response.read().decode("utf-8")) + diagnostic_overview_after_usage_refresh = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview" + ) + diagnostic_refresh_without_token = _http_error_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview/refresh", + data=b"", + method="POST", + ) + diagnostic_refresh_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview/refresh", + headers={"X-Codex-Usage-Token": "test-token"}, + data=b"", + method="POST", + ) + diagnostic_batch_refresh_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/refresh", + headers={"X-Codex-Usage-Token": "test-token"}, + data=b"", + method="POST", + ) + diagnostic_tool_output_refresh_payload = diagnostic_batch_refresh_payload["sections"]["toolOutput"] + diagnostic_commands_refresh_payload = diagnostic_batch_refresh_payload["sections"]["commands"] + diagnostic_file_reads_refresh_payload = diagnostic_batch_refresh_payload["sections"]["fileReads"] 
+ diagnostic_read_productivity_refresh_payload = diagnostic_batch_refresh_payload["sections"][ + "readProductivity" + ] + diagnostic_concentration_refresh_payload = diagnostic_batch_refresh_payload["sections"][ + "concentration" + ] + diagnostic_stored_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview" + ) + diagnostic_tool_output_stored_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/tool-output" + ) + diagnostic_commands_stored_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/commands" + ) + diagnostic_file_reads_stored_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/file-reads" + ) + diagnostic_read_productivity_stored_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/read-productivity" + ) + diagnostic_concentration_stored_payload = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/concentration" + ) + diagnostic_computed_at = diagnostic_stored_payload["snapshot"]["computed_at"] + with urllib.request.urlopen( # noqa: S310 - local test server only + urllib.request.Request( + f"http://127.0.0.1:{server.server_port}/api/usage?refresh=1&limit=2", + headers={"X-Codex-Usage-Token": "test-token"}, + ), + timeout=5, + ) as response: + second_usage_refresh_payload = json.loads(response.read().decode("utf-8")) + diagnostic_after_second_usage_refresh = _read_json( + f"http://127.0.0.1:{server.server_port}/api/diagnostics/overview" + ) with urllib.request.urlopen( # noqa: S310 - local test server only f"http://127.0.0.1:{server.server_port}/api/usage?limit=all", timeout=5, @@ -104,6 +163,7 @@ def test_dashboard_server_usage_api_refreshes_aggregate_rows(tmp_path: Path) -> thread.join(timeout=5) assert refresh_without_token["status"] == 403 + assert diagnostic_refresh_without_token["status"] == 403 assert dashboard_cache_control == "no-store" shell_raw_payload = dashboard_html.split( '