From d2a6cc7af7c8499558f427c7df90fab0032f21ed Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Sun, 26 Apr 2026 15:33:49 -0400 Subject: [PATCH] Honor fidelity in burn compare (#95) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the `summarizeFidelity` / `hasMinimumFidelity` helpers from `@relayburn/analyze` (shipped in 0.14.0 by #41) into `burn compare` so the aggregate stops silently averaging `aggregate-only` / `cost-only` / `partial` turns with full-fidelity peers from the same model — the "looks more confident than it should" failure mode #41 called out. - Default minimum is `usage-only`. Records emitted before TurnRecord.fidelity existed (pre-#41 ledgers) still pass for backward compat. - `--fidelity <class>` overrides the floor; `--include-partial` is shorthand for `--fidelity partial` and includes every turn. Conflicting combinations exit 2 with a clear message. - TTY output adds an `excluded N turns below <min> fidelity (… aggregate-only, … cost-only, … partial)` coverage line whenever the gate dropped anything. Per-model totals render `—` instead of `$0.00` when a model survived the filter with zero turns. - JSON gains a top-level `fidelity` block (`{ minimum, excluded, summary }`) computed against the unfiltered slice so consumers can render their own coverage UI. - `runCompare` now accepts an optional `CompareDeps` for ingest/query/pricing injection (parallels `runLimits`), enabling deterministic CLI tests. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 6 +- packages/analyze/CHANGELOG.md | 1 + packages/cli/CHANGELOG.md | 1 + packages/cli/src/commands/compare.test.ts | 350 ++++++++++++++++++++++ packages/cli/src/commands/compare.ts | 209 ++++++++++++- 5 files changed, 551 insertions(+), 16 deletions(-) create mode 100644 packages/cli/src/commands/compare.test.ts diff --git a/README.md b/README.md index d1c6559..781b45b 100644 --- a/README.md +++ b/README.md @@ -257,7 +257,7 @@ You can override per-call via `costForUsage(usage, model, pricing, { reasoningMo ``` burn summary [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] burn by-tool [--since 7d] [--project ] [--session ] -burn compare [--models a,b] [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--min-sample ] [--json|--csv] +burn compare [--models a,b] [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--min-sample ] [--fidelity ] [--include-partial] [--json|--csv] burn claude [--tag k=v ...] [-- ] ``` @@ -282,7 +282,9 @@ This is observed data, not counterfactual: it tells you what happened when you a Standard filters apply: `--session ` limits to a single session, `--agent ` limits to a stamped agent ID, `--workflow ` to a stamped workflow ID, `--project ` to a project path or git-canonical projectKey. -Output formats: TTY table (default), `--json` for scripts, `--csv` for spreadsheets. `--json` and `--csv` are mutually exclusive. +By default, `burn compare` only aggregates turns with `usage-only` fidelity or better — `aggregate-only`, `cost-only`, and `partial` turns are excluded so a session with mixed fidelity can't silently bias the cost/turn or one-shot rate of full-fidelity peers from the same model. When the gate dropped anything, the table prints an `excluded N turns below fidelity (… aggregate-only, … cost-only, … partial)` coverage note. 
Override the floor with `--fidelity full | usage-only | aggregate-only | cost-only | partial`; `--include-partial` is shorthand for `--fidelity partial` and includes every turn. Records emitted before `TurnRecord.fidelity` existed always pass for backward compatibility. + +Output formats: TTY table (default), `--json` for scripts, `--csv` for spreadsheets. `--json` and `--csv` are mutually exclusive. The `--json` payload includes a `fidelity` block (`{ minimum, excluded, summary }`) computed against the unfiltered slice so consumers can render their own coverage UI. ### `burn rebuild --reclassify` — backfill activity labels on old turns diff --git a/packages/analyze/CHANGELOG.md b/packages/analyze/CHANGELOG.md index 48ec10a..17b6336 100644 --- a/packages/analyze/CHANGELOG.md +++ b/packages/analyze/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- **`hasMinimumFidelity` and `summarizeFidelity` are now wired into `burn compare`** ([#95](https://github.com/AgentWorkforce/burn/issues/95)). No API change in `@relayburn/analyze` itself — this entry just records the consumer-side adoption of the helpers shipped in 0.14.0 ([#41](https://github.com/AgentWorkforce/burn/issues/41)). See `@relayburn/cli` for the CLI surface (`--fidelity`, `--include-partial`, the new JSON `fidelity` block, and the "excluded N turns" coverage note). - **Waste attribution uses persisted user-turn block sizes before even-split** (#2). `attributeWaste()` accepts `userTurnsBySession` and fills missing per-`toolUseId` sizes from `UserTurnRecord.blocks` when content sidecars are unavailable, while keeping full sidecar content primary when present. Sessions that use this path report `attributionMethod: "user-turn"` instead of degrading to even-split. 
## [0.22.0] - 2026-04-26 diff --git a/packages/cli/CHANGELOG.md b/packages/cli/CHANGELOG.md index 90b53bb..be0dc9a 100644 --- a/packages/cli/CHANGELOG.md +++ b/packages/cli/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- **`burn compare` honors fidelity** ([#95](https://github.com/AgentWorkforce/burn/issues/95)). The aggregate now defaults to the `usage-only` floor: turns whose fidelity is `aggregate-only`, `cost-only`, or `partial` are excluded so a session with mixed fidelity can't silently bias the cost/turn or one-shot rate of full-fidelity peers from the same model. Records emitted before `TurnRecord.fidelity` existed (pre-#41 ledgers) still pass for backward compatibility. New flags: `--fidelity <class>` (any of `full | usage-only | aggregate-only | cost-only | partial`) overrides the floor; `--include-partial` is shorthand for `--fidelity partial` and includes every turn — an unknown `--fidelity` class, or `--include-partial` combined with a conflicting `--fidelity`, exits 2 with a clear message. Coverage notes gain an `excluded N turns below <min> fidelity (… aggregate-only, … cost-only, … partial)` line whenever the gate dropped anything, the JSON output gains a top-level `fidelity` block (`{ minimum, excluded, summary }`) computed against the unfiltered slice, and per-model totals render `—` instead of `$0.00` when a model survived the filter with zero turns. - **Persist user-turn block-size records during ingest** (#2). `burn ingest`, passive ingest, and the Claude/Codex/OpenCode wrappers now append parser-emitted `UserTurnRecord`s for all three harnesses. Codex passive cursors also carry the in-flight user-turn slot so resumed ingest can complete a bridge record across file-growth boundaries. `burn waste` and `burn diagnose` load these records and use them as the sized fallback when content sidecars are missing. 
## [0.26.0] - 2026-04-26 diff --git a/packages/cli/src/commands/compare.test.ts b/packages/cli/src/commands/compare.test.ts new file mode 100644 index 0000000..5ae218d --- /dev/null +++ b/packages/cli/src/commands/compare.test.ts @@ -0,0 +1,350 @@ +import { strict as assert } from 'node:assert'; +import { describe, it } from 'node:test'; + +import { loadBuiltinPricing } from '@relayburn/analyze'; +import type { EnrichedTurn } from '@relayburn/ledger'; +import { + EMPTY_COVERAGE, + makeFidelity, +} from '@relayburn/reader'; +import type { ActivityCategory, Fidelity } from '@relayburn/reader'; + +import { runCompare, type CompareDeps } from './compare.js'; +import type { ParsedArgs } from '../args.js'; + +async function captureStdout( + fn: () => Promise, +): Promise<{ result: T; stdout: string; stderr: string }> { + let stdout = ''; + let stderr = ''; + const origOut = process.stdout.write.bind(process.stdout); + const origErr = process.stderr.write.bind(process.stderr); + // node:test pipes diagnostic frames through process.stdout. Pass anything + // that isn't a plain string straight through to the original sink so the + // test runner's V8-serialized event traffic still reaches the reporter. 
+ process.stdout.write = ((c: string | Uint8Array, ...rest: unknown[]) => { + if (typeof c === 'string') { + stdout += c; + return true; + } + return origOut(c as Uint8Array, ...(rest as [])); + }) as typeof process.stdout.write; + process.stderr.write = ((c: string | Uint8Array, ...rest: unknown[]) => { + if (typeof c === 'string') { + stderr += c; + return true; + } + return origErr(c as Uint8Array, ...(rest as [])); + }) as typeof process.stderr.write; + try { + const result = await fn(); + return { result, stdout, stderr }; + } finally { + process.stdout.write = origOut; + process.stderr.write = origErr; + } +} + +function args(flags: Record = {}): ParsedArgs { + return { flags, tags: {}, positional: [], passthrough: [] }; +} + +const FULL_FIDELITY: Fidelity = makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, + hasCacheReadTokens: true, + hasToolCalls: true, + hasToolResultEvents: true, + hasSessionRelationships: true, +}); + +const AGGREGATE_FIDELITY: Fidelity = makeFidelity('per-session-aggregate', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, +}); + +const COST_ONLY_FIDELITY: Fidelity = makeFidelity('cost-only', { + ...EMPTY_COVERAGE, +}); + +const PARTIAL_FIDELITY: Fidelity = makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + // missing output / cache-read / tool events → "partial" +}); + +let counter = 0; + +function turn( + model: string, + activity: ActivityCategory | undefined, + fidelity: Fidelity | undefined, + partial: Partial = {}, +): EnrichedTurn { + counter++; + const base: EnrichedTurn = { + v: 1, + source: 'claude-code', + sessionId: 's', + messageId: `m-${counter}`, + turnIndex: 0, + ts: '2026-04-20T00:00:00.000Z', + model, + usage: { + input: 1000, + output: 500, + reasoning: 0, + cacheRead: 0, + cacheCreate5m: 0, + cacheCreate1h: 0, + }, + toolCalls: [], + enrichment: {}, + ...(activity !== undefined ? 
{ activity } : {}), + ...partial, + }; + // exactOptionalPropertyTypes — only set fidelity when defined. + if (fidelity !== undefined) base.fidelity = fidelity; + return base; +} + +function makeDeps(turns: EnrichedTurn[]): CompareDeps { + return { + ingestAll: async () => undefined, + queryAll: async () => turns, + loadPricing: loadBuiltinPricing, + }; +} + +describe('burn compare — fidelity gating', () => { + it('excludes aggregate-only / cost-only / partial turns by default (usage-only floor)', async () => { + const turns: EnrichedTurn[] = [ + // 5 full-fidelity Sonnet coding turns — should survive. + ...Array.from({ length: 5 }, () => + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { + hasEdits: true, + retries: 0, + }), + ), + // 3 aggregate-only turns from the same model+activity — must NOT + // contaminate the average. + ...Array.from({ length: 3 }, () => + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { + hasEdits: true, + retries: 0, + }), + ), + // 1 cost-only and 2 partial turns — also dropped. + turn('claude-sonnet-4-6', 'coding', COST_ONLY_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', PARTIAL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', PARTIAL_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + + const { result, stdout } = await captureStdout(() => + runCompare(args({ json: true }), makeDeps(turns)), + ); + assert.equal(result, 0); + const parsed = JSON.parse(stdout); + assert.equal(parsed.analyzedTurns, 5, 'only the 5 full-fidelity turns survive the default gate'); + const cell = parsed.cells.find( + (c: { model: string; category: string }) => + c.model === 'claude-sonnet-4-6' && c.category === 'coding', + ); + assert.ok(cell); + assert.equal(cell.turns, 5); + }); + + it('records with no fidelity field still pass the default gate (backward compat)', async () => { + const turns: EnrichedTurn[] = [ + // Pre-#41 ledger writers don't stamp `fidelity` — keep counting them. 
+ ...Array.from({ length: 3 }, () => + turn('claude-sonnet-4-6', 'coding', undefined, { hasEdits: true, retries: 0 }), + ), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args({ json: true }), makeDeps(turns)), + ); + assert.equal(result, 0); + const parsed = JSON.parse(stdout); + assert.equal(parsed.analyzedTurns, 3); + assert.equal(parsed.fidelity.excluded.total, 0); + }); + + it('annotates the rendered table with an "excluded N turns" coverage note', async () => { + const turns: EnrichedTurn[] = [ + ...Array.from({ length: 4 }, () => + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + ), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', COST_ONLY_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', PARTIAL_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args(), makeDeps(turns)), + ); + assert.equal(result, 0); + assert.match(stdout, /excluded 4 turns below usage-only fidelity/); + assert.match(stdout, /2 aggregate-only/); + assert.match(stdout, /1 cost-only/); + assert.match(stdout, /1 partial/); + }); + + it('omits the excluded note when nothing was filtered', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args(), makeDeps(turns)), + ); + assert.equal(result, 0); + assert.doesNotMatch(stdout, /excluded/); + }); + + it('--fidelity full strictly drops anything below full', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 
}), + // usage-only is allowed under the default but NOT under --fidelity full. + turn('claude-sonnet-4-6', 'coding', makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, + hasCacheReadTokens: true, + }), { hasEdits: true, retries: 0 }), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args({ json: true, fidelity: 'full' }), makeDeps(turns)), + ); + assert.equal(result, 0); + const parsed = JSON.parse(stdout); + assert.equal(parsed.analyzedTurns, 1); + assert.equal(parsed.fidelity.minimum, 'full'); + assert.equal(parsed.fidelity.excluded.total, 1); + assert.equal(parsed.fidelity.excluded.usageOnly, 1); + }); + + it('--fidelity partial includes everything (no exclusions)', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', COST_ONLY_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', PARTIAL_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args({ json: true, fidelity: 'partial' }), makeDeps(turns)), + ); + assert.equal(result, 0); + const parsed = JSON.parse(stdout); + assert.equal(parsed.analyzedTurns, 4); + assert.equal(parsed.fidelity.excluded.total, 0); + }); + + it('--include-partial is shorthand for --fidelity partial', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', COST_ONLY_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args({ json: true, 'include-partial': true }), makeDeps(turns)), + ); + 
assert.equal(result, 0); + const parsed = JSON.parse(stdout); + assert.equal(parsed.fidelity.minimum, 'partial'); + assert.equal(parsed.fidelity.excluded.total, 0); + assert.equal(parsed.analyzedTurns, 3); + }); + + it('--include-partial together with a conflicting --fidelity exits 2', async () => { + const { result, stderr } = await captureStdout(() => + runCompare( + args({ 'include-partial': true, fidelity: 'full' }), + makeDeps([]), + ), + ); + assert.equal(result, 2); + assert.match(stderr, /--include-partial conflicts with --fidelity full/); + }); + + it('--fidelity with an unknown class exits 2', async () => { + const { result, stderr } = await captureStdout(() => + runCompare(args({ fidelity: 'bogus' }), makeDeps([])), + ); + assert.equal(result, 2); + assert.match(stderr, /invalid --fidelity: bogus/); + }); + + it('JSON output emits a fidelity block with minimum, excluded, and summary', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + // unknown bucket — survives the gate, counted in summary. + turn('claude-sonnet-4-6', 'coding', undefined, { hasEdits: true, retries: 0 }), + ]; + const { stdout } = await captureStdout(() => + runCompare(args({ json: true }), makeDeps(turns)), + ); + const parsed = JSON.parse(stdout); + assert.ok(parsed.fidelity, 'JSON has a top-level fidelity block'); + assert.equal(parsed.fidelity.minimum, 'usage-only'); + assert.equal(parsed.fidelity.excluded.total, 1); + assert.equal(parsed.fidelity.excluded.aggregateOnly, 1); + // summary mirrors `summarizeFidelity` over the unfiltered slice. 
+ assert.equal(parsed.fidelity.summary.total, 4); + assert.equal(parsed.fidelity.summary.byClass.full, 2); + assert.equal(parsed.fidelity.summary.byClass['aggregate-only'], 1); + assert.equal(parsed.fidelity.summary.unknown, 1); + }); + + it('renders "—" (not $0.00 / 0%) when a (model, activity) collapses to zero turns post-filter', async () => { + // Sonnet has only aggregate-only turns in `coding` — under the default + // floor every turn is dropped, the cell should render as the dash sentinel + // and the JSON cell flips to noData=true. Haiku keeps a real cell so the + // category survives. + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-haiku-4-5', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { stdout: jsonOut } = await captureStdout(() => + runCompare(args({ json: true, models: 'claude-sonnet-4-6,claude-haiku-4-5' }), makeDeps(turns)), + ); + const parsed = JSON.parse(jsonOut); + const sonnetCell = parsed.cells.find( + (c: { model: string; category: string }) => + c.model === 'claude-sonnet-4-6' && c.category === 'coding', + ); + assert.ok(sonnetCell); + assert.equal(sonnetCell.turns, 0); + assert.equal(sonnetCell.noData, true); + assert.equal(sonnetCell.costPerTurn, null); + assert.equal(sonnetCell.oneShotRate, null); + + const { stdout: ttyOut } = await captureStdout(() => + runCompare(args({ models: 'claude-sonnet-4-6,claude-haiku-4-5' }), makeDeps(turns)), + ); + // Find the data row for `coding` — the Sonnet half (3 sub-columns) must + // be three em-dashes, never $0.00 / 0%. Tightening the regex so we don't + // accidentally match real money like `$0.0035` from another row. 
+ const codingLine = ttyOut.split('\n').find((l) => l.startsWith('coding')); + assert.ok(codingLine, 'expected a coding row'); + assert.match(codingLine, /—\s+—\s+—/); + assert.doesNotMatch(codingLine, /\$0\.00\b/); + assert.doesNotMatch(codingLine, /\b0%/); + }); + + it('singular wording when exactly one turn was excluded', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { stdout } = await captureStdout(() => + runCompare(args(), makeDeps(turns)), + ); + assert.match(stdout, /excluded 1 turn below usage-only fidelity/); + }); +}); diff --git a/packages/cli/src/commands/compare.ts b/packages/cli/src/commands/compare.ts index cb55381..3644c4e 100644 --- a/packages/cli/src/commands/compare.ts +++ b/packages/cli/src/commands/compare.ts @@ -1,11 +1,15 @@ import { buildCompareTable, DEFAULT_MIN_SAMPLE, + hasMinimumFidelity, loadPricing, + summarizeFidelity, type CompareCell, type CompareTable, + type FidelitySummary, } from '@relayburn/analyze'; -import { queryAll, type Query } from '@relayburn/ledger'; +import { queryAll, type EnrichedTurn, type Query } from '@relayburn/ledger'; +import type { FidelityClass } from '@relayburn/reader'; import { ingestAll } from '../ingest.js'; import { formatInt, formatUsd, parseSinceArg } from '../format.js'; @@ -15,7 +19,8 @@ const COMPARE_HELP = `burn compare — per-(model, activity) comparison table Usage: burn compare [--models a,b] [--since 7d] [--project ] [--session ] - [--workflow ] [--agent ] [--min-sample ] [--json|--csv] + [--workflow ] [--agent ] [--min-sample ] + [--fidelity ] [--include-partial] [--json|--csv] Flags: --models comma-separated list of model names to include (default: all) @@ -26,8 +31,16 @@ Flags: --agent filter by stamped agentId --min-sample insufficient-sample threshold; cells below this get flagged in the 
coverage-notes block (default: 5) + --fidelity minimum fidelity class to include in the aggregate + (full | usage-only | aggregate-only | cost-only | partial). + Default: usage-only — drops aggregate-only / cost-only / partial + turns so a session with mixed fidelity isn't silently averaged + with full-fidelity turns from the same model. Records emitted + before TurnRecord.fidelity existed always pass. + --include-partial + shorthand for --fidelity partial; includes every turn. --json emit a stable JSON object (analyzedTurns, models, categories, - totals, cells[]) + totals, cells[], fidelity{ minimum, excluded, summary }) --csv emit a CSV with one row per (model, category) pair --help, -h show this message @@ -43,9 +56,28 @@ Examples: burn compare --since 30d burn compare --models claude-sonnet-4-6,claude-haiku-4-5 --since 7d burn compare --workflow wf-refactor --json + burn compare --fidelity full # strict: drop anything below full + burn compare --include-partial # include every turn, even cost-only `; -export async function runCompare(args: ParsedArgs): Promise { +const FIDELITY_CHOICES: ReadonlyArray = [ + 'full', + 'usage-only', + 'aggregate-only', + 'cost-only', + 'partial', +]; + +export interface CompareDeps { + ingestAll?: () => Promise; + queryAll?: (q: Query) => Promise; + loadPricing?: typeof loadPricing; +} + +export async function runCompare( + args: ParsedArgs, + deps: CompareDeps = {}, +): Promise { const first = args.positional[0]; if ( args.flags['help'] === true || @@ -73,6 +105,31 @@ export async function runCompare(args: ParsedArgs): Promise { return 2; } + // Resolve --fidelity / --include-partial. --include-partial is just sugar + // for --fidelity partial; passing both is fine as long as they agree, and + // we error otherwise so the user doesn't get a surprising effective level. 
+ const includePartial = args.flags['include-partial'] === true; + const fidelityFlag = args.flags['fidelity']; + let minFidelity: FidelityClass = 'usage-only'; + if (typeof fidelityFlag === 'string') { + if (!isFidelityClass(fidelityFlag)) { + process.stderr.write( + `burn: invalid --fidelity: ${fidelityFlag} (expected one of ${FIDELITY_CHOICES.join(', ')})\n`, + ); + return 2; + } + minFidelity = fidelityFlag; + } + if (includePartial) { + if (typeof fidelityFlag === 'string' && fidelityFlag !== 'partial') { + process.stderr.write( + `burn: --include-partial conflicts with --fidelity ${fidelityFlag}\n`, + ); + return 2; + } + minFidelity = 'partial'; + } + const wantJson = args.flags['json'] === true; const wantCsv = args.flags['csv'] === true; if (wantJson && wantCsv) { @@ -82,16 +139,45 @@ export async function runCompare(args: ParsedArgs): Promise { return 2; } - await ingestAll(); - const pricing = await loadPricing(); - const turns = await queryAll(q); + const ingest = deps.ingestAll ?? ingestAll; + const query = deps.queryAll ?? queryAll; + const loadPricingFn = deps.loadPricing ?? loadPricing; + + await ingest(); + const pricing = await loadPricingFn(); + const turns = await query(q); + + // Summarize fidelity over the *unfiltered* slice so coverage notes and the + // JSON `summary` reflect the input the user actually queried, not what + // survived the gate. The summary is what tells them why N turns were + // dropped. + const summary = summarizeFidelity(turns); + // `--fidelity partial` (and its `--include-partial` shorthand) is the "let + // everything through" escape hatch per #41. The FidelityClass ordering used + // by `hasMinimumFidelity` puts `partial` strictly above `aggregate-only` / + // `cost-only`, so the predicate would otherwise still drop those two + // buckets. Bypass the gate entirely in that mode. + const filteredTurns = minFidelity === 'partial' + ? 
turns + : turns.filter((t) => hasMinimumFidelity(t.fidelity, minFidelity)); + const excluded = computeExcluded(summary, minFidelity); const opts: Parameters[1] = { pricing, minSample }; if (models) opts.models = models; - const table = buildCompareTable(turns, opts); + const table = buildCompareTable(filteredTurns, opts); if (wantJson) { - process.stdout.write(JSON.stringify(toJson(table, turns.length), null, 2) + '\n'); + process.stdout.write( + JSON.stringify( + toJson(table, filteredTurns.length, { + minimum: minFidelity, + excluded, + summary, + }), + null, + 2, + ) + '\n', + ); return 0; } if (wantCsv) { @@ -99,11 +185,75 @@ export async function runCompare(args: ParsedArgs): Promise { return 0; } - process.stdout.write(renderTty(table, turns.length)); + process.stdout.write( + renderTty(table, filteredTurns.length, { minimum: minFidelity, excluded }), + ); return 0; } -function toJson(t: CompareTable, analyzedTurns: number): object { +function isFidelityClass(s: string): s is FidelityClass { + return (FIDELITY_CHOICES as ReadonlyArray).includes(s); +} + +interface ExcludedBreakdown { + total: number; + aggregateOnly: number; + costOnly: number; + partial: number; + usageOnly: number; +} + +// Sum the byClass buckets that fall below the minimum fidelity. We never +// exclude `unknown` (records without a fidelity field — `hasMinimumFidelity` +// passes them for backward compat), so they don't get counted here. +// +// `--fidelity partial` is the "include everything" escape hatch (matched by +// the runtime), so it always reports zero excluded — even though the +// FidelityClass ordering puts `partial` above `aggregate-only` / `cost-only`. 
+function computeExcluded( + summary: FidelitySummary, + minimum: FidelityClass, +): ExcludedBreakdown { + const out: ExcludedBreakdown = { + total: 0, + aggregateOnly: 0, + costOnly: 0, + partial: 0, + usageOnly: 0, + }; + if (minimum === 'partial') return out; + const order: ReadonlyArray = [ + 'cost-only', + 'aggregate-only', + 'partial', + 'usage-only', + 'full', + ]; + const need = order.indexOf(minimum); + for (const cls of order) { + if (order.indexOf(cls) >= need) continue; + const n = summary.byClass[cls]; + if (n === 0) continue; + out.total += n; + if (cls === 'aggregate-only') out.aggregateOnly += n; + else if (cls === 'cost-only') out.costOnly += n; + else if (cls === 'partial') out.partial += n; + else if (cls === 'usage-only') out.usageOnly += n; + } + return out; +} + +interface FidelityJsonBlock { + minimum: FidelityClass; + excluded: ExcludedBreakdown; + summary: FidelitySummary; +} + +function toJson( + t: CompareTable, + analyzedTurns: number, + fidelity: FidelityJsonBlock, +): object { const cells: Array> = []; for (const m of t.models) { for (const cat of t.categories) { @@ -132,6 +282,7 @@ function toJson(t: CompareTable, analyzedTurns: number): object { categories: t.categories, totals: t.totals, cells, + fidelity, }; } @@ -211,10 +362,22 @@ function cellFields(c: CompareCell): [string, string, string] { return [turns, cost, oneShot]; } -function renderTty(t: CompareTable, analyzedTurns: number): string { +interface FidelityRenderInput { + minimum: FidelityClass; + excluded: ExcludedBreakdown; +} + +function renderTty( + t: CompareTable, + analyzedTurns: number, + fidelity: FidelityRenderInput, +): string { const lines: string[] = []; lines.push(''); lines.push(`turns analyzed: ${formatInt(analyzedTurns)}`); + if (fidelity.excluded.total > 0) { + lines.push(formatExcludedNote(fidelity)); + } lines.push(''); if (t.models.length === 0 || t.categories.length === 0) { @@ -294,16 +457,34 @@ function renderTty(t: CompareTable, analyzedTurns: 
number): string { } } - // Per-model totals + // Per-model totals. A model that survived the filter with zero turns (e.g. + // every turn was excluded by --fidelity, or --models pre-seeded a model the + // user asked about that has no data in the slice) renders the cost as the + // dash sentinel — not "$0.00", which would falsely claim the model ran for + // free. lines.push(''); for (const m of t.models) { const tot = t.totals[m] ?? { turns: 0, totalCost: 0 }; - lines.push(`${displayModelName(m)}: ${formatInt(tot.turns)} turns, ${formatUsd(tot.totalCost)} total`); + const totalCost = tot.turns > 0 ? formatUsd(tot.totalCost) : DASH; + lines.push(`${displayModelName(m)}: ${formatInt(tot.turns)} turns, ${totalCost} total`); } lines.push(''); return lines.join('\n'); } +// "excluded 12 turns below usage-only fidelity (8 aggregate-only, 3 cost-only, 1 partial)" +// — only mention non-zero buckets so the parenthetical stays terse. +function formatExcludedNote(f: FidelityRenderInput): string { + const parts: string[] = []; + if (f.excluded.aggregateOnly > 0) parts.push(`${f.excluded.aggregateOnly} aggregate-only`); + if (f.excluded.costOnly > 0) parts.push(`${f.excluded.costOnly} cost-only`); + if (f.excluded.partial > 0) parts.push(`${f.excluded.partial} partial`); + if (f.excluded.usageOnly > 0) parts.push(`${f.excluded.usageOnly} usage-only`); + const breakdown = parts.length > 0 ? ` (${parts.join(', ')})` : ''; + const noun = f.excluded.total === 1 ? 'turn' : 'turns'; + return `excluded ${formatInt(f.excluded.total)} ${noun} below ${f.minimum} fidelity${breakdown}`; +} + function renderRow(row: string[], widths: number[], sep: string): string { return row.map((cell, i) => cell.padEnd(widths[i]!)).join(sep).trimEnd(); }