diff --git a/CHANGELOG.md b/CHANGELOG.md index 90ab8ab..94103d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## [Unreleased] +### Changed + +- **`burn compare` honors fidelity** ([#95](https://github.com/AgentWorkforce/burn/issues/95)). The aggregate now defaults to the `usage-only` fidelity floor — turns whose per-turn token coverage is `aggregate-only` / `cost-only` / `partial` are excluded so a session with mixed fidelity can't silently bias the cost/turn or one-shot rate of full-fidelity peers from the same model. New `--fidelity <class>` and `--include-partial` flags override or disable the floor; the coverage notes gain an `excluded N turns below <minimum> fidelity` line whenever the gate drops anything, and the `--json` output gains a `fidelity` block (`{ minimum, excluded, summary }`) computed against the unfiltered slice. Records emitted before `TurnRecord.fidelity` existed still pass for backward compatibility. + ## [0.33.0] - 2026-04-27 ### Added diff --git a/README.md b/README.md index 441e1c8..26abbc8 100644 --- a/README.md +++ b/README.md @@ -268,7 +268,7 @@ You can override per-call via `costForUsage(usage, model, pricing, { reasoningMo burn summary [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--provider

] [--by-provider] burn by-tool [--since 7d] [--project ] [--session ] [--provider

] burn waste [--since 7d] [--project ] [--session ] [--workflow ] [--provider

] -burn compare [--models a,b] [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--min-sample ] [--json|--csv] +burn compare [--models a,b] [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--min-sample ] [--fidelity ] [--include-partial] [--json|--csv] burn claude [--tag k=v ...] [-- ] burn codex [--tag k=v ...] [-- ] burn opencode [--tag k=v ...] [-- ] @@ -298,7 +298,9 @@ This is observed data, not counterfactual: it tells you what happened when you a Standard filters apply: `--session ` limits to a single session, `--agent ` limits to a stamped agent ID, `--workflow ` to a stamped workflow ID, `--project ` to a project path or git-canonical projectKey. -Output formats: TTY table (default), `--json` for scripts, `--csv` for spreadsheets. `--json` and `--csv` are mutually exclusive. +By default, `burn compare` only aggregates turns with `usage-only` fidelity or better — `aggregate-only`, `cost-only`, and `partial` turns are excluded so a session with mixed fidelity can't silently bias the cost/turn or one-shot rate of full-fidelity peers from the same model. When the gate dropped anything, the table prints an `excluded N turns below fidelity (… aggregate-only, … cost-only, … partial)` coverage note. Override the floor with `--fidelity full | usage-only | aggregate-only | cost-only | partial`; `--include-partial` is shorthand for `--fidelity partial` and includes every turn. Records emitted before `TurnRecord.fidelity` existed always pass for backward compatibility. + +Output formats: TTY table (default), `--json` for scripts, `--csv` for spreadsheets. `--json` and `--csv` are mutually exclusive. The `--json` payload includes a `fidelity` block (`{ minimum, excluded, summary }`) computed against the unfiltered slice so consumers can render their own coverage UI. 
### `burn rebuild --reclassify` — backfill activity labels on old turns diff --git a/packages/analyze/CHANGELOG.md b/packages/analyze/CHANGELOG.md index 6a07109..9751b64 100644 --- a/packages/analyze/CHANGELOG.md +++ b/packages/analyze/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- **`hasMinimumFidelity` and `summarizeFidelity` are now wired into `burn compare`** ([#95](https://github.com/AgentWorkforce/burn/issues/95)). No API change in `@relayburn/analyze` itself — this entry just records the consumer-side adoption of the helpers shipped in 0.14.0 ([#41](https://github.com/AgentWorkforce/burn/issues/41)). See `@relayburn/cli` for the CLI surface (`--fidelity`, `--include-partial`, the new JSON `fidelity` block, and the "excluded N turns" coverage note). + ## [0.33.0] - 2026-04-27 ### Added diff --git a/packages/cli/CHANGELOG.md b/packages/cli/CHANGELOG.md index 2cd2ac1..a0e0fb7 100644 --- a/packages/cli/CHANGELOG.md +++ b/packages/cli/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **`burn limits` honors fidelity on its 5-hour forecast** ([#105](https://github.com/AgentWorkforce/burn/issues/105)). The forecast still consumes every windowed turn — partial / aggregate-only / cost-only data still contributes to the running token total — but `burn limits` now classifies the contributing slice via `summarizeFidelity` and surfaces a binary `high` / `low` confidence flag. Text mode appends a `forecast: low-confidence (N of M contributing turns lack per-turn token data)` notice when at least one contributing turn is missing per-turn token coverage; full-fidelity windows print no notice. `--json` output gains a `forecast.fidelity` block carrying the `confidence` flag and the underlying `FidelitySummary`. `--watch` re-evaluates confidence on each tick so the flag flips as fresher full-fidelity turns land. 
+### Changed + +- **`burn compare` honors fidelity** ([#95](https://github.com/AgentWorkforce/burn/issues/95)). The aggregate now defaults to the `usage-only` floor: turns whose fidelity is `aggregate-only`, `cost-only`, or `partial` are excluded so a session with mixed fidelity can't silently bias the cost/turn or one-shot rate of full-fidelity peers from the same model. Records emitted before `TurnRecord.fidelity` existed (pre-#41 ledgers) still pass for backward compatibility. New flags: `--fidelity <class>` (any of `full | usage-only | aggregate-only | cost-only | partial`) overrides the floor; `--include-partial` is shorthand for `--fidelity partial` and includes every turn. An unknown `--fidelity` value, or `--include-partial` combined with a conflicting `--fidelity`, exits 2 with a clear message. Coverage notes gain an `excluded N turns below <minimum> fidelity (… aggregate-only, … cost-only, … partial)` line whenever the gate drops anything, the JSON output gains a top-level `fidelity` block (`{ minimum, excluded, summary }`) computed against the unfiltered slice, and per-model totals render `—` instead of `$0.00` when a model survived the filter with zero turns. When fidelity filtering is active (the default), `burn compare` falls back to the in-memory `queryAll` path so the gate is correctly applied; `--include-partial` (or `--fidelity partial`) reuses the archive's grouped SQL path from #88. 
+ ## [0.33.0] - 2026-04-27 ### Added diff --git a/packages/cli/src/commands/compare.test.ts b/packages/cli/src/commands/compare.test.ts new file mode 100644 index 0000000..5ae218d --- /dev/null +++ b/packages/cli/src/commands/compare.test.ts @@ -0,0 +1,350 @@ +import { strict as assert } from 'node:assert'; +import { describe, it } from 'node:test'; + +import { loadBuiltinPricing } from '@relayburn/analyze'; +import type { EnrichedTurn } from '@relayburn/ledger'; +import { + EMPTY_COVERAGE, + makeFidelity, +} from '@relayburn/reader'; +import type { ActivityCategory, Fidelity } from '@relayburn/reader'; + +import { runCompare, type CompareDeps } from './compare.js'; +import type { ParsedArgs } from '../args.js'; + +async function captureStdout( + fn: () => Promise, +): Promise<{ result: T; stdout: string; stderr: string }> { + let stdout = ''; + let stderr = ''; + const origOut = process.stdout.write.bind(process.stdout); + const origErr = process.stderr.write.bind(process.stderr); + // node:test pipes diagnostic frames through process.stdout. Pass anything + // that isn't a plain string straight through to the original sink so the + // test runner's V8-serialized event traffic still reaches the reporter. 
+ process.stdout.write = ((c: string | Uint8Array, ...rest: unknown[]) => { + if (typeof c === 'string') { + stdout += c; + return true; + } + return origOut(c as Uint8Array, ...(rest as [])); + }) as typeof process.stdout.write; + process.stderr.write = ((c: string | Uint8Array, ...rest: unknown[]) => { + if (typeof c === 'string') { + stderr += c; + return true; + } + return origErr(c as Uint8Array, ...(rest as [])); + }) as typeof process.stderr.write; + try { + const result = await fn(); + return { result, stdout, stderr }; + } finally { + process.stdout.write = origOut; + process.stderr.write = origErr; + } +} + +function args(flags: Record = {}): ParsedArgs { + return { flags, tags: {}, positional: [], passthrough: [] }; +} + +const FULL_FIDELITY: Fidelity = makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, + hasCacheReadTokens: true, + hasToolCalls: true, + hasToolResultEvents: true, + hasSessionRelationships: true, +}); + +const AGGREGATE_FIDELITY: Fidelity = makeFidelity('per-session-aggregate', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, +}); + +const COST_ONLY_FIDELITY: Fidelity = makeFidelity('cost-only', { + ...EMPTY_COVERAGE, +}); + +const PARTIAL_FIDELITY: Fidelity = makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + // missing output / cache-read / tool events → "partial" +}); + +let counter = 0; + +function turn( + model: string, + activity: ActivityCategory | undefined, + fidelity: Fidelity | undefined, + partial: Partial = {}, +): EnrichedTurn { + counter++; + const base: EnrichedTurn = { + v: 1, + source: 'claude-code', + sessionId: 's', + messageId: `m-${counter}`, + turnIndex: 0, + ts: '2026-04-20T00:00:00.000Z', + model, + usage: { + input: 1000, + output: 500, + reasoning: 0, + cacheRead: 0, + cacheCreate5m: 0, + cacheCreate1h: 0, + }, + toolCalls: [], + enrichment: {}, + ...(activity !== undefined ? 
{ activity } : {}), + ...partial, + }; + // exactOptionalPropertyTypes — only set fidelity when defined. + if (fidelity !== undefined) base.fidelity = fidelity; + return base; +} + +function makeDeps(turns: EnrichedTurn[]): CompareDeps { + return { + ingestAll: async () => undefined, + queryAll: async () => turns, + loadPricing: loadBuiltinPricing, + }; +} + +describe('burn compare — fidelity gating', () => { + it('excludes aggregate-only / cost-only / partial turns by default (usage-only floor)', async () => { + const turns: EnrichedTurn[] = [ + // 5 full-fidelity Sonnet coding turns — should survive. + ...Array.from({ length: 5 }, () => + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { + hasEdits: true, + retries: 0, + }), + ), + // 3 aggregate-only turns from the same model+activity — must NOT + // contaminate the average. + ...Array.from({ length: 3 }, () => + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { + hasEdits: true, + retries: 0, + }), + ), + // 1 cost-only and 2 partial turns — also dropped. + turn('claude-sonnet-4-6', 'coding', COST_ONLY_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', PARTIAL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', PARTIAL_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + + const { result, stdout } = await captureStdout(() => + runCompare(args({ json: true }), makeDeps(turns)), + ); + assert.equal(result, 0); + const parsed = JSON.parse(stdout); + assert.equal(parsed.analyzedTurns, 5, 'only the 5 full-fidelity turns survive the default gate'); + const cell = parsed.cells.find( + (c: { model: string; category: string }) => + c.model === 'claude-sonnet-4-6' && c.category === 'coding', + ); + assert.ok(cell); + assert.equal(cell.turns, 5); + }); + + it('records with no fidelity field still pass the default gate (backward compat)', async () => { + const turns: EnrichedTurn[] = [ + // Pre-#41 ledger writers don't stamp `fidelity` — keep counting them. 
+ ...Array.from({ length: 3 }, () => + turn('claude-sonnet-4-6', 'coding', undefined, { hasEdits: true, retries: 0 }), + ), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args({ json: true }), makeDeps(turns)), + ); + assert.equal(result, 0); + const parsed = JSON.parse(stdout); + assert.equal(parsed.analyzedTurns, 3); + assert.equal(parsed.fidelity.excluded.total, 0); + }); + + it('annotates the rendered table with an "excluded N turns" coverage note', async () => { + const turns: EnrichedTurn[] = [ + ...Array.from({ length: 4 }, () => + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + ), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', COST_ONLY_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', PARTIAL_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args(), makeDeps(turns)), + ); + assert.equal(result, 0); + assert.match(stdout, /excluded 4 turns below usage-only fidelity/); + assert.match(stdout, /2 aggregate-only/); + assert.match(stdout, /1 cost-only/); + assert.match(stdout, /1 partial/); + }); + + it('omits the excluded note when nothing was filtered', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args(), makeDeps(turns)), + ); + assert.equal(result, 0); + assert.doesNotMatch(stdout, /excluded/); + }); + + it('--fidelity full strictly drops anything below full', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 
}), + // usage-only is allowed under the default but NOT under --fidelity full. + turn('claude-sonnet-4-6', 'coding', makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, + hasCacheReadTokens: true, + }), { hasEdits: true, retries: 0 }), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args({ json: true, fidelity: 'full' }), makeDeps(turns)), + ); + assert.equal(result, 0); + const parsed = JSON.parse(stdout); + assert.equal(parsed.analyzedTurns, 1); + assert.equal(parsed.fidelity.minimum, 'full'); + assert.equal(parsed.fidelity.excluded.total, 1); + assert.equal(parsed.fidelity.excluded.usageOnly, 1); + }); + + it('--fidelity partial includes everything (no exclusions)', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', COST_ONLY_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', PARTIAL_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args({ json: true, fidelity: 'partial' }), makeDeps(turns)), + ); + assert.equal(result, 0); + const parsed = JSON.parse(stdout); + assert.equal(parsed.analyzedTurns, 4); + assert.equal(parsed.fidelity.excluded.total, 0); + }); + + it('--include-partial is shorthand for --fidelity partial', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', COST_ONLY_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { result, stdout } = await captureStdout(() => + runCompare(args({ json: true, 'include-partial': true }), makeDeps(turns)), + ); + 
assert.equal(result, 0); + const parsed = JSON.parse(stdout); + assert.equal(parsed.fidelity.minimum, 'partial'); + assert.equal(parsed.fidelity.excluded.total, 0); + assert.equal(parsed.analyzedTurns, 3); + }); + + it('--include-partial together with a conflicting --fidelity exits 2', async () => { + const { result, stderr } = await captureStdout(() => + runCompare( + args({ 'include-partial': true, fidelity: 'full' }), + makeDeps([]), + ), + ); + assert.equal(result, 2); + assert.match(stderr, /--include-partial conflicts with --fidelity full/); + }); + + it('--fidelity with an unknown class exits 2', async () => { + const { result, stderr } = await captureStdout(() => + runCompare(args({ fidelity: 'bogus' }), makeDeps([])), + ); + assert.equal(result, 2); + assert.match(stderr, /invalid --fidelity: bogus/); + }); + + it('JSON output emits a fidelity block with minimum, excluded, and summary', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + // unknown bucket — survives the gate, counted in summary. + turn('claude-sonnet-4-6', 'coding', undefined, { hasEdits: true, retries: 0 }), + ]; + const { stdout } = await captureStdout(() => + runCompare(args({ json: true }), makeDeps(turns)), + ); + const parsed = JSON.parse(stdout); + assert.ok(parsed.fidelity, 'JSON has a top-level fidelity block'); + assert.equal(parsed.fidelity.minimum, 'usage-only'); + assert.equal(parsed.fidelity.excluded.total, 1); + assert.equal(parsed.fidelity.excluded.aggregateOnly, 1); + // summary mirrors `summarizeFidelity` over the unfiltered slice. 
+ assert.equal(parsed.fidelity.summary.total, 4); + assert.equal(parsed.fidelity.summary.byClass.full, 2); + assert.equal(parsed.fidelity.summary.byClass['aggregate-only'], 1); + assert.equal(parsed.fidelity.summary.unknown, 1); + }); + + it('renders "—" (not $0.00 / 0%) when a (model, activity) collapses to zero turns post-filter', async () => { + // Sonnet has only aggregate-only turns in `coding` — under the default + // floor every turn is dropped, the cell should render as the dash sentinel + // and the JSON cell flips to noData=true. Haiku keeps a real cell so the + // category survives. + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-haiku-4-5', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { stdout: jsonOut } = await captureStdout(() => + runCompare(args({ json: true, models: 'claude-sonnet-4-6,claude-haiku-4-5' }), makeDeps(turns)), + ); + const parsed = JSON.parse(jsonOut); + const sonnetCell = parsed.cells.find( + (c: { model: string; category: string }) => + c.model === 'claude-sonnet-4-6' && c.category === 'coding', + ); + assert.ok(sonnetCell); + assert.equal(sonnetCell.turns, 0); + assert.equal(sonnetCell.noData, true); + assert.equal(sonnetCell.costPerTurn, null); + assert.equal(sonnetCell.oneShotRate, null); + + const { stdout: ttyOut } = await captureStdout(() => + runCompare(args({ models: 'claude-sonnet-4-6,claude-haiku-4-5' }), makeDeps(turns)), + ); + // Find the data row for `coding` — the Sonnet half (3 sub-columns) must + // be three em-dashes, never $0.00 / 0%. Tightening the regex so we don't + // accidentally match real money like `$0.0035` from another row. 
+ const codingLine = ttyOut.split('\n').find((l) => l.startsWith('coding')); + assert.ok(codingLine, 'expected a coding row'); + assert.match(codingLine, /—\s+—\s+—/); + assert.doesNotMatch(codingLine, /\$0\.00\b/); + assert.doesNotMatch(codingLine, /\b0%/); + }); + + it('singular wording when exactly one turn was excluded', async () => { + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', FULL_FIDELITY, { hasEdits: true, retries: 0 }), + turn('claude-sonnet-4-6', 'coding', AGGREGATE_FIDELITY, { hasEdits: true, retries: 0 }), + ]; + const { stdout } = await captureStdout(() => + runCompare(args(), makeDeps(turns)), + ); + assert.match(stdout, /excluded 1 turn below usage-only fidelity/); + }); +}); diff --git a/packages/cli/src/commands/compare.ts b/packages/cli/src/commands/compare.ts index 965410f..0355ead 100644 --- a/packages/cli/src/commands/compare.ts +++ b/packages/cli/src/commands/compare.ts @@ -2,11 +2,15 @@ import { buildCompareTable, compareFromArchive, DEFAULT_MIN_SAMPLE, + hasMinimumFidelity, loadPricing, + summarizeFidelity, type CompareCell, type CompareTable, + type FidelitySummary, } from '@relayburn/analyze'; -import { buildArchive, queryAll, type Query } from '@relayburn/ledger'; +import { buildArchive, queryAll, type EnrichedTurn, type Query } from '@relayburn/ledger'; +import type { FidelityClass } from '@relayburn/reader'; import { ingestAll } from '../ingest.js'; import { formatInt, formatUsd, parseSinceArg } from '../format.js'; @@ -17,7 +21,7 @@ const COMPARE_HELP = `burn compare — per-(model, activity) comparison table Usage: burn compare [--models a,b] [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--min-sample ] - [--json|--csv] [--no-archive] + [--fidelity ] [--include-partial] [--json|--csv] [--no-archive] Flags: --models comma-separated list of model names to include (default: all) @@ -28,8 +32,16 @@ Flags: --agent filter by stamped agentId --min-sample insufficient-sample threshold; cells below 
this get flagged in the coverage-notes block (default: 5) + --fidelity minimum fidelity class to include in the aggregate + (full | usage-only | aggregate-only | cost-only | partial). + Default: usage-only — drops aggregate-only / cost-only / partial + turns so a session with mixed fidelity isn't silently averaged + with full-fidelity turns from the same model. Records emitted + before TurnRecord.fidelity existed always pass. + --include-partial + shorthand for --fidelity partial; includes every turn. --json emit a stable JSON object (analyzedTurns, models, categories, - totals, cells[]) + totals, cells[], fidelity{ minimum, excluded, summary }) --csv emit a CSV with one row per (model, category) pair --no-archive bypass the SQLite archive and stream the ledger directly (legacy path; honored when env RELAYBURN_ARCHIVE=0) @@ -47,9 +59,28 @@ Examples: burn compare --since 30d burn compare --models claude-sonnet-4-6,claude-haiku-4-5 --since 7d burn compare --workflow wf-refactor --json + burn compare --fidelity full # strict: drop anything below full + burn compare --include-partial # include every turn, even cost-only `; -export async function runCompare(args: ParsedArgs): Promise { +const FIDELITY_CHOICES: ReadonlyArray = [ + 'full', + 'usage-only', + 'aggregate-only', + 'cost-only', + 'partial', +]; + +export interface CompareDeps { + ingestAll?: () => Promise; + queryAll?: (q: Query) => Promise; + loadPricing?: typeof loadPricing; +} + +export async function runCompare( + args: ParsedArgs, + deps: CompareDeps = {}, +): Promise { const first = args.positional[0]; if ( args.flags['help'] === true || @@ -77,6 +108,31 @@ export async function runCompare(args: ParsedArgs): Promise { return 2; } + // Resolve --fidelity / --include-partial. --include-partial is just sugar + // for --fidelity partial; passing both is fine as long as they agree, and + // we error otherwise so the user doesn't get a surprising effective level. 
+ const includePartial = args.flags['include-partial'] === true; + const fidelityFlag = args.flags['fidelity']; + let minFidelity: FidelityClass = 'usage-only'; + if (typeof fidelityFlag === 'string') { + if (!isFidelityClass(fidelityFlag)) { + process.stderr.write( + `burn: invalid --fidelity: ${fidelityFlag} (expected one of ${FIDELITY_CHOICES.join(', ')})\n`, + ); + return 2; + } + minFidelity = fidelityFlag; + } + if (includePartial) { + if (typeof fidelityFlag === 'string' && fidelityFlag !== 'partial') { + process.stderr.write( + `burn: --include-partial conflicts with --fidelity ${fidelityFlag}\n`, + ); + return 2; + } + minFidelity = 'partial'; + } + const wantJson = args.flags['json'] === true; const wantCsv = args.flags['csv'] === true; if (wantJson && wantCsv) { @@ -86,19 +142,33 @@ export async function runCompare(args: ParsedArgs): Promise { return 2; } - await ingestAll(); - const pricing = await loadPricing(); + const ingest = deps.ingestAll ?? ingestAll; + const query = deps.queryAll ?? queryAll; + const loadPricingFn = deps.loadPricing ?? loadPricing; + + await ingest(); + const pricing = await loadPricingFn(); const opts: Parameters[1] = { pricing, minSample }; if (models) opts.models = models; - // Archive path is the default (#88). Fallback to the in-memory `queryAll` - // + `buildCompareTable` path is preserved behind `--no-archive` and the - // env override `RELAYBURN_ARCHIVE=0` for parity validation and as a - // safety net when the archive is missing or corrupt. - const useArchive = !shouldBypassArchive(args); + // Archive path is the default (#88), but it does not yet apply + // `attribution_fidelity` filtering at the SQL layer. Fall back to the + // in-memory `queryAll` + `buildCompareTable` path whenever fidelity + // filtering is in effect (i.e., minFidelity !== 'partial') so #95's + // gate is correctly applied. `--include-partial` / `--fidelity partial` + // disables filtering and reuses the archive's grouped SQL. 
+ // + // Skip the archive when the caller injected `queryAll` (test mode): + // `buildArchive` and `compareFromArchive` are not part of `CompareDeps` + // and would hit the real `~/.relayburn/archive.sqlite`, breaking test + // isolation. The non-archive branch already handles `--fidelity partial` + // correctly (the filter becomes a no-op). + const useArchive = + !shouldBypassArchive(args) && minFidelity === 'partial' && !deps.queryAll; let table: CompareTable; - let analyzedTurns: number; + let filteredTurns: EnrichedTurn[]; + let summary: FidelitySummary; if (useArchive) { // Materialize the ledger tail before reading. ingestAll() above only // writes to the JSONL ledger; the archive is a derived read model that @@ -106,15 +176,44 @@ export async function runCompare(args: ParsedArgs): Promise { await buildArchive(); const result = await compareFromArchive(q, opts); table = result.table; - analyzedTurns = result.analyzedTurns; + // For fidelity-permissive mode the JSON summary reflects everything in + // the queried slice; we still emit a zero-excluded breakdown so the + // schema is stable. + const turnsForSummary = await query(q); + summary = summarizeFidelity(turnsForSummary); + filteredTurns = turnsForSummary; + void result.analyzedTurns; } else { - const turns = await queryAll(q); - table = buildCompareTable(turns, opts); - analyzedTurns = turns.length; + const turns = await query(q); + // Summarize fidelity over the *unfiltered* slice so coverage notes and + // the JSON `summary` reflect the input the user actually queried, not + // what survived the gate. The summary is what tells them why N turns + // were dropped. + summary = summarizeFidelity(turns); + // `--fidelity partial` (and its `--include-partial` shorthand) is the + // "let everything through" escape hatch per #41. 
The FidelityClass + // ordering used by `hasMinimumFidelity` puts `partial` strictly above + // `aggregate-only` / `cost-only`, so the predicate would otherwise + // still drop those two buckets. Bypass the gate entirely in that mode. + filteredTurns = minFidelity === 'partial' + ? turns + : turns.filter((t) => hasMinimumFidelity(t.fidelity, minFidelity)); + table = buildCompareTable(filteredTurns, opts); } + const excluded = computeExcluded(summary, minFidelity); if (wantJson) { - process.stdout.write(JSON.stringify(toJson(table, analyzedTurns), null, 2) + '\n'); + process.stdout.write( + JSON.stringify( + toJson(table, filteredTurns.length, { + minimum: minFidelity, + excluded, + summary, + }), + null, + 2, + ) + '\n', + ); return 0; } if (wantCsv) { @@ -122,7 +221,9 @@ export async function runCompare(args: ParsedArgs): Promise { return 0; } - process.stdout.write(renderTty(table, analyzedTurns)); + process.stdout.write( + renderTty(table, filteredTurns.length, { minimum: minFidelity, excluded }), + ); return 0; } @@ -133,7 +234,69 @@ function shouldBypassArchive(args: ParsedArgs): boolean { return false; } -function toJson(t: CompareTable, analyzedTurns: number): object { +function isFidelityClass(s: string): s is FidelityClass { + return (FIDELITY_CHOICES as ReadonlyArray).includes(s); +} + +interface ExcludedBreakdown { + total: number; + aggregateOnly: number; + costOnly: number; + partial: number; + usageOnly: number; +} + +// Sum the byClass buckets that fall below the minimum fidelity. We never +// exclude `unknown` (records without a fidelity field — `hasMinimumFidelity` +// passes them for backward compat), so they don't get counted here. +// +// `--fidelity partial` is the "include everything" escape hatch (matched by +// the runtime), so it always reports zero excluded — even though the +// FidelityClass ordering puts `partial` above `aggregate-only` / `cost-only`. 
+function computeExcluded( + summary: FidelitySummary, + minimum: FidelityClass, +): ExcludedBreakdown { + const out: ExcludedBreakdown = { + total: 0, + aggregateOnly: 0, + costOnly: 0, + partial: 0, + usageOnly: 0, + }; + if (minimum === 'partial') return out; + const order: ReadonlyArray = [ + 'cost-only', + 'aggregate-only', + 'partial', + 'usage-only', + 'full', + ]; + const need = order.indexOf(minimum); + for (const cls of order) { + if (order.indexOf(cls) >= need) continue; + const n = summary.byClass[cls]; + if (n === 0) continue; + out.total += n; + if (cls === 'aggregate-only') out.aggregateOnly += n; + else if (cls === 'cost-only') out.costOnly += n; + else if (cls === 'partial') out.partial += n; + else if (cls === 'usage-only') out.usageOnly += n; + } + return out; +} + +interface FidelityJsonBlock { + minimum: FidelityClass; + excluded: ExcludedBreakdown; + summary: FidelitySummary; +} + +function toJson( + t: CompareTable, + analyzedTurns: number, + fidelity: FidelityJsonBlock, +): object { const cells: Array> = []; for (const m of t.models) { for (const cat of t.categories) { @@ -162,6 +325,7 @@ function toJson(t: CompareTable, analyzedTurns: number): object { categories: t.categories, totals: t.totals, cells, + fidelity, }; } @@ -241,10 +405,22 @@ function cellFields(c: CompareCell): [string, string, string] { return [turns, cost, oneShot]; } -function renderTty(t: CompareTable, analyzedTurns: number): string { +interface FidelityRenderInput { + minimum: FidelityClass; + excluded: ExcludedBreakdown; +} + +function renderTty( + t: CompareTable, + analyzedTurns: number, + fidelity: FidelityRenderInput, +): string { const lines: string[] = []; lines.push(''); lines.push(`turns analyzed: ${formatInt(analyzedTurns)}`); + if (fidelity.excluded.total > 0) { + lines.push(formatExcludedNote(fidelity)); + } lines.push(''); if (t.models.length === 0 || t.categories.length === 0) { @@ -324,16 +500,34 @@ function renderTty(t: CompareTable, analyzedTurns: 
number): string { } } - // Per-model totals + // Per-model totals. A model that survived the filter with zero turns (e.g. + // every turn was excluded by --fidelity, or --models pre-seeded a model the + // user asked about that has no data in the slice) renders the cost as the + // dash sentinel — not "$0.00", which would falsely claim the model ran for + // free. lines.push(''); for (const m of t.models) { const tot = t.totals[m] ?? { turns: 0, totalCost: 0 }; - lines.push(`${displayModelName(m)}: ${formatInt(tot.turns)} turns, ${formatUsd(tot.totalCost)} total`); + const totalCost = tot.turns > 0 ? formatUsd(tot.totalCost) : DASH; + lines.push(`${displayModelName(m)}: ${formatInt(tot.turns)} turns, ${totalCost} total`); } lines.push(''); return lines.join('\n'); } +// "excluded 12 turns below usage-only fidelity (8 aggregate-only, 3 cost-only, 1 partial)" +// — only mention non-zero buckets so the parenthetical stays terse. +function formatExcludedNote(f: FidelityRenderInput): string { + const parts: string[] = []; + if (f.excluded.aggregateOnly > 0) parts.push(`${f.excluded.aggregateOnly} aggregate-only`); + if (f.excluded.costOnly > 0) parts.push(`${f.excluded.costOnly} cost-only`); + if (f.excluded.partial > 0) parts.push(`${f.excluded.partial} partial`); + if (f.excluded.usageOnly > 0) parts.push(`${f.excluded.usageOnly} usage-only`); + const breakdown = parts.length > 0 ? ` (${parts.join(', ')})` : ''; + const noun = f.excluded.total === 1 ? 'turn' : 'turns'; + return `excluded ${formatInt(f.excluded.total)} ${noun} below ${f.minimum} fidelity${breakdown}`; +} + function renderRow(row: string[], widths: number[], sep: string): string { return row.map((cell, i) => cell.padEnd(widths[i]!)).join(sep).trimEnd(); }