From 6389864f94201263af888840c18d22cd71b00f7e Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Sun, 26 Apr 2026 15:33:34 -0400 Subject: [PATCH 1/3] Honor fidelity in burn waste (#100) `burn waste` now hard-filters its input slice against the coverage flags each detector requires, refuses with a non-zero exit when no turn meets the prereqs, and annotates partial exclusions both in text and JSON output. - `attributeWaste` / `aggregateBy*` need `hasToolCalls` + `hasToolResultEvents`. The orchestrator filters the slice before calling them and reports an "analyzed N of M turns; K excluded ..." notice when some turns survive. - `--patterns retries|failures` need the same flags; `reverts` needs `hasToolCalls` + `hasRawContent` (the editPreHash / editPostHash fields the parser computes from raw content); `compaction` is unchanged because the compaction sidecar is independent of `TurnRecord.fidelity`. Per-detector notices name the missing prereq and the source kinds responsible. - Refusal: when every input turn fails the prereq, exit 2 with `burn waste: 142/142 turns lack tool-call/tool-result coverage required for waste attribution. Sources: codex (per-session-aggregate, missing tool-call records, tool-result events). No waste analysis was performed.` - `--json` carries a `fidelity` block (`{ analyzed, excluded, summary, refused }`) mirroring `summary --json`. `--patterns` JSON additionally exposes a `perDetector` array with each detector's `required` flags and `excludedBySource` breakdown. - Tests cover full refusal, partial exclusion, per-detector messaging, and the JSON contract for both the attribution and patterns paths. Closes #100. Refs #41, #76. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/cli/CHANGELOG.md | 1 + packages/cli/src/commands/waste.test.ts | 595 +++++++++++++++++++++++- packages/cli/src/commands/waste.ts | 558 ++++++++++++++++++++-- 3 files changed, 1117 insertions(+), 37 deletions(-) diff --git a/packages/cli/CHANGELOG.md b/packages/cli/CHANGELOG.md index 90b53bb..4e34154 100644 --- a/packages/cli/CHANGELOG.md +++ b/packages/cli/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - **Persist user-turn block-size records during ingest** (#2). `burn ingest`, passive ingest, and the Claude/Codex/OpenCode wrappers now append parser-emitted `UserTurnRecord`s for all three harnesses. Codex passive cursors also carry the in-flight user-turn slot so resumed ingest can complete a bridge record across file-growth boundaries. `burn waste` and `burn diagnose` load these records and use them as the sized fallback when content sidecars are missing. +- **`burn waste` honors fidelity** ([#100](https://github.com/AgentWorkforce/burn/issues/100)). The attribution path (and the `--patterns retries|failures|reverts` detectors) now hard-filters the input slice against the coverage flags each detector requires — `attributeWaste` / `aggregateBy*` need `hasToolCalls` + `hasToolResultEvents`; `reverts` instead needs `hasToolCalls` + `hasRawContent` (for `editPreHash` / `editPostHash`); `compaction` is unchanged because its sidecar is independent of `TurnRecord.fidelity`. When *all* turns fall below the prereq, `burn waste` exits non-zero with a message naming the missing prerequisite and the source kinds responsible (`burn waste: 142/142 turns lack tool-call/tool-result coverage required for waste attribution. Sources: codex (per-session-aggregate, missing tool-call records, tool-result events). No waste analysis was performed.`). 
When *some* turns survive, the text and JSON output gain an "analyzed N of M" coverage notice that names the gap per source. `--json` now carries a `fidelity` block (`{ analyzed, excluded, summary, refused }`) mirroring `summary --json`; `--patterns` JSON additionally exposes a `perDetector` array with each detector's `required` flags and `excludedBySource` breakdown. ## [0.26.0] - 2026-04-26 diff --git a/packages/cli/src/commands/waste.test.ts b/packages/cli/src/commands/waste.test.ts index 83d2980..f3a5ed4 100644 --- a/packages/cli/src/commands/waste.test.ts +++ b/packages/cli/src/commands/waste.test.ts @@ -1,6 +1,7 @@ import { strict as assert } from 'node:assert'; import { describe, it } from 'node:test'; +import { loadBuiltinPricing } from '@relayburn/analyze'; import type { BashAggregation, FileAggregation, @@ -8,8 +9,25 @@ import type { SubagentAggregation, WasteResult, } from '@relayburn/analyze'; +import type { EnrichedTurn } from '@relayburn/ledger'; +import type { Coverage, Fidelity, SourceKind } from '@relayburn/reader'; -import { formatWasteReport, isAttributionDegraded } from './waste.js'; +import { + ATTRIBUTION_REQUIRED, + PATTERN_REQUIRED, + describeExcluded, + fmtCoverageKey, + formatCoverageNotice, + formatWasteReport, + isAttributionDegraded, + renderSourcesClause, + resolvePatternSelection, + runPatternsMode, + runWasteAttribution, + turnPassesCoverage, + type WasteAttributionDeps, +} from './waste.js'; +import type { ParsedArgs } from '../args.js'; function session( id: string, @@ -194,3 +212,578 @@ describe('formatWasteReport', () => { ); }); }); + +// --------------------------------------------------------------------------- +// Fidelity-gating helpers (#100) + +function fullCoverage(): Coverage { + return { + hasInputTokens: true, + hasOutputTokens: true, + hasReasoningTokens: true, + hasCacheReadTokens: true, + hasCacheCreateTokens: true, + hasToolCalls: true, + hasToolResultEvents: true, + hasSessionRelationships: true, + hasRawContent: 
true, + }; +} + +function fidelityWith( + cls: Fidelity['class'], + granularity: Fidelity['granularity'], + overrides: Partial = {}, +): Fidelity { + return { + class: cls, + granularity, + coverage: { ...fullCoverage(), ...overrides }, + }; +} + +function makeTurn( + overrides: Partial & { + sessionId: string; + messageId: string; + turnIndex: number; + source: SourceKind; + }, +): EnrichedTurn { + return { + v: 1, + model: 'claude-sonnet-4-6', + ts: '2026-04-20T00:00:00.000Z', + usage: { + input: 100, + output: 50, + reasoning: 0, + cacheRead: 0, + cacheCreate5m: 0, + cacheCreate1h: 0, + }, + toolCalls: [], + enrichment: {}, + ...overrides, + }; +} + +function args(flags: Record = {}): ParsedArgs { + return { flags, tags: {}, positional: [], passthrough: [] }; +} + +async function captureStdio( + fn: () => Promise, +): Promise<{ result: T; stdout: string; stderr: string }> { + let stdout = ''; + let stderr = ''; + const origOut = process.stdout.write.bind(process.stdout); + const origErr = process.stderr.write.bind(process.stderr); + process.stdout.write = ((c: string | Uint8Array) => { + stdout += typeof c === 'string' ? c : Buffer.from(c).toString('utf8'); + return true; + }) as typeof process.stdout.write; + process.stderr.write = ((c: string | Uint8Array) => { + stderr += typeof c === 'string' ? 
c : Buffer.from(c).toString('utf8'); + return true; + }) as typeof process.stderr.write; + try { + const result = await fn(); + return { result, stdout, stderr }; + } finally { + process.stdout.write = origOut; + process.stderr.write = origErr; + } +} + +const EMPTY_DEPS: WasteAttributionDeps = { + loadContentForSession: async () => [], + loadUserTurnsForSession: async () => [], +}; + +describe('turnPassesCoverage (#100)', () => { + it('passes turns with no fidelity field (legacy ledger writers)', () => { + const t = makeTurn({ sessionId: 's', messageId: 'm', turnIndex: 0, source: 'claude-code' }); + assert.equal(turnPassesCoverage(t, ['hasToolCalls', 'hasToolResultEvents']), true); + }); + + it('fails a turn that is missing any required coverage flag', () => { + const t = makeTurn({ + sessionId: 's', + messageId: 'm', + turnIndex: 0, + source: 'codex', + fidelity: fidelityWith('partial', 'per-turn', { hasToolResultEvents: false }), + }); + assert.equal(turnPassesCoverage(t, ['hasToolCalls', 'hasToolResultEvents']), false); + }); + + it('passes a turn that has every required coverage flag', () => { + const t = makeTurn({ + sessionId: 's', + messageId: 'm', + turnIndex: 0, + source: 'codex', + fidelity: fidelityWith('full', 'per-turn'), + }); + assert.equal(turnPassesCoverage(t, ['hasToolCalls', 'hasToolResultEvents']), true); + }); +}); + +describe('describeExcluded / source clauses (#100)', () => { + it('groups excluded turns by source and tracks granularity + missing flags', () => { + const excluded = [ + makeTurn({ + sessionId: 's1', + messageId: 'm1', + turnIndex: 0, + source: 'codex', + fidelity: fidelityWith('partial', 'per-turn', { hasToolResultEvents: false }), + }), + makeTurn({ + sessionId: 's1', + messageId: 'm2', + turnIndex: 1, + source: 'codex', + fidelity: fidelityWith('partial', 'per-turn', { hasToolResultEvents: false }), + }), + makeTurn({ + sessionId: 's2', + messageId: 'm3', + turnIndex: 0, + source: 'opencode', + fidelity: 
fidelityWith('aggregate-only', 'per-session-aggregate', { + hasToolCalls: false, + hasToolResultEvents: false, + }), + }), + ]; + const breakdown = describeExcluded(excluded, ATTRIBUTION_REQUIRED); + assert.equal(breakdown.sources.size, 2); + const codex = breakdown.sources.get('codex')!; + assert.equal(codex.count, 2); + assert.deepEqual([...codex.granularities].sort(), ['per-turn']); + assert.deepEqual([...codex.missing].sort(), ['hasToolResultEvents']); + const opencode = breakdown.sources.get('opencode')!; + assert.equal(opencode.count, 1); + assert.deepEqual([...opencode.granularities].sort(), ['per-session-aggregate']); + assert.deepEqual( + [...opencode.missing].sort(), + ['hasToolCalls', 'hasToolResultEvents'], + ); + + const clause = renderSourcesClause(breakdown); + assert.match(clause, /codex \(per-turn, missing tool-result events\)/); + assert.match( + clause, + /opencode \(per-session-aggregate, missing tool-call records, tool-result events\)/, + ); + }); + + it('formatCoverageNotice renders an "analyzed N of M" line that names the gap and source', () => { + const excluded = [ + makeTurn({ + sessionId: 's', + messageId: 'm1', + turnIndex: 0, + source: 'codex', + fidelity: fidelityWith('partial', 'per-turn', { hasToolResultEvents: false }), + }), + makeTurn({ + sessionId: 's', + messageId: 'm2', + turnIndex: 1, + source: 'codex', + fidelity: fidelityWith('partial', 'per-turn', { hasToolResultEvents: false }), + }), + ]; + const breakdown = describeExcluded(excluded, ATTRIBUTION_REQUIRED); + const notice = formatCoverageNotice(8, 10, breakdown); + assert.match(notice, /^analyzed 8 of 10 turns; 2 excluded for /); + assert.match(notice, /missing tool-result events/); + assert.match(notice, /\(codex\)/); + }); + + it('fmtCoverageKey expands every key without falling through to a raw flag name', () => { + const keys: Array = [ + 'hasInputTokens', + 'hasOutputTokens', + 'hasReasoningTokens', + 'hasCacheReadTokens', + 'hasCacheCreateTokens', + 'hasToolCalls', 
+ 'hasToolResultEvents', + 'hasSessionRelationships', + 'hasRawContent', + ]; + for (const k of keys) { + const text = fmtCoverageKey(k); + assert.ok(text && !text.startsWith('has'), `${k} -> ${text}`); + } + }); +}); + +describe('PATTERN_REQUIRED prerequisites (#100)', () => { + it('matches the spec: retries/failures need tool-result events; reverts needs raw content', () => { + assert.deepEqual([...PATTERN_REQUIRED.retries].sort(), [ + 'hasToolCalls', + 'hasToolResultEvents', + ]); + assert.deepEqual([...PATTERN_REQUIRED.failures].sort(), [ + 'hasToolCalls', + 'hasToolResultEvents', + ]); + assert.deepEqual([...PATTERN_REQUIRED.reverts].sort(), [ + 'hasRawContent', + 'hasToolCalls', + ]); + }); +}); + +describe('resolvePatternSelection', () => { + it('parses a comma-separated list of detector names', () => { + const set = resolvePatternSelection('retries,failures'); + assert.equal(set.size, 2); + assert.ok(set.has('retries')); + assert.ok(set.has('failures')); + }); + + it('returns all detectors when the flag is bare (true)', () => { + const set = resolvePatternSelection(true); + assert.equal(set.size, 4); + }); +}); + +describe('runWasteAttribution — fidelity refusal (#100)', () => { + it('refuses with exit 2, names the missing prerequisite + source kind, when every turn is aggregate-only', async () => { + const pricing = await loadBuiltinPricing(); + const turns: EnrichedTurn[] = []; + for (let i = 0; i < 142; i++) { + turns.push( + makeTurn({ + sessionId: 's', + messageId: `m${i}`, + turnIndex: i, + source: 'codex', + fidelity: fidelityWith('aggregate-only', 'per-session-aggregate', { + hasToolCalls: false, + hasToolResultEvents: false, + }), + }), + ); + } + const { result, stdout, stderr } = await captureStdio(() => + runWasteAttribution(args(), turns, pricing, EMPTY_DEPS), + ); + assert.equal(result, 2); + assert.equal(stdout, ''); + assert.match(stderr, /burn waste: 142\/142 turns lack tool-call\/tool-result coverage/); + assert.match(stderr, /codex/); + 
assert.match(stderr, /per-session-aggregate/); + assert.match(stderr, /missing tool-call records, tool-result events/); + assert.match(stderr, /No waste analysis was performed/); + }); + + it('JSON-mode refusal still writes a fidelity block with refused: true and exits 2', async () => { + const pricing = await loadBuiltinPricing(); + const turns: EnrichedTurn[] = [ + makeTurn({ + sessionId: 's', + messageId: 'm0', + turnIndex: 0, + source: 'codex', + fidelity: fidelityWith('aggregate-only', 'per-session-aggregate', { + hasToolCalls: false, + hasToolResultEvents: false, + }), + }), + ]; + const { result, stdout, stderr } = await captureStdio(() => + runWasteAttribution(args({ json: true }), turns, pricing, EMPTY_DEPS), + ); + assert.equal(result, 2); + assert.match(stderr, /No waste analysis was performed/); + const payload = JSON.parse(stdout); + assert.equal(payload.fidelity.refused, true); + assert.equal(payload.fidelity.analyzed, 0); + assert.equal(payload.fidelity.excluded, 1); + assert.ok(payload.fidelity.summary, 'summary present'); + assert.equal(payload.fidelity.summary.total, 1); + assert.equal(payload.fidelity.summary.byClass['aggregate-only'], 1); + assert.equal(payload.turnsAnalyzed, 0); + assert.match(payload.refusalReason, /No waste analysis was performed/); + }); + + it('does not refuse on a fully empty input (no turns at all)', async () => { + const pricing = await loadBuiltinPricing(); + const { result, stderr } = await captureStdio(() => + runWasteAttribution(args(), [], pricing, EMPTY_DEPS), + ); + assert.equal(result, 0, 'empty slice is not a refusal'); + assert.equal(stderr, ''); + }); +}); + +describe('runWasteAttribution — partial exclusion (#100)', () => { + it('analyzes only qualifying turns and prints "analyzed N of M" with the exclusion reason', async () => { + const pricing = await loadBuiltinPricing(); + const goodFidelity = fidelityWith('full', 'per-turn'); + const badFidelity = fidelityWith('partial', 'per-turn', { 
hasToolResultEvents: false }); + const turns: EnrichedTurn[] = [ + makeTurn({ + sessionId: 'good', + messageId: 'g1', + turnIndex: 0, + source: 'claude-code', + fidelity: goodFidelity, + }), + makeTurn({ + sessionId: 'good', + messageId: 'g2', + turnIndex: 1, + source: 'claude-code', + fidelity: goodFidelity, + }), + makeTurn({ + sessionId: 'bad', + messageId: 'b1', + turnIndex: 0, + source: 'codex', + fidelity: badFidelity, + }), + ]; + const { result, stdout, stderr } = await captureStdio(() => + runWasteAttribution(args(), turns, pricing, EMPTY_DEPS), + ); + assert.equal(result, 0); + assert.equal(stderr, ''); + assert.match(stdout, /turns analyzed: 2/); + assert.match(stdout, /analyzed 2 of 3 turns; 1 excluded for/); + assert.match(stdout, /missing tool-result events/); + assert.match(stdout, /\(codex\)/); + }); + + it('omits the coverage notice when nothing is excluded', async () => { + const pricing = await loadBuiltinPricing(); + const turns: EnrichedTurn[] = [ + makeTurn({ + sessionId: 's', + messageId: 'm', + turnIndex: 0, + source: 'claude-code', + fidelity: fidelityWith('full', 'per-turn'), + }), + ]; + const { result, stdout } = await captureStdio(() => + runWasteAttribution(args(), turns, pricing, EMPTY_DEPS), + ); + assert.equal(result, 0); + assert.doesNotMatch(stdout, /analyzed \d+ of \d+ turns; \d+ excluded/); + }); + + it('JSON mode includes a fidelity block with analyzed, excluded, summary, refused: false', async () => { + const pricing = await loadBuiltinPricing(); + const turns: EnrichedTurn[] = [ + makeTurn({ + sessionId: 'good', + messageId: 'g1', + turnIndex: 0, + source: 'claude-code', + fidelity: fidelityWith('full', 'per-turn'), + }), + makeTurn({ + sessionId: 'bad', + messageId: 'b1', + turnIndex: 0, + source: 'codex', + fidelity: fidelityWith('partial', 'per-turn', { hasToolResultEvents: false }), + }), + ]; + const { result, stdout } = await captureStdio(() => + runWasteAttribution(args({ json: true }), turns, pricing, EMPTY_DEPS), + 
); + assert.equal(result, 0); + const payload = JSON.parse(stdout); + assert.equal(payload.fidelity.refused, false); + assert.equal(payload.fidelity.analyzed, 1); + assert.equal(payload.fidelity.excluded, 1); + assert.equal(payload.fidelity.summary.total, 2); + assert.equal(payload.fidelity.summary.byClass.full, 1); + assert.equal(payload.fidelity.summary.byClass.partial, 1); + assert.equal(payload.turnsAnalyzed, 1); + }); +}); + +describe('runPatternsMode — fidelity refusal (#100)', () => { + it('refuses with exit 2 when every turn is below every selected detector\'s prereq', async () => { + const pricing = await loadBuiltinPricing(); + const turns: EnrichedTurn[] = [ + makeTurn({ + sessionId: 's', + messageId: 'm0', + turnIndex: 0, + source: 'codex', + fidelity: fidelityWith('aggregate-only', 'per-session-aggregate', { + hasToolCalls: false, + hasToolResultEvents: false, + hasRawContent: false, + }), + }), + ]; + const selected = new Set(['retries', 'failures'] as const); + const { result, stdout, stderr } = await captureStdio(() => + runPatternsMode(args(), turns, pricing, [], selected), + ); + assert.equal(result, 2); + assert.equal(stdout, ''); + assert.match(stderr, /burn waste --patterns: no selected detectors can run/); + assert.match(stderr, /retries: 1\/1 turns lack tool-call records \+ tool-result events/); + assert.match(stderr, /failures: 1\/1 turns lack tool-call records \+ tool-result events/); + assert.match(stderr, /codex/); + }); + + it('JSON-mode refusal includes per-detector required prerequisites and refused=true', async () => { + const pricing = await loadBuiltinPricing(); + const turns: EnrichedTurn[] = [ + makeTurn({ + sessionId: 's', + messageId: 'm0', + turnIndex: 0, + source: 'codex', + fidelity: fidelityWith('aggregate-only', 'per-session-aggregate', { + hasToolCalls: false, + hasToolResultEvents: false, + }), + }), + ]; + const selected = new Set(['retries'] as const); + const { stdout } = await captureStdio(() => + 
runPatternsMode(args({ json: true }), turns, pricing, [], selected), + ); + const payload = JSON.parse(stdout); + assert.equal(payload.fidelity.refused, true); + assert.ok(Array.isArray(payload.fidelity.perDetector)); + const retries = payload.fidelity.perDetector.find( + (d: { kind: string }) => d.kind === 'retries', + ); + assert.ok(retries, 'retries detector reported'); + assert.deepEqual(retries.required.sort(), ['hasToolCalls', 'hasToolResultEvents']); + assert.equal(retries.refused, true); + assert.equal(retries.analyzed, 0); + assert.equal(retries.excluded, 1); + assert.ok(Array.isArray(retries.excludedBySource)); + assert.equal(retries.excludedBySource[0].source, 'codex'); + }); +}); + +describe('runPatternsMode — per-detector partial exclusion (#100)', () => { + it('names the missing coverage flag per detector when a source is excluded', async () => { + const pricing = await loadBuiltinPricing(); + // Three claude turns with full fidelity; two codex turns with partial + // fidelity (no tool-result events). Selecting --patterns retries,failures + // should analyze only the claude turns and emit a per-detector notice + // naming the missing prereq. + const turns: EnrichedTurn[] = []; + for (let i = 0; i < 3; i++) { + turns.push( + makeTurn({ + sessionId: 'good', + messageId: `g${i}`, + turnIndex: i, + source: 'claude-code', + fidelity: fidelityWith('full', 'per-turn'), + }), + ); + } + for (let i = 0; i < 2; i++) { + turns.push( + makeTurn({ + sessionId: 'bad', + messageId: `b${i}`, + turnIndex: i, + source: 'codex', + fidelity: fidelityWith('partial', 'per-turn', { hasToolResultEvents: false }), + }), + ); + } + const selected = new Set(['retries', 'failures'] as const); + const { result, stdout, stderr } = await captureStdio(() => + runPatternsMode(args(), turns, pricing, [], selected), + ); + assert.equal(result, 0); + assert.equal(stderr, ''); + // Per-detector lines should mention the missing prereq + source. 
+ assert.match(stdout, /retries: analyzed 3 of 5 turns; 2 excluded \(needs tool-call records \+ tool-result events;/); + assert.match(stdout, /failures: analyzed 3 of 5 turns; 2 excluded \(needs tool-call records \+ tool-result events;/); + assert.match(stdout, /missing tool-result events/); + assert.match(stdout, /\(codex\)/); + }); + + it('JSON mode reports per-detector required + excludedBySource shape', async () => { + const pricing = await loadBuiltinPricing(); + const turns: EnrichedTurn[] = [ + makeTurn({ + sessionId: 'good', + messageId: 'g1', + turnIndex: 0, + source: 'claude-code', + fidelity: fidelityWith('full', 'per-turn'), + }), + makeTurn({ + sessionId: 'bad', + messageId: 'b1', + turnIndex: 0, + source: 'codex', + fidelity: fidelityWith('partial', 'per-turn', { hasToolResultEvents: false }), + }), + ]; + const selected = new Set(['retries', 'failures', 'reverts'] as const); + const { stdout } = await captureStdio(() => + runPatternsMode(args({ json: true }), turns, pricing, [], selected), + ); + const payload = JSON.parse(stdout); + assert.equal(payload.fidelity.refused, false); + const retries = payload.fidelity.perDetector.find( + (d: { kind: string }) => d.kind === 'retries', + ); + const reverts = payload.fidelity.perDetector.find( + (d: { kind: string }) => d.kind === 'reverts', + ); + assert.ok(retries && reverts); + assert.deepEqual(retries.required.sort(), ['hasToolCalls', 'hasToolResultEvents']); + assert.deepEqual(reverts.required.sort(), ['hasRawContent', 'hasToolCalls']); + // The codex turn here passes reverts (it has hasRawContent + hasToolCalls + // by default in fullCoverage()) but fails retries. 
+ assert.equal(retries.excluded, 1); + assert.equal(retries.excludedBySource[0].source, 'codex'); + assert.deepEqual( + retries.excludedBySource[0].missingCoverage.sort(), + ['hasToolResultEvents'], + ); + }); + + it('compaction detector is independent of fidelity — runs against the full slice', async () => { + const pricing = await loadBuiltinPricing(); + // Even though every turn lacks tool coverage, selecting only `compaction` + // must not refuse — the compaction sidecar comes from the ledger directly. + const turns: EnrichedTurn[] = [ + makeTurn({ + sessionId: 's', + messageId: 'm0', + turnIndex: 0, + source: 'codex', + fidelity: fidelityWith('aggregate-only', 'per-session-aggregate', { + hasToolCalls: false, + hasToolResultEvents: false, + }), + }), + ]; + const selected = new Set(['compaction'] as const); + const { result, stdout, stderr } = await captureStdio(() => + runPatternsMode(args(), turns, pricing, [], selected), + ); + assert.equal(result, 0, 'compaction-only must not refuse on aggregate-only input'); + assert.equal(stderr, ''); + assert.doesNotMatch(stdout, /no selected detectors can run/); + }); +}); diff --git a/packages/cli/src/commands/waste.ts b/packages/cli/src/commands/waste.ts index ed6ac7b..2ad74d6 100644 --- a/packages/cli/src/commands/waste.ts +++ b/packages/cli/src/commands/waste.ts @@ -5,7 +5,9 @@ import { attributeWaste, detectPatterns, loadPricing, + summarizeFidelity, type BashAggregation, + type FidelitySummary, type FileAggregation, type PatternsResult, type SubagentAggregation, @@ -16,9 +18,15 @@ import { queryCompactions, queryUserTurns, readContent, + type EnrichedTurn, type Query, } from '@relayburn/ledger'; -import type { ContentRecord, UserTurnRecord } from '@relayburn/reader'; +import type { + ContentRecord, + Coverage, + SourceKind, + UserTurnRecord, +} from '@relayburn/reader'; import { ingestAll } from '../ingest.js'; import { formatInt, formatUsd, parseSinceArg, table } from '../format.js'; @@ -45,6 +53,108 @@ export 
function isAttributionDegraded( return evenSplit / result.sessionTotals.length >= threshold; } +// Coverage flags a turn must carry to participate in `attributeWaste` and the +// matching aggregators. A turn missing either flag has no chronology we can +// allocate cost against (no per-call records, or no result-side bytes to +// allocate the next-turn input delta over). Records without `fidelity` (older +// ledger writers, foreign sources) are treated as best-effort full per #41 — +// they pass the gate. +export const ATTRIBUTION_REQUIRED: ReadonlyArray = [ + 'hasToolCalls', + 'hasToolResultEvents', +]; + +// Returns `true` if the turn carries every coverage flag in `required`. +// Records without `fidelity` (older ledger writers, foreign sources) are +// treated as best-effort full per #41 — they pass regardless of `required`. +export function turnPassesCoverage( + turn: Pick, + required: ReadonlyArray, +): boolean { + const f = turn.fidelity; + if (!f) return true; + for (const key of required) { + if (!f.coverage[key]) return false; + } + return true; +} + +export interface CoverageGapBreakdown { + // sourceKind -> set of missing-coverage flags observed on excluded turns + // from that source. Used to render "codex (per-turn, missing tool-result + // events), opencode (per-session-aggregate)"-style messages without + // hand-rolling source-specific copy at every call site. 
+ sources: Map; granularities: Set; count: number }>; +} + +export function describeExcluded( + excluded: ReadonlyArray>, + required: ReadonlyArray, +): CoverageGapBreakdown { + const sources = new Map; granularities: Set; count: number }>(); + for (const t of excluded) { + let row = sources.get(t.source); + if (!row) { + row = { missing: new Set(), granularities: new Set(), count: 0 }; + sources.set(t.source, row); + } + row.count++; + if (t.fidelity) { + row.granularities.add(t.fidelity.granularity); + for (const key of required) { + if (!t.fidelity.coverage[key]) row.missing.add(key); + } + } + } + return { sources }; +} + +export function fmtCoverageKey(key: keyof Coverage): string { + // `hasToolResultEvents` -> "tool-result events". Keeps the messaging + // talking about *what's missing* rather than parroting field names. + switch (key) { + case 'hasToolCalls': + return 'tool-call records'; + case 'hasToolResultEvents': + return 'tool-result events'; + case 'hasSessionRelationships': + return 'session relationships'; + case 'hasRawContent': + return 'raw content'; + case 'hasInputTokens': + return 'input tokens'; + case 'hasOutputTokens': + return 'output tokens'; + case 'hasReasoningTokens': + return 'reasoning tokens'; + case 'hasCacheReadTokens': + return 'cacheRead tokens'; + case 'hasCacheCreateTokens': + return 'cacheCreate tokens'; + } +} + +function renderSourceClause( + source: SourceKind, + row: { missing: Set; granularities: Set; count: number }, +): string { + const grans = [...row.granularities].sort(); + const missing = [...row.missing].map(fmtCoverageKey); + const parts: string[] = []; + if (grans.length > 0) parts.push(grans.join('+')); + if (missing.length > 0) parts.push(`missing ${missing.join(', ')}`); + if (parts.length === 0) return source; + return `${source} (${parts.join(', ')})`; +} + +export function renderSourcesClause(breakdown: CoverageGapBreakdown): string { + const rows: string[] = []; + for (const [source, row] of 
breakdown.sources) { + rows.push(renderSourceClause(source, row)); + } + return rows.join('; '); +} + export async function runWaste(args: ParsedArgs): Promise { const q: Query = {}; if (typeof args.flags['since'] === 'string') q.since = parseSinceArg(args.flags['since']); @@ -62,21 +172,93 @@ export async function runWaste(args: ParsedArgs): Promise { const compactions = selected.has('compaction') ? await queryCompactions(q) : []; - const patterns = detectPatterns(turns, { pricing, compactions }); - return renderPatterns(args, patterns, selected, turns.length); + return runPatternsMode(args, turns, pricing, compactions, selected); } - const sessionIds = new Set(turns.map((t) => t.sessionId)); + return runWasteAttribution(args, turns, pricing); +} + +// Exposed for tests so they can drive the orchestration with fixture turns +// and a mocked content/userTurns loader. Production callers go through +// `runWaste`, which fetches both via the ledger. +export interface WasteAttributionDeps { + loadContentForSession?: (sessionId: string) => Promise; + loadUserTurnsForSession?: (sessionId: string) => Promise; +} + +export async function runWasteAttribution( + args: ParsedArgs, + turns: EnrichedTurn[], + pricing: Awaited>, + deps: WasteAttributionDeps = {}, +): Promise { + const total = turns.length; + const eligible: EnrichedTurn[] = []; + const excluded: EnrichedTurn[] = []; + for (const t of turns) { + if (turnPassesCoverage(t, ATTRIBUTION_REQUIRED)) eligible.push(t); + else excluded.push(t); + } + + const fidelityAll = summarizeFidelity(turns); + + // Refusal: nothing to analyze. Exit non-zero with a message that names + // both the missing prerequisites and the source kinds responsible. This + // mirrors the "hard-fail with a clear message" wording from #41. 
+ if (total > 0 && eligible.length === 0) { + const breakdown = describeExcluded(excluded, ATTRIBUTION_REQUIRED); + const sourcesClause = renderSourcesClause(breakdown); + const message = + `burn waste: ${total}/${total} turns lack tool-call/tool-result coverage required for waste attribution. ` + + `Sources: ${sourcesClause}. No waste analysis was performed.`; + if (args.flags['json'] === true) { + process.stdout.write( + JSON.stringify( + { + turnsAnalyzed: 0, + grandTotal: 0, + attributedTotal: 0, + unattributedTotal: 0, + attributionDegraded: false, + sessions: [], + files: [], + bash: [], + subagents: [], + fidelity: { + analyzed: 0, + excluded: total, + summary: fidelityAll, + refused: true, + }, + refusalReason: message, + }, + null, + 2, + ) + '\n', + ); + } + process.stderr.write(message + '\n'); + return 2; + } + + const loadContent = + deps.loadContentForSession ?? + ((sessionId: string) => readContent({ sessionId })); + const loadUserTurns = + deps.loadUserTurnsForSession ?? + ((sessionId: string) => queryUserTurns({ sessionId })); + + const sessionIds = new Set(eligible.map((t) => t.sessionId)); const contentBySession = new Map(); const userTurnsBySession = new Map(); for (const sessionId of sessionIds) { - const records = await readContent({ sessionId }); + const records = await loadContent(sessionId); if (records.length > 0) contentBySession.set(sessionId, records); - const userTurns = await queryUserTurns({ sessionId }); + const userTurns = await loadUserTurns(sessionId); if (userTurns.length > 0) userTurnsBySession.set(sessionId, userTurns); } - const result = attributeWaste(turns, { + const result = attributeWaste(eligible, { pricing, contentBySession, userTurnsBySession, @@ -86,11 +268,16 @@ export async function runWaste(args: ParsedArgs): Promise { const subagents = aggregateBySubagent(result.attributions); const degraded = isAttributionDegraded(result); + const coverageNotice = + excluded.length > 0 + ? 
formatCoverageNotice(eligible.length, total, describeExcluded(excluded, ATTRIBUTION_REQUIRED)) + : undefined; + if (args.flags['json'] === true) { process.stdout.write( JSON.stringify( { - turnsAnalyzed: turns.length, + turnsAnalyzed: eligible.length, grandTotal: result.grandTotal, attributedTotal: result.attributedTotal, unattributedTotal: result.unattributedTotal, @@ -99,6 +286,12 @@ export async function runWaste(args: ParsedArgs): Promise { files, bash: bashes, subagents, + fidelity: { + analyzed: eligible.length, + excluded: excluded.length, + summary: fidelityAll, + refused: false, + }, }, null, 2, @@ -110,20 +303,48 @@ export async function runWaste(args: ParsedArgs): Promise { const showAll = args.flags['all'] === true; const limit = showAll ? Number.POSITIVE_INFINITY : DEFAULT_TOP_N; - process.stdout.write( - formatWasteReport({ - turnsAnalyzed: turns.length, - result, - files, - bashes, - subagents, - limit, - degraded, - }), - ); + const reportInput: FormatWasteReportInput = { + turnsAnalyzed: eligible.length, + result, + files, + bashes, + subagents, + limit, + degraded, + }; + if (coverageNotice !== undefined) reportInput.coverageNotice = coverageNotice; + process.stdout.write(formatWasteReport(reportInput)); return 0; } +// Render each source with its own missing-fields clause, since one source +// might be missing tool-result events and another might be a session +// aggregate. Joining with " and " reads naturally for ≤ 2 sources and +// doesn't get too clumsy beyond that. 
+function renderInlineSourceClauses(breakdown: CoverageGapBreakdown): string[] { + const out: string[] = []; + for (const [source, row] of breakdown.sources) { + const grans = [...row.granularities].sort(); + const missing = [...row.missing].map(fmtCoverageKey); + const inner: string[] = []; + if (missing.length > 0) inner.push(`missing ${missing.join(', ')}`); + if (grans.length > 0) inner.push(`${grans.join('+')} granularity`); + if (inner.length === 0) out.push(source); + else out.push(`${inner.join(', ')} (${source})`); + } + return out; +} + +export function formatCoverageNotice( + analyzed: number, + total: number, + breakdown: CoverageGapBreakdown, +): string { + const excluded = total - analyzed; + const sourceClauses = renderInlineSourceClauses(breakdown); + return `analyzed ${formatInt(analyzed)} of ${formatInt(total)} turns; ${formatInt(excluded)} excluded for ${sourceClauses.join(' and ')}`; +} + interface FormatWasteReportInput { turnsAnalyzed: number; result: WasteResult; @@ -132,10 +353,11 @@ interface FormatWasteReportInput { subagents: SubagentAggregation[]; limit: number; degraded: boolean; + coverageNotice?: string; } export function formatWasteReport(input: FormatWasteReportInput): string { - const { turnsAnalyzed, result, files, bashes, subagents, limit, degraded } = input; + const { turnsAnalyzed, result, files, bashes, subagents, limit, degraded, coverageNotice } = input; const evenSplitSessions = result.sessionTotals.filter( (s) => s.attributionMethod === 'even-split', ); @@ -143,6 +365,7 @@ export function formatWasteReport(input: FormatWasteReportInput): string { const out: string[] = []; out.push(''); out.push(`turns analyzed: ${formatInt(turnsAnalyzed)}`); + if (coverageNotice) out.push(coverageNotice); out.push(`session grand total: ${formatUsd(result.grandTotal)}`); if (degraded) { @@ -274,7 +497,7 @@ function truncate(s: string, n: number): string { return s.slice(0, n - 1) + '…'; } -function resolvePatternSelection(flag: string | 
true): Set { +export function resolvePatternSelection(flag: string | true): Set { if (flag === true) return new Set(PATTERN_KINDS); const set = new Set(); for (const raw of flag.split(',').map((s) => s.trim()).filter(Boolean)) { @@ -290,27 +513,192 @@ function resolvePatternSelection(flag: string | true): Set { return set; } -function renderPatterns( +// Per-detector coverage prerequisites. `compaction` is intentionally absent — +// the compaction sidecar is loaded directly from the ledger via +// `queryCompactions` and is independent of `TurnRecord.fidelity`. +// +// The revert detector needs editPreHash / editPostHash, which require +// hasRawContent upstream (the parser computes the hashes from the raw +// strings). hasToolCalls is the obvious prereq. +export const PATTERN_REQUIRED: Record< + Exclude, + ReadonlyArray +> = { + retries: ['hasToolCalls', 'hasToolResultEvents'], + failures: ['hasToolCalls', 'hasToolResultEvents'], + reverts: ['hasToolCalls', 'hasRawContent'], +}; + +interface PatternDetectorCoverage { + kind: PatternKind; + analyzed: number; + excluded: number; + // Only set when this detector required coverage (compaction never does). + breakdown?: CoverageGapBreakdown; + // Whether the detector ran on any turns at all. + refused: boolean; +} + +export async function runPatternsMode( args: ParsedArgs, - patterns: PatternsResult, + turns: EnrichedTurn[], + pricing: Awaited>, + compactions: Awaited>, selected: Set, - turnsAnalyzed: number, -): number { - const retryLoops = selected.has('retries') ? patterns.retryLoops : []; - const failureRuns = selected.has('failures') ? patterns.failureRuns : []; - const compactions = selected.has('compaction') ? patterns.compactions : []; - const editReverts = selected.has('reverts') ? patterns.editReverts : []; +): Promise { + const total = turns.length; + const fidelityAll = summarizeFidelity(turns); + + // Per-detector filtered slices. 
`compaction` always runs on the full slice + // because its data path (the sidecar) doesn't go through TurnRecord at all. + const perDetector = new Map(); + const perDetectorCoverage: PatternDetectorCoverage[] = []; + + for (const kind of selected) { + if (kind === 'compaction') { + perDetector.set(kind, turns); + perDetectorCoverage.push({ + kind, + analyzed: total, + excluded: 0, + refused: false, + }); + continue; + } + const required = PATTERN_REQUIRED[kind]; + const eligible: EnrichedTurn[] = []; + const excluded: EnrichedTurn[] = []; + for (const t of turns) { + if (turnPassesCoverage(t, required)) eligible.push(t); + else excluded.push(t); + } + perDetector.set(kind, eligible); + const coverage: PatternDetectorCoverage = { + kind, + analyzed: eligible.length, + excluded: excluded.length, + refused: total > 0 && eligible.length === 0, + }; + if (excluded.length > 0) { + coverage.breakdown = describeExcluded(excluded, required); + } + perDetectorCoverage.push(coverage); + } + + // Refusal: every selected detector that has a coverage prereq refused. If + // the only thing the user asked for was compaction (always allowed), we + // never refuse. Mixed sets where at least one detector can run continue + // with partial output and per-detector notices. + const refusableSelected = perDetectorCoverage.filter( + (d) => d.kind !== 'compaction', + ); + const allRefused = + refusableSelected.length > 0 && refusableSelected.every((d) => d.refused); + + if (allRefused) { + const lines: string[] = []; + for (const d of refusableSelected) { + const required = PATTERN_REQUIRED[d.kind as Exclude]; + const sourcesClause = d.breakdown ? 
renderSourcesClause(d.breakdown) : '(unknown sources)'; + lines.push( + ` ${d.kind}: ${total}/${total} turns lack ${required.map(fmtCoverageKey).join(' + ')} (sources: ${sourcesClause})`, + ); + } + const message = + `burn waste --patterns: no selected detectors can run on this slice.\n` + + lines.join('\n') + + `\nNo pattern analysis was performed.`; + + if (args.flags['json'] === true) { + process.stdout.write( + JSON.stringify( + { + turnsAnalyzed: 0, + retryLoops: [], + failureRuns: [], + compactions: [], + editReverts: [], + sessionSummaries: [], + fidelity: { + analyzed: 0, + excluded: total, + summary: fidelityAll, + refused: true, + perDetector: perDetectorCoverage.map(toJsonDetector), + }, + refusalReason: message, + }, + null, + 2, + ) + '\n', + ); + } + process.stderr.write(message + '\n'); + return 2; + } + + // Run each enabled detector on its own filtered slice. + let retryLoops: PatternsResult['retryLoops'] = []; + let failureRuns: PatternsResult['failureRuns'] = []; + let compactionLosses: PatternsResult['compactions'] = []; + let editReverts: PatternsResult['editReverts'] = []; + let sessionSummaries: PatternsResult['sessionSummaries'] = []; + + if (selected.has('retries')) { + const r = detectPatterns(perDetector.get('retries')!, { pricing }); + retryLoops = r.retryLoops; + } + if (selected.has('failures')) { + const r = detectPatterns(perDetector.get('failures')!, { pricing }); + failureRuns = r.failureRuns; + } + if (selected.has('compaction')) { + const r = detectPatterns(perDetector.get('compaction')!, { pricing, compactions }); + compactionLosses = r.compactions; + } + if (selected.has('reverts')) { + const r = detectPatterns(perDetector.get('reverts')!, { pricing }); + editReverts = r.editReverts; + } + + // Build session summaries on the union — anything attributed by *any* + // detector counts. 
Re-running detectPatterns on a single union slice + // doesn't work because each detector has its own coverage threshold; instead + // synthesize the summary from the per-detector results. + sessionSummaries = buildSessionSummaries( + retryLoops, + failureRuns, + compactionLosses, + editReverts, + ); + + // For the "turns analyzed" headline we report the union of analyzed slices — + // a turn that survived any detector counts. + const analyzedUnion = new Set(); + for (const d of perDetectorCoverage) { + if (d.kind === 'compaction') continue; + const slice = perDetector.get(d.kind)!; + for (const t of slice) analyzedUnion.add(`${t.sessionId}|${t.messageId}`); + } + const analyzedCount = analyzedUnion.size; if (args.flags['json'] === true) { process.stdout.write( JSON.stringify( { - turnsAnalyzed, + turnsAnalyzed: analyzedCount, retryLoops, failureRuns, - compactions, + compactions: compactionLosses, editReverts, - sessionSummaries: patterns.sessionSummaries, + sessionSummaries, + fidelity: { + analyzed: analyzedCount, + excluded: total - analyzedCount, + summary: fidelityAll, + refused: false, + perDetector: perDetectorCoverage.map(toJsonDetector), + }, }, null, 2, @@ -324,10 +712,14 @@ function renderPatterns( const out: string[] = []; out.push(''); - out.push(`turns analyzed: ${formatInt(turnsAnalyzed)}`); + out.push(`turns analyzed: ${formatInt(analyzedCount)}`); + for (const d of perDetectorCoverage) { + const notice = formatPerDetectorNotice(d, total); + if (notice) out.push(notice); + } out.push( - `sessions with patterns: ${formatInt(patterns.sessionSummaries.length)} / total pattern cost: ${formatUsd( - patterns.sessionSummaries.reduce((s, r) => s + r.totalPatternCost, 0), + `sessions with patterns: ${formatInt(sessionSummaries.length)} / total pattern cost: ${formatUsd( + sessionSummaries.reduce((s, r) => s + r.totalPatternCost, 0), )}`, ); out.push(''); @@ -344,7 +736,7 @@ function renderPatterns( } if (selected.has('compaction')) { 
out.push('Compaction-loss events'); - out.push(renderCompactionTable(compactions, limit)); + out.push(renderCompactionTable(compactionLosses, limit)); out.push(''); } if (selected.has('reverts')) { @@ -357,6 +749,100 @@ function renderPatterns( return 0; } +function toJsonDetector(d: PatternDetectorCoverage): { + kind: PatternKind; + analyzed: number; + excluded: number; + refused: boolean; + required: ReadonlyArray; + excludedBySource?: Array<{ + source: SourceKind; + count: number; + granularities: string[]; + missingCoverage: Array; + }>; +} { + const required: ReadonlyArray = + d.kind === 'compaction' ? [] : PATTERN_REQUIRED[d.kind]; + const out: ReturnType = { + kind: d.kind, + analyzed: d.analyzed, + excluded: d.excluded, + refused: d.refused, + required, + }; + if (d.breakdown && d.breakdown.sources.size > 0) { + out.excludedBySource = [...d.breakdown.sources].map(([source, row]) => ({ + source, + count: row.count, + granularities: [...row.granularities].sort(), + missingCoverage: [...row.missing], + })); + } + return out; +} + +function formatPerDetectorNotice( + d: PatternDetectorCoverage, + total: number, +): string | undefined { + if (d.excluded === 0) return undefined; + if (d.kind === 'compaction') return undefined; + const required = PATTERN_REQUIRED[d.kind as Exclude]; + const sourceClauses = d.breakdown ? 
renderInlineSourceClauses(d.breakdown) : []; + const requirements = required.map(fmtCoverageKey).join(' + '); + return `${d.kind}: analyzed ${formatInt(d.analyzed)} of ${formatInt(total)} turns; ${formatInt(d.excluded)} excluded (needs ${requirements}; ${sourceClauses.join(' and ') || 'no source breakdown'})`; +} + +function buildSessionSummaries( + retryLoops: PatternsResult['retryLoops'], + failureRuns: PatternsResult['failureRuns'], + compactions: PatternsResult['compactions'], + editReverts: PatternsResult['editReverts'], +): PatternsResult['sessionSummaries'] { + const by = new Map(); + const get = (sessionId: string): PatternsResult['sessionSummaries'][number] => { + let row = by.get(sessionId); + if (!row) { + row = { + sessionId, + retryLoopCount: 0, + failureRunCount: 0, + consecutiveFailureMax: 0, + compactionCount: 0, + editRevertCount: 0, + totalRetries: 0, + totalPatternCost: 0, + }; + by.set(sessionId, row); + } + return row; + }; + for (const r of retryLoops) { + const row = get(r.sessionId); + row.retryLoopCount++; + row.totalRetries += r.attempts; + row.totalPatternCost += r.cost; + } + for (const f of failureRuns) { + const row = get(f.sessionId); + row.failureRunCount++; + if (f.length > row.consecutiveFailureMax) row.consecutiveFailureMax = f.length; + row.totalPatternCost += f.cost; + } + for (const c of compactions) { + const row = get(c.sessionId); + row.compactionCount++; + row.totalPatternCost += c.cacheLostCost; + } + for (const e of editReverts) { + const row = get(e.sessionId); + row.editRevertCount++; + row.totalPatternCost += e.cost; + } + return [...by.values()].sort((a, b) => b.totalPatternCost - a.totalPatternCost); +} + function renderRetryTable(loops: PatternsResult['retryLoops'], limit: number): string { if (loops.length === 0) return ' (none)'; const rows: string[][] = [ From be0270e568d4e49e16dafbe8dc00bc54bee58dc9 Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Mon, 27 Apr 2026 10:18:55 -0700 Subject: [PATCH 2/3] Fix Devin 
Review: include compaction in analyzed-union for top-level counts The compaction detector has no fidelity prereq and runs against every turn in the slice, but the analyzed-union was skipping it. Result: with --patterns compaction (alone or alongside fidelity-gated detectors), the top-level turnsAnalyzed reported 0 and fidelity.excluded reported total, contradicting the per-detector entry that correctly recorded { analyzed: total, excluded: 0 }. Drop the compaction skip so its slice (always the full turns array) participates in the union. Add two regression tests: compaction-only asserts turnsAnalyzed/fidelity.{analyzed,excluded} match the slice; mixed retries+compaction asserts the union credits compaction-only turns so excluded = 0 even when retries excluded some turns. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/cli/src/commands/waste.test.ts | 90 +++++++++++++++++++++++++ packages/cli/src/commands/waste.ts | 5 +- 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/commands/waste.test.ts b/packages/cli/src/commands/waste.test.ts index f3a5ed4..602c41b 100644 --- a/packages/cli/src/commands/waste.test.ts +++ b/packages/cli/src/commands/waste.test.ts @@ -786,4 +786,94 @@ describe('runPatternsMode — per-detector partial exclusion (#100)', () => { assert.equal(stderr, ''); assert.doesNotMatch(stdout, /no selected detectors can run/); }); + + it('compaction-only counts every turn as analyzed (top-level + JSON fidelity)', async () => { + const pricing = await loadBuiltinPricing(); + // Regression for the case where --patterns compaction reported + // turnsAnalyzed: 0 / fidelity.excluded: total because the analyzed-union + // skipped the compaction slice. Compaction has no fidelity prereq, so + // every turn is "analyzed" by it. 
+ const turns: EnrichedTurn[] = []; + for (let i = 0; i < 3; i++) { + turns.push( + makeTurn({ + sessionId: 's', + messageId: `m${i}`, + turnIndex: i, + source: 'codex', + fidelity: fidelityWith('aggregate-only', 'per-session-aggregate', { + hasToolCalls: false, + hasToolResultEvents: false, + }), + }), + ); + } + const selected = new Set(['compaction'] as const); + const { stdout } = await captureStdio(() => + runPatternsMode(args({ json: true }), turns, pricing, [], selected), + ); + const payload = JSON.parse(stdout); + assert.equal(payload.turnsAnalyzed, 3); + assert.equal(payload.fidelity.analyzed, 3); + assert.equal(payload.fidelity.excluded, 0); + assert.equal(payload.fidelity.refused, false); + const compaction = payload.fidelity.perDetector.find( + (d: { kind: string }) => d.kind === 'compaction', + ); + assert.ok(compaction); + assert.equal(compaction.analyzed, 3); + assert.equal(compaction.excluded, 0); + }); + + it('mixed compaction + retries union credits compaction-only turns', async () => { + const pricing = await loadBuiltinPricing(); + // Two full-fidelity turns (analyzable by retries) + three aggregate-only + // turns (only compaction can analyze). The union should be all 5 turns; + // fidelity.excluded must be 0 because every turn was analyzed by at + // least one detector. 
+ const turns: EnrichedTurn[] = []; + for (let i = 0; i < 2; i++) { + turns.push( + makeTurn({ + sessionId: 'good', + messageId: `g${i}`, + turnIndex: i, + source: 'claude-code', + fidelity: fidelityWith('full', 'per-turn'), + }), + ); + } + for (let i = 0; i < 3; i++) { + turns.push( + makeTurn({ + sessionId: 'bad', + messageId: `b${i}`, + turnIndex: i, + source: 'codex', + fidelity: fidelityWith('aggregate-only', 'per-session-aggregate', { + hasToolCalls: false, + hasToolResultEvents: false, + }), + }), + ); + } + const selected = new Set(['retries', 'compaction'] as const); + const { stdout } = await captureStdio(() => + runPatternsMode(args({ json: true }), turns, pricing, [], selected), + ); + const payload = JSON.parse(stdout); + assert.equal(payload.turnsAnalyzed, 5); + assert.equal(payload.fidelity.analyzed, 5); + assert.equal(payload.fidelity.excluded, 0); + const retries = payload.fidelity.perDetector.find( + (d: { kind: string }) => d.kind === 'retries', + ); + assert.equal(retries.analyzed, 2); + assert.equal(retries.excluded, 3); + const compaction = payload.fidelity.perDetector.find( + (d: { kind: string }) => d.kind === 'compaction', + ); + assert.equal(compaction.analyzed, 5); + assert.equal(compaction.excluded, 0); + }); }); diff --git a/packages/cli/src/commands/waste.ts b/packages/cli/src/commands/waste.ts index b15eef9..9fdf66d 100644 --- a/packages/cli/src/commands/waste.ts +++ b/packages/cli/src/commands/waste.ts @@ -680,10 +680,11 @@ export async function runPatternsMode( ); // For the "turns analyzed" headline we report the union of analyzed slices — - // a turn that survived any detector counts. + // a turn that survived any detector counts. Compaction has no fidelity + // prereq and runs on the full slice, so every turn is "analyzed" by it + // whenever it's selected. 
const analyzedUnion = new Set(); for (const d of perDetectorCoverage) { - if (d.kind === 'compaction') continue; const slice = perDetector.get(d.kind)!; for (const t of slice) analyzedUnion.add(`${t.sessionId}|${t.messageId}`); } From bed87eefd173a4c9b85fb282987ba0cc3e94278d Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Mon, 27 Apr 2026 10:56:04 -0700 Subject: [PATCH 3/3] Fix Devin Review: don't refuse mixed selections when compaction can run; CHANGELOG to [Unreleased] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two findings from the second Devin pass: 1. waste.ts:599-603 — `--patterns retries,compaction` against an aggregate-only slice was refusing the entire run, dropping the compaction signal even though compaction has no fidelity prereq and could run. The `refusableSelected` filter (which excludes compaction) drove `allRefused`, so a fully-refused retries was enough to trip it. Switch the gate to `perDetectorCoverage.every(d => d.refused)` so compaction's `refused: false` short-circuits the refusal whenever it's in the selection. Add a regression test asserting result=0, fidelity.refused=false, compaction.analyzed=total when retries is fully refused. 2. packages/cli/CHANGELOG.md — the #100 entry was appended under [0.27.0] (already released). Move it under [Unreleased] ### Added alongside the #108 plans-fidelity entry. Per-AGENTS.md, only [Unreleased] gets new entries pre-publish. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/cli/CHANGELOG.md | 2 +- packages/cli/src/commands/waste.test.ts | 41 +++++++++++++++++++++++++ packages/cli/src/commands/waste.ts | 11 ++++--- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/packages/cli/CHANGELOG.md b/packages/cli/CHANGELOG.md index 9ea4b2c..4dfa704 100644 --- a/packages/cli/CHANGELOG.md +++ b/packages/cli/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - **`burn plans` honors per-cycle fidelity** ([#108](https://github.com/AgentWorkforce/burn/issues/108)). The list view continues to render every plan even when the cycle slice contains `partial` / `aggregate-only` / `cost-only` turns (no fidelity-based filter — `plans`, like `limits`, is permissive), but now flags low-confidence cycles so a "looks under budget" plan isn't read as authoritative. The text table grows a `confidence` column when at least one plan has any contributing turn missing per-turn input/output token data, marked `low (partial token data)`, and a footer note names the affected plan + lower-bound caveat (e.g. `note: claude-pro: 3 of 412 turns this cycle lack per-turn token data — totals are a lower bound.`). Full-fidelity cycles render exactly as before — no extra column, no footer. `--json` gains a per-plan `usage.fidelity: { confidence, summary }` block carrying the same `FidelitySummary` shape the analyze package emits elsewhere, so machine consumers can render exact counts without re-walking the ledger. `cost-only` source contributions count toward `spentUsd` and mark the cycle low-confidence on the token-coverage axis. +- **`burn waste` honors fidelity** ([#100](https://github.com/AgentWorkforce/burn/issues/100)). 
The attribution path (and the `--patterns retries|failures|reverts` detectors) now hard-filters the input slice against the coverage flags each detector requires — `attributeWaste` / `aggregateBy*` need `hasToolCalls` + `hasToolResultEvents`; `reverts` instead needs `hasToolCalls` + `hasRawContent` (for `editPreHash` / `editPostHash`); `compaction` is unchanged because its sidecar is independent of `TurnRecord.fidelity`. When *all* turns fall below the prereq, `burn waste` exits non-zero with a message naming the missing prerequisite and the source kinds responsible (`burn waste: 142/142 turns lack tool-call/tool-result coverage required for waste attribution. Sources: codex (per-session-aggregate, missing tool-call records, tool-result events). No waste analysis was performed.`). When *some* turns survive, the text and JSON output gain an "analyzed N of M" coverage notice that names the gap per source. `--json` now carries a `fidelity` block (`{ analyzed, excluded, summary, refused }`) mirroring `summary --json`; `--patterns` JSON additionally exposes a `perDetector` array with each detector's `required` flags and `excludedBySource` breakdown. When `compaction` is in the selection it always runs — its sidecar has no per-turn fidelity requirement — so `--patterns retries,compaction` against an aggregate-only slice produces partial output rather than refusing. ### Changed @@ -48,7 +49,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - **Persist user-turn block-size records during ingest** (#2). `burn ingest`, passive ingest, and the Claude/Codex/OpenCode wrappers now append parser-emitted `UserTurnRecord`s for all three harnesses. Codex passive cursors also carry the in-flight user-turn slot so resumed ingest can complete a bridge record across file-growth boundaries. `burn waste` and `burn diagnose` load these records and use them as the sized fallback when content sidecars are missing.
-- **`burn waste` honors fidelity** ([#100](https://github.com/AgentWorkforce/burn/issues/100)). The attribution path (and the `--patterns retries|failures|reverts` detectors) now hard-filters the input slice against the coverage flags each detector requires — `attributeWaste` / `aggregateBy*` need `hasToolCalls` + `hasToolResultEvents`; `reverts` additionally needs `hasRawContent` (for `editPreHash` / `editPostHash`); `compaction` is unchanged because its sidecar is independent of `TurnRecord.fidelity`. When *all* turns fall below the prereq, `burn waste` exits non-zero with a message naming the missing prerequisite and the source kinds responsible (`burn waste: 142/142 turns lack tool-call/tool-result coverage required for waste attribution. Sources: codex (per-session-aggregate, missing tool-call records, tool-result events). No waste analysis was performed.`). When *some* turns survive, the text and JSON output gain an "analyzed N of M" coverage notice that names the gap per source. `--json` now carries a `fidelity` block (`{ analyzed, excluded, summary, refused }`) mirroring `summary --json`; `--patterns` JSON additionally exposes a `perDetector` array with each detector's `required` flags and `excludedBySource` breakdown. ## [0.26.0] - 2026-04-26 diff --git a/packages/cli/src/commands/waste.test.ts b/packages/cli/src/commands/waste.test.ts index 602c41b..8bf4068 100644 --- a/packages/cli/src/commands/waste.test.ts +++ b/packages/cli/src/commands/waste.test.ts @@ -876,4 +876,45 @@ describe('runPatternsMode — per-detector partial exclusion (#100)', () => { assert.equal(compaction.analyzed, 5); assert.equal(compaction.excluded, 0); }); + + it('mixed retries+compaction does NOT refuse when only retries lacks coverage', async () => { + const pricing = await loadBuiltinPricing(); + // Every turn is aggregate-only — retries must refuse, but compaction + // has no fidelity prereq and should still run. 
Refusing the whole + // command in this case would silently drop the compaction signal. + const turns: EnrichedTurn[] = []; + for (let i = 0; i < 3; i++) { + turns.push( + makeTurn({ + sessionId: 's', + messageId: `m${i}`, + turnIndex: i, + source: 'codex', + fidelity: fidelityWith('aggregate-only', 'per-session-aggregate', { + hasToolCalls: false, + hasToolResultEvents: false, + }), + }), + ); + } + const selected = new Set(['retries', 'compaction'] as const); + const { result, stdout, stderr } = await captureStdio(() => + runPatternsMode(args({ json: true }), turns, pricing, [], selected), + ); + assert.equal(result, 0, 'must not refuse — compaction can still run'); + assert.equal(stderr, ''); + const payload = JSON.parse(stdout); + assert.equal(payload.fidelity.refused, false); + assert.equal(payload.turnsAnalyzed, 3); + const retries = payload.fidelity.perDetector.find( + (d: { kind: string }) => d.kind === 'retries', + ); + assert.equal(retries.refused, true); + assert.equal(retries.analyzed, 0); + const compaction = payload.fidelity.perDetector.find( + (d: { kind: string }) => d.kind === 'compaction', + ); + assert.equal(compaction.refused, false); + assert.equal(compaction.analyzed, 3); + }); }); diff --git a/packages/cli/src/commands/waste.ts b/packages/cli/src/commands/waste.ts index 9fdf66d..d470c70 100644 --- a/packages/cli/src/commands/waste.ts +++ b/packages/cli/src/commands/waste.ts @@ -592,15 +592,16 @@ export async function runPatternsMode( perDetectorCoverage.push(coverage); } - // Refusal: every selected detector that has a coverage prereq refused. If - // the only thing the user asked for was compaction (always allowed), we - // never refuse. Mixed sets where at least one detector can run continue - // with partial output and per-detector notices. + // Refusal: every selected detector refused. 
Compaction has no fidelity + // prereq and is recorded with refused:false unconditionally, so its + // presence in `selected` short-circuits this — we only refuse when the + // entire selection is fidelity-gated and every detector lost its slice. const refusableSelected = perDetectorCoverage.filter( (d) => d.kind !== 'compaction', ); const allRefused = - refusableSelected.length > 0 && refusableSelected.every((d) => d.refused); + perDetectorCoverage.length > 0 && + perDetectorCoverage.every((d) => d.refused); if (allRefused) { const lines: string[] = [];