From e732ca41725d4d7269d9de765eca1b89242edc2c Mon Sep 17 00:00:00 2001
From: Will Washburn <will.washburn@gmail.com>
Date: Sun, 26 Apr 2026 15:33:38 -0400
Subject: [PATCH 1/5] Honor fidelity in burn plans (#108)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`computePlanUsage` now annotates each cycle with a `fidelity:
{ confidence, summary }` block computed over its contributing turns.
`confidence === 'high'` only when every turn is `full` or `usage-only`
with both per-turn input and output token coverage; otherwise `low`.
Records without a `fidelity` field stay best-effort high (matches the
codebase's existing backward-compat policy). Spend totals continue to
include `partial` / `aggregate-only` / `cost-only` contributions —
under-counting silently is worse than annotating low-confidence — so
the cycle's `spentUsd` is the lower bound the consumer renders against
the new flag.

`burn plans` (list view) renders a `confidence` column and a footer
note (e.g. `note: claude-pro: 3 of 412 turns this cycle lack per-turn
token data — totals are a lower bound.`) when at least one plan has
any low-confidence cycle. Full-fidelity cycles render exactly as
before. `--json` gains a per-plan `usage.fidelity` block.

`PlanUsageFidelity` is exported from `@relayburn/analyze`. The
`limits.test.ts` mocks now include `fidelity` because `PlanUsage`
gained a required field.

Tests cover the high/low/cost-only/partial cycle paths in analyze,
and the rendered-note + JSON shape in cli.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/analyze/CHANGELOG.md            |   1 +
 packages/analyze/src/index.ts            |   1 +
 packages/analyze/src/plan-usage.test.ts  | 160 ++++++++++++++++++-
 packages/analyze/src/plan-usage.ts       |  60 +++++++
 packages/cli/CHANGELOG.md                |   4 +
 packages/cli/src/commands/limits.test.ts |   5 +
 packages/cli/src/commands/plans.test.ts  | 189 ++++++++++++++++++++++-
 packages/cli/src/commands/plans.ts       |  71 ++++++++-
 8 files changed, 483 insertions(+), 8 deletions(-)

diff --git a/packages/analyze/CHANGELOG.md b/packages/analyze/CHANGELOG.md
index 11bafe7..ff0189b 100644
--- a/packages/analyze/CHANGELOG.md
+++ b/packages/analyze/CHANGELOG.md
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - **`compareFromArchive(query, opts)`** ([#88](https://github.com/AgentWorkforce/burn/issues/88)). New helper that builds a `CompareTable` directly from `archive.sqlite` via a single grouped `SELECT … GROUP BY model, activity, source` plus a tiny per-(model, activity) follow-up for median retries, instead of streaming every `EnrichedTurn` through `buildCompareTable` in memory. Returns `{ table, analyzedTurns }` so the caller can populate the same "turns analyzed" header the legacy path uses. Output is byte-identical to `buildCompareTable(await queryAll(q), opts)` for the parity fixture; per-source reasoning-mode handling (Codex's `included_in_output`) is preserved by grouping on `source` alongside `(model, activity)`. Powers the migration of `burn compare` to the archive read model.
+- **`PlanUsage.fidelity` annotates per-cycle token-coverage confidence** ([#108](https://github.com/AgentWorkforce/burn/issues/108)). `computePlanUsage` now walks every contributing turn through `summarizeFidelity` and emits a `{ confidence: 'high' | 'low', summary }` block alongside the existing spend/projection fields. `confidence === 'high'` only when every turn in the cycle is `full` or `usage-only` with both per-turn input and output token coverage; otherwise `low`. Records with no `fidelity` field at all (older ledger writers) are treated as best-effort high, matching the codebase's existing backward-compat policy. Spend totals continue to include `partial` / `aggregate-only` / `cost-only` contributions — under-counting is worse than annotating low-confidence — so the cycle's `spentUsd` is the lower bound the consumer renders against the new flag. The `PlanUsageFidelity` type is exported for downstream consumers.
 
 ## [0.27.0] - 2026-04-26
 
diff --git a/packages/analyze/src/index.ts b/packages/analyze/src/index.ts
index 79ea502..878df8d 100644
--- a/packages/analyze/src/index.ts
+++ b/packages/analyze/src/index.ts
@@ -78,6 +78,7 @@ export type {
   ComputePlanUsageFromArchiveOptions,
   ComputePlanUsageOptions,
   PlanUsage,
+  PlanUsageFidelity,
 } from './plan-usage.js';
 export {
   emptyFidelitySummary,
diff --git a/packages/analyze/src/plan-usage.test.ts b/packages/analyze/src/plan-usage.test.ts
index d93c808..b4c834a 100644
--- a/packages/analyze/src/plan-usage.test.ts
+++ b/packages/analyze/src/plan-usage.test.ts
@@ -3,11 +3,38 @@ import { DatabaseSync } from 'node:sqlite';
 import { describe, it } from 'node:test';
 
 import type { Plan } from '@relayburn/ledger';
-import type { SourceKind, TurnRecord } from '@relayburn/reader';
+import { EMPTY_COVERAGE, makeFidelity } from '@relayburn/reader';
+import type { Fidelity, SourceKind, TurnRecord } from '@relayburn/reader';
 
 import { computePlanUsage, cycleBounds, planUsageFromArchive } from './plan-usage.js';
 import type { PricingTable } from './pricing.js';
 
+const FULL_FIDELITY: Fidelity = makeFidelity('per-turn', {
+  ...EMPTY_COVERAGE,
+  hasInputTokens: true,
+  hasOutputTokens: true,
+  hasCacheReadTokens: true,
+  hasToolCalls: true,
+  hasToolResultEvents: true,
+  hasSessionRelationships: true,
+});
+
+const USAGE_ONLY_FIDELITY: Fidelity = makeFidelity('per-turn', {
+  ...EMPTY_COVERAGE,
+  hasInputTokens: true,
+  hasOutputTokens: true,
+});
+
+const PARTIAL_FIDELITY: Fidelity = makeFidelity('per-turn', {
+  ...EMPTY_COVERAGE,
+  hasInputTokens: true,
+  // missing output → "partial"
+});
+
+const COST_ONLY_FIDELITY: Fidelity = makeFidelity('cost-only', {
+  ...EMPTY_COVERAGE,
+});
+
 const PRICING: PricingTable = {
   'claude-sonnet-4-6': {
     input: 3,
@@ -25,8 +52,11 @@ function turn(opts: {
   outputTokens?: number;
   model?: string;
   sessionId?: string;
+  fidelity?: Fidelity;
 }): TurnRecord {
-  return {
+  // exactOptionalPropertyTypes refuses an explicit `undefined` for the
+  // optional `fidelity` field — only attach when present.
+  const base: TurnRecord = {
     v: 1,
     source: opts.source ?? 'claude-code',
     sessionId: opts.sessionId ?? 's1',
@@ -44,6 +74,7 @@ function turn(opts: {
     },
     toolCalls: [],
   };
+  return opts.fidelity ? { ...base, fidelity: opts.fidelity } : base;
 }
 
 const plan: Plan = {
@@ -205,6 +236,131 @@ describe('computePlanUsage', () => {
     const u = computePlanUsage(plan, turns, { pricing: PRICING, now });
     assert.equal(u.spentUsd, 3);
   });
+
+  // Issue #108: fidelity-aware totals. The plan view continues to count every
+  // turn that lands in the cycle (no fidelity-based filter — `plans`, like
+  // `limits`, is permissive), but annotates the cycle as low-confidence when
+  // any contributing turn lacks per-turn input/output token coverage.
+  it('reports high-confidence fidelity when every cycle turn is full', () => {
+    const turns: TurnRecord[] = [
+      turn({
+        ts: '2026-04-05T00:00:00.000Z',
+        inputTokens: 1_000_000,
+        fidelity: FULL_FIDELITY,
+      }),
+      turn({
+        ts: '2026-04-10T00:00:00.000Z',
+        inputTokens: 1_000_000,
+        fidelity: FULL_FIDELITY,
+      }),
+    ];
+    const u = computePlanUsage(plan, turns, { pricing: PRICING, now });
+    assert.equal(u.spentUsd, 6);
+    assert.equal(u.fidelity.confidence, 'high');
+    assert.equal(u.fidelity.summary.total, 2);
+    assert.equal(u.fidelity.summary.byClass.full, 2);
+  });
+
+  it('treats usage-only (per-turn input + output) cycles as high-confidence', () => {
+    const turns: TurnRecord[] = [
+      turn({
+        ts: '2026-04-05T00:00:00.000Z',
+        inputTokens: 1_000_000,
+        outputTokens: 1_000_000,
+        fidelity: USAGE_ONLY_FIDELITY,
+      }),
+    ];
+    const u = computePlanUsage(plan, turns, { pricing: PRICING, now });
+    assert.equal(u.fidelity.confidence, 'high');
+  });
+
+  it('treats turns without fidelity (older ledger writers) as high-confidence', () => {
+    // Backward-compat: pre-#41 records have no fidelity field at all and are
+    // best-effort full per the codebase convention. Don't demote a cycle to
+    // low-confidence purely because the writer was old.
+    const turns: TurnRecord[] = [
+      turn({ ts: '2026-04-05T00:00:00.000Z', inputTokens: 1_000_000 }),
+    ];
+    const u = computePlanUsage(plan, turns, { pricing: PRICING, now });
+    assert.equal(u.fidelity.confidence, 'high');
+  });
+
+  it('marks low-confidence when a cycle has any partial-fidelity turn', () => {
+    const turns: TurnRecord[] = [
+      turn({
+        ts: '2026-04-05T00:00:00.000Z',
+        inputTokens: 1_000_000,
+        outputTokens: 1_000_000,
+        fidelity: FULL_FIDELITY,
+      }),
+      // Partial: input known, output missing — its priced contribution is a
+      // lower bound. Cycle total still includes it.
+      turn({
+        ts: '2026-04-10T00:00:00.000Z',
+        inputTokens: 500_000,
+        fidelity: PARTIAL_FIDELITY,
+      }),
+    ];
+    const u = computePlanUsage(plan, turns, { pricing: PRICING, now });
+    // Spend still counts both turns: 1M input + 1M output ($3 + $15) + 500k input ($1.5)
+    assert.equal(u.spentUsd, 19.5);
+    assert.equal(u.fidelity.confidence, 'low');
+    assert.equal(u.fidelity.summary.total, 2);
+    assert.equal(u.fidelity.summary.byClass.full, 1);
+    assert.equal(u.fidelity.summary.byClass.partial, 1);
+    assert.equal(u.fidelity.summary.missingCoverage.hasOutputTokens, 1);
+  });
+
+  it('counts cost-only contributions toward spend and marks the cycle low-confidence', () => {
+    // A `cost-only` source provides a price (here: via priced tokens on the
+    // turn) but no per-turn token coverage. Spend totals include it; the
+    // cycle is flagged low-confidence on the token-coverage axis.
+    const turns: TurnRecord[] = [
+      turn({
+        ts: '2026-04-05T00:00:00.000Z',
+        inputTokens: 1_000_000,
+        outputTokens: 1_000_000,
+        fidelity: FULL_FIDELITY,
+      }),
+      turn({
+        ts: '2026-04-10T00:00:00.000Z',
+        inputTokens: 1_000_000, // priced contribution, but fidelity says "cost-only"
+        fidelity: COST_ONLY_FIDELITY,
+      }),
+    ];
+    const u = computePlanUsage(plan, turns, { pricing: PRICING, now });
+    // 1M input + 1M output = $3 + $15 = $18; then cost-only 1M input = $3 → $21
+    assert.equal(u.spentUsd, 21);
+    assert.equal(u.fidelity.confidence, 'low');
+    assert.equal(u.fidelity.summary.byClass['cost-only'], 1);
+  });
+
+  it('reports an empty cycle as high-confidence (nothing to be uncertain about)', () => {
+    const u = computePlanUsage(plan, [], { pricing: PRICING, now });
+    assert.equal(u.fidelity.confidence, 'high');
+    assert.equal(u.fidelity.summary.total, 0);
+  });
+
+  it('ignores fidelity of turns outside the cycle when deciding confidence', () => {
+    const turns: TurnRecord[] = [
+      // In-cycle, full fidelity:
+      turn({
+        ts: '2026-04-05T00:00:00.000Z',
+        inputTokens: 1_000_000,
+        outputTokens: 1_000_000,
+        fidelity: FULL_FIDELITY,
+      }),
+      // Out-of-cycle (previous month), partial: must NOT drag the cycle down.
+      turn({
+        ts: '2026-03-20T00:00:00.000Z',
+        inputTokens: 1_000_000,
+        fidelity: PARTIAL_FIDELITY,
+      }),
+    ];
+    const u = computePlanUsage(plan, turns, { pricing: PRICING, now });
+    assert.equal(u.fidelity.confidence, 'high');
+    assert.equal(u.fidelity.summary.total, 1);
+  });
 });
 
 // Minimal subset of the real `archive.sqlite` `turns` schema — just the
diff --git a/packages/analyze/src/plan-usage.ts b/packages/analyze/src/plan-usage.ts
index 2f3e471..ad3c435 100644
--- a/packages/analyze/src/plan-usage.ts
+++ b/packages/analyze/src/plan-usage.ts
@@ -4,8 +4,23 @@ import type { Plan, PlanProvider } from '@relayburn/ledger';
 import type { SourceKind, TurnRecord } from '@relayburn/reader';
 
 import { costForTurn } from './cost.js';
+import { emptyFidelitySummary, summarizeFidelity } from './fidelity.js';
+import type { FidelitySummary } from './fidelity.js';
 import type { PricingTable } from './pricing.js';
 
+// Per-cycle confidence on the spent/projected totals. `high` when every
+// contributing turn supplies per-turn input + output token coverage (i.e.
+// `full` or `usage-only` with both axes present). Otherwise `low` — the cycle
+// includes at least one `partial` / `aggregate-only` / `cost-only` turn, so
+// the totals are a lower bound on actual spend. The accompanying `summary`
+// is the same `FidelitySummary` shape `summarizeFidelity` emits for any
+// other slice — kept here so JSON consumers can render exact counts without
+// re-walking turns.
+export interface PlanUsageFidelity {
+  confidence: 'high' | 'low';
+  summary: FidelitySummary;
+}
+
 export interface PlanUsage {
   plan: Plan;
   cycleStart: Date;
@@ -29,6 +44,12 @@ export interface PlanUsage {
   // Renderers should mark these projections as "limited data" per #39's
   // acceptance criteria.
   limitedData: boolean;
+  // Token-coverage confidence over the contributing turns this cycle. See
+  // `PlanUsageFidelity`. When `confidence === 'low'`, `spentUsd` is a lower
+  // bound — at least one turn lacked per-turn input/output token data, so
+  // its priced contribution is missing or estimated. Renderers should
+  // surface this so a "looks under budget" plan isn't read as authoritative.
+  fidelity: PlanUsageFidelity;
 }
 
 const MS_PER_DAY = 24 * 60 * 60 * 1000;
@@ -51,11 +72,20 @@ export function computePlanUsage(
   const nowMs = now.getTime();
 
   let spent = 0;
+  // Like `burn limits`, `plans` is allowed to count partial / aggregate-only /
+  // cost-only contributions toward the cycle total — under-counting silently is
+  // worse than annotating low-confidence. We collect the contributing turns'
+  // fidelity blocks here so we can mark the whole cycle low-confidence below
+  // when any of them lacks per-turn input/output coverage.
+  const contributing: Array<Pick<TurnRecord, 'fidelity'>> = [];
   for (const t of turns) {
     if (!matchesProvider(plan.provider, t)) continue;
     const ts = Date.parse(t.ts);
     if (!Number.isFinite(ts)) continue;
     if (ts < cycleStartMs || ts >= cycleEndMs) continue;
+    // exactOptionalPropertyTypes refuses an explicit `undefined` for the
+    // optional `fidelity` field — only attach the property when present.
+    contributing.push(t.fidelity ? { fidelity: t.fidelity } : {});
     const cost = costForTurn(t, opts.pricing);
     if (cost) spent += cost.total;
   }
@@ -94,9 +124,39 @@ export function computePlanUsage(
     runwayDays,
     resetAt: cycleEnd.toISOString(),
     limitedData: daysElapsed < LIMITED_DATA_DAYS,
+    fidelity: deriveFidelity(contributing),
   };
 }
 
+// `confidence === 'high'` when every contributing turn carries per-turn
+// input + output token coverage — that is, `full` or `usage-only` with both
+// axes present. A turn with no `fidelity` field at all (older ledger writers,
+// pre-#41) is also treated as high; we have no signal to claim otherwise and
+// elsewhere the codebase treats unknown as best-effort full. Empty cycles
+// (no contributing turns) report high — there's nothing to be uncertain about.
+function deriveFidelity(
+  contributing: ReadonlyArray<Pick<TurnRecord, 'fidelity'>>,
+): PlanUsageFidelity {
+  if (contributing.length === 0) {
+    return { confidence: 'high', summary: emptyFidelitySummary() };
+  }
+  const summary = summarizeFidelity(contributing);
+  let confidence: 'high' | 'low' = 'high';
+  for (const t of contributing) {
+    const f = t.fidelity;
+    if (!f) continue; // unknown → treat as high, matches summarizeFidelity policy
+    if (f.class !== 'full' && f.class !== 'usage-only') {
+      confidence = 'low';
+      break;
+    }
+    if (!f.coverage.hasInputTokens || !f.coverage.hasOutputTokens) {
+      confidence = 'low';
+      break;
+    }
+  }
+  return { confidence, summary };
+}
+
 // Returns the [start, end) window for the cycle containing `now`. The
 // start is the most recent occurrence of resetDay (clamped to the month's
 // last day if resetDay > month length); the end is the next occurrence.
diff --git a/packages/cli/CHANGELOG.md b/packages/cli/CHANGELOG.md
index be9a944..58e2d50 100644
--- a/packages/cli/CHANGELOG.md
+++ b/packages/cli/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- **`burn plans` honors per-cycle fidelity** ([#108](https://github.com/AgentWorkforce/burn/issues/108)). The list view continues to render every plan even when the cycle slice contains `partial` / `aggregate-only` / `cost-only` turns (no fidelity-based filter — `plans`, like `limits`, is permissive), but now flags low-confidence cycles so a "looks under budget" plan isn't read as authoritative. The text table grows a `confidence` column when at least one plan has any contributing turn missing per-turn input/output token data, marked `low (partial token data)`, and a footer note names the affected plan + lower-bound caveat (e.g. `note: claude-pro: 3 of 412 turns this cycle lack per-turn token data — totals are a lower bound.`). Full-fidelity cycles render exactly as before — no extra column, no footer. `--json` gains a per-plan `usage.fidelity: { confidence, summary }` block carrying the same `FidelitySummary` shape the analyze package emits elsewhere, so machine consumers can render exact counts without re-walking the ledger. `cost-only` source contributions count toward `spentUsd` and mark the cycle low-confidence on the token-coverage axis.
+
 ### Changed
 
 - **`burn plans` (list view) reads spend from the archive** ([#91](https://github.com/AgentWorkforce/burn/issues/91)). The list path now issues one `SUM(...) GROUP BY (source, model)` aggregate per plan against `archive.sqlite` instead of walking the full ledger once per plan. Output is byte-identical to the legacy `queryAll()` reduce path on the parity fixture (text and `--json`); `limitedData` flagging, reset-day boundaries, multi-plan ordering, and built-in presets all carry over. Pass `--no-archive` (or set `RELAYBURN_ARCHIVE=0`) to opt back into the in-memory reduce while the migration shakes out.
diff --git a/packages/cli/src/commands/limits.test.ts b/packages/cli/src/commands/limits.test.ts
index 28c0e17..27fdc00 100644
--- a/packages/cli/src/commands/limits.test.ts
+++ b/packages/cli/src/commands/limits.test.ts
@@ -1,6 +1,8 @@
 import { strict as assert } from 'node:assert';
 import { describe, it } from 'node:test';
 
+import { emptyFidelitySummary } from '@relayburn/analyze';
+
 import {
   makeCachingFetcher,
   runLimits,
@@ -221,6 +223,7 @@ describe('burn limits', () => {
             runwayDays: 29,
             resetAt: '2026-05-01T00:00:00.000Z',
             limitedData: false,
+            fidelity: { confidence: 'high', summary: emptyFidelitySummary() },
           },
         },
       ],
@@ -260,6 +263,7 @@ describe('burn limits', () => {
             runwayDays: null,
             resetAt: '2026-05-22T00:00:00.000Z',
             limitedData: true,
+            fidelity: { confidence: 'high', summary: emptyFidelitySummary() },
           },
         },
       ],
@@ -318,6 +322,7 @@ describe('burn limits', () => {
             runwayDays: null,
             resetAt: '2026-05-01T00:00:00.000Z',
             limitedData: false,
+            fidelity: { confidence: 'high', summary: emptyFidelitySummary() },
           },
         },
       ],
diff --git a/packages/cli/src/commands/plans.test.ts b/packages/cli/src/commands/plans.test.ts
index 8482a74..ff9dbec 100644
--- a/packages/cli/src/commands/plans.test.ts
+++ b/packages/cli/src/commands/plans.test.ts
@@ -6,7 +6,8 @@ import { after, beforeEach, describe, it } from 'node:test';
 
 import { appendTurns, loadPlans, savePlans } from '@relayburn/ledger';
 import type { Plan } from '@relayburn/ledger';
-import type { TurnRecord } from '@relayburn/reader';
+import { EMPTY_COVERAGE, makeFidelity } from '@relayburn/reader';
+import type { Fidelity, TurnRecord } from '@relayburn/reader';
 
 import type { ParsedArgs } from '../args.js';
 import { runPlans, statusForPlans } from './plans.js';
@@ -364,4 +365,190 @@ describe('burn plans CLI', () => {
     assert.equal(archiveStatus[0]!.usage.daysInCycle, fallbackStatus[0]!.usage.daysInCycle);
     assert.equal(archiveStatus[0]!.usage.limitedData, fallbackStatus[0]!.usage.limitedData);
   });
+
+  // Issue #108: list view honors per-cycle fidelity. The plan still renders
+  // when partial / aggregate-only / cost-only turns land in the cycle, and
+  // surfaces a low-confidence note + JSON block so callers can tell the total
+  // is a lower bound.
+  describe('fidelity (#108)', () => {
+    const FULL_FIDELITY: Fidelity = makeFidelity('per-turn', {
+      ...EMPTY_COVERAGE,
+      hasInputTokens: true,
+      hasOutputTokens: true,
+      hasCacheReadTokens: true,
+      hasToolCalls: true,
+      hasToolResultEvents: true,
+      hasSessionRelationships: true,
+    });
+
+    const PARTIAL_FIDELITY: Fidelity = makeFidelity('per-turn', {
+      ...EMPTY_COVERAGE,
+      hasInputTokens: true,
+      // missing output → "partial"
+    });
+
+    // Per-test counter so each turn's messageId/sessionId AND content
+    // fingerprint is unique. The `appendTurns` index cache is process-wide,
+    // so without distinct session ids + token totals, a turn from the
+    // previous test would dedup the new test's matching turn. We mix the
+    // counter into the token totals (a few extra bytes per turn) to push the
+    // content fingerprint apart.
+    let testCounter = 0;
+    function fakeTurn(opts: {
+      ts: string;
+      inputTokens: number;
+      outputTokens?: number;
+      fidelity?: Fidelity;
+      label?: string;
+    }): TurnRecord {
+      testCounter++;
+      const tag = `${Date.now()}-${process.pid}-${testCounter}`;
+      const base: TurnRecord = {
+        v: 1,
+        source: 'claude-code',
+        sessionId: `s-fid-${tag}`,
+        messageId: `m-${opts.label ?? 'turn'}-${tag}`,
+        turnIndex: 0,
+        ts: opts.ts,
+        model: 'claude-sonnet-4-6',
+        usage: {
+          // Bias by the counter so each turn lands on a distinct content
+          // fingerprint even when ts + model + raw token totals would
+          // otherwise collide with a turn from a previous test.
+          input: opts.inputTokens + testCounter,
+          output: opts.outputTokens ?? 0,
+          reasoning: 0,
+          cacheRead: 0,
+          cacheCreate5m: 0,
+          cacheCreate1h: 0,
+        },
+        toolCalls: [],
+      };
+      return opts.fidelity ? { ...base, fidelity: opts.fidelity } : base;
+    }
+
+    // Pin a recent timestamp inside whatever calendar month the test runs in
+    // so the turn always lands within a reset-day=1 plan's current cycle.
+    function tsInsideCycleNow(): string {
+      const now = new Date();
+      // Anchor 30 minutes into "today" (UTC) — well after the cycle start.
+      const anchor = new Date(
+        Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate(), 0, 30),
+      );
+      return anchor.toISOString();
+    }
+
+    it('renders the table without a confidence column when every cycle is full-fidelity', async () => {
+      await savePlans([
+        {
+          id: 'claude-pro',
+          provider: 'claude',
+          name: 'Claude Pro',
+          budgetUsd: 20,
+          resetDay: 1,
+        },
+      ]);
+      await appendTurns([
+        fakeTurn({
+          ts: tsInsideCycleNow(),
+          inputTokens: 100_000,
+          outputTokens: 50_000,
+          fidelity: FULL_FIDELITY,
+        }),
+      ]);
+      const { result, stdout } = await captureStdio(() => runPlans(args()));
+      assert.equal(result, 0);
+      assert.match(stdout, /claude-pro/);
+      assert.doesNotMatch(stdout, /confidence/);
+      assert.doesNotMatch(stdout, /lower bound/);
+    });
+
+    it('appends a low-confidence note when any cycle turn lacks per-turn token data', async () => {
+      await savePlans([
+        {
+          id: 'claude-pro',
+          provider: 'claude',
+          name: 'Claude Pro',
+          budgetUsd: 20,
+          resetDay: 1,
+        },
+      ]);
+      await appendTurns([
+        fakeTurn({
+          ts: tsInsideCycleNow(),
+          inputTokens: 100_000,
+          outputTokens: 50_000,
+          label: 'full',
+          fidelity: FULL_FIDELITY,
+        }),
+        fakeTurn({
+          ts: tsInsideCycleNow(),
+          inputTokens: 100_000,
+          label: 'partial',
+          fidelity: PARTIAL_FIDELITY,
+        }),
+      ]);
+      const { result, stdout } = await captureStdio(() => runPlans(args()));
+      assert.equal(result, 0);
+      // Header shows the new column when at least one plan is low-confidence.
+      assert.match(stdout, /confidence/);
+      assert.match(stdout, /low \(partial token data\)/);
+      // Footer note names the affected plan + the lower-bound caveat.
+      assert.match(
+        stdout,
+        /note: claude-pro: 1 of 2 turns this cycle lack per-turn token data — totals are a lower bound\./,
+      );
+    });
+
+    it('emits a per-plan fidelity block in --json output', async () => {
+      await savePlans([
+        {
+          id: 'claude-pro',
+          provider: 'claude',
+          name: 'Claude Pro',
+          budgetUsd: 20,
+          resetDay: 1,
+        },
+      ]);
+      await appendTurns([
+        fakeTurn({
+          ts: tsInsideCycleNow(),
+          inputTokens: 100_000,
+          outputTokens: 50_000,
+          label: 'full-json',
+          fidelity: FULL_FIDELITY,
+        }),
+        fakeTurn({
+          ts: tsInsideCycleNow(),
+          inputTokens: 100_000,
+          label: 'partial-json',
+          fidelity: PARTIAL_FIDELITY,
+        }),
+      ]);
+      const { result, stdout } = await captureStdio(() => runPlans(args([], { json: true })));
+      assert.equal(result, 0);
+      const parsed = JSON.parse(stdout) as {
+        plans: Array<{
+          usage: {
+            plan: { id: string };
+            fidelity: {
+              confidence: 'high' | 'low';
+              summary: {
+                total: number;
+                byClass: Record<string, number>;
+                missingCoverage: Record<string, number>;
+              };
+            };
+          };
+        }>;
+      };
+      assert.equal(parsed.plans.length, 1);
+      const fid = parsed.plans[0]!.usage.fidelity;
+      assert.equal(fid.confidence, 'low');
+      assert.equal(fid.summary.total, 2);
+      assert.equal(fid.summary.byClass['full'], 1);
+      assert.equal(fid.summary.byClass['partial'], 1);
+      assert.equal(fid.summary.missingCoverage['hasOutputTokens'], 1);
+    });
+  });
 });
diff --git a/packages/cli/src/commands/plans.ts b/packages/cli/src/commands/plans.ts
index 7a43325..807d785 100644
--- a/packages/cli/src/commands/plans.ts
+++ b/packages/cli/src/commands/plans.ts
@@ -67,7 +67,21 @@ async function runList(args: ParsedArgs): Promise<number> {
   const statuses = await statusForPlans(plans, { useArchive: shouldUseArchive(args) });
 
   if (json) {
-    process.stdout.write(JSON.stringify({ plans: statuses }, null, 2) + '\n');
+    // Hand-shape the per-plan payload so the `fidelity` block is emitted next
+    // to the rest of the cycle stats. Mirrors the shape `burn limits --json`
+    // would build if it grew the same field — keep the two surfaces parallel.
+    const payload = {
+      plans: statuses.map((s) => ({
+        usage: {
+          ...s.usage,
+          fidelity: {
+            confidence: s.usage.fidelity.confidence,
+            summary: s.usage.fidelity.summary,
+          },
+        },
+      })),
+    };
+    process.stdout.write(JSON.stringify(payload, null, 2) + '\n');
     return 0;
   }
 
@@ -78,24 +92,71 @@ async function runList(args: ParsedArgs): Promise<number> {
     return 0;
   }
 
-  const rows: string[][] = [['id', 'name', 'spent', 'projected', 'budget', 'reset']];
+  const anyLowConfidence = statuses.some((s) => s.usage.fidelity.confidence === 'low');
+  const headers = ['id', 'name', 'spent', 'projected', 'budget', 'reset'];
+  if (anyLowConfidence) headers.push('confidence');
+  const rows: string[][] = [headers];
   for (const s of statuses) {
     const u = s.usage;
     const projected = formatUsd(u.projectedEndOfCycleUsd);
     const projectedCell = u.limitedData ? `${projected} (limited data)` : projected;
-    rows.push([
+    const row = [
       u.plan.id,
       u.plan.name,
       formatUsd(u.spentUsd),
       projectedCell,
       formatUsd(u.plan.budgetUsd),
       `${u.daysElapsed}/${u.daysInCycle} days`,
-    ]);
+    ];
+    if (anyLowConfidence) {
+      row.push(u.fidelity.confidence === 'low' ? 'low (partial token data)' : 'high');
+    }
+    rows.push(row);
+  }
+  let output = table(rows) + '\n';
+  // When any cycle has at least one turn missing per-turn token coverage,
+  // append a footer line that names the worst affected plan so users can
+  // tell at a glance whether the totals are a lower bound. Suppressed when
+  // every cycle is full-fidelity.
+  for (const s of statuses) {
+    const u = s.usage;
+    if (u.fidelity.confidence !== 'low') continue;
+    const total = u.fidelity.summary.total;
+    if (total === 0) continue;
+    const lacking = countTurnsLackingTokens(u.fidelity.summary);
+    if (lacking === 0) continue;
+    output +=
+      `note: ${u.plan.id}: ${lacking} of ${total} turns this cycle ` +
+      `lack per-turn token data — totals are a lower bound.\n`;
   }
-  process.stdout.write(table(rows) + '\n');
+  process.stdout.write(output);
   return 0;
 }
 
+// Count turns whose per-turn input or output token coverage is missing.
+// Mirrors the `confidence === 'low'` rule in `computePlanUsage` so the
+// rendered count agrees with the per-plan flag. We approximate using the
+// summary's `missingCoverage` counts: any turn missing input *or* output
+// counts; we take the max of the two as a safe upper bound (a turn missing
+// both still counts once, which is what the user wants to read).
+function countTurnsLackingTokens(summary: {
+  missingCoverage: { hasInputTokens: number; hasOutputTokens: number };
+  byClass: { partial: number; 'aggregate-only': number; 'cost-only': number };
+}): number {
+  const fromCoverage = Math.max(
+    summary.missingCoverage.hasInputTokens,
+    summary.missingCoverage.hasOutputTokens,
+  );
+  // Fallback for records whose granularity already classes them as
+  // aggregate-only / cost-only / partial — those are by definition missing
+  // per-turn token coverage even if the coverage flags happen to be on.
+  const fromClass =
+    summary.byClass.partial +
+    summary.byClass['aggregate-only'] +
+    summary.byClass['cost-only'];
+  return Math.max(fromCoverage, fromClass);
+}
+
 async function runAdd(args: ParsedArgs): Promise<number> {
   const provider = args.flags['provider'];
   if (typeof provider !== 'string' || !isProvider(provider)) {

From f0126d3894ca98b5f807d5bf3c261ddb6ec926ae Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 27 Apr 2026 13:17:20 +0000
Subject: [PATCH 2/5] Add root CHANGELOG entry for cross-package fidelity work
 (#108)

Co-Authored-By: Will Washburn <will.washburn@gmail.com>
---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f839e6c..5fb550b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 
 ## [Unreleased]
 
+### Added
+
+- **`burn plans` honors per-cycle fidelity** ([#108](https://github.com/AgentWorkforce/burn/issues/108)). `computePlanUsage` now annotates each cycle with a `fidelity: { confidence, summary }` block so renderers can flag low-confidence totals. The CLI list view gains a `confidence` column and footer note when any cycle contains turns missing per-turn token data; `--json` emits the same `FidelitySummary` shape. Spend totals continue to include all contributing turns — the annotation marks them as a lower bound rather than silently under-counting.
+
 ## [0.27.0] - 2026-04-26
 
 ### Added

From edef2edcfe17f7a95410414bb57ea3d77e42816c Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 27 Apr 2026 14:18:00 +0000
Subject: [PATCH 3/5] Wire fidelity into planUsageFromArchive so archive-backed
 plans emit confidence

Co-Authored-By: Will Washburn <will.washburn@gmail.com>
---
 packages/analyze/src/plan-usage.test.ts |  3 +
 packages/analyze/src/plan-usage.ts      | 90 ++++++++++++++++++++++++-
 2 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/packages/analyze/src/plan-usage.test.ts b/packages/analyze/src/plan-usage.test.ts
index b4c834a..457a406 100644
--- a/packages/analyze/src/plan-usage.test.ts
+++ b/packages/analyze/src/plan-usage.test.ts
@@ -379,6 +379,9 @@ const ARCHIVE_TURNS_DDL = `
     cache_read_tokens       INTEGER NOT NULL DEFAULT 0,
     cache_create_5m_tokens  INTEGER NOT NULL DEFAULT 0,
     cache_create_1h_tokens  INTEGER NOT NULL DEFAULT 0,
+    attribution_fidelity    TEXT,
+    tokens_present          INTEGER,
+    cost_present            INTEGER,
     PRIMARY KEY (source, session_id, message_id)
   );
   CREATE INDEX idx_turns_ts ON turns(ts);
diff --git a/packages/analyze/src/plan-usage.ts b/packages/analyze/src/plan-usage.ts
index ad3c435..c49a81f 100644
--- a/packages/analyze/src/plan-usage.ts
+++ b/packages/analyze/src/plan-usage.ts
@@ -1,7 +1,8 @@
 import type { DatabaseSync } from 'node:sqlite';
 
 import type { Plan, PlanProvider } from '@relayburn/ledger';
-import type { SourceKind, TurnRecord } from '@relayburn/reader';
+import type { Coverage, Fidelity, FidelityClass, SourceKind, TurnRecord, UsageGranularity } from '@relayburn/reader';
+import { EMPTY_COVERAGE } from '@relayburn/reader';
 
 import { costForTurn } from './cost.js';
 import { emptyFidelitySummary, summarizeFidelity } from './fidelity.js';
@@ -320,6 +321,15 @@ export function planUsageFromArchive(
     }
   }
 
+  // Query per-turn fidelity data from the archive so `deriveFidelity` can
+  // annotate the cycle with the same confidence flag the in-memory path uses.
+  const fidelityRows = queryFidelityRows(opts.db, cycleStartIso, cycleEndIso, sources ?? undefined);
+  const contributing: Array<Pick<TurnRecord, 'fidelity'>> = fidelityRows.map((r) => {
+    const fidelity = synthesizeArchiveFidelity(r);
+    return fidelity ? { fidelity } : {};
+  });
+  const fidelity = deriveFidelity(contributing);
+
   return {
     plan,
     cycleStart,
@@ -332,6 +342,7 @@ export function planUsageFromArchive(
     runwayDays,
     resetAt: cycleEnd.toISOString(),
     limitedData: daysElapsed < LIMITED_DATA_DAYS,
+    fidelity,
   };
 }
 
@@ -387,3 +398,80 @@ function runQuery(
   );
   return stmt.all(cycleStartIso, cycleEndIso, ...sources) as unknown as BucketRow[];
 }
+
+// ---------------------------------------------------------------------------
+// Archive fidelity helpers — mirrors the ledger's `synthesizeFidelity` logic
+// so the archive-backed `planUsageFromArchive` can feed `deriveFidelity` the
+// same per-turn fidelity shape the in-memory `computePlanUsage` produces.
+// ---------------------------------------------------------------------------
+
+interface FidelityRow {
+  attribution_fidelity: string | null;
+  tokens_present: number | bigint | null;
+  cost_present: number | bigint | null;
+}
+
+function queryFidelityRows(
+  db: DatabaseSync,
+  cycleStartIso: string,
+  cycleEndIso: string,
+  sources: readonly SourceKind[] | undefined,
+): FidelityRow[] {
+  const baseSql = `
+    SELECT attribution_fidelity, tokens_present, cost_present
+    FROM turns
+    WHERE ts >= ? AND ts < ?`;
+  if (sources === undefined) {
+    const stmt = db.prepare(baseSql);
+    return stmt.all(cycleStartIso, cycleEndIso) as unknown as FidelityRow[];
+  }
+  const placeholders = sources.map(() => '?').join(', ');
+  const stmt = db.prepare(`${baseSql} AND source IN (${placeholders})`);
+  return stmt.all(cycleStartIso, cycleEndIso, ...sources) as unknown as FidelityRow[];
+}
+
+function synthesizeArchiveFidelity(r: FidelityRow): Fidelity | undefined {
+  if (r.attribution_fidelity === null) return undefined;
+  const cls = r.attribution_fidelity as FidelityClass;
+  const tokensPresent = r.tokens_present !== null && Number(r.tokens_present) === 1;
+  const costPresent = r.cost_present !== null && Number(r.cost_present) === 1;
+  const granularity: UsageGranularity = costPresent ? 'cost-only' : 'per-turn';
+  const coverage = coverageForClass(cls, tokensPresent);
+  return { class: cls, granularity, coverage };
+}
+
+function coverageForClass(cls: FidelityClass, tokensPresent: boolean): Coverage {
+  switch (cls) {
+    case 'full':
+      return {
+        ...EMPTY_COVERAGE,
+        hasInputTokens: true,
+        hasOutputTokens: true,
+        hasReasoningTokens: true,
+        hasCacheReadTokens: true,
+        hasCacheCreateTokens: true,
+        hasToolCalls: true,
+        hasToolResultEvents: true,
+        hasSessionRelationships: true,
+        hasRawContent: true,
+      };
+    case 'usage-only':
+      return {
+        ...EMPTY_COVERAGE,
+        hasInputTokens: tokensPresent,
+        hasOutputTokens: tokensPresent,
+        hasCacheReadTokens: tokensPresent,
+        hasCacheCreateTokens: tokensPresent,
+      };
+    case 'partial':
+      return {
+        ...EMPTY_COVERAGE,
+        hasInputTokens: tokensPresent,
+        hasOutputTokens: tokensPresent,
+      };
+    case 'aggregate-only':
+    case 'cost-only':
+    default:
+      return { ...EMPTY_COVERAGE };
+  }
+}

From 5212bb2bc6d92d4191bd356b16850f8919e4881e Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 27 Apr 2026 14:23:26 +0000
Subject: [PATCH 4/5] Fix fidelity CLI tests: use --no-archive to test exact
 per-axis coverage

Co-Authored-By: Will Washburn <will.washburn@gmail.com>
---
 packages/cli/src/commands/plans.test.ts | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/packages/cli/src/commands/plans.test.ts b/packages/cli/src/commands/plans.test.ts
index ff9dbec..318e2e0 100644
--- a/packages/cli/src/commands/plans.test.ts
+++ b/packages/cli/src/commands/plans.test.ts
@@ -456,7 +456,11 @@ describe('burn plans CLI', () => {
           fidelity: FULL_FIDELITY,
         }),
       ]);
-      const { result, stdout } = await captureStdio(() => runPlans(args()));
+      // Force the in-memory path — the archive's single `tokens_present` bit
+      // cannot distinguish per-axis coverage, so `missingCoverage` assertions
+      // only hold on the exact-fidelity in-memory path. Archive fidelity is
+      // tested at the analyze layer (plan-usage.test.ts).
+      const { result, stdout } = await captureStdio(() => runPlans(args([], { 'no-archive': true })));
       assert.equal(result, 0);
       assert.match(stdout, /claude-pro/);
       assert.doesNotMatch(stdout, /confidence/);
@@ -488,7 +492,7 @@ describe('burn plans CLI', () => {
           fidelity: PARTIAL_FIDELITY,
         }),
       ]);
-      const { result, stdout } = await captureStdio(() => runPlans(args()));
+      const { result, stdout } = await captureStdio(() => runPlans(args([], { 'no-archive': true })));
       assert.equal(result, 0);
       // Header shows the new column when at least one plan is low-confidence.
       assert.match(stdout, /confidence/);
@@ -525,7 +529,7 @@ describe('burn plans CLI', () => {
           fidelity: PARTIAL_FIDELITY,
         }),
       ]);
-      const { result, stdout } = await captureStdio(() => runPlans(args([], { json: true })));
+      const { result, stdout } = await captureStdio(() => runPlans(args([], { json: true, 'no-archive': true })));
       assert.equal(result, 0);
       const parsed = JSON.parse(stdout) as {
         plans: Array<{

From ef5174509dfbd529e18c61852e91eb545a6b23e3 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 27 Apr 2026 14:31:23 +0000
Subject: [PATCH 5/5] Fix Devin Review findings: move #108 entry to
 [Unreleased], guard empty sources in fidelity query

Co-Authored-By: Will Washburn <will.washburn@gmail.com>
---
 packages/analyze/CHANGELOG.md      | 2 +-
 packages/analyze/src/plan-usage.ts | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/packages/analyze/CHANGELOG.md b/packages/analyze/CHANGELOG.md
index ff0189b..b6e2884 100644
--- a/packages/analyze/CHANGELOG.md
+++ b/packages/analyze/CHANGELOG.md
@@ -10,13 +10,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - `planUsageFromArchive(plan, { pricing, db, now })` ([#91](https://github.com/AgentWorkforce/burn/issues/91)) — computes `PlanUsage` for a plan via one `SUM(...) GROUP BY (source, model)` query against the archive's `turns` table instead of a full ledger scan. Returns the same shape as `computePlanUsage` so callers can swap paths cleanly. Reuses `costForTurn`'s source-aware reasoning override, so Codex `output_tokens` is not double-billed against `usage.reasoning`.
+- **`PlanUsage.fidelity` annotates per-cycle token-coverage confidence** ([#108](https://github.com/AgentWorkforce/burn/issues/108)). `computePlanUsage` now walks every contributing turn through `summarizeFidelity` and emits a `{ confidence: 'high' | 'low', summary }` block alongside the existing spend/projection fields. `confidence === 'high'` only when every turn in the cycle is `full` or `usage-only` with both per-turn input and output token coverage; otherwise `low`. Records with no `fidelity` field at all (older ledger writers) are treated as best-effort high, matching the codebase's existing backward-compat policy. Spend totals continue to include `partial` / `aggregate-only` / `cost-only` contributions — under-counting is worse than annotating low-confidence — so the cycle's `spentUsd` is the lower bound the consumer renders against the new flag. The `PlanUsageFidelity` type is exported for downstream consumers.
 
 ## [0.31.0] - 2026-04-27
 
 ### Added
 
 - **`compareFromArchive(query, opts)`** ([#88](https://github.com/AgentWorkforce/burn/issues/88)). New helper that builds a `CompareTable` directly from `archive.sqlite` via a single grouped `SELECT … GROUP BY model, activity, source` plus a tiny per-(model, activity) follow-up for median retries, instead of streaming every `EnrichedTurn` through `buildCompareTable` in memory. Returns `{ table, analyzedTurns }` so the caller can populate the same "turns analyzed" header the legacy path uses. Output is byte-identical to `buildCompareTable(await queryAll(q), opts)` for the parity fixture; per-source reasoning-mode handling (Codex's `included_in_output`) is preserved by grouping on `source` alongside `(model, activity)`. Powers the migration of `burn compare` to the archive read model.
-- **`PlanUsage.fidelity` annotates per-cycle token-coverage confidence** ([#108](https://github.com/AgentWorkforce/burn/issues/108)). `computePlanUsage` now walks every contributing turn through `summarizeFidelity` and emits a `{ confidence: 'high' | 'low', summary }` block alongside the existing spend/projection fields. `confidence === 'high'` only when every turn in the cycle is `full` or `usage-only` with both per-turn input and output token coverage; otherwise `low`. Records with no `fidelity` field at all (older ledger writers) are treated as best-effort high, matching the codebase's existing backward-compat policy. Spend totals continue to include `partial` / `aggregate-only` / `cost-only` contributions — under-counting is worse than annotating low-confidence — so the cycle's `spentUsd` is the lower bound the consumer renders against the new flag. The `PlanUsageFidelity` type is exported for downstream consumers.
 
 ## [0.27.0] - 2026-04-26
 
diff --git a/packages/analyze/src/plan-usage.ts b/packages/analyze/src/plan-usage.ts
index c49a81f..4a04c5a 100644
--- a/packages/analyze/src/plan-usage.ts
+++ b/packages/analyze/src/plan-usage.ts
@@ -323,7 +323,9 @@ export function planUsageFromArchive(
 
   // Query per-turn fidelity data from the archive so `deriveFidelity` can
   // annotate the cycle with the same confidence flag the in-memory path uses.
-  const fidelityRows = queryFidelityRows(opts.db, cycleStartIso, cycleEndIso, sources ?? undefined);
+  const fidelityRows = sources !== null && sources.length === 0
+    ? []
+    : queryFidelityRows(opts.db, cycleStartIso, cycleEndIso, sources ?? undefined);
   const contributing: Array<Pick<TurnRecord, 'fidelity'>> = fidelityRows.map((r) => {
     const fidelity = synthesizeArchiveFidelity(r);
     return fidelity ? { fidelity } : {};