From 984e86860fa1662bb6a9223a82dddd8dc059056a Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Thu, 16 Apr 2026 18:56:57 -0700
Subject: [PATCH 1/3] query costs script using big query

---
 scripts/query-message-costs.ts | 263 +++++++++++++++++++++++++++++++++
 1 file changed, 263 insertions(+)
 create mode 100644 scripts/query-message-costs.ts

diff --git a/scripts/query-message-costs.ts b/scripts/query-message-costs.ts
new file mode 100644
index 000000000..50b34ae72
--- /dev/null
+++ b/scripts/query-message-costs.ts
@@ -0,0 +1,263 @@
+/**
+ * Queries the BigQuery `message` table for the most recent rows and prints
+ * cost, upstream_inference_cost, token breakdown, and model.
+ *
+ * Used to investigate whether OpenRouter is populating BOTH `usage.cost` and
+ * `usage.cost_details.upstream_inference_cost` for non-BYOK requests, which
+ * would cause `web/src/llm-api/openrouter.ts#extractUsageAndCost` to double-
+ * count (that function returns `openRouterCost + upstreamCost`).
+ *
+ * Usage:
+ *   bun run scripts/query-message-costs.ts              # dev dataset
+ *   bun run scripts/query-message-costs.ts --prod       # prod dataset
+ *   bun run scripts/query-message-costs.ts --prod --limit 200
+ *   bun run scripts/query-message-costs.ts --prod --model anthropic/claude-opus-4.7
+ *
+ * Note: `model` is NOT a top-level column in the BigQuery `message` schema;
+ * it lives inside the `request` JSON blob, so we extract it with
+ * JSON_EXTRACT_SCALAR.
+ */
+
+import { BigQuery } from '@google-cloud/bigquery'
+
+type Args = {
+  isProd: boolean
+  limit: number
+  modelFilter: string | null
+}
+
+/** Reads --prod, --limit <n> and --model <id> from process.argv. */
+function parseArgs(): Args {
+  const argv = process.argv.slice(2)
+  const isProd = argv.includes('--prod')
+  const limitIdx = argv.indexOf('--limit')
+  const rawLimit = limitIdx >= 0 ? argv[limitIdx + 1] : undefined
+  const limit = rawLimit ? parseInt(rawLimit, 10) : 100
+  // Reject NaN / non-positive values before they reach `LIMIT @limit`.
+  if (!(limit > 0)) throw new Error(`Invalid --limit value: ${rawLimit}`)
+
+  const modelIdx = argv.indexOf('--model')
+  const modelFilter = (modelIdx >= 0 && argv[modelIdx + 1]) || null
+  return { isProd, limit, modelFilter }
+}
+
+/** Locale-formats `n` with exactly `digits` decimals ('-' if absent or NaN). */
+function fmtNum(n: number | null | undefined, digits = 0): string {
+  // `== null` matches both null and undefined.
+  if (n == null || Number.isNaN(n)) return '-'
+  const opts = { minimumFractionDigits: digits, maximumFractionDigits: digits }
+  return n.toLocaleString(undefined, opts)
+}
+
+/** Renders a dollar amount with 6 decimals; '-' if absent or NaN. */
+function fmtCost(n: number | null | undefined): string {
+  return n == null || Number.isNaN(n) ? '-' : '$' + n.toFixed(6)
+}
+
+// Anthropic Opus 4.6 / 4.7 per-1M-token pricing.
+// Used for a quick "expected cost" sanity column on Opus rows only.
+const OPUS_INPUT_PER_M = 5.0
+const OPUS_CACHE_READ_PER_M = 0.5
+const OPUS_OUTPUT_PER_M = 25.0
+
+/** Estimates cost from the OPUS_*_PER_M rates: uncached input + cache reads + output. */
+function expectedOpusCost(row: {
+  input_tokens: number
+  cache_read_input_tokens: number
+  output_tokens: number
+}): number {
+  const cached = row.cache_read_input_tokens ?? 0
+  const uncached = Math.max(0, (row.input_tokens ?? 0) - cached)
+
+  const inputCost = (uncached * OPUS_INPUT_PER_M) / 1_000_000
+  const cacheCost = (cached * OPUS_CACHE_READ_PER_M) / 1_000_000
+  const outputCost = ((row.output_tokens ?? 0) * OPUS_OUTPUT_PER_M) / 1_000_000
+  // Summed left-to-right, same as a single `a + b + c` expression.
+  return inputCost + cacheCost + outputCost
+}
+
+/**
+ * Queries the message table and prints a per-row cost table plus summary totals.
+ */
+async function main() {
+  const { isProd, limit, modelFilter } = parseArgs()
+  const dataset = isProd ? 'codebuff_data' : 'codebuff_data_dev'
+  const table = `${dataset}.message`
+
+  // The query below always restricts to opus rows with cost in [0.10, 0.25];
+  // print that up front so the output is not mistaken for "the latest N rows".
+  const modelNote = modelFilter ? ` (model = ${modelFilter})` : ''
+  console.log(`Querying last ${limit} rows from \`${table}\`${modelNote}`)
+  console.log('  (filters: model LIKE %opus%, cost BETWEEN 0.10 AND 0.25)')
+  console.log('')
+
+  const client = new BigQuery()
+
+  // `model` isn't a column — pull it from the request JSON. Cache-creation
+  // tokens are not queried; the expected-cost estimate below ignores them.
+  const query = `
+    SELECT
+      id,
+      finished_at,
+      JSON_EXTRACT_SCALAR(request, '$.model') AS model,
+      input_tokens,
+      cache_read_input_tokens,
+      output_tokens,
+      cost,
+      upstream_inference_cost
+    FROM \`${table}\`
+    WHERE TRUE
+    ${
+      modelFilter
+        ? `AND JSON_EXTRACT_SCALAR(request, '$.model') = @modelFilter`
+        : ''
+    }
+    AND JSON_EXTRACT_SCALAR(request, '$.model') LIKE '%opus%'
+    AND cost BETWEEN 0.10 AND 0.25
+    ORDER BY finished_at DESC
+    LIMIT @limit
+  `
+
+  const [rows] = await client.query({
+    query,
+    params: {
+      limit,
+      ...(modelFilter ? { modelFilter } : {}),
+    },
+  })
+
+  if (rows.length === 0) {
+    console.log('No rows found.')
+    return
+  }
+
+  // Per-row table. `ups/cost` ≈ 1.0 on a row means upstream equals the billed
+  // cost on that row — the classic signature of a double-count.
+  const header = [
+    'finished_at',
+    'model',
+    'input',
+    'cache_read',
+    'uncached_in',
+    'output',
+    'cost',
+    'upstream',
+    'cost+ups',
+    'ups/cost',
+    'expected_opus',
+  ]
+  console.log(header.join('\t'))
+
+  let doubleCountHits = 0
+  let upstreamPopulatedCount = 0
+  let totalCost = 0
+  let totalUpstream = 0
+  let opusCostSum = 0
+  let opusExpectedSum = 0
+
+  for (const row of rows) {
+    const input = Number(row.input_tokens ?? 0)
+    const cacheRead = Number(row.cache_read_input_tokens ?? 0)
+    const output = Number(row.output_tokens ?? 0)
+    const uncachedIn = Math.max(0, input - cacheRead)
+    const cost = row.cost == null ? null : Number(row.cost)
+    const upstream =
+      row.upstream_inference_cost == null
+        ? null
+        : Number(row.upstream_inference_cost)
+    const sum = (cost ?? 0) + (upstream ?? 0)
+    const ratio =
+      cost && upstream !== null && cost > 0 ? upstream / cost : null
+
+    const finished =
+      row.finished_at?.value ?? row.finished_at?.toString() ?? String(row.finished_at)
+
+    const model = row.model ?? '-'
+    const isOpus = typeof model === 'string' && model.includes('opus')
+
+    const expected = expectedOpusCost({
+      input_tokens: input,
+      cache_read_input_tokens: cacheRead,
+      output_tokens: output,
+    })
+
+    console.log(
+      [
+        String(finished).slice(0, 19),
+        model,
+        fmtNum(input),
+        fmtNum(cacheRead),
+        fmtNum(uncachedIn),
+        fmtNum(output),
+        fmtCost(cost),
+        fmtCost(upstream),
+        fmtCost(sum),
+        ratio !== null ? ratio.toFixed(2) : '-',
+        isOpus ? fmtCost(expected) : '-',
+      ].join('\t'),
+    )
+
+    if (upstream !== null && upstream > 0) {
+      upstreamPopulatedCount++
+      totalUpstream += upstream
+    }
+    if (cost !== null) totalCost += cost
+
+    if (isOpus) {
+      if (cost !== null) opusCostSum += cost
+      opusExpectedSum += expected
+    }
+
+    // Heuristic: flag rows where upstream+cost > 1.5x cost alone (likely double-count)
+    if (cost !== null && upstream !== null && upstream > 0.5 * cost) {
+      doubleCountHits++
+    }
+  }
+
+  console.log('')
+  console.log('─────────────── Summary ───────────────')
+  console.log(`Total rows:                      ${rows.length}`)
+  console.log(
+    `Rows with non-zero upstream:     ${upstreamPopulatedCount} / ${rows.length}`,
+  )
+  console.log(`Σ cost (billed):                 ${fmtCost(totalCost)}`)
+  console.log(`Σ upstream_inference_cost:       ${fmtCost(totalUpstream)}`)
+  console.log(`Σ cost + upstream:               ${fmtCost(totalCost + totalUpstream)}`)
+
+  if (opusExpectedSum > 0) {
+    console.log('')
+    console.log('─── Opus-only comparison ───')
+    console.log(`Σ actual cost (opus rows):       ${fmtCost(opusCostSum)}`)
+    console.log(`Σ expected (Opus 4.6/4.7 list):  ${fmtCost(opusExpectedSum)}`)
+    console.log(
+      `Actual / expected ratio:         ${(opusCostSum / opusExpectedSum).toFixed(
+        2,
+      )}x`,
+    )
+    console.log(
+      '  (If ≈2.0x → double-count confirmed. If ≈1.0x → cost is accurate.)',
+    )
+  }
+
+  console.log('')
+  console.log(
+    `Rows flagged as likely double-count (upstream > 0.5 × cost): ${doubleCountHits}`,
+  )
+  console.log('')
+  console.log(
+    'Hypothesis check: in web/src/llm-api/openrouter.ts#extractUsageAndCost,',
+  )
+  console.log(
+    'we do `cost = openRouterCost + upstreamCost`. If upstream is routinely',
+  )
+  console.log(
+    'populated (not 0/null) for non-BYOK rows, that addition double-counts.',
+  )
+}
+
+main()
+  .then(() => process.exit(0))
+  .catch((err) => {
+    console.error('Error:', err)
+    process.exit(1)
+  })

From b61ef28e9ec4cf3664addd027953b026a4a832ad Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Thu, 16 Apr 2026 18:58:17 -0700
Subject: [PATCH 2/3] Fix free-tier credit overdraw and
 consumeFromOrderedGrants debt accounting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root-cause fix in consumeFromOrderedGrants (packages/billing/src/balance-calculator.ts):
- Removed buggy "repay debt" first pass that treated consumption as credit addition
  (grant.balance + repayAmount), shrinking debt during spending. This caused every
  other post-exhaustion message to get free compute.
- Mutate grant.balance in-memory in the consume loop so the overflow check sees
  post-consumption state (previously stale, dropped overflow credits silently).
- Unconditionally create/extend debt on the last grant when remainingToConsume > 0
  (previously guarded by lastGrant.balance <= 0 using stale in-memory value).

Hard gate (defense-in-depth): added shouldBlockFreeUserOverdraw() and wired it into
consumeCreditsAndAddAgentStep. Free-tier users (no purchase/subscription grant) with
netBalance < credits are refused before consume/message-insert. Throws typed
InsufficientCreditsError (netBalance, chargeAmount fields) inside the advisory-lock
tx so it rolls back cleanly and the outer catch returns failure(error).

These two layers are complementary, not redundant:
- Root-cause fix = correct accounting (debt deepens monotonically)
- Hard gate = policy enforcement (free tier can't go negative; only paying users
  can accumulate debt via the fixed consume path)

Debt-settlement model is split: consume path only deepens debt; grant path
(executeGrantCreditOperation in grant-credits.ts:134-154) is the ONLY place debt is
cleared, via the existing negativeGrants-zeroing logic that runs on every credit
addition (Stripe purchases, monthly resets, referrals, admin grants). Added
cross-reference comments in both files documenting this invariant.

Tests:
- 9 unit tests for shouldBlockFreeUserOverdraw (exhausted, insufficient, sufficient,
  subscription/purchase bypass, zero-charge, referral-only, debt, multi-grant)
- 6 regression tests for consumeFromOrderedGrants using write-capture mock tx:
  debt deepening, drain-and-overflow, no debt forgiveness, happy path, multi-grant
  priority, consumed tracks overflow
- 2 tests for InsufficientCreditsError class (instance + barrel export)
- Fixed createMockGrant type (added org_id, stripe_subscription_id, extended union)
- Updated local copy of consumeFromOrderedGrants in the real-DB integration test
  and renamed/rewrote the 'should repay debt...' test to 'should not forgive
  debt...' — the old test was codifying the bug as correct behavior.

Validation: typecheck clean on packages/billing; 28/28 balance-calculator unit
tests pass; 14/14 integration tests pass against real Postgres; 128/128 full
billing test suite green.

Impact: Apr-16 credit-farming cohort of 10 freshly-created accounts consumed
~$18.4k of API compute (74% of daily burn) off 500-credit free grants. With this
fix, those accounts would have been refused after message ~6.
---
 .../balance-calculator.integration.test.ts    |  99 ++---
 .../src/__tests__/balance-calculator.test.ts  | 403 +++++++++++++++++-
 packages/billing/src/balance-calculator.ts    | 144 +++++--
 packages/billing/src/grant-credits.ts         |   7 +-
 4 files changed, 544 insertions(+), 109 deletions(-)

diff --git a/packages/billing/src/__tests__/balance-calculator.integration.test.ts b/packages/billing/src/__tests__/balance-calculator.integration.test.ts
index 5e9bac48a..28438c393 100644
--- a/packages/billing/src/__tests__/balance-calculator.integration.test.ts
+++ b/packages/billing/src/__tests__/balance-calculator.integration.test.ts
@@ -65,32 +65,7 @@ async function consumeFromOrderedGrants(params: {
   let consumed = 0
   let fromPurchased = 0
 
-  // First pass: try to repay any debt
-  for (const grant of grants) {
-    if (grant.balance < 0 && remainingToConsume > 0) {
-      const debtAmount = Math.abs(grant.balance)
-      const repayAmount = Math.min(debtAmount, remainingToConsume)
-      const newBalance = grant.balance + repayAmount
-      remainingToConsume -= repayAmount
-      consumed += repayAmount
-
-      await updateGrantBalance({
-        userId,
-        grant,
-        consumed: -repayAmount,
-        newBalance,
-        tx,
-        logger,
-      })
-
-      logger.debug(
-        { userId, grantId: grant.operation_id, repayAmount, newBalance },
-        'Repaid debt in grant',
-      )
-    }
-  }
-
-  // Second pass: consume from positive balances
+  // Consume from positive balances in priority order
   for (const grant of grants) {
     if (remainingToConsume <= 0) break
     if (grant.balance <= 0) continue
@@ -113,35 +88,41 @@ async function consumeFromOrderedGrants(params: {
       tx,
       logger,
     })
+
+    // Mutate in-memory balance so the overflow check below sees
+    // post-consumption state (not the stale original value).
+    grant.balance = newBalance
   }
 
-  // If we still have remaining to consume and no grants left, create debt in the last grant
+  // If we still have remaining to consume, create or extend debt on the
+  // last grant. After the loop above all positive-balance grants are drained.
+  // The "last grant" (lowest consumption priority, typically a subscription
+  // grant that renews monthly) absorbs the overflow as debt.
   if (remainingToConsume > 0 && grants.length > 0) {
     const lastGrant = grants[grants.length - 1]
+    const newBalance = lastGrant.balance - remainingToConsume
+
+    await updateGrantBalance({
+      userId,
+      grant: lastGrant,
+      consumed: remainingToConsume,
+      newBalance,
+      tx,
+      logger,
+    })
+    consumed += remainingToConsume
+    lastGrant.balance = newBalance
 
-    if (lastGrant.balance <= 0) {
-      const newBalance = lastGrant.balance - remainingToConsume
-      await updateGrantBalance({
+    logger.warn(
+      {
         userId,
-        grant: lastGrant,
+        grantId: lastGrant.operation_id,
+        requested: remainingToConsume,
         consumed: remainingToConsume,
-        newBalance,
-        tx,
-        logger,
-      })
-      consumed += remainingToConsume
-
-      logger.warn(
-        {
-          userId,
-          grantId: lastGrant.operation_id,
-          requested: remainingToConsume,
-          consumed: remainingToConsume,
-          newDebt: Math.abs(newBalance),
-        },
-        'Created new debt in grant',
-      )
-    }
+        newDebt: Math.abs(newBalance),
+      },
+      'Created/extended debt in grant',
+    )
   }
 
   return { consumed, fromPurchased }
@@ -789,7 +770,7 @@ describe('Balance Calculator - Integration Tests (Real DB)', () => {
       expect(grant3Balance).toBe(100) // Untouched
     })
 
-    it('should repay debt when consuming from grants with negative balance', async () => {
+    it('should not forgive debt when consuming from a positive grant (debt stays untouched)', async () => {
       const db = getTestDb()
       const now = new Date()
 
@@ -820,14 +801,10 @@ describe('Balance Calculator - Integration Tests (Real DB)', () => {
         conn: db,
       })
 
-      // Consume 80 credits
-      // The consumption algorithm works as follows:
-      // 1. First pass (debt repayment): Uses creditsToConsume to repay debt
-      //    - debt-grant has -50, repay 50 from the 80 requested, debt becomes 0
-      //    - remainingToConsume = 30, consumed = 50
-      // 2. Second pass (consumption): Consumes from positive balances
-      //    - positive-grant has 100, consume 30, becomes 70
-      //    - remainingToConsume = 0, consumed = 80
+      // Consume 80 credits.
+      // Consumption only drains positive balances. Debt grants are untouched.
+      // positive-grant (priority 10, consumed first): 100 - 80 = 20
+      // debt-grant (priority 60): stays at -50 (debt is NOT "repaid" by consumption)
       const result = await consumeFromOrderedGrants({
         userId: TEST_USER_ID,
         creditsToConsume: 80,
@@ -842,10 +819,10 @@ describe('Balance Calculator - Integration Tests (Real DB)', () => {
       const debtGrantBalance = await getGrantBalance('e2e-debt-grant')
       const positiveGrantBalance = await getGrantBalance('e2e-positive-grant')
 
-      // Debt should be repaid: -50 + 50 = 0
-      expect(debtGrantBalance).toBe(0)
-      // Positive grant: 100 - 30 (consume after debt repayment) = 70
-      expect(positiveGrantBalance).toBe(70)
+      // Debt must be untouched — consumption does not repay debt
+      expect(debtGrantBalance).toBe(-50)
+      // Positive grant: 100 - 80 = 20
+      expect(positiveGrantBalance).toBe(20)
     })
 
     it('should track purchased credits consumption correctly', async () => {
diff --git a/packages/billing/src/__tests__/balance-calculator.test.ts b/packages/billing/src/__tests__/balance-calculator.test.ts
index b56f10dc6..b4c526aca 100644
--- a/packages/billing/src/__tests__/balance-calculator.test.ts
+++ b/packages/billing/src/__tests__/balance-calculator.test.ts
@@ -21,12 +21,21 @@ function createMockGrant(overrides: {
   expires_at: Date | null
   created_at: Date
   principal?: number
-  type?: 'subscription' | 'purchase' | 'promotion' | 'organization' | 'referral'
+  type?:
+    | 'subscription'
+    | 'purchase'
+    | 'organization'
+    | 'referral'
+    | 'referral_legacy'
+    | 'free'
+    | 'admin'
+    | 'ad'
 }) {
   return {
     operation_id: overrides.operation_id,
     user_id: 'user-123',
-    organization_id: null,
+    org_id: null,
+    stripe_subscription_id: null,
     principal: overrides.principal ?? Math.max(overrides.balance, 100),
     balance: overrides.balance,
     type: overrides.type ?? ('subscription' as const),
@@ -395,6 +404,396 @@ describe('Balance Calculator - calculateUsageAndBalance', () => {
   })
 })
 
+describe('shouldBlockFreeUserOverdraw', () => {
+  afterEach(() => {
+    clearMockedModules()
+  })
+
+  async function importModule() {
+    await mockModule('@codebuff/internal/db', () => ({
+      default: {},
+    }))
+    await mockModule('@codebuff/common/analytics', () => ({
+      trackEvent: () => {},
+    }))
+    return import('@codebuff/billing/balance-calculator')
+  }
+
+  it('should block when exhausted free-tier user tries to consume', async () => {
+    const { shouldBlockFreeUserOverdraw } = await importModule()
+    expect(
+      shouldBlockFreeUserOverdraw([{ balance: 0, type: 'free' }], 100),
+    ).toBe(true)
+  })
+
+  it('should block when free-tier user balance is less than charge', async () => {
+    const { shouldBlockFreeUserOverdraw } = await importModule()
+    expect(
+      shouldBlockFreeUserOverdraw([{ balance: 50, type: 'free' }], 100),
+    ).toBe(true)
+  })
+
+  it('should not block when free-tier user has sufficient balance', async () => {
+    const { shouldBlockFreeUserOverdraw } = await importModule()
+    expect(
+      shouldBlockFreeUserOverdraw([{ balance: 500, type: 'free' }], 100),
+    ).toBe(false)
+  })
+
+  it('should not block when user has a subscription grant even with zero balance', async () => {
+    const { shouldBlockFreeUserOverdraw } = await importModule()
+    expect(
+      shouldBlockFreeUserOverdraw(
+        [
+          { balance: 0, type: 'free' },
+          { balance: 0, type: 'subscription' },
+        ],
+        100,
+      ),
+    ).toBe(false)
+  })
+
+  it('should not block when user has a purchase grant', async () => {
+    const { shouldBlockFreeUserOverdraw } = await importModule()
+    expect(
+      shouldBlockFreeUserOverdraw(
+        [
+          { balance: 0, type: 'free' },
+          { balance: 10, type: 'purchase' },
+        ],
+        100,
+      ),
+    ).toBe(false)
+  })
+
+  it('should not block when credits to charge is 0 (free-mode agent)', async () => {
+    const { shouldBlockFreeUserOverdraw } = await importModule()
+    expect(
+      shouldBlockFreeUserOverdraw([{ balance: 0, type: 'free' }], 0),
+    ).toBe(false)
+  })
+
+  it('should block referral-only user with insufficient credits', async () => {
+    const { shouldBlockFreeUserOverdraw } = await importModule()
+    expect(
+      shouldBlockFreeUserOverdraw([{ balance: 50, type: 'referral' }], 100),
+    ).toBe(true)
+  })
+
+  it('should block user in debt with no paid grants', async () => {
+    const { shouldBlockFreeUserOverdraw } = await importModule()
+    expect(
+      shouldBlockFreeUserOverdraw([{ balance: -100, type: 'free' }], 50),
+    ).toBe(true)
+  })
+
+  it('should aggregate balance across multiple unpaid grants', async () => {
+    const { shouldBlockFreeUserOverdraw } = await importModule()
+    // Total balance: 110, charge: 100 → not blocked
+    expect(
+      shouldBlockFreeUserOverdraw(
+        [
+          { balance: 30, type: 'free' },
+          { balance: 80, type: 'referral' },
+        ],
+        100,
+      ),
+    ).toBe(false)
+  })
+})
+
+describe('InsufficientCreditsError', () => {
+  afterEach(() => {
+    clearMockedModules()
+  })
+
+  async function importModule() {
+    await mockModule('@codebuff/internal/db', () => ({
+      default: {},
+    }))
+    await mockModule('@codebuff/common/analytics', () => ({
+      trackEvent: () => {},
+    }))
+    return import('@codebuff/billing/balance-calculator')
+  }
+
+  it('should be an instance of Error with the correct name and fields', async () => {
+    const { InsufficientCreditsError } = await importModule()
+    const err = new InsufficientCreditsError(-50, 200)
+    expect(err).toBeInstanceOf(Error)
+    expect(err).toBeInstanceOf(InsufficientCreditsError)
+    expect(err.name).toBe('InsufficientCreditsError')
+    expect(err.netBalance).toBe(-50)
+    expect(err.chargeAmount).toBe(200)
+    expect(err.message).toBe(
+      'Insufficient credits for free-tier user: balance=-50, charge=200',
+    )
+  })
+
+  it('should be exported from the billing barrel (@codebuff/billing)', async () => {
+    await mockModule('@codebuff/internal/db', () => ({
+      default: {},
+    }))
+    await mockModule('@codebuff/common/analytics', () => ({
+      trackEvent: () => {},
+    }))
+    const billing = await import('@codebuff/billing')
+    expect(typeof billing.InsufficientCreditsError).toBe('function')
+    const err = new billing.InsufficientCreditsError(0, 100)
+    expect(err).toBeInstanceOf(Error)
+    expect(err.name).toBe('InsufficientCreditsError')
+  })
+})
+
+describe('consumeFromOrderedGrants - credit consumption bugs', () => {
+  // Regression tests for two compounding bugs:
+  // 1. Pass 1 ("repay debt") was directionally wrong: consumption reduced debt instead of
+  //    deepening it, giving users free compute every other message after grant exhaustion.
+  // 2. Pass 3 used stale in-memory grant.balance, so drain-and-overflow silently dropped
+  //    the overflowing credits (no debt created, free compute).
+
+  afterEach(() => {
+    clearMockedModules()
+  })
+
+  /** Mock tx that captures the sequence of balance writes to the DB. */
+  function createWriteCaptureTx() {
+    const writes: number[] = []
+    const tx = {
+      update: () => ({
+        set: (values: { balance: number }) => ({
+          where: () => {
+            writes.push(values.balance)
+            return Promise.resolve()
+          },
+        }),
+      }),
+    }
+    return { tx, writes }
+  }
+
+  async function importModule() {
+    await mockModule('@codebuff/internal/db', () => ({
+      default: {},
+    }))
+    await mockModule('@codebuff/common/analytics', () => ({
+      trackEvent: () => {},
+    }))
+    return import('@codebuff/billing/balance-calculator')
+  }
+
+  it('should deepen debt (not repay it) when consuming from a grant already in debt', async () => {
+    // Bug 1 reproduction: pass 1 treated consumption as credit addition,
+    // reducing debt instead of deepening it. Every other post-exhaustion message
+    // was free compute.
+    const { consumeFromOrderedGrants } = await importModule()
+    const { tx, writes } = createWriteCaptureTx()
+
+    const grants = [
+      createMockGrant({
+        operation_id: 'debt-grant',
+        balance: -100,
+        principal: 500,
+        priority: 20,
+        type: 'free',
+        expires_at: null,
+        created_at: new Date(Date.now() - 10 * 24 * 60 * 60 * 1000),
+      }),
+    ]
+
+    const result = await consumeFromOrderedGrants({
+      userId: 'user-123',
+      creditsToConsume: 100,
+      grants,
+      logger,
+      tx: tx as any,
+    })
+
+    // Debt must deepen from -100 to -200 (not "repay" to 0)
+    expect(writes).toEqual([-200])
+    expect(result.consumed).toBe(100)
+  })
+
+  it('should create debt on overflow when draining a positive grant beyond its balance', async () => {
+    // Bug 2 reproduction: pass 3 checked lastGrant.balance <= 0 using the
+    // original (pre-drain) in-memory value. If a grant started positive and
+    // was drained to 0 in pass 2, the check saw the original positive value
+    // and skipped debt creation. The overflow credits were silently dropped.
+    const { consumeFromOrderedGrants } = await importModule()
+    const { tx, writes } = createWriteCaptureTx()
+
+    const grants = [
+      createMockGrant({
+        operation_id: 'single-grant',
+        balance: 500,
+        principal: 500,
+        priority: 20,
+        type: 'free',
+        expires_at: null,
+        created_at: new Date(Date.now() - 10 * 24 * 60 * 60 * 1000),
+      }),
+    ]
+
+    const result = await consumeFromOrderedGrants({
+      userId: 'user-123',
+      creditsToConsume: 600,
+      grants,
+      logger,
+      tx: tx as any,
+    })
+
+    // Grant drained to 0, then 100 overflow creates debt
+    expect(writes).toEqual([0, -100])
+    expect(result.consumed).toBe(600)
+  })
+
+  it('should not forgive debt on grants when consuming from a different positive grant', async () => {
+    // Combined bug: user has a debt grant (-50) and a positive grant (200).
+    // Bug 1 "repaid" the debt using 50 of the incoming consumption, then only
+    // charged 50 from the positive grant. Net: debt forgiven, user only charged
+    // 50 real credits for 100 credits of compute.
+    const { consumeFromOrderedGrants } = await importModule()
+    const { tx, writes } = createWriteCaptureTx()
+
+    const grants = [
+      createMockGrant({
+        operation_id: 'debt-free',
+        balance: -50,
+        principal: 500,
+        priority: 20,
+        type: 'free',
+        expires_at: null,
+        created_at: new Date(Date.now() - 20 * 24 * 60 * 60 * 1000),
+      }),
+      createMockGrant({
+        operation_id: 'positive-purchase',
+        balance: 200,
+        principal: 200,
+        priority: 80,
+        type: 'purchase',
+        expires_at: null,
+        created_at: new Date(Date.now() - 5 * 24 * 60 * 60 * 1000),
+      }),
+    ]
+
+    const result = await consumeFromOrderedGrants({
+      userId: 'user-123',
+      creditsToConsume: 100,
+      grants,
+      logger,
+      tx: tx as any,
+    })
+
+    // Debt grant must be untouched. All 100 consumed from purchase grant.
+    expect(writes).toEqual([100]) // Only one write: purchase 200 → 100
+    expect(result.consumed).toBe(100)
+    expect(result.fromPurchased).toBe(100)
+    // Debt grant balance unchanged
+    expect(grants[0].balance).toBe(-50)
+  })
+
+  it('should correctly consume from a positive grant without overflow (happy path)', async () => {
+    // Sanity check: basic consumption that never overflows should work identically.
+    const { consumeFromOrderedGrants } = await importModule()
+    const { tx, writes } = createWriteCaptureTx()
+
+    const grants = [
+      createMockGrant({
+        operation_id: 'healthy-grant',
+        balance: 500,
+        principal: 500,
+        priority: 20,
+        type: 'free',
+        expires_at: null,
+        created_at: new Date(Date.now() - 10 * 24 * 60 * 60 * 1000),
+      }),
+    ]
+
+    const result = await consumeFromOrderedGrants({
+      userId: 'user-123',
+      creditsToConsume: 100,
+      grants,
+      logger,
+      tx: tx as any,
+    })
+
+    expect(writes).toEqual([400])
+    expect(result.consumed).toBe(100)
+    expect(result.fromPurchased).toBe(0)
+  })
+
+  it('should consume across multiple positive grants in priority order', async () => {
+    const { consumeFromOrderedGrants } = await importModule()
+    const { tx, writes } = createWriteCaptureTx()
+
+    const grants = [
+      createMockGrant({
+        operation_id: 'sub-grant',
+        balance: 50,
+        principal: 50,
+        priority: 10,
+        type: 'subscription',
+        expires_at: new Date(Date.now() + 30 * 24 * 60 * 60 * 1000),
+        created_at: new Date(Date.now() - 20 * 24 * 60 * 60 * 1000),
+      }),
+      createMockGrant({
+        operation_id: 'purchase-grant',
+        balance: 200,
+        principal: 200,
+        priority: 80,
+        type: 'purchase',
+        expires_at: null,
+        created_at: new Date(Date.now() - 5 * 24 * 60 * 60 * 1000),
+      }),
+    ]
+
+    const result = await consumeFromOrderedGrants({
+      userId: 'user-123',
+      creditsToConsume: 150,
+      grants,
+      logger,
+      tx: tx as any,
+    })
+
+    // Sub drained (50→0), then 100 from purchase (200→100)
+    expect(writes).toEqual([0, 100])
+    expect(result.consumed).toBe(150)
+    expect(result.fromPurchased).toBe(100)
+  })
+
+  it('should track all consumed credits even when creating debt (consumed === creditsToConsume)', async () => {
+    // Before the fix, consumed was less than creditsToConsume on overflow:
+    // the overflow credits were silently dropped, so consumed only counted
+    // what was drained from positive balances.
+    const { consumeFromOrderedGrants } = await importModule()
+    const { tx, writes } = createWriteCaptureTx()
+
+    const grants = [
+      createMockGrant({
+        operation_id: 'small-grant',
+        balance: 30,
+        principal: 30,
+        priority: 20,
+        type: 'free',
+        expires_at: null,
+        created_at: new Date(Date.now() - 10 * 24 * 60 * 60 * 1000),
+      }),
+    ]
+
+    const result = await consumeFromOrderedGrants({
+      userId: 'user-123',
+      creditsToConsume: 200,
+      grants,
+      logger,
+      tx: tx as any,
+    })
+
+    // Drain 30, then 170 overflow as debt
+    expect(writes).toEqual([0, -170])
+    expect(result.consumed).toBe(200)
+  })
+})
+
 describe('Balance Calculator - Grant Ordering for Consumption', () => {
   // NOTE: This test suite uses a complex mock (createDbMockForUnion) to simulate the
   // behavior of the UNION query in `getOrderedActiveGrantsForConsumption`.
diff --git a/packages/billing/src/balance-calculator.ts b/packages/billing/src/balance-calculator.ts
index 1a2439f66..9d0352892 100644
--- a/packages/billing/src/balance-calculator.ts
+++ b/packages/billing/src/balance-calculator.ts
@@ -38,6 +38,45 @@ export interface CreditConsumptionResult {
   fromPurchased: number
 }
 
+/**
+ * Thrown when a free-tier user (no purchase or subscription grants) attempts
+ * to consume more credits than their balance allows. Carries both amounts.
+ */
+export class InsufficientCreditsError extends Error {
+  public readonly netBalance: number // balance supplied by the code that throws
+  public readonly chargeAmount: number // credits the rejected charge asked for
+
+  constructor(netBalance: number, chargeAmount: number) {
+    super(
+      `Insufficient credits for free-tier user: balance=${netBalance}, charge=${chargeAmount}`,
+    )
+    this.name = 'InsufficientCreditsError' // replaces the inherited 'Error' name
+    this.netBalance = netBalance
+    this.chargeAmount = chargeAmount
+  }
+}
+
+/**
+ * Hard gate for free-tier accounts. Returns true when a positive charge would
+ * exceed the summed balance of a user who holds no `purchase` or
+ * `subscription` grant, so credit-farming beyond granted credits is refused.
+ *
+ * Holders of a purchase or subscription grant are never blocked: they have a
+ * payment relationship and are allowed to accumulate debt.
+ */
+export function shouldBlockFreeUserOverdraw(
+  grants: Array<{ balance: number; type: string }>,
+  credits: number,
+): boolean {
+  if (credits <= 0) return false
+  let total = 0
+  for (const { balance, type } of grants) {
+    if (type === 'purchase' || type === 'subscription') return false
+    total += balance
+  }
+  return credits > total
+}
+
 // Add a minimal structural type that both `db` and `tx` satisfy
 type DbConn = Pick<
   typeof db,
@@ -170,6 +209,14 @@ export async function updateGrantBalance(params: {
 
 /**
  * Consumes credits from a list of ordered grants.
+ *
+ * **Side effect:** mutates `grants[].balance` in-memory to reflect
+ * post-consumption state. Callers must not reuse the array afterward
+ * expecting original balances.
+ *
+ * **Debt model:** consumption never repays existing debt. Debt is only
+ * cleared in `grant-credits.ts` (`executeGrantCreditOperation`) when
+ * new credits are added. This function only deepens debt on overflow.
  */
 export async function consumeFromOrderedGrants(
   params: {
@@ -188,30 +235,9 @@ export async function consumeFromOrderedGrants(
   let consumed = 0
   let fromPurchased = 0
 
-  // First pass: try to repay any debt
-  for (const grant of grants) {
-    if (grant.balance < 0 && remainingToConsume > 0) {
-      const debtAmount = Math.abs(grant.balance)
-      const repayAmount = Math.min(debtAmount, remainingToConsume)
-      const newBalance = grant.balance + repayAmount
-      remainingToConsume -= repayAmount
-      consumed += repayAmount
-
-      await updateGrantBalance({
-        ...params,
-        grant,
-        consumed: -repayAmount,
-        newBalance,
-      })
-
-      logger.debug(
-        { userId, grantId: grant.operation_id, repayAmount, newBalance },
-        'Repaid debt in grant',
-      )
-    }
-  }
-
-  // Second pass: consume from positive balances
+  // Consume from positive balances in priority order.
+  // NOTE: grants with balance <= 0 (empty or in debt) are skipped. Consumption
+  // never repays debt; grant-credits.ts does that when new credits arrive.
   for (const grant of grants) {
     if (remainingToConsume <= 0) break
     if (grant.balance <= 0) continue
@@ -232,33 +258,39 @@ export async function consumeFromOrderedGrants(
       consumed: consumeFromThisGrant,
       newBalance,
     })
+
+    // Mutate in-memory balance so the overflow check below sees
+    // post-consumption state (not the stale original value).
+    grant.balance = newBalance
   }
 
-  // If we still have remaining to consume and no grants left, create debt in the last grant
+  // If we still have remaining to consume, create or extend debt on the
+  // last grant. After the loop above all positive-balance grants are drained.
+  // The "last grant" (lowest consumption priority, typically a subscription
+  // grant that renews monthly) absorbs the overflow as debt.
   if (remainingToConsume > 0 && grants.length > 0) {
     const lastGrant = grants[grants.length - 1]
+    const newBalance = lastGrant.balance - remainingToConsume
 
-    if (lastGrant.balance <= 0) {
-      const newBalance = lastGrant.balance - remainingToConsume
-      await updateGrantBalance({
-        ...params,
-        grant: lastGrant,
-        consumed: remainingToConsume,
-        newBalance,
-      })
-      consumed += remainingToConsume
+    await updateGrantBalance({
+      ...params,
+      grant: lastGrant,
+      consumed: remainingToConsume,
+      newBalance,
+    })
+    consumed += remainingToConsume
+    lastGrant.balance = newBalance
 
-      logger.warn(
-        {
-          userId,
-          grantId: lastGrant.operation_id,
-          requested: remainingToConsume,
-          consumed: remainingToConsume,
-          newDebt: Math.abs(newBalance),
-        },
-        'Created new debt in grant',
-      )
-    }
+    logger.warn(
+      {
+        userId,
+        grantId: lastGrant.operation_id,
+        requested: remainingToConsume,
+        consumed: remainingToConsume,
+        newDebt: Math.abs(newBalance),
+      },
+      'Created/extended debt in grant',
+    )
   }
 
   return { consumed, fromPurchased }
@@ -619,6 +651,28 @@ export async function consumeCreditsAndAddAgentStep(params: {
             throw new Error('No active grants found')
           }
 
+          // Hard gate: block free-tier users from overdrawing credits.
+          // This prevents credit-farming abuse where users with only free/referral
+          // grants consume far beyond their balance, since consumeFromOrderedGrants
+          // records any overflow as debt on the last grant instead of rejecting it.
+          // (BYOK path already broke out of this `consumeCredits:` block above.)
+          if (shouldBlockFreeUserOverdraw(activeGrants, credits)) {
+            const netBalance = activeGrants.reduce(
+              (sum, g) => sum + g.balance,
+              0,
+            )
+            logger.warn(
+              {
+                userId,
+                credits,
+                netBalance,
+                grantTypes: [...new Set(activeGrants.map((g) => g.type))],
+              },
+              'Blocked free-tier user from overdrawing credits',
+            )
+            throw new InsufficientCreditsError(netBalance, credits)
+          }
+
           phase = 'consume_credits'
           consumeResult = await consumeFromOrderedGrants({
             ...params,
diff --git a/packages/billing/src/grant-credits.ts b/packages/billing/src/grant-credits.ts
index be609c746..bb16b5167 100644
--- a/packages/billing/src/grant-credits.ts
+++ b/packages/billing/src/grant-credits.ts
@@ -139,7 +139,12 @@ async function executeGrantCreditOperation(params: {
 
   const now = new Date()
 
-  // First check for any negative balances
+  // First check for any negative balances.
+  // This is the ONLY place debt is cleared. The consume path
+  // (consumeFromOrderedGrants in balance-calculator.ts) only deepens
+  // debt on overflow; it never repays it. New credit grants zero out
+  // existing debt rows here and subtract the total debt from the
+  // granted amount.
   const negativeGrants = await tx
     .select()
     .from(schema.creditLedger)

From 2e34fc17a4b5fb9ef057e88ed3fdb785fd202339 Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Thu, 16 Apr 2026 19:01:12 -0700
Subject: [PATCH 3/3] Tweak step prompt about loading skills

---
 agents/base2/base2.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts
index b4d05ca36..c20359d14 100644
--- a/agents/base2/base2.ts
+++ b/agents/base2/base2.ts
@@ -381,7 +381,7 @@ function buildImplementationStepPrompt({
   return buildArray(
     isMax &&
     `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`,
-    'You must use the skill tool to load any potentially relevant skills.',
+    'Consider loading relevant skills with the skill tool if they might help with the current task. Do not reload skills that were already loaded earlier in this conversation.',
     isFree &&
     `Spawn the thinker-with-files-gemini agent for complex problems, not routine edits. Pass the relevant filePaths.`,
     isMax &&