From 1c294a0a1bd3e7d0b1a821347d7acc7dde97bffb Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 19 Apr 2026 13:50:29 -0700 Subject: [PATCH 1/9] Better suspended message --- web/src/app/api/v1/chat/completions/_post.ts | 150 +++++++++---------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 85e10437a..0e565ff28 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -260,7 +260,7 @@ export async function postChatCompletions(params: { return NextResponse.json( { error: 'account_suspended', - message: `Your account has been suspended due to billing issues. Please contact ${env.NEXT_PUBLIC_SUPPORT_EMAIL} to resolve this.`, + message: `Your account has been suspended. Please contact ${env.NEXT_PUBLIC_SUPPORT_EMAIL} if you did not expect this.`, }, { status: 403 }, ) @@ -468,19 +468,19 @@ export async function postChatCompletions(params: { if (ensureSubscriberBlockGrant) { try { const blockGrantResult = await ensureSubscriberBlockGrant({ userId, logger }) - + // Check if user hit subscription limit and should be rate-limited if (blockGrantResult && (isWeeklyLimitError(blockGrantResult) || isBlockExhaustedError(blockGrantResult))) { // Fetch user's preference for falling back to a-la-carte credits const preferences = getUserPreferences ? await getUserPreferences({ userId, logger }) : { fallbackToALaCarte: true } // Default to allowing a-la-carte if no preference function - + if (!preferences.fallbackToALaCarte && !isFreeModeRequest) { const resetTime = blockGrantResult.resetsAt const resetCountdown = formatQuotaResetCountdown(resetTime.toISOString()) const limitType = isWeeklyLimitError(blockGrantResult) ? 
'weekly' : '5-hour session' - + trackEvent({ event: AnalyticsEvent.CHAT_COMPLETIONS_INSUFFICIENT_CREDITS, userId, @@ -491,7 +491,7 @@ export async function postChatCompletions(params: { }, logger, }) - + return NextResponse.json( { error: 'rate_limit_exceeded', @@ -553,54 +553,54 @@ export async function postChatCompletions(params: { const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model) const stream = useSiliconFlow ? await handleSiliconFlowStream({ - body: typedBody, - userId, - stripeCustomerId, - agentId, - fetch, - logger, - insertMessageBigquery, - }) + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) : useCanopyWave - ? await handleCanopyWaveStream({ - body: typedBody, - userId, - stripeCustomerId, - agentId, - fetch, - logger, - insertMessageBigquery, - }) - : useFireworks - ? await handleFireworksStream({ - body: typedBody, - userId, - stripeCustomerId, - agentId, - fetch, - logger, - insertMessageBigquery, - }) - : useOpenAIDirect - ? await handleOpenAIStream({ - body: typedBody, - userId, - stripeCustomerId, - agentId, - fetch, - logger, - insertMessageBigquery, - }) - : await handleOpenRouterStream({ + ? await handleCanopyWaveStream({ body: typedBody, userId, stripeCustomerId, agentId, - openrouterApiKey, fetch, logger, insertMessageBigquery, }) + : useFireworks + ? await handleFireworksStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useOpenAIDirect + ? 
await handleOpenAIStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : await handleOpenRouterStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + openrouterApiKey, + fetch, + logger, + insertMessageBigquery, + }) trackEvent({ event: AnalyticsEvent.CHAT_COMPLETIONS_STREAM_STARTED, @@ -631,26 +631,16 @@ export async function postChatCompletions(params: { const nonStreamRequest = useSiliconFlow ? handleSiliconFlowNonStream({ - body: typedBody, - userId, - stripeCustomerId, - agentId, - fetch, - logger, - insertMessageBigquery, - }) + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) : useCanopyWave - ? handleCanopyWaveNonStream({ - body: typedBody, - userId, - stripeCustomerId, - agentId, - fetch, - logger, - insertMessageBigquery, - }) - : useFireworks - ? handleFireworksNonStream({ + ? handleCanopyWaveNonStream({ body: typedBody, userId, stripeCustomerId, @@ -659,26 +649,36 @@ export async function postChatCompletions(params: { logger, insertMessageBigquery, }) - : shouldUseOpenAIEndpoint - ? handleOpenAINonStream({ - body: typedBody, - userId, - stripeCustomerId, - agentId, - fetch, - logger, - insertMessageBigquery, - }) - : handleOpenRouterNonStream({ + : useFireworks + ? handleFireworksNonStream({ body: typedBody, userId, stripeCustomerId, agentId, - openrouterApiKey, fetch, logger, insertMessageBigquery, }) + : shouldUseOpenAIEndpoint + ? 
handleOpenAINonStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : handleOpenRouterNonStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + openrouterApiKey, + fetch, + logger, + insertMessageBigquery, + }) const result = await nonStreamRequest trackEvent({ From b01d2e3aaaf562dc713f5362af321e11ee9a2b40 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 19 Apr 2026 14:41:19 -0700 Subject: [PATCH 2/9] Admit users by p90 of prefil queue time instead of p50 --- .../__tests__/fireworks-health.test.ts | 22 +++++++++-------- .../server/free-session/fireworks-health.ts | 24 ++++++++++--------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/web/src/server/free-session/__tests__/fireworks-health.test.ts b/web/src/server/free-session/__tests__/fireworks-health.test.ts index 6120731cf..3475769cd 100644 --- a/web/src/server/free-session/__tests__/fireworks-health.test.ts +++ b/web/src/server/free-session/__tests__/fireworks-health.test.ts @@ -3,7 +3,7 @@ import { describe, expect, test } from 'bun:test' import { KV_BLOCKS_DEGRADED_FRACTION, KV_BLOCKS_UNHEALTHY_FRACTION, - PREFILL_QUEUE_DEGRADED_MS, + PREFILL_QUEUE_P90_DEGRADED_MS, classify, } from '../fireworks-health' @@ -19,20 +19,22 @@ function kvBlocks(value: number): PromSample { } } -/** Emit a minimal cumulative-counts histogram for prefill queue where every - * event lands in exactly one bucket `le`. */ -function prefillQueueBuckets(p50Ms: number): PromSample[] { +/** Emit a cumulative-counts histogram for prefill queue where the p90 + * percentile falls in the bucket with le ≥ p90Ms (i.e. p90 ≥ p90Ms). + * Uses 10 total events all landing in that bucket, so the 90th-percentile + * interpolates within the bucket above the bucket boundary. 
*/ +function prefillQueueBuckets(p90Ms: number): PromSample[] { const les = [50, 150, 300, 500, 750, 1000, 1500, 3000, 5000, 7500, 10000] const name = 'latency_prefill_queue_ms_bucket:sum_by_deployment' - // cumulative count = 0 below p50, 1 at and above p50 + const total = 10 return les.map((le) => ({ name, labels: { deployment_id: DEPLOY, le: String(le) }, - value: le >= p50Ms ? 1 : 0, + value: le >= p90Ms ? total : 0, })).concat({ name, labels: { deployment_id: DEPLOY, le: '+Inf' }, - value: 1, + value: total, }) } @@ -58,10 +60,10 @@ describe('fireworks health classifier', () => { expect(classify(samples, [DEPLOY])).toBe('healthy') }) - test('degraded when prefill queue p50 exceeds the threshold', () => { + test('degraded when prefill queue p90 exceeds the threshold', () => { const samples: PromSample[] = [ kvBlocks(0.5), - ...prefillQueueBuckets(PREFILL_QUEUE_DEGRADED_MS + 500), + ...prefillQueueBuckets(PREFILL_QUEUE_P90_DEGRADED_MS + 500), ] expect(classify(samples, [DEPLOY])).toBe('degraded') }) @@ -110,7 +112,7 @@ describe('fireworks health classifier', () => { const other = 'other123' const samples: PromSample[] = [ kvBlocks(0.5), - ...prefillQueueBuckets(PREFILL_QUEUE_DEGRADED_MS + 500), + ...prefillQueueBuckets(PREFILL_QUEUE_P90_DEGRADED_MS + 500), { name: 'generator_kv_blocks_fraction:avg_by_deployment', labels: { deployment_id: other }, diff --git a/web/src/server/free-session/fireworks-health.ts b/web/src/server/free-session/fireworks-health.ts index c102e721c..7d8e115e4 100644 --- a/web/src/server/free-session/fireworks-health.ts +++ b/web/src/server/free-session/fireworks-health.ts @@ -1,5 +1,6 @@ -import { FIREWORKS_ACCOUNT_ID, FIREWORKS_DEPLOYMENT_MAP } from '@/llm-api/fireworks-config' import { env } from '@codebuff/internal/env' + +import { FIREWORKS_ACCOUNT_ID, FIREWORKS_DEPLOYMENT_MAP } from '@/llm-api/fireworks-config' import { logger } from '@/util/logger' /** @@ -15,13 +16,14 @@ import { logger } from '@/util/logger' */ export type 
FireworksHealth = 'healthy' | 'degraded' | 'unhealthy' -/** Degrade once median prefill-queue latency crosses this bound. Strict by - * design — a 1s queue on top of ~1s prefill already means users feel 2s+ - * before first token. */ -export const PREFILL_QUEUE_DEGRADED_MS = 125 +/** Degrade once p90 prefill-queue latency crosses this bound. Using p90 + * instead of p50 gives a better early-warning signal — the tail starts + * rising before the median does, so we can halt admission before most + * users feel it. */ +export const PREFILL_QUEUE_P90_DEGRADED_MS = 1000 /** Leading indicator of load — responds instantly to memory pressure, while - * prefill-queue p50 is a lagging window statistic. Degrading here lets us + * prefill-queue p90 is a lagging window statistic. Degrading here lets us * halt admission *before* users feel it. */ export const KV_BLOCKS_DEGRADED_FRACTION = 0.8 @@ -160,16 +162,16 @@ function classifyOne(samples: PromSample[], deploymentId: string): FireworksHeal return 'unhealthy' } - const p50 = histogramPercentile( + const p90 = histogramPercentile( samples, 'latency_prefill_queue_ms_bucket:sum_by_deployment', deploymentId, - 50, + 90, ) - if (p50 !== undefined && p50 > PREFILL_QUEUE_DEGRADED_MS) { + if (p90 !== undefined && p90 > PREFILL_QUEUE_P90_DEGRADED_MS) { logger.info( - { deploymentId, prefillQueueP50Ms: Math.round(p50), kvBlocks }, - '[FireworksHealth] degraded: prefill queue p50 over threshold', + { deploymentId, prefillQueueP90Ms: Math.round(p90), kvBlocks }, + '[FireworksHealth] degraded: prefill queue p90 over threshold', ) return 'degraded' } From 21d5dd3c8aad3b8d602c549bef800a67d3a6f604 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 19 Apr 2026 15:17:22 -0700 Subject: [PATCH 3/9] Add .claude/settings.json with auto permission mode (#513) Co-authored-by: Claude Opus 4.6 --- .claude/settings.json | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .claude/settings.json diff --git a/.claude/settings.json 
b/.claude/settings.json new file mode 100644 index 000000000..9b82e92e3 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,5 @@ +{ + "permissions": { + "defaultMode": "auto" + } +} From 6befd5193fc7679c0da7038779a8181153118b26 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 19 Apr 2026 15:13:22 -0700 Subject: [PATCH 4/9] Allow team@codebuff.com to bypass waiting room --- web/src/app/api/v1/chat/completions/_post.ts | 6 +++- .../app/api/v1/freebuff/session/_handlers.ts | 16 ++++++--- .../free-session/__tests__/public-api.test.ts | 23 +++++++++++++ web/src/server/free-session/config.ts | 12 +++++++ web/src/server/free-session/public-api.ts | 33 ++++++++++++++++--- 5 files changed, 81 insertions(+), 9 deletions(-) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 0e565ff28..c9b616846 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -413,7 +413,11 @@ export async function postChatCompletions(params: { if (isFreeModeRequest) { const claimedInstanceId = typedBody.codebuff_metadata?.freebuff_instance_id - const gate = await checkSession({ userId, claimedInstanceId }) + const gate = await checkSession({ + userId, + userEmail: userInfo.email, + claimedInstanceId, + }) if (!gate.ok) { trackEvent({ event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR, diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts index 54157c0b8..5bed8e9c9 100644 --- a/web/src/app/api/v1/freebuff/session/_handlers.ts +++ b/web/src/app/api/v1/freebuff/session/_handlers.ts @@ -22,7 +22,9 @@ export interface FreebuffSessionDeps { sessionDeps?: SessionDeps } -type AuthResult = { error: NextResponse } | { userId: string } +type AuthResult = + | { error: NextResponse } + | { userId: string; userEmail: string | null } async function resolveUser(req: NextRequest, deps: FreebuffSessionDeps): Promise { const apiKey = 
extractApiKeyFromHeader(req) @@ -39,7 +41,7 @@ async function resolveUser(req: NextRequest, deps: FreebuffSessionDeps): Promise } const userInfo = await deps.getUserInfoFromApiKey({ apiKey, - fields: ['id'], + fields: ['id', 'email'], logger: deps.logger, }) if (!userInfo?.id) { @@ -50,7 +52,7 @@ async function resolveUser(req: NextRequest, deps: FreebuffSessionDeps): Promise ), } } - return { userId: String(userInfo.id) } + return { userId: String(userInfo.id), userEmail: userInfo.email ?? null } } function serverError( @@ -96,6 +98,7 @@ export async function postFreebuffSession( try { const state = await requestSession({ userId: auth.userId, + userEmail: auth.userEmail, deps: deps.sessionDeps, }) return NextResponse.json(state, { status: 200 }) @@ -118,6 +121,7 @@ export async function getFreebuffSession( const claimedInstanceId = req.headers.get(FREEBUFF_INSTANCE_HEADER) ?? undefined const state = await getSessionState({ userId: auth.userId, + userEmail: auth.userEmail, claimedInstanceId, deps: deps.sessionDeps, }) @@ -142,7 +146,11 @@ export async function deleteFreebuffSession( if ('error' in auth) return auth.error try { - await endUserSession({ userId: auth.userId, deps: deps.sessionDeps }) + await endUserSession({ + userId: auth.userId, + userEmail: auth.userEmail, + deps: deps.sessionDeps, + }) return NextResponse.json({ status: 'ended' }, { status: 200 }) } catch (error) { return serverError(deps, 'DELETE', auth.userId, error) diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index df34b7556..b19f24ea0 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -281,6 +281,29 @@ describe('checkSessionAdmissible', () => { expect(result.code).toBe('waiting_room_required') }) + test('bypassed email (team@codebuff.com) → ok with reason=disabled, no DB read', async () => { + const result = await 
checkSessionAdmissible({ + userId: 'u1', + userEmail: 'team@codebuff.com', + claimedInstanceId: undefined, + deps, + }) + expect(result.ok).toBe(true) + if (!result.ok) throw new Error('unreachable') + expect(result.reason).toBe('disabled') + expect(deps.rows.size).toBe(0) + }) + + test('bypassed email is case-insensitive', async () => { + const result = await checkSessionAdmissible({ + userId: 'u1', + userEmail: 'Team@Codebuff.COM', + claimedInstanceId: undefined, + deps, + }) + expect(result.ok).toBe(true) + }) + test('queued session → waiting_room_queued', async () => { await requestSession({ userId: 'u1', deps }) const result = await checkSessionAdmissible({ diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index 4e9e729c1..e70e1b5c6 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -16,6 +16,18 @@ export function isWaitingRoomEnabled(): boolean { return env.FREEBUFF_WAITING_ROOM_ENABLED } +/** Per-account override on top of the global kill switch. The internal + * `team@codebuff.com` account drives e2e tests in CI; landing it in the + * queue would make those tests flake whenever the waiting room is warm. + * Bypassed users behave exactly as if the waiting room were disabled. 
*/ +const WAITING_ROOM_BYPASS_EMAILS = new Set(['team@codebuff.com']) +export function isWaitingRoomBypassedForEmail( + email: string | null | undefined, +): boolean { + if (!email) return false + return WAITING_ROOM_BYPASS_EMAILS.has(email.toLowerCase()) +} + export function getSessionLengthMs(): number { return env.FREEBUFF_SESSION_LENGTH_MS } diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index 759a516d7..74af009cc 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -1,5 +1,6 @@ import { getSessionGraceMs, + isWaitingRoomBypassedForEmail, isWaitingRoomEnabled, } from './config' import { @@ -79,10 +80,16 @@ async function viewForRow( */ export async function requestSession(params: { userId: string + userEmail?: string | null | undefined deps?: SessionDeps }): Promise { const deps = params.deps ?? defaultDeps - if (!deps.isWaitingRoomEnabled()) return { status: 'disabled' } + if ( + !deps.isWaitingRoomEnabled() || + isWaitingRoomBypassedForEmail(params.userEmail) + ) { + return { status: 'disabled' } + } const row = await deps.joinOrTakeOver({ userId: params.userId, now: nowOf(deps) }) const view = await viewForRow(params.userId, deps, row) @@ -109,11 +116,17 @@ export async function requestSession(params: { */ export async function getSessionState(params: { userId: string + userEmail?: string | null | undefined claimedInstanceId?: string | null | undefined deps?: SessionDeps }): Promise { const deps = params.deps ?? 
defaultDeps - if (!deps.isWaitingRoomEnabled()) return { status: 'disabled' } + if ( + !deps.isWaitingRoomEnabled() || + isWaitingRoomBypassedForEmail(params.userEmail) + ) { + return { status: 'disabled' } + } const row = await deps.getSessionRow(params.userId) if (!row) return { status: 'none' } @@ -132,10 +145,16 @@ export async function getSessionState(params: { export async function endUserSession(params: { userId: string + userEmail?: string | null | undefined deps?: SessionDeps }): Promise { const deps = params.deps ?? defaultDeps - if (!deps.isWaitingRoomEnabled()) return + if ( + !deps.isWaitingRoomEnabled() || + isWaitingRoomBypassedForEmail(params.userEmail) + ) { + return + } await deps.endSession(params.userId) } @@ -169,11 +188,17 @@ export type SessionGateResult = */ export async function checkSessionAdmissible(params: { userId: string + userEmail?: string | null | undefined claimedInstanceId: string | null | undefined deps?: SessionDeps }): Promise { const deps = params.deps ?? defaultDeps - if (!deps.isWaitingRoomEnabled()) return { ok: true, reason: 'disabled' } + if ( + !deps.isWaitingRoomEnabled() || + isWaitingRoomBypassedForEmail(params.userEmail) + ) { + return { ok: true, reason: 'disabled' } + } // Pre-waiting-room CLIs never send a freebuff_instance_id. 
Classify that up // front so the caller gets a distinct code (→ 426 Upgrade Required) and the From 27cbb1086e27d4186b4648b0d9e0a91265a3fad3 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 19 Apr 2026 15:56:41 -0700 Subject: [PATCH 5/9] Estimate waiting room wait as 24 seconds per spot ahead (#516) --- web/src/server/free-session/__tests__/session-view.test.ts | 2 +- web/src/server/free-session/session-view.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts index b3bdade6a..681072b30 100644 --- a/web/src/server/free-session/__tests__/session-view.test.ts +++ b/web/src/server/free-session/__tests__/session-view.test.ts @@ -4,7 +4,7 @@ import { estimateWaitMs, toSessionStateResponse } from '../session-view' import type { InternalSessionRow } from '../types' -const WAIT_PER_SPOT_MS = 60_000 +const WAIT_PER_SPOT_MS = 24_000 const GRACE_MS = 30 * 60_000 function row(overrides: Partial = {}): InternalSessionRow { diff --git a/web/src/server/free-session/session-view.ts b/web/src/server/free-session/session-view.ts index 7ce1f75fe..582e78814 100644 --- a/web/src/server/free-session/session-view.ts +++ b/web/src/server/free-session/session-view.ts @@ -59,10 +59,10 @@ export function toSessionStateResponse(params: { return null } -const WAIT_MS_PER_SPOT_AHEAD = 60_000 +const WAIT_MS_PER_SPOT_AHEAD = 24_000 /** - * Rough wait-time estimate shown to queued users: one minute per spot ahead. + * Rough wait-time estimate shown to queued users: 24 seconds per spot ahead. * Position 1 → 0ms (next tick picks you up). 
*/ export function estimateWaitMs(params: { position: number }): number { From 76086297356ffc2c0e3716dfb9bd97f765ce7034 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 19 Apr 2026 15:57:33 -0700 Subject: [PATCH 6/9] Keep freebuff session alive when browsing /history (#515) --- cli/src/app.tsx | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/cli/src/app.tsx b/cli/src/app.tsx index 5c93cd8f6..616e7b890 100644 --- a/cli/src/app.tsx +++ b/cli/src/app.tsx @@ -285,17 +285,6 @@ export const App = ({ ) } - // Render chat history screen when requested - if (showChatHistory) { - return ( - - ) - } - // Use key to force remount when resuming a different chat from history const chatKey = resumeChatId ?? 'current' @@ -316,6 +305,10 @@ export const App = ({ initialMode={initialMode} gitRoot={gitRoot} onSwitchToGitRoot={handleSwitchToGitRoot} + showChatHistory={showChatHistory} + onSelectChat={handleResumeChat} + onCancelChatHistory={closeChatHistory} + onNewChat={handleNewChat} /> ) } @@ -336,6 +329,10 @@ interface AuthedSurfaceProps { initialMode: AgentMode | undefined gitRoot: string | null | undefined onSwitchToGitRoot: () => void + showChatHistory: boolean + onSelectChat: (chatId: string) => void + onCancelChatHistory: () => void + onNewChat: () => void } /** @@ -359,6 +356,10 @@ const AuthedSurface = ({ initialMode, gitRoot, onSwitchToGitRoot, + showChatHistory, + onSelectChat, + onCancelChatHistory, + onNewChat, }: AuthedSurfaceProps) => { const { session, error: sessionError } = useFreebuffSession() @@ -388,6 +389,20 @@ const AuthedSurface = ({ return } + // Chat history renders inside AuthedSurface so the freebuff session stays + // mounted while the user browses history. Unmounting this surface would + // DELETE the session row and drop the user back into the waiting room on + // return. 
+ if (showChatHistory) { + return ( + + ) + } + return ( Date: Sun, 19 Apr 2026 16:45:20 -0700 Subject: [PATCH 7/9] Preserve in-progress message history when agent run errors (#517) --- .../src/__tests__/main-prompt.test.ts | 3 +- packages/agent-runtime/src/run-agent-step.ts | 30 +- .../run-error-preserves-history.test.ts | 315 ++++++++++++++++++ sdk/src/run.ts | 19 +- 4 files changed, 354 insertions(+), 13 deletions(-) create mode 100644 sdk/src/__tests__/run-error-preserves-history.test.ts diff --git a/packages/agent-runtime/src/__tests__/main-prompt.test.ts b/packages/agent-runtime/src/__tests__/main-prompt.test.ts index 17b4f99e1..f68e13147 100644 --- a/packages/agent-runtime/src/__tests__/main-prompt.test.ts +++ b/packages/agent-runtime/src/__tests__/main-prompt.test.ts @@ -375,6 +375,7 @@ describe('mainPrompt', () => { it('should update consecutiveAssistantMessages when new prompt is received', async () => { const sessionState = getInitialSessionState(mockFileContext) sessionState.mainAgentState.stepsRemaining = 12 + const initialStepsRemaining = sessionState.mainAgentState.stepsRemaining const action = { type: 'prompt' as const, @@ -394,7 +395,7 @@ describe('mainPrompt', () => { // When there's a new prompt, consecutiveAssistantMessages should be set to 1 expect(newSessionState.mainAgentState.stepsRemaining).toBe( - sessionState.mainAgentState.stepsRemaining - 1, + initialStepsRemaining - 1, ) }) diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts index 704cedf3a..4b8267033 100644 --- a/packages/agent-runtime/src/run-agent-step.ts +++ b/packages/agent-runtime/src/run-agent-step.ts @@ -536,6 +536,17 @@ export const runAgentStep = async ( } } +/** + * Runs the agent loop. + * + * IMPORTANT: This function mutates `params.agentState` in place throughout the + * run (not just at return time). 
Fields like `messageHistory`, `systemPrompt`, + * `toolDefinitions`, `creditsUsed`, and `output` are updated as work progresses + * so that callers holding a reference to the same object (e.g. the SDK's + * `sessionState.mainAgentState`) see in-progress work immediately — which + * matters when an error is thrown mid-run and the normal return path is + * skipped. + */ export async function loopAgentSteps( params: { addAgentStep: AddAgentStepFn @@ -800,12 +811,13 @@ export async function loopAgentSteps( return cachedAdditionalToolDefinitions } - let currentAgentState: AgentState = { - ...initialAgentState, - messageHistory: initialMessages, - systemPrompt: system, - toolDefinitions, - } + // Mutate initialAgentState so that in-progress work propagates back to the + // caller's shared reference (e.g. SDK's sessionState.mainAgentState) even if + // an error is thrown before we return. + initialAgentState.messageHistory = initialMessages + initialAgentState.systemPrompt = system + initialAgentState.toolDefinitions = toolDefinitions + let currentAgentState: AgentState = initialAgentState // Convert tool definitions to Anthropic format for accurate token counting // Tool definitions are stored as { [name]: { description, inputSchema } } @@ -908,7 +920,8 @@ export async function loopAgentSteps( } = programmaticResult n = generateN - currentAgentState = programmaticAgentState + Object.assign(initialAgentState, programmaticAgentState) + currentAgentState = initialAgentState totalSteps = stepNumber shouldEndTurn = endTurn @@ -989,7 +1002,8 @@ export async function loopAgentSteps( logger.error('No runId found for agent state after finishing agent run') } - currentAgentState = newAgentState + Object.assign(initialAgentState, newAgentState) + currentAgentState = initialAgentState shouldEndTurn = llmShouldEndTurn nResponses = generatedResponses diff --git a/sdk/src/__tests__/run-error-preserves-history.test.ts b/sdk/src/__tests__/run-error-preserves-history.test.ts new file mode 
100644 index 000000000..95b72ead2 --- /dev/null +++ b/sdk/src/__tests__/run-error-preserves-history.test.ts @@ -0,0 +1,315 @@ +import * as mainPromptModule from '@codebuff/agent-runtime/main-prompt' +import { getInitialSessionState } from '@codebuff/common/types/session-state' +import { getStubProjectFileContext } from '@codebuff/common/util/file' +import { assistantMessage, userMessage } from '@codebuff/common/util/messages' +import { afterEach, describe, expect, it, mock, spyOn } from 'bun:test' + +import { CodebuffClient } from '../client' +import * as databaseModule from '../impl/database' + +interface ToolCallContentBlock { + type: 'tool-call' + toolCallId: string + toolName: string + input: Record +} + +const setupDatabaseMocks = () => { + spyOn(databaseModule, 'getUserInfoFromApiKey').mockResolvedValue({ + id: 'user-123', + email: 'test@example.com', + discord_id: null, + referral_code: null, + stripe_customer_id: null, + banned: false, + created_at: new Date('2024-01-01T00:00:00Z'), + }) + spyOn(databaseModule, 'fetchAgentFromDatabase').mockResolvedValue(null) + spyOn(databaseModule, 'startAgentRun').mockResolvedValue('run-1') + spyOn(databaseModule, 'finishAgentRun').mockResolvedValue(undefined) + spyOn(databaseModule, 'addAgentStep').mockResolvedValue('step-1') +} + +describe('Error preserves in-progress message history', () => { + afterEach(() => { + mock.restore() + }) + + it('preserves in-progress assistant work on error (simulated via shared state mutation)', async () => { + setupDatabaseMocks() + + // Simulate the agent runtime: + // 1. Mutates the shared session state with the user message and partial work + // 2. 
Then throws due to a downstream timeout/service error + spyOn(mainPromptModule, 'callMainPrompt').mockImplementation( + async (params: Parameters[0]) => { + const mainAgentState = params.action.sessionState.mainAgentState + + // Match the real runtime's behavior: replace messageHistory with a new + // array that includes the user prompt as its first entry. The SDK + // detects runtime progress via reference inequality, so we must + // reassign the array rather than pushing into it. + mainAgentState.messageHistory = [ + ...mainAgentState.messageHistory, + { + role: 'user', + content: [{ type: 'text', text: 'Fix the bug in auth.ts' }], + tags: ['USER_PROMPT'], + }, + { + role: 'assistant', + content: [ + { type: 'text', text: 'Let me read the auth file first.' }, + { + type: 'tool-call', + toolCallId: 'read-1', + toolName: 'read_files', + input: { paths: ['auth.ts'] }, + } as ToolCallContentBlock, + ], + }, + { + role: 'tool', + toolCallId: 'read-1', + toolName: 'read_files', + content: [ + { + type: 'json', + value: [{ path: 'auth.ts', content: 'const auth = ...' }], + }, + ], + }, + { + role: 'assistant', + content: [ + { type: 'text', text: 'Found the issue, writing the fix now.' 
}, + { + type: 'tool-call', + toolCallId: 'write-1', + toolName: 'write_file', + input: { path: 'auth.ts', content: 'const auth = fixed' }, + } as ToolCallContentBlock, + ], + }, + { + role: 'tool', + toolCallId: 'write-1', + toolName: 'write_file', + content: [{ type: 'json', value: { file: 'auth.ts', message: 'File written' } }], + }, + ] + + // Now simulate a server timeout on the next LLM call + const timeoutError = new Error('Service Unavailable') as Error & { + statusCode: number + responseBody: string + } + timeoutError.statusCode = 503 + timeoutError.responseBody = JSON.stringify({ + message: 'Request timeout after 30s', + }) + throw timeoutError + }, + ) + + const client = new CodebuffClient({ apiKey: 'test-key' }) + const result = await client.run({ + agent: 'base2', + prompt: 'Fix the bug in auth.ts', + }) + + // Error output with correct status code + expect(result.output.type).toBe('error') + const errorOutput = result.output as { + type: 'error' + message: string + statusCode?: number + } + expect(errorOutput.statusCode).toBe(503) + + const history = result.sessionState!.mainAgentState.messageHistory + + // The user's prompt should appear exactly once + const userPromptMessages = history.filter( + (m) => + m.role === 'user' && + (m.content as Array<{ type: string; text?: string }>).some( + (c) => c.type === 'text' && c.text?.includes('Fix the bug'), + ), + ) + expect(userPromptMessages.length).toBe(1) + + // Assistant text messages from both steps should be preserved + const firstAssistantText = history.find( + (m) => + m.role === 'assistant' && + (m.content as Array<{ type: string; text?: string }>).some( + (c) => c.type === 'text' && c.text?.includes('read the auth file'), + ), + ) + expect(firstAssistantText).toBeDefined() + + const secondAssistantText = history.find( + (m) => + m.role === 'assistant' && + (m.content as Array<{ type: string; text?: string }>).some( + (c) => c.type === 'text' && c.text?.includes('writing the fix'), + ), + ) + 
expect(secondAssistantText).toBeDefined() + + // Both tool calls and both tool results should be preserved + const readToolCall = history.find( + (m) => + m.role === 'assistant' && + (m.content as Array<{ type: string; toolCallId?: string }>).some( + (c) => c.type === 'tool-call' && c.toolCallId === 'read-1', + ), + ) + expect(readToolCall).toBeDefined() + + const writeToolCall = history.find( + (m) => + m.role === 'assistant' && + (m.content as Array<{ type: string; toolCallId?: string }>).some( + (c) => c.type === 'tool-call' && c.toolCallId === 'write-1', + ), + ) + expect(writeToolCall).toBeDefined() + + const readToolResult = history.find( + (m) => m.role === 'tool' && m.toolCallId === 'read-1', + ) + expect(readToolResult).toBeDefined() + + const writeToolResult = history.find( + (m) => m.role === 'tool' && m.toolCallId === 'write-1', + ) + expect(writeToolResult).toBeDefined() + }) + + it('a subsequent run after error includes the preserved in-progress history', async () => { + setupDatabaseMocks() + + // Run 1: agent does some work then hits an error + spyOn(mainPromptModule, 'callMainPrompt').mockImplementation( + async (params: Parameters[0]) => { + const mainAgentState = params.action.sessionState.mainAgentState + + mainAgentState.messageHistory = [ + ...mainAgentState.messageHistory, + { + role: 'user', + content: [{ type: 'text', text: 'Investigate the login bug' }], + tags: ['USER_PROMPT'], + }, + assistantMessage('I found the problem in auth.ts on line 42.'), + { + role: 'assistant', + content: [ + { + type: 'tool-call', + toolCallId: 'read-login', + toolName: 'read_files', + input: { paths: ['login.ts'] }, + } as ToolCallContentBlock, + ], + }, + { + role: 'tool', + toolCallId: 'read-login', + toolName: 'read_files', + content: [{ type: 'json', value: [{ path: 'login.ts', content: 'login code' }] }], + }, + ] + + const error = new Error('Service Unavailable') as Error & { + statusCode: number + } + error.statusCode = 503 + throw error + }, + ) + + 
const client = new CodebuffClient({ apiKey: 'test-key' }) + const firstResult = await client.run({ + agent: 'base2', + prompt: 'Investigate the login bug', + }) + + expect(firstResult.output.type).toBe('error') + + // Run 2: use the failed run as previousRun + mock.restore() + setupDatabaseMocks() + + let historyReceivedByRuntime: unknown[] | undefined + spyOn(mainPromptModule, 'callMainPrompt').mockImplementation( + async (params: Parameters<typeof mainPromptModule.callMainPrompt>[0]) => { + const { sendAction, promptId } = params + historyReceivedByRuntime = [ + ...params.action.sessionState.mainAgentState.messageHistory, + ] + + const responseSessionState = getInitialSessionState( + getStubProjectFileContext(), + ) + responseSessionState.mainAgentState.messageHistory = [ + ...params.action.sessionState.mainAgentState.messageHistory, + userMessage('Now try again'), + assistantMessage('Continuing with the fix.'), + ] + + await sendAction({ + action: { + type: 'prompt-response', + promptId, + sessionState: responseSessionState, + output: { type: 'lastMessage', value: [] }, + }, + }) + + return { + sessionState: responseSessionState, + output: { type: 'lastMessage' as const, value: [] }, + } + }, + ) + + const secondResult = await client.run({ + agent: 'base2', + prompt: 'Now try again', + previousRun: firstResult, + }) + + // The runtime should have received history containing the work from the first run + expect(historyReceivedByRuntime).toBeDefined() + const receivedReadCall = historyReceivedByRuntime!.find( + (m) => + (m as { role: string }).role === 'assistant' && + ((m as { content: Array<{ type: string; toolCallId?: string }> }) + .content ?? 
[]).some( + (c) => c.type === 'tool-call' && c.toolCallId === 'read-login', + ), + ) + expect(receivedReadCall).toBeDefined() + + const receivedToolResult = historyReceivedByRuntime!.find( + (m) => + (m as { role: string }).role === 'tool' && + (m as { toolCallId: string }).toolCallId === 'read-login', + ) + expect(receivedToolResult).toBeDefined() + + // Final result should preserve history + const finalHistory = secondResult.sessionState!.mainAgentState.messageHistory + const finalReadCall = finalHistory.find( + (m) => + m.role === 'assistant' && + (m.content as Array<{ type: string; toolCallId?: string }>).some( + (c) => c.type === 'tool-call' && c.toolCallId === 'read-login', + ), + ) + expect(finalReadCall).toBeDefined() + }) +}) diff --git a/sdk/src/run.ts b/sdk/src/run.ts index 5a18f7025..2dfcef553 100644 --- a/sdk/src/run.ts +++ b/sdk/src/run.ts @@ -282,16 +282,27 @@ async function runOnce({ } } + // The agent runtime mutates sessionState.mainAgentState as it progresses, + // replacing messageHistory with a new array once it adds the user prompt. + // Comparing array identity detects progress more robustly than length: + // context pruning could shrink history below its starting length without + // meaning the runtime never ran. + const initialMessageHistory = sessionState.mainAgentState.messageHistory + /** Calculates the current session state if cancelled. * - * This is used when callMainPrompt throws an error (the server never processed the request). - * We need to add the user's message here since the server didn't get a chance to add it. + * This is used when callMainPrompt throws an error. If the agent runtime made + * any progress (replaced the shared messageHistory), those messages are + * preserved. Otherwise the user's message is added so it isn't lost. 
*/ function getCancelledSessionState(message: string): SessionState { + const runtimeMadeProgress = + sessionState.mainAgentState.messageHistory !== initialMessageHistory + const state = cloneDeep(sessionState) - // Add the user's message since the server never processed it - if (prompt || preparedContent) { + // Only add the user's message if the runtime didn't get a chance to add it. + if (!runtimeMadeProgress && (prompt || preparedContent)) { state.mainAgentState.messageHistory.push({ role: 'user' as const, content: buildUserMessageContent(prompt, params, preparedContent), From 78740f5c1864797dfe8fc576e4f94077c015346b Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 19 Apr 2026 17:33:35 -0700 Subject: [PATCH 8/9] Fix banned user test to match new suspension message (#520) Co-authored-by: Claude Opus 4.7 --- docs/error-schema.md | 2 +- .../app/api/v1/chat/completions/__tests__/completions.test.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/error-schema.md b/docs/error-schema.md index 6f7e2e177..56a735654 100644 --- a/docs/error-schema.md +++ b/docs/error-schema.md @@ -34,7 +34,7 @@ Used for errors that the client needs to identify programmatically: | Status | `error` code | Example `message` | |--------|-------------|-------------------| -| 403 | `account_suspended` | `"Your account has been suspended due to billing issues. Please contact support@codebuff.com to resolve this."` | +| 403 | `account_suspended` | `"Your account has been suspended. Please contact support@codebuff.com if you did not expect this."` | | 403 | `free_mode_unavailable` | `"Free mode is not available in your country."` (Freebuff: `"Freebuff is not available in your country."`) | | 429 | `rate_limit_exceeded` | `"Subscription weekly limit reached. Your limit resets in 2 hours. 
Enable 'Continue with credits' in the CLI to use a-la-carte credits."` | diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 2c6d5bb27..43b431f29 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -412,8 +412,8 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(response.status).toBe(403) const body = await response.json() expect(body.error).toBe('account_suspended') - expect(body.message).toContain('Your account has been suspended due to billing issues') - expect(body.message).toContain('to resolve this') + expect(body.message).toContain('Your account has been suspended') + expect(body.message).toContain('if you did not expect this') }) }) From 711f40ca44cd3f7a88885dd8a6d5d756b214cd29 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 19 Apr 2026 17:41:50 -0700 Subject: [PATCH 9/9] Fix freebuff grace-period hang where UI looks stuck streaming (#518) Co-authored-by: Claude Opus 4.7 --- cli/src/chat.tsx | 10 ++++++---- cli/src/hooks/helpers/send-message.ts | 14 ++++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx index bafdcecf1..af83a45c9 100644 --- a/cli/src/chat.tsx +++ b/cli/src/chat.tsx @@ -1473,15 +1473,17 @@ export const Chat = ({ )} {reviewMode ? ( - // Review takes precedence over the session-ended banner: during the - // grace window the agent may still be asking to run tools, and - // those approvals must be reachable for the run to finish. + // Review and ask_user take precedence over the session-ended banner: + // during the grace window the agent may still be asking to run tools + // or asking the user a question, and those approvals/answers must be + // reachable for the run to finish — otherwise the agent hangs + // waiting for input that can never be given. 
- ) : isFreebuffSessionOver ? ( + ) : isFreebuffSessionOver && !askUserState ? ( diff --git a/cli/src/hooks/helpers/send-message.ts b/cli/src/hooks/helpers/send-message.ts index 01f6880b6..02e419b30 100644 --- a/cli/src/hooks/helpers/send-message.ts +++ b/cli/src/hooks/helpers/send-message.ts @@ -510,10 +510,16 @@ function handleFreebuffGateError( switch (kind) { case 'session_expired': case 'waiting_room_required': - // Our seat is gone mid-chat. Flip to `ended` instead of auto re-queuing: - // the Chat surface stays mounted so any in-flight agent work can finish - // under the server-side grace period, and the session-ended banner - // prompts the user to press Enter when they're ready to rejoin. + // Our seat is gone mid-chat. Finalize the AI message so its streaming + // indicator stops — otherwise `isComplete` stays false and the message + // keeps rendering a blinking cursor forever, making the user think the + // agent is still working even though the SessionEndedBanner is visible + // and actionable. Also disposes the batched-updater flush interval. + updater.markComplete() + // Flip to `ended` instead of auto re-queuing: the Chat surface stays + // mounted so any in-flight agent work can finish under the server-side + // grace period, and the session-ended banner prompts the user to press + // Enter when they're ready to rejoin. markFreebuffSessionEnded() return case 'waiting_room_queued':