diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 000000000..9b82e92e3
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,5 @@
+{
+ "permissions": {
+ "defaultMode": "auto"
+ }
+}
diff --git a/cli/src/app.tsx b/cli/src/app.tsx
index 5c93cd8f6..616e7b890 100644
--- a/cli/src/app.tsx
+++ b/cli/src/app.tsx
@@ -285,17 +285,6 @@ export const App = ({
)
}
- // Render chat history screen when requested
- if (showChatHistory) {
- return (
-
- )
- }
-
// Use key to force remount when resuming a different chat from history
const chatKey = resumeChatId ?? 'current'
@@ -316,6 +305,10 @@ export const App = ({
initialMode={initialMode}
gitRoot={gitRoot}
onSwitchToGitRoot={handleSwitchToGitRoot}
+ showChatHistory={showChatHistory}
+ onSelectChat={handleResumeChat}
+ onCancelChatHistory={closeChatHistory}
+ onNewChat={handleNewChat}
/>
)
}
@@ -336,6 +329,10 @@ interface AuthedSurfaceProps {
initialMode: AgentMode | undefined
gitRoot: string | null | undefined
onSwitchToGitRoot: () => void
+ showChatHistory: boolean
+ onSelectChat: (chatId: string) => void
+ onCancelChatHistory: () => void
+ onNewChat: () => void
}
/**
@@ -359,6 +356,10 @@ const AuthedSurface = ({
initialMode,
gitRoot,
onSwitchToGitRoot,
+ showChatHistory,
+ onSelectChat,
+ onCancelChatHistory,
+ onNewChat,
}: AuthedSurfaceProps) => {
const { session, error: sessionError } = useFreebuffSession()
@@ -388,6 +389,20 @@ const AuthedSurface = ({
return
}
+ // Chat history renders inside AuthedSurface so the freebuff session stays
+ // mounted while the user browses history. Unmounting this surface would
+ // DELETE the session row and drop the user back into the waiting room on
+ // return.
+ if (showChatHistory) {
+ return (
+
+ )
+ }
+
return (
- ) : isFreebuffSessionOver ? (
+ ) : isFreebuffSessionOver && !askUserState ? (
diff --git a/cli/src/hooks/helpers/send-message.ts b/cli/src/hooks/helpers/send-message.ts
index 01f6880b6..02e419b30 100644
--- a/cli/src/hooks/helpers/send-message.ts
+++ b/cli/src/hooks/helpers/send-message.ts
@@ -510,10 +510,16 @@ function handleFreebuffGateError(
switch (kind) {
case 'session_expired':
case 'waiting_room_required':
- // Our seat is gone mid-chat. Flip to `ended` instead of auto re-queuing:
- // the Chat surface stays mounted so any in-flight agent work can finish
- // under the server-side grace period, and the session-ended banner
- // prompts the user to press Enter when they're ready to rejoin.
+ // Our seat is gone mid-chat. Finalize the AI message so its streaming
+ // indicator stops — otherwise `isComplete` stays false and the message
+ // keeps rendering a blinking cursor forever, making the user think the
+ // agent is still working even though the SessionEndedBanner is visible
+ // and actionable. Also disposes the batched-updater flush interval.
+ updater.markComplete()
+ // Flip to `ended` instead of auto re-queuing: the Chat surface stays
+ // mounted so any in-flight agent work can finish under the server-side
+ // grace period, and the session-ended banner prompts the user to press
+ // Enter when they're ready to rejoin.
markFreebuffSessionEnded()
return
case 'waiting_room_queued':
diff --git a/docs/error-schema.md b/docs/error-schema.md
index 6f7e2e177..56a735654 100644
--- a/docs/error-schema.md
+++ b/docs/error-schema.md
@@ -34,7 +34,7 @@ Used for errors that the client needs to identify programmatically:
| Status | `error` code | Example `message` |
|--------|-------------|-------------------|
-| 403 | `account_suspended` | `"Your account has been suspended due to billing issues. Please contact support@codebuff.com to resolve this."` |
+| 403 | `account_suspended` | `"Your account has been suspended. Please contact support@codebuff.com if you did not expect this."` |
| 403 | `free_mode_unavailable` | `"Free mode is not available in your country."` (Freebuff: `"Freebuff is not available in your country."`) |
| 429 | `rate_limit_exceeded` | `"Subscription weekly limit reached. Your limit resets in 2 hours. Enable 'Continue with credits' in the CLI to use a-la-carte credits."` |
diff --git a/packages/agent-runtime/src/__tests__/main-prompt.test.ts b/packages/agent-runtime/src/__tests__/main-prompt.test.ts
index 17b4f99e1..f68e13147 100644
--- a/packages/agent-runtime/src/__tests__/main-prompt.test.ts
+++ b/packages/agent-runtime/src/__tests__/main-prompt.test.ts
@@ -375,6 +375,7 @@ describe('mainPrompt', () => {
it('should update consecutiveAssistantMessages when new prompt is received', async () => {
const sessionState = getInitialSessionState(mockFileContext)
sessionState.mainAgentState.stepsRemaining = 12
+ const initialStepsRemaining = sessionState.mainAgentState.stepsRemaining
const action = {
type: 'prompt' as const,
@@ -394,7 +395,7 @@ describe('mainPrompt', () => {
// When there's a new prompt, consecutiveAssistantMessages should be set to 1
expect(newSessionState.mainAgentState.stepsRemaining).toBe(
- sessionState.mainAgentState.stepsRemaining - 1,
+ initialStepsRemaining - 1,
)
})
diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts
index 704cedf3a..4b8267033 100644
--- a/packages/agent-runtime/src/run-agent-step.ts
+++ b/packages/agent-runtime/src/run-agent-step.ts
@@ -536,6 +536,17 @@ export const runAgentStep = async (
}
}
+/**
+ * Runs the agent loop.
+ *
+ * IMPORTANT: This function mutates `params.agentState` in place throughout the
+ * run (not just at return time). Fields like `messageHistory`, `systemPrompt`,
+ * `toolDefinitions`, `creditsUsed`, and `output` are updated as work progresses
+ * so that callers holding a reference to the same object (e.g. the SDK's
+ * `sessionState.mainAgentState`) see in-progress work immediately — which
+ * matters when an error is thrown mid-run and the normal return path is
+ * skipped.
+ */
export async function loopAgentSteps(
params: {
addAgentStep: AddAgentStepFn
@@ -800,12 +811,13 @@ export async function loopAgentSteps(
return cachedAdditionalToolDefinitions
}
- let currentAgentState: AgentState = {
- ...initialAgentState,
- messageHistory: initialMessages,
- systemPrompt: system,
- toolDefinitions,
- }
+ // Mutate initialAgentState so that in-progress work propagates back to the
+ // caller's shared reference (e.g. SDK's sessionState.mainAgentState) even if
+ // an error is thrown before we return.
+ initialAgentState.messageHistory = initialMessages
+ initialAgentState.systemPrompt = system
+ initialAgentState.toolDefinitions = toolDefinitions
+ let currentAgentState: AgentState = initialAgentState
// Convert tool definitions to Anthropic format for accurate token counting
// Tool definitions are stored as { [name]: { description, inputSchema } }
@@ -908,7 +920,8 @@ export async function loopAgentSteps(
} = programmaticResult
n = generateN
- currentAgentState = programmaticAgentState
+ Object.assign(initialAgentState, programmaticAgentState)
+ currentAgentState = initialAgentState
totalSteps = stepNumber
shouldEndTurn = endTurn
@@ -989,7 +1002,8 @@ export async function loopAgentSteps(
logger.error('No runId found for agent state after finishing agent run')
}
- currentAgentState = newAgentState
+ Object.assign(initialAgentState, newAgentState)
+ currentAgentState = initialAgentState
shouldEndTurn = llmShouldEndTurn
nResponses = generatedResponses
diff --git a/sdk/src/__tests__/run-error-preserves-history.test.ts b/sdk/src/__tests__/run-error-preserves-history.test.ts
new file mode 100644
index 000000000..95b72ead2
--- /dev/null
+++ b/sdk/src/__tests__/run-error-preserves-history.test.ts
@@ -0,0 +1,315 @@
+import * as mainPromptModule from '@codebuff/agent-runtime/main-prompt'
+import { getInitialSessionState } from '@codebuff/common/types/session-state'
+import { getStubProjectFileContext } from '@codebuff/common/util/file'
+import { assistantMessage, userMessage } from '@codebuff/common/util/messages'
+import { afterEach, describe, expect, it, mock, spyOn } from 'bun:test'
+
+import { CodebuffClient } from '../client'
+import * as databaseModule from '../impl/database'
+
+interface ToolCallContentBlock {
+ type: 'tool-call'
+ toolCallId: string
+ toolName: string
+ input: Record<string, unknown>
+}
+
+const setupDatabaseMocks = () => {
+ spyOn(databaseModule, 'getUserInfoFromApiKey').mockResolvedValue({
+ id: 'user-123',
+ email: 'test@example.com',
+ discord_id: null,
+ referral_code: null,
+ stripe_customer_id: null,
+ banned: false,
+ created_at: new Date('2024-01-01T00:00:00Z'),
+ })
+ spyOn(databaseModule, 'fetchAgentFromDatabase').mockResolvedValue(null)
+ spyOn(databaseModule, 'startAgentRun').mockResolvedValue('run-1')
+ spyOn(databaseModule, 'finishAgentRun').mockResolvedValue(undefined)
+ spyOn(databaseModule, 'addAgentStep').mockResolvedValue('step-1')
+}
+
+describe('Error preserves in-progress message history', () => {
+ afterEach(() => {
+ mock.restore()
+ })
+
+ it('preserves in-progress assistant work on error (simulated via shared state mutation)', async () => {
+ setupDatabaseMocks()
+
+ // Simulate the agent runtime:
+ // 1. Mutates the shared session state with the user message and partial work
+ // 2. Then throws due to a downstream timeout/service error
+ spyOn(mainPromptModule, 'callMainPrompt').mockImplementation(
+ async (params: Parameters[0]) => {
+ const mainAgentState = params.action.sessionState.mainAgentState
+
+ // Match the real runtime's behavior: replace messageHistory with a new
+ // array that includes the user prompt as its first entry. The SDK
+ // detects runtime progress via reference inequality, so we must
+ // reassign the array rather than pushing into it.
+ mainAgentState.messageHistory = [
+ ...mainAgentState.messageHistory,
+ {
+ role: 'user',
+ content: [{ type: 'text', text: 'Fix the bug in auth.ts' }],
+ tags: ['USER_PROMPT'],
+ },
+ {
+ role: 'assistant',
+ content: [
+ { type: 'text', text: 'Let me read the auth file first.' },
+ {
+ type: 'tool-call',
+ toolCallId: 'read-1',
+ toolName: 'read_files',
+ input: { paths: ['auth.ts'] },
+ } as ToolCallContentBlock,
+ ],
+ },
+ {
+ role: 'tool',
+ toolCallId: 'read-1',
+ toolName: 'read_files',
+ content: [
+ {
+ type: 'json',
+ value: [{ path: 'auth.ts', content: 'const auth = ...' }],
+ },
+ ],
+ },
+ {
+ role: 'assistant',
+ content: [
+ { type: 'text', text: 'Found the issue, writing the fix now.' },
+ {
+ type: 'tool-call',
+ toolCallId: 'write-1',
+ toolName: 'write_file',
+ input: { path: 'auth.ts', content: 'const auth = fixed' },
+ } as ToolCallContentBlock,
+ ],
+ },
+ {
+ role: 'tool',
+ toolCallId: 'write-1',
+ toolName: 'write_file',
+ content: [{ type: 'json', value: { file: 'auth.ts', message: 'File written' } }],
+ },
+ ]
+
+ // Now simulate a server timeout on the next LLM call
+ const timeoutError = new Error('Service Unavailable') as Error & {
+ statusCode: number
+ responseBody: string
+ }
+ timeoutError.statusCode = 503
+ timeoutError.responseBody = JSON.stringify({
+ message: 'Request timeout after 30s',
+ })
+ throw timeoutError
+ },
+ )
+
+ const client = new CodebuffClient({ apiKey: 'test-key' })
+ const result = await client.run({
+ agent: 'base2',
+ prompt: 'Fix the bug in auth.ts',
+ })
+
+ // Error output with correct status code
+ expect(result.output.type).toBe('error')
+ const errorOutput = result.output as {
+ type: 'error'
+ message: string
+ statusCode?: number
+ }
+ expect(errorOutput.statusCode).toBe(503)
+
+ const history = result.sessionState!.mainAgentState.messageHistory
+
+ // The user's prompt should appear exactly once
+ const userPromptMessages = history.filter(
+ (m) =>
+ m.role === 'user' &&
+ (m.content as Array<{ type: string; text?: string }>).some(
+ (c) => c.type === 'text' && c.text?.includes('Fix the bug'),
+ ),
+ )
+ expect(userPromptMessages.length).toBe(1)
+
+ // Assistant text messages from both steps should be preserved
+ const firstAssistantText = history.find(
+ (m) =>
+ m.role === 'assistant' &&
+ (m.content as Array<{ type: string; text?: string }>).some(
+ (c) => c.type === 'text' && c.text?.includes('read the auth file'),
+ ),
+ )
+ expect(firstAssistantText).toBeDefined()
+
+ const secondAssistantText = history.find(
+ (m) =>
+ m.role === 'assistant' &&
+ (m.content as Array<{ type: string; text?: string }>).some(
+ (c) => c.type === 'text' && c.text?.includes('writing the fix'),
+ ),
+ )
+ expect(secondAssistantText).toBeDefined()
+
+ // Both tool calls and both tool results should be preserved
+ const readToolCall = history.find(
+ (m) =>
+ m.role === 'assistant' &&
+ (m.content as Array<{ type: string; toolCallId?: string }>).some(
+ (c) => c.type === 'tool-call' && c.toolCallId === 'read-1',
+ ),
+ )
+ expect(readToolCall).toBeDefined()
+
+ const writeToolCall = history.find(
+ (m) =>
+ m.role === 'assistant' &&
+ (m.content as Array<{ type: string; toolCallId?: string }>).some(
+ (c) => c.type === 'tool-call' && c.toolCallId === 'write-1',
+ ),
+ )
+ expect(writeToolCall).toBeDefined()
+
+ const readToolResult = history.find(
+ (m) => m.role === 'tool' && m.toolCallId === 'read-1',
+ )
+ expect(readToolResult).toBeDefined()
+
+ const writeToolResult = history.find(
+ (m) => m.role === 'tool' && m.toolCallId === 'write-1',
+ )
+ expect(writeToolResult).toBeDefined()
+ })
+
+ it('a subsequent run after error includes the preserved in-progress history', async () => {
+ setupDatabaseMocks()
+
+ // Run 1: agent does some work then hits an error
+ spyOn(mainPromptModule, 'callMainPrompt').mockImplementation(
+ async (params: Parameters[0]) => {
+ const mainAgentState = params.action.sessionState.mainAgentState
+
+ mainAgentState.messageHistory = [
+ ...mainAgentState.messageHistory,
+ {
+ role: 'user',
+ content: [{ type: 'text', text: 'Investigate the login bug' }],
+ tags: ['USER_PROMPT'],
+ },
+ assistantMessage('I found the problem in auth.ts on line 42.'),
+ {
+ role: 'assistant',
+ content: [
+ {
+ type: 'tool-call',
+ toolCallId: 'read-login',
+ toolName: 'read_files',
+ input: { paths: ['login.ts'] },
+ } as ToolCallContentBlock,
+ ],
+ },
+ {
+ role: 'tool',
+ toolCallId: 'read-login',
+ toolName: 'read_files',
+ content: [{ type: 'json', value: [{ path: 'login.ts', content: 'login code' }] }],
+ },
+ ]
+
+ const error = new Error('Service Unavailable') as Error & {
+ statusCode: number
+ }
+ error.statusCode = 503
+ throw error
+ },
+ )
+
+ const client = new CodebuffClient({ apiKey: 'test-key' })
+ const firstResult = await client.run({
+ agent: 'base2',
+ prompt: 'Investigate the login bug',
+ })
+
+ expect(firstResult.output.type).toBe('error')
+
+ // Run 2: use the failed run as previousRun
+ mock.restore()
+ setupDatabaseMocks()
+
+ let historyReceivedByRuntime: unknown[] | undefined
+ spyOn(mainPromptModule, 'callMainPrompt').mockImplementation(
+ async (params: Parameters<typeof mainPromptModule.callMainPrompt>[0]) => {
+ const { sendAction, promptId } = params
+ historyReceivedByRuntime = [
+ ...params.action.sessionState.mainAgentState.messageHistory,
+ ]
+
+ const responseSessionState = getInitialSessionState(
+ getStubProjectFileContext(),
+ )
+ responseSessionState.mainAgentState.messageHistory = [
+ ...params.action.sessionState.mainAgentState.messageHistory,
+ userMessage('Now try again'),
+ assistantMessage('Continuing with the fix.'),
+ ]
+
+ await sendAction({
+ action: {
+ type: 'prompt-response',
+ promptId,
+ sessionState: responseSessionState,
+ output: { type: 'lastMessage', value: [] },
+ },
+ })
+
+ return {
+ sessionState: responseSessionState,
+ output: { type: 'lastMessage' as const, value: [] },
+ }
+ },
+ )
+
+ const secondResult = await client.run({
+ agent: 'base2',
+ prompt: 'Now try again',
+ previousRun: firstResult,
+ })
+
+ // The runtime should have received history containing the work from the first run
+ expect(historyReceivedByRuntime).toBeDefined()
+ const receivedReadCall = historyReceivedByRuntime!.find(
+ (m) =>
+ (m as { role: string }).role === 'assistant' &&
+ ((m as { content: Array<{ type: string; toolCallId?: string }> })
+ .content ?? []).some(
+ (c) => c.type === 'tool-call' && c.toolCallId === 'read-login',
+ ),
+ )
+ expect(receivedReadCall).toBeDefined()
+
+ const receivedToolResult = historyReceivedByRuntime!.find(
+ (m) =>
+ (m as { role: string }).role === 'tool' &&
+ (m as { toolCallId: string }).toolCallId === 'read-login',
+ )
+ expect(receivedToolResult).toBeDefined()
+
+ // Final result should preserve history
+ const finalHistory = secondResult.sessionState!.mainAgentState.messageHistory
+ const finalReadCall = finalHistory.find(
+ (m) =>
+ m.role === 'assistant' &&
+ (m.content as Array<{ type: string; toolCallId?: string }>).some(
+ (c) => c.type === 'tool-call' && c.toolCallId === 'read-login',
+ ),
+ )
+ expect(finalReadCall).toBeDefined()
+ })
+})
diff --git a/sdk/src/run.ts b/sdk/src/run.ts
index 5a18f7025..2dfcef553 100644
--- a/sdk/src/run.ts
+++ b/sdk/src/run.ts
@@ -282,16 +282,27 @@ async function runOnce({
}
}
+ // The agent runtime mutates sessionState.mainAgentState as it progresses,
+ // replacing messageHistory with a new array once it adds the user prompt.
+ // Comparing array identity detects progress more robustly than length:
+ // context pruning could shrink history below its starting length without
+ // meaning the runtime never ran.
+ const initialMessageHistory = sessionState.mainAgentState.messageHistory
+
/** Calculates the current session state if cancelled.
*
- * This is used when callMainPrompt throws an error (the server never processed the request).
- * We need to add the user's message here since the server didn't get a chance to add it.
+ * This is used when callMainPrompt throws an error. If the agent runtime made
+ * any progress (replaced the shared messageHistory), those messages are
+ * preserved. Otherwise the user's message is added so it isn't lost.
*/
function getCancelledSessionState(message: string): SessionState {
+ const runtimeMadeProgress =
+ sessionState.mainAgentState.messageHistory !== initialMessageHistory
+
const state = cloneDeep(sessionState)
- // Add the user's message since the server never processed it
- if (prompt || preparedContent) {
+ // Only add the user's message if the runtime didn't get a chance to add it.
+ if (!runtimeMadeProgress && (prompt || preparedContent)) {
state.mainAgentState.messageHistory.push({
role: 'user' as const,
content: buildUserMessageContent(prompt, params, preparedContent),
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
index 2c6d5bb27..43b431f29 100644
--- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
+++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -412,8 +412,8 @@ describe('/api/v1/chat/completions POST endpoint', () => {
expect(response.status).toBe(403)
const body = await response.json()
expect(body.error).toBe('account_suspended')
- expect(body.message).toContain('Your account has been suspended due to billing issues')
- expect(body.message).toContain('to resolve this')
+ expect(body.message).toContain('Your account has been suspended')
+ expect(body.message).toContain('if you did not expect this')
})
})
diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts
index 85e10437a..c9b616846 100644
--- a/web/src/app/api/v1/chat/completions/_post.ts
+++ b/web/src/app/api/v1/chat/completions/_post.ts
@@ -260,7 +260,7 @@ export async function postChatCompletions(params: {
return NextResponse.json(
{
error: 'account_suspended',
- message: `Your account has been suspended due to billing issues. Please contact ${env.NEXT_PUBLIC_SUPPORT_EMAIL} to resolve this.`,
+ message: `Your account has been suspended. Please contact ${env.NEXT_PUBLIC_SUPPORT_EMAIL} if you did not expect this.`,
},
{ status: 403 },
)
@@ -413,7 +413,11 @@ export async function postChatCompletions(params: {
if (isFreeModeRequest) {
const claimedInstanceId =
typedBody.codebuff_metadata?.freebuff_instance_id
- const gate = await checkSession({ userId, claimedInstanceId })
+ const gate = await checkSession({
+ userId,
+ userEmail: userInfo.email,
+ claimedInstanceId,
+ })
if (!gate.ok) {
trackEvent({
event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR,
@@ -468,19 +472,19 @@ export async function postChatCompletions(params: {
if (ensureSubscriberBlockGrant) {
try {
const blockGrantResult = await ensureSubscriberBlockGrant({ userId, logger })
-
+
// Check if user hit subscription limit and should be rate-limited
if (blockGrantResult && (isWeeklyLimitError(blockGrantResult) || isBlockExhaustedError(blockGrantResult))) {
// Fetch user's preference for falling back to a-la-carte credits
const preferences = getUserPreferences
? await getUserPreferences({ userId, logger })
: { fallbackToALaCarte: true } // Default to allowing a-la-carte if no preference function
-
+
if (!preferences.fallbackToALaCarte && !isFreeModeRequest) {
const resetTime = blockGrantResult.resetsAt
const resetCountdown = formatQuotaResetCountdown(resetTime.toISOString())
const limitType = isWeeklyLimitError(blockGrantResult) ? 'weekly' : '5-hour session'
-
+
trackEvent({
event: AnalyticsEvent.CHAT_COMPLETIONS_INSUFFICIENT_CREDITS,
userId,
@@ -491,7 +495,7 @@ export async function postChatCompletions(params: {
},
logger,
})
-
+
return NextResponse.json(
{
error: 'rate_limit_exceeded',
@@ -553,54 +557,54 @@ export async function postChatCompletions(params: {
const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model)
const stream = useSiliconFlow
? await handleSiliconFlowStream({
- body: typedBody,
- userId,
- stripeCustomerId,
- agentId,
- fetch,
- logger,
- insertMessageBigquery,
- })
+ body: typedBody,
+ userId,
+ stripeCustomerId,
+ agentId,
+ fetch,
+ logger,
+ insertMessageBigquery,
+ })
: useCanopyWave
- ? await handleCanopyWaveStream({
- body: typedBody,
- userId,
- stripeCustomerId,
- agentId,
- fetch,
- logger,
- insertMessageBigquery,
- })
- : useFireworks
- ? await handleFireworksStream({
- body: typedBody,
- userId,
- stripeCustomerId,
- agentId,
- fetch,
- logger,
- insertMessageBigquery,
- })
- : useOpenAIDirect
- ? await handleOpenAIStream({
- body: typedBody,
- userId,
- stripeCustomerId,
- agentId,
- fetch,
- logger,
- insertMessageBigquery,
- })
- : await handleOpenRouterStream({
+ ? await handleCanopyWaveStream({
body: typedBody,
userId,
stripeCustomerId,
agentId,
- openrouterApiKey,
fetch,
logger,
insertMessageBigquery,
})
+ : useFireworks
+ ? await handleFireworksStream({
+ body: typedBody,
+ userId,
+ stripeCustomerId,
+ agentId,
+ fetch,
+ logger,
+ insertMessageBigquery,
+ })
+ : useOpenAIDirect
+ ? await handleOpenAIStream({
+ body: typedBody,
+ userId,
+ stripeCustomerId,
+ agentId,
+ fetch,
+ logger,
+ insertMessageBigquery,
+ })
+ : await handleOpenRouterStream({
+ body: typedBody,
+ userId,
+ stripeCustomerId,
+ agentId,
+ openrouterApiKey,
+ fetch,
+ logger,
+ insertMessageBigquery,
+ })
trackEvent({
event: AnalyticsEvent.CHAT_COMPLETIONS_STREAM_STARTED,
@@ -631,26 +635,16 @@ export async function postChatCompletions(params: {
const nonStreamRequest = useSiliconFlow
? handleSiliconFlowNonStream({
- body: typedBody,
- userId,
- stripeCustomerId,
- agentId,
- fetch,
- logger,
- insertMessageBigquery,
- })
+ body: typedBody,
+ userId,
+ stripeCustomerId,
+ agentId,
+ fetch,
+ logger,
+ insertMessageBigquery,
+ })
: useCanopyWave
- ? handleCanopyWaveNonStream({
- body: typedBody,
- userId,
- stripeCustomerId,
- agentId,
- fetch,
- logger,
- insertMessageBigquery,
- })
- : useFireworks
- ? handleFireworksNonStream({
+ ? handleCanopyWaveNonStream({
body: typedBody,
userId,
stripeCustomerId,
@@ -659,26 +653,36 @@ export async function postChatCompletions(params: {
logger,
insertMessageBigquery,
})
- : shouldUseOpenAIEndpoint
- ? handleOpenAINonStream({
- body: typedBody,
- userId,
- stripeCustomerId,
- agentId,
- fetch,
- logger,
- insertMessageBigquery,
- })
- : handleOpenRouterNonStream({
+ : useFireworks
+ ? handleFireworksNonStream({
body: typedBody,
userId,
stripeCustomerId,
agentId,
- openrouterApiKey,
fetch,
logger,
insertMessageBigquery,
})
+ : shouldUseOpenAIEndpoint
+ ? handleOpenAINonStream({
+ body: typedBody,
+ userId,
+ stripeCustomerId,
+ agentId,
+ fetch,
+ logger,
+ insertMessageBigquery,
+ })
+ : handleOpenRouterNonStream({
+ body: typedBody,
+ userId,
+ stripeCustomerId,
+ agentId,
+ openrouterApiKey,
+ fetch,
+ logger,
+ insertMessageBigquery,
+ })
const result = await nonStreamRequest
trackEvent({
diff --git a/web/src/app/api/v1/freebuff/session/_handlers.ts b/web/src/app/api/v1/freebuff/session/_handlers.ts
index 54157c0b8..5bed8e9c9 100644
--- a/web/src/app/api/v1/freebuff/session/_handlers.ts
+++ b/web/src/app/api/v1/freebuff/session/_handlers.ts
@@ -22,7 +22,9 @@ export interface FreebuffSessionDeps {
sessionDeps?: SessionDeps
}
-type AuthResult = { error: NextResponse } | { userId: string }
+type AuthResult =
+ | { error: NextResponse }
+ | { userId: string; userEmail: string | null }
async function resolveUser(req: NextRequest, deps: FreebuffSessionDeps): Promise<AuthResult> {
const apiKey = extractApiKeyFromHeader(req)
@@ -39,7 +39,7 @@ async function resolveUser(req: NextRequest, deps: FreebuffSessionDeps): Promise<AuthResult>
}
const userInfo = await deps.getUserInfoFromApiKey({
apiKey,
- fields: ['id'],
+ fields: ['id', 'email'],
logger: deps.logger,
})
if (!userInfo?.id) {
@@ -50,7 +52,7 @@ async function resolveUser(req: NextRequest, deps: FreebuffSessionDeps): Promise
),
}
}
- return { userId: String(userInfo.id) }
+ return { userId: String(userInfo.id), userEmail: userInfo.email ?? null }
}
function serverError(
@@ -96,6 +98,7 @@ export async function postFreebuffSession(
try {
const state = await requestSession({
userId: auth.userId,
+ userEmail: auth.userEmail,
deps: deps.sessionDeps,
})
return NextResponse.json(state, { status: 200 })
@@ -118,6 +121,7 @@ export async function getFreebuffSession(
const claimedInstanceId = req.headers.get(FREEBUFF_INSTANCE_HEADER) ?? undefined
const state = await getSessionState({
userId: auth.userId,
+ userEmail: auth.userEmail,
claimedInstanceId,
deps: deps.sessionDeps,
})
@@ -142,7 +146,11 @@ export async function deleteFreebuffSession(
if ('error' in auth) return auth.error
try {
- await endUserSession({ userId: auth.userId, deps: deps.sessionDeps })
+ await endUserSession({
+ userId: auth.userId,
+ userEmail: auth.userEmail,
+ deps: deps.sessionDeps,
+ })
return NextResponse.json({ status: 'ended' }, { status: 200 })
} catch (error) {
return serverError(deps, 'DELETE', auth.userId, error)
diff --git a/web/src/server/free-session/__tests__/fireworks-health.test.ts b/web/src/server/free-session/__tests__/fireworks-health.test.ts
index 6120731cf..3475769cd 100644
--- a/web/src/server/free-session/__tests__/fireworks-health.test.ts
+++ b/web/src/server/free-session/__tests__/fireworks-health.test.ts
@@ -3,7 +3,7 @@ import { describe, expect, test } from 'bun:test'
import {
KV_BLOCKS_DEGRADED_FRACTION,
KV_BLOCKS_UNHEALTHY_FRACTION,
- PREFILL_QUEUE_DEGRADED_MS,
+ PREFILL_QUEUE_P90_DEGRADED_MS,
classify,
} from '../fireworks-health'
@@ -19,20 +19,22 @@ function kvBlocks(value: number): PromSample {
}
}
-/** Emit a minimal cumulative-counts histogram for prefill queue where every
- * event lands in exactly one bucket `le`. */
-function prefillQueueBuckets(p50Ms: number): PromSample[] {
+/** Emit a cumulative-counts histogram for the prefill queue where the 90th
+ * percentile falls in the bucket with le ≥ p90Ms (i.e. p90 ≥ p90Ms).
+ * Uses 10 total events, all landing in that bucket, so the 90th percentile
+ * interpolates within that bucket, above its lower boundary. */
+function prefillQueueBuckets(p90Ms: number): PromSample[] {
const les = [50, 150, 300, 500, 750, 1000, 1500, 3000, 5000, 7500, 10000]
const name = 'latency_prefill_queue_ms_bucket:sum_by_deployment'
- // cumulative count = 0 below p50, 1 at and above p50
+ const total = 10
return les.map((le) => ({
name,
labels: { deployment_id: DEPLOY, le: String(le) },
- value: le >= p50Ms ? 1 : 0,
+ value: le >= p90Ms ? total : 0,
})).concat({
name,
labels: { deployment_id: DEPLOY, le: '+Inf' },
- value: 1,
+ value: total,
})
}
@@ -58,10 +60,10 @@ describe('fireworks health classifier', () => {
expect(classify(samples, [DEPLOY])).toBe('healthy')
})
- test('degraded when prefill queue p50 exceeds the threshold', () => {
+ test('degraded when prefill queue p90 exceeds the threshold', () => {
const samples: PromSample[] = [
kvBlocks(0.5),
- ...prefillQueueBuckets(PREFILL_QUEUE_DEGRADED_MS + 500),
+ ...prefillQueueBuckets(PREFILL_QUEUE_P90_DEGRADED_MS + 500),
]
expect(classify(samples, [DEPLOY])).toBe('degraded')
})
@@ -110,7 +112,7 @@ describe('fireworks health classifier', () => {
const other = 'other123'
const samples: PromSample[] = [
kvBlocks(0.5),
- ...prefillQueueBuckets(PREFILL_QUEUE_DEGRADED_MS + 500),
+ ...prefillQueueBuckets(PREFILL_QUEUE_P90_DEGRADED_MS + 500),
{
name: 'generator_kv_blocks_fraction:avg_by_deployment',
labels: { deployment_id: other },
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
index df34b7556..b19f24ea0 100644
--- a/web/src/server/free-session/__tests__/public-api.test.ts
+++ b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -281,6 +281,29 @@ describe('checkSessionAdmissible', () => {
expect(result.code).toBe('waiting_room_required')
})
+ test('bypassed email (team@codebuff.com) → ok with reason=disabled, no DB read', async () => {
+ const result = await checkSessionAdmissible({
+ userId: 'u1',
+ userEmail: 'team@codebuff.com',
+ claimedInstanceId: undefined,
+ deps,
+ })
+ expect(result.ok).toBe(true)
+ if (!result.ok) throw new Error('unreachable')
+ expect(result.reason).toBe('disabled')
+ expect(deps.rows.size).toBe(0)
+ })
+
+ test('bypassed email is case-insensitive', async () => {
+ const result = await checkSessionAdmissible({
+ userId: 'u1',
+ userEmail: 'Team@Codebuff.COM',
+ claimedInstanceId: undefined,
+ deps,
+ })
+ expect(result.ok).toBe(true)
+ })
+
test('queued session → waiting_room_queued', async () => {
await requestSession({ userId: 'u1', deps })
const result = await checkSessionAdmissible({
diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts
index b3bdade6a..681072b30 100644
--- a/web/src/server/free-session/__tests__/session-view.test.ts
+++ b/web/src/server/free-session/__tests__/session-view.test.ts
@@ -4,7 +4,7 @@ import { estimateWaitMs, toSessionStateResponse } from '../session-view'
import type { InternalSessionRow } from '../types'
-const WAIT_PER_SPOT_MS = 60_000
+const WAIT_PER_SPOT_MS = 24_000
const GRACE_MS = 30 * 60_000
function row(overrides: Partial = {}): InternalSessionRow {
diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts
index 4e9e729c1..e70e1b5c6 100644
--- a/web/src/server/free-session/config.ts
+++ b/web/src/server/free-session/config.ts
@@ -16,6 +16,18 @@ export function isWaitingRoomEnabled(): boolean {
return env.FREEBUFF_WAITING_ROOM_ENABLED
}
+/** Per-account override on top of the global kill switch. The internal
+ * `team@codebuff.com` account drives e2e tests in CI; landing it in the
+ * queue would make those tests flake whenever the waiting room is warm.
+ * Bypassed users behave exactly as if the waiting room were disabled. */
+const WAITING_ROOM_BYPASS_EMAILS = new Set(['team@codebuff.com'])
+export function isWaitingRoomBypassedForEmail(
+ email: string | null | undefined,
+): boolean {
+ if (!email) return false
+ return WAITING_ROOM_BYPASS_EMAILS.has(email.toLowerCase())
+}
+
export function getSessionLengthMs(): number {
return env.FREEBUFF_SESSION_LENGTH_MS
}
diff --git a/web/src/server/free-session/fireworks-health.ts b/web/src/server/free-session/fireworks-health.ts
index c102e721c..7d8e115e4 100644
--- a/web/src/server/free-session/fireworks-health.ts
+++ b/web/src/server/free-session/fireworks-health.ts
@@ -1,5 +1,6 @@
-import { FIREWORKS_ACCOUNT_ID, FIREWORKS_DEPLOYMENT_MAP } from '@/llm-api/fireworks-config'
import { env } from '@codebuff/internal/env'
+
+import { FIREWORKS_ACCOUNT_ID, FIREWORKS_DEPLOYMENT_MAP } from '@/llm-api/fireworks-config'
import { logger } from '@/util/logger'
/**
@@ -15,13 +16,14 @@ import { logger } from '@/util/logger'
*/
export type FireworksHealth = 'healthy' | 'degraded' | 'unhealthy'
-/** Degrade once median prefill-queue latency crosses this bound. Strict by
- * design — a 1s queue on top of ~1s prefill already means users feel 2s+
- * before first token. */
-export const PREFILL_QUEUE_DEGRADED_MS = 125
+/** Degrade once p90 prefill-queue latency crosses this bound. Using p90
+ * instead of p50 gives a better early-warning signal — the tail starts
+ * rising before the median does, so we can halt admission before most
+ * users feel it. */
+export const PREFILL_QUEUE_P90_DEGRADED_MS = 1000
/** Leading indicator of load — responds instantly to memory pressure, while
- * prefill-queue p50 is a lagging window statistic. Degrading here lets us
+ * prefill-queue p90 is a lagging window statistic. Degrading here lets us
* halt admission *before* users feel it. */
export const KV_BLOCKS_DEGRADED_FRACTION = 0.8
@@ -160,16 +162,16 @@ function classifyOne(samples: PromSample[], deploymentId: string): FireworksHeal
return 'unhealthy'
}
- const p50 = histogramPercentile(
+ const p90 = histogramPercentile(
samples,
'latency_prefill_queue_ms_bucket:sum_by_deployment',
deploymentId,
- 50,
+ 90,
)
- if (p50 !== undefined && p50 > PREFILL_QUEUE_DEGRADED_MS) {
+ if (p90 !== undefined && p90 > PREFILL_QUEUE_P90_DEGRADED_MS) {
logger.info(
- { deploymentId, prefillQueueP50Ms: Math.round(p50), kvBlocks },
- '[FireworksHealth] degraded: prefill queue p50 over threshold',
+ { deploymentId, prefillQueueP90Ms: Math.round(p90), kvBlocks },
+ '[FireworksHealth] degraded: prefill queue p90 over threshold',
)
return 'degraded'
}
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts
index 759a516d7..74af009cc 100644
--- a/web/src/server/free-session/public-api.ts
+++ b/web/src/server/free-session/public-api.ts
@@ -1,5 +1,6 @@
import {
getSessionGraceMs,
+ isWaitingRoomBypassedForEmail,
isWaitingRoomEnabled,
} from './config'
import {
@@ -79,10 +80,16 @@ async function viewForRow(
*/
export async function requestSession(params: {
userId: string
+ userEmail?: string | null | undefined
deps?: SessionDeps
}): Promise {
const deps = params.deps ?? defaultDeps
- if (!deps.isWaitingRoomEnabled()) return { status: 'disabled' }
+ if (
+ !deps.isWaitingRoomEnabled() ||
+ isWaitingRoomBypassedForEmail(params.userEmail)
+ ) {
+ return { status: 'disabled' }
+ }
const row = await deps.joinOrTakeOver({ userId: params.userId, now: nowOf(deps) })
const view = await viewForRow(params.userId, deps, row)
@@ -109,11 +116,17 @@ export async function requestSession(params: {
*/
export async function getSessionState(params: {
userId: string
+ userEmail?: string | null | undefined
claimedInstanceId?: string | null | undefined
deps?: SessionDeps
}): Promise {
const deps = params.deps ?? defaultDeps
- if (!deps.isWaitingRoomEnabled()) return { status: 'disabled' }
+ if (
+ !deps.isWaitingRoomEnabled() ||
+ isWaitingRoomBypassedForEmail(params.userEmail)
+ ) {
+ return { status: 'disabled' }
+ }
const row = await deps.getSessionRow(params.userId)
if (!row) return { status: 'none' }
@@ -132,10 +145,16 @@ export async function getSessionState(params: {
export async function endUserSession(params: {
userId: string
+ userEmail?: string | null | undefined
deps?: SessionDeps
 }): Promise<void> {
   const deps = params.deps ?? defaultDeps
-  if (!deps.isWaitingRoomEnabled()) return
+ if (
+ !deps.isWaitingRoomEnabled() ||
+ isWaitingRoomBypassedForEmail(params.userEmail)
+ ) {
+ return
+ }
await deps.endSession(params.userId)
}
@@ -169,11 +188,17 @@ export type SessionGateResult =
*/
export async function checkSessionAdmissible(params: {
userId: string
+ userEmail?: string | null | undefined
claimedInstanceId: string | null | undefined
deps?: SessionDeps
 }): Promise<SessionGateResult> {
   const deps = params.deps ?? defaultDeps
-  if (!deps.isWaitingRoomEnabled()) return { ok: true, reason: 'disabled' }
+ if (
+ !deps.isWaitingRoomEnabled() ||
+ isWaitingRoomBypassedForEmail(params.userEmail)
+ ) {
+ return { ok: true, reason: 'disabled' }
+ }
// Pre-waiting-room CLIs never send a freebuff_instance_id. Classify that up
// front so the caller gets a distinct code (→ 426 Upgrade Required) and the
diff --git a/web/src/server/free-session/session-view.ts b/web/src/server/free-session/session-view.ts
index 7ce1f75fe..582e78814 100644
--- a/web/src/server/free-session/session-view.ts
+++ b/web/src/server/free-session/session-view.ts
@@ -59,10 +59,10 @@ export function toSessionStateResponse(params: {
return null
}
-const WAIT_MS_PER_SPOT_AHEAD = 60_000
+const WAIT_MS_PER_SPOT_AHEAD = 24_000
/**
- * Rough wait-time estimate shown to queued users: one minute per spot ahead.
+ * Rough wait-time estimate shown to queued users: 24 seconds per spot ahead.
* Position 1 → 0ms (next tick picks you up).
*/
export function estimateWaitMs(params: { position: number }): number {