From b28479c0de55ca42eddeccf4f53a204d72e73071 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 12:36:37 -0700 Subject: [PATCH 01/16] Switch to baseten provider for minimax --- agents/base2/base2.ts | 3 - agents/tmux-cli.ts | 3 - packages/internal/src/env-schema.ts | 2 + web/src/app/api/v1/chat/completions/_post.ts | 53 +- web/src/llm-api/baseten.ts | 607 +++++++++++++++++++ web/src/llm-api/fireworks.ts | 2 +- 6 files changed, 654 insertions(+), 16 deletions(-) create mode 100644 web/src/llm-api/baseten.ts diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index be5ade5a1c..52ca7ef4ba 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -30,9 +30,6 @@ export function createBase2( publisher, model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? { - only: ['inceptron/fp8'], - order: ['inceptron/fp8'], - allow_fallbacks: false, data_collection: 'deny', } : { only: ['amazon-bedrock'], diff --git a/agents/tmux-cli.ts b/agents/tmux-cli.ts index 10c0ecdeab..e959bf64c3 100644 --- a/agents/tmux-cli.ts +++ b/agents/tmux-cli.ts @@ -75,9 +75,6 @@ const definition: AgentDefinition = { // Provider options are tightly coupled to the model choice above. // If you change the model, update these accordingly. 
providerOptions: { - only: ['inceptron/fp8'], - order: ['inceptron/fp8'], - allow_fallbacks: false, data_collection: 'deny', }, diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index 7f9336a08d..21a0147bd8 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -7,6 +7,7 @@ export const serverEnvSchema = clientEnvSchema.extend({ OPENAI_API_KEY: z.string().min(1), ANTHROPIC_API_KEY: z.string().min(1), FIREWORKS_API_KEY: z.string().min(1), + BASETEN_API_KEY: z.string().min(1).optional(), LINKUP_API_KEY: z.string().min(1), CONTEXT7_API_KEY: z.string().optional(), GRAVITY_API_KEY: z.string().min(1), @@ -50,6 +51,7 @@ export const serverProcessEnv: ServerInput = { OPENAI_API_KEY: process.env.OPENAI_API_KEY, ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY, FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY, + BASETEN_API_KEY: process.env.BASETEN_API_KEY, LINKUP_API_KEY: process.env.LINKUP_API_KEY, CONTEXT7_API_KEY: process.env.CONTEXT7_API_KEY, GRAVITY_API_KEY: process.env.GRAVITY_API_KEY, diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index b9ebb09f63..1eec315d82 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -35,6 +35,12 @@ import type { NextRequest } from 'next/server' import type { ChatCompletionRequestBody } from '@/llm-api/types' +import { + BasetenError, + handleBasetenNonStream, + handleBasetenStream, + isBasetenModel, +} from '@/llm-api/baseten' import { FireworksError, handleFireworksNonStream, @@ -354,9 +360,20 @@ export async function postChatCompletions(params: { // Handle streaming vs non-streaming try { if (bodyStream) { - // Streaming request — route to Fireworks for supported models - const useFireworks = isFireworksModel(typedBody.model) - const stream = useFireworks + // Streaming request — route to Baseten/Fireworks for supported models + const useBaseten = 
isBasetenModel(typedBody.model) + const useFireworks = !useBaseten && isFireworksModel(typedBody.model) + const stream = useBaseten + ? await handleBasetenStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useFireworks ? await handleFireworksStream({ body: typedBody, userId, @@ -396,9 +413,10 @@ export async function postChatCompletions(params: { }, }) } else { - // Non-streaming request — route to Fireworks for supported models + // Non-streaming request — route to Baseten/Fireworks for supported models const model = typedBody.model - const useFireworks = isFireworksModel(model) + const useBaseten = isBasetenModel(model) + const useFireworks = !useBaseten && isFireworksModel(model) const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? modelParts[1] : model const isOpenAIDirectModel = @@ -409,7 +427,17 @@ export async function postChatCompletions(params: { const shouldUseOpenAIEndpoint = isOpenAIDirectModel && typedBody.codebuff_metadata?.n !== undefined - const nonStreamRequest = useFireworks + const nonStreamRequest = useBaseten + ? handleBasetenNonStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useFireworks ? handleFireworksNonStream({ body: typedBody, userId, @@ -463,10 +491,14 @@ export async function postChatCompletions(params: { if (error instanceof FireworksError) { fireworksError = error } + let basetenError: BasetenError | undefined + if (error instanceof BasetenError) { + basetenError = error + } // Log detailed error information for debugging const errorDetails = openrouterError?.toJSON() - const providerLabel = fireworksError ? 'Fireworks' : 'OpenRouter' + const providerLabel = basetenError ? 'Baseten' : fireworksError ? 'Fireworks' : 'OpenRouter' logger.error( { error: getErrorObject(error), @@ -480,8 +512,8 @@ export async function postChatCompletions(params: { ? 
typedBody.messages.length : 0, messages: typedBody.messages, - providerStatusCode: (openrouterError ?? fireworksError)?.statusCode, - providerStatusText: (openrouterError ?? fireworksError)?.statusText, + providerStatusCode: (openrouterError ?? fireworksError ?? basetenError)?.statusCode, + providerStatusText: (openrouterError ?? fireworksError ?? basetenError)?.statusText, openrouterErrorCode: errorDetails?.error?.code, openrouterErrorType: errorDetails?.error?.type, openrouterErrorMessage: errorDetails?.error?.message, @@ -509,6 +541,9 @@ export async function postChatCompletions(params: { if (error instanceof FireworksError) { return NextResponse.json(error.toJSON(), { status: error.statusCode }) } + if (error instanceof BasetenError) { + return NextResponse.json(error.toJSON(), { status: error.statusCode }) + } return NextResponse.json( { error: 'Failed to process request' }, diff --git a/web/src/llm-api/baseten.ts b/web/src/llm-api/baseten.ts new file mode 100644 index 0000000000..dbd787def8 --- /dev/null +++ b/web/src/llm-api/baseten.ts @@ -0,0 +1,607 @@ +import { Agent } from 'undici' + +import { PROFIT_MARGIN } from '@codebuff/common/constants/limits' +import { getErrorObject } from '@codebuff/common/util/error' +import { env } from '@codebuff/internal/env' + +import { + consumeCreditsForMessage, + extractRequestMetadata, + insertMessageToBigQuery, +} from './helpers' + +import type { UsageData } from './helpers' +import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ChatCompletionRequestBody } from './types' + +const BASETEN_BASE_URL = 'https://inference.baseten.co/v1' + +// Extended timeout for deep-thinking models that can take +// a long time to start streaming. 
+const BASETEN_HEADERS_TIMEOUT_MS = 10 * 60 * 1000 + +const basetenAgent = new Agent({ + headersTimeout: BASETEN_HEADERS_TIMEOUT_MS, + bodyTimeout: 0, +}) + +/** Map from OpenRouter model IDs to Baseten model IDs */ +const BASETEN_MODEL_MAP: Record = { + 'minimax/minimax-m2.5': 'MiniMaxAI/MiniMax-M2.5', +} + +export function isBasetenModel(model: string): boolean { + return model in BASETEN_MODEL_MAP +} + +function getBasetenModelId(openrouterModel: string): string { + return BASETEN_MODEL_MAP[openrouterModel] ?? openrouterModel +} + +type StreamState = { responseText: string; reasoningText: string } + +type LineResult = { + state: StreamState + billedCredits?: number + patchedLine: string +} + +function createBasetenRequest(params: { + body: ChatCompletionRequestBody + originalModel: string + fetch: typeof globalThis.fetch +}) { + const { body, originalModel, fetch } = params + const basetenBody: Record = { + ...body, + model: getBasetenModelId(originalModel), + } + + // Strip OpenRouter-specific / internal fields + delete basetenBody.provider + delete basetenBody.transforms + delete basetenBody.codebuff_metadata + delete basetenBody.usage + + // For streaming, request usage in the final chunk + if (basetenBody.stream) { + basetenBody.stream_options = { include_usage: true } + } + + if (!env.BASETEN_API_KEY) { + throw new Error('BASETEN_API_KEY is not configured') + } + + return fetch(`${BASETEN_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${env.BASETEN_API_KEY}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(basetenBody), + // @ts-expect-error - dispatcher is a valid undici option not in fetch types + dispatcher: basetenAgent, + }) +} + +// Baseten per-token pricing (dollars per token) +// TODO: Verify these costs against Baseten's actual pricing +const BASETEN_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 +const BASETEN_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 +const BASETEN_OUTPUT_COST_PER_TOKEN = 
1.20 / 1_000_000 + +function extractUsageAndCost(usage: Record | undefined | null): UsageData { + if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } + const promptDetails = usage.prompt_tokens_details as Record | undefined | null + const completionDetails = usage.completion_tokens_details as Record | undefined | null + + const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0 + const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0 + const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 + const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0 + + // Baseten doesn't return cost — compute from token counts and known pricing + const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) + const cost = + nonCachedInputTokens * BASETEN_INPUT_COST_PER_TOKEN + + cacheReadInputTokens * BASETEN_CACHED_INPUT_COST_PER_TOKEN + + outputTokens * BASETEN_OUTPUT_COST_PER_TOKEN + + return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } +} + +export async function handleBasetenNonStream({ + body, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: ChatCompletionRequestBody + userId: string + stripeCustomerId?: string | null + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const originalModel = body.model + const startTime = new Date() + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + + const response = await createBasetenRequest({ body, originalModel, fetch }) + + if (!response.ok) { + throw await parseBasetenError(response) + } + + const data = await response.json() + const content = data.choices?.[0]?.message?.content ?? 
'' + const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? '' + const usageData = extractUsageAndCost(data.usage) + + insertMessageToBigQuery({ + messageId: data.id, + userId, + startTime, + request: body, + reasoningText, + responseText: content, + usageData, + logger, + insertMessageBigquery, + }).catch((error) => { + logger.error({ error }, 'Failed to insert message into BigQuery') + }) + + const billedCredits = await consumeCreditsForMessage({ + messageId: data.id, + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + startTime, + model: originalModel, + reasoningText, + responseText: content, + usageData, + byok: false, + logger, + costMode, + }) + + // Overwrite cost so SDK calculates exact credits we charged + if (data.usage) { + data.usage.cost = creditsToFakeCost(billedCredits) + data.usage.cost_details = { upstream_inference_cost: 0 } + } + + // Normalise model name back to OpenRouter format for client compatibility + data.model = originalModel + if (!data.provider) data.provider = 'Baseten' + + return data +} + +export async function handleBasetenStream({ + body, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: ChatCompletionRequestBody + userId: string + stripeCustomerId?: string | null + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const originalModel = body.model + const startTime = new Date() + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + + const response = await createBasetenRequest({ body, originalModel, fetch }) + + if (!response.ok) { + throw await parseBasetenError(response) + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Failed to get response reader') + } + + let heartbeatInterval: NodeJS.Timeout + let state: StreamState = { responseText: '', reasoningText: 
'' } + let clientDisconnected = false + + const stream = new ReadableStream({ + async start(controller) { + const decoder = new TextDecoder() + let buffer = '' + + controller.enqueue( + new TextEncoder().encode(`: connected ${new Date().toISOString()}\n`), + ) + + heartbeatInterval = setInterval(() => { + if (!clientDisconnected) { + try { + controller.enqueue( + new TextEncoder().encode( + `: heartbeat ${new Date().toISOString()}\n\n`, + ), + ) + } catch { + // client disconnected + } + } + }, 30000) + + try { + let done = false + while (!done) { + const result = await reader.read() + done = result.done + const value = result.value + + if (done) break + + buffer += decoder.decode(value, { stream: true }) + let lineEnd = buffer.indexOf('\n') + + while (lineEnd !== -1) { + const line = buffer.slice(0, lineEnd + 1) + buffer = buffer.slice(lineEnd + 1) + + const lineResult = await handleLine({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request: body, + originalModel, + line, + state, + logger, + insertMessage: insertMessageBigquery, + }) + state = lineResult.state + + if (!clientDisconnected) { + try { + controller.enqueue(new TextEncoder().encode(lineResult.patchedLine)) + } catch { + logger.warn('Client disconnected during stream, continuing for billing') + clientDisconnected = true + } + } + + lineEnd = buffer.indexOf('\n') + } + } + + if (!clientDisconnected) { + controller.close() + } + } catch (error) { + if (!clientDisconnected) { + controller.error(error) + } else { + logger.warn( + getErrorObject(error), + 'Error after client disconnect in Baseten stream', + ) + } + } finally { + clearInterval(heartbeatInterval) + } + }, + cancel() { + clearInterval(heartbeatInterval) + clientDisconnected = true + logger.warn( + { + clientDisconnected, + responseTextLength: state.responseText.length, + reasoningTextLength: state.reasoningText.length, + }, + 'Client cancelled stream, continuing Baseten consumption for 
billing', + ) + }, + }) + + return stream +} + +async function handleLine({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + line, + state, + logger, + insertMessage, +}: { + userId: string + stripeCustomerId?: string | null + agentId: string + clientId: string | null + clientRequestId: string | null + costMode: string | undefined + startTime: Date + request: unknown + originalModel: string + line: string + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise { + if (!line.startsWith('data: ')) { + return { state, patchedLine: line } + } + + const raw = line.slice('data: '.length) + if (raw === '[DONE]\n' || raw === '[DONE]') { + return { state, patchedLine: line } + } + + let obj: Record + try { + obj = JSON.parse(raw) + } catch (error) { + logger.warn( + { error: getErrorObject(error, { includeRawError: true }) }, + 'Received non-JSON Baseten response', + ) + return { state, patchedLine: line } + } + + // Patch model and provider for SDK compatibility + if (obj.model) obj.model = originalModel + if (!obj.provider) obj.provider = 'Baseten' + + // Process the chunk for billing / state tracking + const result = await handleResponse({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + data: obj, + state, + logger, + insertMessage, + }) + + // If this is the final chunk with billing, overwrite cost in the patched object + if (result.billedCredits !== undefined && obj.usage) { + const usage = obj.usage as Record + usage.cost = creditsToFakeCost(result.billedCredits) + usage.cost_details = { upstream_inference_cost: 0 } + } + + const patchedLine = `data: ${JSON.stringify(obj)}\n` + return { state: result.state, billedCredits: result.billedCredits, patchedLine } +} + +async function handleResponse({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + 
startTime, + request, + originalModel, + data, + state, + logger, + insertMessage, +}: { + userId: string + stripeCustomerId?: string | null + agentId: string + clientId: string | null + clientRequestId: string | null + costMode: string | undefined + startTime: Date + request: unknown + originalModel: string + data: Record + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise<{ state: StreamState; billedCredits?: number }> { + state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) + + if ('error' in data || !data.usage) { + return { state } + } + + const usageData = extractUsageAndCost(data.usage as Record) + const messageId = typeof data.id === 'string' ? data.id : 'unknown' + + insertMessageToBigQuery({ + messageId, + userId, + startTime, + request, + reasoningText: state.reasoningText, + responseText: state.responseText, + usageData, + logger, + insertMessageBigquery: insertMessage, + }).catch((error) => { + logger.error({ error }, 'Failed to insert message into BigQuery') + }) + + const billedCredits = await consumeCreditsForMessage({ + messageId, + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + startTime, + model: originalModel, + reasoningText: state.reasoningText, + responseText: state.responseText, + usageData, + byok: false, + logger, + costMode, + }) + + return { state, billedCredits } +} + +function handleStreamChunk({ + data, + state, + logger, + userId, + agentId, + model, +}: { + data: Record + state: StreamState + logger: Logger + userId: string + agentId: string + model: string +}): StreamState { + const MAX_BUFFER_SIZE = 1 * 1024 * 1024 + + if ('error' in data) { + const errorData = data.error as Record + logger.error( + { + userId, + agentId, + model, + errorCode: errorData?.code, + errorType: errorData?.type, + errorMessage: errorData?.message, + }, + 'Received error chunk in Baseten stream', + ) + return state + } + + const choices = data.choices as 
Array> | undefined + if (!choices?.length) { + return state + } + const choice = choices[0] + const delta = choice.delta as Record | undefined + + const contentDelta = typeof delta?.content === 'string' ? delta.content : '' + if (state.responseText.length < MAX_BUFFER_SIZE) { + state.responseText += contentDelta + if (state.responseText.length >= MAX_BUFFER_SIZE) { + state.responseText = + state.responseText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---' + logger.warn({ userId, agentId, model }, 'Response text buffer truncated at 1MB') + } + } + + const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content + : typeof delta?.reasoning === 'string' ? delta.reasoning + : '' + if (state.reasoningText.length < MAX_BUFFER_SIZE) { + state.reasoningText += reasoningDelta + if (state.reasoningText.length >= MAX_BUFFER_SIZE) { + state.reasoningText = + state.reasoningText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---' + logger.warn({ userId, agentId, model }, 'Reasoning text buffer truncated at 1MB') + } + } + + return state +} + +export class BasetenError extends Error { + constructor( + public readonly statusCode: number, + public readonly statusText: string, + public readonly errorBody: { + error: { + message: string + code: string | number | null + type?: string | null + } + }, + ) { + super(errorBody.error.message) + this.name = 'BasetenError' + } + + toJSON() { + return { + error: { + message: this.errorBody.error.message, + code: this.errorBody.error.code, + type: this.errorBody.error.type, + }, + } + } +} + +async function parseBasetenError(response: Response): Promise { + const errorText = await response.text() + let errorBody: BasetenError['errorBody'] + try { + const parsed = JSON.parse(errorText) + if (parsed?.error?.message) { + errorBody = { + error: { + message: parsed.error.message, + code: parsed.error.code ?? null, + type: parsed.error.type ?? 
null, + }, + } + } else { + errorBody = { + error: { + message: errorText || response.statusText, + code: response.status, + }, + } + } + } catch { + errorBody = { + error: { + message: errorText || response.statusText, + code: response.status, + }, + } + } + return new BasetenError(response.status, response.statusText, errorBody) +} + +function creditsToFakeCost(credits: number): number { + return credits / ((1 + PROFIT_MARGIN) * 100) +} diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 4df557af08..42217cb525 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -28,7 +28,7 @@ const fireworksAgent = new Agent({ /** Map from OpenRouter model IDs to Fireworks model IDs */ const FIREWORKS_MODEL_MAP: Record = { - 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', + // 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', } export function isFireworksModel(model: string): boolean { From 6990d6777ebbcbe99bed0add6bd89d296d4c6554 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 14:06:32 -0700 Subject: [PATCH 02/16] Simplify tmux cli agent slightly --- agents/base2/base2.ts | 2 +- agents/tmux-cli.ts | 152 ++++++++++++++---------------------------- 2 files changed, 52 insertions(+), 102 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 52ca7ef4ba..4a3c40064f 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -85,7 +85,7 @@ export function createBase2( isFree && 'code-reviewer-lite', isDefault && 'code-reviewer', isMax && 'code-reviewer-multi-prompt', - isDefault && 'tmux-cli', + 'tmux-cli', 'context-pruner', ), diff --git a/agents/tmux-cli.ts b/agents/tmux-cli.ts index e959bf64c3..be07859283 100644 --- a/agents/tmux-cli.ts +++ b/agents/tmux-cli.ts @@ -450,137 +450,84 @@ esac const sessionName = 'tui-test-' + Date.now() + '-' + Math.random().toString(36).slice(2, 6) const helperPath = '/tmp/tmux-helper-' + sessionName + '.sh' - 
logger.info('Writing helper script to ' + helperPath) + logger.info('Setting up tmux session: ' + sessionName) - // Write the self-contained helper script to /tmp - const { toolResult: writeResult } = yield { - toolName: 'run_terminal_command', - input: { - command: 'cat > ' + helperPath + " << 'TMUX_HELPER_EOF'\n" + helperScript + "TMUX_HELPER_EOF\nchmod +x " + helperPath, - timeout_seconds: 10, - }, - } - - const writeOutput = writeResult?.[0] - if (writeOutput && writeOutput.type === 'json') { - const value = writeOutput.value as Record - const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined - if (exitCode !== 0) { - const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : 'unknown error' - logger.error('Failed to write helper script: ' + stderr) - yield { - toolName: 'set_output', - input: { - overallStatus: 'failure', - summary: 'Failed to write helper script to /tmp. ' + stderr, - sessionName: '', - scriptIssues: [{ script: helperPath, issue: stderr, suggestedFix: 'Check /tmp is writable' }], - captures: [], - }, - } - return - } - } - - logger.info('Starting tmux session (bash)') - - // Start the tmux session with bash (not the user's command directly) - const { toolResult } = yield { + // Combined setup: write helper script, start session, send command (single yield to reduce round-trips) + const escapedCommand = startCommand.replace(/'/g, "'\\''") + const setupScript = + 'set -e\n' + + 'cat > ' + helperPath + " << 'TMUX_HELPER_EOF'\n" + helperScript + 'TMUX_HELPER_EOF\n' + + 'chmod +x ' + helperPath + '\n' + + 'OUTPUT=$(' + helperPath + " start '" + sessionName + "') || { echo \"FAIL_START\" >&2; exit 1; }\n" + + helperPath + " send '" + sessionName + "' '" + escapedCommand + "' || { " + helperPath + " stop '" + sessionName + "' 2>/dev/null; echo \"FAIL_SEND\" >&2; exit 1; }\n" + + 'echo "$OUTPUT"' + + const { toolResult: setupResult } = yield { toolName: 'run_terminal_command', input: { - command: helperPath + " 
start '" + sessionName + "'", + command: setupScript, timeout_seconds: 30, }, + includeToolCall: false, } - let started = false - let parseError = '' + let setupSuccess = false + let setupError = '' - const result = toolResult?.[0] - if (result && result.type === 'json') { - const value = result.value as Record + const setupOutput = setupResult?.[0] + if (setupOutput && setupOutput.type === 'json') { + const value = setupOutput.value as Record const stdout = typeof value?.stdout === 'string' ? value.stdout.trim() : '' const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : '' const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined - if (exitCode !== 0) { - parseError = stderr || 'Helper script failed with no error message' - } else if (stdout === sessionName) { - started = true + if (exitCode === 0 && stdout === sessionName) { + setupSuccess = true } else { - parseError = 'Unexpected output: ' + stdout + setupError = stderr || stdout || 'Setup failed with no error message' } } else { - parseError = 'Unexpected result type from run_terminal_command' + setupError = 'Unexpected result type from run_terminal_command' } - if (!started) { - const errorMsg = parseError || 'Failed to start session' - logger.error({ parseError: errorMsg }, 'Failed to start tmux session') + if (!setupSuccess) { + const isSendFailure = setupError.includes('FAIL_SEND') + const isStartFailure = setupError.includes('FAIL_START') + + let summary: string + let suggestedFix: string + if (isSendFailure) { + summary = 'Started session but failed to send command. ' + setupError + suggestedFix = 'Check that the command is valid.' + } else if (isStartFailure) { + summary = 'Failed to start tmux session. ' + setupError + suggestedFix = 'Ensure tmux is installed and the command is valid.' + } else { + summary = 'Failed to write helper script to /tmp. 
' + setupError + suggestedFix = 'Check /tmp is writable' + } + + logger.error(setupError, 'Setup failed') yield { toolName: 'set_output', input: { overallStatus: 'failure', - summary: 'Failed to start tmux session. ' + errorMsg, - sessionName: '', - scriptIssues: [ - { - script: helperPath, - issue: errorMsg, - errorOutput: JSON.stringify(toolResult), - suggestedFix: 'Ensure tmux is installed and the command is valid.', - }, - ], + summary, + sessionName: isSendFailure ? sessionName : '', + scriptIssues: [{ script: helperPath, issue: setupError, suggestedFix }], captures: [], }, } return } - logger.info('Successfully started tmux session: ' + sessionName) - - // Send the user's command to the bash session - const escapedCommand = startCommand.replace(/'/g, "'\\''") - const { toolResult: sendResult } = yield { - toolName: 'run_terminal_command', - input: { - command: helperPath + " send '" + sessionName + "' '" + escapedCommand + "'", - timeout_seconds: 15, - }, - } - - const sendOutput = sendResult?.[0] - if (sendOutput && sendOutput.type === 'json') { - const value = sendOutput.value as Record - const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined - if (exitCode !== 0) { - const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : 'send failed' - logger.error('Failed to send command: ' + stderr) - yield { - toolName: 'run_terminal_command', - input: { command: helperPath + " stop '" + sessionName + "'", timeout_seconds: 5 }, - } - yield { - toolName: 'set_output', - input: { - overallStatus: 'failure', - summary: 'Started session but failed to send command. ' + stderr, - sessionName, - scriptIssues: [{ script: helperPath, issue: stderr, suggestedFix: 'Check that the command is valid.' 
}], - captures: [], - }, - } - return - } - } - - logger.info('Sent command to session: ' + startCommand) + logger.info('Session ready: ' + sessionName) - // Wait briefly then capture initial state so the agent starts with context + // Capture initial state so the agent starts with context (0.5s is enough since send already waits ~0.6s) const { toolResult: initCapture } = yield { toolName: 'run_terminal_command', input: { - command: 'sleep 1.5 && ' + helperPath + " capture '" + sessionName + "' --wait 0 --label startup-check", + command: 'sleep 0.5 && ' + helperPath + " capture '" + sessionName + "' --wait 0 --label startup-check", timeout_seconds: 10, }, } @@ -606,7 +553,10 @@ esac '**Captures dir:** `' + captureDir + '/`\n\n' + '**Initial terminal output:**\n```\n' + initialOutput + '\n```\n\n' + 'Check the initial output above — if you see errors like "command not found" or "No such file", report failure immediately.\n\n' + - 'Commands:\n' + + '## Helper Script Implementation\n\n' + + 'The helper script at `' + helperPath + '` is a Bash script that wraps tmux commands to interact with the CLI. Here is its full implementation:\n\n' + + '```bash\n' + helperScript.replace(/```/g, '\\`\\`\\`') + '\n```\n\n' + + '## Quick Reference\n\n' + '- Send input: `' + helperPath + ' send "' + sessionName + '" "..."`\n' + '- Send with paste mode: `' + helperPath + ' send "' + sessionName + '" "..." --paste`\n' + '- Send + wait for output: `' + helperPath + ' send "' + sessionName + '" "..." 
--wait-idle 3`\n' + From 567cdbbb05017c25bd40418780b34fcfd3cc7565 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 14:15:36 -0700 Subject: [PATCH 03/16] Upgrade to react 19 --- bun.lock | 42 ++++-------- cli/package.json | 2 +- .../components/blocks/agent-branch-item.tsx | 5 +- cli/src/components/clickable.tsx | 6 +- cli/src/components/tools/tool-call-item.tsx | 5 +- cli/src/types/react19-compat.d.ts | 19 ++++++ .../__tests__/markdown-renderer.test.tsx | 64 ++++++++++--------- freebuff/web/package.json | 8 +-- package.json | 4 +- web/package.json | 8 +-- .../admin/traces/components/chat-message.tsx | 1 + web/src/components/card-with-beams.tsx | 1 + web/src/components/docs/mdx/code-demo.tsx | 1 + .../components/docs/mdx/markdown-table.tsx | 2 +- .../ui/landing/competition/github-copilot.tsx | 2 +- 15 files changed, 91 insertions(+), 79 deletions(-) create mode 100644 cli/src/types/react19-compat.d.ts diff --git a/bun.lock b/bun.lock index e53d3ca9a2..964cd43180 100644 --- a/bun.lock +++ b/bun.lock @@ -75,7 +75,7 @@ "zustand": "^5.0.8", }, "devDependencies": { - "@types/react": "^18.3.12", + "@types/react": "19.2.14", "@types/react-reconciler": "^0.32.0", "react-dom": "^19.0.0", "strip-ansi": "^7.1.2", @@ -149,16 +149,16 @@ "next-auth": "^4.24.11", "next-themes": "^0.3.0", "pino": "^9.6.0", - "react": "18.3.1", - "react-dom": "18.3.1", + "react": "^19.0.0", + "react-dom": "^19.0.0", "tailwind-merge": "^2.5.2", "zod": "^4.2.1", }, "devDependencies": { "@tailwindcss/typography": "^0.5.15", "@types/node": "^22.14.0", - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "autoprefixer": "^10.4.21", "postcss": "^8", "tailwindcss": "^3.4.11", @@ -299,8 +299,8 @@ "pino": "^9.6.0", "posthog-js": "^1.234.10", "prism-react-renderer": "^2.4.1", - "react": "18.3.1", - "react-dom": "18.3.1", + "react": "^19.0.0", + "react-dom": "^19.0.0", "react-hook-form": "^7.55.0", "server-only": "^0.0.1", 
"tailwind-merge": "^2.5.2", @@ -319,8 +319,8 @@ "@types/jest": "^29.5.14", "@types/node": "^22.14.0", "@types/pg": "^8.11.11", - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "@typescript-eslint/eslint-plugin": "^8.29.1", "@typescript-eslint/parser": "^8.29.1", "autoprefixer": "^10.4.21", @@ -351,8 +351,8 @@ }, }, "overrides": { - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "baseline-browser-mapping": "^2.9.14", "signal-exit": "3.0.7", "zod": "^4.2.1", @@ -1330,11 +1330,9 @@ "@types/prismjs": ["@types/prismjs@1.26.5", "", {}, "sha512-AUZTa7hQ2KY5L7AmtSiqxlhWxb4ina0yd8hNbl4TWuqnv/pFP0nDMb3YrfSBf4hJVGLh2YEIBfKaBW/9UEl6IQ=="], - "@types/prop-types": ["@types/prop-types@15.7.15", "", {}, "sha512-F6bEyamV9jKGAFBEmlQnesRPGOQqS2+Uwi0Em15xenOxHaf2hv6L8YCVn3rPdPJOiJfPiCnLIRyvwVaqMY3MIw=="], - - "@types/react": ["@types/react@18.3.26", "", { "dependencies": { "@types/prop-types": "*", "csstype": "^3.0.2" } }, "sha512-RFA/bURkcKzx/X9oumPG9Vp3D3JUgus/d0b67KB0t5S/raciymilkOa66olh78MUI92QLbEJevO7rvqU/kjwKA=="], + "@types/react": ["@types/react@19.2.14", "", { "dependencies": { "csstype": "^3.2.2" } }, "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w=="], - "@types/react-dom": ["@types/react-dom@18.3.7", "", { "peerDependencies": { "@types/react": "^18.0.0" } }, "sha512-MEe3UeoENYVFXzoXEWsvcpg6ZvlrFNlOQ7EOsvhI3CfAXwzPfO8Qwuxd40nepsYKqyyVQnTdEfv68q91yLcKrQ=="], + "@types/react-dom": ["@types/react-dom@19.2.3", "", { "peerDependencies": { "@types/react": "^19.2.0" } }, "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ=="], "@types/react-reconciler": ["@types/react-reconciler@0.32.2", "", { "peerDependencies": { "@types/react": "*" } }, "sha512-gjcm6O0aUknhYaogEl8t5pecPfiOTD8VQkbjOhgbZas/E6qGY+veW9iuJU/7p4Y1E0EuQ0mArga7VEOUWSlVRA=="], @@ -1744,7 
+1742,7 @@ "cssstyle": ["cssstyle@2.3.0", "", { "dependencies": { "cssom": "~0.3.6" } }, "sha512-AZL67abkUzIuvcHqk7c09cezpGNcxUxU4Ioi/05xHk4DQeTkWmGYftIE6ctU6AEt+Gn4n1lDStOtj7FKycP71A=="], - "csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="], + "csstype": ["csstype@3.2.3", "", {}, "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ=="], "cycled": ["cycled@1.2.0", "", {}, "sha512-/BOOCEohSBflVHHtY/wUc1F6YDYPqyVs/A837gDoq4H1pm72nU/yChyGt91V4ML+MbbAmHs8uo2l1yJkkTIUdg=="], @@ -3668,20 +3666,12 @@ "@codebuff/freebuff-web/pino": ["pino@9.14.0", "", { "dependencies": { "@pinojs/redact": "^0.4.0", "atomic-sleep": "^1.0.0", "on-exit-leak-free": "^2.1.0", "pino-abstract-transport": "^2.0.0", "pino-std-serializers": "^7.0.0", "process-warning": "^5.0.0", "quick-format-unescaped": "^4.0.3", "real-require": "^0.2.0", "safe-stable-stringify": "^2.3.1", "sonic-boom": "^4.0.1", "thread-stream": "^3.0.0" }, "bin": { "pino": "bin.js" } }, "sha512-8OEwKp5juEvb/MjpIc4hjqfgCNysrS94RIOMXYvpYCdm/jglrKEiAYmiumbmGhCvs+IcInsphYDFwqrjr7398w=="], - "@codebuff/freebuff-web/react": ["react@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ=="], - - "@codebuff/freebuff-web/react-dom": ["react-dom@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" }, "peerDependencies": { "react": "^18.3.1" } }, "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw=="], - "@codebuff/sdk/ignore": ["ignore@7.0.5", "", {}, "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg=="], "@codebuff/web/@typescript-eslint/eslint-plugin": ["@typescript-eslint/eslint-plugin@8.46.2", "", { "dependencies": { "@eslint-community/regexpp": "^4.10.0", "@typescript-eslint/scope-manager": 
"8.46.2", "@typescript-eslint/type-utils": "8.46.2", "@typescript-eslint/utils": "8.46.2", "@typescript-eslint/visitor-keys": "8.46.2", "graphemer": "^1.4.0", "ignore": "^7.0.0", "natural-compare": "^1.4.0", "ts-api-utils": "^2.1.0" }, "peerDependencies": { "@typescript-eslint/parser": "^8.46.2", "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "sha512-ZGBMToy857/NIPaaCucIUQgqueOiq7HeAKkhlvqVV4lm089zUFW6ikRySx2v+cAhKeUCPuWVHeimyk6Dw1iY3w=="], "@codebuff/web/pino": ["pino@9.14.0", "", { "dependencies": { "@pinojs/redact": "^0.4.0", "atomic-sleep": "^1.0.0", "on-exit-leak-free": "^2.1.0", "pino-abstract-transport": "^2.0.0", "pino-std-serializers": "^7.0.0", "process-warning": "^5.0.0", "quick-format-unescaped": "^4.0.3", "real-require": "^0.2.0", "safe-stable-stringify": "^2.3.1", "sonic-boom": "^4.0.1", "thread-stream": "^3.0.0" }, "bin": { "pino": "bin.js" } }, "sha512-8OEwKp5juEvb/MjpIc4hjqfgCNysrS94RIOMXYvpYCdm/jglrKEiAYmiumbmGhCvs+IcInsphYDFwqrjr7398w=="], - "@codebuff/web/react": ["react@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ=="], - - "@codebuff/web/react-dom": ["react-dom@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" }, "peerDependencies": { "react": "^18.3.1" } }, "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw=="], - "@commitlint/config-validator/ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], "@commitlint/top-level/find-up": ["find-up@7.0.0", "", { "dependencies": { "locate-path": "^7.2.0", "path-exists": "^5.0.0", "unicorn-magic": "^0.1.0" } }, 
"sha512-YyZM99iHrqLKjmt4LJDj58KI+fYyufRLBSYcqycxf//KpBk9FoewoGX0450m9nB44qrZnovzC2oeP5hUibxc/g=="], @@ -4234,8 +4224,6 @@ "@codebuff/freebuff-web/pino/process-warning": ["process-warning@5.0.0", "", {}, "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA=="], - "@codebuff/freebuff-web/react-dom/scheduler": ["scheduler@0.23.2", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ=="], - "@codebuff/web/@typescript-eslint/eslint-plugin/@typescript-eslint/scope-manager": ["@typescript-eslint/scope-manager@8.46.2", "", { "dependencies": { "@typescript-eslint/types": "8.46.2", "@typescript-eslint/visitor-keys": "8.46.2" } }, "sha512-LF4b/NmGvdWEHD2H4MsHD8ny6JpiVNDzrSZr3CsckEgCbAGZbYM4Cqxvi9L+WqDMT+51Ozy7lt2M+d0JLEuBqA=="], "@codebuff/web/@typescript-eslint/eslint-plugin/@typescript-eslint/type-utils": ["@typescript-eslint/type-utils@8.46.2", "", { "dependencies": { "@typescript-eslint/types": "8.46.2", "@typescript-eslint/typescript-estree": "8.46.2", "@typescript-eslint/utils": "8.46.2", "debug": "^4.3.4", "ts-api-utils": "^2.1.0" }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "sha512-HbPM4LbaAAt/DjxXaG9yiS9brOOz6fabal4uvUmaUYe6l3K1phQDMQKBRUrr06BQkxkvIZVVHttqiybM9nJsLA=="], @@ -4252,8 +4240,6 @@ "@codebuff/web/pino/process-warning": ["process-warning@5.0.0", "", {}, "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA=="], - "@codebuff/web/react-dom/scheduler": ["scheduler@0.23.2", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ=="], - "@commitlint/config-validator/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], 
"@commitlint/top-level/find-up/locate-path": ["locate-path@7.2.0", "", { "dependencies": { "p-locate": "^6.0.0" } }, "sha512-gvVijfZvn7R+2qyPX8mAuKcFGDf6Nc61GdvGafQsHL0sBIxfKzA+usWn4GFC/bk+QdwPUD4kWFJLhElipq+0VA=="], diff --git a/cli/package.json b/cli/package.json index 9b67437fca..135823c3ef 100644 --- a/cli/package.json +++ b/cli/package.json @@ -54,7 +54,7 @@ "zustand": "^5.0.8" }, "devDependencies": { - "@types/react": "^18.3.12", + "@types/react": "19.2.14", "@types/react-reconciler": "^0.32.0", "react-dom": "^19.0.0", "strip-ansi": "^7.1.2" diff --git a/cli/src/components/blocks/agent-branch-item.tsx b/cli/src/components/blocks/agent-branch-item.tsx index 7661bd1be9..67f6b6d6b5 100644 --- a/cli/src/components/blocks/agent-branch-item.tsx +++ b/cli/src/components/blocks/agent-branch-item.tsx @@ -80,8 +80,9 @@ export const AgentBranchItem = memo((props: AgentBranchItemProps) => { } if (React.isValidElement(value)) { + const elProps = value.props as Record if (value.type === React.Fragment) { - return isTextRenderable(value.props.children) + return isTextRenderable(elProps.children as ReactNode) } if (typeof value.type === 'string') { @@ -90,7 +91,7 @@ export const AgentBranchItem = memo((props: AgentBranchItemProps) => { value.type === 'strong' || value.type === 'em' ) { - return isTextRenderable(value.props.children) + return isTextRenderable(elProps.children as ReactNode) } return false diff --git a/cli/src/components/clickable.tsx b/cli/src/components/clickable.tsx index caf56356c1..b9f4bbb516 100644 --- a/cli/src/components/clickable.tsx +++ b/cli/src/components/clickable.tsx @@ -28,18 +28,18 @@ export function makeTextUnselectable(node: ReactNode): ReactNode { if (!isValidElement(node)) return node - const el = node as ReactElement + const el = node as ReactElement<{ children?: ReactNode; [key: string]: unknown }> const type = el.type // Ensure text and span nodes are not selectable if (typeof type === 'string' && (type === 'text' || type === 'span')) { 
const nextProps = { ...el.props, selectable: false } - const nextChildren = el.props?.children ? makeTextUnselectable(el.props.children) : el.props?.children + const nextChildren = el.props.children ? makeTextUnselectable(el.props.children) : el.props.children return cloneElement(el, nextProps, nextChildren) } // Recurse into other host elements and components' children - const nextChildren = el.props?.children ? makeTextUnselectable(el.props.children) : el.props?.children + const nextChildren = el.props.children ? makeTextUnselectable(el.props.children) : el.props.children return cloneElement(el, el.props, nextChildren) } diff --git a/cli/src/components/tools/tool-call-item.tsx b/cli/src/components/tools/tool-call-item.tsx index 72cdef7182..c207bcb35e 100644 --- a/cli/src/components/tools/tool-call-item.tsx +++ b/cli/src/components/tools/tool-call-item.tsx @@ -33,8 +33,9 @@ const isTextRenderable = (value: ReactNode): boolean => { } if (React.isValidElement(value)) { + const elProps = value.props as Record if (value.type === React.Fragment) { - return isTextRenderable(value.props.children) + return isTextRenderable(elProps.children as ReactNode) } if (typeof value.type === 'string') { @@ -43,7 +44,7 @@ const isTextRenderable = (value: ReactNode): boolean => { value.type === 'strong' || value.type === 'em' ) { - return isTextRenderable(value.props.children) + return isTextRenderable(elProps.children as ReactNode) } return false diff --git a/cli/src/types/react19-compat.d.ts b/cli/src/types/react19-compat.d.ts new file mode 100644 index 0000000000..11ca1af2a0 --- /dev/null +++ b/cli/src/types/react19-compat.d.ts @@ -0,0 +1,19 @@ +/** + * React 19 compatibility shim for OpenTUI JSX types. + * + * OpenTUI's JSX namespace defines `type Element = React.ReactNode`. + * In React 19, `FunctionComponent` returns `ReactNode | Promise`, + * but `Promise` is not assignable to `ReactNode`. 
+ * + * This augmentation adds a narrower call signature to `FunctionComponent` + * that returns just `ReactNode`. Due to TypeScript's interface merging rules, + * the later declaration's overloads have higher precedence, so the narrower + * signature is resolved first — fixing all `React.FC` JSX compatibility errors. + */ +import 'react' + +declare module 'react' { + interface FunctionComponent
<P = {}>
{ + (props: P): ReactNode + } +} diff --git a/cli/src/utils/__tests__/markdown-renderer.test.tsx b/cli/src/utils/__tests__/markdown-renderer.test.tsx index 9cc2d35ffb..36ea688fe6 100644 --- a/cli/src/utils/__tests__/markdown-renderer.test.tsx +++ b/cli/src/utils/__tests__/markdown-renderer.test.tsx @@ -4,10 +4,12 @@ import React from 'react' import { renderMarkdown, renderStreamingMarkdown } from '../markdown-renderer' -const flattenNodes = (input: React.ReactNode): React.ReactNode[] => { +type El = React.ReactElement> + +const flattenNodes = (input: unknown): React.ReactNode[] => { const result: React.ReactNode[] = [] - const visit = (value: React.ReactNode): void => { + const visit = (value: unknown): void => { if (value === null || value === undefined || typeof value === 'boolean') { return } @@ -18,18 +20,18 @@ const flattenNodes = (input: React.ReactNode): React.ReactNode[] => { } if (React.isValidElement(value) && value.type === React.Fragment) { - visit(value.props.children) + visit((value as El).props.children) return } - result.push(value) + result.push(value as React.ReactNode) } visit(input) return result } -const flattenChildren = (value: React.ReactNode): React.ReactNode[] => +const flattenChildren = (value: unknown): React.ReactNode[] => flattenNodes(value) describe('markdown renderer', () => { @@ -39,13 +41,13 @@ describe('markdown renderer', () => { expect(nodes[0]).toBe('Hello ') - const bold = nodes[1] as React.ReactElement + const bold = nodes[1] as El expect(bold.props.attributes).toBe(TextAttributes.BOLD) expect(flattenChildren(bold.props.children)).toEqual(['bold']) expect(nodes[2]).toBe(' and ') - const italic = nodes[3] as React.ReactElement + const italic = nodes[3] as El expect(italic.props.attributes).toBe(TextAttributes.ITALIC) expect(flattenChildren(italic.props.children)).toEqual(['italic']) @@ -58,7 +60,7 @@ describe('markdown renderer', () => { expect(nodes[0]).toBe('Use ') - const inlineCode = nodes[1] as React.ReactElement + const 
inlineCode = nodes[1] as El expect(inlineCode.props.fg).toBe('#86efac') expect(inlineCode.props.bg).toBe('#0d1117') expect(flattenChildren(inlineCode.props.children)).toEqual([' ls ']) @@ -70,7 +72,7 @@ describe('markdown renderer', () => { const output = renderMarkdown('# Heading One') const nodes = flattenNodes(output) - const heading = nodes[0] as React.ReactElement + const heading = nodes[0] as El expect(heading.props.attributes).toBe(TextAttributes.BOLD) expect(heading.props.fg).toBe('magenta') expect(flattenChildren(heading.props.children)).toEqual(['Heading One']) @@ -82,12 +84,12 @@ describe('markdown renderer', () => { ) const nodes = flattenNodes(output) - const heading = nodes[0] as React.ReactElement + const heading = nodes[0] as El const contents = flattenChildren(heading.props.children) expect(contents[0]).toBe('Other') - const strong = contents[1] as React.ReactElement + const strong = contents[1] as El expect(strong.props.attributes).toBe(TextAttributes.BOLD) expect(flattenChildren(strong.props.children)).toEqual(['.github/']) @@ -98,11 +100,11 @@ describe('markdown renderer', () => { const output = renderMarkdown('> note') const nodes = flattenNodes(output) - const prefixSpan = nodes[0] as React.ReactElement + const prefixSpan = nodes[0] as El expect(prefixSpan.props.fg).toBe('gray') expect(flattenChildren(prefixSpan.props.children)).toEqual(['> ']) - const textSpan = nodes[1] as React.ReactElement + const textSpan = nodes[1] as El expect(textSpan.props.fg).toBe('gray') expect(flattenChildren(textSpan.props.children)).toEqual(['note']) }) @@ -112,10 +114,10 @@ describe('markdown renderer', () => { const nodes = flattenNodes(output) const bulletSpans = nodes.filter( - (node): node is React.ReactElement => + (node): node is El => React.isValidElement(node) && node.type === 'span' && - flattenChildren(node.props.children).join('') === '- ', + flattenChildren((node as El).props.children).join('') === '- ', ) expect(bulletSpans).toHaveLength(2) @@ 
-135,10 +137,10 @@ describe('markdown renderer', () => { const nodes = flattenNodes(output) const boldNode = nodes.find( - (node): node is React.ReactElement => + (node): node is El => React.isValidElement(node) && - node.props !== undefined && - node.props.attributes === TextAttributes.BOLD, + (node as El).props !== undefined && + (node as El).props.attributes === TextAttributes.BOLD, ) expect(boldNode).toBeDefined() @@ -152,7 +154,7 @@ describe('markdown renderer', () => { expect(nodes[0]).toBe('This is ') - const strikethrough = nodes[1] as React.ReactElement + const strikethrough = nodes[1] as El expect(strikethrough.props.attributes).toBe(TextAttributes.DIM) expect(flattenChildren(strikethrough.props.children)).toEqual(['deleted']) @@ -164,11 +166,11 @@ describe('markdown renderer', () => { const nodes = flattenNodes(output) const checkboxSpans = nodes.filter( - (node): node is React.ReactElement => + (node): node is El => React.isValidElement(node) && node.type === 'span' && - (flattenChildren(node.props.children).join('') === '[ ] ' || - flattenChildren(node.props.children).join('') === '[x] '), + (flattenChildren((node as El).props.children).join('') === '[ ] ' || + flattenChildren((node as El).props.children).join('') === '[x] '), ) expect(checkboxSpans).toHaveLength(2) @@ -187,7 +189,7 @@ describe('markdown renderer', () => { .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -217,7 +219,7 @@ codebuff "add a new feature to handle user authentication" .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -241,7 +243,7 @@ codebuff "add a new feature to handle user authentication" expect(nodes[0]).toBe('Use ') - 
const inlineCode = nodes[1] as React.ReactElement + const inlineCode = nodes[1] as El expect(inlineCode.props.fg).toBe('#86efac') const inlineContent = flattenChildren(inlineCode.props.children).join('') expect(inlineContent).toContain('codebuff "fix bug"') @@ -271,7 +273,7 @@ console.log("world") .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -299,7 +301,7 @@ codebuff "implement feature" --verbose .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -315,7 +317,7 @@ codebuff "implement feature" --verbose const output = renderMarkdown(markdown) const nodes = flattenNodes(output) - const inlineCode = nodes[1] as React.ReactElement + const inlineCode = nodes[1] as El const inlineContent = flattenChildren(inlineCode.props.children).join('') // Should preserve quotes and special characters within inline code @@ -337,7 +339,7 @@ codebuff "implement feature" --verbose .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -372,7 +374,7 @@ codebuff "implement feature" --verbose .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -399,7 +401,7 @@ codebuff "implement feature" --verbose .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as 
El).props.children).join('') } return '' }) diff --git a/freebuff/web/package.json b/freebuff/web/package.json index 55c492359b..53dc3c7a5e 100644 --- a/freebuff/web/package.json +++ b/freebuff/web/package.json @@ -25,16 +25,16 @@ "next-auth": "^4.24.11", "next-themes": "^0.3.0", "pino": "^9.6.0", - "react": "18.3.1", - "react-dom": "18.3.1", + "react": "^19.0.0", + "react-dom": "^19.0.0", "tailwind-merge": "^2.5.2", "zod": "^4.2.1" }, "devDependencies": { "@tailwindcss/typography": "^0.5.15", "@types/node": "^22.14.0", - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "autoprefixer": "^10.4.21", "postcss": "^8", "tailwindcss": "^3.4.11", diff --git a/package.json b/package.json index b5e971d6d2..628036fc1d 100644 --- a/package.json +++ b/package.json @@ -45,8 +45,8 @@ "zod": "^4.2.1" }, "overrides": { - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "baseline-browser-mapping": "^2.9.14", "zod": "^4.2.1", "signal-exit": "3.0.7" diff --git a/web/package.json b/web/package.json index 4307ba85f6..bf6ef79342 100644 --- a/web/package.json +++ b/web/package.json @@ -80,8 +80,8 @@ "pino": "^9.6.0", "posthog-js": "^1.234.10", "prism-react-renderer": "^2.4.1", - "react": "18.3.1", - "react-dom": "18.3.1", + "react": "^19.0.0", + "react-dom": "^19.0.0", "react-hook-form": "^7.55.0", "server-only": "^0.0.1", "tailwind-merge": "^2.5.2", @@ -100,8 +100,8 @@ "@types/jest": "^29.5.14", "@types/node": "^22.14.0", "@types/pg": "^8.11.11", - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "@typescript-eslint/eslint-plugin": "^8.29.1", "@typescript-eslint/parser": "^8.29.1", "autoprefixer": "^10.4.21", diff --git a/web/src/app/admin/traces/components/chat-message.tsx b/web/src/app/admin/traces/components/chat-message.tsx index c9166e2895..815579fb7e 100644 --- 
a/web/src/app/admin/traces/components/chat-message.tsx +++ b/web/src/app/admin/traces/components/chat-message.tsx @@ -1,5 +1,6 @@ 'use client' +import type { JSX } from 'react' import { User, Bot, Clock, Coins, Hash, Wrench } from 'lucide-react' import { diff --git a/web/src/components/card-with-beams.tsx b/web/src/components/card-with-beams.tsx index a004f5e16f..3fe48d71c5 100644 --- a/web/src/components/card-with-beams.tsx +++ b/web/src/components/card-with-beams.tsx @@ -1,3 +1,4 @@ +import type { JSX } from 'react' import { BackgroundBeams } from './ui/background-beams' import { Card, diff --git a/web/src/components/docs/mdx/code-demo.tsx b/web/src/components/docs/mdx/code-demo.tsx index b4ff6ec8ba..e02168f7ee 100644 --- a/web/src/components/docs/mdx/code-demo.tsx +++ b/web/src/components/docs/mdx/code-demo.tsx @@ -3,6 +3,7 @@ import { Check, Copy } from 'lucide-react' import { Highlight, themes } from 'prism-react-renderer' import { useMemo, useState } from 'react' +import type { JSX } from 'react' import { MermaidDiagram } from './mermaid-diagram' diff --git a/web/src/components/docs/mdx/markdown-table.tsx b/web/src/components/docs/mdx/markdown-table.tsx index 0d211d7a2a..c4758f7c3c 100644 --- a/web/src/components/docs/mdx/markdown-table.tsx +++ b/web/src/components/docs/mdx/markdown-table.tsx @@ -20,7 +20,7 @@ function extractTextContent(node: React.ReactNode): string { return node.map(extractTextContent).join('') } if (typeof node === 'object' && 'props' in node) { - const element = node as React.ReactElement + const element = node as React.ReactElement<{ children?: React.ReactNode }> return extractTextContent(element.props.children) } return '' diff --git a/web/src/components/ui/landing/competition/github-copilot.tsx b/web/src/components/ui/landing/competition/github-copilot.tsx index 25ca264d73..d192635249 100644 --- a/web/src/components/ui/landing/competition/github-copilot.tsx +++ b/web/src/components/ui/landing/competition/github-copilot.tsx @@ -225,7 
+225,7 @@ function MatrixRainEffect({ isActive?: boolean }) { const canvasRef = useRef(null) - const requestRef = useRef() + const requestRef = useRef(undefined) // Only render if enabled and active const shouldRender = enabled && isActive From cff62fe5f16b5942d320f3d30b7ff7f3ddb14d60 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 14:24:50 -0700 Subject: [PATCH 04/16] Integrate canopy wave instead of baseten for minimax provider --- packages/internal/src/env-schema.ts | 4 +- scripts/test-canopywave-e2e.ts | 135 +++++++ scripts/test-canopywave.ts | 375 ++++++++++++++++++ scripts/test-fireworks.ts | 2 + web/src/app/api/v1/chat/completions/_post.ts | 44 +- web/src/llm-api/{baseten.ts => canopywave.ts} | 123 +++--- 6 files changed, 604 insertions(+), 79 deletions(-) create mode 100644 scripts/test-canopywave-e2e.ts create mode 100644 scripts/test-canopywave.ts rename web/src/llm-api/{baseten.ts => canopywave.ts} (79%) diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index 21a0147bd8..93cfee7d4f 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -7,7 +7,7 @@ export const serverEnvSchema = clientEnvSchema.extend({ OPENAI_API_KEY: z.string().min(1), ANTHROPIC_API_KEY: z.string().min(1), FIREWORKS_API_KEY: z.string().min(1), - BASETEN_API_KEY: z.string().min(1).optional(), + CANOPYWAVE_API_KEY: z.string().min(1).optional(), LINKUP_API_KEY: z.string().min(1), CONTEXT7_API_KEY: z.string().optional(), GRAVITY_API_KEY: z.string().min(1), @@ -51,7 +51,7 @@ export const serverProcessEnv: ServerInput = { OPENAI_API_KEY: process.env.OPENAI_API_KEY, ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY, FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY, - BASETEN_API_KEY: process.env.BASETEN_API_KEY, + CANOPYWAVE_API_KEY: process.env.CANOPYWAVE_API_KEY, LINKUP_API_KEY: process.env.LINKUP_API_KEY, CONTEXT7_API_KEY: process.env.CONTEXT7_API_KEY, GRAVITY_API_KEY: 
process.env.GRAVITY_API_KEY, diff --git a/scripts/test-canopywave-e2e.ts b/scripts/test-canopywave-e2e.ts new file mode 100644 index 0000000000..e03d1778fe --- /dev/null +++ b/scripts/test-canopywave-e2e.ts @@ -0,0 +1,135 @@ +#!/usr/bin/env bun + +/** + * E2E test for CanopyWave integration via the Codebuff SDK. + * + * Creates a real agent run using the minimax model so the request + * flows through our chat completions endpoint → CanopyWave → back with usage data. + * + * Usage: + * bun scripts/test-canopywave-e2e.ts + */ + +import { CodebuffClient } from '@codebuff/sdk' + +import type { AgentDefinition } from '@codebuff/sdk' +import type { PrintModeEvent } from '@codebuff/common/types/print-mode' + +const minimaxAgent: AgentDefinition = { + id: 'canopywave-test-agent', + model: 'minimax/minimax-m2.5', + displayName: 'CanopyWave Test Agent', + toolNames: ['end_turn'], + instructionsPrompt: `You are a test agent. Respond with exactly "Hello from CanopyWave!" and nothing else. Then call the end_turn tool.`, +} + +async function main() { + const apiKey = process.env.CODEBUFF_API_KEY + if (!apiKey) { + console.error('❌ CODEBUFF_API_KEY is not set.') + console.error(' Example: CODEBUFF_API_KEY= bun scripts/test-canopywave-e2e.ts') + process.exit(1) + } + + console.log('🔌 CanopyWave E2E Test via Codebuff SDK') + console.log('='.repeat(50)) + console.log() + console.log(`Model: ${minimaxAgent.model}`) + console.log(`Agent: ${minimaxAgent.id}`) + console.log() + + const client = new CodebuffClient({ + apiKey, + cwd: process.cwd(), + }) + + const events: PrintModeEvent[] = [] + let responseText = '' + + const startTime = Date.now() + + const result = await client.run({ + agent: minimaxAgent, + prompt: 'Say hello', + costMode: 'free', + handleEvent: (event) => { + events.push(event) + if (event.type === 'text') { + responseText += event.text + process.stdout.write(event.text) + } else if (event.type === 'reasoning_delta') { + // Don't print reasoning, just note it + } else 
if (event.type === 'error') { + console.error(`\n❌ Error event: ${event.message}`) + } else if (event.type === 'finish') { + console.log('\n') + } + }, + handleStreamChunk: (chunk) => { + if (typeof chunk === 'string') { + // Already handled in handleEvent + } + }, + }) + + const elapsed = Date.now() - startTime + + console.log(`── Results (${elapsed}ms) ──`) + console.log() + + if (result.output.type === 'error') { + console.error(`❌ Run failed: ${result.output.message}`) + if ('statusCode' in result.output) { + console.error(` Status code: ${result.output.statusCode}`) + } + process.exit(1) + } + + console.log(`✅ Run succeeded!`) + console.log(` Output type: ${result.output.type}`) + console.log(` Response text: ${responseText.slice(0, 200)}`) + console.log() + + // Check session state for credits used + const creditsUsed = result.sessionState?.mainAgentState.creditsUsed ?? 0 + console.log(`── Credits & Billing ──`) + console.log(` Credits used: ${creditsUsed}`) + console.log(` Cost (USD): $${(creditsUsed / 100).toFixed(4)}`) + console.log() + + // Summarize events + const eventTypes = events.reduce((acc, e) => { + acc[e.type] = (acc[e.type] ?? 
0) + 1 + return acc + }, {} as Record) + console.log(`── Event Summary ──`) + for (const [type, count] of Object.entries(eventTypes)) { + console.log(` ${type}: ${count}`) + } + console.log() + + // Check for finish events which include cost info + const finishEvents = events.filter((e) => e.type === 'finish') + if (finishEvents.length > 0) { + console.log(`── Finish Events ──`) + for (const event of finishEvents) { + console.log(JSON.stringify(event, null, 2)) + } + console.log() + } + + // Print all events for debugging + console.log(`── All Events (${events.length} total) ──`) + for (const event of events) { + if (event.type === 'text' || event.type === 'reasoning_delta') continue + console.log(JSON.stringify(event)) + } + console.log() + + console.log('Done!') +} + +main().catch((error) => { + console.error('Fatal error:', error) + process.exit(1) +}) diff --git a/scripts/test-canopywave.ts b/scripts/test-canopywave.ts new file mode 100644 index 0000000000..ab1dede618 --- /dev/null +++ b/scripts/test-canopywave.ts @@ -0,0 +1,375 @@ +#!/usr/bin/env bun + +/** + * Test script to verify CanopyWave integration and usage/token reporting. + * + * Usage: + * # Test 1: Hit CanopyWave API directly + * bun scripts/test-canopywave.ts direct + * + * # Test 2: Hit our chat completions endpoint (requires running web server + valid API key) + * CODEBUFF_API_KEY= bun scripts/test-canopywave.ts endpoint + * + * # Run both tests + * CODEBUFF_API_KEY= bun scripts/test-canopywave.ts both + */ + +export {} + +const CANOPYWAVE_BASE_URL = 'https://inference.canopywave.io/v1' +const CANOPYWAVE_MODEL = 'minimax/minimax-m2.5' +const OPENROUTER_MODEL = 'minimax/minimax-m2.5' + +const testPrompt = 'Say "hello world" and nothing else.' + +async function testCanopyWaveDirect() { + const apiKey = process.env.CANOPYWAVE_API_KEY + if (!apiKey) { + console.error('❌ CANOPYWAVE_API_KEY is not set. 
Add it to .env.local or pass it directly.') + process.exit(1) + } + + // ── Non-streaming ── + console.log('── Test 1: CanopyWave API (non-streaming) ──') + console.log(`Model: ${CANOPYWAVE_MODEL}`) + console.log(`Prompt: "${testPrompt}"`) + console.log() + + const startTime = Date.now() + const response = await fetch(`${CANOPYWAVE_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${apiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: CANOPYWAVE_MODEL, + messages: [{ role: 'user', content: testPrompt }], + max_tokens: 64, + }), + }) + + if (!response.ok) { + const errorText = await response.text() + console.error(`❌ CanopyWave API returned ${response.status}: ${errorText}`) + process.exit(1) + } + + const data = await response.json() + const elapsed = Date.now() - startTime + const content = data.choices?.[0]?.message?.content ?? '' + + console.log(`✅ Response (${elapsed}ms):`) + console.log(` Content: ${content}`) + console.log(` Model: ${data.model}`) + console.log() + console.log(' ── Raw usage object ──') + console.log(JSON.stringify(data.usage, null, 2)) + console.log() + console.log(' ── Full raw response (excluding choices content) ──') + const debugData = { ...data } + if (debugData.choices) { + debugData.choices = debugData.choices.map((c: Record) => ({ + ...c, + message: { ...(c.message as Record), content: '' }, + })) + } + console.log(JSON.stringify(debugData, null, 2)) + console.log() + + // ── Streaming ── + console.log('── Test 2: CanopyWave API (streaming, include_usage only) ──') + const streamStart = Date.now() + const streamResponse = await fetch(`${CANOPYWAVE_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${apiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: CANOPYWAVE_MODEL, + messages: [{ role: 'user', content: testPrompt }], + max_tokens: 64, + stream: true, + stream_options: { include_usage: true 
}, + }), + }) + + if (!streamResponse.ok) { + const errorText = await streamResponse.text() + console.error(`❌ CanopyWave streaming API returned ${streamResponse.status}: ${errorText}`) + process.exit(1) + } + + await consumeStream(streamResponse, streamStart, 'include_usage only') +} + +async function consumeStream(streamResponse: Response, streamStart: number, label: string) { + const reader = streamResponse.body?.getReader() + if (!reader) { + console.error('❌ No response body reader') + process.exit(1) + } + + const decoder = new TextDecoder() + let streamContent = '' + let chunkCount = 0 + const allUsageChunks: unknown[] = [] + const allRawChunks: unknown[] = [] + + let done = false + while (!done) { + const result = await reader.read() + done = result.done + if (done) break + + const text = decoder.decode(result.value, { stream: true }) + const lines = text.split('\n').filter((l) => l.startsWith('data: ')) + + for (const line of lines) { + const raw = line.slice('data: '.length) + if (raw === '[DONE]') continue + + try { + const chunk = JSON.parse(raw) + chunkCount++ + const delta = chunk.choices?.[0]?.delta + if (delta?.content) streamContent += delta.content + if (delta?.reasoning_content) { + console.log(` [reasoning chunk] ${delta.reasoning_content.slice(0, 80)}...`) + } + if (chunk.usage) { + allUsageChunks.push(chunk.usage) + } + // Capture first 3 chunks for debugging + if (chunkCount <= 3) { + allRawChunks.push(chunk) + } + } catch { + // skip non-JSON lines + } + } + } + + const streamElapsed = Date.now() - streamStart + console.log(`✅ Stream response [${label}] (${streamElapsed}ms, ${chunkCount} chunks):`) + console.log(` Content: ${streamContent}`) + console.log() + console.log(` ── First 3 raw chunks ──`) + for (const chunk of allRawChunks) { + console.log(JSON.stringify(chunk, null, 2)) + console.log() + } + console.log(` ── All usage chunks (${allUsageChunks.length} total) ──`) + for (const usage of allUsageChunks) { + 
console.log(JSON.stringify(usage, null, 2)) + console.log() + } + if (allUsageChunks.length === 0) { + console.log(' ⚠️ No usage data received in stream!') + } + console.log() +} + +// ─── Chat Completions Endpoint Test ───────────────────────────────────────── + +async function testChatCompletionsEndpoint() { + const codebuffApiKey = process.env.CODEBUFF_API_KEY + if (!codebuffApiKey) { + console.error('❌ CODEBUFF_API_KEY is not set. Pass it as an env var.') + console.error(' Example: CODEBUFF_API_KEY= bun scripts/test-canopywave.ts endpoint') + process.exit(1) + } + + const appUrl = process.env.NEXT_PUBLIC_CODEBUFF_APP_URL ?? 'http://localhost:3000' + const endpoint = `${appUrl}/api/v1/chat/completions` + const runId = process.env.RUN_ID ?? 'test-run-id-canopywave' + + // ── Non-streaming ── + console.log('── Test: Chat Completions Endpoint (non-streaming) ──') + console.log(`Endpoint: ${endpoint}`) + console.log(`Model: ${OPENROUTER_MODEL} (should route to CanopyWave)`) + console.log(`Prompt: "${testPrompt}"`) + console.log() + + const startTime = Date.now() + const response = await fetch(endpoint, { + method: 'POST', + headers: { + Authorization: `Bearer ${codebuffApiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: OPENROUTER_MODEL, + messages: [{ role: 'user', content: testPrompt }], + max_tokens: 64, + stream: false, + codebuff_metadata: { + run_id: runId, + client_id: 'test-canopywave-script', + cost_mode: 'free', + }, + }), + }) + + const elapsed = Date.now() - startTime + const data = await response.json() + + if (response.ok) { + const content = data.choices?.[0]?.message?.content ?? 
'' + console.log(`✅ Response (${elapsed}ms):`) + console.log(` Content: ${content}`) + console.log(` Model: ${data.model}`) + console.log(` Provider: ${data.provider}`) + console.log() + console.log(' ── Usage object ──') + console.log(JSON.stringify(data.usage, null, 2)) + console.log() + if (data.usage) { + const u = data.usage + console.log(` prompt_tokens: ${u.prompt_tokens ?? 'N/A'}`) + console.log(` completion_tokens: ${u.completion_tokens ?? 'N/A'}`) + console.log(` total_tokens: ${u.total_tokens ?? 'N/A'}`) + console.log(` cost: ${u.cost ?? 'N/A'}`) + console.log(` cost_details: ${JSON.stringify(u.cost_details)}`) + } + } else { + console.log(`⚠️ Response ${response.status} (${elapsed}ms):`) + console.log(` ${JSON.stringify(data)}`) + if (response.status === 400 && data.message?.includes('runId')) { + console.log(' ℹ️ This is expected if you don\'t have a valid run_id.') + console.log(' ℹ️ The request reached the endpoint — routing to CanopyWave is wired up.') + } else if (response.status === 401) { + console.log(' ℹ️ Auth failed. 
Make sure CODEBUFF_API_KEY is valid.') + } + } + console.log() + + // ── Streaming ── + console.log('── Test: Chat Completions Endpoint (streaming) ──') + const streamStart = Date.now() + const streamResponse = await fetch(endpoint, { + method: 'POST', + headers: { + Authorization: `Bearer ${codebuffApiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: OPENROUTER_MODEL, + messages: [{ role: 'user', content: testPrompt }], + max_tokens: 64, + stream: true, + codebuff_metadata: { + run_id: runId, + client_id: 'test-canopywave-script', + cost_mode: 'free', + }, + }), + }) + + const streamElapsed = Date.now() - streamStart + + if (streamResponse.ok) { + const reader = streamResponse.body?.getReader() + if (!reader) { + console.error('❌ No response body reader') + process.exit(1) + } + + const decoder = new TextDecoder() + let streamContent = '' + let chunkCount = 0 + let chunksWithUsage = 0 + let lastUsage: unknown = null + + let done = false + while (!done) { + const result = await reader.read() + done = result.done + if (done) break + + const text = decoder.decode(result.value, { stream: true }) + const lines = text.split('\n').filter((l) => l.startsWith('data: ')) + + for (const line of lines) { + const raw = line.slice('data: '.length) + if (raw === '[DONE]') continue + + try { + const chunk = JSON.parse(raw) + chunkCount++ + const delta = chunk.choices?.[0]?.delta + if (delta?.content) streamContent += delta.content + if (chunk.usage) { + chunksWithUsage++ + lastUsage = chunk.usage + } + } catch { + // skip non-JSON lines + } + } + } + + console.log(`✅ Stream response (${streamElapsed}ms, ${chunkCount} chunks):`) + console.log(` Content: ${streamContent}`) + console.log(` Chunks with usage: ${chunksWithUsage} (should be exactly 1)`) + if (chunksWithUsage > 1) { + console.log(` ⚠️ Multiple usage chunks detected — billing fix may not be working!`) + } else if (chunksWithUsage === 1) { + console.log(` ✅ Only 1 usage chunk — billing 
fix is working correctly!`) + } else { + console.log(` ⚠️ No usage chunks received!`) + } + if (lastUsage) { + console.log() + console.log(' ── Final usage object ──') + console.log(JSON.stringify(lastUsage, null, 2)) + const u = lastUsage as Record + console.log() + console.log(` prompt_tokens: ${u.prompt_tokens ?? 'N/A'}`) + console.log(` completion_tokens: ${u.completion_tokens ?? 'N/A'}`) + console.log(` total_tokens: ${u.total_tokens ?? 'N/A'}`) + console.log(` cost: ${u.cost ?? 'N/A'}`) + console.log(` cost_details: ${JSON.stringify(u.cost_details)}`) + } + } else { + const data = await streamResponse.json() + console.log(`⚠️ Response ${streamResponse.status} (${streamElapsed}ms):`) + console.log(` ${JSON.stringify(data)}`) + if (streamResponse.status === 400 && data.message?.includes('runId')) { + console.log(' ℹ️ Expected without a valid run_id. Endpoint is reachable and routing works.') + } + } + console.log() +} + +// ─── Main ─────────────────────────────────────────────────────────────────── + +async function main() { + const mode = process.argv[2] ?? 
'direct' + + console.log('🔌 CanopyWave Integration Test') + console.log('='.repeat(50)) + console.log() + + switch (mode) { + case 'direct': + await testCanopyWaveDirect() + break + case 'endpoint': + await testChatCompletionsEndpoint() + break + case 'both': + await testCanopyWaveDirect() + await testChatCompletionsEndpoint() + break + default: + console.error(`Unknown mode: ${mode}`) + console.error('Usage: bun scripts/test-canopywave.ts [direct|endpoint|both]') + process.exit(1) + } + + console.log('Done!') +} + +main() diff --git a/scripts/test-fireworks.ts b/scripts/test-fireworks.ts index b7c57e1f54..00622cd770 100644 --- a/scripts/test-fireworks.ts +++ b/scripts/test-fireworks.ts @@ -14,6 +14,8 @@ * CODEBUFF_API_KEY= bun scripts/test-fireworks.ts both */ +export {} + const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5' const OPENROUTER_MODEL = 'minimax/minimax-m2.5' diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 1eec315d82..d236125bcb 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -36,11 +36,11 @@ import type { NextRequest } from 'next/server' import type { ChatCompletionRequestBody } from '@/llm-api/types' import { - BasetenError, - handleBasetenNonStream, - handleBasetenStream, - isBasetenModel, -} from '@/llm-api/baseten' + CanopyWaveError, + handleCanopyWaveNonStream, + handleCanopyWaveStream, + isCanopyWaveModel, +} from '@/llm-api/canopywave' import { FireworksError, handleFireworksNonStream, @@ -360,11 +360,11 @@ export async function postChatCompletions(params: { // Handle streaming vs non-streaming try { if (bodyStream) { - // Streaming request — route to Baseten/Fireworks for supported models - const useBaseten = isBasetenModel(typedBody.model) - const useFireworks = !useBaseten && isFireworksModel(typedBody.model) - const stream = useBaseten - 
? await handleBasetenStream({ + // Streaming request — route to CanopyWave/Fireworks for supported models + const useCanopyWave = isCanopyWaveModel(typedBody.model) + const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model) + const stream = useCanopyWave + ? await handleCanopyWaveStream({ body: typedBody, userId, stripeCustomerId, @@ -413,10 +413,10 @@ export async function postChatCompletions(params: { }, }) } else { - // Non-streaming request — route to Baseten/Fireworks for supported models + // Non-streaming request — route to CanopyWave/Fireworks for supported models const model = typedBody.model - const useBaseten = isBasetenModel(model) - const useFireworks = !useBaseten && isFireworksModel(model) + const useCanopyWave = isCanopyWaveModel(model) + const useFireworks = !useCanopyWave && isFireworksModel(model) const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? modelParts[1] : model const isOpenAIDirectModel = @@ -427,8 +427,8 @@ export async function postChatCompletions(params: { const shouldUseOpenAIEndpoint = isOpenAIDirectModel && typedBody.codebuff_metadata?.n !== undefined - const nonStreamRequest = useBaseten - ? handleBasetenNonStream({ + const nonStreamRequest = useCanopyWave + ? handleCanopyWaveNonStream({ body: typedBody, userId, stripeCustomerId, @@ -491,14 +491,14 @@ export async function postChatCompletions(params: { if (error instanceof FireworksError) { fireworksError = error } - let basetenError: BasetenError | undefined - if (error instanceof BasetenError) { - basetenError = error + let canopywaveError: CanopyWaveError | undefined + if (error instanceof CanopyWaveError) { + canopywaveError = error } // Log detailed error information for debugging const errorDetails = openrouterError?.toJSON() - const providerLabel = basetenError ? 'Baseten' : fireworksError ? 'Fireworks' : 'OpenRouter' + const providerLabel = canopywaveError ? 'CanopyWave' : fireworksError ? 
'Fireworks' : 'OpenRouter' logger.error( { error: getErrorObject(error), @@ -512,8 +512,8 @@ export async function postChatCompletions(params: { ? typedBody.messages.length : 0, messages: typedBody.messages, - providerStatusCode: (openrouterError ?? fireworksError ?? basetenError)?.statusCode, - providerStatusText: (openrouterError ?? fireworksError ?? basetenError)?.statusText, + providerStatusCode: (openrouterError ?? fireworksError ?? canopywaveError)?.statusCode, + providerStatusText: (openrouterError ?? fireworksError ?? canopywaveError)?.statusText, openrouterErrorCode: errorDetails?.error?.code, openrouterErrorType: errorDetails?.error?.type, openrouterErrorMessage: errorDetails?.error?.message, @@ -541,7 +541,7 @@ export async function postChatCompletions(params: { if (error instanceof FireworksError) { return NextResponse.json(error.toJSON(), { status: error.statusCode }) } - if (error instanceof BasetenError) { + if (error instanceof CanopyWaveError) { return NextResponse.json(error.toJSON(), { status: error.statusCode }) } diff --git a/web/src/llm-api/baseten.ts b/web/src/llm-api/canopywave.ts similarity index 79% rename from web/src/llm-api/baseten.ts rename to web/src/llm-api/canopywave.ts index dbd787def8..8582645944 100644 --- a/web/src/llm-api/baseten.ts +++ b/web/src/llm-api/canopywave.ts @@ -15,31 +15,31 @@ import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/b import type { Logger } from '@codebuff/common/types/contracts/logger' import type { ChatCompletionRequestBody } from './types' -const BASETEN_BASE_URL = 'https://inference.baseten.co/v1' +const CANOPYWAVE_BASE_URL = 'https://inference.canopywave.io/v1' // Extended timeout for deep-thinking models that can take // a long time to start streaming. 
-const BASETEN_HEADERS_TIMEOUT_MS = 10 * 60 * 1000 +const CANOPYWAVE_HEADERS_TIMEOUT_MS = 10 * 60 * 1000 -const basetenAgent = new Agent({ - headersTimeout: BASETEN_HEADERS_TIMEOUT_MS, +const canopywaveAgent = new Agent({ + headersTimeout: CANOPYWAVE_HEADERS_TIMEOUT_MS, bodyTimeout: 0, }) -/** Map from OpenRouter model IDs to Baseten model IDs */ -const BASETEN_MODEL_MAP: Record = { - 'minimax/minimax-m2.5': 'MiniMaxAI/MiniMax-M2.5', +/** Map from OpenRouter model IDs to CanopyWave model IDs */ +const CANOPYWAVE_MODEL_MAP: Record = { + 'minimax/minimax-m2.5': 'minimax/minimax-m2.5', } -export function isBasetenModel(model: string): boolean { - return model in BASETEN_MODEL_MAP +export function isCanopyWaveModel(model: string): boolean { + return model in CANOPYWAVE_MODEL_MAP } -function getBasetenModelId(openrouterModel: string): string { - return BASETEN_MODEL_MAP[openrouterModel] ?? openrouterModel +function getCanopyWaveModelId(openrouterModel: string): string { + return CANOPYWAVE_MODEL_MAP[openrouterModel] ?? 
openrouterModel } -type StreamState = { responseText: string; reasoningText: string } +type StreamState = { responseText: string; reasoningText: string; billedAlready: boolean } type LineResult = { state: StreamState @@ -47,49 +47,48 @@ type LineResult = { patchedLine: string } -function createBasetenRequest(params: { +function createCanopyWaveRequest(params: { body: ChatCompletionRequestBody originalModel: string fetch: typeof globalThis.fetch }) { const { body, originalModel, fetch } = params - const basetenBody: Record = { + const canopywaveBody: Record = { ...body, - model: getBasetenModelId(originalModel), + model: getCanopyWaveModelId(originalModel), } // Strip OpenRouter-specific / internal fields - delete basetenBody.provider - delete basetenBody.transforms - delete basetenBody.codebuff_metadata - delete basetenBody.usage + delete canopywaveBody.provider + delete canopywaveBody.transforms + delete canopywaveBody.codebuff_metadata + delete canopywaveBody.usage // For streaming, request usage in the final chunk - if (basetenBody.stream) { - basetenBody.stream_options = { include_usage: true } + if (canopywaveBody.stream) { + canopywaveBody.stream_options = { include_usage: true } } - if (!env.BASETEN_API_KEY) { - throw new Error('BASETEN_API_KEY is not configured') + if (!env.CANOPYWAVE_API_KEY) { + throw new Error('CANOPYWAVE_API_KEY is not configured') } - return fetch(`${BASETEN_BASE_URL}/chat/completions`, { + return fetch(`${CANOPYWAVE_BASE_URL}/chat/completions`, { method: 'POST', headers: { - Authorization: `Bearer ${env.BASETEN_API_KEY}`, + Authorization: `Bearer ${env.CANOPYWAVE_API_KEY}`, 'Content-Type': 'application/json', }, - body: JSON.stringify(basetenBody), + body: JSON.stringify(canopywaveBody), // @ts-expect-error - dispatcher is a valid undici option not in fetch types - dispatcher: basetenAgent, + dispatcher: canopywaveAgent, }) } -// Baseten per-token pricing (dollars per token) -// TODO: Verify these costs against Baseten's actual 
pricing -const BASETEN_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 -const BASETEN_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 -const BASETEN_OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 +// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5 +const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000 +const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 +const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000 function extractUsageAndCost(usage: Record | undefined | null): UsageData { if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } @@ -101,17 +100,16 @@ function extractUsageAndCost(usage: Record | undefined | null): const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0 - // Baseten doesn't return cost — compute from token counts and known pricing const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) const cost = - nonCachedInputTokens * BASETEN_INPUT_COST_PER_TOKEN + - cacheReadInputTokens * BASETEN_CACHED_INPUT_COST_PER_TOKEN + - outputTokens * BASETEN_OUTPUT_COST_PER_TOKEN + nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN + + cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN + + outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } } -export async function handleBasetenNonStream({ +export async function handleCanopyWaveNonStream({ body, userId, stripeCustomerId, @@ -132,10 +130,10 @@ export async function handleBasetenNonStream({ const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) - const response = await createBasetenRequest({ body, originalModel, fetch }) + const response = await createCanopyWaveRequest({ body, originalModel, 
fetch }) if (!response.ok) { - throw await parseBasetenError(response) + throw await parseCanopyWaveError(response) } const data = await response.json() @@ -182,12 +180,12 @@ export async function handleBasetenNonStream({ // Normalise model name back to OpenRouter format for client compatibility data.model = originalModel - if (!data.provider) data.provider = 'Baseten' + if (!data.provider) data.provider = 'CanopyWave' return data } -export async function handleBasetenStream({ +export async function handleCanopyWaveStream({ body, userId, stripeCustomerId, @@ -208,10 +206,10 @@ export async function handleBasetenStream({ const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) - const response = await createBasetenRequest({ body, originalModel, fetch }) + const response = await createCanopyWaveRequest({ body, originalModel, fetch }) if (!response.ok) { - throw await parseBasetenError(response) + throw await parseCanopyWaveError(response) } const reader = response.body?.getReader() @@ -220,7 +218,7 @@ export async function handleBasetenStream({ } let heartbeatInterval: NodeJS.Timeout - let state: StreamState = { responseText: '', reasoningText: '' } + let state: StreamState = { responseText: '', reasoningText: '', billedAlready: false } let clientDisconnected = false const stream = new ReadableStream({ @@ -301,7 +299,7 @@ export async function handleBasetenStream({ } else { logger.warn( getErrorObject(error), - 'Error after client disconnect in Baseten stream', + 'Error after client disconnect in CanopyWave stream', ) } } finally { @@ -317,7 +315,7 @@ export async function handleBasetenStream({ responseTextLength: state.responseText.length, reasoningTextLength: state.reasoningText.length, }, - 'Client cancelled stream, continuing Baseten consumption for billing', + 'Client cancelled stream, continuing CanopyWave consumption for billing', ) }, }) @@ -369,14 +367,14 @@ async function handleLine({ } catch 
(error) { logger.warn( { error: getErrorObject(error, { includeRawError: true }) }, - 'Received non-JSON Baseten response', + 'Received non-JSON CanopyWave response', ) return { state, patchedLine: line } } // Patch model and provider for SDK compatibility if (obj.model) obj.model = originalModel - if (!obj.provider) obj.provider = 'Baseten' + if (!obj.provider) obj.provider = 'CanopyWave' // Process the chunk for billing / state tracking const result = await handleResponse({ @@ -406,6 +404,12 @@ async function handleLine({ return { state: result.state, billedCredits: result.billedCredits, patchedLine } } +function isFinalChunk(data: Record): boolean { + const choices = data.choices as Array> | undefined + if (!choices || choices.length === 0) return true + return choices.some(c => c.finish_reason != null) +} + async function handleResponse({ userId, stripeCustomerId, @@ -437,13 +441,22 @@ async function handleResponse({ }): Promise<{ state: StreamState; billedCredits?: number }> { state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) - if ('error' in data || !data.usage) { + // Some providers send cumulative usage on EVERY chunk (not just the final one), + // so we must only bill once on the final chunk to avoid charging N times. + if ('error' in data || !data.usage || state.billedAlready || !isFinalChunk(data)) { + // Strip usage from non-final chunks and duplicate final chunks + // so the SDK doesn't see multiple usage objects + if (data.usage && (!isFinalChunk(data) || state.billedAlready)) { + delete data.usage + } return { state } } const usageData = extractUsageAndCost(data.usage as Record) const messageId = typeof data.id === 'string' ? 
data.id : 'unknown' + state.billedAlready = true + insertMessageToBigQuery({ messageId, userId, @@ -506,7 +519,7 @@ function handleStreamChunk({ errorType: errorData?.type, errorMessage: errorData?.message, }, - 'Received error chunk in Baseten stream', + 'Received error chunk in CanopyWave stream', ) return state } @@ -543,7 +556,7 @@ function handleStreamChunk({ return state } -export class BasetenError extends Error { +export class CanopyWaveError extends Error { constructor( public readonly statusCode: number, public readonly statusText: string, @@ -556,7 +569,7 @@ export class BasetenError extends Error { }, ) { super(errorBody.error.message) - this.name = 'BasetenError' + this.name = 'CanopyWaveError' } toJSON() { @@ -570,9 +583,9 @@ export class BasetenError extends Error { } } -async function parseBasetenError(response: Response): Promise { +async function parseCanopyWaveError(response: Response): Promise { const errorText = await response.text() - let errorBody: BasetenError['errorBody'] + let errorBody: CanopyWaveError['errorBody'] try { const parsed = JSON.parse(errorText) if (parsed?.error?.message) { @@ -599,7 +612,7 @@ async function parseBasetenError(response: Response): Promise { }, } } - return new BasetenError(response.status, response.statusText, errorBody) + return new CanopyWaveError(response.status, response.statusText, errorBody) } function creditsToFakeCost(credits: number): number { From d09bea6aaa107c8857905dc7af1bd475d734b264 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 14:40:18 -0700 Subject: [PATCH 05/16] Update .env.example with canopywave key example --- .env.example | 1 + 1 file changed, 1 insertion(+) diff --git a/.env.example b/.env.example index 55e7721d2e..d3c6f2438d 100644 --- a/.env.example +++ b/.env.example @@ -4,6 +4,7 @@ OPEN_ROUTER_API_KEY=dummy_openrouter_key OPENAI_API_KEY=dummy_openai_key ANTHROPIC_API_KEY=dummy_anthropic_key FIREWORKS_API_KEY=dummy_fireworks_key 
+CANOPYWAVE_API_KEY=dummy_canopywave_key # Database & Server DATABASE_URL=postgresql://manicode_user_local:secretpassword_local@localhost:5432/manicode_db_local From 14602f734dd418e9a23c92b6f71f989cdd61be98 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 15:32:17 -0700 Subject: [PATCH 06/16] Update next-themes version so it can use react 19 --- bun.lock | 14 +++++--------- freebuff/web/package.json | 2 +- package.json | 2 ++ web/package.json | 2 +- web/src/components/theme-provider.tsx | 2 +- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/bun.lock b/bun.lock index 964cd43180..f9bedc4412 100644 --- a/bun.lock +++ b/bun.lock @@ -147,7 +147,7 @@ "lucide-react": "^0.487.0", "next": "15.5.11", "next-auth": "^4.24.11", - "next-themes": "^0.3.0", + "next-themes": "^0.4.6", "pino": "^9.6.0", "react": "^19.0.0", "react-dom": "^19.0.0", @@ -294,7 +294,7 @@ "next": "15.5.11", "next-auth": "^4.24.11", "next-contentlayer2": "^0.5.8", - "next-themes": "^0.3.0", + "next-themes": "^0.4.6", "nextjs-linkedin-insight-tag": "^0.0.6", "pino": "^9.6.0", "posthog-js": "^1.234.10", @@ -354,6 +354,8 @@ "@types/react": "19.2.14", "@types/react-dom": "19.2.3", "baseline-browser-mapping": "^2.9.14", + "react": "^19.0.0", + "react-dom": "^19.0.0", "signal-exit": "3.0.7", "zod": "^4.2.1", }, @@ -2810,7 +2812,7 @@ "next-contentlayer2": ["next-contentlayer2@0.5.8", "", { "dependencies": { "@contentlayer2/core": "0.5.8", "@contentlayer2/utils": "0.5.8" }, "peerDependencies": { "contentlayer2": "0.5.8", "next": ">=12.0.0", "react": "^18 || ^19 || ^19.0.0-rc", "react-dom": "^18 || ^19 || ^19.0.0-rc" } }, "sha512-3Xh8quPCFmg/QGa4qTnOwSsT3oNYCtmm+Ii0UlbOHxX59gHYVX9M5mTzkdUKiKC1aJfiGIPPGQXhKNfc6qvWZg=="], - "next-themes": ["next-themes@0.3.0", "", { "peerDependencies": { "react": "^16.8 || ^17 || ^18", "react-dom": "^16.8 || ^17 || ^18" } }, "sha512-/QHIrsYpd6Kfk7xakK4svpDI5mmXP0gfvCoJdGpZQ2TOrQZmsW0QxjaiLn8wbIKjtm4BTSqLoix4lxYYOnLJ/w=="], + "next-themes": 
["next-themes@0.4.6", "", { "peerDependencies": { "react": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc", "react-dom": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc" } }, "sha512-pZvgD5L0IEvX5/9GWyHMf3m8BKiVQwsCMHfoFosXtXBMnaS0ZnIJ9ST4b4NqLVKDEm8QBxoNNGNaBv2JNF6XNA=="], "nextjs-linkedin-insight-tag": ["nextjs-linkedin-insight-tag@0.0.6", "", { "dependencies": { "typescript": "^4.9.4" }, "peerDependencies": { "next": ">=11.0.0", "react": ">=17.0.0" } }, "sha512-hk3cHpz+1SLbe0hd2nFjUP2AlFmgeDMHHudXGTYrtIvRri/qliFEIpURH7FJWKxQLXm9f1X8B5O20Wvj2wNPCg=="], @@ -4054,10 +4056,6 @@ "next-auth/uuid": ["uuid@8.3.2", "", { "bin": { "uuid": "dist/bin/uuid" } }, "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg=="], - "next-themes/react": ["react@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ=="], - - "next-themes/react-dom": ["react-dom@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" }, "peerDependencies": { "react": "^18.3.1" } }, "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw=="], - "nextjs-linkedin-insight-tag/typescript": ["typescript@4.9.5", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g=="], "nx/axios": ["axios@1.13.1", "", { "dependencies": { "follow-redirects": "^1.15.6", "form-data": "^4.0.4", "proxy-from-env": "^1.1.0" } }, "sha512-hU4EGxxt+j7TQijx1oYdAjw4xuIp1wRQSsbMFwSthCWeBQur1eF+qJ5iQ5sN3Tw8YRzQNKb8jszgBdMDVqwJcw=="], @@ -4516,8 +4514,6 @@ "mlly/pkg-types/confbox": ["confbox@0.1.8", "", {}, "sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w=="], - "next-themes/react-dom/scheduler": ["scheduler@0.23.2", "", { "dependencies": { "loose-envify": "^1.1.0" } }, 
"sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ=="], - "nx/chalk/supports-color": ["supports-color@7.2.0", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="], "nx/minimatch/brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="], diff --git a/freebuff/web/package.json b/freebuff/web/package.json index 53dc3c7a5e..fdf5a358c5 100644 --- a/freebuff/web/package.json +++ b/freebuff/web/package.json @@ -23,7 +23,7 @@ "lucide-react": "^0.487.0", "next": "15.5.11", "next-auth": "^4.24.11", - "next-themes": "^0.3.0", + "next-themes": "^0.4.6", "pino": "^9.6.0", "react": "^19.0.0", "react-dom": "^19.0.0", diff --git a/package.json b/package.json index 628036fc1d..bd94e8cbd8 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,8 @@ "zod": "^4.2.1" }, "overrides": { + "react": "^19.0.0", + "react-dom": "^19.0.0", "@types/react": "19.2.14", "@types/react-dom": "19.2.3", "baseline-browser-mapping": "^2.9.14", diff --git a/web/package.json b/web/package.json index bf6ef79342..9b92c03529 100644 --- a/web/package.json +++ b/web/package.json @@ -75,7 +75,7 @@ "next": "15.5.11", "next-auth": "^4.24.11", "next-contentlayer2": "^0.5.8", - "next-themes": "^0.3.0", + "next-themes": "^0.4.6", "nextjs-linkedin-insight-tag": "^0.0.6", "pino": "^9.6.0", "posthog-js": "^1.234.10", diff --git a/web/src/components/theme-provider.tsx b/web/src/components/theme-provider.tsx index 4c77ee977c..16559fe1a3 100644 --- a/web/src/components/theme-provider.tsx +++ b/web/src/components/theme-provider.tsx @@ -1,7 +1,7 @@ 'use client' import { ThemeProvider as NextThemesProvider } from 'next-themes' -import { type ThemeProviderProps } from 'next-themes/dist/types' +import { type ThemeProviderProps } from 'next-themes' 
import { useEffect } from 'react' export const ThemeProvider = ({ children, ...props }: ThemeProviderProps) => { From 151145f2ce0ddecfd4f433826aa652daf07f1fa5 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 16:57:22 -0700 Subject: [PATCH 07/16] Use siliconflow as provider for minimax --- .env.example | 1 + agents/base2/base2.ts | 1 + packages/internal/src/env-schema.ts | 2 + scripts/test-siliconflow.ts | 384 ++++++++++++ web/src/app/api/v1/chat/completions/_post.ts | 59 +- web/src/llm-api/siliconflow.ts | 621 +++++++++++++++++++ 6 files changed, 1057 insertions(+), 11 deletions(-) create mode 100644 scripts/test-siliconflow.ts create mode 100644 web/src/llm-api/siliconflow.ts diff --git a/.env.example b/.env.example index d3c6f2438d..a1b46a0b88 100644 --- a/.env.example +++ b/.env.example @@ -5,6 +5,7 @@ OPENAI_API_KEY=dummy_openai_key ANTHROPIC_API_KEY=dummy_anthropic_key FIREWORKS_API_KEY=dummy_fireworks_key CANOPYWAVE_API_KEY=dummy_canopywave_key +SILICONFLOW_API_KEY=dummy_siliconflow_key # Database & Server DATABASE_URL=postgresql://manicode_user_local:secretpassword_local@localhost:5432/manicode_db_local diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 4a3c40064f..8735d0579b 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -30,6 +30,7 @@ export function createBase2( publisher, model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? 
{ + only: ['siliconflow/fp8'], data_collection: 'deny', } : { only: ['amazon-bedrock'], diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index 93cfee7d4f..c4bfa7423f 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -8,6 +8,7 @@ export const serverEnvSchema = clientEnvSchema.extend({ ANTHROPIC_API_KEY: z.string().min(1), FIREWORKS_API_KEY: z.string().min(1), CANOPYWAVE_API_KEY: z.string().min(1).optional(), + SILICONFLOW_API_KEY: z.string().min(1).optional(), LINKUP_API_KEY: z.string().min(1), CONTEXT7_API_KEY: z.string().optional(), GRAVITY_API_KEY: z.string().min(1), @@ -52,6 +53,7 @@ export const serverProcessEnv: ServerInput = { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY, FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY, CANOPYWAVE_API_KEY: process.env.CANOPYWAVE_API_KEY, + SILICONFLOW_API_KEY: process.env.SILICONFLOW_API_KEY, LINKUP_API_KEY: process.env.LINKUP_API_KEY, CONTEXT7_API_KEY: process.env.CONTEXT7_API_KEY, GRAVITY_API_KEY: process.env.GRAVITY_API_KEY, diff --git a/scripts/test-siliconflow.ts b/scripts/test-siliconflow.ts new file mode 100644 index 0000000000..845db4a3cb --- /dev/null +++ b/scripts/test-siliconflow.ts @@ -0,0 +1,384 @@ +#!/usr/bin/env bun + +/** + * Test script to verify SiliconFlow prompt caching across a 10-turn conversation. + * + * Uses a very large system prompt (~5k+ input tokens) with low output (max 100 tokens) + * to measure how well SiliconFlow caches the shared prefix across turns. 
+ * + * Usage: + * bun scripts/test-siliconflow.ts + */ + +export {} + +const SILICONFLOW_BASE_URL = 'https://api.siliconflow.com/v1' +const SILICONFLOW_MODEL = 'MiniMaxAI/MiniMax-M2.5' + +// Pricing constants — https://siliconflow.com/pricing +const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 +const CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 +const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 + +const MAX_TOKENS = 100 + +function computeCost(usage: Record): { cost: number; breakdown: string } { + const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0 + const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0 + const promptDetails = usage.prompt_tokens_details as Record | undefined + const cachedTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 + const nonCachedInput = Math.max(0, inputTokens - cachedTokens) + + const inputCost = nonCachedInput * INPUT_COST_PER_TOKEN + const cachedCost = cachedTokens * CACHED_INPUT_COST_PER_TOKEN + const outputCost = outputTokens * OUTPUT_COST_PER_TOKEN + const totalCost = inputCost + cachedCost + outputCost + + const breakdown = [ + `${nonCachedInput} non-cached input × $0.30/M = $${inputCost.toFixed(8)}`, + `${cachedTokens} cached input × $0.03/M = $${cachedCost.toFixed(8)}`, + `${outputTokens} output × $1.20/M = $${outputCost.toFixed(8)}`, + `Total: $${totalCost.toFixed(8)}`, + ].join('\n ') + + return { cost: totalCost, breakdown } +} + +// Very large system prompt to push input tokens to ~5k+ +const SYSTEM_PROMPT = `You are an expert software architect, technical writer, and senior engineering consultant. +You always respond with brief, concise answers — one or two sentences at most. +You provide practical advice grounded in real-world engineering experience. 
+ +Your areas of expertise include: +- Distributed systems design and architecture patterns (microservices, event-driven, CQRS, saga patterns, choreography vs orchestration, bulkhead pattern, circuit breaker, retry with exponential backoff, sidecar pattern, ambassador pattern, strangler fig pattern, anti-corruption layer) +- Database design and optimization (relational databases including PostgreSQL, MySQL, SQL Server; document databases including MongoDB, CouchDB, DynamoDB; graph databases including Neo4j, ArangoDB, JanusGraph; time-series databases including InfluxDB, TimescaleDB, QuestDB; wide-column stores including Cassandra, ScyllaDB, HBase; sharding strategies including hash-based, range-based, geographic; replication topologies including primary-replica, multi-primary, chain replication; connection pooling with PgBouncer, ProxySQL; query optimization techniques including index selection, query plan analysis, materialized views, covering indexes, partial indexes, expression indexes) +- Cloud infrastructure and deployment (AWS services including EC2, ECS, EKS, Lambda, S3, DynamoDB, RDS, Aurora, ElastiCache, CloudFront, Route53, IAM, VPC, SQS, SNS, Kinesis, Step Functions; GCP services including GKE, Cloud Run, Cloud Functions, BigQuery, Spanner, Pub/Sub, Cloud Storage; Azure services including AKS, Azure Functions, Cosmos DB, Azure SQL; container orchestration with Kubernetes including deployments, stateful sets, daemon sets, jobs, CronJobs, custom resource definitions, operators, Helm charts, Kustomize; infrastructure as code with Terraform, Pulumi, CloudFormation, CDK; service mesh with Istio, Linkerd, Consul Connect; load balancers including ALB, NLB, HAProxy, Nginx, Envoy; auto-scaling including HPA, VPA, KEDA, cluster autoscaler) +- Programming languages and their ecosystems (TypeScript/JavaScript with Node.js, Deno, Bun; Python with FastAPI, Django, Flask, SQLAlchemy, Pydantic; Rust with Tokio, Actix, Axum, Serde; Go with Gin, Echo, GORM; Java with 
Spring Boot, Quarkus, Micronaut, Hibernate; C++ with Boost, gRPC, Abseil; Kotlin with Ktor, Spring; Scala with Akka, ZIO, Cats Effect; Elixir with Phoenix, Ecto, LiveView; Haskell with Servant, Yesod, Persistent) +- API design principles (REST architectural constraints, Richardson Maturity Model, HATEOAS, content negotiation; GraphQL including schema design, resolvers, DataLoader, subscriptions, federation; gRPC including protobuf schema design, streaming patterns, interceptors, deadline propagation; WebSocket patterns for real-time communication; Server-Sent Events for unidirectional streaming; OpenAPI/Swagger specification; API versioning strategies including URL path, header, query parameter; pagination patterns including cursor-based, offset, keyset; rate limiting algorithms including token bucket, leaky bucket, sliding window; API gateway patterns) +- Security best practices (authentication protocols including OAuth 2.0, OIDC, SAML, WebAuthn, FIDO2; authorization models including RBAC, ABAC, ReBAC, PBAC; encryption at rest with AES-256, at transit with TLS 1.3; OWASP Top 10 including injection, broken authentication, sensitive data exposure, XXE, broken access control, security misconfiguration, XSS, insecure deserialization, known vulnerabilities, insufficient logging; Content Security Policy headers; CORS configuration; DDoS mitigation with WAF, rate limiting, geo-blocking; secret management with HashiCorp Vault, AWS Secrets Manager, GCP Secret Manager; certificate management including Let's Encrypt, cert-manager, mTLS; supply chain security with SBOM, Sigstore, dependency scanning) +- Performance optimization and profiling (caching strategies including write-through, write-behind, read-through, cache-aside, refresh-ahead; cache invalidation patterns; CDN configuration with CloudFront, Fastly, Cloudflare; connection pooling for HTTP, database, Redis; async patterns including event loops, worker threads, thread pools, coroutines; WebAssembly for 
compute-intensive operations; JIT compilation optimization; memory profiling with heap snapshots, allocation tracking; CPU profiling with flame graphs, perf, async-profiler; load testing with k6, Locust, Artillery, Gatling; performance budgets and real user monitoring) +- Testing methodologies (unit testing with Jest, Vitest, pytest, Go testing; integration testing with Testcontainers, Docker Compose; end-to-end testing with Playwright, Cypress, Selenium; property-based testing with fast-check, Hypothesis, QuickCheck; mutation testing with Stryker, PITest; snapshot testing; contract testing with Pact, Spring Cloud Contract; chaos engineering with Chaos Monkey, Litmus, Gremlin; load testing; fuzz testing with AFL, LibFuzzer; visual regression testing; accessibility testing) +- CI/CD pipelines and DevOps practices (GitHub Actions workflows, Jenkins pipelines, GitLab CI, CircleCI; ArgoCD for GitOps; deployment strategies including blue-green, canary, rolling update, recreate; feature flag systems with LaunchDarkly, Flagsmith, Unleash; trunk-based development; semantic versioning and conventional commits; artifact management with Artifactory, Nexus, ECR, GCR; infrastructure pipeline including Terraform plan/apply, drift detection; security scanning in CI including SAST, DAST, SCA, secret scanning; release management including changelogs, release notes, semantic-release) +- Monitoring and observability (metrics collection with Prometheus, StatsD, Datadog; visualization with Grafana, Kibana; distributed tracing with Jaeger, Zipkin, Tempo, OpenTelemetry; log aggregation with Elasticsearch, Loki, CloudWatch; alerting with PagerDuty, OpsGenie, VictorOps; SLO/SLI definition and error budgets; synthetic monitoring; real user monitoring; custom business metrics; incident management processes; postmortem culture; runbook automation) +- Data engineering and analytics (stream processing with Apache Kafka, Flink, Spark Streaming, Kinesis; batch processing with Spark, Hadoop, dbt; 
data warehousing with Snowflake, BigQuery, Redshift, ClickHouse; data lake architecture with Delta Lake, Apache Iceberg, Apache Hudi; ETL/ELT patterns; data quality frameworks with Great Expectations, dbt tests; schema evolution and backward compatibility; data governance and lineage tracking; real-time analytics with materialized views, OLAP cubes) +- Machine learning operations (model serving with TensorFlow Serving, TorchServe, Triton; MLOps pipelines with MLflow, Kubeflow, Metaflow; feature stores with Feast, Tecton; model monitoring for drift detection; A/B testing for ML models; experiment tracking; model versioning and registry; GPU cluster management; inference optimization with quantization, pruning, distillation) + +When providing responses, you follow these conventions: +- Keep answers extremely brief — one or two sentences maximum +- Be direct and actionable +- Use concrete examples over abstract advice +- Reference specific tools, libraries, or patterns by name + +Additional context for this conversation: +- We are working on a high-traffic web application that serves 50 million requests per day across 3 regions +- The system needs to handle bursty traffic patterns with 10x spikes during peak hours and flash sales +- Data consistency is important but eventual consistency is acceptable for most read paths with a 5-second staleness budget +- The team is experienced with TypeScript and Node.js but open to other technologies for specific use cases +- We use PostgreSQL 16 as our primary database with logical replication to read replicas and Redis 7 Cluster for caching +- The application is deployed on Kubernetes 1.29 in a multi-region setup across US-East-1, US-West-2, and EU-West-1 +- We need to maintain 99.95% uptime SLA with a target p99 latency of 150ms for API endpoints and 50ms for cached reads +- Cost optimization is a secondary concern after reliability and developer experience, but we spend $2.5M/year on infrastructure +- The codebase is 
approximately 750k lines of TypeScript across 80+ microservices with an additional 200k lines of Python for ML services +- We use an event-driven architecture with Kafka (3 clusters, 500+ topics) for inter-service communication with exactly-once semantics +- All services expose both REST (OpenAPI 3.1) and gRPC (protobuf v3) endpoints with automatic code generation +- We have a comprehensive monitoring stack with Prometheus (50M time series), Grafana (200+ dashboards), Jaeger, and PagerDuty +- Database migrations are managed with Drizzle ORM with automated rollback capabilities and zero-downtime schema changes +- The frontend is a Next.js 15 application with React Server Components, streaming SSR, and partial prerendering +- We use feature flags extensively via LaunchDarkly with 500+ active flags and automated cleanup for stale flags +- The CI/CD pipeline runs 5000+ tests (unit, integration, e2e) with a target of under 8 minutes using distributed execution on BuildKite +- We practice trunk-based development with short-lived feature branches, PR previews, and automated merge queues +- The team consists of 60 engineers across 10 squads, each owning 5-12 services with clear domain boundaries +- We use a mono-repo structure managed with Turborepo and Bun workspaces with remote caching +- All inter-service communication uses Protocol Buffers for serialization with a shared schema registry and backward compatibility enforcement +- We have a custom API gateway built on Envoy that handles authentication, rate limiting, request routing, and observability injection +- The system processes approximately 100TB of data per day through our analytics pipeline (Kafka → Flink → ClickHouse + BigQuery) +- Mobile clients communicate via a BFF (Backend for Frontend) layer with GraphQL federation across 12 subgraphs +- We have a custom feature flag evaluation engine that supports complex targeting rules including percentage rollouts, user segments, and geographic targeting +- The 
deployment pipeline supports multi-region blue-green deployments with automated rollback on SLO violation detection +- We use HashiCorp Vault for secret management with automatic rotation policies for database credentials, API keys, and certificates +- Our observability stack includes custom instrumentation for business metrics including revenue, conversion, engagement, and error rates +- The team follows an RFC process for architectural decisions with ADRs stored in the repo and reviewed by the architecture guild +- We have a dedicated platform team of 8 engineers that maintains shared infrastructure, developer tooling, and internal SDKs +- All services implement health checks (liveness + readiness), graceful shutdown handlers, and circuit breakers via a shared middleware library +- We use PgBouncer in transaction mode for PostgreSQL connection pooling (max 500 connections per region) and Redis Cluster with 6 shards per region +- The system supports multi-tenancy with tenant isolation at the database level using row-level security and per-tenant connection pools +- We have a custom schema registry for Kafka topic schemas with backward/forward compatibility validation and automated consumer migration +- Our error handling follows a structured error taxonomy with 200+ error codes, retry policies, and dead-letter queues for unprocessable messages +- We use structured logging with JSON format, correlation IDs, and trace context propagation across all services via OpenTelemetry +- The frontend uses a design system with 300+ components maintained by a dedicated UI platform team with visual regression testing via Chromatic +- We have automated performance regression testing that runs nightly against production-like data with 10% traffic replay +- Our incident response process includes automated runbook execution, escalation policies, and post-incident review within 48 hours +- We maintain a service catalog with dependency graphs, SLO definitions, on-call schedules, and 
cost attribution per service +- The platform supports A/B testing with Bayesian statistical significance calculations, multi-armed bandit allocation, and segment analysis +- We use GitOps for all infrastructure management with Terraform modules in a dedicated repo and Atlantis for plan/apply workflows +- Our security posture includes weekly penetration testing, continuous dependency scanning with Snyk, SAST with Semgrep, and DAST with OWASP ZAP +- We have a data mesh architecture for analytics with 15 domain-owned data products, each with defined SLAs and data contracts +- The system supports webhook delivery with at-least-once semantics, configurable retry policies (exponential backoff up to 24h), and delivery status tracking +- We use OpenTelemetry Collector for telemetry pipeline with custom processors for PII redaction, sampling, and cost-based routing +- Our caching strategy uses L1 (in-process LRU, 100MB per pod), L2 (Redis Cluster, 500GB), and L3 (CloudFront, 30+ edge locations) with coordinated invalidation +- We maintain backward compatibility for 3 API versions simultaneously with automated deprecation notices, usage tracking, and migration guides +- The platform includes a developer portal with API documentation, SDK generation, sandbox environments, and usage analytics +- We use Temporal for workflow orchestration across 20+ long-running business processes including order fulfillment, payment processing, and user onboarding +- Our ML platform serves 50+ models in production with A/B testing, shadow mode deployment, and automated retraining pipelines +- The search infrastructure uses Elasticsearch clusters with 500M+ documents, custom analyzers, and learning-to-rank models +- We have a notification system that delivers 10M+ messages daily across email, push, SMS, and in-app channels with template management and delivery optimization +- The billing system processes $50M+ in monthly transactions with Stripe integration, usage-based billing, and revenue 
recognition +- We use Crossplane for provisioning cloud resources as Kubernetes custom resources with drift detection and reconciliation +- Our edge computing layer uses Cloudflare Workers for geo-routing, A/B test assignment, and personalization at the edge +- The platform includes a custom query builder for internal dashboards that generates optimized SQL for ClickHouse and PostgreSQL +- We maintain a shared protobuf definition repository with 500+ message types, automated code generation for 6 languages, and breaking change detection` + +const TURN_PROMPTS = [ + 'Give a brief one-sentence answer: What is the single most important principle when designing distributed systems?', + 'Give a brief one-sentence answer: What is the biggest mistake teams make when adopting microservices?', + 'Give a brief one-sentence answer: When should you choose eventual consistency over strong consistency?', + 'Give a brief one-sentence answer: What is the most underrated database optimization technique?', + 'Give a brief one-sentence answer: What is the best approach to handle cascading failures in a microservice architecture?', + 'Give a brief one-sentence answer: When is it better to use gRPC over REST?', + 'Give a brief one-sentence answer: What is the most effective caching strategy for a read-heavy workload?', + 'Give a brief one-sentence answer: What is the key to successful trunk-based development at scale?', + 'Give a brief one-sentence answer: What metric best predicts production reliability?', + 'Give a brief one-sentence answer: What is the most important thing to get right in an observability stack?', +] + +interface ConversationMessage { + role: string + content: string +} + +interface TurnResult { + label: string + usage: Record | null + elapsedMs: number + outputTokens: number + ttftMs?: number + outputTokensPerSec?: number + responseContent: string +} + +async function makeConversationStreamRequest( + label: string, + apiKey: string, + conversationMessages: 
ConversationMessage[], +): Promise<TurnResult> { + console.log(`── ${label} (streaming) ──`) + const startTime = Date.now() + let ttftMs: number | undefined + + const response = await fetch(`${SILICONFLOW_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${apiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: SILICONFLOW_MODEL, + messages: conversationMessages, + max_tokens: MAX_TOKENS, + stream: true, + stream_options: { include_usage: true }, + }), + }) + + if (!response.ok) { + const errorText = await response.text() + console.error(`❌ SiliconFlow streaming API returned ${response.status}: ${errorText}`) + return { label, usage: null, elapsedMs: Date.now() - startTime, outputTokens: 0, responseContent: '' } + } + + const reader = response.body?.getReader() + if (!reader) { + console.error('❌ No response body reader') + return { label, usage: null, elapsedMs: Date.now() - startTime, outputTokens: 0, responseContent: '' } + } + + const decoder = new TextDecoder() + let streamContent = '' + let chunkCount = 0 + let streamUsage: Record<string, unknown> | null = null + let firstContentChunkTime: number | undefined + + let done = false + while (!done) { + const result = await reader.read() + done = result.done + if (done) break + + const text = decoder.decode(result.value, { stream: true }) + const lines = text.split('\n').filter((l) => l.startsWith('data: ')) + + for (const line of lines) { + const raw = line.slice('data: '.length) + if (raw === '[DONE]') continue + + try { + const chunk = JSON.parse(raw) + chunkCount++ + const delta = chunk.choices?.[0]?.delta + if (delta?.content) { + if (firstContentChunkTime === undefined) { + firstContentChunkTime = Date.now() + ttftMs = firstContentChunkTime - startTime + } + streamContent += delta.content + } + if (chunk.usage) streamUsage = chunk.usage + } catch { + // skip non-JSON lines + } + } + } + + const elapsedMs = Date.now() - startTime + const outputTokens = streamUsage && 
typeof streamUsage.completion_tokens === 'number' + ? streamUsage.completion_tokens + : 0 + + const generationTimeMs = firstContentChunkTime !== undefined + ? Date.now() - firstContentChunkTime + : elapsedMs + const outputTokensPerSec = generationTimeMs > 0 + ? (outputTokens / (generationTimeMs / 1000)) + : 0 + + // Print compact per-turn stats + const inputTokens = streamUsage && typeof streamUsage.prompt_tokens === 'number' ? streamUsage.prompt_tokens : 0 + const promptDetails = streamUsage?.prompt_tokens_details as Record<string, unknown> | undefined + const cachedTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 + const cacheRate = inputTokens > 0 ? ((cachedTokens / inputTokens) * 100).toFixed(1) : '0.0' + const cost = streamUsage ? `$${computeCost(streamUsage).cost.toFixed(6)}` : 'err' + + console.log(` ✅ ${(elapsedMs / 1000).toFixed(2)}s | TTFT ${ttftMs !== undefined ? (ttftMs / 1000).toFixed(2) + 's' : 'n/a'} | ${inputTokens} in (${cachedTokens} cached, ${cacheRate}%) | ${outputTokens} out @ ${outputTokensPerSec.toFixed(1)} tok/s | ${cost}`) + console.log(` Response: ${streamContent.slice(0, 150)}${streamContent.length > 150 ? '...' : ''}`) + console.log() + + return { label, usage: streamUsage, elapsedMs, outputTokens, ttftMs, outputTokensPerSec, responseContent: streamContent } +} + +async function main() { + const apiKey = process.env.SILICONFLOW_API_KEY + if (!apiKey) { + console.error('❌ SILICONFLOW_API_KEY is not set. 
Add it to .env.local or pass it directly.') + process.exit(1) + } + + console.log('🧪 SiliconFlow 10-Turn Conversation Caching Test') + console.log('='.repeat(60)) + console.log(`Model: ${SILICONFLOW_MODEL}`) + console.log(`Base URL: ${SILICONFLOW_BASE_URL}`) + console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`) + console.log(`Turns: ${TURN_PROMPTS.length}`) + console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`) + console.log('='.repeat(60)) + console.log() + + const conversationHistory: ConversationMessage[] = [ + { role: 'system', content: SYSTEM_PROMPT }, + ] + + const results: TurnResult[] = [] + + for (let i = 0; i < TURN_PROMPTS.length; i++) { + conversationHistory.push({ role: 'user', content: TURN_PROMPTS[i] }) + + const label = `Turn ${i + 1}/${TURN_PROMPTS.length}${i === 0 ? ' (cold)' : ''}` + const result = await makeConversationStreamRequest(label, apiKey, [...conversationHistory]) + results.push(result) + + if (result.responseContent) { + conversationHistory.push({ role: 'assistant', content: result.responseContent }) + } + } + + // ── Summary table ── + console.log('━'.repeat(120)) + console.log('SUMMARY') + console.log('━'.repeat(120)) + console.log() + + console.log(' Turn | Time | TTFT | Input | Cached | Cache% | Output | tok/s | e2e t/s | Cost') + console.log(' ' + '-'.repeat(110)) + + let totalCost = 0 + let totalInputTokens = 0 + let totalCachedTokens = 0 + let totalOutputTokens = 0 + let totalElapsedMs = 0 + + for (const r of results) { + const time = `${(r.elapsedMs / 1000).toFixed(2)}s` + const ttft = r.ttftMs !== undefined ? `${(r.ttftMs / 1000).toFixed(2)}s` : 'n/a' + const tokSec = r.outputTokensPerSec !== undefined ? r.outputTokensPerSec.toFixed(1) : 'n/a' + const e2eTokSec = r.elapsedMs > 0 ? (r.outputTokens / (r.elapsedMs / 1000)).toFixed(1) : 'n/a' + const cost = r.usage ? computeCost(r.usage).cost : 0 + const costStr = r.usage ? 
`$${cost.toFixed(6)}` : 'err' + + const inputTokens = r.usage && typeof r.usage.prompt_tokens === 'number' ? r.usage.prompt_tokens : 0 + const promptDetails = r.usage?.prompt_tokens_details as Record | undefined + const cachedTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 + const cacheRate = inputTokens > 0 ? `${((cachedTokens / inputTokens) * 100).toFixed(1)}%` : '0.0%' + + totalCost += cost + totalInputTokens += inputTokens + totalCachedTokens += cachedTokens + totalOutputTokens += r.outputTokens + totalElapsedMs += r.elapsedMs + + console.log( + ` ${r.label.padEnd(4).slice(0, 25).padEnd(25)} | ${time.padStart(8)} | ${ttft.padStart(7)} | ${String(inputTokens).padStart(6)} | ${String(cachedTokens).padStart(6)} | ${cacheRate.padStart(7)} | ${String(r.outputTokens).padStart(6)} | ${tokSec.padStart(6)} | ${e2eTokSec.padStart(7)} | ${costStr}`, + ) + } + + console.log(' ' + '-'.repeat(110)) + + const overallCacheRate = totalInputTokens > 0 ? ((totalCachedTokens / totalInputTokens) * 100).toFixed(1) : '0.0' + const totalTimeStr = `${(totalElapsedMs / 1000).toFixed(2)}s` + const overallTokSec = totalElapsedMs > 0 ? (totalOutputTokens / (totalElapsedMs / 1000)).toFixed(1) : 'n/a' + console.log(` ${'TOTAL'.padEnd(25)} | ${totalTimeStr.padStart(8)} | | ${String(totalInputTokens).padStart(6)} | ${String(totalCachedTokens).padStart(6)} | ${(overallCacheRate + '%').padStart(7)} | ${String(totalOutputTokens).padStart(6)} | | ${overallTokSec.padStart(7)} | $${totalCost.toFixed(6)}`) + console.log() + + // ── Cost analysis ── + console.log('━'.repeat(120)) + console.log('COST ANALYSIS') + console.log('━'.repeat(120)) + console.log() + + // What would the cost be without caching? + const costWithoutCaching = totalInputTokens * INPUT_COST_PER_TOKEN + totalOutputTokens * OUTPUT_COST_PER_TOKEN + const savings = costWithoutCaching - totalCost + const savingsPercent = costWithoutCaching > 0 ? 
((savings / costWithoutCaching) * 100).toFixed(1) : '0.0' + + console.log(` Total cost (actual): $${totalCost.toFixed(6)}`) + console.log(` Total cost (no caching): $${costWithoutCaching.toFixed(6)}`) + console.log(` Savings from caching: $${savings.toFixed(6)} (${savingsPercent}%)`) + console.log() + console.log(` Total input tokens: ${totalInputTokens}`) + console.log(` Total cached tokens: ${totalCachedTokens}`) + console.log(` Overall cache hit rate: ${overallCacheRate}%`) + console.log(` Total output tokens: ${totalOutputTokens}`) + console.log() + + // TTFT analysis + const ttfts = results.filter((r) => r.ttftMs !== undefined).map((r) => r.ttftMs!) + if (ttfts.length > 0) { + const avgTtft = ttfts.reduce((a, b) => a + b, 0) / ttfts.length + const minTtft = Math.min(...ttfts) + const maxTtft = Math.max(...ttfts) + console.log(` TTFT — avg: ${(avgTtft / 1000).toFixed(2)}s, min: ${(minTtft / 1000).toFixed(2)}s, max: ${(maxTtft / 1000).toFixed(2)}s`) + + if (results[0].ttftMs !== undefined && ttfts.length > 1) { + const coldTtft = results[0].ttftMs + const warmTtfts = ttfts.slice(1) + const avgWarmTtft = warmTtfts.reduce((a, b) => a + b, 0) / warmTtfts.length + console.log(` TTFT — cold (turn 1): ${(coldTtft / 1000).toFixed(2)}s, avg warm (turns 2-${TURN_PROMPTS.length}): ${(avgWarmTtft / 1000).toFixed(2)}s`) + if (avgWarmTtft < coldTtft) { + console.log(` ✅ Warm TTFT is ${((1 - avgWarmTtft / coldTtft) * 100).toFixed(1)}% faster than cold TTFT`) + } + } + } + + console.log() + console.log('Done!') +} + +main() diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index d236125bcb..b886a3d838 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -47,6 +47,12 @@ import { handleFireworksStream, isFireworksModel, } from '@/llm-api/fireworks' +import { + SiliconFlowError, + handleSiliconFlowNonStream, + handleSiliconFlowStream, + isSiliconFlowModel, +} from 
'@/llm-api/siliconflow' import { handleOpenAINonStream, OPENAI_SUPPORTED_MODELS, @@ -360,10 +366,22 @@ export async function postChatCompletions(params: { // Handle streaming vs non-streaming try { if (bodyStream) { - // Streaming request — route to CanopyWave/Fireworks for supported models - const useCanopyWave = isCanopyWaveModel(typedBody.model) - const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model) - const stream = useCanopyWave + // Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models + // CanopyWave and Fireworks TEMPORARILY DISABLED: route through OpenRouter + const useSiliconFlow = isSiliconFlowModel(typedBody.model) + const useCanopyWave = false // isCanopyWaveModel(typedBody.model) + const useFireworks = false // isFireworksModel(typedBody.model) + const stream = useSiliconFlow + ? await handleSiliconFlowStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useCanopyWave ? await handleCanopyWaveStream({ body: typedBody, userId, @@ -413,10 +431,12 @@ export async function postChatCompletions(params: { }, }) } else { - // Non-streaming request — route to CanopyWave/Fireworks for supported models + // Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models + // CanopyWave and Fireworks TEMPORARILY DISABLED: route through OpenRouter const model = typedBody.model - const useCanopyWave = isCanopyWaveModel(model) - const useFireworks = !useCanopyWave && isFireworksModel(model) + const useSiliconFlow = isSiliconFlowModel(model) + const useCanopyWave = false // isCanopyWaveModel(model) + const useFireworks = false // isFireworksModel(model) const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? 
modelParts[1] : model const isOpenAIDirectModel = @@ -427,7 +447,17 @@ export async function postChatCompletions(params: { const shouldUseOpenAIEndpoint = isOpenAIDirectModel && typedBody.codebuff_metadata?.n !== undefined - const nonStreamRequest = useCanopyWave + const nonStreamRequest = useSiliconFlow + ? handleSiliconFlowNonStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useCanopyWave ? handleCanopyWaveNonStream({ body: typedBody, userId, @@ -495,10 +525,14 @@ export async function postChatCompletions(params: { if (error instanceof CanopyWaveError) { canopywaveError = error } + let siliconflowError: SiliconFlowError | undefined + if (error instanceof SiliconFlowError) { + siliconflowError = error + } // Log detailed error information for debugging const errorDetails = openrouterError?.toJSON() - const providerLabel = canopywaveError ? 'CanopyWave' : fireworksError ? 'Fireworks' : 'OpenRouter' + const providerLabel = siliconflowError ? 'SiliconFlow' : canopywaveError ? 'CanopyWave' : fireworksError ? 'Fireworks' : 'OpenRouter' logger.error( { error: getErrorObject(error), @@ -512,8 +546,8 @@ export async function postChatCompletions(params: { ? typedBody.messages.length : 0, messages: typedBody.messages, - providerStatusCode: (openrouterError ?? fireworksError ?? canopywaveError)?.statusCode, - providerStatusText: (openrouterError ?? fireworksError ?? canopywaveError)?.statusText, + providerStatusCode: (openrouterError ?? fireworksError ?? canopywaveError ?? siliconflowError)?.statusCode, + providerStatusText: (openrouterError ?? fireworksError ?? canopywaveError ?? 
siliconflowError)?.statusText, openrouterErrorCode: errorDetails?.error?.code, openrouterErrorType: errorDetails?.error?.type, openrouterErrorMessage: errorDetails?.error?.message, @@ -544,6 +578,9 @@ export async function postChatCompletions(params: { if (error instanceof CanopyWaveError) { return NextResponse.json(error.toJSON(), { status: error.statusCode }) } + if (error instanceof SiliconFlowError) { + return NextResponse.json(error.toJSON(), { status: error.statusCode }) + } return NextResponse.json( { error: 'Failed to process request' }, diff --git a/web/src/llm-api/siliconflow.ts b/web/src/llm-api/siliconflow.ts new file mode 100644 index 0000000000..1146bbe3df --- /dev/null +++ b/web/src/llm-api/siliconflow.ts @@ -0,0 +1,621 @@ +import { Agent } from 'undici' + +import { PROFIT_MARGIN } from '@codebuff/common/constants/limits' +import { getErrorObject } from '@codebuff/common/util/error' +import { env } from '@codebuff/internal/env' + +import { + consumeCreditsForMessage, + extractRequestMetadata, + insertMessageToBigQuery, +} from './helpers' + +import type { UsageData } from './helpers' +import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ChatCompletionRequestBody } from './types' + +const SILICONFLOW_BASE_URL = 'https://api.siliconflow.com/v1' + +// Extended timeout for deep-thinking models that can take +// a long time to start streaming. 
+const SILICONFLOW_HEADERS_TIMEOUT_MS = 10 * 60 * 1000 + +const siliconflowAgent = new Agent({ + headersTimeout: SILICONFLOW_HEADERS_TIMEOUT_MS, + bodyTimeout: 0, +}) + +/** Map from OpenRouter model IDs to SiliconFlow model IDs */ +const SILICONFLOW_MODEL_MAP: Record = { + 'minimax/minimax-m2.5': 'MiniMaxAI/MiniMax-M2.5', +} + +export function isSiliconFlowModel(model: string): boolean { + return model in SILICONFLOW_MODEL_MAP +} + +function getSiliconFlowModelId(openrouterModel: string): string { + return SILICONFLOW_MODEL_MAP[openrouterModel] ?? openrouterModel +} + +type StreamState = { responseText: string; reasoningText: string; billedAlready: boolean } + +type LineResult = { + state: StreamState + billedCredits?: number + patchedLine: string +} + +function createSiliconFlowRequest(params: { + body: ChatCompletionRequestBody + originalModel: string + fetch: typeof globalThis.fetch +}) { + const { body, originalModel, fetch } = params + const siliconflowBody: Record = { + ...body, + model: getSiliconFlowModelId(originalModel), + } + + // Strip OpenRouter-specific / internal fields + delete siliconflowBody.provider + delete siliconflowBody.transforms + delete siliconflowBody.codebuff_metadata + delete siliconflowBody.usage + + // For streaming, request usage in the final chunk + if (siliconflowBody.stream) { + siliconflowBody.stream_options = { include_usage: true } + } + + if (!env.SILICONFLOW_API_KEY) { + throw new Error('SILICONFLOW_API_KEY is not configured') + } + + return fetch(`${SILICONFLOW_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${env.SILICONFLOW_API_KEY}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(siliconflowBody), + // @ts-expect-error - dispatcher is a valid undici option not in fetch types + dispatcher: siliconflowAgent, + }) +} + +// SiliconFlow per-token pricing (dollars per token) for MiniMax M2.5 +// https://siliconflow.com/pricing — $0.30/M input, $1.20/M output +const 
SILICONFLOW_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 +const SILICONFLOW_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 +const SILICONFLOW_OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 + +function extractUsageAndCost(usage: Record | undefined | null): UsageData { + if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } + const promptDetails = usage.prompt_tokens_details as Record | undefined | null + const completionDetails = usage.completion_tokens_details as Record | undefined | null + + const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0 + const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0 + const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 + const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0 + + const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) + const cost = + nonCachedInputTokens * SILICONFLOW_INPUT_COST_PER_TOKEN + + cacheReadInputTokens * SILICONFLOW_CACHED_INPUT_COST_PER_TOKEN + + outputTokens * SILICONFLOW_OUTPUT_COST_PER_TOKEN + + return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } +} + +export async function handleSiliconFlowNonStream({ + body, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: ChatCompletionRequestBody + userId: string + stripeCustomerId?: string | null + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const originalModel = body.model + const startTime = new Date() + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + + const response = await createSiliconFlowRequest({ body, originalModel, fetch }) + + if (!response.ok) { + throw await parseSiliconFlowError(response) + 
} + + const data = await response.json() + const content = data.choices?.[0]?.message?.content ?? '' + const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? '' + const usageData = extractUsageAndCost(data.usage) + + insertMessageToBigQuery({ + messageId: data.id, + userId, + startTime, + request: body, + reasoningText, + responseText: content, + usageData, + logger, + insertMessageBigquery, + }).catch((error) => { + logger.error({ error }, 'Failed to insert message into BigQuery') + }) + + const billedCredits = await consumeCreditsForMessage({ + messageId: data.id, + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + startTime, + model: originalModel, + reasoningText, + responseText: content, + usageData, + byok: false, + logger, + costMode, + }) + + // Overwrite cost so SDK calculates exact credits we charged + if (data.usage) { + data.usage.cost = creditsToFakeCost(billedCredits) + data.usage.cost_details = { upstream_inference_cost: 0 } + } + + // Normalise model name back to OpenRouter format for client compatibility + data.model = originalModel + if (!data.provider) data.provider = 'SiliconFlow' + + return data +} + +export async function handleSiliconFlowStream({ + body, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: ChatCompletionRequestBody + userId: string + stripeCustomerId?: string | null + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const originalModel = body.model + const startTime = new Date() + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + + const response = await createSiliconFlowRequest({ body, originalModel, fetch }) + + if (!response.ok) { + throw await parseSiliconFlowError(response) + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Failed to get response 
reader') + } + + let heartbeatInterval: NodeJS.Timeout + let state: StreamState = { responseText: '', reasoningText: '', billedAlready: false } + let clientDisconnected = false + + const stream = new ReadableStream({ + async start(controller) { + const decoder = new TextDecoder() + let buffer = '' + + controller.enqueue( + new TextEncoder().encode(`: connected ${new Date().toISOString()}\n`), + ) + + heartbeatInterval = setInterval(() => { + if (!clientDisconnected) { + try { + controller.enqueue( + new TextEncoder().encode( + `: heartbeat ${new Date().toISOString()}\n\n`, + ), + ) + } catch { + // client disconnected + } + } + }, 30000) + + try { + let done = false + while (!done) { + const result = await reader.read() + done = result.done + const value = result.value + + if (done) break + + buffer += decoder.decode(value, { stream: true }) + let lineEnd = buffer.indexOf('\n') + + while (lineEnd !== -1) { + const line = buffer.slice(0, lineEnd + 1) + buffer = buffer.slice(lineEnd + 1) + + const lineResult = await handleLine({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request: body, + originalModel, + line, + state, + logger, + insertMessage: insertMessageBigquery, + }) + state = lineResult.state + + if (!clientDisconnected) { + try { + controller.enqueue(new TextEncoder().encode(lineResult.patchedLine)) + } catch { + logger.warn('Client disconnected during stream, continuing for billing') + clientDisconnected = true + } + } + + lineEnd = buffer.indexOf('\n') + } + } + + if (!clientDisconnected) { + controller.close() + } + } catch (error) { + if (!clientDisconnected) { + controller.error(error) + } else { + logger.warn( + getErrorObject(error), + 'Error after client disconnect in SiliconFlow stream', + ) + } + } finally { + clearInterval(heartbeatInterval) + } + }, + cancel() { + clearInterval(heartbeatInterval) + clientDisconnected = true + logger.warn( + { + clientDisconnected, + responseTextLength: 
state.responseText.length, + reasoningTextLength: state.reasoningText.length, + }, + 'Client cancelled stream, continuing SiliconFlow consumption for billing', + ) + }, + }) + + return stream +} + +async function handleLine({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + line, + state, + logger, + insertMessage, +}: { + userId: string + stripeCustomerId?: string | null + agentId: string + clientId: string | null + clientRequestId: string | null + costMode: string | undefined + startTime: Date + request: unknown + originalModel: string + line: string + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise { + if (!line.startsWith('data: ')) { + return { state, patchedLine: line } + } + + const raw = line.slice('data: '.length) + if (raw === '[DONE]\n' || raw === '[DONE]') { + return { state, patchedLine: line } + } + + let obj: Record + try { + obj = JSON.parse(raw) + } catch (error) { + logger.warn( + { error: getErrorObject(error, { includeRawError: true }) }, + 'Received non-JSON SiliconFlow response', + ) + return { state, patchedLine: line } + } + + // Patch model and provider for SDK compatibility + if (obj.model) obj.model = originalModel + if (!obj.provider) obj.provider = 'SiliconFlow' + + // Process the chunk for billing / state tracking + const result = await handleResponse({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + data: obj, + state, + logger, + insertMessage, + }) + + // If this is the final chunk with billing, overwrite cost in the patched object + if (result.billedCredits !== undefined && obj.usage) { + const usage = obj.usage as Record + usage.cost = creditsToFakeCost(result.billedCredits) + usage.cost_details = { upstream_inference_cost: 0 } + } + + const patchedLine = `data: ${JSON.stringify(obj)}\n` + return { state: result.state, billedCredits: 
result.billedCredits, patchedLine } +} + +function isFinalChunk(data: Record): boolean { + const choices = data.choices as Array> | undefined + if (!choices || choices.length === 0) return true + return choices.some(c => c.finish_reason != null) +} + +async function handleResponse({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + data, + state, + logger, + insertMessage, +}: { + userId: string + stripeCustomerId?: string | null + agentId: string + clientId: string | null + clientRequestId: string | null + costMode: string | undefined + startTime: Date + request: unknown + originalModel: string + data: Record + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise<{ state: StreamState; billedCredits?: number }> { + state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) + + // Some providers send cumulative usage on EVERY chunk (not just the final one), + // so we must only bill once on the final chunk to avoid charging N times. + if ('error' in data || !data.usage || state.billedAlready || !isFinalChunk(data)) { + // Strip usage from non-final chunks and duplicate final chunks + // so the SDK doesn't see multiple usage objects + if (data.usage && (!isFinalChunk(data) || state.billedAlready)) { + delete data.usage + } + return { state } + } + + const usageData = extractUsageAndCost(data.usage as Record) + const messageId = typeof data.id === 'string' ? 
data.id : 'unknown' + + state.billedAlready = true + + insertMessageToBigQuery({ + messageId, + userId, + startTime, + request, + reasoningText: state.reasoningText, + responseText: state.responseText, + usageData, + logger, + insertMessageBigquery: insertMessage, + }).catch((error) => { + logger.error({ error }, 'Failed to insert message into BigQuery') + }) + + const billedCredits = await consumeCreditsForMessage({ + messageId, + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + startTime, + model: originalModel, + reasoningText: state.reasoningText, + responseText: state.responseText, + usageData, + byok: false, + logger, + costMode, + }) + + return { state, billedCredits } +} + +function handleStreamChunk({ + data, + state, + logger, + userId, + agentId, + model, +}: { + data: Record + state: StreamState + logger: Logger + userId: string + agentId: string + model: string +}): StreamState { + const MAX_BUFFER_SIZE = 1 * 1024 * 1024 + + if ('error' in data) { + const errorData = data.error as Record + logger.error( + { + userId, + agentId, + model, + errorCode: errorData?.code, + errorType: errorData?.type, + errorMessage: errorData?.message, + }, + 'Received error chunk in SiliconFlow stream', + ) + return state + } + + const choices = data.choices as Array> | undefined + if (!choices?.length) { + return state + } + const choice = choices[0] + const delta = choice.delta as Record | undefined + + const contentDelta = typeof delta?.content === 'string' ? delta.content : '' + if (state.responseText.length < MAX_BUFFER_SIZE) { + state.responseText += contentDelta + if (state.responseText.length >= MAX_BUFFER_SIZE) { + state.responseText = + state.responseText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---' + logger.warn({ userId, agentId, model }, 'Response text buffer truncated at 1MB') + } + } + + const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content + : typeof delta?.reasoning === 'string' ? 
delta.reasoning + : '' + if (state.reasoningText.length < MAX_BUFFER_SIZE) { + state.reasoningText += reasoningDelta + if (state.reasoningText.length >= MAX_BUFFER_SIZE) { + state.reasoningText = + state.reasoningText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---' + logger.warn({ userId, agentId, model }, 'Reasoning text buffer truncated at 1MB') + } + } + + return state +} + +export class SiliconFlowError extends Error { + constructor( + public readonly statusCode: number, + public readonly statusText: string, + public readonly errorBody: { + error: { + message: string + code: string | number | null + type?: string | null + } + }, + ) { + super(errorBody.error.message) + this.name = 'SiliconFlowError' + } + + toJSON() { + return { + error: { + message: this.errorBody.error.message, + code: this.errorBody.error.code, + type: this.errorBody.error.type, + }, + } + } +} + +async function parseSiliconFlowError(response: Response): Promise { + const errorText = await response.text() + let errorBody: SiliconFlowError['errorBody'] + try { + const parsed = JSON.parse(errorText) + if (parsed?.error?.message) { + errorBody = { + error: { + message: parsed.error.message, + code: parsed.error.code ?? null, + type: parsed.error.type ?? 
null, + }, + } + } else { + errorBody = { + error: { + message: errorText || response.statusText, + code: response.status, + }, + } + } + } catch { + errorBody = { + error: { + message: errorText || response.statusText, + code: response.status, + }, + } + } + return new SiliconFlowError(response.status, response.statusText, errorBody) +} + +function creditsToFakeCost(credits: number): number { + return credits / ((1 + PROFIT_MARGIN) * 100) +} From 2f3b772f48f1484bf655046ec2e2180c6e5565c4 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 17:16:39 -0700 Subject: [PATCH 08/16] Route minimax through siliconflow of openrouter for now --- agents/base2/base2.ts | 1 - web/src/app/api/v1/chat/completions/_post.ts | 18 ++++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 8735d0579b..4a3c40064f 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -30,7 +30,6 @@ export function createBase2( publisher, model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? 
{ - only: ['siliconflow/fp8'], data_collection: 'deny', } : { only: ['amazon-bedrock'], diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index b886a3d838..94df6d7865 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -367,10 +367,15 @@ export async function postChatCompletions(params: { try { if (bodyStream) { // Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models - // CanopyWave and Fireworks TEMPORARILY DISABLED: route through OpenRouter - const useSiliconFlow = isSiliconFlowModel(typedBody.model) + // SiliconFlow, CanopyWave, and Fireworks TEMPORARILY DISABLED: route through OpenRouter + const useSiliconFlow = false // isSiliconFlowModel(typedBody.model) const useCanopyWave = false // isCanopyWaveModel(typedBody.model) const useFireworks = false // isFireworksModel(typedBody.model) + + // Route minimax models through OpenRouter via SiliconFlow provider + if (isSiliconFlowModel(typedBody.model)) { + typedBody.provider = { ...typedBody.provider, only: ['siliconflow/fp8'] } + } const stream = useSiliconFlow ? 
await handleSiliconFlowStream({ body: typedBody, @@ -432,11 +437,16 @@ export async function postChatCompletions(params: { }) } else { // Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models - // CanopyWave and Fireworks TEMPORARILY DISABLED: route through OpenRouter + // SiliconFlow, CanopyWave, and Fireworks TEMPORARILY DISABLED: route through OpenRouter const model = typedBody.model - const useSiliconFlow = isSiliconFlowModel(model) + const useSiliconFlow = false // isSiliconFlowModel(model) const useCanopyWave = false // isCanopyWaveModel(model) const useFireworks = false // isFireworksModel(model) + + // Route minimax models through OpenRouter via SiliconFlow provider + if (isSiliconFlowModel(model)) { + typedBody.provider = { ...typedBody.provider, only: ['siliconflow/fp8'] } + } const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? modelParts[1] : model const isOpenAIDirectModel = From 7b921d5fa12ffeec86ba927ffcb7d4c3c411647d Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 17:37:14 -0700 Subject: [PATCH 09/16] Reenalbe fireworks --- web/src/app/api/v1/chat/completions/_post.ts | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 94df6d7865..ad0eb4f7ad 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -370,12 +370,7 @@ export async function postChatCompletions(params: { // SiliconFlow, CanopyWave, and Fireworks TEMPORARILY DISABLED: route through OpenRouter const useSiliconFlow = false // isSiliconFlowModel(typedBody.model) const useCanopyWave = false // isCanopyWaveModel(typedBody.model) - const useFireworks = false // isFireworksModel(typedBody.model) - - // Route minimax models through OpenRouter via SiliconFlow provider - if (isSiliconFlowModel(typedBody.model)) { - typedBody.provider = { 
...typedBody.provider, only: ['siliconflow/fp8'] } - } + const useFireworks = isFireworksModel(typedBody.model) const stream = useSiliconFlow ? await handleSiliconFlowStream({ body: typedBody, @@ -441,12 +436,7 @@ export async function postChatCompletions(params: { const model = typedBody.model const useSiliconFlow = false // isSiliconFlowModel(model) const useCanopyWave = false // isCanopyWaveModel(model) - const useFireworks = false // isFireworksModel(model) - - // Route minimax models through OpenRouter via SiliconFlow provider - if (isSiliconFlowModel(model)) { - typedBody.provider = { ...typedBody.provider, only: ['siliconflow/fp8'] } - } + const useFireworks = isFireworksModel(model) const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? modelParts[1] : model const isOpenAIDirectModel = From d5246e282260fc7cb196c9903a8baa4af47fce1d Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 17:47:39 -0700 Subject: [PATCH 10/16] Add install guide to freebuff landing page --- freebuff/web/src/app/home-client.tsx | 90 +++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index bcef00bf97..e397fd101c 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -38,6 +38,85 @@ const faqs = [ }, ] +const setupSteps = [ + { + label: 'Open your terminal', + description: 'Use any terminal — within VS Code, plain terminal, PowerShell, etc.', + }, + { + label: 'Navigate to your project', + command: 'cd /path/to/your-repo', + }, + { + label: 'Install Freebuff', + command: 'npm install -g freebuff', + }, + { + label: 'Run Freebuff', + command: 'freebuff', + }, +] + +function SetupGuide() { + const [isOpen, setIsOpen] = useState(false) + + return ( +

+ + + + {isOpen && ( + +
+
    + {setupSteps.map((step, i) => ( +
  1. + + {i + 1} + +
    +

    {step.label}

    + {'description' in step && step.description && ( +

    {step.description}

    + )} + {'command' in step && step.command && ( +
    + + {step.command} + + +
    + )} +
    +
  2. + ))} +
+
+
+ )} +
+
+ ) +} + function InstallCommand({ className }: { className?: string }) { return (
+ + + +
{/* Bottom fade */} From 52523da38ebe40ef8879f26ba8f90ce44d78a44e Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 18:02:38 -0700 Subject: [PATCH 11/16] freebuff web: Remove navbar, reorder philosophy section --- freebuff/web/src/app/home-client.tsx | 41 +++++++++++++++++++- freebuff/web/src/app/layout.tsx | 2 - freebuff/web/src/components/navbar.tsx | 52 -------------------------- 3 files changed, 40 insertions(+), 55 deletions(-) delete mode 100644 freebuff/web/src/components/navbar.tsx diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index e397fd101c..36a5e2d675 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -4,11 +4,14 @@ import { AnimatePresence, motion } from 'framer-motion' import { ChevronDown, } from 'lucide-react' +import Image from 'next/image' +import Link from 'next/link' import { useState } from 'react' import { BackgroundBeams } from '@/components/background-beams' import { CopyButton } from '@/components/copy-button' import { HeroGrid } from '@/components/hero-grid' +import { Icons } from '@/components/icons' import { cn } from '@/lib/utils' const INSTALL_COMMAND = 'npm install -g freebuff' @@ -187,8 +190,8 @@ function FAQList() { } const PHILOSOPHY_WORDS = [ - { word: 'FAST', description: '3× the speed of Claude Code' }, { word: 'SIMPLE', description: 'No modes. No config. Just code.' }, + { word: 'FAST', description: 'Up to 3× the speed of Claude Code' }, { word: 'LOADED', description: 'Web research, browser use, and more — built in' }, ] @@ -218,6 +221,42 @@ export default function HomeClient() { + {/* Inline nav overlay */} + + + Freebuff + + freebuff + + + + + + {/* Hero content */}
{/* Headline with staggered word animation */} diff --git a/freebuff/web/src/app/layout.tsx b/freebuff/web/src/app/layout.tsx index b813a211dd..3128907ae6 100644 --- a/freebuff/web/src/app/layout.tsx +++ b/freebuff/web/src/app/layout.tsx @@ -3,7 +3,6 @@ import '@/styles/globals.css' import type { Metadata } from 'next' import { Footer } from '@/components/footer' -import { Navbar } from '@/components/navbar' import { ThemeProvider } from '@/components/theme-provider' import { siteConfig } from '@/lib/constant' import { fonts } from '@/lib/fonts' @@ -54,7 +53,6 @@ export default function RootLayout({ > -
{children}
diff --git a/freebuff/web/src/components/navbar.tsx b/freebuff/web/src/components/navbar.tsx deleted file mode 100644 index 66774385db..0000000000 --- a/freebuff/web/src/components/navbar.tsx +++ /dev/null @@ -1,52 +0,0 @@ -'use client' - -import Image from 'next/image' -import Link from 'next/link' - -import { Icons } from './icons' - -export function Navbar() { - - return ( -
-
- - Freebuff - - freebuff - - - - -
-
- ) -} From cbbfe731c49ff2ce2f5d8e0c47c99e683abb352c Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 18:20:39 -0700 Subject: [PATCH 12/16] Fix for importing bundled agents --- cli/src/utils/local-agent-registry.ts | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts index 2016cc7991..203a9f7a90 100644 --- a/cli/src/utils/local-agent-registry.ts +++ b/cli/src/utils/local-agent-registry.ts @@ -10,6 +10,7 @@ import type { MCPConfig } from '@codebuff/common/types/mcp' import { getProjectRoot } from '../project-files' import { AGENT_MODE_TO_ID, type AgentMode } from './constants' import { logger } from './logger' +import * as bundledAgentsModule from '../agents/bundled-agents.generated' import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition' @@ -153,26 +154,12 @@ const getUserAgentDefinitions = (): AgentDefinition[] => { // Bundled agents loading (generated at build time by prebuild-agents.ts) // ============================================================================ -interface BundledAgentsModule { - bundledAgents: Record - getBundledAgentsAsLocalInfo: () => LocalAgentInfo[] -} - -// NOTE: Inline require() with try/catch is used because this file is generated at -// build time by prebuild-agents.ts and may not exist during development -let bundledAgentsModule: BundledAgentsModule | null = null -try { - bundledAgentsModule = require('../agents/bundled-agents.generated') -} catch { - // File not generated yet - running in development without prebuild -} - const getBundledAgents = (): Record => { - return bundledAgentsModule?.bundledAgents ?? {} + return bundledAgentsModule.bundledAgents ?? {} } const getBundledAgentsAsLocalInfo = (): LocalAgentInfo[] => { - return bundledAgentsModule?.getBundledAgentsAsLocalInfo?.() ?? [] + return bundledAgentsModule.getBundledAgentsAsLocalInfo?.() ?? 
[] } // ============================================================================ From 58ff484b65407b5ebbe08c2c2463634c8fc797fc Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 18:30:46 -0700 Subject: [PATCH 13/16] freebuff web: improve styles --- freebuff/web/src/app/home-client.tsx | 154 ++++++++++-------- .../web/src/components/background-beams.tsx | 2 +- freebuff/web/src/components/copy-button.tsx | 2 +- freebuff/web/src/components/footer.tsx | 16 +- freebuff/web/src/components/hero-grid.tsx | 6 +- freebuff/web/src/components/terminal-demo.tsx | 8 +- freebuff/web/src/styles/globals.css | 24 +-- freebuff/web/tailwind.config.ts | 4 +- 8 files changed, 117 insertions(+), 99 deletions(-) diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index 36a5e2d675..373cc2d4a8 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -68,7 +68,7 @@ function SetupGuide() {
@@ -29,7 +29,7 @@ export function HeroGrid({ className }: { className?: string }) { className="absolute inset-0 opacity-[0.025]" style={{ backgroundImage: - 'linear-gradient(90deg, #00FF95 1px, transparent 1px)', + 'linear-gradient(90deg, #7CFF3F 1px, transparent 1px)', backgroundSize: '120px 120px', }} /> diff --git a/freebuff/web/src/components/terminal-demo.tsx b/freebuff/web/src/components/terminal-demo.tsx index 4048312dd8..e2fdfc6b8a 100644 --- a/freebuff/web/src/components/terminal-demo.tsx +++ b/freebuff/web/src/components/terminal-demo.tsx @@ -42,13 +42,13 @@ export function TerminalDemo() { const getLineColor = (type: string) => { switch (type) { case 'prompt': - return 'text-acid-green' + return 'text-acid-matrix' case 'user': return 'text-white font-medium' case 'agent': return 'text-zinc-300' case 'success': - return 'text-acid-green font-medium' + return 'text-acid-matrix font-medium' default: return 'text-zinc-500' } @@ -62,7 +62,7 @@ export function TerminalDemo() { className="relative mx-auto max-w-2xl" > {/* Glow behind terminal */} -
+
{/* Title bar */} @@ -93,7 +93,7 @@ export function TerminalDemo() { ))} {visibleLines < DEMO_LINES.length && ( - + )}
diff --git a/freebuff/web/src/styles/globals.css b/freebuff/web/src/styles/globals.css index a18c7568cf..c9cde579cc 100644 --- a/freebuff/web/src/styles/globals.css +++ b/freebuff/web/src/styles/globals.css @@ -55,9 +55,9 @@ /* Neon green glow text */ .neon-text { text-shadow: - 0 0 20px rgba(0, 255, 149, 0.4), - 0 0 40px rgba(0, 255, 149, 0.2), - 0 0 80px rgba(0, 255, 149, 0.1); + 0 0 20px rgba(124, 255, 63, 0.4), + 0 0 40px rgba(124, 255, 63, 0.2), + 0 0 80px rgba(124, 255, 63, 0.1); } /* Gradient border shine effect */ @@ -73,10 +73,10 @@ padding: 1px; background: linear-gradient( 135deg, - rgba(0, 255, 149, 0.3), + rgba(124, 255, 63, 0.3), transparent 40%, transparent 60%, - rgba(0, 255, 149, 0.15) + rgba(124, 255, 63, 0.15) ); -webkit-mask: linear-gradient(#fff 0 0) content-box, @@ -89,19 +89,21 @@ /* Giant keyword wall — hollow outlined text */ .keyword-hollow { color: transparent; - -webkit-text-stroke: 1.5px rgba(0, 255, 149, 0.4); + -webkit-text-stroke: 1.5px rgba(124, 255, 63, 0.45); transition: color 0.5s ease, -webkit-text-stroke-color 0.5s ease, text-shadow 0.5s ease; } -.group:hover .keyword-hollow, + .keyword-filled { - color: #00FF95; - -webkit-text-stroke: 1.5px #00FF95; + color: #7CFF3F; + -webkit-text-stroke: 1.5px #7CFF3F; text-shadow: - 0 0 40px rgba(0, 255, 149, 0.3), - 0 0 80px rgba(0, 255, 149, 0.1); + 0 0 40px rgba(124, 255, 63, 0.3), + 0 0 80px rgba(124, 255, 63, 0.1); + transition: text-shadow 0.5s ease; } + @media (prefers-reduced-motion: reduce) { .animate-glow-pulse, .animate-scan-line, diff --git a/freebuff/web/tailwind.config.ts b/freebuff/web/tailwind.config.ts index eb436d506f..3345cfb9dd 100644 --- a/freebuff/web/tailwind.config.ts +++ b/freebuff/web/tailwind.config.ts @@ -83,10 +83,10 @@ const config = { }, 'glow-pulse': { '0%, 100%': { - textShadow: '0 0 20px rgba(0,255,149,0.4), 0 0 40px rgba(0,255,149,0.2), 0 0 80px rgba(0,255,149,0.1)', + textShadow: '0 0 20px rgba(124,255,63,0.4), 0 0 40px rgba(124,255,63,0.2), 0 0 80px 
rgba(124,255,63,0.1)', }, '50%': { - textShadow: '0 0 30px rgba(0,255,149,0.6), 0 0 60px rgba(0,255,149,0.3), 0 0 100px rgba(0,255,149,0.15)', + textShadow: '0 0 30px rgba(124,255,63,0.6), 0 0 60px rgba(124,255,63,0.3), 0 0 100px rgba(124,255,63,0.15)', }, }, From 8a033ac0fc5f7829c99add50ae06835b65206fc7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 11 Mar 2026 01:32:26 +0000 Subject: [PATCH 14/16] Bump Freebuff version to 0.0.8 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index d7ca6de62c..f330e92c64 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.7", + "version": "0.0.8", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 016efa5ea7c77cbe51fef58edc96cbb77f203ef3 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 18:37:34 -0700 Subject: [PATCH 15/16] Fix build --- cli/src/agents/bundled-agents.generated.d.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 cli/src/agents/bundled-agents.generated.d.ts diff --git a/cli/src/agents/bundled-agents.generated.d.ts b/cli/src/agents/bundled-agents.generated.d.ts new file mode 100644 index 0000000000..f5b89022cf --- /dev/null +++ b/cli/src/agents/bundled-agents.generated.d.ts @@ -0,0 +1,14 @@ +/** + * Type declarations for the auto-generated bundled agents module. + * + * The actual file (bundled-agents.generated.ts) is created by + * cli/scripts/prebuild-agents.ts and is gitignored. This declaration + * file lets TypeScript resolve the module when the generated file + * has not been built yet. 
+ */ +import type { LocalAgentInfo } from '../utils/local-agent-registry' + +export declare const bundledAgents: Record +export declare function getBundledAgentsAsLocalInfo(): LocalAgentInfo[] +export declare function getBundledAgentIds(): string[] +export declare function isBundledAgent(agentId: string): boolean From c34a61e9d8f0885ce54bef3b26e6d475644d51d6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 11 Mar 2026 01:40:41 +0000 Subject: [PATCH 16/16] Bump Freebuff version to 0.0.9 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index f330e92c64..39156d5c7a 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.8", + "version": "0.0.9", "description": "The world's strongest free coding agent", "license": "MIT", "bin": {