From b28479c0de55ca42eddeccf4f53a204d72e73071 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 12:36:37 -0700 Subject: [PATCH 01/16] Switch to baseten provider for minimax --- agents/base2/base2.ts | 3 - agents/tmux-cli.ts | 3 - packages/internal/src/env-schema.ts | 2 + web/src/app/api/v1/chat/completions/_post.ts | 53 +- web/src/llm-api/baseten.ts | 607 +++++++++++++++++++ web/src/llm-api/fireworks.ts | 2 +- 6 files changed, 654 insertions(+), 16 deletions(-) create mode 100644 web/src/llm-api/baseten.ts diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index be5ade5a1c..52ca7ef4ba 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -30,9 +30,6 @@ export function createBase2( publisher, model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? { - only: ['inceptron/fp8'], - order: ['inceptron/fp8'], - allow_fallbacks: false, data_collection: 'deny', } : { only: ['amazon-bedrock'], diff --git a/agents/tmux-cli.ts b/agents/tmux-cli.ts index 10c0ecdeab..e959bf64c3 100644 --- a/agents/tmux-cli.ts +++ b/agents/tmux-cli.ts @@ -75,9 +75,6 @@ const definition: AgentDefinition = { // Provider options are tightly coupled to the model choice above. // If you change the model, update these accordingly. 
providerOptions: { - only: ['inceptron/fp8'], - order: ['inceptron/fp8'], - allow_fallbacks: false, data_collection: 'deny', }, diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index 7f9336a08d..21a0147bd8 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -7,6 +7,7 @@ export const serverEnvSchema = clientEnvSchema.extend({ OPENAI_API_KEY: z.string().min(1), ANTHROPIC_API_KEY: z.string().min(1), FIREWORKS_API_KEY: z.string().min(1), + BASETEN_API_KEY: z.string().min(1).optional(), LINKUP_API_KEY: z.string().min(1), CONTEXT7_API_KEY: z.string().optional(), GRAVITY_API_KEY: z.string().min(1), @@ -50,6 +51,7 @@ export const serverProcessEnv: ServerInput = { OPENAI_API_KEY: process.env.OPENAI_API_KEY, ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY, FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY, + BASETEN_API_KEY: process.env.BASETEN_API_KEY, LINKUP_API_KEY: process.env.LINKUP_API_KEY, CONTEXT7_API_KEY: process.env.CONTEXT7_API_KEY, GRAVITY_API_KEY: process.env.GRAVITY_API_KEY, diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index b9ebb09f63..1eec315d82 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -35,6 +35,12 @@ import type { NextRequest } from 'next/server' import type { ChatCompletionRequestBody } from '@/llm-api/types' +import { + BasetenError, + handleBasetenNonStream, + handleBasetenStream, + isBasetenModel, +} from '@/llm-api/baseten' import { FireworksError, handleFireworksNonStream, @@ -354,9 +360,20 @@ export async function postChatCompletions(params: { // Handle streaming vs non-streaming try { if (bodyStream) { - // Streaming request — route to Fireworks for supported models - const useFireworks = isFireworksModel(typedBody.model) - const stream = useFireworks + // Streaming request — route to Baseten/Fireworks for supported models + const useBaseten = 
isBasetenModel(typedBody.model) + const useFireworks = !useBaseten && isFireworksModel(typedBody.model) + const stream = useBaseten + ? await handleBasetenStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useFireworks ? await handleFireworksStream({ body: typedBody, userId, @@ -396,9 +413,10 @@ export async function postChatCompletions(params: { }, }) } else { - // Non-streaming request — route to Fireworks for supported models + // Non-streaming request — route to Baseten/Fireworks for supported models const model = typedBody.model - const useFireworks = isFireworksModel(model) + const useBaseten = isBasetenModel(model) + const useFireworks = !useBaseten && isFireworksModel(model) const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? modelParts[1] : model const isOpenAIDirectModel = @@ -409,7 +427,17 @@ export async function postChatCompletions(params: { const shouldUseOpenAIEndpoint = isOpenAIDirectModel && typedBody.codebuff_metadata?.n !== undefined - const nonStreamRequest = useFireworks + const nonStreamRequest = useBaseten + ? handleBasetenNonStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useFireworks ? handleFireworksNonStream({ body: typedBody, userId, @@ -463,10 +491,14 @@ export async function postChatCompletions(params: { if (error instanceof FireworksError) { fireworksError = error } + let basetenError: BasetenError | undefined + if (error instanceof BasetenError) { + basetenError = error + } // Log detailed error information for debugging const errorDetails = openrouterError?.toJSON() - const providerLabel = fireworksError ? 'Fireworks' : 'OpenRouter' + const providerLabel = basetenError ? 'Baseten' : fireworksError ? 'Fireworks' : 'OpenRouter' logger.error( { error: getErrorObject(error), @@ -480,8 +512,8 @@ export async function postChatCompletions(params: { ? 
typedBody.messages.length : 0, messages: typedBody.messages, - providerStatusCode: (openrouterError ?? fireworksError)?.statusCode, - providerStatusText: (openrouterError ?? fireworksError)?.statusText, + providerStatusCode: (openrouterError ?? fireworksError ?? basetenError)?.statusCode, + providerStatusText: (openrouterError ?? fireworksError ?? basetenError)?.statusText, openrouterErrorCode: errorDetails?.error?.code, openrouterErrorType: errorDetails?.error?.type, openrouterErrorMessage: errorDetails?.error?.message, @@ -509,6 +541,9 @@ export async function postChatCompletions(params: { if (error instanceof FireworksError) { return NextResponse.json(error.toJSON(), { status: error.statusCode }) } + if (error instanceof BasetenError) { + return NextResponse.json(error.toJSON(), { status: error.statusCode }) + } return NextResponse.json( { error: 'Failed to process request' }, diff --git a/web/src/llm-api/baseten.ts b/web/src/llm-api/baseten.ts new file mode 100644 index 0000000000..dbd787def8 --- /dev/null +++ b/web/src/llm-api/baseten.ts @@ -0,0 +1,607 @@ +import { Agent } from 'undici' + +import { PROFIT_MARGIN } from '@codebuff/common/constants/limits' +import { getErrorObject } from '@codebuff/common/util/error' +import { env } from '@codebuff/internal/env' + +import { + consumeCreditsForMessage, + extractRequestMetadata, + insertMessageToBigQuery, +} from './helpers' + +import type { UsageData } from './helpers' +import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ChatCompletionRequestBody } from './types' + +const BASETEN_BASE_URL = 'https://inference.baseten.co/v1' + +// Extended timeout for deep-thinking models that can take +// a long time to start streaming. 
+const BASETEN_HEADERS_TIMEOUT_MS = 10 * 60 * 1000 + +const basetenAgent = new Agent({ + headersTimeout: BASETEN_HEADERS_TIMEOUT_MS, + bodyTimeout: 0, +}) + +/** Map from OpenRouter model IDs to Baseten model IDs */ +const BASETEN_MODEL_MAP: Record = { + 'minimax/minimax-m2.5': 'MiniMaxAI/MiniMax-M2.5', +} + +export function isBasetenModel(model: string): boolean { + return model in BASETEN_MODEL_MAP +} + +function getBasetenModelId(openrouterModel: string): string { + return BASETEN_MODEL_MAP[openrouterModel] ?? openrouterModel +} + +type StreamState = { responseText: string; reasoningText: string } + +type LineResult = { + state: StreamState + billedCredits?: number + patchedLine: string +} + +function createBasetenRequest(params: { + body: ChatCompletionRequestBody + originalModel: string + fetch: typeof globalThis.fetch +}) { + const { body, originalModel, fetch } = params + const basetenBody: Record = { + ...body, + model: getBasetenModelId(originalModel), + } + + // Strip OpenRouter-specific / internal fields + delete basetenBody.provider + delete basetenBody.transforms + delete basetenBody.codebuff_metadata + delete basetenBody.usage + + // For streaming, request usage in the final chunk + if (basetenBody.stream) { + basetenBody.stream_options = { include_usage: true } + } + + if (!env.BASETEN_API_KEY) { + throw new Error('BASETEN_API_KEY is not configured') + } + + return fetch(`${BASETEN_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${env.BASETEN_API_KEY}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(basetenBody), + // @ts-expect-error - dispatcher is a valid undici option not in fetch types + dispatcher: basetenAgent, + }) +} + +// Baseten per-token pricing (dollars per token) +// TODO: Verify these costs against Baseten's actual pricing +const BASETEN_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 +const BASETEN_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 +const BASETEN_OUTPUT_COST_PER_TOKEN = 
1.20 / 1_000_000 + +function extractUsageAndCost(usage: Record | undefined | null): UsageData { + if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } + const promptDetails = usage.prompt_tokens_details as Record | undefined | null + const completionDetails = usage.completion_tokens_details as Record | undefined | null + + const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0 + const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0 + const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 + const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0 + + // Baseten doesn't return cost — compute from token counts and known pricing + const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) + const cost = + nonCachedInputTokens * BASETEN_INPUT_COST_PER_TOKEN + + cacheReadInputTokens * BASETEN_CACHED_INPUT_COST_PER_TOKEN + + outputTokens * BASETEN_OUTPUT_COST_PER_TOKEN + + return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } +} + +export async function handleBasetenNonStream({ + body, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: ChatCompletionRequestBody + userId: string + stripeCustomerId?: string | null + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const originalModel = body.model + const startTime = new Date() + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + + const response = await createBasetenRequest({ body, originalModel, fetch }) + + if (!response.ok) { + throw await parseBasetenError(response) + } + + const data = await response.json() + const content = data.choices?.[0]?.message?.content ?? 
'' + const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? '' + const usageData = extractUsageAndCost(data.usage) + + insertMessageToBigQuery({ + messageId: data.id, + userId, + startTime, + request: body, + reasoningText, + responseText: content, + usageData, + logger, + insertMessageBigquery, + }).catch((error) => { + logger.error({ error }, 'Failed to insert message into BigQuery') + }) + + const billedCredits = await consumeCreditsForMessage({ + messageId: data.id, + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + startTime, + model: originalModel, + reasoningText, + responseText: content, + usageData, + byok: false, + logger, + costMode, + }) + + // Overwrite cost so SDK calculates exact credits we charged + if (data.usage) { + data.usage.cost = creditsToFakeCost(billedCredits) + data.usage.cost_details = { upstream_inference_cost: 0 } + } + + // Normalise model name back to OpenRouter format for client compatibility + data.model = originalModel + if (!data.provider) data.provider = 'Baseten' + + return data +} + +export async function handleBasetenStream({ + body, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: ChatCompletionRequestBody + userId: string + stripeCustomerId?: string | null + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const originalModel = body.model + const startTime = new Date() + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + + const response = await createBasetenRequest({ body, originalModel, fetch }) + + if (!response.ok) { + throw await parseBasetenError(response) + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Failed to get response reader') + } + + let heartbeatInterval: NodeJS.Timeout + let state: StreamState = { responseText: '', reasoningText: 
'' } + let clientDisconnected = false + + const stream = new ReadableStream({ + async start(controller) { + const decoder = new TextDecoder() + let buffer = '' + + controller.enqueue( + new TextEncoder().encode(`: connected ${new Date().toISOString()}\n`), + ) + + heartbeatInterval = setInterval(() => { + if (!clientDisconnected) { + try { + controller.enqueue( + new TextEncoder().encode( + `: heartbeat ${new Date().toISOString()}\n\n`, + ), + ) + } catch { + // client disconnected + } + } + }, 30000) + + try { + let done = false + while (!done) { + const result = await reader.read() + done = result.done + const value = result.value + + if (done) break + + buffer += decoder.decode(value, { stream: true }) + let lineEnd = buffer.indexOf('\n') + + while (lineEnd !== -1) { + const line = buffer.slice(0, lineEnd + 1) + buffer = buffer.slice(lineEnd + 1) + + const lineResult = await handleLine({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request: body, + originalModel, + line, + state, + logger, + insertMessage: insertMessageBigquery, + }) + state = lineResult.state + + if (!clientDisconnected) { + try { + controller.enqueue(new TextEncoder().encode(lineResult.patchedLine)) + } catch { + logger.warn('Client disconnected during stream, continuing for billing') + clientDisconnected = true + } + } + + lineEnd = buffer.indexOf('\n') + } + } + + if (!clientDisconnected) { + controller.close() + } + } catch (error) { + if (!clientDisconnected) { + controller.error(error) + } else { + logger.warn( + getErrorObject(error), + 'Error after client disconnect in Baseten stream', + ) + } + } finally { + clearInterval(heartbeatInterval) + } + }, + cancel() { + clearInterval(heartbeatInterval) + clientDisconnected = true + logger.warn( + { + clientDisconnected, + responseTextLength: state.responseText.length, + reasoningTextLength: state.reasoningText.length, + }, + 'Client cancelled stream, continuing Baseten consumption for 
billing', + ) + }, + }) + + return stream +} + +async function handleLine({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + line, + state, + logger, + insertMessage, +}: { + userId: string + stripeCustomerId?: string | null + agentId: string + clientId: string | null + clientRequestId: string | null + costMode: string | undefined + startTime: Date + request: unknown + originalModel: string + line: string + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise { + if (!line.startsWith('data: ')) { + return { state, patchedLine: line } + } + + const raw = line.slice('data: '.length) + if (raw === '[DONE]\n' || raw === '[DONE]') { + return { state, patchedLine: line } + } + + let obj: Record + try { + obj = JSON.parse(raw) + } catch (error) { + logger.warn( + { error: getErrorObject(error, { includeRawError: true }) }, + 'Received non-JSON Baseten response', + ) + return { state, patchedLine: line } + } + + // Patch model and provider for SDK compatibility + if (obj.model) obj.model = originalModel + if (!obj.provider) obj.provider = 'Baseten' + + // Process the chunk for billing / state tracking + const result = await handleResponse({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + data: obj, + state, + logger, + insertMessage, + }) + + // If this is the final chunk with billing, overwrite cost in the patched object + if (result.billedCredits !== undefined && obj.usage) { + const usage = obj.usage as Record + usage.cost = creditsToFakeCost(result.billedCredits) + usage.cost_details = { upstream_inference_cost: 0 } + } + + const patchedLine = `data: ${JSON.stringify(obj)}\n` + return { state: result.state, billedCredits: result.billedCredits, patchedLine } +} + +async function handleResponse({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + 
startTime, + request, + originalModel, + data, + state, + logger, + insertMessage, +}: { + userId: string + stripeCustomerId?: string | null + agentId: string + clientId: string | null + clientRequestId: string | null + costMode: string | undefined + startTime: Date + request: unknown + originalModel: string + data: Record + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise<{ state: StreamState; billedCredits?: number }> { + state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) + + if ('error' in data || !data.usage) { + return { state } + } + + const usageData = extractUsageAndCost(data.usage as Record) + const messageId = typeof data.id === 'string' ? data.id : 'unknown' + + insertMessageToBigQuery({ + messageId, + userId, + startTime, + request, + reasoningText: state.reasoningText, + responseText: state.responseText, + usageData, + logger, + insertMessageBigquery: insertMessage, + }).catch((error) => { + logger.error({ error }, 'Failed to insert message into BigQuery') + }) + + const billedCredits = await consumeCreditsForMessage({ + messageId, + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + startTime, + model: originalModel, + reasoningText: state.reasoningText, + responseText: state.responseText, + usageData, + byok: false, + logger, + costMode, + }) + + return { state, billedCredits } +} + +function handleStreamChunk({ + data, + state, + logger, + userId, + agentId, + model, +}: { + data: Record + state: StreamState + logger: Logger + userId: string + agentId: string + model: string +}): StreamState { + const MAX_BUFFER_SIZE = 1 * 1024 * 1024 + + if ('error' in data) { + const errorData = data.error as Record + logger.error( + { + userId, + agentId, + model, + errorCode: errorData?.code, + errorType: errorData?.type, + errorMessage: errorData?.message, + }, + 'Received error chunk in Baseten stream', + ) + return state + } + + const choices = data.choices as 
Array> | undefined + if (!choices?.length) { + return state + } + const choice = choices[0] + const delta = choice.delta as Record | undefined + + const contentDelta = typeof delta?.content === 'string' ? delta.content : '' + if (state.responseText.length < MAX_BUFFER_SIZE) { + state.responseText += contentDelta + if (state.responseText.length >= MAX_BUFFER_SIZE) { + state.responseText = + state.responseText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---' + logger.warn({ userId, agentId, model }, 'Response text buffer truncated at 1MB') + } + } + + const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content + : typeof delta?.reasoning === 'string' ? delta.reasoning + : '' + if (state.reasoningText.length < MAX_BUFFER_SIZE) { + state.reasoningText += reasoningDelta + if (state.reasoningText.length >= MAX_BUFFER_SIZE) { + state.reasoningText = + state.reasoningText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---' + logger.warn({ userId, agentId, model }, 'Reasoning text buffer truncated at 1MB') + } + } + + return state +} + +export class BasetenError extends Error { + constructor( + public readonly statusCode: number, + public readonly statusText: string, + public readonly errorBody: { + error: { + message: string + code: string | number | null + type?: string | null + } + }, + ) { + super(errorBody.error.message) + this.name = 'BasetenError' + } + + toJSON() { + return { + error: { + message: this.errorBody.error.message, + code: this.errorBody.error.code, + type: this.errorBody.error.type, + }, + } + } +} + +async function parseBasetenError(response: Response): Promise { + const errorText = await response.text() + let errorBody: BasetenError['errorBody'] + try { + const parsed = JSON.parse(errorText) + if (parsed?.error?.message) { + errorBody = { + error: { + message: parsed.error.message, + code: parsed.error.code ?? null, + type: parsed.error.type ?? 
null, + }, + } + } else { + errorBody = { + error: { + message: errorText || response.statusText, + code: response.status, + }, + } + } + } catch { + errorBody = { + error: { + message: errorText || response.statusText, + code: response.status, + }, + } + } + return new BasetenError(response.status, response.statusText, errorBody) +} + +function creditsToFakeCost(credits: number): number { + return credits / ((1 + PROFIT_MARGIN) * 100) +} diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 4df557af08..42217cb525 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -28,7 +28,7 @@ const fireworksAgent = new Agent({ /** Map from OpenRouter model IDs to Fireworks model IDs */ const FIREWORKS_MODEL_MAP: Record = { - 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', + // 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', } export function isFireworksModel(model: string): boolean { From 6990d6777ebbcbe99bed0add6bd89d296d4c6554 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 14:06:32 -0700 Subject: [PATCH 02/16] Simplify tmux cli agent slightly --- agents/base2/base2.ts | 2 +- agents/tmux-cli.ts | 152 ++++++++++++++---------------------------- 2 files changed, 52 insertions(+), 102 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 52ca7ef4ba..4a3c40064f 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -85,7 +85,7 @@ export function createBase2( isFree && 'code-reviewer-lite', isDefault && 'code-reviewer', isMax && 'code-reviewer-multi-prompt', - isDefault && 'tmux-cli', + 'tmux-cli', 'context-pruner', ), diff --git a/agents/tmux-cli.ts b/agents/tmux-cli.ts index e959bf64c3..be07859283 100644 --- a/agents/tmux-cli.ts +++ b/agents/tmux-cli.ts @@ -450,137 +450,84 @@ esac const sessionName = 'tui-test-' + Date.now() + '-' + Math.random().toString(36).slice(2, 6) const helperPath = '/tmp/tmux-helper-' + sessionName + '.sh' - 
logger.info('Writing helper script to ' + helperPath) + logger.info('Setting up tmux session: ' + sessionName) - // Write the self-contained helper script to /tmp - const { toolResult: writeResult } = yield { - toolName: 'run_terminal_command', - input: { - command: 'cat > ' + helperPath + " << 'TMUX_HELPER_EOF'\n" + helperScript + "TMUX_HELPER_EOF\nchmod +x " + helperPath, - timeout_seconds: 10, - }, - } - - const writeOutput = writeResult?.[0] - if (writeOutput && writeOutput.type === 'json') { - const value = writeOutput.value as Record - const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined - if (exitCode !== 0) { - const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : 'unknown error' - logger.error('Failed to write helper script: ' + stderr) - yield { - toolName: 'set_output', - input: { - overallStatus: 'failure', - summary: 'Failed to write helper script to /tmp. ' + stderr, - sessionName: '', - scriptIssues: [{ script: helperPath, issue: stderr, suggestedFix: 'Check /tmp is writable' }], - captures: [], - }, - } - return - } - } - - logger.info('Starting tmux session (bash)') - - // Start the tmux session with bash (not the user's command directly) - const { toolResult } = yield { + // Combined setup: write helper script, start session, send command (single yield to reduce round-trips) + const escapedCommand = startCommand.replace(/'/g, "'\\''") + const setupScript = + 'set -e\n' + + 'cat > ' + helperPath + " << 'TMUX_HELPER_EOF'\n" + helperScript + 'TMUX_HELPER_EOF\n' + + 'chmod +x ' + helperPath + '\n' + + 'OUTPUT=$(' + helperPath + " start '" + sessionName + "') || { echo \"FAIL_START\" >&2; exit 1; }\n" + + helperPath + " send '" + sessionName + "' '" + escapedCommand + "' || { " + helperPath + " stop '" + sessionName + "' 2>/dev/null; echo \"FAIL_SEND\" >&2; exit 1; }\n" + + 'echo "$OUTPUT"' + + const { toolResult: setupResult } = yield { toolName: 'run_terminal_command', input: { - command: helperPath + " 
start '" + sessionName + "'", + command: setupScript, timeout_seconds: 30, }, + includeToolCall: false, } - let started = false - let parseError = '' + let setupSuccess = false + let setupError = '' - const result = toolResult?.[0] - if (result && result.type === 'json') { - const value = result.value as Record + const setupOutput = setupResult?.[0] + if (setupOutput && setupOutput.type === 'json') { + const value = setupOutput.value as Record const stdout = typeof value?.stdout === 'string' ? value.stdout.trim() : '' const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : '' const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined - if (exitCode !== 0) { - parseError = stderr || 'Helper script failed with no error message' - } else if (stdout === sessionName) { - started = true + if (exitCode === 0 && stdout === sessionName) { + setupSuccess = true } else { - parseError = 'Unexpected output: ' + stdout + setupError = stderr || stdout || 'Setup failed with no error message' } } else { - parseError = 'Unexpected result type from run_terminal_command' + setupError = 'Unexpected result type from run_terminal_command' } - if (!started) { - const errorMsg = parseError || 'Failed to start session' - logger.error({ parseError: errorMsg }, 'Failed to start tmux session') + if (!setupSuccess) { + const isSendFailure = setupError.includes('FAIL_SEND') + const isStartFailure = setupError.includes('FAIL_START') + + let summary: string + let suggestedFix: string + if (isSendFailure) { + summary = 'Started session but failed to send command. ' + setupError + suggestedFix = 'Check that the command is valid.' + } else if (isStartFailure) { + summary = 'Failed to start tmux session. ' + setupError + suggestedFix = 'Ensure tmux is installed and the command is valid.' + } else { + summary = 'Failed to write helper script to /tmp. 
' + setupError + suggestedFix = 'Check /tmp is writable' + } + + logger.error(setupError, 'Setup failed') yield { toolName: 'set_output', input: { overallStatus: 'failure', - summary: 'Failed to start tmux session. ' + errorMsg, - sessionName: '', - scriptIssues: [ - { - script: helperPath, - issue: errorMsg, - errorOutput: JSON.stringify(toolResult), - suggestedFix: 'Ensure tmux is installed and the command is valid.', - }, - ], + summary, + sessionName: isSendFailure ? sessionName : '', + scriptIssues: [{ script: helperPath, issue: setupError, suggestedFix }], captures: [], }, } return } - logger.info('Successfully started tmux session: ' + sessionName) - - // Send the user's command to the bash session - const escapedCommand = startCommand.replace(/'/g, "'\\''") - const { toolResult: sendResult } = yield { - toolName: 'run_terminal_command', - input: { - command: helperPath + " send '" + sessionName + "' '" + escapedCommand + "'", - timeout_seconds: 15, - }, - } - - const sendOutput = sendResult?.[0] - if (sendOutput && sendOutput.type === 'json') { - const value = sendOutput.value as Record - const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined - if (exitCode !== 0) { - const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : 'send failed' - logger.error('Failed to send command: ' + stderr) - yield { - toolName: 'run_terminal_command', - input: { command: helperPath + " stop '" + sessionName + "'", timeout_seconds: 5 }, - } - yield { - toolName: 'set_output', - input: { - overallStatus: 'failure', - summary: 'Started session but failed to send command. ' + stderr, - sessionName, - scriptIssues: [{ script: helperPath, issue: stderr, suggestedFix: 'Check that the command is valid.' 
}], - captures: [], - }, - } - return - } - } - - logger.info('Sent command to session: ' + startCommand) + logger.info('Session ready: ' + sessionName) - // Wait briefly then capture initial state so the agent starts with context + // Capture initial state so the agent starts with context (0.5s is enough since send already waits ~0.6s) const { toolResult: initCapture } = yield { toolName: 'run_terminal_command', input: { - command: 'sleep 1.5 && ' + helperPath + " capture '" + sessionName + "' --wait 0 --label startup-check", + command: 'sleep 0.5 && ' + helperPath + " capture '" + sessionName + "' --wait 0 --label startup-check", timeout_seconds: 10, }, } @@ -606,7 +553,10 @@ esac '**Captures dir:** `' + captureDir + '/`\n\n' + '**Initial terminal output:**\n```\n' + initialOutput + '\n```\n\n' + 'Check the initial output above — if you see errors like "command not found" or "No such file", report failure immediately.\n\n' + - 'Commands:\n' + + '## Helper Script Implementation\n\n' + + 'The helper script at `' + helperPath + '` is a Bash script that wraps tmux commands to interact with the CLI. Here is its full implementation:\n\n' + + '```bash\n' + helperScript.replace(/```/g, '\\`\\`\\`') + '\n```\n\n' + + '## Quick Reference\n\n' + '- Send input: `' + helperPath + ' send "' + sessionName + '" "..."`\n' + '- Send with paste mode: `' + helperPath + ' send "' + sessionName + '" "..." --paste`\n' + '- Send + wait for output: `' + helperPath + ' send "' + sessionName + '" "..." 
--wait-idle 3`\n' + From 567cdbbb05017c25bd40418780b34fcfd3cc7565 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 14:15:36 -0700 Subject: [PATCH 03/16] Upgrade to react 19 --- bun.lock | 42 ++++-------- cli/package.json | 2 +- .../components/blocks/agent-branch-item.tsx | 5 +- cli/src/components/clickable.tsx | 6 +- cli/src/components/tools/tool-call-item.tsx | 5 +- cli/src/types/react19-compat.d.ts | 19 ++++++ .../__tests__/markdown-renderer.test.tsx | 64 ++++++++++--------- freebuff/web/package.json | 8 +-- package.json | 4 +- web/package.json | 8 +-- .../admin/traces/components/chat-message.tsx | 1 + web/src/components/card-with-beams.tsx | 1 + web/src/components/docs/mdx/code-demo.tsx | 1 + .../components/docs/mdx/markdown-table.tsx | 2 +- .../ui/landing/competition/github-copilot.tsx | 2 +- 15 files changed, 91 insertions(+), 79 deletions(-) create mode 100644 cli/src/types/react19-compat.d.ts diff --git a/bun.lock b/bun.lock index e53d3ca9a2..964cd43180 100644 --- a/bun.lock +++ b/bun.lock @@ -75,7 +75,7 @@ "zustand": "^5.0.8", }, "devDependencies": { - "@types/react": "^18.3.12", + "@types/react": "19.2.14", "@types/react-reconciler": "^0.32.0", "react-dom": "^19.0.0", "strip-ansi": "^7.1.2", @@ -149,16 +149,16 @@ "next-auth": "^4.24.11", "next-themes": "^0.3.0", "pino": "^9.6.0", - "react": "18.3.1", - "react-dom": "18.3.1", + "react": "^19.0.0", + "react-dom": "^19.0.0", "tailwind-merge": "^2.5.2", "zod": "^4.2.1", }, "devDependencies": { "@tailwindcss/typography": "^0.5.15", "@types/node": "^22.14.0", - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "autoprefixer": "^10.4.21", "postcss": "^8", "tailwindcss": "^3.4.11", @@ -299,8 +299,8 @@ "pino": "^9.6.0", "posthog-js": "^1.234.10", "prism-react-renderer": "^2.4.1", - "react": "18.3.1", - "react-dom": "18.3.1", + "react": "^19.0.0", + "react-dom": "^19.0.0", "react-hook-form": "^7.55.0", "server-only": "^0.0.1", 
"tailwind-merge": "^2.5.2", @@ -319,8 +319,8 @@ "@types/jest": "^29.5.14", "@types/node": "^22.14.0", "@types/pg": "^8.11.11", - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "@typescript-eslint/eslint-plugin": "^8.29.1", "@typescript-eslint/parser": "^8.29.1", "autoprefixer": "^10.4.21", @@ -351,8 +351,8 @@ }, }, "overrides": { - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "baseline-browser-mapping": "^2.9.14", "signal-exit": "3.0.7", "zod": "^4.2.1", @@ -1330,11 +1330,9 @@ "@types/prismjs": ["@types/prismjs@1.26.5", "", {}, "sha512-AUZTa7hQ2KY5L7AmtSiqxlhWxb4ina0yd8hNbl4TWuqnv/pFP0nDMb3YrfSBf4hJVGLh2YEIBfKaBW/9UEl6IQ=="], - "@types/prop-types": ["@types/prop-types@15.7.15", "", {}, "sha512-F6bEyamV9jKGAFBEmlQnesRPGOQqS2+Uwi0Em15xenOxHaf2hv6L8YCVn3rPdPJOiJfPiCnLIRyvwVaqMY3MIw=="], - - "@types/react": ["@types/react@18.3.26", "", { "dependencies": { "@types/prop-types": "*", "csstype": "^3.0.2" } }, "sha512-RFA/bURkcKzx/X9oumPG9Vp3D3JUgus/d0b67KB0t5S/raciymilkOa66olh78MUI92QLbEJevO7rvqU/kjwKA=="], + "@types/react": ["@types/react@19.2.14", "", { "dependencies": { "csstype": "^3.2.2" } }, "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w=="], - "@types/react-dom": ["@types/react-dom@18.3.7", "", { "peerDependencies": { "@types/react": "^18.0.0" } }, "sha512-MEe3UeoENYVFXzoXEWsvcpg6ZvlrFNlOQ7EOsvhI3CfAXwzPfO8Qwuxd40nepsYKqyyVQnTdEfv68q91yLcKrQ=="], + "@types/react-dom": ["@types/react-dom@19.2.3", "", { "peerDependencies": { "@types/react": "^19.2.0" } }, "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ=="], "@types/react-reconciler": ["@types/react-reconciler@0.32.2", "", { "peerDependencies": { "@types/react": "*" } }, "sha512-gjcm6O0aUknhYaogEl8t5pecPfiOTD8VQkbjOhgbZas/E6qGY+veW9iuJU/7p4Y1E0EuQ0mArga7VEOUWSlVRA=="], @@ -1744,7 
+1742,7 @@ "cssstyle": ["cssstyle@2.3.0", "", { "dependencies": { "cssom": "~0.3.6" } }, "sha512-AZL67abkUzIuvcHqk7c09cezpGNcxUxU4Ioi/05xHk4DQeTkWmGYftIE6ctU6AEt+Gn4n1lDStOtj7FKycP71A=="], - "csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="], + "csstype": ["csstype@3.2.3", "", {}, "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ=="], "cycled": ["cycled@1.2.0", "", {}, "sha512-/BOOCEohSBflVHHtY/wUc1F6YDYPqyVs/A837gDoq4H1pm72nU/yChyGt91V4ML+MbbAmHs8uo2l1yJkkTIUdg=="], @@ -3668,20 +3666,12 @@ "@codebuff/freebuff-web/pino": ["pino@9.14.0", "", { "dependencies": { "@pinojs/redact": "^0.4.0", "atomic-sleep": "^1.0.0", "on-exit-leak-free": "^2.1.0", "pino-abstract-transport": "^2.0.0", "pino-std-serializers": "^7.0.0", "process-warning": "^5.0.0", "quick-format-unescaped": "^4.0.3", "real-require": "^0.2.0", "safe-stable-stringify": "^2.3.1", "sonic-boom": "^4.0.1", "thread-stream": "^3.0.0" }, "bin": { "pino": "bin.js" } }, "sha512-8OEwKp5juEvb/MjpIc4hjqfgCNysrS94RIOMXYvpYCdm/jglrKEiAYmiumbmGhCvs+IcInsphYDFwqrjr7398w=="], - "@codebuff/freebuff-web/react": ["react@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ=="], - - "@codebuff/freebuff-web/react-dom": ["react-dom@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" }, "peerDependencies": { "react": "^18.3.1" } }, "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw=="], - "@codebuff/sdk/ignore": ["ignore@7.0.5", "", {}, "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg=="], "@codebuff/web/@typescript-eslint/eslint-plugin": ["@typescript-eslint/eslint-plugin@8.46.2", "", { "dependencies": { "@eslint-community/regexpp": "^4.10.0", "@typescript-eslint/scope-manager": 
"8.46.2", "@typescript-eslint/type-utils": "8.46.2", "@typescript-eslint/utils": "8.46.2", "@typescript-eslint/visitor-keys": "8.46.2", "graphemer": "^1.4.0", "ignore": "^7.0.0", "natural-compare": "^1.4.0", "ts-api-utils": "^2.1.0" }, "peerDependencies": { "@typescript-eslint/parser": "^8.46.2", "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "sha512-ZGBMToy857/NIPaaCucIUQgqueOiq7HeAKkhlvqVV4lm089zUFW6ikRySx2v+cAhKeUCPuWVHeimyk6Dw1iY3w=="], "@codebuff/web/pino": ["pino@9.14.0", "", { "dependencies": { "@pinojs/redact": "^0.4.0", "atomic-sleep": "^1.0.0", "on-exit-leak-free": "^2.1.0", "pino-abstract-transport": "^2.0.0", "pino-std-serializers": "^7.0.0", "process-warning": "^5.0.0", "quick-format-unescaped": "^4.0.3", "real-require": "^0.2.0", "safe-stable-stringify": "^2.3.1", "sonic-boom": "^4.0.1", "thread-stream": "^3.0.0" }, "bin": { "pino": "bin.js" } }, "sha512-8OEwKp5juEvb/MjpIc4hjqfgCNysrS94RIOMXYvpYCdm/jglrKEiAYmiumbmGhCvs+IcInsphYDFwqrjr7398w=="], - "@codebuff/web/react": ["react@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ=="], - - "@codebuff/web/react-dom": ["react-dom@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" }, "peerDependencies": { "react": "^18.3.1" } }, "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw=="], - "@commitlint/config-validator/ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], "@commitlint/top-level/find-up": ["find-up@7.0.0", "", { "dependencies": { "locate-path": "^7.2.0", "path-exists": "^5.0.0", "unicorn-magic": "^0.1.0" } }, 
"sha512-YyZM99iHrqLKjmt4LJDj58KI+fYyufRLBSYcqycxf//KpBk9FoewoGX0450m9nB44qrZnovzC2oeP5hUibxc/g=="], @@ -4234,8 +4224,6 @@ "@codebuff/freebuff-web/pino/process-warning": ["process-warning@5.0.0", "", {}, "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA=="], - "@codebuff/freebuff-web/react-dom/scheduler": ["scheduler@0.23.2", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ=="], - "@codebuff/web/@typescript-eslint/eslint-plugin/@typescript-eslint/scope-manager": ["@typescript-eslint/scope-manager@8.46.2", "", { "dependencies": { "@typescript-eslint/types": "8.46.2", "@typescript-eslint/visitor-keys": "8.46.2" } }, "sha512-LF4b/NmGvdWEHD2H4MsHD8ny6JpiVNDzrSZr3CsckEgCbAGZbYM4Cqxvi9L+WqDMT+51Ozy7lt2M+d0JLEuBqA=="], "@codebuff/web/@typescript-eslint/eslint-plugin/@typescript-eslint/type-utils": ["@typescript-eslint/type-utils@8.46.2", "", { "dependencies": { "@typescript-eslint/types": "8.46.2", "@typescript-eslint/typescript-estree": "8.46.2", "@typescript-eslint/utils": "8.46.2", "debug": "^4.3.4", "ts-api-utils": "^2.1.0" }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "sha512-HbPM4LbaAAt/DjxXaG9yiS9brOOz6fabal4uvUmaUYe6l3K1phQDMQKBRUrr06BQkxkvIZVVHttqiybM9nJsLA=="], @@ -4252,8 +4240,6 @@ "@codebuff/web/pino/process-warning": ["process-warning@5.0.0", "", {}, "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA=="], - "@codebuff/web/react-dom/scheduler": ["scheduler@0.23.2", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ=="], - "@commitlint/config-validator/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], 
"@commitlint/top-level/find-up/locate-path": ["locate-path@7.2.0", "", { "dependencies": { "p-locate": "^6.0.0" } }, "sha512-gvVijfZvn7R+2qyPX8mAuKcFGDf6Nc61GdvGafQsHL0sBIxfKzA+usWn4GFC/bk+QdwPUD4kWFJLhElipq+0VA=="], diff --git a/cli/package.json b/cli/package.json index 9b67437fca..135823c3ef 100644 --- a/cli/package.json +++ b/cli/package.json @@ -54,7 +54,7 @@ "zustand": "^5.0.8" }, "devDependencies": { - "@types/react": "^18.3.12", + "@types/react": "19.2.14", "@types/react-reconciler": "^0.32.0", "react-dom": "^19.0.0", "strip-ansi": "^7.1.2" diff --git a/cli/src/components/blocks/agent-branch-item.tsx b/cli/src/components/blocks/agent-branch-item.tsx index 7661bd1be9..67f6b6d6b5 100644 --- a/cli/src/components/blocks/agent-branch-item.tsx +++ b/cli/src/components/blocks/agent-branch-item.tsx @@ -80,8 +80,9 @@ export const AgentBranchItem = memo((props: AgentBranchItemProps) => { } if (React.isValidElement(value)) { + const elProps = value.props as Record if (value.type === React.Fragment) { - return isTextRenderable(value.props.children) + return isTextRenderable(elProps.children as ReactNode) } if (typeof value.type === 'string') { @@ -90,7 +91,7 @@ export const AgentBranchItem = memo((props: AgentBranchItemProps) => { value.type === 'strong' || value.type === 'em' ) { - return isTextRenderable(value.props.children) + return isTextRenderable(elProps.children as ReactNode) } return false diff --git a/cli/src/components/clickable.tsx b/cli/src/components/clickable.tsx index caf56356c1..b9f4bbb516 100644 --- a/cli/src/components/clickable.tsx +++ b/cli/src/components/clickable.tsx @@ -28,18 +28,18 @@ export function makeTextUnselectable(node: ReactNode): ReactNode { if (!isValidElement(node)) return node - const el = node as ReactElement + const el = node as ReactElement<{ children?: ReactNode; [key: string]: unknown }> const type = el.type // Ensure text and span nodes are not selectable if (typeof type === 'string' && (type === 'text' || type === 'span')) { 
const nextProps = { ...el.props, selectable: false } - const nextChildren = el.props?.children ? makeTextUnselectable(el.props.children) : el.props?.children + const nextChildren = el.props.children ? makeTextUnselectable(el.props.children) : el.props.children return cloneElement(el, nextProps, nextChildren) } // Recurse into other host elements and components' children - const nextChildren = el.props?.children ? makeTextUnselectable(el.props.children) : el.props?.children + const nextChildren = el.props.children ? makeTextUnselectable(el.props.children) : el.props.children return cloneElement(el, el.props, nextChildren) } diff --git a/cli/src/components/tools/tool-call-item.tsx b/cli/src/components/tools/tool-call-item.tsx index 72cdef7182..c207bcb35e 100644 --- a/cli/src/components/tools/tool-call-item.tsx +++ b/cli/src/components/tools/tool-call-item.tsx @@ -33,8 +33,9 @@ const isTextRenderable = (value: ReactNode): boolean => { } if (React.isValidElement(value)) { + const elProps = value.props as Record if (value.type === React.Fragment) { - return isTextRenderable(value.props.children) + return isTextRenderable(elProps.children as ReactNode) } if (typeof value.type === 'string') { @@ -43,7 +44,7 @@ const isTextRenderable = (value: ReactNode): boolean => { value.type === 'strong' || value.type === 'em' ) { - return isTextRenderable(value.props.children) + return isTextRenderable(elProps.children as ReactNode) } return false diff --git a/cli/src/types/react19-compat.d.ts b/cli/src/types/react19-compat.d.ts new file mode 100644 index 0000000000..11ca1af2a0 --- /dev/null +++ b/cli/src/types/react19-compat.d.ts @@ -0,0 +1,19 @@ +/** + * React 19 compatibility shim for OpenTUI JSX types. + * + * OpenTUI's JSX namespace defines `type Element = React.ReactNode`. + * In React 19, `FunctionComponent` returns `ReactNode | Promise`, + * but `Promise` is not assignable to `ReactNode`. 
+ * + * This augmentation adds a narrower call signature to `FunctionComponent` + * that returns just `ReactNode`. Due to TypeScript's interface merging rules, + * the later declaration's overloads have higher precedence, so the narrower + * signature is resolved first — fixing all `React.FC` JSX compatibility errors. + */ +import 'react' + +declare module 'react' { + interface FunctionComponent
<P = {}>
{ + (props: P): ReactNode + } +} diff --git a/cli/src/utils/__tests__/markdown-renderer.test.tsx b/cli/src/utils/__tests__/markdown-renderer.test.tsx index 9cc2d35ffb..36ea688fe6 100644 --- a/cli/src/utils/__tests__/markdown-renderer.test.tsx +++ b/cli/src/utils/__tests__/markdown-renderer.test.tsx @@ -4,10 +4,12 @@ import React from 'react' import { renderMarkdown, renderStreamingMarkdown } from '../markdown-renderer' -const flattenNodes = (input: React.ReactNode): React.ReactNode[] => { +type El = React.ReactElement> + +const flattenNodes = (input: unknown): React.ReactNode[] => { const result: React.ReactNode[] = [] - const visit = (value: React.ReactNode): void => { + const visit = (value: unknown): void => { if (value === null || value === undefined || typeof value === 'boolean') { return } @@ -18,18 +20,18 @@ const flattenNodes = (input: React.ReactNode): React.ReactNode[] => { } if (React.isValidElement(value) && value.type === React.Fragment) { - visit(value.props.children) + visit((value as El).props.children) return } - result.push(value) + result.push(value as React.ReactNode) } visit(input) return result } -const flattenChildren = (value: React.ReactNode): React.ReactNode[] => +const flattenChildren = (value: unknown): React.ReactNode[] => flattenNodes(value) describe('markdown renderer', () => { @@ -39,13 +41,13 @@ describe('markdown renderer', () => { expect(nodes[0]).toBe('Hello ') - const bold = nodes[1] as React.ReactElement + const bold = nodes[1] as El expect(bold.props.attributes).toBe(TextAttributes.BOLD) expect(flattenChildren(bold.props.children)).toEqual(['bold']) expect(nodes[2]).toBe(' and ') - const italic = nodes[3] as React.ReactElement + const italic = nodes[3] as El expect(italic.props.attributes).toBe(TextAttributes.ITALIC) expect(flattenChildren(italic.props.children)).toEqual(['italic']) @@ -58,7 +60,7 @@ describe('markdown renderer', () => { expect(nodes[0]).toBe('Use ') - const inlineCode = nodes[1] as React.ReactElement + const 
inlineCode = nodes[1] as El expect(inlineCode.props.fg).toBe('#86efac') expect(inlineCode.props.bg).toBe('#0d1117') expect(flattenChildren(inlineCode.props.children)).toEqual([' ls ']) @@ -70,7 +72,7 @@ describe('markdown renderer', () => { const output = renderMarkdown('# Heading One') const nodes = flattenNodes(output) - const heading = nodes[0] as React.ReactElement + const heading = nodes[0] as El expect(heading.props.attributes).toBe(TextAttributes.BOLD) expect(heading.props.fg).toBe('magenta') expect(flattenChildren(heading.props.children)).toEqual(['Heading One']) @@ -82,12 +84,12 @@ describe('markdown renderer', () => { ) const nodes = flattenNodes(output) - const heading = nodes[0] as React.ReactElement + const heading = nodes[0] as El const contents = flattenChildren(heading.props.children) expect(contents[0]).toBe('Other') - const strong = contents[1] as React.ReactElement + const strong = contents[1] as El expect(strong.props.attributes).toBe(TextAttributes.BOLD) expect(flattenChildren(strong.props.children)).toEqual(['.github/']) @@ -98,11 +100,11 @@ describe('markdown renderer', () => { const output = renderMarkdown('> note') const nodes = flattenNodes(output) - const prefixSpan = nodes[0] as React.ReactElement + const prefixSpan = nodes[0] as El expect(prefixSpan.props.fg).toBe('gray') expect(flattenChildren(prefixSpan.props.children)).toEqual(['> ']) - const textSpan = nodes[1] as React.ReactElement + const textSpan = nodes[1] as El expect(textSpan.props.fg).toBe('gray') expect(flattenChildren(textSpan.props.children)).toEqual(['note']) }) @@ -112,10 +114,10 @@ describe('markdown renderer', () => { const nodes = flattenNodes(output) const bulletSpans = nodes.filter( - (node): node is React.ReactElement => + (node): node is El => React.isValidElement(node) && node.type === 'span' && - flattenChildren(node.props.children).join('') === '- ', + flattenChildren((node as El).props.children).join('') === '- ', ) expect(bulletSpans).toHaveLength(2) @@ 
-135,10 +137,10 @@ describe('markdown renderer', () => { const nodes = flattenNodes(output) const boldNode = nodes.find( - (node): node is React.ReactElement => + (node): node is El => React.isValidElement(node) && - node.props !== undefined && - node.props.attributes === TextAttributes.BOLD, + (node as El).props !== undefined && + (node as El).props.attributes === TextAttributes.BOLD, ) expect(boldNode).toBeDefined() @@ -152,7 +154,7 @@ describe('markdown renderer', () => { expect(nodes[0]).toBe('This is ') - const strikethrough = nodes[1] as React.ReactElement + const strikethrough = nodes[1] as El expect(strikethrough.props.attributes).toBe(TextAttributes.DIM) expect(flattenChildren(strikethrough.props.children)).toEqual(['deleted']) @@ -164,11 +166,11 @@ describe('markdown renderer', () => { const nodes = flattenNodes(output) const checkboxSpans = nodes.filter( - (node): node is React.ReactElement => + (node): node is El => React.isValidElement(node) && node.type === 'span' && - (flattenChildren(node.props.children).join('') === '[ ] ' || - flattenChildren(node.props.children).join('') === '[x] '), + (flattenChildren((node as El).props.children).join('') === '[ ] ' || + flattenChildren((node as El).props.children).join('') === '[x] '), ) expect(checkboxSpans).toHaveLength(2) @@ -187,7 +189,7 @@ describe('markdown renderer', () => { .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -217,7 +219,7 @@ codebuff "add a new feature to handle user authentication" .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -241,7 +243,7 @@ codebuff "add a new feature to handle user authentication" expect(nodes[0]).toBe('Use ') - 
const inlineCode = nodes[1] as React.ReactElement + const inlineCode = nodes[1] as El expect(inlineCode.props.fg).toBe('#86efac') const inlineContent = flattenChildren(inlineCode.props.children).join('') expect(inlineContent).toContain('codebuff "fix bug"') @@ -271,7 +273,7 @@ console.log("world") .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -299,7 +301,7 @@ codebuff "implement feature" --verbose .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -315,7 +317,7 @@ codebuff "implement feature" --verbose const output = renderMarkdown(markdown) const nodes = flattenNodes(output) - const inlineCode = nodes[1] as React.ReactElement + const inlineCode = nodes[1] as El const inlineContent = flattenChildren(inlineCode.props.children).join('') // Should preserve quotes and special characters within inline code @@ -337,7 +339,7 @@ codebuff "implement feature" --verbose .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -372,7 +374,7 @@ codebuff "implement feature" --verbose .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as El).props.children).join('') } return '' }) @@ -399,7 +401,7 @@ codebuff "implement feature" --verbose .map((node) => { if (typeof node === 'string') return node if (React.isValidElement(node)) { - return flattenChildren(node.props.children).join('') + return flattenChildren((node as 
El).props.children).join('') } return '' }) diff --git a/freebuff/web/package.json b/freebuff/web/package.json index 55c492359b..53dc3c7a5e 100644 --- a/freebuff/web/package.json +++ b/freebuff/web/package.json @@ -25,16 +25,16 @@ "next-auth": "^4.24.11", "next-themes": "^0.3.0", "pino": "^9.6.0", - "react": "18.3.1", - "react-dom": "18.3.1", + "react": "^19.0.0", + "react-dom": "^19.0.0", "tailwind-merge": "^2.5.2", "zod": "^4.2.1" }, "devDependencies": { "@tailwindcss/typography": "^0.5.15", "@types/node": "^22.14.0", - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "autoprefixer": "^10.4.21", "postcss": "^8", "tailwindcss": "^3.4.11", diff --git a/package.json b/package.json index b5e971d6d2..628036fc1d 100644 --- a/package.json +++ b/package.json @@ -45,8 +45,8 @@ "zod": "^4.2.1" }, "overrides": { - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "baseline-browser-mapping": "^2.9.14", "zod": "^4.2.1", "signal-exit": "3.0.7" diff --git a/web/package.json b/web/package.json index 4307ba85f6..bf6ef79342 100644 --- a/web/package.json +++ b/web/package.json @@ -80,8 +80,8 @@ "pino": "^9.6.0", "posthog-js": "^1.234.10", "prism-react-renderer": "^2.4.1", - "react": "18.3.1", - "react-dom": "18.3.1", + "react": "^19.0.0", + "react-dom": "^19.0.0", "react-hook-form": "^7.55.0", "server-only": "^0.0.1", "tailwind-merge": "^2.5.2", @@ -100,8 +100,8 @@ "@types/jest": "^29.5.14", "@types/node": "^22.14.0", "@types/pg": "^8.11.11", - "@types/react": "18.3.26", - "@types/react-dom": "18.3.7", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", "@typescript-eslint/eslint-plugin": "^8.29.1", "@typescript-eslint/parser": "^8.29.1", "autoprefixer": "^10.4.21", diff --git a/web/src/app/admin/traces/components/chat-message.tsx b/web/src/app/admin/traces/components/chat-message.tsx index c9166e2895..815579fb7e 100644 --- 
a/web/src/app/admin/traces/components/chat-message.tsx +++ b/web/src/app/admin/traces/components/chat-message.tsx @@ -1,5 +1,6 @@ 'use client' +import type { JSX } from 'react' import { User, Bot, Clock, Coins, Hash, Wrench } from 'lucide-react' import { diff --git a/web/src/components/card-with-beams.tsx b/web/src/components/card-with-beams.tsx index a004f5e16f..3fe48d71c5 100644 --- a/web/src/components/card-with-beams.tsx +++ b/web/src/components/card-with-beams.tsx @@ -1,3 +1,4 @@ +import type { JSX } from 'react' import { BackgroundBeams } from './ui/background-beams' import { Card, diff --git a/web/src/components/docs/mdx/code-demo.tsx b/web/src/components/docs/mdx/code-demo.tsx index b4ff6ec8ba..e02168f7ee 100644 --- a/web/src/components/docs/mdx/code-demo.tsx +++ b/web/src/components/docs/mdx/code-demo.tsx @@ -3,6 +3,7 @@ import { Check, Copy } from 'lucide-react' import { Highlight, themes } from 'prism-react-renderer' import { useMemo, useState } from 'react' +import type { JSX } from 'react' import { MermaidDiagram } from './mermaid-diagram' diff --git a/web/src/components/docs/mdx/markdown-table.tsx b/web/src/components/docs/mdx/markdown-table.tsx index 0d211d7a2a..c4758f7c3c 100644 --- a/web/src/components/docs/mdx/markdown-table.tsx +++ b/web/src/components/docs/mdx/markdown-table.tsx @@ -20,7 +20,7 @@ function extractTextContent(node: React.ReactNode): string { return node.map(extractTextContent).join('') } if (typeof node === 'object' && 'props' in node) { - const element = node as React.ReactElement + const element = node as React.ReactElement<{ children?: React.ReactNode }> return extractTextContent(element.props.children) } return '' diff --git a/web/src/components/ui/landing/competition/github-copilot.tsx b/web/src/components/ui/landing/competition/github-copilot.tsx index 25ca264d73..d192635249 100644 --- a/web/src/components/ui/landing/competition/github-copilot.tsx +++ b/web/src/components/ui/landing/competition/github-copilot.tsx @@ -225,7 
+225,7 @@ function MatrixRainEffect({ isActive?: boolean }) { const canvasRef = useRef(null) - const requestRef = useRef() + const requestRef = useRef(undefined) // Only render if enabled and active const shouldRender = enabled && isActive From cff62fe5f16b5942d320f3d30b7ff7f3ddb14d60 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 14:24:50 -0700 Subject: [PATCH 04/16] Integrate canopy wave instead of baseten for minimax provider --- packages/internal/src/env-schema.ts | 4 +- scripts/test-canopywave-e2e.ts | 135 +++++++ scripts/test-canopywave.ts | 375 ++++++++++++++++++ scripts/test-fireworks.ts | 2 + web/src/app/api/v1/chat/completions/_post.ts | 44 +- web/src/llm-api/{baseten.ts => canopywave.ts} | 123 +++--- 6 files changed, 604 insertions(+), 79 deletions(-) create mode 100644 scripts/test-canopywave-e2e.ts create mode 100644 scripts/test-canopywave.ts rename web/src/llm-api/{baseten.ts => canopywave.ts} (79%) diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index 21a0147bd8..93cfee7d4f 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -7,7 +7,7 @@ export const serverEnvSchema = clientEnvSchema.extend({ OPENAI_API_KEY: z.string().min(1), ANTHROPIC_API_KEY: z.string().min(1), FIREWORKS_API_KEY: z.string().min(1), - BASETEN_API_KEY: z.string().min(1).optional(), + CANOPYWAVE_API_KEY: z.string().min(1).optional(), LINKUP_API_KEY: z.string().min(1), CONTEXT7_API_KEY: z.string().optional(), GRAVITY_API_KEY: z.string().min(1), @@ -51,7 +51,7 @@ export const serverProcessEnv: ServerInput = { OPENAI_API_KEY: process.env.OPENAI_API_KEY, ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY, FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY, - BASETEN_API_KEY: process.env.BASETEN_API_KEY, + CANOPYWAVE_API_KEY: process.env.CANOPYWAVE_API_KEY, LINKUP_API_KEY: process.env.LINKUP_API_KEY, CONTEXT7_API_KEY: process.env.CONTEXT7_API_KEY, GRAVITY_API_KEY: 
process.env.GRAVITY_API_KEY, diff --git a/scripts/test-canopywave-e2e.ts b/scripts/test-canopywave-e2e.ts new file mode 100644 index 0000000000..e03d1778fe --- /dev/null +++ b/scripts/test-canopywave-e2e.ts @@ -0,0 +1,135 @@ +#!/usr/bin/env bun + +/** + * E2E test for CanopyWave integration via the Codebuff SDK. + * + * Creates a real agent run using the minimax model so the request + * flows through our chat completions endpoint → CanopyWave → back with usage data. + * + * Usage: + * bun scripts/test-canopywave-e2e.ts + */ + +import { CodebuffClient } from '@codebuff/sdk' + +import type { AgentDefinition } from '@codebuff/sdk' +import type { PrintModeEvent } from '@codebuff/common/types/print-mode' + +const minimaxAgent: AgentDefinition = { + id: 'canopywave-test-agent', + model: 'minimax/minimax-m2.5', + displayName: 'CanopyWave Test Agent', + toolNames: ['end_turn'], + instructionsPrompt: `You are a test agent. Respond with exactly "Hello from CanopyWave!" and nothing else. Then call the end_turn tool.`, +} + +async function main() { + const apiKey = process.env.CODEBUFF_API_KEY + if (!apiKey) { + console.error('❌ CODEBUFF_API_KEY is not set.') + console.error(' Example: CODEBUFF_API_KEY= bun scripts/test-canopywave-e2e.ts') + process.exit(1) + } + + console.log('🔌 CanopyWave E2E Test via Codebuff SDK') + console.log('='.repeat(50)) + console.log() + console.log(`Model: ${minimaxAgent.model}`) + console.log(`Agent: ${minimaxAgent.id}`) + console.log() + + const client = new CodebuffClient({ + apiKey, + cwd: process.cwd(), + }) + + const events: PrintModeEvent[] = [] + let responseText = '' + + const startTime = Date.now() + + const result = await client.run({ + agent: minimaxAgent, + prompt: 'Say hello', + costMode: 'free', + handleEvent: (event) => { + events.push(event) + if (event.type === 'text') { + responseText += event.text + process.stdout.write(event.text) + } else if (event.type === 'reasoning_delta') { + // Don't print reasoning, just note it + } else 
if (event.type === 'error') { + console.error(`\n❌ Error event: ${event.message}`) + } else if (event.type === 'finish') { + console.log('\n') + } + }, + handleStreamChunk: (chunk) => { + if (typeof chunk === 'string') { + // Already handled in handleEvent + } + }, + }) + + const elapsed = Date.now() - startTime + + console.log(`── Results (${elapsed}ms) ──`) + console.log() + + if (result.output.type === 'error') { + console.error(`❌ Run failed: ${result.output.message}`) + if ('statusCode' in result.output) { + console.error(` Status code: ${result.output.statusCode}`) + } + process.exit(1) + } + + console.log(`✅ Run succeeded!`) + console.log(` Output type: ${result.output.type}`) + console.log(` Response text: ${responseText.slice(0, 200)}`) + console.log() + + // Check session state for credits used + const creditsUsed = result.sessionState?.mainAgentState.creditsUsed ?? 0 + console.log(`── Credits & Billing ──`) + console.log(` Credits used: ${creditsUsed}`) + console.log(` Cost (USD): $${(creditsUsed / 100).toFixed(4)}`) + console.log() + + // Summarize events + const eventTypes = events.reduce((acc, e) => { + acc[e.type] = (acc[e.type] ?? 
0) + 1 + return acc + }, {} as Record) + console.log(`── Event Summary ──`) + for (const [type, count] of Object.entries(eventTypes)) { + console.log(` ${type}: ${count}`) + } + console.log() + + // Check for finish events which include cost info + const finishEvents = events.filter((e) => e.type === 'finish') + if (finishEvents.length > 0) { + console.log(`── Finish Events ──`) + for (const event of finishEvents) { + console.log(JSON.stringify(event, null, 2)) + } + console.log() + } + + // Print all events for debugging + console.log(`── All Events (${events.length} total) ──`) + for (const event of events) { + if (event.type === 'text' || event.type === 'reasoning_delta') continue + console.log(JSON.stringify(event)) + } + console.log() + + console.log('Done!') +} + +main().catch((error) => { + console.error('Fatal error:', error) + process.exit(1) +}) diff --git a/scripts/test-canopywave.ts b/scripts/test-canopywave.ts new file mode 100644 index 0000000000..ab1dede618 --- /dev/null +++ b/scripts/test-canopywave.ts @@ -0,0 +1,375 @@ +#!/usr/bin/env bun + +/** + * Test script to verify CanopyWave integration and usage/token reporting. + * + * Usage: + * # Test 1: Hit CanopyWave API directly + * bun scripts/test-canopywave.ts direct + * + * # Test 2: Hit our chat completions endpoint (requires running web server + valid API key) + * CODEBUFF_API_KEY= bun scripts/test-canopywave.ts endpoint + * + * # Run both tests + * CODEBUFF_API_KEY= bun scripts/test-canopywave.ts both + */ + +export {} + +const CANOPYWAVE_BASE_URL = 'https://inference.canopywave.io/v1' +const CANOPYWAVE_MODEL = 'minimax/minimax-m2.5' +const OPENROUTER_MODEL = 'minimax/minimax-m2.5' + +const testPrompt = 'Say "hello world" and nothing else.' + +async function testCanopyWaveDirect() { + const apiKey = process.env.CANOPYWAVE_API_KEY + if (!apiKey) { + console.error('❌ CANOPYWAVE_API_KEY is not set. 
Add it to .env.local or pass it directly.') + process.exit(1) + } + + // ── Non-streaming ── + console.log('── Test 1: CanopyWave API (non-streaming) ──') + console.log(`Model: ${CANOPYWAVE_MODEL}`) + console.log(`Prompt: "${testPrompt}"`) + console.log() + + const startTime = Date.now() + const response = await fetch(`${CANOPYWAVE_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${apiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: CANOPYWAVE_MODEL, + messages: [{ role: 'user', content: testPrompt }], + max_tokens: 64, + }), + }) + + if (!response.ok) { + const errorText = await response.text() + console.error(`❌ CanopyWave API returned ${response.status}: ${errorText}`) + process.exit(1) + } + + const data = await response.json() + const elapsed = Date.now() - startTime + const content = data.choices?.[0]?.message?.content ?? '' + + console.log(`✅ Response (${elapsed}ms):`) + console.log(` Content: ${content}`) + console.log(` Model: ${data.model}`) + console.log() + console.log(' ── Raw usage object ──') + console.log(JSON.stringify(data.usage, null, 2)) + console.log() + console.log(' ── Full raw response (excluding choices content) ──') + const debugData = { ...data } + if (debugData.choices) { + debugData.choices = debugData.choices.map((c: Record) => ({ + ...c, + message: { ...(c.message as Record), content: '' }, + })) + } + console.log(JSON.stringify(debugData, null, 2)) + console.log() + + // ── Streaming ── + console.log('── Test 2: CanopyWave API (streaming, include_usage only) ──') + const streamStart = Date.now() + const streamResponse = await fetch(`${CANOPYWAVE_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${apiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: CANOPYWAVE_MODEL, + messages: [{ role: 'user', content: testPrompt }], + max_tokens: 64, + stream: true, + stream_options: { include_usage: true 
}, + }), + }) + + if (!streamResponse.ok) { + const errorText = await streamResponse.text() + console.error(`❌ CanopyWave streaming API returned ${streamResponse.status}: ${errorText}`) + process.exit(1) + } + + await consumeStream(streamResponse, streamStart, 'include_usage only') +} + +async function consumeStream(streamResponse: Response, streamStart: number, label: string) { + const reader = streamResponse.body?.getReader() + if (!reader) { + console.error('❌ No response body reader') + process.exit(1) + } + + const decoder = new TextDecoder() + let streamContent = '' + let chunkCount = 0 + const allUsageChunks: unknown[] = [] + const allRawChunks: unknown[] = [] + + let done = false + while (!done) { + const result = await reader.read() + done = result.done + if (done) break + + const text = decoder.decode(result.value, { stream: true }) + const lines = text.split('\n').filter((l) => l.startsWith('data: ')) + + for (const line of lines) { + const raw = line.slice('data: '.length) + if (raw === '[DONE]') continue + + try { + const chunk = JSON.parse(raw) + chunkCount++ + const delta = chunk.choices?.[0]?.delta + if (delta?.content) streamContent += delta.content + if (delta?.reasoning_content) { + console.log(` [reasoning chunk] ${delta.reasoning_content.slice(0, 80)}...`) + } + if (chunk.usage) { + allUsageChunks.push(chunk.usage) + } + // Capture first 3 chunks for debugging + if (chunkCount <= 3) { + allRawChunks.push(chunk) + } + } catch { + // skip non-JSON lines + } + } + } + + const streamElapsed = Date.now() - streamStart + console.log(`✅ Stream response [${label}] (${streamElapsed}ms, ${chunkCount} chunks):`) + console.log(` Content: ${streamContent}`) + console.log() + console.log(` ── First 3 raw chunks ──`) + for (const chunk of allRawChunks) { + console.log(JSON.stringify(chunk, null, 2)) + console.log() + } + console.log(` ── All usage chunks (${allUsageChunks.length} total) ──`) + for (const usage of allUsageChunks) { + 
console.log(JSON.stringify(usage, null, 2)) + console.log() + } + if (allUsageChunks.length === 0) { + console.log(' ⚠️ No usage data received in stream!') + } + console.log() +} + +// ─── Chat Completions Endpoint Test ───────────────────────────────────────── + +async function testChatCompletionsEndpoint() { + const codebuffApiKey = process.env.CODEBUFF_API_KEY + if (!codebuffApiKey) { + console.error('❌ CODEBUFF_API_KEY is not set. Pass it as an env var.') + console.error(' Example: CODEBUFF_API_KEY= bun scripts/test-canopywave.ts endpoint') + process.exit(1) + } + + const appUrl = process.env.NEXT_PUBLIC_CODEBUFF_APP_URL ?? 'http://localhost:3000' + const endpoint = `${appUrl}/api/v1/chat/completions` + const runId = process.env.RUN_ID ?? 'test-run-id-canopywave' + + // ── Non-streaming ── + console.log('── Test: Chat Completions Endpoint (non-streaming) ──') + console.log(`Endpoint: ${endpoint}`) + console.log(`Model: ${OPENROUTER_MODEL} (should route to CanopyWave)`) + console.log(`Prompt: "${testPrompt}"`) + console.log() + + const startTime = Date.now() + const response = await fetch(endpoint, { + method: 'POST', + headers: { + Authorization: `Bearer ${codebuffApiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: OPENROUTER_MODEL, + messages: [{ role: 'user', content: testPrompt }], + max_tokens: 64, + stream: false, + codebuff_metadata: { + run_id: runId, + client_id: 'test-canopywave-script', + cost_mode: 'free', + }, + }), + }) + + const elapsed = Date.now() - startTime + const data = await response.json() + + if (response.ok) { + const content = data.choices?.[0]?.message?.content ?? 
'' + console.log(`✅ Response (${elapsed}ms):`) + console.log(` Content: ${content}`) + console.log(` Model: ${data.model}`) + console.log(` Provider: ${data.provider}`) + console.log() + console.log(' ── Usage object ──') + console.log(JSON.stringify(data.usage, null, 2)) + console.log() + if (data.usage) { + const u = data.usage + console.log(` prompt_tokens: ${u.prompt_tokens ?? 'N/A'}`) + console.log(` completion_tokens: ${u.completion_tokens ?? 'N/A'}`) + console.log(` total_tokens: ${u.total_tokens ?? 'N/A'}`) + console.log(` cost: ${u.cost ?? 'N/A'}`) + console.log(` cost_details: ${JSON.stringify(u.cost_details)}`) + } + } else { + console.log(`⚠️ Response ${response.status} (${elapsed}ms):`) + console.log(` ${JSON.stringify(data)}`) + if (response.status === 400 && data.message?.includes('runId')) { + console.log(' ℹ️ This is expected if you don\'t have a valid run_id.') + console.log(' ℹ️ The request reached the endpoint — routing to CanopyWave is wired up.') + } else if (response.status === 401) { + console.log(' ℹ️ Auth failed. 
Make sure CODEBUFF_API_KEY is valid.') + } + } + console.log() + + // ── Streaming ── + console.log('── Test: Chat Completions Endpoint (streaming) ──') + const streamStart = Date.now() + const streamResponse = await fetch(endpoint, { + method: 'POST', + headers: { + Authorization: `Bearer ${codebuffApiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: OPENROUTER_MODEL, + messages: [{ role: 'user', content: testPrompt }], + max_tokens: 64, + stream: true, + codebuff_metadata: { + run_id: runId, + client_id: 'test-canopywave-script', + cost_mode: 'free', + }, + }), + }) + + const streamElapsed = Date.now() - streamStart + + if (streamResponse.ok) { + const reader = streamResponse.body?.getReader() + if (!reader) { + console.error('❌ No response body reader') + process.exit(1) + } + + const decoder = new TextDecoder() + let streamContent = '' + let chunkCount = 0 + let chunksWithUsage = 0 + let lastUsage: unknown = null + + let done = false + while (!done) { + const result = await reader.read() + done = result.done + if (done) break + + const text = decoder.decode(result.value, { stream: true }) + const lines = text.split('\n').filter((l) => l.startsWith('data: ')) + + for (const line of lines) { + const raw = line.slice('data: '.length) + if (raw === '[DONE]') continue + + try { + const chunk = JSON.parse(raw) + chunkCount++ + const delta = chunk.choices?.[0]?.delta + if (delta?.content) streamContent += delta.content + if (chunk.usage) { + chunksWithUsage++ + lastUsage = chunk.usage + } + } catch { + // skip non-JSON lines + } + } + } + + console.log(`✅ Stream response (${streamElapsed}ms, ${chunkCount} chunks):`) + console.log(` Content: ${streamContent}`) + console.log(` Chunks with usage: ${chunksWithUsage} (should be exactly 1)`) + if (chunksWithUsage > 1) { + console.log(` ⚠️ Multiple usage chunks detected — billing fix may not be working!`) + } else if (chunksWithUsage === 1) { + console.log(` ✅ Only 1 usage chunk — billing 
fix is working correctly!`) + } else { + console.log(` ⚠️ No usage chunks received!`) + } + if (lastUsage) { + console.log() + console.log(' ── Final usage object ──') + console.log(JSON.stringify(lastUsage, null, 2)) + const u = lastUsage as Record + console.log() + console.log(` prompt_tokens: ${u.prompt_tokens ?? 'N/A'}`) + console.log(` completion_tokens: ${u.completion_tokens ?? 'N/A'}`) + console.log(` total_tokens: ${u.total_tokens ?? 'N/A'}`) + console.log(` cost: ${u.cost ?? 'N/A'}`) + console.log(` cost_details: ${JSON.stringify(u.cost_details)}`) + } + } else { + const data = await streamResponse.json() + console.log(`⚠️ Response ${streamResponse.status} (${streamElapsed}ms):`) + console.log(` ${JSON.stringify(data)}`) + if (streamResponse.status === 400 && data.message?.includes('runId')) { + console.log(' ℹ️ Expected without a valid run_id. Endpoint is reachable and routing works.') + } + } + console.log() +} + +// ─── Main ─────────────────────────────────────────────────────────────────── + +async function main() { + const mode = process.argv[2] ?? 
'direct' + + console.log('🔌 CanopyWave Integration Test') + console.log('='.repeat(50)) + console.log() + + switch (mode) { + case 'direct': + await testCanopyWaveDirect() + break + case 'endpoint': + await testChatCompletionsEndpoint() + break + case 'both': + await testCanopyWaveDirect() + await testChatCompletionsEndpoint() + break + default: + console.error(`Unknown mode: ${mode}`) + console.error('Usage: bun scripts/test-canopywave.ts [direct|endpoint|both]') + process.exit(1) + } + + console.log('Done!') +} + +main() diff --git a/scripts/test-fireworks.ts b/scripts/test-fireworks.ts index b7c57e1f54..00622cd770 100644 --- a/scripts/test-fireworks.ts +++ b/scripts/test-fireworks.ts @@ -14,6 +14,8 @@ * CODEBUFF_API_KEY= bun scripts/test-fireworks.ts both */ +export {} + const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5' const OPENROUTER_MODEL = 'minimax/minimax-m2.5' diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 1eec315d82..d236125bcb 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -36,11 +36,11 @@ import type { NextRequest } from 'next/server' import type { ChatCompletionRequestBody } from '@/llm-api/types' import { - BasetenError, - handleBasetenNonStream, - handleBasetenStream, - isBasetenModel, -} from '@/llm-api/baseten' + CanopyWaveError, + handleCanopyWaveNonStream, + handleCanopyWaveStream, + isCanopyWaveModel, +} from '@/llm-api/canopywave' import { FireworksError, handleFireworksNonStream, @@ -360,11 +360,11 @@ export async function postChatCompletions(params: { // Handle streaming vs non-streaming try { if (bodyStream) { - // Streaming request — route to Baseten/Fireworks for supported models - const useBaseten = isBasetenModel(typedBody.model) - const useFireworks = !useBaseten && isFireworksModel(typedBody.model) - const stream = useBaseten - 
? await handleBasetenStream({ + // Streaming request — route to CanopyWave/Fireworks for supported models + const useCanopyWave = isCanopyWaveModel(typedBody.model) + const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model) + const stream = useCanopyWave + ? await handleCanopyWaveStream({ body: typedBody, userId, stripeCustomerId, @@ -413,10 +413,10 @@ export async function postChatCompletions(params: { }, }) } else { - // Non-streaming request — route to Baseten/Fireworks for supported models + // Non-streaming request — route to CanopyWave/Fireworks for supported models const model = typedBody.model - const useBaseten = isBasetenModel(model) - const useFireworks = !useBaseten && isFireworksModel(model) + const useCanopyWave = isCanopyWaveModel(model) + const useFireworks = !useCanopyWave && isFireworksModel(model) const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? modelParts[1] : model const isOpenAIDirectModel = @@ -427,8 +427,8 @@ export async function postChatCompletions(params: { const shouldUseOpenAIEndpoint = isOpenAIDirectModel && typedBody.codebuff_metadata?.n !== undefined - const nonStreamRequest = useBaseten - ? handleBasetenNonStream({ + const nonStreamRequest = useCanopyWave + ? handleCanopyWaveNonStream({ body: typedBody, userId, stripeCustomerId, @@ -491,14 +491,14 @@ export async function postChatCompletions(params: { if (error instanceof FireworksError) { fireworksError = error } - let basetenError: BasetenError | undefined - if (error instanceof BasetenError) { - basetenError = error + let canopywaveError: CanopyWaveError | undefined + if (error instanceof CanopyWaveError) { + canopywaveError = error } // Log detailed error information for debugging const errorDetails = openrouterError?.toJSON() - const providerLabel = basetenError ? 'Baseten' : fireworksError ? 'Fireworks' : 'OpenRouter' + const providerLabel = canopywaveError ? 'CanopyWave' : fireworksError ? 
'Fireworks' : 'OpenRouter' logger.error( { error: getErrorObject(error), @@ -512,8 +512,8 @@ export async function postChatCompletions(params: { ? typedBody.messages.length : 0, messages: typedBody.messages, - providerStatusCode: (openrouterError ?? fireworksError ?? basetenError)?.statusCode, - providerStatusText: (openrouterError ?? fireworksError ?? basetenError)?.statusText, + providerStatusCode: (openrouterError ?? fireworksError ?? canopywaveError)?.statusCode, + providerStatusText: (openrouterError ?? fireworksError ?? canopywaveError)?.statusText, openrouterErrorCode: errorDetails?.error?.code, openrouterErrorType: errorDetails?.error?.type, openrouterErrorMessage: errorDetails?.error?.message, @@ -541,7 +541,7 @@ export async function postChatCompletions(params: { if (error instanceof FireworksError) { return NextResponse.json(error.toJSON(), { status: error.statusCode }) } - if (error instanceof BasetenError) { + if (error instanceof CanopyWaveError) { return NextResponse.json(error.toJSON(), { status: error.statusCode }) } diff --git a/web/src/llm-api/baseten.ts b/web/src/llm-api/canopywave.ts similarity index 79% rename from web/src/llm-api/baseten.ts rename to web/src/llm-api/canopywave.ts index dbd787def8..8582645944 100644 --- a/web/src/llm-api/baseten.ts +++ b/web/src/llm-api/canopywave.ts @@ -15,31 +15,31 @@ import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/b import type { Logger } from '@codebuff/common/types/contracts/logger' import type { ChatCompletionRequestBody } from './types' -const BASETEN_BASE_URL = 'https://inference.baseten.co/v1' +const CANOPYWAVE_BASE_URL = 'https://inference.canopywave.io/v1' // Extended timeout for deep-thinking models that can take // a long time to start streaming. 
-const BASETEN_HEADERS_TIMEOUT_MS = 10 * 60 * 1000 +const CANOPYWAVE_HEADERS_TIMEOUT_MS = 10 * 60 * 1000 -const basetenAgent = new Agent({ - headersTimeout: BASETEN_HEADERS_TIMEOUT_MS, +const canopywaveAgent = new Agent({ + headersTimeout: CANOPYWAVE_HEADERS_TIMEOUT_MS, bodyTimeout: 0, }) -/** Map from OpenRouter model IDs to Baseten model IDs */ -const BASETEN_MODEL_MAP: Record = { - 'minimax/minimax-m2.5': 'MiniMaxAI/MiniMax-M2.5', +/** Map from OpenRouter model IDs to CanopyWave model IDs */ +const CANOPYWAVE_MODEL_MAP: Record = { + 'minimax/minimax-m2.5': 'minimax/minimax-m2.5', } -export function isBasetenModel(model: string): boolean { - return model in BASETEN_MODEL_MAP +export function isCanopyWaveModel(model: string): boolean { + return model in CANOPYWAVE_MODEL_MAP } -function getBasetenModelId(openrouterModel: string): string { - return BASETEN_MODEL_MAP[openrouterModel] ?? openrouterModel +function getCanopyWaveModelId(openrouterModel: string): string { + return CANOPYWAVE_MODEL_MAP[openrouterModel] ?? 
openrouterModel } -type StreamState = { responseText: string; reasoningText: string } +type StreamState = { responseText: string; reasoningText: string; billedAlready: boolean } type LineResult = { state: StreamState @@ -47,49 +47,48 @@ type LineResult = { patchedLine: string } -function createBasetenRequest(params: { +function createCanopyWaveRequest(params: { body: ChatCompletionRequestBody originalModel: string fetch: typeof globalThis.fetch }) { const { body, originalModel, fetch } = params - const basetenBody: Record = { + const canopywaveBody: Record = { ...body, - model: getBasetenModelId(originalModel), + model: getCanopyWaveModelId(originalModel), } // Strip OpenRouter-specific / internal fields - delete basetenBody.provider - delete basetenBody.transforms - delete basetenBody.codebuff_metadata - delete basetenBody.usage + delete canopywaveBody.provider + delete canopywaveBody.transforms + delete canopywaveBody.codebuff_metadata + delete canopywaveBody.usage // For streaming, request usage in the final chunk - if (basetenBody.stream) { - basetenBody.stream_options = { include_usage: true } + if (canopywaveBody.stream) { + canopywaveBody.stream_options = { include_usage: true } } - if (!env.BASETEN_API_KEY) { - throw new Error('BASETEN_API_KEY is not configured') + if (!env.CANOPYWAVE_API_KEY) { + throw new Error('CANOPYWAVE_API_KEY is not configured') } - return fetch(`${BASETEN_BASE_URL}/chat/completions`, { + return fetch(`${CANOPYWAVE_BASE_URL}/chat/completions`, { method: 'POST', headers: { - Authorization: `Bearer ${env.BASETEN_API_KEY}`, + Authorization: `Bearer ${env.CANOPYWAVE_API_KEY}`, 'Content-Type': 'application/json', }, - body: JSON.stringify(basetenBody), + body: JSON.stringify(canopywaveBody), // @ts-expect-error - dispatcher is a valid undici option not in fetch types - dispatcher: basetenAgent, + dispatcher: canopywaveAgent, }) } -// Baseten per-token pricing (dollars per token) -// TODO: Verify these costs against Baseten's actual 
pricing -const BASETEN_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 -const BASETEN_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 -const BASETEN_OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 +// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5 +const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000 +const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 +const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000 function extractUsageAndCost(usage: Record | undefined | null): UsageData { if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } @@ -101,17 +100,16 @@ function extractUsageAndCost(usage: Record | undefined | null): const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0 - // Baseten doesn't return cost — compute from token counts and known pricing const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) const cost = - nonCachedInputTokens * BASETEN_INPUT_COST_PER_TOKEN + - cacheReadInputTokens * BASETEN_CACHED_INPUT_COST_PER_TOKEN + - outputTokens * BASETEN_OUTPUT_COST_PER_TOKEN + nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN + + cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN + + outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } } -export async function handleBasetenNonStream({ +export async function handleCanopyWaveNonStream({ body, userId, stripeCustomerId, @@ -132,10 +130,10 @@ export async function handleBasetenNonStream({ const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) - const response = await createBasetenRequest({ body, originalModel, fetch }) + const response = await createCanopyWaveRequest({ body, originalModel, 
fetch }) if (!response.ok) { - throw await parseBasetenError(response) + throw await parseCanopyWaveError(response) } const data = await response.json() @@ -182,12 +180,12 @@ export async function handleBasetenNonStream({ // Normalise model name back to OpenRouter format for client compatibility data.model = originalModel - if (!data.provider) data.provider = 'Baseten' + if (!data.provider) data.provider = 'CanopyWave' return data } -export async function handleBasetenStream({ +export async function handleCanopyWaveStream({ body, userId, stripeCustomerId, @@ -208,10 +206,10 @@ export async function handleBasetenStream({ const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) - const response = await createBasetenRequest({ body, originalModel, fetch }) + const response = await createCanopyWaveRequest({ body, originalModel, fetch }) if (!response.ok) { - throw await parseBasetenError(response) + throw await parseCanopyWaveError(response) } const reader = response.body?.getReader() @@ -220,7 +218,7 @@ export async function handleBasetenStream({ } let heartbeatInterval: NodeJS.Timeout - let state: StreamState = { responseText: '', reasoningText: '' } + let state: StreamState = { responseText: '', reasoningText: '', billedAlready: false } let clientDisconnected = false const stream = new ReadableStream({ @@ -301,7 +299,7 @@ export async function handleBasetenStream({ } else { logger.warn( getErrorObject(error), - 'Error after client disconnect in Baseten stream', + 'Error after client disconnect in CanopyWave stream', ) } } finally { @@ -317,7 +315,7 @@ export async function handleBasetenStream({ responseTextLength: state.responseText.length, reasoningTextLength: state.reasoningText.length, }, - 'Client cancelled stream, continuing Baseten consumption for billing', + 'Client cancelled stream, continuing CanopyWave consumption for billing', ) }, }) @@ -369,14 +367,14 @@ async function handleLine({ } catch 
(error) { logger.warn( { error: getErrorObject(error, { includeRawError: true }) }, - 'Received non-JSON Baseten response', + 'Received non-JSON CanopyWave response', ) return { state, patchedLine: line } } // Patch model and provider for SDK compatibility if (obj.model) obj.model = originalModel - if (!obj.provider) obj.provider = 'Baseten' + if (!obj.provider) obj.provider = 'CanopyWave' // Process the chunk for billing / state tracking const result = await handleResponse({ @@ -406,6 +404,12 @@ async function handleLine({ return { state: result.state, billedCredits: result.billedCredits, patchedLine } } +function isFinalChunk(data: Record): boolean { + const choices = data.choices as Array> | undefined + if (!choices || choices.length === 0) return true + return choices.some(c => c.finish_reason != null) +} + async function handleResponse({ userId, stripeCustomerId, @@ -437,13 +441,22 @@ async function handleResponse({ }): Promise<{ state: StreamState; billedCredits?: number }> { state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) - if ('error' in data || !data.usage) { + // Some providers send cumulative usage on EVERY chunk (not just the final one), + // so we must only bill once on the final chunk to avoid charging N times. + if ('error' in data || !data.usage || state.billedAlready || !isFinalChunk(data)) { + // Strip usage from non-final chunks and duplicate final chunks + // so the SDK doesn't see multiple usage objects + if (data.usage && (!isFinalChunk(data) || state.billedAlready)) { + delete data.usage + } return { state } } const usageData = extractUsageAndCost(data.usage as Record) const messageId = typeof data.id === 'string' ? 
data.id : 'unknown' + state.billedAlready = true + insertMessageToBigQuery({ messageId, userId, @@ -506,7 +519,7 @@ function handleStreamChunk({ errorType: errorData?.type, errorMessage: errorData?.message, }, - 'Received error chunk in Baseten stream', + 'Received error chunk in CanopyWave stream', ) return state } @@ -543,7 +556,7 @@ function handleStreamChunk({ return state } -export class BasetenError extends Error { +export class CanopyWaveError extends Error { constructor( public readonly statusCode: number, public readonly statusText: string, @@ -556,7 +569,7 @@ export class BasetenError extends Error { }, ) { super(errorBody.error.message) - this.name = 'BasetenError' + this.name = 'CanopyWaveError' } toJSON() { @@ -570,9 +583,9 @@ export class BasetenError extends Error { } } -async function parseBasetenError(response: Response): Promise { +async function parseCanopyWaveError(response: Response): Promise { const errorText = await response.text() - let errorBody: BasetenError['errorBody'] + let errorBody: CanopyWaveError['errorBody'] try { const parsed = JSON.parse(errorText) if (parsed?.error?.message) { @@ -599,7 +612,7 @@ async function parseBasetenError(response: Response): Promise { }, } } - return new BasetenError(response.status, response.statusText, errorBody) + return new CanopyWaveError(response.status, response.statusText, errorBody) } function creditsToFakeCost(credits: number): number { From d09bea6aaa107c8857905dc7af1bd475d734b264 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 14:40:18 -0700 Subject: [PATCH 05/16] Update .env.example with canopywave key example --- .env.example | 1 + 1 file changed, 1 insertion(+) diff --git a/.env.example b/.env.example index 55e7721d2e..d3c6f2438d 100644 --- a/.env.example +++ b/.env.example @@ -4,6 +4,7 @@ OPEN_ROUTER_API_KEY=dummy_openrouter_key OPENAI_API_KEY=dummy_openai_key ANTHROPIC_API_KEY=dummy_anthropic_key FIREWORKS_API_KEY=dummy_fireworks_key 
+CANOPYWAVE_API_KEY=dummy_canopywave_key # Database & Server DATABASE_URL=postgresql://manicode_user_local:secretpassword_local@localhost:5432/manicode_db_local From 14602f734dd418e9a23c92b6f71f989cdd61be98 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 15:32:17 -0700 Subject: [PATCH 06/16] Update next-themes version so it can use react 19 --- bun.lock | 14 +++++--------- freebuff/web/package.json | 2 +- package.json | 2 ++ web/package.json | 2 +- web/src/components/theme-provider.tsx | 2 +- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/bun.lock b/bun.lock index 964cd43180..f9bedc4412 100644 --- a/bun.lock +++ b/bun.lock @@ -147,7 +147,7 @@ "lucide-react": "^0.487.0", "next": "15.5.11", "next-auth": "^4.24.11", - "next-themes": "^0.3.0", + "next-themes": "^0.4.6", "pino": "^9.6.0", "react": "^19.0.0", "react-dom": "^19.0.0", @@ -294,7 +294,7 @@ "next": "15.5.11", "next-auth": "^4.24.11", "next-contentlayer2": "^0.5.8", - "next-themes": "^0.3.0", + "next-themes": "^0.4.6", "nextjs-linkedin-insight-tag": "^0.0.6", "pino": "^9.6.0", "posthog-js": "^1.234.10", @@ -354,6 +354,8 @@ "@types/react": "19.2.14", "@types/react-dom": "19.2.3", "baseline-browser-mapping": "^2.9.14", + "react": "^19.0.0", + "react-dom": "^19.0.0", "signal-exit": "3.0.7", "zod": "^4.2.1", }, @@ -2810,7 +2812,7 @@ "next-contentlayer2": ["next-contentlayer2@0.5.8", "", { "dependencies": { "@contentlayer2/core": "0.5.8", "@contentlayer2/utils": "0.5.8" }, "peerDependencies": { "contentlayer2": "0.5.8", "next": ">=12.0.0", "react": "^18 || ^19 || ^19.0.0-rc", "react-dom": "^18 || ^19 || ^19.0.0-rc" } }, "sha512-3Xh8quPCFmg/QGa4qTnOwSsT3oNYCtmm+Ii0UlbOHxX59gHYVX9M5mTzkdUKiKC1aJfiGIPPGQXhKNfc6qvWZg=="], - "next-themes": ["next-themes@0.3.0", "", { "peerDependencies": { "react": "^16.8 || ^17 || ^18", "react-dom": "^16.8 || ^17 || ^18" } }, "sha512-/QHIrsYpd6Kfk7xakK4svpDI5mmXP0gfvCoJdGpZQ2TOrQZmsW0QxjaiLn8wbIKjtm4BTSqLoix4lxYYOnLJ/w=="], + "next-themes": 
["next-themes@0.4.6", "", { "peerDependencies": { "react": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc", "react-dom": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc" } }, "sha512-pZvgD5L0IEvX5/9GWyHMf3m8BKiVQwsCMHfoFosXtXBMnaS0ZnIJ9ST4b4NqLVKDEm8QBxoNNGNaBv2JNF6XNA=="], "nextjs-linkedin-insight-tag": ["nextjs-linkedin-insight-tag@0.0.6", "", { "dependencies": { "typescript": "^4.9.4" }, "peerDependencies": { "next": ">=11.0.0", "react": ">=17.0.0" } }, "sha512-hk3cHpz+1SLbe0hd2nFjUP2AlFmgeDMHHudXGTYrtIvRri/qliFEIpURH7FJWKxQLXm9f1X8B5O20Wvj2wNPCg=="], @@ -4054,10 +4056,6 @@ "next-auth/uuid": ["uuid@8.3.2", "", { "bin": { "uuid": "dist/bin/uuid" } }, "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg=="], - "next-themes/react": ["react@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0" } }, "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ=="], - - "next-themes/react-dom": ["react-dom@18.3.1", "", { "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" }, "peerDependencies": { "react": "^18.3.1" } }, "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw=="], - "nextjs-linkedin-insight-tag/typescript": ["typescript@4.9.5", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g=="], "nx/axios": ["axios@1.13.1", "", { "dependencies": { "follow-redirects": "^1.15.6", "form-data": "^4.0.4", "proxy-from-env": "^1.1.0" } }, "sha512-hU4EGxxt+j7TQijx1oYdAjw4xuIp1wRQSsbMFwSthCWeBQur1eF+qJ5iQ5sN3Tw8YRzQNKb8jszgBdMDVqwJcw=="], @@ -4516,8 +4514,6 @@ "mlly/pkg-types/confbox": ["confbox@0.1.8", "", {}, "sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w=="], - "next-themes/react-dom/scheduler": ["scheduler@0.23.2", "", { "dependencies": { "loose-envify": "^1.1.0" } }, 
"sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ=="], - "nx/chalk/supports-color": ["supports-color@7.2.0", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="], "nx/minimatch/brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="], diff --git a/freebuff/web/package.json b/freebuff/web/package.json index 53dc3c7a5e..fdf5a358c5 100644 --- a/freebuff/web/package.json +++ b/freebuff/web/package.json @@ -23,7 +23,7 @@ "lucide-react": "^0.487.0", "next": "15.5.11", "next-auth": "^4.24.11", - "next-themes": "^0.3.0", + "next-themes": "^0.4.6", "pino": "^9.6.0", "react": "^19.0.0", "react-dom": "^19.0.0", diff --git a/package.json b/package.json index 628036fc1d..bd94e8cbd8 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,8 @@ "zod": "^4.2.1" }, "overrides": { + "react": "^19.0.0", + "react-dom": "^19.0.0", "@types/react": "19.2.14", "@types/react-dom": "19.2.3", "baseline-browser-mapping": "^2.9.14", diff --git a/web/package.json b/web/package.json index bf6ef79342..9b92c03529 100644 --- a/web/package.json +++ b/web/package.json @@ -75,7 +75,7 @@ "next": "15.5.11", "next-auth": "^4.24.11", "next-contentlayer2": "^0.5.8", - "next-themes": "^0.3.0", + "next-themes": "^0.4.6", "nextjs-linkedin-insight-tag": "^0.0.6", "pino": "^9.6.0", "posthog-js": "^1.234.10", diff --git a/web/src/components/theme-provider.tsx b/web/src/components/theme-provider.tsx index 4c77ee977c..16559fe1a3 100644 --- a/web/src/components/theme-provider.tsx +++ b/web/src/components/theme-provider.tsx @@ -1,7 +1,7 @@ 'use client' import { ThemeProvider as NextThemesProvider } from 'next-themes' -import { type ThemeProviderProps } from 'next-themes/dist/types' +import { type ThemeProviderProps } from 'next-themes' 
import { useEffect } from 'react' export const ThemeProvider = ({ children, ...props }: ThemeProviderProps) => { From 151145f2ce0ddecfd4f433826aa652daf07f1fa5 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 16:57:22 -0700 Subject: [PATCH 07/16] Use siliconflow as provider for minimax --- .env.example | 1 + agents/base2/base2.ts | 1 + packages/internal/src/env-schema.ts | 2 + scripts/test-siliconflow.ts | 384 ++++++++++++ web/src/app/api/v1/chat/completions/_post.ts | 59 +- web/src/llm-api/siliconflow.ts | 621 +++++++++++++++++++ 6 files changed, 1057 insertions(+), 11 deletions(-) create mode 100644 scripts/test-siliconflow.ts create mode 100644 web/src/llm-api/siliconflow.ts diff --git a/.env.example b/.env.example index d3c6f2438d..a1b46a0b88 100644 --- a/.env.example +++ b/.env.example @@ -5,6 +5,7 @@ OPENAI_API_KEY=dummy_openai_key ANTHROPIC_API_KEY=dummy_anthropic_key FIREWORKS_API_KEY=dummy_fireworks_key CANOPYWAVE_API_KEY=dummy_canopywave_key +SILICONFLOW_API_KEY=dummy_siliconflow_key # Database & Server DATABASE_URL=postgresql://manicode_user_local:secretpassword_local@localhost:5432/manicode_db_local diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 4a3c40064f..8735d0579b 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -30,6 +30,7 @@ export function createBase2( publisher, model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? 
{ + only: ['siliconflow/fp8'], data_collection: 'deny', } : { only: ['amazon-bedrock'], diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index 93cfee7d4f..c4bfa7423f 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -8,6 +8,7 @@ export const serverEnvSchema = clientEnvSchema.extend({ ANTHROPIC_API_KEY: z.string().min(1), FIREWORKS_API_KEY: z.string().min(1), CANOPYWAVE_API_KEY: z.string().min(1).optional(), + SILICONFLOW_API_KEY: z.string().min(1).optional(), LINKUP_API_KEY: z.string().min(1), CONTEXT7_API_KEY: z.string().optional(), GRAVITY_API_KEY: z.string().min(1), @@ -52,6 +53,7 @@ export const serverProcessEnv: ServerInput = { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY, FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY, CANOPYWAVE_API_KEY: process.env.CANOPYWAVE_API_KEY, + SILICONFLOW_API_KEY: process.env.SILICONFLOW_API_KEY, LINKUP_API_KEY: process.env.LINKUP_API_KEY, CONTEXT7_API_KEY: process.env.CONTEXT7_API_KEY, GRAVITY_API_KEY: process.env.GRAVITY_API_KEY, diff --git a/scripts/test-siliconflow.ts b/scripts/test-siliconflow.ts new file mode 100644 index 0000000000..845db4a3cb --- /dev/null +++ b/scripts/test-siliconflow.ts @@ -0,0 +1,384 @@ +#!/usr/bin/env bun + +/** + * Test script to verify SiliconFlow prompt caching across a 10-turn conversation. + * + * Uses a very large system prompt (~5k+ input tokens) with low output (max 100 tokens) + * to measure how well SiliconFlow caches the shared prefix across turns. 
+ * + * Usage: + * bun scripts/test-siliconflow.ts + */ + +export {} + +const SILICONFLOW_BASE_URL = 'https://api.siliconflow.com/v1' +const SILICONFLOW_MODEL = 'MiniMaxAI/MiniMax-M2.5' + +// Pricing constants — https://siliconflow.com/pricing +const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 +const CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 +const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 + +const MAX_TOKENS = 100 + +function computeCost(usage: Record): { cost: number; breakdown: string } { + const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0 + const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0 + const promptDetails = usage.prompt_tokens_details as Record | undefined + const cachedTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 + const nonCachedInput = Math.max(0, inputTokens - cachedTokens) + + const inputCost = nonCachedInput * INPUT_COST_PER_TOKEN + const cachedCost = cachedTokens * CACHED_INPUT_COST_PER_TOKEN + const outputCost = outputTokens * OUTPUT_COST_PER_TOKEN + const totalCost = inputCost + cachedCost + outputCost + + const breakdown = [ + `${nonCachedInput} non-cached input × $0.30/M = $${inputCost.toFixed(8)}`, + `${cachedTokens} cached input × $0.03/M = $${cachedCost.toFixed(8)}`, + `${outputTokens} output × $1.20/M = $${outputCost.toFixed(8)}`, + `Total: $${totalCost.toFixed(8)}`, + ].join('\n ') + + return { cost: totalCost, breakdown } +} + +// Very large system prompt to push input tokens to ~5k+ +const SYSTEM_PROMPT = `You are an expert software architect, technical writer, and senior engineering consultant. +You always respond with brief, concise answers — one or two sentences at most. +You provide practical advice grounded in real-world engineering experience. 
+ +Your areas of expertise include: +- Distributed systems design and architecture patterns (microservices, event-driven, CQRS, saga patterns, choreography vs orchestration, bulkhead pattern, circuit breaker, retry with exponential backoff, sidecar pattern, ambassador pattern, strangler fig pattern, anti-corruption layer) +- Database design and optimization (relational databases including PostgreSQL, MySQL, SQL Server; document databases including MongoDB, CouchDB, DynamoDB; graph databases including Neo4j, ArangoDB, JanusGraph; time-series databases including InfluxDB, TimescaleDB, QuestDB; wide-column stores including Cassandra, ScyllaDB, HBase; sharding strategies including hash-based, range-based, geographic; replication topologies including primary-replica, multi-primary, chain replication; connection pooling with PgBouncer, ProxySQL; query optimization techniques including index selection, query plan analysis, materialized views, covering indexes, partial indexes, expression indexes) +- Cloud infrastructure and deployment (AWS services including EC2, ECS, EKS, Lambda, S3, DynamoDB, RDS, Aurora, ElastiCache, CloudFront, Route53, IAM, VPC, SQS, SNS, Kinesis, Step Functions; GCP services including GKE, Cloud Run, Cloud Functions, BigQuery, Spanner, Pub/Sub, Cloud Storage; Azure services including AKS, Azure Functions, Cosmos DB, Azure SQL; container orchestration with Kubernetes including deployments, stateful sets, daemon sets, jobs, CronJobs, custom resource definitions, operators, Helm charts, Kustomize; infrastructure as code with Terraform, Pulumi, CloudFormation, CDK; service mesh with Istio, Linkerd, Consul Connect; load balancers including ALB, NLB, HAProxy, Nginx, Envoy; auto-scaling including HPA, VPA, KEDA, cluster autoscaler) +- Programming languages and their ecosystems (TypeScript/JavaScript with Node.js, Deno, Bun; Python with FastAPI, Django, Flask, SQLAlchemy, Pydantic; Rust with Tokio, Actix, Axum, Serde; Go with Gin, Echo, GORM; Java with 
Spring Boot, Quarkus, Micronaut, Hibernate; C++ with Boost, gRPC, Abseil; Kotlin with Ktor, Spring; Scala with Akka, ZIO, Cats Effect; Elixir with Phoenix, Ecto, LiveView; Haskell with Servant, Yesod, Persistent) +- API design principles (REST architectural constraints, Richardson Maturity Model, HATEOAS, content negotiation; GraphQL including schema design, resolvers, DataLoader, subscriptions, federation; gRPC including protobuf schema design, streaming patterns, interceptors, deadline propagation; WebSocket patterns for real-time communication; Server-Sent Events for unidirectional streaming; OpenAPI/Swagger specification; API versioning strategies including URL path, header, query parameter; pagination patterns including cursor-based, offset, keyset; rate limiting algorithms including token bucket, leaky bucket, sliding window; API gateway patterns) +- Security best practices (authentication protocols including OAuth 2.0, OIDC, SAML, WebAuthn, FIDO2; authorization models including RBAC, ABAC, ReBAC, PBAC; encryption at rest with AES-256, at transit with TLS 1.3; OWASP Top 10 including injection, broken authentication, sensitive data exposure, XXE, broken access control, security misconfiguration, XSS, insecure deserialization, known vulnerabilities, insufficient logging; Content Security Policy headers; CORS configuration; DDoS mitigation with WAF, rate limiting, geo-blocking; secret management with HashiCorp Vault, AWS Secrets Manager, GCP Secret Manager; certificate management including Let's Encrypt, cert-manager, mTLS; supply chain security with SBOM, Sigstore, dependency scanning) +- Performance optimization and profiling (caching strategies including write-through, write-behind, read-through, cache-aside, refresh-ahead; cache invalidation patterns; CDN configuration with CloudFront, Fastly, Cloudflare; connection pooling for HTTP, database, Redis; async patterns including event loops, worker threads, thread pools, coroutines; WebAssembly for 
compute-intensive operations; JIT compilation optimization; memory profiling with heap snapshots, allocation tracking; CPU profiling with flame graphs, perf, async-profiler; load testing with k6, Locust, Artillery, Gatling; performance budgets and real user monitoring) +- Testing methodologies (unit testing with Jest, Vitest, pytest, Go testing; integration testing with Testcontainers, Docker Compose; end-to-end testing with Playwright, Cypress, Selenium; property-based testing with fast-check, Hypothesis, QuickCheck; mutation testing with Stryker, PITest; snapshot testing; contract testing with Pact, Spring Cloud Contract; chaos engineering with Chaos Monkey, Litmus, Gremlin; load testing; fuzz testing with AFL, LibFuzzer; visual regression testing; accessibility testing) +- CI/CD pipelines and DevOps practices (GitHub Actions workflows, Jenkins pipelines, GitLab CI, CircleCI; ArgoCD for GitOps; deployment strategies including blue-green, canary, rolling update, recreate; feature flag systems with LaunchDarkly, Flagsmith, Unleash; trunk-based development; semantic versioning and conventional commits; artifact management with Artifactory, Nexus, ECR, GCR; infrastructure pipeline including Terraform plan/apply, drift detection; security scanning in CI including SAST, DAST, SCA, secret scanning; release management including changelogs, release notes, semantic-release) +- Monitoring and observability (metrics collection with Prometheus, StatsD, Datadog; visualization with Grafana, Kibana; distributed tracing with Jaeger, Zipkin, Tempo, OpenTelemetry; log aggregation with Elasticsearch, Loki, CloudWatch; alerting with PagerDuty, OpsGenie, VictorOps; SLO/SLI definition and error budgets; synthetic monitoring; real user monitoring; custom business metrics; incident management processes; postmortem culture; runbook automation) +- Data engineering and analytics (stream processing with Apache Kafka, Flink, Spark Streaming, Kinesis; batch processing with Spark, Hadoop, dbt; 
data warehousing with Snowflake, BigQuery, Redshift, ClickHouse; data lake architecture with Delta Lake, Apache Iceberg, Apache Hudi; ETL/ELT patterns; data quality frameworks with Great Expectations, dbt tests; schema evolution and backward compatibility; data governance and lineage tracking; real-time analytics with materialized views, OLAP cubes) +- Machine learning operations (model serving with TensorFlow Serving, TorchServe, Triton; MLOps pipelines with MLflow, Kubeflow, Metaflow; feature stores with Feast, Tecton; model monitoring for drift detection; A/B testing for ML models; experiment tracking; model versioning and registry; GPU cluster management; inference optimization with quantization, pruning, distillation) + +When providing responses, you follow these conventions: +- Keep answers extremely brief — one or two sentences maximum +- Be direct and actionable +- Use concrete examples over abstract advice +- Reference specific tools, libraries, or patterns by name + +Additional context for this conversation: +- We are working on a high-traffic web application that serves 50 million requests per day across 3 regions +- The system needs to handle bursty traffic patterns with 10x spikes during peak hours and flash sales +- Data consistency is important but eventual consistency is acceptable for most read paths with a 5-second staleness budget +- The team is experienced with TypeScript and Node.js but open to other technologies for specific use cases +- We use PostgreSQL 16 as our primary database with logical replication to read replicas and Redis 7 Cluster for caching +- The application is deployed on Kubernetes 1.29 in a multi-region setup across US-East-1, US-West-2, and EU-West-1 +- We need to maintain 99.95% uptime SLA with a target p99 latency of 150ms for API endpoints and 50ms for cached reads +- Cost optimization is a secondary concern after reliability and developer experience, but we spend $2.5M/year on infrastructure +- The codebase is 
approximately 750k lines of TypeScript across 80+ microservices with an additional 200k lines of Python for ML services +- We use an event-driven architecture with Kafka (3 clusters, 500+ topics) for inter-service communication with exactly-once semantics +- All services expose both REST (OpenAPI 3.1) and gRPC (protobuf v3) endpoints with automatic code generation +- We have a comprehensive monitoring stack with Prometheus (50M time series), Grafana (200+ dashboards), Jaeger, and PagerDuty +- Database migrations are managed with Drizzle ORM with automated rollback capabilities and zero-downtime schema changes +- The frontend is a Next.js 15 application with React Server Components, streaming SSR, and partial prerendering +- We use feature flags extensively via LaunchDarkly with 500+ active flags and automated cleanup for stale flags +- The CI/CD pipeline runs 5000+ tests (unit, integration, e2e) with a target of under 8 minutes using distributed execution on BuildKite +- We practice trunk-based development with short-lived feature branches, PR previews, and automated merge queues +- The team consists of 60 engineers across 10 squads, each owning 5-12 services with clear domain boundaries +- We use a mono-repo structure managed with Turborepo and Bun workspaces with remote caching +- All inter-service communication uses Protocol Buffers for serialization with a shared schema registry and backward compatibility enforcement +- We have a custom API gateway built on Envoy that handles authentication, rate limiting, request routing, and observability injection +- The system processes approximately 100TB of data per day through our analytics pipeline (Kafka → Flink → ClickHouse + BigQuery) +- Mobile clients communicate via a BFF (Backend for Frontend) layer with GraphQL federation across 12 subgraphs +- We have a custom feature flag evaluation engine that supports complex targeting rules including percentage rollouts, user segments, and geographic targeting +- The 
deployment pipeline supports multi-region blue-green deployments with automated rollback on SLO violation detection +- We use HashiCorp Vault for secret management with automatic rotation policies for database credentials, API keys, and certificates +- Our observability stack includes custom instrumentation for business metrics including revenue, conversion, engagement, and error rates +- The team follows an RFC process for architectural decisions with ADRs stored in the repo and reviewed by the architecture guild +- We have a dedicated platform team of 8 engineers that maintains shared infrastructure, developer tooling, and internal SDKs +- All services implement health checks (liveness + readiness), graceful shutdown handlers, and circuit breakers via a shared middleware library +- We use PgBouncer in transaction mode for PostgreSQL connection pooling (max 500 connections per region) and Redis Cluster with 6 shards per region +- The system supports multi-tenancy with tenant isolation at the database level using row-level security and per-tenant connection pools +- We have a custom schema registry for Kafka topic schemas with backward/forward compatibility validation and automated consumer migration +- Our error handling follows a structured error taxonomy with 200+ error codes, retry policies, and dead-letter queues for unprocessable messages +- We use structured logging with JSON format, correlation IDs, and trace context propagation across all services via OpenTelemetry +- The frontend uses a design system with 300+ components maintained by a dedicated UI platform team with visual regression testing via Chromatic +- We have automated performance regression testing that runs nightly against production-like data with 10% traffic replay +- Our incident response process includes automated runbook execution, escalation policies, and post-incident review within 48 hours +- We maintain a service catalog with dependency graphs, SLO definitions, on-call schedules, and 
cost attribution per service +- The platform supports A/B testing with Bayesian statistical significance calculations, multi-armed bandit allocation, and segment analysis +- We use GitOps for all infrastructure management with Terraform modules in a dedicated repo and Atlantis for plan/apply workflows +- Our security posture includes weekly penetration testing, continuous dependency scanning with Snyk, SAST with Semgrep, and DAST with OWASP ZAP +- We have a data mesh architecture for analytics with 15 domain-owned data products, each with defined SLAs and data contracts +- The system supports webhook delivery with at-least-once semantics, configurable retry policies (exponential backoff up to 24h), and delivery status tracking +- We use OpenTelemetry Collector for telemetry pipeline with custom processors for PII redaction, sampling, and cost-based routing +- Our caching strategy uses L1 (in-process LRU, 100MB per pod), L2 (Redis Cluster, 500GB), and L3 (CloudFront, 30+ edge locations) with coordinated invalidation +- We maintain backward compatibility for 3 API versions simultaneously with automated deprecation notices, usage tracking, and migration guides +- The platform includes a developer portal with API documentation, SDK generation, sandbox environments, and usage analytics +- We use Temporal for workflow orchestration across 20+ long-running business processes including order fulfillment, payment processing, and user onboarding +- Our ML platform serves 50+ models in production with A/B testing, shadow mode deployment, and automated retraining pipelines +- The search infrastructure uses Elasticsearch clusters with 500M+ documents, custom analyzers, and learning-to-rank models +- We have a notification system that delivers 10M+ messages daily across email, push, SMS, and in-app channels with template management and delivery optimization +- The billing system processes $50M+ in monthly transactions with Stripe integration, usage-based billing, and revenue 
recognition +- We use Crossplane for provisioning cloud resources as Kubernetes custom resources with drift detection and reconciliation +- Our edge computing layer uses Cloudflare Workers for geo-routing, A/B test assignment, and personalization at the edge +- The platform includes a custom query builder for internal dashboards that generates optimized SQL for ClickHouse and PostgreSQL +- We maintain a shared protobuf definition repository with 500+ message types, automated code generation for 6 languages, and breaking change detection` + +const TURN_PROMPTS = [ + 'Give a brief one-sentence answer: What is the single most important principle when designing distributed systems?', + 'Give a brief one-sentence answer: What is the biggest mistake teams make when adopting microservices?', + 'Give a brief one-sentence answer: When should you choose eventual consistency over strong consistency?', + 'Give a brief one-sentence answer: What is the most underrated database optimization technique?', + 'Give a brief one-sentence answer: What is the best approach to handle cascading failures in a microservice architecture?', + 'Give a brief one-sentence answer: When is it better to use gRPC over REST?', + 'Give a brief one-sentence answer: What is the most effective caching strategy for a read-heavy workload?', + 'Give a brief one-sentence answer: What is the key to successful trunk-based development at scale?', + 'Give a brief one-sentence answer: What metric best predicts production reliability?', + 'Give a brief one-sentence answer: What is the most important thing to get right in an observability stack?', +] + +interface ConversationMessage { + role: string + content: string +} + +interface TurnResult { + label: string + usage: Record | null + elapsedMs: number + outputTokens: number + ttftMs?: number + outputTokensPerSec?: number + responseContent: string +} + +async function makeConversationStreamRequest( + label: string, + apiKey: string, + conversationMessages: 
ConversationMessage[], +): Promise<TurnResult> { + console.log(`── ${label} (streaming) ──`) + const startTime = Date.now() + let ttftMs: number | undefined + + const response = await fetch(`${SILICONFLOW_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${apiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: SILICONFLOW_MODEL, + messages: conversationMessages, + max_tokens: MAX_TOKENS, + stream: true, + stream_options: { include_usage: true }, + }), + }) + + if (!response.ok) { + const errorText = await response.text() + console.error(`❌ SiliconFlow streaming API returned ${response.status}: ${errorText}`) + return { label, usage: null, elapsedMs: Date.now() - startTime, outputTokens: 0, responseContent: '' } + } + + const reader = response.body?.getReader() + if (!reader) { + console.error('❌ No response body reader') + return { label, usage: null, elapsedMs: Date.now() - startTime, outputTokens: 0, responseContent: '' } + } + + const decoder = new TextDecoder() + let streamContent = '' + let chunkCount = 0 + let streamUsage: Record<string, unknown> | null = null + let firstContentChunkTime: number | undefined + + let done = false + while (!done) { + const result = await reader.read() + done = result.done + if (done) break + + const text = decoder.decode(result.value, { stream: true }) + const lines = text.split('\n').filter((l) => l.startsWith('data: ')) + + for (const line of lines) { + const raw = line.slice('data: '.length) + if (raw === '[DONE]') continue + + try { + const chunk = JSON.parse(raw) + chunkCount++ + const delta = chunk.choices?.[0]?.delta + if (delta?.content) { + if (firstContentChunkTime === undefined) { + firstContentChunkTime = Date.now() + ttftMs = firstContentChunkTime - startTime + } + streamContent += delta.content + } + if (chunk.usage) streamUsage = chunk.usage + } catch { + // skip non-JSON lines + } + } + } + + const elapsedMs = Date.now() - startTime + const outputTokens = streamUsage && 
typeof streamUsage.completion_tokens === 'number' + ? streamUsage.completion_tokens + : 0 + + const generationTimeMs = firstContentChunkTime !== undefined + ? Date.now() - firstContentChunkTime + : elapsedMs + const outputTokensPerSec = generationTimeMs > 0 + ? (outputTokens / (generationTimeMs / 1000)) + : 0 + + // Print compact per-turn stats + const inputTokens = streamUsage && typeof streamUsage.prompt_tokens === 'number' ? streamUsage.prompt_tokens : 0 + const promptDetails = streamUsage?.prompt_tokens_details as Record<string, unknown> | undefined + const cachedTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 + const cacheRate = inputTokens > 0 ? ((cachedTokens / inputTokens) * 100).toFixed(1) : '0.0' + const cost = streamUsage ? `$${computeCost(streamUsage).cost.toFixed(6)}` : 'err' + + console.log(` ✅ ${(elapsedMs / 1000).toFixed(2)}s | TTFT ${ttftMs !== undefined ? (ttftMs / 1000).toFixed(2) + 's' : 'n/a'} | ${inputTokens} in (${cachedTokens} cached, ${cacheRate}%) | ${outputTokens} out @ ${outputTokensPerSec.toFixed(1)} tok/s | ${cost}`) + console.log(` Response: ${streamContent.slice(0, 150)}${streamContent.length > 150 ? '...' : ''}`) + console.log() + + return { label, usage: streamUsage, elapsedMs, outputTokens, ttftMs, outputTokensPerSec, responseContent: streamContent } +} + +async function main() { + const apiKey = process.env.SILICONFLOW_API_KEY + if (!apiKey) { + console.error('❌ SILICONFLOW_API_KEY is not set. 
Add it to .env.local or pass it directly.') + process.exit(1) + } + + console.log('🧪 SiliconFlow 10-Turn Conversation Caching Test') + console.log('='.repeat(60)) + console.log(`Model: ${SILICONFLOW_MODEL}`) + console.log(`Base URL: ${SILICONFLOW_BASE_URL}`) + console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`) + console.log(`Turns: ${TURN_PROMPTS.length}`) + console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`) + console.log('='.repeat(60)) + console.log() + + const conversationHistory: ConversationMessage[] = [ + { role: 'system', content: SYSTEM_PROMPT }, + ] + + const results: TurnResult[] = [] + + for (let i = 0; i < TURN_PROMPTS.length; i++) { + conversationHistory.push({ role: 'user', content: TURN_PROMPTS[i] }) + + const label = `Turn ${i + 1}/${TURN_PROMPTS.length}${i === 0 ? ' (cold)' : ''}` + const result = await makeConversationStreamRequest(label, apiKey, [...conversationHistory]) + results.push(result) + + if (result.responseContent) { + conversationHistory.push({ role: 'assistant', content: result.responseContent }) + } + } + + // ── Summary table ── + console.log('━'.repeat(120)) + console.log('SUMMARY') + console.log('━'.repeat(120)) + console.log() + + console.log(' Turn | Time | TTFT | Input | Cached | Cache% | Output | tok/s | e2e t/s | Cost') + console.log(' ' + '-'.repeat(110)) + + let totalCost = 0 + let totalInputTokens = 0 + let totalCachedTokens = 0 + let totalOutputTokens = 0 + let totalElapsedMs = 0 + + for (const r of results) { + const time = `${(r.elapsedMs / 1000).toFixed(2)}s` + const ttft = r.ttftMs !== undefined ? `${(r.ttftMs / 1000).toFixed(2)}s` : 'n/a' + const tokSec = r.outputTokensPerSec !== undefined ? r.outputTokensPerSec.toFixed(1) : 'n/a' + const e2eTokSec = r.elapsedMs > 0 ? (r.outputTokens / (r.elapsedMs / 1000)).toFixed(1) : 'n/a' + const cost = r.usage ? computeCost(r.usage).cost : 0 + const costStr = r.usage ? 
`$${cost.toFixed(6)}` : 'err' + + const inputTokens = r.usage && typeof r.usage.prompt_tokens === 'number' ? r.usage.prompt_tokens : 0 + const promptDetails = r.usage?.prompt_tokens_details as Record | undefined + const cachedTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 + const cacheRate = inputTokens > 0 ? `${((cachedTokens / inputTokens) * 100).toFixed(1)}%` : '0.0%' + + totalCost += cost + totalInputTokens += inputTokens + totalCachedTokens += cachedTokens + totalOutputTokens += r.outputTokens + totalElapsedMs += r.elapsedMs + + console.log( + ` ${r.label.padEnd(4).slice(0, 25).padEnd(25)} | ${time.padStart(8)} | ${ttft.padStart(7)} | ${String(inputTokens).padStart(6)} | ${String(cachedTokens).padStart(6)} | ${cacheRate.padStart(7)} | ${String(r.outputTokens).padStart(6)} | ${tokSec.padStart(6)} | ${e2eTokSec.padStart(7)} | ${costStr}`, + ) + } + + console.log(' ' + '-'.repeat(110)) + + const overallCacheRate = totalInputTokens > 0 ? ((totalCachedTokens / totalInputTokens) * 100).toFixed(1) : '0.0' + const totalTimeStr = `${(totalElapsedMs / 1000).toFixed(2)}s` + const overallTokSec = totalElapsedMs > 0 ? (totalOutputTokens / (totalElapsedMs / 1000)).toFixed(1) : 'n/a' + console.log(` ${'TOTAL'.padEnd(25)} | ${totalTimeStr.padStart(8)} | | ${String(totalInputTokens).padStart(6)} | ${String(totalCachedTokens).padStart(6)} | ${(overallCacheRate + '%').padStart(7)} | ${String(totalOutputTokens).padStart(6)} | | ${overallTokSec.padStart(7)} | $${totalCost.toFixed(6)}`) + console.log() + + // ── Cost analysis ── + console.log('━'.repeat(120)) + console.log('COST ANALYSIS') + console.log('━'.repeat(120)) + console.log() + + // What would the cost be without caching? + const costWithoutCaching = totalInputTokens * INPUT_COST_PER_TOKEN + totalOutputTokens * OUTPUT_COST_PER_TOKEN + const savings = costWithoutCaching - totalCost + const savingsPercent = costWithoutCaching > 0 ? 
((savings / costWithoutCaching) * 100).toFixed(1) : '0.0' + + console.log(` Total cost (actual): $${totalCost.toFixed(6)}`) + console.log(` Total cost (no caching): $${costWithoutCaching.toFixed(6)}`) + console.log(` Savings from caching: $${savings.toFixed(6)} (${savingsPercent}%)`) + console.log() + console.log(` Total input tokens: ${totalInputTokens}`) + console.log(` Total cached tokens: ${totalCachedTokens}`) + console.log(` Overall cache hit rate: ${overallCacheRate}%`) + console.log(` Total output tokens: ${totalOutputTokens}`) + console.log() + + // TTFT analysis + const ttfts = results.filter((r) => r.ttftMs !== undefined).map((r) => r.ttftMs!) + if (ttfts.length > 0) { + const avgTtft = ttfts.reduce((a, b) => a + b, 0) / ttfts.length + const minTtft = Math.min(...ttfts) + const maxTtft = Math.max(...ttfts) + console.log(` TTFT — avg: ${(avgTtft / 1000).toFixed(2)}s, min: ${(minTtft / 1000).toFixed(2)}s, max: ${(maxTtft / 1000).toFixed(2)}s`) + + if (results[0].ttftMs !== undefined && ttfts.length > 1) { + const coldTtft = results[0].ttftMs + const warmTtfts = ttfts.slice(1) + const avgWarmTtft = warmTtfts.reduce((a, b) => a + b, 0) / warmTtfts.length + console.log(` TTFT — cold (turn 1): ${(coldTtft / 1000).toFixed(2)}s, avg warm (turns 2-${TURN_PROMPTS.length}): ${(avgWarmTtft / 1000).toFixed(2)}s`) + if (avgWarmTtft < coldTtft) { + console.log(` ✅ Warm TTFT is ${((1 - avgWarmTtft / coldTtft) * 100).toFixed(1)}% faster than cold TTFT`) + } + } + } + + console.log() + console.log('Done!') +} + +main() diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index d236125bcb..b886a3d838 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -47,6 +47,12 @@ import { handleFireworksStream, isFireworksModel, } from '@/llm-api/fireworks' +import { + SiliconFlowError, + handleSiliconFlowNonStream, + handleSiliconFlowStream, + isSiliconFlowModel, +} from 
'@/llm-api/siliconflow' import { handleOpenAINonStream, OPENAI_SUPPORTED_MODELS, @@ -360,10 +366,22 @@ export async function postChatCompletions(params: { // Handle streaming vs non-streaming try { if (bodyStream) { - // Streaming request — route to CanopyWave/Fireworks for supported models - const useCanopyWave = isCanopyWaveModel(typedBody.model) - const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model) - const stream = useCanopyWave + // Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models + // CanopyWave and Fireworks TEMPORARILY DISABLED: route through OpenRouter + const useSiliconFlow = isSiliconFlowModel(typedBody.model) + const useCanopyWave = false // isCanopyWaveModel(typedBody.model) + const useFireworks = false // isFireworksModel(typedBody.model) + const stream = useSiliconFlow + ? await handleSiliconFlowStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useCanopyWave ? await handleCanopyWaveStream({ body: typedBody, userId, @@ -413,10 +431,12 @@ export async function postChatCompletions(params: { }, }) } else { - // Non-streaming request — route to CanopyWave/Fireworks for supported models + // Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models + // CanopyWave and Fireworks TEMPORARILY DISABLED: route through OpenRouter const model = typedBody.model - const useCanopyWave = isCanopyWaveModel(model) - const useFireworks = !useCanopyWave && isFireworksModel(model) + const useSiliconFlow = isSiliconFlowModel(model) + const useCanopyWave = false // isCanopyWaveModel(model) + const useFireworks = false // isFireworksModel(model) const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? 
modelParts[1] : model const isOpenAIDirectModel = @@ -427,7 +447,17 @@ export async function postChatCompletions(params: { const shouldUseOpenAIEndpoint = isOpenAIDirectModel && typedBody.codebuff_metadata?.n !== undefined - const nonStreamRequest = useCanopyWave + const nonStreamRequest = useSiliconFlow + ? handleSiliconFlowNonStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useCanopyWave ? handleCanopyWaveNonStream({ body: typedBody, userId, @@ -495,10 +525,14 @@ export async function postChatCompletions(params: { if (error instanceof CanopyWaveError) { canopywaveError = error } + let siliconflowError: SiliconFlowError | undefined + if (error instanceof SiliconFlowError) { + siliconflowError = error + } // Log detailed error information for debugging const errorDetails = openrouterError?.toJSON() - const providerLabel = canopywaveError ? 'CanopyWave' : fireworksError ? 'Fireworks' : 'OpenRouter' + const providerLabel = siliconflowError ? 'SiliconFlow' : canopywaveError ? 'CanopyWave' : fireworksError ? 'Fireworks' : 'OpenRouter' logger.error( { error: getErrorObject(error), @@ -512,8 +546,8 @@ export async function postChatCompletions(params: { ? typedBody.messages.length : 0, messages: typedBody.messages, - providerStatusCode: (openrouterError ?? fireworksError ?? canopywaveError)?.statusCode, - providerStatusText: (openrouterError ?? fireworksError ?? canopywaveError)?.statusText, + providerStatusCode: (openrouterError ?? fireworksError ?? canopywaveError ?? siliconflowError)?.statusCode, + providerStatusText: (openrouterError ?? fireworksError ?? canopywaveError ?? 
siliconflowError)?.statusText, openrouterErrorCode: errorDetails?.error?.code, openrouterErrorType: errorDetails?.error?.type, openrouterErrorMessage: errorDetails?.error?.message, @@ -544,6 +578,9 @@ export async function postChatCompletions(params: { if (error instanceof CanopyWaveError) { return NextResponse.json(error.toJSON(), { status: error.statusCode }) } + if (error instanceof SiliconFlowError) { + return NextResponse.json(error.toJSON(), { status: error.statusCode }) + } return NextResponse.json( { error: 'Failed to process request' }, diff --git a/web/src/llm-api/siliconflow.ts b/web/src/llm-api/siliconflow.ts new file mode 100644 index 0000000000..1146bbe3df --- /dev/null +++ b/web/src/llm-api/siliconflow.ts @@ -0,0 +1,621 @@ +import { Agent } from 'undici' + +import { PROFIT_MARGIN } from '@codebuff/common/constants/limits' +import { getErrorObject } from '@codebuff/common/util/error' +import { env } from '@codebuff/internal/env' + +import { + consumeCreditsForMessage, + extractRequestMetadata, + insertMessageToBigQuery, +} from './helpers' + +import type { UsageData } from './helpers' +import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ChatCompletionRequestBody } from './types' + +const SILICONFLOW_BASE_URL = 'https://api.siliconflow.com/v1' + +// Extended timeout for deep-thinking models that can take +// a long time to start streaming. 
+const SILICONFLOW_HEADERS_TIMEOUT_MS = 10 * 60 * 1000 + +const siliconflowAgent = new Agent({ + headersTimeout: SILICONFLOW_HEADERS_TIMEOUT_MS, + bodyTimeout: 0, +}) + +/** Map from OpenRouter model IDs to SiliconFlow model IDs */ +const SILICONFLOW_MODEL_MAP: Record = { + 'minimax/minimax-m2.5': 'MiniMaxAI/MiniMax-M2.5', +} + +export function isSiliconFlowModel(model: string): boolean { + return model in SILICONFLOW_MODEL_MAP +} + +function getSiliconFlowModelId(openrouterModel: string): string { + return SILICONFLOW_MODEL_MAP[openrouterModel] ?? openrouterModel +} + +type StreamState = { responseText: string; reasoningText: string; billedAlready: boolean } + +type LineResult = { + state: StreamState + billedCredits?: number + patchedLine: string +} + +function createSiliconFlowRequest(params: { + body: ChatCompletionRequestBody + originalModel: string + fetch: typeof globalThis.fetch +}) { + const { body, originalModel, fetch } = params + const siliconflowBody: Record = { + ...body, + model: getSiliconFlowModelId(originalModel), + } + + // Strip OpenRouter-specific / internal fields + delete siliconflowBody.provider + delete siliconflowBody.transforms + delete siliconflowBody.codebuff_metadata + delete siliconflowBody.usage + + // For streaming, request usage in the final chunk + if (siliconflowBody.stream) { + siliconflowBody.stream_options = { include_usage: true } + } + + if (!env.SILICONFLOW_API_KEY) { + throw new Error('SILICONFLOW_API_KEY is not configured') + } + + return fetch(`${SILICONFLOW_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${env.SILICONFLOW_API_KEY}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(siliconflowBody), + // @ts-expect-error - dispatcher is a valid undici option not in fetch types + dispatcher: siliconflowAgent, + }) +} + +// SiliconFlow per-token pricing (dollars per token) for MiniMax M2.5 +// https://siliconflow.com/pricing — $0.30/M input, $1.20/M output +const 
SILICONFLOW_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 +const SILICONFLOW_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 +const SILICONFLOW_OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 + +function extractUsageAndCost(usage: Record | undefined | null): UsageData { + if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } + const promptDetails = usage.prompt_tokens_details as Record | undefined | null + const completionDetails = usage.completion_tokens_details as Record | undefined | null + + const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0 + const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0 + const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0 + const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0 + + const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) + const cost = + nonCachedInputTokens * SILICONFLOW_INPUT_COST_PER_TOKEN + + cacheReadInputTokens * SILICONFLOW_CACHED_INPUT_COST_PER_TOKEN + + outputTokens * SILICONFLOW_OUTPUT_COST_PER_TOKEN + + return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } +} + +export async function handleSiliconFlowNonStream({ + body, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: ChatCompletionRequestBody + userId: string + stripeCustomerId?: string | null + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const originalModel = body.model + const startTime = new Date() + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + + const response = await createSiliconFlowRequest({ body, originalModel, fetch }) + + if (!response.ok) { + throw await parseSiliconFlowError(response) + 
} + + const data = await response.json() + const content = data.choices?.[0]?.message?.content ?? '' + const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? '' + const usageData = extractUsageAndCost(data.usage) + + insertMessageToBigQuery({ + messageId: data.id, + userId, + startTime, + request: body, + reasoningText, + responseText: content, + usageData, + logger, + insertMessageBigquery, + }).catch((error) => { + logger.error({ error }, 'Failed to insert message into BigQuery') + }) + + const billedCredits = await consumeCreditsForMessage({ + messageId: data.id, + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + startTime, + model: originalModel, + reasoningText, + responseText: content, + usageData, + byok: false, + logger, + costMode, + }) + + // Overwrite cost so SDK calculates exact credits we charged + if (data.usage) { + data.usage.cost = creditsToFakeCost(billedCredits) + data.usage.cost_details = { upstream_inference_cost: 0 } + } + + // Normalise model name back to OpenRouter format for client compatibility + data.model = originalModel + if (!data.provider) data.provider = 'SiliconFlow' + + return data +} + +export async function handleSiliconFlowStream({ + body, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: ChatCompletionRequestBody + userId: string + stripeCustomerId?: string | null + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const originalModel = body.model + const startTime = new Date() + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) + + const response = await createSiliconFlowRequest({ body, originalModel, fetch }) + + if (!response.ok) { + throw await parseSiliconFlowError(response) + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Failed to get response 
reader') + } + + let heartbeatInterval: NodeJS.Timeout + let state: StreamState = { responseText: '', reasoningText: '', billedAlready: false } + let clientDisconnected = false + + const stream = new ReadableStream({ + async start(controller) { + const decoder = new TextDecoder() + let buffer = '' + + controller.enqueue( + new TextEncoder().encode(`: connected ${new Date().toISOString()}\n`), + ) + + heartbeatInterval = setInterval(() => { + if (!clientDisconnected) { + try { + controller.enqueue( + new TextEncoder().encode( + `: heartbeat ${new Date().toISOString()}\n\n`, + ), + ) + } catch { + // client disconnected + } + } + }, 30000) + + try { + let done = false + while (!done) { + const result = await reader.read() + done = result.done + const value = result.value + + if (done) break + + buffer += decoder.decode(value, { stream: true }) + let lineEnd = buffer.indexOf('\n') + + while (lineEnd !== -1) { + const line = buffer.slice(0, lineEnd + 1) + buffer = buffer.slice(lineEnd + 1) + + const lineResult = await handleLine({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request: body, + originalModel, + line, + state, + logger, + insertMessage: insertMessageBigquery, + }) + state = lineResult.state + + if (!clientDisconnected) { + try { + controller.enqueue(new TextEncoder().encode(lineResult.patchedLine)) + } catch { + logger.warn('Client disconnected during stream, continuing for billing') + clientDisconnected = true + } + } + + lineEnd = buffer.indexOf('\n') + } + } + + if (!clientDisconnected) { + controller.close() + } + } catch (error) { + if (!clientDisconnected) { + controller.error(error) + } else { + logger.warn( + getErrorObject(error), + 'Error after client disconnect in SiliconFlow stream', + ) + } + } finally { + clearInterval(heartbeatInterval) + } + }, + cancel() { + clearInterval(heartbeatInterval) + clientDisconnected = true + logger.warn( + { + clientDisconnected, + responseTextLength: 
state.responseText.length, + reasoningTextLength: state.reasoningText.length, + }, + 'Client cancelled stream, continuing SiliconFlow consumption for billing', + ) + }, + }) + + return stream +} + +async function handleLine({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + line, + state, + logger, + insertMessage, +}: { + userId: string + stripeCustomerId?: string | null + agentId: string + clientId: string | null + clientRequestId: string | null + costMode: string | undefined + startTime: Date + request: unknown + originalModel: string + line: string + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise { + if (!line.startsWith('data: ')) { + return { state, patchedLine: line } + } + + const raw = line.slice('data: '.length) + if (raw === '[DONE]\n' || raw === '[DONE]') { + return { state, patchedLine: line } + } + + let obj: Record + try { + obj = JSON.parse(raw) + } catch (error) { + logger.warn( + { error: getErrorObject(error, { includeRawError: true }) }, + 'Received non-JSON SiliconFlow response', + ) + return { state, patchedLine: line } + } + + // Patch model and provider for SDK compatibility + if (obj.model) obj.model = originalModel + if (!obj.provider) obj.provider = 'SiliconFlow' + + // Process the chunk for billing / state tracking + const result = await handleResponse({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + data: obj, + state, + logger, + insertMessage, + }) + + // If this is the final chunk with billing, overwrite cost in the patched object + if (result.billedCredits !== undefined && obj.usage) { + const usage = obj.usage as Record + usage.cost = creditsToFakeCost(result.billedCredits) + usage.cost_details = { upstream_inference_cost: 0 } + } + + const patchedLine = `data: ${JSON.stringify(obj)}\n` + return { state: result.state, billedCredits: 
result.billedCredits, patchedLine } +} + +function isFinalChunk(data: Record): boolean { + const choices = data.choices as Array> | undefined + if (!choices || choices.length === 0) return true + return choices.some(c => c.finish_reason != null) +} + +async function handleResponse({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + data, + state, + logger, + insertMessage, +}: { + userId: string + stripeCustomerId?: string | null + agentId: string + clientId: string | null + clientRequestId: string | null + costMode: string | undefined + startTime: Date + request: unknown + originalModel: string + data: Record + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise<{ state: StreamState; billedCredits?: number }> { + state = handleStreamChunk({ data, state, logger, userId, agentId, model: originalModel }) + + // Some providers send cumulative usage on EVERY chunk (not just the final one), + // so we must only bill once on the final chunk to avoid charging N times. + if ('error' in data || !data.usage || state.billedAlready || !isFinalChunk(data)) { + // Strip usage from non-final chunks and duplicate final chunks + // so the SDK doesn't see multiple usage objects + if (data.usage && (!isFinalChunk(data) || state.billedAlready)) { + delete data.usage + } + return { state } + } + + const usageData = extractUsageAndCost(data.usage as Record) + const messageId = typeof data.id === 'string' ? 
data.id : 'unknown' + + state.billedAlready = true + + insertMessageToBigQuery({ + messageId, + userId, + startTime, + request, + reasoningText: state.reasoningText, + responseText: state.responseText, + usageData, + logger, + insertMessageBigquery: insertMessage, + }).catch((error) => { + logger.error({ error }, 'Failed to insert message into BigQuery') + }) + + const billedCredits = await consumeCreditsForMessage({ + messageId, + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + startTime, + model: originalModel, + reasoningText: state.reasoningText, + responseText: state.responseText, + usageData, + byok: false, + logger, + costMode, + }) + + return { state, billedCredits } +} + +function handleStreamChunk({ + data, + state, + logger, + userId, + agentId, + model, +}: { + data: Record + state: StreamState + logger: Logger + userId: string + agentId: string + model: string +}): StreamState { + const MAX_BUFFER_SIZE = 1 * 1024 * 1024 + + if ('error' in data) { + const errorData = data.error as Record + logger.error( + { + userId, + agentId, + model, + errorCode: errorData?.code, + errorType: errorData?.type, + errorMessage: errorData?.message, + }, + 'Received error chunk in SiliconFlow stream', + ) + return state + } + + const choices = data.choices as Array> | undefined + if (!choices?.length) { + return state + } + const choice = choices[0] + const delta = choice.delta as Record | undefined + + const contentDelta = typeof delta?.content === 'string' ? delta.content : '' + if (state.responseText.length < MAX_BUFFER_SIZE) { + state.responseText += contentDelta + if (state.responseText.length >= MAX_BUFFER_SIZE) { + state.responseText = + state.responseText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---' + logger.warn({ userId, agentId, model }, 'Response text buffer truncated at 1MB') + } + } + + const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content + : typeof delta?.reasoning === 'string' ? 
delta.reasoning + : '' + if (state.reasoningText.length < MAX_BUFFER_SIZE) { + state.reasoningText += reasoningDelta + if (state.reasoningText.length >= MAX_BUFFER_SIZE) { + state.reasoningText = + state.reasoningText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---' + logger.warn({ userId, agentId, model }, 'Reasoning text buffer truncated at 1MB') + } + } + + return state +} + +export class SiliconFlowError extends Error { + constructor( + public readonly statusCode: number, + public readonly statusText: string, + public readonly errorBody: { + error: { + message: string + code: string | number | null + type?: string | null + } + }, + ) { + super(errorBody.error.message) + this.name = 'SiliconFlowError' + } + + toJSON() { + return { + error: { + message: this.errorBody.error.message, + code: this.errorBody.error.code, + type: this.errorBody.error.type, + }, + } + } +} + +async function parseSiliconFlowError(response: Response): Promise { + const errorText = await response.text() + let errorBody: SiliconFlowError['errorBody'] + try { + const parsed = JSON.parse(errorText) + if (parsed?.error?.message) { + errorBody = { + error: { + message: parsed.error.message, + code: parsed.error.code ?? null, + type: parsed.error.type ?? 
null, + }, + } + } else { + errorBody = { + error: { + message: errorText || response.statusText, + code: response.status, + }, + } + } + } catch { + errorBody = { + error: { + message: errorText || response.statusText, + code: response.status, + }, + } + } + return new SiliconFlowError(response.status, response.statusText, errorBody) +} + +function creditsToFakeCost(credits: number): number { + return credits / ((1 + PROFIT_MARGIN) * 100) +} From 2f3b772f48f1484bf655046ec2e2180c6e5565c4 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 17:16:39 -0700 Subject: [PATCH 08/16] Route minimax through siliconflow of openrouter for now --- agents/base2/base2.ts | 1 - web/src/app/api/v1/chat/completions/_post.ts | 18 ++++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 8735d0579b..4a3c40064f 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -30,7 +30,6 @@ export function createBase2( publisher, model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? 
{ - only: ['siliconflow/fp8'], data_collection: 'deny', } : { only: ['amazon-bedrock'], diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index b886a3d838..94df6d7865 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -367,10 +367,15 @@ export async function postChatCompletions(params: { try { if (bodyStream) { // Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models - // CanopyWave and Fireworks TEMPORARILY DISABLED: route through OpenRouter - const useSiliconFlow = isSiliconFlowModel(typedBody.model) + // SiliconFlow, CanopyWave, and Fireworks TEMPORARILY DISABLED: route through OpenRouter + const useSiliconFlow = false // isSiliconFlowModel(typedBody.model) const useCanopyWave = false // isCanopyWaveModel(typedBody.model) const useFireworks = false // isFireworksModel(typedBody.model) + + // Route minimax models through OpenRouter via SiliconFlow provider + if (isSiliconFlowModel(typedBody.model)) { + typedBody.provider = { ...typedBody.provider, only: ['siliconflow/fp8'] } + } const stream = useSiliconFlow ? 
await handleSiliconFlowStream({ body: typedBody, @@ -432,11 +437,16 @@ export async function postChatCompletions(params: { }) } else { // Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models - // CanopyWave and Fireworks TEMPORARILY DISABLED: route through OpenRouter + // SiliconFlow, CanopyWave, and Fireworks TEMPORARILY DISABLED: route through OpenRouter const model = typedBody.model - const useSiliconFlow = isSiliconFlowModel(model) + const useSiliconFlow = false // isSiliconFlowModel(model) const useCanopyWave = false // isCanopyWaveModel(model) const useFireworks = false // isFireworksModel(model) + + // Route minimax models through OpenRouter via SiliconFlow provider + if (isSiliconFlowModel(model)) { + typedBody.provider = { ...typedBody.provider, only: ['siliconflow/fp8'] } + } const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? modelParts[1] : model const isOpenAIDirectModel = From 7b921d5fa12ffeec86ba927ffcb7d4c3c411647d Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 17:37:14 -0700 Subject: [PATCH 09/16] Reenalbe fireworks --- web/src/app/api/v1/chat/completions/_post.ts | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 94df6d7865..ad0eb4f7ad 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -370,12 +370,7 @@ export async function postChatCompletions(params: { // SiliconFlow, CanopyWave, and Fireworks TEMPORARILY DISABLED: route through OpenRouter const useSiliconFlow = false // isSiliconFlowModel(typedBody.model) const useCanopyWave = false // isCanopyWaveModel(typedBody.model) - const useFireworks = false // isFireworksModel(typedBody.model) - - // Route minimax models through OpenRouter via SiliconFlow provider - if (isSiliconFlowModel(typedBody.model)) { - typedBody.provider = { 
...typedBody.provider, only: ['siliconflow/fp8'] } - } + const useFireworks = isFireworksModel(typedBody.model) const stream = useSiliconFlow ? await handleSiliconFlowStream({ body: typedBody, @@ -441,12 +436,7 @@ export async function postChatCompletions(params: { const model = typedBody.model const useSiliconFlow = false // isSiliconFlowModel(model) const useCanopyWave = false // isCanopyWaveModel(model) - const useFireworks = false // isFireworksModel(model) - - // Route minimax models through OpenRouter via SiliconFlow provider - if (isSiliconFlowModel(model)) { - typedBody.provider = { ...typedBody.provider, only: ['siliconflow/fp8'] } - } + const useFireworks = isFireworksModel(model) const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? modelParts[1] : model const isOpenAIDirectModel = From d5246e282260fc7cb196c9903a8baa4af47fce1d Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 17:47:39 -0700 Subject: [PATCH 10/16] Add install guide to freebuff landing page --- freebuff/web/src/app/home-client.tsx | 90 +++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index bcef00bf97..e397fd101c 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -38,6 +38,85 @@ const faqs = [ }, ] +const setupSteps = [ + { + label: 'Open your terminal', + description: 'Use any terminal — within VS Code, plain terminal, PowerShell, etc.', + }, + { + label: 'Navigate to your project', + command: 'cd /path/to/your-repo', + }, + { + label: 'Install Freebuff', + command: 'npm install -g freebuff', + }, + { + label: 'Run Freebuff', + command: 'freebuff', + }, +] + +function SetupGuide() { + const [isOpen, setIsOpen] = useState(false) + + return ( +

+ + + + {isOpen && ( + +
+
    + {setupSteps.map((step, i) => ( +
  1. + + {i + 1} + +
    +

    {step.label}

    + {'description' in step && step.description && ( +

    {step.description}

    + )} + {'command' in step && step.command && ( +
    + + {step.command} + + +
    + )} +
    +
  2. + ))} +
+
+
+ )} +
+
+ ) +} + function InstallCommand({ className }: { className?: string }) { return (
+ + + +
{/* Bottom fade */} From 52523da38ebe40ef8879f26ba8f90ce44d78a44e Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 18:02:38 -0700 Subject: [PATCH 11/16] freebuff web: Remove navbar, reorder philosophy section --- freebuff/web/src/app/home-client.tsx | 41 +++++++++++++++++++- freebuff/web/src/app/layout.tsx | 2 - freebuff/web/src/components/navbar.tsx | 52 -------------------------- 3 files changed, 40 insertions(+), 55 deletions(-) delete mode 100644 freebuff/web/src/components/navbar.tsx diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index e397fd101c..36a5e2d675 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -4,11 +4,14 @@ import { AnimatePresence, motion } from 'framer-motion' import { ChevronDown, } from 'lucide-react' +import Image from 'next/image' +import Link from 'next/link' import { useState } from 'react' import { BackgroundBeams } from '@/components/background-beams' import { CopyButton } from '@/components/copy-button' import { HeroGrid } from '@/components/hero-grid' +import { Icons } from '@/components/icons' import { cn } from '@/lib/utils' const INSTALL_COMMAND = 'npm install -g freebuff' @@ -187,8 +190,8 @@ function FAQList() { } const PHILOSOPHY_WORDS = [ - { word: 'FAST', description: '3× the speed of Claude Code' }, { word: 'SIMPLE', description: 'No modes. No config. Just code.' }, + { word: 'FAST', description: 'Up to 3× the speed of Claude Code' }, { word: 'LOADED', description: 'Web research, browser use, and more — built in' }, ] @@ -218,6 +221,42 @@ export default function HomeClient() { + {/* Inline nav overlay */} + + + Freebuff + + freebuff + + + + + + {/* Hero content */}
{/* Headline with staggered word animation */} diff --git a/freebuff/web/src/app/layout.tsx b/freebuff/web/src/app/layout.tsx index b813a211dd..3128907ae6 100644 --- a/freebuff/web/src/app/layout.tsx +++ b/freebuff/web/src/app/layout.tsx @@ -3,7 +3,6 @@ import '@/styles/globals.css' import type { Metadata } from 'next' import { Footer } from '@/components/footer' -import { Navbar } from '@/components/navbar' import { ThemeProvider } from '@/components/theme-provider' import { siteConfig } from '@/lib/constant' import { fonts } from '@/lib/fonts' @@ -54,7 +53,6 @@ export default function RootLayout({ > -
{children}
diff --git a/freebuff/web/src/components/navbar.tsx b/freebuff/web/src/components/navbar.tsx deleted file mode 100644 index 66774385db..0000000000 --- a/freebuff/web/src/components/navbar.tsx +++ /dev/null @@ -1,52 +0,0 @@ -'use client' - -import Image from 'next/image' -import Link from 'next/link' - -import { Icons } from './icons' - -export function Navbar() { - - return ( -
-
- - Freebuff - - freebuff - - - - -
-
- ) -} From cbbfe731c49ff2ce2f5d8e0c47c99e683abb352c Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 18:20:39 -0700 Subject: [PATCH 12/16] Fix for importing bundled agents --- cli/src/utils/local-agent-registry.ts | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts index 2016cc7991..203a9f7a90 100644 --- a/cli/src/utils/local-agent-registry.ts +++ b/cli/src/utils/local-agent-registry.ts @@ -10,6 +10,7 @@ import type { MCPConfig } from '@codebuff/common/types/mcp' import { getProjectRoot } from '../project-files' import { AGENT_MODE_TO_ID, type AgentMode } from './constants' import { logger } from './logger' +import * as bundledAgentsModule from '../agents/bundled-agents.generated' import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition' @@ -153,26 +154,12 @@ const getUserAgentDefinitions = (): AgentDefinition[] => { // Bundled agents loading (generated at build time by prebuild-agents.ts) // ============================================================================ -interface BundledAgentsModule { - bundledAgents: Record - getBundledAgentsAsLocalInfo: () => LocalAgentInfo[] -} - -// NOTE: Inline require() with try/catch is used because this file is generated at -// build time by prebuild-agents.ts and may not exist during development -let bundledAgentsModule: BundledAgentsModule | null = null -try { - bundledAgentsModule = require('../agents/bundled-agents.generated') -} catch { - // File not generated yet - running in development without prebuild -} - const getBundledAgents = (): Record => { - return bundledAgentsModule?.bundledAgents ?? {} + return bundledAgentsModule.bundledAgents ?? {} } const getBundledAgentsAsLocalInfo = (): LocalAgentInfo[] => { - return bundledAgentsModule?.getBundledAgentsAsLocalInfo?.() ?? [] + return bundledAgentsModule.getBundledAgentsAsLocalInfo?.() ?? 
[] } // ============================================================================ From 58ff484b65407b5ebbe08c2c2463634c8fc797fc Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 18:30:46 -0700 Subject: [PATCH 13/16] freebuff web: improve styles --- freebuff/web/src/app/home-client.tsx | 154 ++++++++++-------- .../web/src/components/background-beams.tsx | 2 +- freebuff/web/src/components/copy-button.tsx | 2 +- freebuff/web/src/components/footer.tsx | 16 +- freebuff/web/src/components/hero-grid.tsx | 6 +- freebuff/web/src/components/terminal-demo.tsx | 8 +- freebuff/web/src/styles/globals.css | 24 +-- freebuff/web/tailwind.config.ts | 4 +- 8 files changed, 117 insertions(+), 99 deletions(-) diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index 36a5e2d675..373cc2d4a8 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -68,7 +68,7 @@ function SetupGuide() {
@@ -29,7 +29,7 @@ export function HeroGrid({ className }: { className?: string }) { className="absolute inset-0 opacity-[0.025]" style={{ backgroundImage: - 'linear-gradient(90deg, #00FF95 1px, transparent 1px)', + 'linear-gradient(90deg, #7CFF3F 1px, transparent 1px)', backgroundSize: '120px 120px', }} /> diff --git a/freebuff/web/src/components/terminal-demo.tsx b/freebuff/web/src/components/terminal-demo.tsx index 4048312dd8..e2fdfc6b8a 100644 --- a/freebuff/web/src/components/terminal-demo.tsx +++ b/freebuff/web/src/components/terminal-demo.tsx @@ -42,13 +42,13 @@ export function TerminalDemo() { const getLineColor = (type: string) => { switch (type) { case 'prompt': - return 'text-acid-green' + return 'text-acid-matrix' case 'user': return 'text-white font-medium' case 'agent': return 'text-zinc-300' case 'success': - return 'text-acid-green font-medium' + return 'text-acid-matrix font-medium' default: return 'text-zinc-500' } @@ -62,7 +62,7 @@ export function TerminalDemo() { className="relative mx-auto max-w-2xl" > {/* Glow behind terminal */} -
+
{/* Title bar */} @@ -93,7 +93,7 @@ export function TerminalDemo() { ))} {visibleLines < DEMO_LINES.length && ( - + )}
diff --git a/freebuff/web/src/styles/globals.css b/freebuff/web/src/styles/globals.css index a18c7568cf..c9cde579cc 100644 --- a/freebuff/web/src/styles/globals.css +++ b/freebuff/web/src/styles/globals.css @@ -55,9 +55,9 @@ /* Neon green glow text */ .neon-text { text-shadow: - 0 0 20px rgba(0, 255, 149, 0.4), - 0 0 40px rgba(0, 255, 149, 0.2), - 0 0 80px rgba(0, 255, 149, 0.1); + 0 0 20px rgba(124, 255, 63, 0.4), + 0 0 40px rgba(124, 255, 63, 0.2), + 0 0 80px rgba(124, 255, 63, 0.1); } /* Gradient border shine effect */ @@ -73,10 +73,10 @@ padding: 1px; background: linear-gradient( 135deg, - rgba(0, 255, 149, 0.3), + rgba(124, 255, 63, 0.3), transparent 40%, transparent 60%, - rgba(0, 255, 149, 0.15) + rgba(124, 255, 63, 0.15) ); -webkit-mask: linear-gradient(#fff 0 0) content-box, @@ -89,19 +89,21 @@ /* Giant keyword wall — hollow outlined text */ .keyword-hollow { color: transparent; - -webkit-text-stroke: 1.5px rgba(0, 255, 149, 0.4); + -webkit-text-stroke: 1.5px rgba(124, 255, 63, 0.45); transition: color 0.5s ease, -webkit-text-stroke-color 0.5s ease, text-shadow 0.5s ease; } -.group:hover .keyword-hollow, + .keyword-filled { - color: #00FF95; - -webkit-text-stroke: 1.5px #00FF95; + color: #7CFF3F; + -webkit-text-stroke: 1.5px #7CFF3F; text-shadow: - 0 0 40px rgba(0, 255, 149, 0.3), - 0 0 80px rgba(0, 255, 149, 0.1); + 0 0 40px rgba(124, 255, 63, 0.3), + 0 0 80px rgba(124, 255, 63, 0.1); + transition: text-shadow 0.5s ease; } + @media (prefers-reduced-motion: reduce) { .animate-glow-pulse, .animate-scan-line, diff --git a/freebuff/web/tailwind.config.ts b/freebuff/web/tailwind.config.ts index eb436d506f..3345cfb9dd 100644 --- a/freebuff/web/tailwind.config.ts +++ b/freebuff/web/tailwind.config.ts @@ -83,10 +83,10 @@ const config = { }, 'glow-pulse': { '0%, 100%': { - textShadow: '0 0 20px rgba(0,255,149,0.4), 0 0 40px rgba(0,255,149,0.2), 0 0 80px rgba(0,255,149,0.1)', + textShadow: '0 0 20px rgba(124,255,63,0.4), 0 0 40px rgba(124,255,63,0.2), 0 0 80px 
rgba(124,255,63,0.1)', }, '50%': { - textShadow: '0 0 30px rgba(0,255,149,0.6), 0 0 60px rgba(0,255,149,0.3), 0 0 100px rgba(0,255,149,0.15)', + textShadow: '0 0 30px rgba(124,255,63,0.6), 0 0 60px rgba(124,255,63,0.3), 0 0 100px rgba(124,255,63,0.15)', }, }, From 8a033ac0fc5f7829c99add50ae06835b65206fc7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 11 Mar 2026 01:32:26 +0000 Subject: [PATCH 14/16] Bump Freebuff version to 0.0.8 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index d7ca6de62c..f330e92c64 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.7", + "version": "0.0.8", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 016efa5ea7c77cbe51fef58edc96cbb77f203ef3 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 10 Mar 2026 18:37:34 -0700 Subject: [PATCH 15/16] Fix build --- cli/src/agents/bundled-agents.generated.d.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 cli/src/agents/bundled-agents.generated.d.ts diff --git a/cli/src/agents/bundled-agents.generated.d.ts b/cli/src/agents/bundled-agents.generated.d.ts new file mode 100644 index 0000000000..f5b89022cf --- /dev/null +++ b/cli/src/agents/bundled-agents.generated.d.ts @@ -0,0 +1,14 @@ +/** + * Type declarations for the auto-generated bundled agents module. + * + * The actual file (bundled-agents.generated.ts) is created by + * cli/scripts/prebuild-agents.ts and is gitignored. This declaration + * file lets TypeScript resolve the module when the generated file + * has not been built yet. 
+ */ +import type { LocalAgentInfo } from '../utils/local-agent-registry' + +export declare const bundledAgents: Record +export declare function getBundledAgentsAsLocalInfo(): LocalAgentInfo[] +export declare function getBundledAgentIds(): string[] +export declare function isBundledAgent(agentId: string): boolean From c34a61e9d8f0885ce54bef3b26e6d475644d51d6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 11 Mar 2026 01:40:41 +0000 Subject: [PATCH 16/16] Bump Freebuff version to 0.0.9 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index f330e92c64..39156d5c7a 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.8", + "version": "0.0.9", "description": "The world's strongest free coding agent", "license": "MIT", "bin": {