diff --git a/.agents/claude-code-cli.ts b/.agents/claude-code-cli.ts index 2de48ff5c5..075d9f23e4 100644 --- a/.agents/claude-code-cli.ts +++ b/.agents/claude-code-cli.ts @@ -10,7 +10,7 @@ const baseDefinition = createCliAgent({ startCommand: 'claude --dangerously-skip-permissions', permissionNote: 'Always use `--dangerously-skip-permissions` when testing to avoid permission prompts that would block automated tests.', - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', }) // Constants must be inside handleSteps since it gets serialized via .toString() diff --git a/.agents/codebuff-local-cli.ts b/.agents/codebuff-local-cli.ts index 1fdf975c62..8cb367a08a 100644 --- a/.agents/codebuff-local-cli.ts +++ b/.agents/codebuff-local-cli.ts @@ -10,7 +10,7 @@ const baseDefinition = createCliAgent({ startCommand: 'bun --cwd=cli run dev', permissionNote: 'No permission flags needed for Codebuff local dev server.', - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', skipPrepPhase: true, cliSpecificDocs: `## Codebuff CLI Specific Guidance diff --git a/.agents/codex-cli.ts b/.agents/codex-cli.ts index 9914e3d7c7..e7b18473a8 100644 --- a/.agents/codex-cli.ts +++ b/.agents/codex-cli.ts @@ -81,7 +81,7 @@ const baseDefinition = createCliAgent({ startCommand: 'codex -a never -s danger-full-access', permissionNote: 'Always use `-a never -s danger-full-access` when testing to avoid approval prompts that would block automated tests.', - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', extraInputParams: { reviewType: { type: 'string', diff --git a/.agents/gemini-cli.ts b/.agents/gemini-cli.ts index 38186add48..d5eb7f45e2 100644 --- a/.agents/gemini-cli.ts +++ b/.agents/gemini-cli.ts @@ -10,7 +10,7 @@ const baseDefinition = createCliAgent({ startCommand: 'gemini --yolo', permissionNote: 'Always use `--yolo` (or `--approval-mode yolo`) when testing to auto-approve all tool actions and avoid prompts that would block automated tests.', - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', cliSpecificDocs: `## Gemini CLI Commands Gemini CLI uses slash commands for navigation: diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts index 6323ec7b77..9dce8fa7cb 100644 --- a/.agents/types/agent-definition.ts +++ b/.agents/types/agent-definition.ts @@ -380,7 +380,9 @@ export type ModelName = // Anthropic | 'anthropic/claude-sonnet-4.6' + | 'anthropic/claude-opus-4.7' | 'anthropic/claude-opus-4.6' + | 'anthropic/claude-opus-4.5' | 'anthropic/claude-haiku-4.5' | 'anthropic/claude-sonnet-4.5' | 'anthropic/claude-opus-4.1' diff --git a/agents-graveyard/editor/best-of-n/best-of-n-selector.ts b/agents-graveyard/editor/best-of-n/best-of-n-selector.ts index 27d9dd8993..74f9d8c767 100644 --- a/agents-graveyard/editor/best-of-n/best-of-n-selector.ts +++ b/agents-graveyard/editor/best-of-n/best-of-n-selector.ts @@ -17,7 +17,7 @@ export const createBestOfNSelector = (options: { model: isSonnet ? 'anthropic/claude-sonnet-4.5' : isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : isGemini ? 'google/gemini-3-pro-preview' : 'openai/gpt-5.1', diff --git a/agents-graveyard/editor/best-of-n/editor-implementor2.ts b/agents-graveyard/editor/best-of-n/editor-implementor2.ts index 9447693177..6a5dc1085f 100644 --- a/agents-graveyard/editor/best-of-n/editor-implementor2.ts +++ b/agents-graveyard/editor/best-of-n/editor-implementor2.ts @@ -13,7 +13,7 @@ export const createBestOfNImplementor2 = (options: { model: isGpt5 ? 'openai/gpt-5.2' : isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : 'anthropic/claude-sonnet-4.5', displayName: isGpt5 ? 'GPT-5 Implementation Generator v2' diff --git a/agents-graveyard/editor/best-of-n/editor-multi-prompt2.ts b/agents-graveyard/editor/best-of-n/editor-multi-prompt2.ts index be9722b5ef..0bedd6953c 100644 --- a/agents-graveyard/editor/best-of-n/editor-multi-prompt2.ts +++ b/agents-graveyard/editor/best-of-n/editor-multi-prompt2.ts @@ -10,7 +10,7 @@ import type { SecretAgentDefinition } from '../../types/secret-agent-definition' export function createMultiPromptEditor(): Omit { return { publisher, - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', displayName: 'Multi-Prompt Editor', spawnerPrompt: 'Edits code by spawning multiple implementor agents with different strategy prompts, selects the best implementation, and applies the changes. It also returns further suggested improvements which you should take seriously and act on. Pass as input an array of short prompts specifying different implementation approaches or strategies. Make sure to read any files intended to be edited before spawning this agent.', diff --git a/agents-graveyard/editor/reviewer-editor.ts b/agents-graveyard/editor/reviewer-editor.ts index 4049cb0c68..c6cfe42b6a 100644 --- a/agents-graveyard/editor/reviewer-editor.ts +++ b/agents-graveyard/editor/reviewer-editor.ts @@ -12,7 +12,7 @@ export const createCodeEditor = (options: { model: options.model === 'gpt-5' ? 'openai/gpt-5.1' - : 'anthropic/claude-opus-4.6', + : 'anthropic/claude-opus-4.7', displayName: 'Code Editor', spawnerPrompt: 'Expert code reviewer that reviews recent code changes and makes improvements.', diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts index 9e14909f89..030857c8dc 100644 --- a/agents/__tests__/editor.test.ts +++ b/agents/__tests__/editor.test.ts @@ -28,7 +28,7 @@ describe('editor agent', () => { }) test('uses opus model by default', () => { - expect(editor.model).toBe('anthropic/claude-opus-4.6') + expect(editor.model).toBe('anthropic/claude-opus-4.7') }) test('has output mode set to structured_output', () => { @@ -54,7 +54,7 @@ describe('editor agent', () => { describe('createCodeEditor', () => { test('creates opus editor by default', () => { const opusEditor = createCodeEditor({ model: 'opus' }) - expect(opusEditor.model).toBe('anthropic/claude-opus-4.6') + expect(opusEditor.model).toBe('anthropic/claude-opus-4.7') }) test('creates gpt-5 editor', () => { diff --git a/agents/__tests__/thinker.test.ts b/agents/__tests__/thinker.test.ts index ac36c12dbe..0e44a9743e 100644 --- a/agents/__tests__/thinker.test.ts +++ b/agents/__tests__/thinker.test.ts @@ -29,7 +29,7 @@ describe('thinker agent', () => { }) test('uses opus model', () => { - expect(thinker.model).toBe('anthropic/claude-opus-4.6') + expect(thinker.model).toBe('anthropic/claude-opus-4.7') }) test('has output mode set to structured_output', () => { diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 3cc65d5b46..b4d05ca366 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -25,7 +25,7 @@ export function createBase2( const isFree = mode === 'free' const isSonnet = false - const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6' + const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7' return { publisher, diff --git a/agents/editor/best-of-n/best-of-n-selector2.ts b/agents/editor/best-of-n/best-of-n-selector2.ts index cc35abbaba..cc28b24116 100644 --- a/agents/editor/best-of-n/best-of-n-selector2.ts +++ b/agents/editor/best-of-n/best-of-n-selector2.ts @@ -16,7 +16,7 @@ export const createBestOfNSelector2 = (options: { model: isSonnet ? 'anthropic/claude-sonnet-4.5' : isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : 'openai/gpt-5.4', ...(isGpt5 && { reasoningOptions: { diff --git a/agents/editor/best-of-n/editor-implementor.ts b/agents/editor/best-of-n/editor-implementor.ts index 87ec441ba3..fe9fe13ebf 100644 --- a/agents/editor/best-of-n/editor-implementor.ts +++ b/agents/editor/best-of-n/editor-implementor.ts @@ -16,7 +16,7 @@ export const createBestOfNImplementor = (options: { model: isSonnet ? 'anthropic/claude-sonnet-4.5' : isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : isGemini ? 'google/gemini-3-pro-preview' : 'openai/gpt-5.1', diff --git a/agents/editor/best-of-n/editor-multi-prompt.ts b/agents/editor/best-of-n/editor-multi-prompt.ts index 5c54cf9697..922fb43f22 100644 --- a/agents/editor/best-of-n/editor-multi-prompt.ts +++ b/agents/editor/best-of-n/editor-multi-prompt.ts @@ -11,7 +11,7 @@ import type { SecretAgentDefinition } from '../../types/secret-agent-definition' export function createMultiPromptEditor(): Omit { return { publisher, - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', providerOptions: { only: ['amazon-bedrock'], }, diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index e191609ad2..3d208aa13a 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -14,7 +14,7 @@ export const createCodeEditor = (options: { ? 'openai/gpt-5.1' : options.model === 'glm' ? 'z-ai/glm-5.1' - : 'anthropic/claude-opus-4.6', + : 'anthropic/claude-opus-4.7', ...(options.model === 'opus' && { providerOptions: { only: ['amazon-bedrock'], diff --git a/agents/general-agent/general-agent.ts b/agents/general-agent/general-agent.ts index 26f2099589..14d12e440d 100644 --- a/agents/general-agent/general-agent.ts +++ b/agents/general-agent/general-agent.ts @@ -12,7 +12,7 @@ export const createGeneralAgent = (options: { return { publisher, - model: isGpt5 ? 'openai/gpt-5.4' : 'anthropic/claude-opus-4.6', + model: isGpt5 ? 'openai/gpt-5.4' : 'anthropic/claude-opus-4.7', ...(!isGpt5 && { providerOptions: { only: ['amazon-bedrock'], diff --git a/agents/reviewer/code-reviewer.ts b/agents/reviewer/code-reviewer.ts index 9cc840d69f..31b261d992 100644 --- a/agents/reviewer/code-reviewer.ts +++ b/agents/reviewer/code-reviewer.ts @@ -64,7 +64,7 @@ Be extremely concise.`, const definition: SecretAgentDefinition = { id: 'code-reviewer', publisher, - ...createReviewer('anthropic/claude-opus-4.6'), + ...createReviewer('anthropic/claude-opus-4.7'), providerOptions: { only: ['amazon-bedrock'], }, diff --git a/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts b/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts index a6a380e3ee..e7bac906eb 100644 --- a/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts +++ b/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts @@ -14,7 +14,7 @@ export function createCodeReviewerMultiPrompt(): Omit< > { return { publisher, - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', providerOptions: { only: ['amazon-bedrock'], }, diff --git a/agents/thinker/best-of-n/thinker-best-of-n.ts b/agents/thinker/best-of-n/thinker-best-of-n.ts index 3e1e532c5f..5c09fae840 100644 --- a/agents/thinker/best-of-n/thinker-best-of-n.ts +++ b/agents/thinker/best-of-n/thinker-best-of-n.ts @@ -18,7 +18,7 @@ export function createThinkerBestOfN( model: isGpt5 ? 'openai/gpt-5.1' : isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : 'anthropic/claude-sonnet-4.5', ...(isOpus && { providerOptions: { diff --git a/agents/thinker/best-of-n/thinker-selector.ts b/agents/thinker/best-of-n/thinker-selector.ts index ab10bff69f..62bf834208 100644 --- a/agents/thinker/best-of-n/thinker-selector.ts +++ b/agents/thinker/best-of-n/thinker-selector.ts @@ -9,7 +9,7 @@ export function createThinkerSelector( return { publisher, model: isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : 'anthropic/claude-sonnet-4.5', ...(isOpus && { providerOptions: { diff --git a/agents/thinker/thinker.ts b/agents/thinker/thinker.ts index 47fc54ec71..6a9f7d808d 100644 --- a/agents/thinker/thinker.ts +++ b/agents/thinker/thinker.ts @@ -5,7 +5,7 @@ import type { SecretAgentDefinition } from '../types/secret-agent-definition' const definition: SecretAgentDefinition = { id: 'thinker', publisher, - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', providerOptions: { only: ['amazon-bedrock'], }, diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index b81fc69c88..b28a77c311 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -380,7 +380,9 @@ export type ModelName = // Anthropic | 'anthropic/claude-sonnet-4.6' + | 'anthropic/claude-opus-4.7' | 'anthropic/claude-opus-4.6' + | 'anthropic/claude-opus-4.5' | 'anthropic/claude-haiku-4.5' | 'anthropic/claude-sonnet-4.5' | 'anthropic/claude-opus-4.1' diff --git a/common/src/constants/claude-oauth.ts b/common/src/constants/claude-oauth.ts index 8204f8db78..16b4286103 100644 --- a/common/src/constants/claude-oauth.ts +++ b/common/src/constants/claude-oauth.ts @@ -82,7 +82,7 @@ export const OPENROUTER_TO_ANTHROPIC_MODEL_MAP: Record = { 'anthropic/claude-4-sonnet': 'claude-sonnet-4-20250514', // Claude 4.x Opus models - 'anthropic/claude-opus-4.6': 'claude-opus-4-6', + 'anthropic/claude-opus-4.7': 'claude-opus-4-7', 'anthropic/claude-opus-4.5': 'claude-opus-4-5-20251101', 'anthropic/claude-opus-4.1': 'claude-opus-4-1-20250805', 'anthropic/claude-opus-4': 'claude-opus-4-1-20250805', diff --git a/common/src/constants/model-config.ts b/common/src/constants/model-config.ts index c75bda26e0..10e579a921 100644 --- a/common/src/constants/model-config.ts +++ b/common/src/constants/model-config.ts @@ -124,15 +124,6 @@ export const providerModelNames = { export type Model = (typeof models)[keyof typeof models] | (string & {}) -export const shouldCacheModels = [ - 'anthropic/claude-opus-4.1', - 'anthropic/claude-sonnet-4', - 'anthropic/claude-opus-4', - 'anthropic/claude-3.7-sonnet', - 'anthropic/claude-3.5-haiku', - 'z-ai/glm-4.5', - 'qwen/qwen3-coder', -] const nonCacheableModels = [ models.openrouter_grok_4, ] satisfies string[] as string[] diff --git a/common/src/templates/initial-agents-dir/README.md b/common/src/templates/initial-agents-dir/README.md index f9290a7ca8..c02ddab90a 100644 --- a/common/src/templates/initial-agents-dir/README.md +++ b/common/src/templates/initial-agents-dir/README.md @@ -170,7 +170,7 @@ async *handleSteps() { Choose models based on your agent's needs: -- **`anthropic/claude-opus-4.6`**: Best general-purpose capabilities and code generation +- **`anthropic/claude-opus-4.7`**: Best general-purpose capabilities and code generation - **`openai/gpt-5.2`**: Best at complex reasoning and planning - **`google/gemini-3.1-flash-lite-preview`**: Fast and cost-effective for simple or medium-complexity tasks diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index b81fc69c88..b28a77c311 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -380,7 +380,9 @@ export type ModelName = // Anthropic | 'anthropic/claude-sonnet-4.6' + | 'anthropic/claude-opus-4.7' | 'anthropic/claude-opus-4.6' + | 'anthropic/claude-opus-4.5' | 'anthropic/claude-haiku-4.5' | 'anthropic/claude-sonnet-4.5' | 'anthropic/claude-opus-4.1' diff --git a/common/src/util/model-utils.ts b/common/src/util/model-utils.ts index 00277dd065..17d1f388e5 100644 --- a/common/src/util/model-utils.ts +++ b/common/src/util/model-utils.ts @@ -8,11 +8,8 @@ function getExplicitlyDefinedModels(): Set { if (explicitlyDefinedModels === null) { // NOTE: Inline require() avoids circular dependency - old-constants imports this // module, so a top-level import would create a circular reference - const { models, shouldCacheModels } = require('../old-constants') - explicitlyDefinedModels = new Set([ - ...(Object.values(models) as string[]), - ...(Object.values(shouldCacheModels) as string[]), - ]) + const { models } = require('../old-constants') + explicitlyDefinedModels = new Set(Object.values(models) as string[]) } return explicitlyDefinedModels } diff --git a/web/src/app/api/v1/token-count/_post.ts b/web/src/app/api/v1/token-count/_post.ts index 1daea67723..f7224c25d1 100644 --- a/web/src/app/api/v1/token-count/_post.ts +++ b/web/src/app/api/v1/token-count/_post.ts @@ -32,7 +32,7 @@ const tokenCountRequestSchema = z.object({ type TokenCountRequest = z.infer -const DEFAULT_ANTHROPIC_MODEL = 'claude-opus-4-6' +const DEFAULT_ANTHROPIC_MODEL = 'claude-opus-4-7' export async function postTokenCount(params: { req: NextRequest diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index 717b5c9990..9ed91fd0a6 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -379,6 +379,162 @@ describe('Fireworks deployment routing', () => { } }) + it('transforms reasoning to reasoning_effort (defaults to medium)', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning: { enabled: true }, + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBe('medium') + expect(fetchedBodies[0].reasoning).toBeUndefined() + }) + + it('uses reasoning.effort value when specified', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning: { effort: 'high' }, + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBe('high') + expect(fetchedBodies[0].reasoning).toBeUndefined() + }) + + it('skips reasoning_effort when reasoning.enabled is false', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning: { enabled: false, effort: 'high' }, + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBeUndefined() + expect(fetchedBodies[0].reasoning).toBeUndefined() + }) + + it('preserves reasoning_effort when tools are present (Fireworks supports both)', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning: { effort: 'high' }, + tools: [{ type: 'function', function: { name: 'test', arguments: '{}' } }], + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBe('high') + expect(fetchedBodies[0].reasoning).toBeUndefined() + }) + + it('passes through reasoning_effort when set directly without reasoning object', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning_effort: 'low', + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBe('low') + }) + + it('preserves directly-set reasoning_effort when tools are present', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning_effort: 'low', + tools: [{ type: 'function', function: { name: 'test', arguments: '{}' } }], + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBe('low') + }) + it('logs when trying deployment and when falling back on 5xx', async () => { const spy = spyDeploymentHours(true) let callCount = 0 diff --git a/web/src/llm-api/__tests__/openrouter.test.ts b/web/src/llm-api/__tests__/openrouter.test.ts new file mode 100644 index 0000000000..88c108b68f --- /dev/null +++ b/web/src/llm-api/__tests__/openrouter.test.ts @@ -0,0 +1,166 @@ +import { describe, expect, it } from 'bun:test' + +import { extractUsageAndCost } from '../openrouter' + +describe('extractUsageAndCost', () => { + describe('OpenRouter response shapes', () => { + it('Anthropic shape: both cost and upstream_inference_cost populated with the SAME value (NOT additive)', () => { + // This is the shape that caused the 2x overcharge bug on every Anthropic call. + // The two fields report the same dollars via different routes (OR-billed-us + // and what-upstream-charged-us). Summing them doubles the bill. + const usage = { + prompt_tokens: 91437, + completion_tokens: 1209, + prompt_tokens_details: { cached_tokens: 87047 }, + completion_tokens_details: { reasoning_tokens: 0 }, + cost: 0.1171, + cost_details: { upstream_inference_cost: 0.1171 }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.1171, 6) + expect(result.cost).not.toBeCloseTo(0.2342, 6) // the old, buggy sum + expect(result.inputTokens).toBe(91437) + expect(result.outputTokens).toBe(1209) + expect(result.cacheReadInputTokens).toBe(87047) + }) + + it('Google shape: cost=0, upstream_inference_cost holds the real charge', () => { + const usage = { + prompt_tokens: 500, + completion_tokens: 200, + prompt_tokens_details: { cached_tokens: 0 }, + completion_tokens_details: { reasoning_tokens: 0 }, + cost: 0, + cost_details: { upstream_inference_cost: 0.000547 }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.000547, 9) + }) + + it('Legacy shape: cost populated, cost_details missing', () => { + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + cost: 0.042, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.042, 6) + }) + + it('Legacy shape: cost populated, cost_details present but upstream_inference_cost absent', () => { + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + cost: 0.042, + cost_details: {}, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.042, 6) + }) + + it('Legacy shape: cost populated, upstream_inference_cost null', () => { + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + cost: 0.042, + cost_details: { upstream_inference_cost: null }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.042, 6) + }) + + it('Anthropic shape with slight rounding drift: picks the larger of the two', () => { + // Defensive: if the two fields ever diverge due to OR-side rounding, + // using max avoids under-reporting our spend. + const usage = { + prompt_tokens: 1000, + completion_tokens: 100, + cost: 0.005, + cost_details: { upstream_inference_cost: 0.0051 }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.0051, 6) + }) + + it('both cost and upstream missing: returns 0', () => { + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBe(0) + }) + + it('entire usage object undefined: returns zeros', () => { + const result = extractUsageAndCost(undefined) + expect(result.cost).toBe(0) + expect(result.inputTokens).toBe(0) + expect(result.outputTokens).toBe(0) + expect(result.cacheReadInputTokens).toBe(0) + expect(result.reasoningTokens).toBe(0) + }) + + it('entire usage object null: returns zeros', () => { + const result = extractUsageAndCost(null) + expect(result.cost).toBe(0) + }) + + it('cost is non-number (string): treated as 0', () => { + const usage = { + cost: '0.042' as unknown as number, + cost_details: { upstream_inference_cost: 0.01 }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.01, 6) + }) + }) + + describe('token extraction', () => { + it('extracts all token counts correctly', () => { + const usage = { + prompt_tokens: 1000, + completion_tokens: 500, + prompt_tokens_details: { cached_tokens: 900 }, + completion_tokens_details: { reasoning_tokens: 200 }, + cost: 0.01, + } + const result = extractUsageAndCost(usage) + expect(result.inputTokens).toBe(1000) + expect(result.outputTokens).toBe(500) + expect(result.cacheReadInputTokens).toBe(900) + expect(result.reasoningTokens).toBe(200) + }) + + it('missing nested token detail objects default to 0', () => { + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + cost: 0.001, + } + const result = extractUsageAndCost(usage) + expect(result.cacheReadInputTokens).toBe(0) + expect(result.reasoningTokens).toBe(0) + }) + }) + + describe('regression: the exact bug from prod logs', () => { + // Pulled from debug/web.jsonl `openrouter-cost-audit` entries. + // Every one of these was billed at 2x the real price before the fix. + it.each([ + { cost: 0.1155, expected: 0.1155 }, + { cost: 0.0534, expected: 0.0534 }, + { cost: 0.0584, expected: 0.0584 }, + { cost: 0.1171, expected: 0.1171 }, + ])('bills $expected (not 2x) when cost === upstream === $cost', ({ cost, expected }) => { + const usage = { + prompt_tokens: 100000, + completion_tokens: 500, + prompt_tokens_details: { cached_tokens: 95000 }, + cost, + cost_details: { upstream_inference_cost: cost }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(expected, 6) + }) + }) +}) diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 4799e91ac6..e677700943 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -97,6 +97,20 @@ function createFireworksRequest(params: { model: modelIdOverride ?? getFireworksModelId(originalModel), } + // Transform OpenRouter-style `reasoning` object into Fireworks' `reasoning_effort`. + // Unlike OpenAI, Fireworks supports reasoning_effort together with function tools + // (e.g. GLM-4.5/5.1 and Kimi K2 are designed for interleaved reasoning + tool use). + if (fireworksBody.reasoning && typeof fireworksBody.reasoning === 'object') { + const reasoning = fireworksBody.reasoning as { + enabled?: boolean + effort?: 'high' | 'medium' | 'low' + } + if (reasoning.enabled ?? true) { + fireworksBody.reasoning_effort = reasoning.effort ?? 'medium' + } + } + delete fireworksBody.reasoning + // Strip OpenRouter-specific / internal fields delete fireworksBody.provider delete fireworksBody.transforms diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts index 08b7a31ef5..c084631726 100644 --- a/web/src/llm-api/openrouter.ts +++ b/web/src/llm-api/openrouter.ts @@ -61,15 +61,34 @@ function createOpenRouterRequest(params: { }) } -function extractUsageAndCost(usage: any): UsageData { - const openRouterCost = usage?.cost ?? 0 - const upstreamCost = usage?.cost_details?.upstream_inference_cost ?? 0 +/** + * Extract token counts and billed cost from an OpenRouter `usage` object. + * + * OpenRouter reports the billed charge in ONE of two fields — or in BOTH + * with the SAME value (observed on Anthropic routes). They are NOT additive: + * + * Anthropic routes: { cost: X, cost_details: { upstream_inference_cost: X } } + * Google routes: { cost: 0, cost_details: { upstream_inference_cost: X } } + * Some routes: { cost: X, cost_details: null } + * + * We previously summed the two fields, which double-charged every Anthropic + * call. Taking the max handles all three shapes safely. + * + * See: investigation notes + scripts/refund-openrouter-overcharge.ts + */ +export function extractUsageAndCost(usage: any): UsageData { + const openRouterCost = + typeof usage?.cost === 'number' ? usage.cost : 0 + const upstreamCost = + typeof usage?.cost_details?.upstream_inference_cost === 'number' + ? usage.cost_details.upstream_inference_cost + : 0 return { inputTokens: usage?.prompt_tokens ?? 0, outputTokens: usage?.completion_tokens ?? 0, cacheReadInputTokens: usage?.prompt_tokens_details?.cached_tokens ?? 0, reasoningTokens: usage?.completion_tokens_details?.reasoning_tokens ?? 0, - cost: openRouterCost + upstreamCost, + cost: Math.max(openRouterCost, upstreamCost), } }