From e006f0e255787fef7fd6da5cddc77c1e7bc91e82 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Thu, 16 Apr 2026 12:38:16 -0700 Subject: [PATCH 1/3] Update fireworks api to pass on reasoning effort, default medium --- .../__tests__/fireworks-deployment.test.ts | 156 ++++++++++++++++++ web/src/llm-api/fireworks.ts | 14 ++ 2 files changed, 170 insertions(+) diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index 717b5c999..9ed91fd0a 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -379,6 +379,162 @@ describe('Fireworks deployment routing', () => { } }) + it('transforms reasoning to reasoning_effort (defaults to medium)', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning: { enabled: true }, + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBe('medium') + expect(fetchedBodies[0].reasoning).toBeUndefined() + }) + + it('uses reasoning.effort value when specified', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning: { effort: 'high' }, + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBe('high') + expect(fetchedBodies[0].reasoning).toBeUndefined() + }) + + it('skips reasoning_effort when reasoning.enabled is false', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning: { enabled: false, effort: 'high' }, + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBeUndefined() + expect(fetchedBodies[0].reasoning).toBeUndefined() + }) + + it('preserves reasoning_effort when tools are present (Fireworks supports both)', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning: { effort: 'high' }, + tools: [{ type: 'function', function: { name: 'test', arguments: '{}' } }], + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBe('high') + expect(fetchedBodies[0].reasoning).toBeUndefined() + }) + + it('passes through reasoning_effort when set directly without reasoning object', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning_effort: 'low', + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBe('low') + }) + + it('preserves directly-set reasoning_effort when tools are present', async () => { + const fetchedBodies: Record[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchedBodies.push(body) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + await createFireworksRequestWithFallback({ + body: { + ...minimalBody, + reasoning_effort: 'low', + tools: [{ type: 'function', function: { name: 'test', arguments: '{}' } }], + } as never, + originalModel: 'z-ai/glm-5.1', + fetch: mockFetch, + logger, + useCustomDeployment: false, + sessionId: 'test-user-id', + }) + + expect(fetchedBodies).toHaveLength(1) + expect(fetchedBodies[0].reasoning_effort).toBe('low') + }) + it('logs when trying deployment and when falling back on 5xx', async () => { const spy = spyDeploymentHours(true) let callCount = 0 diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 4799e91ac..e67770094 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -97,6 +97,20 @@ function createFireworksRequest(params: { model: modelIdOverride ?? getFireworksModelId(originalModel), } + // Transform OpenRouter-style `reasoning` object into Fireworks' `reasoning_effort`. + // Unlike OpenAI, Fireworks supports reasoning_effort together with function tools + // (e.g. GLM-4.5/5.1 and Kimi K2 are designed for interleaved reasoning + tool use). + if (fireworksBody.reasoning && typeof fireworksBody.reasoning === 'object') { + const reasoning = fireworksBody.reasoning as { + enabled?: boolean + effort?: 'high' | 'medium' | 'low' + } + if (reasoning.enabled ?? true) { + fireworksBody.reasoning_effort = reasoning.effort ?? 'medium' + } + } + delete fireworksBody.reasoning + // Strip OpenRouter-specific / internal fields delete fireworksBody.provider delete fireworksBody.transforms From 39d3588fb5344cdfdd4c47c6df40cc3047623a0c Mon Sep 17 00:00:00 2001 From: James Grugett Date: Thu, 16 Apr 2026 17:52:00 -0700 Subject: [PATCH 2/3] Upgrade to Opus 4.7 --- .agents/claude-code-cli.ts | 2 +- .agents/codebuff-local-cli.ts | 2 +- .agents/codex-cli.ts | 2 +- .agents/gemini-cli.ts | 2 +- .agents/types/agent-definition.ts | 2 ++ agents-graveyard/editor/best-of-n/best-of-n-selector.ts | 2 +- agents-graveyard/editor/best-of-n/editor-implementor2.ts | 2 +- .../editor/best-of-n/editor-multi-prompt2.ts | 2 +- agents-graveyard/editor/reviewer-editor.ts | 2 +- agents/__tests__/editor.test.ts | 4 ++-- agents/__tests__/thinker.test.ts | 2 +- agents/base2/base2.ts | 2 +- agents/editor/best-of-n/best-of-n-selector2.ts | 2 +- agents/editor/best-of-n/editor-implementor.ts | 2 +- agents/editor/best-of-n/editor-multi-prompt.ts | 2 +- agents/editor/editor.ts | 2 +- agents/general-agent/general-agent.ts | 2 +- agents/reviewer/code-reviewer.ts | 2 +- .../reviewer/multi-prompt/code-reviewer-multi-prompt.ts | 2 +- agents/thinker/best-of-n/thinker-best-of-n.ts | 2 +- agents/thinker/best-of-n/thinker-selector.ts | 2 +- agents/thinker/thinker.ts | 2 +- agents/types/agent-definition.ts | 2 ++ common/src/constants/claude-oauth.ts | 2 +- common/src/constants/model-config.ts | 9 --------- common/src/templates/initial-agents-dir/README.md | 2 +- .../initial-agents-dir/types/agent-definition.ts | 2 ++ common/src/util/model-utils.ts | 7 ++----- web/src/app/api/v1/token-count/_post.ts | 2 +- 29 files changed, 33 insertions(+), 39 deletions(-) diff --git a/.agents/claude-code-cli.ts b/.agents/claude-code-cli.ts index 2de48ff5c..075d9f23e 100644 --- a/.agents/claude-code-cli.ts +++ b/.agents/claude-code-cli.ts @@ -10,7 +10,7 @@ const baseDefinition = createCliAgent({ startCommand: 'claude --dangerously-skip-permissions', permissionNote: 'Always use `--dangerously-skip-permissions` when testing to avoid permission prompts that would block automated tests.', - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', }) // Constants must be inside handleSteps since it gets serialized via .toString() diff --git a/.agents/codebuff-local-cli.ts b/.agents/codebuff-local-cli.ts index 1fdf975c6..8cb367a08 100644 --- a/.agents/codebuff-local-cli.ts +++ b/.agents/codebuff-local-cli.ts @@ -10,7 +10,7 @@ const baseDefinition = createCliAgent({ startCommand: 'bun --cwd=cli run dev', permissionNote: 'No permission flags needed for Codebuff local dev server.', - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', skipPrepPhase: true, cliSpecificDocs: `## Codebuff CLI Specific Guidance diff --git a/.agents/codex-cli.ts b/.agents/codex-cli.ts index 9914e3d7c..e7b18473a 100644 --- a/.agents/codex-cli.ts +++ b/.agents/codex-cli.ts @@ -81,7 +81,7 @@ const baseDefinition = createCliAgent({ startCommand: 'codex -a never -s danger-full-access', permissionNote: 'Always use `-a never -s danger-full-access` when testing to avoid approval prompts that would block automated tests.', - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', extraInputParams: { reviewType: { type: 'string', diff --git a/.agents/gemini-cli.ts b/.agents/gemini-cli.ts index 38186add4..d5eb7f45e 100644 --- a/.agents/gemini-cli.ts +++ b/.agents/gemini-cli.ts @@ -10,7 +10,7 @@ const baseDefinition = createCliAgent({ startCommand: 'gemini --yolo', permissionNote: 'Always use `--yolo` (or `--approval-mode yolo`) when testing to auto-approve all tool actions and avoid prompts that would block automated tests.', - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', cliSpecificDocs: `## Gemini CLI Commands Gemini CLI uses slash commands for navigation: diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts index 6323ec7b7..9dce8fa7c 100644 --- a/.agents/types/agent-definition.ts +++ b/.agents/types/agent-definition.ts @@ -380,7 +380,9 @@ export type ModelName = // Anthropic | 'anthropic/claude-sonnet-4.6' + | 'anthropic/claude-opus-4.7' | 'anthropic/claude-opus-4.6' + | 'anthropic/claude-opus-4.5' | 'anthropic/claude-haiku-4.5' | 'anthropic/claude-sonnet-4.5' | 'anthropic/claude-opus-4.1' diff --git a/agents-graveyard/editor/best-of-n/best-of-n-selector.ts b/agents-graveyard/editor/best-of-n/best-of-n-selector.ts index 27d9dd899..74f9d8c76 100644 --- a/agents-graveyard/editor/best-of-n/best-of-n-selector.ts +++ b/agents-graveyard/editor/best-of-n/best-of-n-selector.ts @@ -17,7 +17,7 @@ export const createBestOfNSelector = (options: { model: isSonnet ? 'anthropic/claude-sonnet-4.5' : isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : isGemini ? 'google/gemini-3-pro-preview' : 'openai/gpt-5.1', diff --git a/agents-graveyard/editor/best-of-n/editor-implementor2.ts b/agents-graveyard/editor/best-of-n/editor-implementor2.ts index 944769317..6a5dc1085 100644 --- a/agents-graveyard/editor/best-of-n/editor-implementor2.ts +++ b/agents-graveyard/editor/best-of-n/editor-implementor2.ts @@ -13,7 +13,7 @@ export const createBestOfNImplementor2 = (options: { model: isGpt5 ? 'openai/gpt-5.2' : isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : 'anthropic/claude-sonnet-4.5', displayName: isGpt5 ? 'GPT-5 Implementation Generator v2' diff --git a/agents-graveyard/editor/best-of-n/editor-multi-prompt2.ts b/agents-graveyard/editor/best-of-n/editor-multi-prompt2.ts index be9722b5e..0bedd6953 100644 --- a/agents-graveyard/editor/best-of-n/editor-multi-prompt2.ts +++ b/agents-graveyard/editor/best-of-n/editor-multi-prompt2.ts @@ -10,7 +10,7 @@ import type { SecretAgentDefinition } from '../../types/secret-agent-definition' export function createMultiPromptEditor(): Omit { return { publisher, - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', displayName: 'Multi-Prompt Editor', spawnerPrompt: 'Edits code by spawning multiple implementor agents with different strategy prompts, selects the best implementation, and applies the changes. It also returns further suggested improvements which you should take seriously and act on. Pass as input an array of short prompts specifying different implementation approaches or strategies. Make sure to read any files intended to be edited before spawning this agent.', diff --git a/agents-graveyard/editor/reviewer-editor.ts b/agents-graveyard/editor/reviewer-editor.ts index 4049cb0c6..c6cfe42b6 100644 --- a/agents-graveyard/editor/reviewer-editor.ts +++ b/agents-graveyard/editor/reviewer-editor.ts @@ -12,7 +12,7 @@ export const createCodeEditor = (options: { model: options.model === 'gpt-5' ? 'openai/gpt-5.1' - : 'anthropic/claude-opus-4.6', + : 'anthropic/claude-opus-4.7', displayName: 'Code Editor', spawnerPrompt: 'Expert code reviewer that reviews recent code changes and makes improvements.', diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts index 9e14909f8..030857c8d 100644 --- a/agents/__tests__/editor.test.ts +++ b/agents/__tests__/editor.test.ts @@ -28,7 +28,7 @@ describe('editor agent', () => { }) test('uses opus model by default', () => { - expect(editor.model).toBe('anthropic/claude-opus-4.6') + expect(editor.model).toBe('anthropic/claude-opus-4.7') }) test('has output mode set to structured_output', () => { @@ -54,7 +54,7 @@ describe('editor agent', () => { describe('createCodeEditor', () => { test('creates opus editor by default', () => { const opusEditor = createCodeEditor({ model: 'opus' }) - expect(opusEditor.model).toBe('anthropic/claude-opus-4.6') + expect(opusEditor.model).toBe('anthropic/claude-opus-4.7') }) test('creates gpt-5 editor', () => { diff --git a/agents/__tests__/thinker.test.ts b/agents/__tests__/thinker.test.ts index ac36c12db..0e44a9743 100644 --- a/agents/__tests__/thinker.test.ts +++ b/agents/__tests__/thinker.test.ts @@ -29,7 +29,7 @@ describe('thinker agent', () => { }) test('uses opus model', () => { - expect(thinker.model).toBe('anthropic/claude-opus-4.6') + expect(thinker.model).toBe('anthropic/claude-opus-4.7') }) test('has output mode set to structured_output', () => { diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 3cc65d5b4..b4d05ca36 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -25,7 +25,7 @@ export function createBase2( const isFree = mode === 'free' const isSonnet = false - const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6' + const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7' return { publisher, diff --git a/agents/editor/best-of-n/best-of-n-selector2.ts b/agents/editor/best-of-n/best-of-n-selector2.ts index cc35abbab..cc28b2411 100644 --- a/agents/editor/best-of-n/best-of-n-selector2.ts +++ b/agents/editor/best-of-n/best-of-n-selector2.ts @@ -16,7 +16,7 @@ export const createBestOfNSelector2 = (options: { model: isSonnet ? 'anthropic/claude-sonnet-4.5' : isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : 'openai/gpt-5.4', ...(isGpt5 && { reasoningOptions: { diff --git a/agents/editor/best-of-n/editor-implementor.ts b/agents/editor/best-of-n/editor-implementor.ts index 87ec441ba..fe9fe13eb 100644 --- a/agents/editor/best-of-n/editor-implementor.ts +++ b/agents/editor/best-of-n/editor-implementor.ts @@ -16,7 +16,7 @@ export const createBestOfNImplementor = (options: { model: isSonnet ? 'anthropic/claude-sonnet-4.5' : isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : isGemini ? 'google/gemini-3-pro-preview' : 'openai/gpt-5.1', diff --git a/agents/editor/best-of-n/editor-multi-prompt.ts b/agents/editor/best-of-n/editor-multi-prompt.ts index 5c54cf969..922fb43f2 100644 --- a/agents/editor/best-of-n/editor-multi-prompt.ts +++ b/agents/editor/best-of-n/editor-multi-prompt.ts @@ -11,7 +11,7 @@ import type { SecretAgentDefinition } from '../../types/secret-agent-definition' export function createMultiPromptEditor(): Omit { return { publisher, - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', providerOptions: { only: ['amazon-bedrock'], }, diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index e191609ad..3d208aa13 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -14,7 +14,7 @@ export const createCodeEditor = (options: { ? 'openai/gpt-5.1' : options.model === 'glm' ? 'z-ai/glm-5.1' - : 'anthropic/claude-opus-4.6', + : 'anthropic/claude-opus-4.7', ...(options.model === 'opus' && { providerOptions: { only: ['amazon-bedrock'], diff --git a/agents/general-agent/general-agent.ts b/agents/general-agent/general-agent.ts index 26f209958..14d12e440 100644 --- a/agents/general-agent/general-agent.ts +++ b/agents/general-agent/general-agent.ts @@ -12,7 +12,7 @@ export const createGeneralAgent = (options: { return { publisher, - model: isGpt5 ? 'openai/gpt-5.4' : 'anthropic/claude-opus-4.6', + model: isGpt5 ? 'openai/gpt-5.4' : 'anthropic/claude-opus-4.7', ...(!isGpt5 && { providerOptions: { only: ['amazon-bedrock'], diff --git a/agents/reviewer/code-reviewer.ts b/agents/reviewer/code-reviewer.ts index 9cc840d69..31b261d99 100644 --- a/agents/reviewer/code-reviewer.ts +++ b/agents/reviewer/code-reviewer.ts @@ -64,7 +64,7 @@ Be extremely concise.`, const definition: SecretAgentDefinition = { id: 'code-reviewer', publisher, - ...createReviewer('anthropic/claude-opus-4.6'), + ...createReviewer('anthropic/claude-opus-4.7'), providerOptions: { only: ['amazon-bedrock'], }, diff --git a/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts b/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts index a6a380e3e..e7bac906e 100644 --- a/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts +++ b/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts @@ -14,7 +14,7 @@ export function createCodeReviewerMultiPrompt(): Omit< > { return { publisher, - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', providerOptions: { only: ['amazon-bedrock'], }, diff --git a/agents/thinker/best-of-n/thinker-best-of-n.ts b/agents/thinker/best-of-n/thinker-best-of-n.ts index 3e1e532c5..5c09fae84 100644 --- a/agents/thinker/best-of-n/thinker-best-of-n.ts +++ b/agents/thinker/best-of-n/thinker-best-of-n.ts @@ -18,7 +18,7 @@ export function createThinkerBestOfN( model: isGpt5 ? 'openai/gpt-5.1' : isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : 'anthropic/claude-sonnet-4.5', ...(isOpus && { providerOptions: { diff --git a/agents/thinker/best-of-n/thinker-selector.ts b/agents/thinker/best-of-n/thinker-selector.ts index ab10bff69..62bf83420 100644 --- a/agents/thinker/best-of-n/thinker-selector.ts +++ b/agents/thinker/best-of-n/thinker-selector.ts @@ -9,7 +9,7 @@ export function createThinkerSelector( return { publisher, model: isOpus - ? 'anthropic/claude-opus-4.6' + ? 'anthropic/claude-opus-4.7' : 'anthropic/claude-sonnet-4.5', ...(isOpus && { providerOptions: { diff --git a/agents/thinker/thinker.ts b/agents/thinker/thinker.ts index 47fc54ec7..6a9f7d808 100644 --- a/agents/thinker/thinker.ts +++ b/agents/thinker/thinker.ts @@ -5,7 +5,7 @@ import type { SecretAgentDefinition } from '../types/secret-agent-definition' const definition: SecretAgentDefinition = { id: 'thinker', publisher, - model: 'anthropic/claude-opus-4.6', + model: 'anthropic/claude-opus-4.7', providerOptions: { only: ['amazon-bedrock'], }, diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index b81fc69c8..b28a77c31 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -380,7 +380,9 @@ export type ModelName = // Anthropic | 'anthropic/claude-sonnet-4.6' + | 'anthropic/claude-opus-4.7' | 'anthropic/claude-opus-4.6' + | 'anthropic/claude-opus-4.5' | 'anthropic/claude-haiku-4.5' | 'anthropic/claude-sonnet-4.5' | 'anthropic/claude-opus-4.1' diff --git a/common/src/constants/claude-oauth.ts b/common/src/constants/claude-oauth.ts index 8204f8db7..16b428610 100644 --- a/common/src/constants/claude-oauth.ts +++ b/common/src/constants/claude-oauth.ts @@ -82,7 +82,7 @@ export const OPENROUTER_TO_ANTHROPIC_MODEL_MAP: Record = { 'anthropic/claude-4-sonnet': 'claude-sonnet-4-20250514', // Claude 4.x Opus models - 'anthropic/claude-opus-4.6': 'claude-opus-4-6', + 'anthropic/claude-opus-4.7': 'claude-opus-4-7', 'anthropic/claude-opus-4.5': 'claude-opus-4-5-20251101', 'anthropic/claude-opus-4.1': 'claude-opus-4-1-20250805', 'anthropic/claude-opus-4': 'claude-opus-4-1-20250805', diff --git a/common/src/constants/model-config.ts b/common/src/constants/model-config.ts index c75bda26e..10e579a92 100644 --- a/common/src/constants/model-config.ts +++ b/common/src/constants/model-config.ts @@ -124,15 +124,6 @@ export const providerModelNames = { export type Model = (typeof models)[keyof typeof models] | (string & {}) -export const shouldCacheModels = [ - 'anthropic/claude-opus-4.1', - 'anthropic/claude-sonnet-4', - 'anthropic/claude-opus-4', - 'anthropic/claude-3.7-sonnet', - 'anthropic/claude-3.5-haiku', - 'z-ai/glm-4.5', - 'qwen/qwen3-coder', -] const nonCacheableModels = [ models.openrouter_grok_4, ] satisfies string[] as string[] diff --git a/common/src/templates/initial-agents-dir/README.md b/common/src/templates/initial-agents-dir/README.md index f9290a7ca..c02ddab90 100644 --- a/common/src/templates/initial-agents-dir/README.md +++ b/common/src/templates/initial-agents-dir/README.md @@ -170,7 +170,7 @@ async *handleSteps() { Choose models based on your agent's needs: -- **`anthropic/claude-opus-4.6`**: Best general-purpose capabilities and code generation +- **`anthropic/claude-opus-4.7`**: Best general-purpose capabilities and code generation - **`openai/gpt-5.2`**: Best at complex reasoning and planning - **`google/gemini-3.1-flash-lite-preview`**: Fast and cost-effective for simple or medium-complexity tasks diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index b81fc69c8..b28a77c31 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -380,7 +380,9 @@ export type ModelName = // Anthropic | 'anthropic/claude-sonnet-4.6' + | 'anthropic/claude-opus-4.7' | 'anthropic/claude-opus-4.6' + | 'anthropic/claude-opus-4.5' | 'anthropic/claude-haiku-4.5' | 'anthropic/claude-sonnet-4.5' | 'anthropic/claude-opus-4.1' diff --git a/common/src/util/model-utils.ts b/common/src/util/model-utils.ts index 00277dd06..17d1f388e 100644 --- a/common/src/util/model-utils.ts +++ b/common/src/util/model-utils.ts @@ -8,11 +8,8 @@ function getExplicitlyDefinedModels(): Set { if (explicitlyDefinedModels === null) { // NOTE: Inline require() avoids circular dependency - old-constants imports this // module, so a top-level import would create a circular reference - const { models, shouldCacheModels } = require('../old-constants') - explicitlyDefinedModels = new Set([ - ...(Object.values(models) as string[]), - ...(Object.values(shouldCacheModels) as string[]), - ]) + const { models } = require('../old-constants') + explicitlyDefinedModels = new Set(Object.values(models) as string[]) } return explicitlyDefinedModels } diff --git a/web/src/app/api/v1/token-count/_post.ts b/web/src/app/api/v1/token-count/_post.ts index 1daea6772..f7224c25d 100644 --- a/web/src/app/api/v1/token-count/_post.ts +++ b/web/src/app/api/v1/token-count/_post.ts @@ -32,7 +32,7 @@ const tokenCountRequestSchema = z.object({ type TokenCountRequest = z.infer -const DEFAULT_ANTHROPIC_MODEL = 'claude-opus-4-6' +const DEFAULT_ANTHROPIC_MODEL = 'claude-opus-4-7' export async function postTokenCount(params: { req: NextRequest From 21b5a269b4e60afd4b3a654e5e96e23e89162174 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Thu, 16 Apr 2026 18:32:34 -0700 Subject: [PATCH 3/3] Fix potential Anthropic double-charge bug (didn't reach prod) --- web/src/llm-api/__tests__/openrouter.test.ts | 166 +++++++++++++++++++ web/src/llm-api/openrouter.ts | 27 ++- 2 files changed, 189 insertions(+), 4 deletions(-) create mode 100644 web/src/llm-api/__tests__/openrouter.test.ts diff --git a/web/src/llm-api/__tests__/openrouter.test.ts b/web/src/llm-api/__tests__/openrouter.test.ts new file mode 100644 index 000000000..88c108b68 --- /dev/null +++ b/web/src/llm-api/__tests__/openrouter.test.ts @@ -0,0 +1,166 @@ +import { describe, expect, it } from 'bun:test' + +import { extractUsageAndCost } from '../openrouter' + +describe('extractUsageAndCost', () => { + describe('OpenRouter response shapes', () => { + it('Anthropic shape: both cost and upstream_inference_cost populated with the SAME value (NOT additive)', () => { + // This is the shape that caused the 2x overcharge bug on every Anthropic call. + // The two fields report the same dollars via different routes (OR-billed-us + // and what-upstream-charged-us). Summing them doubles the bill. + const usage = { + prompt_tokens: 91437, + completion_tokens: 1209, + prompt_tokens_details: { cached_tokens: 87047 }, + completion_tokens_details: { reasoning_tokens: 0 }, + cost: 0.1171, + cost_details: { upstream_inference_cost: 0.1171 }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.1171, 6) + expect(result.cost).not.toBeCloseTo(0.2342, 6) // the old, buggy sum + expect(result.inputTokens).toBe(91437) + expect(result.outputTokens).toBe(1209) + expect(result.cacheReadInputTokens).toBe(87047) + }) + + it('Google shape: cost=0, upstream_inference_cost holds the real charge', () => { + const usage = { + prompt_tokens: 500, + completion_tokens: 200, + prompt_tokens_details: { cached_tokens: 0 }, + completion_tokens_details: { reasoning_tokens: 0 }, + cost: 0, + cost_details: { upstream_inference_cost: 0.000547 }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.000547, 9) + }) + + it('Legacy shape: cost populated, cost_details missing', () => { + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + cost: 0.042, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.042, 6) + }) + + it('Legacy shape: cost populated, cost_details present but upstream_inference_cost absent', () => { + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + cost: 0.042, + cost_details: {}, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.042, 6) + }) + + it('Legacy shape: cost populated, upstream_inference_cost null', () => { + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + cost: 0.042, + cost_details: { upstream_inference_cost: null }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.042, 6) + }) + + it('Anthropic shape with slight rounding drift: picks the larger of the two', () => { + // Defensive: if the two fields ever diverge due to OR-side rounding, + // using max avoids under-reporting our spend. + const usage = { + prompt_tokens: 1000, + completion_tokens: 100, + cost: 0.005, + cost_details: { upstream_inference_cost: 0.0051 }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.0051, 6) + }) + + it('both cost and upstream missing: returns 0', () => { + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBe(0) + }) + + it('entire usage object undefined: returns zeros', () => { + const result = extractUsageAndCost(undefined) + expect(result.cost).toBe(0) + expect(result.inputTokens).toBe(0) + expect(result.outputTokens).toBe(0) + expect(result.cacheReadInputTokens).toBe(0) + expect(result.reasoningTokens).toBe(0) + }) + + it('entire usage object null: returns zeros', () => { + const result = extractUsageAndCost(null) + expect(result.cost).toBe(0) + }) + + it('cost is non-number (string): treated as 0', () => { + const usage = { + cost: '0.042' as unknown as number, + cost_details: { upstream_inference_cost: 0.01 }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(0.01, 6) + }) + }) + + describe('token extraction', () => { + it('extracts all token counts correctly', () => { + const usage = { + prompt_tokens: 1000, + completion_tokens: 500, + prompt_tokens_details: { cached_tokens: 900 }, + completion_tokens_details: { reasoning_tokens: 200 }, + cost: 0.01, + } + const result = extractUsageAndCost(usage) + expect(result.inputTokens).toBe(1000) + expect(result.outputTokens).toBe(500) + expect(result.cacheReadInputTokens).toBe(900) + expect(result.reasoningTokens).toBe(200) + }) + + it('missing nested token detail objects default to 0', () => { + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + cost: 0.001, + } + const result = extractUsageAndCost(usage) + expect(result.cacheReadInputTokens).toBe(0) + expect(result.reasoningTokens).toBe(0) + }) + }) + + describe('regression: the exact bug from prod logs', () => { + // Pulled from debug/web.jsonl `openrouter-cost-audit` entries. + // Every one of these was billed at 2x the real price before the fix. + it.each([ + { cost: 0.1155, expected: 0.1155 }, + { cost: 0.0534, expected: 0.0534 }, + { cost: 0.0584, expected: 0.0584 }, + { cost: 0.1171, expected: 0.1171 }, + ])('bills $expected (not 2x) when cost === upstream === $cost', ({ cost, expected }) => { + const usage = { + prompt_tokens: 100000, + completion_tokens: 500, + prompt_tokens_details: { cached_tokens: 95000 }, + cost, + cost_details: { upstream_inference_cost: cost }, + } + const result = extractUsageAndCost(usage) + expect(result.cost).toBeCloseTo(expected, 6) + }) + }) +}) diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts index 08b7a31ef..c08463172 100644 --- a/web/src/llm-api/openrouter.ts +++ b/web/src/llm-api/openrouter.ts @@ -61,15 +61,34 @@ function createOpenRouterRequest(params: { }) } -function extractUsageAndCost(usage: any): UsageData { - const openRouterCost = usage?.cost ?? 0 - const upstreamCost = usage?.cost_details?.upstream_inference_cost ?? 0 +/** + * Extract token counts and billed cost from an OpenRouter `usage` object. + * + * OpenRouter reports the billed charge in ONE of two fields — or in BOTH + * with the SAME value (observed on Anthropic routes). They are NOT additive: + * + * Anthropic routes: { cost: X, cost_details: { upstream_inference_cost: X } } + * Google routes: { cost: 0, cost_details: { upstream_inference_cost: X } } + * Some routes: { cost: X, cost_details: null } + * + * We previously summed the two fields, which double-charged every Anthropic + * call. Taking the max handles all three shapes safely. + * + * See: investigation notes + scripts/refund-openrouter-overcharge.ts + */ +export function extractUsageAndCost(usage: any): UsageData { + const openRouterCost = + typeof usage?.cost === 'number' ? usage.cost : 0 + const upstreamCost = + typeof usage?.cost_details?.upstream_inference_cost === 'number' + ? usage.cost_details.upstream_inference_cost + : 0 return { inputTokens: usage?.prompt_tokens ?? 0, outputTokens: usage?.completion_tokens ?? 0, cacheReadInputTokens: usage?.prompt_tokens_details?.cached_tokens ?? 0, reasoningTokens: usage?.completion_tokens_details?.reasoning_tokens ?? 0, - cost: openRouterCost + upstreamCost, + cost: Math.max(openRouterCost, upstreamCost), } }