diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index 009b8fbaf..f8837d964 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -257,50 +257,85 @@ function supportsCacheMarkers(model: Provider.Model): boolean { return false } -function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] { - // Only Anthropic and OpenRouter expose a cache-control TTL in their AI SDK; - // the other providers ignore an unknown `ttl` field, so we only thread it - // into those two branches. Default (unset) stays the provider 5m default. +// The cache-control marker shape differs per provider/SDK. This is the single +// source of truth, keyed by the SDK provider-options namespace. `applyCaching` +// attaches the whole object (keyed by stored providerID) and lets `message()` +// remap the active provider's namespace to its SDK key; `tools()` (which +// bypasses that remap) resolves a single namespace up front via `cacheMarkerFor`. +// Only Anthropic and OpenRouter expose a TTL in their AI SDK — the others ignore +// an unknown `ttl`, so we thread it only there. +function cacheMarkerOptions(model: Provider.Model) { const ttl = model.cachePromptTTL === "1h" ? { ttl: "1h" as const } : {} - const providerOptions = { - anthropic: { - cacheControl: { type: "ephemeral", ...ttl }, - }, - openrouter: { - cacheControl: { type: "ephemeral", ...ttl }, - }, - bedrock: { - cachePoint: { type: "default" }, - }, - openaiCompatible: { - cache_control: { type: "ephemeral" }, - }, - copilot: { - copilot_cache_control: { type: "ephemeral" }, - }, - alibaba: { - cacheControl: { type: "ephemeral" }, - }, + return { + anthropic: { cacheControl: { type: "ephemeral", ...ttl } }, + openrouter: { cacheControl: { type: "ephemeral", ...ttl } }, + bedrock: { cachePoint: { type: "default" } }, + openaiCompatible: { cache_control: { type: "ephemeral" } }, + copilot: { copilot_cache_control: { type: "ephemeral" } }, + alibaba: { cacheControl: { type: "ephemeral" } }, } +} + +// Resolve the marker for a single model, already keyed under the SDK namespace +// the AI SDK expects — i.e. the remap that `message()` performs for messages, +// done up front. Used by `tools()`, whose tools never pass through `message()`. +// Returns undefined for providers that don't take inline markers (callers gate +// on `supportsCacheMarkers` first, so this is just a type-safety fallback). +function cacheMarkerFor(model: Provider.Model): Record | undefined { + const shapes = cacheMarkerOptions(model) + const ns: keyof typeof shapes | undefined = + model.api.npm === "@ai-sdk/anthropic" || model.api.npm === "@ai-sdk/google-vertex/anthropic" + ? "anthropic" + : model.api.npm === "@openrouter/ai-sdk-provider" + ? "openrouter" + : model.api.npm === "@ai-sdk/amazon-bedrock" + ? "bedrock" + : model.api.npm === "@ai-sdk/github-copilot" + ? "copilot" + : model.api.npm === "@ai-sdk/alibaba" + ? "alibaba" + : undefined + if (!ns) return undefined + return { [ns]: shapes[ns] } +} - // Strategy: place cache breakpoints at stable prefix boundaries (max 4 allowed by Anthropic) - // 1. Last system message — system prompt never changes - // 2. Midpoint of conversation history — long prefix second-level cache - // 3. Message before the last user message — stable history boundary +function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] { + const providerOptions = cacheMarkerOptions(model) + + // Strategy: prefix caching is longest-common-prefix based with a backward + // lookback window (Anthropic walks back ~20 blocks from a breakpoint to find + // a prior write). The markers that grow the cached prefix are pinned to the + // *tail* of the request. We place up to three stable breakpoints (Anthropic + // allows max 4): + // 1. Last system message — the immutable prompt prefix. + // 2+3. The last TWO messages — a "rolling double buffer". Each turn marks + // messages[-2] and messages[-1]; next turn the old [-1] is now [-2] and + // still carries its marker, so the lookback gets a cache READ hit, while + // the new [-1] is the WRITE for the turn after. + // + // Why two and not one: the second (next-to-last) marker is the safety + // net for the tail boundary. When the last message is removed — a + // tool-call retry, a Ctrl-C, or the user editing/deleting their latest + // message — a lone tail marker disappears with it, and how much of the + // surrounding prefix the provider then evicts depends on the upstream + // (Anthropic) KV-cache implementation. The next-to-last marker is a + // still-present, further-back write the next lookback can land on, so the + // worst case degrades to "recompute only the removed message" instead of + // "recompute the whole history". It also covers turns that append >20 + // blocks (tool spam pushes the prior write outside the lookback window). + // Cost is ~equal to a single marker: the two adjacent breakpoints write + // roughly the same incremental bytes as one, split in two, and a hit + // never rewrites. A third marker would write a segment never read + // independently, so two is the minimum that covers the boundary. + // We deliberately do NOT mark a drifting midpoint or a fixed before-last-user + // INDEX: those shift every turn without tracking the tail. const targets: ModelMessage[] = [] const systemMsgs = msgs.filter((msg) => msg.role === "system") if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1]) const nonSystem = msgs.filter((msg) => msg.role !== "system") - const lastUserIdx = nonSystem.findLastIndex((msg) => msg.role === "user") - if (lastUserIdx >= 1) { - targets.push(nonSystem[lastUserIdx - 1]) - const midpoint = Math.floor(lastUserIdx / 2) - if (midpoint > 0 && midpoint < lastUserIdx - 1) targets.push(nonSystem[midpoint]) - } else if (lastUserIdx === 0) { - targets.push(nonSystem[0]) - } + for (const msg of nonSystem.slice(-2)) targets.push(msg) for (const msg of unique(targets)) { const useMessageLevelOptions = @@ -450,6 +485,25 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re return msgs } +// Place a cache breakpoint on the tool definitions. The cache hierarchy is +// `tools` → `system` → `messages`, so marking the LAST tool caches the entire +// tool-schema block (often several KB) as a stable prefix that sits in front of +// the system + message caches. Tools are passed to the SDK separately from +// `message()` and never go through its providerID→SDK-key remap, so we resolve +// the SDK-keyed marker via `cacheMarkerFor`. Tool registration order is stable +// (insertion order of the tools record), so "last tool" is deterministic. +export function tools>(tools: T, model: Provider.Model): T { + if (!supportsCacheMarkers(model)) return tools + const marker = cacheMarkerFor(model) + if (!marker) return tools + const names = Object.keys(tools) + if (names.length === 0) return tools + + const last = tools[names[names.length - 1]] + last.providerOptions = mergeDeep(last.providerOptions ?? {}, marker) + return tools +} + export function temperature(model: Provider.Model) { const id = model.id.toLowerCase() if (id.includes("qwen")) return 0.55 diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index 856ba1c7a..98f20dcb5 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -583,7 +583,7 @@ const live: Layer.Layer< topK: params.topK, providerOptions: ProviderTransform.providerOptions(input.model, params.options), activeTools: Object.keys(tools).filter((x) => x !== "invalid"), - tools, + tools: ProviderTransform.tools(tools, input.model), toolChoice: input.toolChoice, maxOutputTokens: params.maxOutputTokens, abortSignal: input.abort, diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index 89231f27f..9b249af21 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -2172,6 +2172,150 @@ describe("ProviderTransform.message - cache control on gateway", () => { expect(result[0].providerOptions).toBeUndefined() }) + + test("multi-turn anthropic pins breakpoints to last system + last two messages", () => { + const model = createModel({ + providerID: "anthropic", + api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, + }) + const msgs = [ + { role: "system", content: "You are a helpful assistant" }, + { role: "user", content: "first question" }, + { role: "assistant", content: "first answer" }, + { role: "user", content: "second question" }, + { role: "assistant", content: "second answer" }, + { role: "user", content: "third question" }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + // The last system message plus the last TWO messages carry a breakpoint + // (rolling double buffer): the prior turn's tail marker survives as the + // read point while the new tail marker is the next write. + const marked = result + .map((msg, index) => ({ index, role: msg.role, hasCache: !!msg.providerOptions?.anthropic?.cacheControl })) + .filter((m) => m.hasCache) + + expect(marked).toEqual([ + { index: 0, role: "system", hasCache: true }, + { index: 4, role: "assistant", hasCache: true }, + { index: 5, role: "user", hasCache: true }, + ]) + // No drifting midpoint marker on earlier turns. + expect(result[2].providerOptions?.anthropic).toBeUndefined() + expect(result[3].providerOptions?.anthropic).toBeUndefined() + }) + + test("content-level provider marks the last two messages regardless of role", () => { + // Providers that reach applyCaching honor message-level markers (incl. + // assistant), so the double-tail marks the last two messages by position. + const model = createModel({ + providerID: "openrouter", + api: { id: "anthropic/claude-sonnet-4", url: "https://openrouter.ai/api", npm: "@openrouter/ai-sdk-provider" }, + }) + const msgs = [ + { role: "system", content: [{ type: "text", text: "sys" }] }, + { role: "user", content: [{ type: "text", text: "first question" }] }, + { role: "assistant", content: [{ type: "text", text: "first answer" }] }, + { role: "user", content: [{ type: "text", text: "second question" }] }, + { role: "assistant", content: [{ type: "text", text: "second answer" }] }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + const hasMarker = (msg: any) => + !!msg.providerOptions?.openrouter || + msg.content?.some?.((c: any) => c.providerOptions?.openrouter) + + // The last two messages (index 3 user, 4 assistant) are both marked. + expect(hasMarker(result[3])).toBe(true) + expect(hasMarker(result[4])).toBe(true) + // Earlier turns are not. + expect(hasMarker(result[1])).toBe(false) + expect(hasMarker(result[2])).toBe(false) + }) +}) + +describe("ProviderTransform.tools", () => { + const createModel = (overrides: Partial = {}): any => ({ + id: "test/test-model", + providerID: "test", + api: { id: "test-model", url: "https://api.test.com", npm: "@ai-sdk/openai" }, + name: "Test Model", + ...overrides, + }) + + test("marks the last tool for anthropic", () => { + const model = createModel({ + providerID: "anthropic", + api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, + }) + const tools = { read: {}, write: {}, bash: {} } as Record + + const result = ProviderTransform.tools(tools, model) + + expect(result.read.providerOptions).toBeUndefined() + expect(result.write.providerOptions).toBeUndefined() + expect(result.bash.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral" } } }) + }) + + test("threads cachePromptTTL 1h into the tool marker", () => { + const model = createModel({ + providerID: "anthropic", + api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, + cachePromptTTL: "1h", + }) + const tools = { read: {}, bash: {} } as Record + + const result = ProviderTransform.tools(tools, model) + + expect(result.bash.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } } }) + }) + + test("uses cachePoint shape for bedrock", () => { + const model = createModel({ + providerID: "amazon-bedrock", + api: { id: "anthropic.claude-sonnet-4", url: "https://api.test.com", npm: "@ai-sdk/amazon-bedrock" }, + }) + const tools = { read: {}, bash: {} } as Record + + const result = ProviderTransform.tools(tools, model) + + expect(result.bash.providerOptions).toEqual({ bedrock: { cachePoint: { type: "default" } } }) + }) + + test("uses copilot_cache_control shape for github-copilot", () => { + const model = createModel({ + providerID: "github-copilot", + api: { id: "claude-sonnet-4", url: "https://api.githubcopilot.com", npm: "@ai-sdk/github-copilot" }, + }) + const tools = { read: {}, bash: {} } as Record + + const result = ProviderTransform.tools(tools, model) + + expect(result.bash.providerOptions).toEqual({ copilot: { copilot_cache_control: { type: "ephemeral" } } }) + }) + + test("no marker for providers that do not support cache markers", () => { + const model = createModel({ + providerID: "openai", + api: { id: "gpt-4", url: "https://api.openai.com", npm: "@ai-sdk/openai" }, + }) + const tools = { read: {}, bash: {} } as Record + + const result = ProviderTransform.tools(tools, model) + + expect(result.read.providerOptions).toBeUndefined() + expect(result.bash.providerOptions).toBeUndefined() + }) + + test("no-op on empty tools", () => { + const model = createModel({ + providerID: "anthropic", + api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, + }) + expect(ProviderTransform.tools({}, model)).toEqual({}) + }) }) describe("ProviderTransform.variants", () => {