From 98e9cf6e0b40ee69755ea728aa4f13ee094199bc Mon Sep 17 00:00:00 2001 From: wqymi Date: Mon, 22 Jun 2026 15:47:30 +0800 Subject: [PATCH 1/5] fix(provider): pin prompt-cache breakpoints to rolling head The applyCaching message-level strategy placed breakpoints at a drifting midpoint and before-last-user, positions that shift every turn and never cache the request tail. Since prompt caching is longest-common-prefix based, only a marker pinned to the end grows the cached prefix each turn, so the old markers spent the budget without improving hit rate (and often landed on assistant/tool messages dropped by openai-compatible proxies). Replace them with a stable two-breakpoint scheme: last system message + last message (rolling head), mirroring upstream cc. Add a multi-turn test asserting only those two positions are marked. --- packages/opencode/src/provider/transform.ts | 24 +++++++-------- .../opencode/test/provider/transform.test.ts | 30 +++++++++++++++++++ 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index 009b8fbaf..02dfdcb59 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -283,24 +283,24 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage }, } - // Strategy: place cache breakpoints at stable prefix boundaries (max 4 allowed by Anthropic) - // 1. Last system message — system prompt never changes - // 2. Midpoint of conversation history — long prefix second-level cache - // 3. Message before the last user message — stable history boundary + // Strategy: prefix cache is longest-common-prefix based, so the only marker + // that grows the cached prefix every turn is one pinned to the *end* of the + // request. We place two stable breakpoints (max 4 allowed by Anthropic): + // 1. Last system message — the immutable prompt prefix. + // 2. The last message — "rolling head". Writing the cache up to the current + // tail makes the entire history a cache *read* on the next turn. + // + // We deliberately do NOT place markers at a drifting midpoint or at + // before-last-user: those indices shift every turn, land on assistant/tool + // messages (silently dropped by openai-compatible proxies), and spend the + // breakpoint budget without ever caching the tail. See internal/docs/cache-policy.md. const targets: ModelMessage[] = [] const systemMsgs = msgs.filter((msg) => msg.role === "system") if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1]) const nonSystem = msgs.filter((msg) => msg.role !== "system") - const lastUserIdx = nonSystem.findLastIndex((msg) => msg.role === "user") - if (lastUserIdx >= 1) { - targets.push(nonSystem[lastUserIdx - 1]) - const midpoint = Math.floor(lastUserIdx / 2) - if (midpoint > 0 && midpoint < lastUserIdx - 1) targets.push(nonSystem[midpoint]) - } else if (lastUserIdx === 0) { - targets.push(nonSystem[0]) - } + if (nonSystem.length > 0) targets.push(nonSystem[nonSystem.length - 1]) for (const msg of unique(targets)) { const useMessageLevelOptions = diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index 89231f27f..d7319f754 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -2172,6 +2172,36 @@ describe("ProviderTransform.message - cache control on gateway", () => { expect(result[0].providerOptions).toBeUndefined() }) + + test("multi-turn anthropic pins breakpoints to last system + last message only", () => { + const model = createModel({ + providerID: "anthropic", + api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, + }) + const msgs = [ + { role: "system", content: "You are a helpful assistant" }, + { role: "user", content: "first question" }, + { role: "assistant", content: "first answer" }, + { role: "user", content: "second question" }, + { role: "assistant", content: "second answer" }, + { role: "user", content: "third question" }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + // Only the last system message and the last message carry a breakpoint. + const marked = result + .map((msg, index) => ({ index, role: msg.role, hasCache: !!msg.providerOptions?.anthropic?.cacheControl })) + .filter((m) => m.hasCache) + + expect(marked).toEqual([ + { index: 0, role: "system", hasCache: true }, + { index: 5, role: "user", hasCache: true }, + ]) + // No drifting midpoint / before-last-user markers on intermediate assistant turns. + expect(result[2].providerOptions?.anthropic).toBeUndefined() + expect(result[4].providerOptions?.anthropic).toBeUndefined() + }) }) describe("ProviderTransform.variants", () => { From 66cba56cdfb9a7b76352b0219adbdacbd00f9b3e Mon Sep 17 00:00:00 2001 From: wqymi Date: Mon, 22 Jun 2026 22:22:49 +0800 Subject: [PATCH 2/5] fix(provider): double-marker rolling tail + tool-definition cache breakpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single tail breakpoint only hits cache when a turn appends fewer than 20 content blocks (Anthropic's backward lookback window). Agentic turns with multiple tool calls routinely exceed that, pushing the prior write out of the window — and on openai-compatible proxies a lone marker landing on an assistant message is silently dropped. Both break the message cache. Mark the last TWO non-system messages instead (rolling double buffer): the prior turn's tail marker survives as the read point while the new tail is the next write, and the second marker also survives single-step tool-call retries and user-initiated session forks. This mirrors openclacky's double-marker strategy for proxied Claude. Also add ProviderTransform.tools(): the cache hierarchy is tools -> system -> messages, so marking the last tool caches the whole tool-schema block as a stable prefix. Tools bypass message()'s providerID->SDK-key remap, so the marker is written under the SDK key (anthropic cacheControl / bedrock cachePoint) directly in llm.ts before streamText. Update the multi-turn test to assert the two tail markers and add coverage for the tools breakpoint (anthropic/bedrock shapes, 1h TTL, unsupported providers). --- packages/opencode/src/provider/transform.ts | 53 +++++++++--- packages/opencode/src/session/llm.ts | 2 +- .../opencode/test/provider/transform.test.ts | 81 ++++++++++++++++++- 3 files changed, 120 insertions(+), 16 deletions(-) diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index 02dfdcb59..1dc2379c9 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -283,24 +283,31 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage }, } - // Strategy: prefix cache is longest-common-prefix based, so the only marker - // that grows the cached prefix every turn is one pinned to the *end* of the - // request. We place two stable breakpoints (max 4 allowed by Anthropic): + // Strategy: prefix caching is longest-common-prefix based with a backward + // lookback window (Anthropic walks back ~20 blocks from a breakpoint to find + // a prior write). The markers that grow the cached prefix are pinned to the + // *tail* of the request. We place up to three stable breakpoints (Anthropic + // allows max 4): // 1. Last system message — the immutable prompt prefix. - // 2. The last message — "rolling head". Writing the cache up to the current - // tail makes the entire history a cache *read* on the next turn. - // - // We deliberately do NOT place markers at a drifting midpoint or at - // before-last-user: those indices shift every turn, land on assistant/tool - // messages (silently dropped by openai-compatible proxies), and spend the - // breakpoint budget without ever caching the tail. See internal/docs/cache-policy.md. + // 2+3. The last TWO messages — a "rolling double buffer". Each turn marks + // messages[-2] and messages[-1]; next turn the old [-1] is now [-2] and + // still carries its marker, so the lookback gets a cache READ hit, while + // the new [-1] is the WRITE for the turn after. One marker alone breaks + // when an agentic turn appends >20 content blocks (tool spam pushes the + // prior write outside the lookback window) or when a tool-call retry / + // interrupt discards the last message — the second marker survives both. + // A third tail marker would write a segment never read independently, so + // two is the minimum that covers the old-tail/new-tail boundary. + // We deliberately do NOT mark a drifting midpoint or a fixed before-last-user + // INDEX: those shift every turn without tracking the tail. See + // internal/docs/cache-policy.md. const targets: ModelMessage[] = [] const systemMsgs = msgs.filter((msg) => msg.role === "system") if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1]) const nonSystem = msgs.filter((msg) => msg.role !== "system") - if (nonSystem.length > 0) targets.push(nonSystem[nonSystem.length - 1]) + for (const msg of nonSystem.slice(-2)) targets.push(msg) for (const msg of unique(targets)) { const useMessageLevelOptions = @@ -450,6 +457,30 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re return msgs } +// Place a cache breakpoint on the tool definitions. The cache hierarchy is +// `tools` → `system` → `messages`, so marking the LAST tool caches the entire +// tool-schema block (often several KB) as a stable prefix that sits in front of +// the system + message caches. Tools are passed to the SDK separately from +// `message()` and never go through its providerID→SDK-key remap, so we write +// the marker under the SDK key directly. Tool registration order is stable +// (insertion order of the tools record), so "last tool" is deterministic. +export function tools>(tools: T, model: Provider.Model): T { + if (!supportsCacheMarkers(model)) return tools + const names = Object.keys(tools) + if (names.length === 0) return tools + + const ttl = model.cachePromptTTL === "1h" ? { ttl: "1h" as const } : {} + const marker = iife(() => { + if (model.api.npm === "@ai-sdk/amazon-bedrock") return { bedrock: { cachePoint: { type: "default" } } } + const key = sdkKey(model.api.npm) ?? model.providerID + return { [key]: { cacheControl: { type: "ephemeral", ...ttl } } } + }) + + const last = tools[names[names.length - 1]] + last.providerOptions = mergeDeep(last.providerOptions ?? {}, marker) + return tools +} + export function temperature(model: Provider.Model) { const id = model.id.toLowerCase() if (id.includes("qwen")) return 0.55 diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index 856ba1c7a..98f20dcb5 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -583,7 +583,7 @@ const live: Layer.Layer< topK: params.topK, providerOptions: ProviderTransform.providerOptions(input.model, params.options), activeTools: Object.keys(tools).filter((x) => x !== "invalid"), - tools, + tools: ProviderTransform.tools(tools, input.model), toolChoice: input.toolChoice, maxOutputTokens: params.maxOutputTokens, abortSignal: input.abort, diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index d7319f754..2d3a061c8 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -2173,7 +2173,7 @@ describe("ProviderTransform.message - cache control on gateway", () => { expect(result[0].providerOptions).toBeUndefined() }) - test("multi-turn anthropic pins breakpoints to last system + last message only", () => { + test("multi-turn anthropic pins breakpoints to last system + last two messages", () => { const model = createModel({ providerID: "anthropic", api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, @@ -2189,18 +2189,91 @@ describe("ProviderTransform.message - cache control on gateway", () => { const result = ProviderTransform.message(msgs, model, {}) as any[] - // Only the last system message and the last message carry a breakpoint. + // The last system message plus the last TWO messages carry a breakpoint + // (rolling double buffer): the prior turn's tail marker survives as the + // read point while the new tail marker is the next write. const marked = result .map((msg, index) => ({ index, role: msg.role, hasCache: !!msg.providerOptions?.anthropic?.cacheControl })) .filter((m) => m.hasCache) expect(marked).toEqual([ { index: 0, role: "system", hasCache: true }, + { index: 4, role: "assistant", hasCache: true }, { index: 5, role: "user", hasCache: true }, ]) - // No drifting midpoint / before-last-user markers on intermediate assistant turns. + // No drifting midpoint marker on earlier turns. expect(result[2].providerOptions?.anthropic).toBeUndefined() - expect(result[4].providerOptions?.anthropic).toBeUndefined() + expect(result[3].providerOptions?.anthropic).toBeUndefined() + }) +}) + +describe("ProviderTransform.tools", () => { + const createModel = (overrides: Partial = {}): any => ({ + id: "test/test-model", + providerID: "test", + api: { id: "test-model", url: "https://api.test.com", npm: "@ai-sdk/openai" }, + name: "Test Model", + ...overrides, + }) + + test("marks the last tool for anthropic", () => { + const model = createModel({ + providerID: "anthropic", + api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, + }) + const tools = { read: {}, write: {}, bash: {} } as Record + + const result = ProviderTransform.tools(tools, model) + + expect(result.read.providerOptions).toBeUndefined() + expect(result.write.providerOptions).toBeUndefined() + expect(result.bash.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral" } } }) + }) + + test("threads cachePromptTTL 1h into the tool marker", () => { + const model = createModel({ + providerID: "anthropic", + api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, + cachePromptTTL: "1h", + }) + const tools = { read: {}, bash: {} } as Record + + const result = ProviderTransform.tools(tools, model) + + expect(result.bash.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } } }) + }) + + test("uses cachePoint shape for bedrock", () => { + const model = createModel({ + providerID: "amazon-bedrock", + api: { id: "anthropic.claude-sonnet-4", url: "https://api.test.com", npm: "@ai-sdk/amazon-bedrock" }, + }) + const tools = { read: {}, bash: {} } as Record + + const result = ProviderTransform.tools(tools, model) + + expect(result.bash.providerOptions).toEqual({ bedrock: { cachePoint: { type: "default" } } }) + }) + + test("no marker for providers that do not support cache markers", () => { + const model = createModel({ + providerID: "openai", + api: { id: "gpt-4", url: "https://api.openai.com", npm: "@ai-sdk/openai" }, + }) + const tools = { read: {}, bash: {} } as Record + + const result = ProviderTransform.tools(tools, model) + + expect(result.read.providerOptions).toBeUndefined() + expect(result.bash.providerOptions).toBeUndefined() + }) + + test("no-op on empty tools", () => { + const model = createModel({ + providerID: "anthropic", + api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, + }) + expect(ProviderTransform.tools({}, model)).toEqual({}) }) }) From 4f86c40846a671868b8b9d43afc0914b7ad4ece7 Mon Sep 17 00:00:00 2001 From: wqymi Date: Mon, 22 Jun 2026 22:35:22 +0800 Subject: [PATCH 3/5] fix(provider): skip assistant turns when selecting cache markers on proxy path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The double rolling-tail selection used a blind last-2 (nonSystem.slice(-2)). On openai-compatible / content-level providers, cache_control on an assistant message is silently dropped, so when the tail is [..., user, assistant] one of the two markers evaporates and the double buffer collapses to a single marker — re-triggering the original low-hit-rate bug. Select the last 2 cacheable messages by role: Anthropic/Bedrock honor message-level assistant markers so take the last 2 outright; for everyone else skip assistant turns and take the last 2 user/tool messages so both markers survive. Tool results are role:"tool" after conversion and remain cacheable. Add a content-level (OpenRouter) test asserting only the user turns are marked. --- packages/opencode/src/provider/transform.ts | 20 +++++++++++-- .../opencode/test/provider/transform.test.ts | 30 +++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index 1dc2379c9..9f3d53af2 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -298,16 +298,30 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage // interrupt discards the last message — the second marker survives both. // A third tail marker would write a segment never read independently, so // two is the minimum that covers the old-tail/new-tail boundary. + // + // Role matters on the openai-compatible proxy path: those proxies + // silently DROP cache_control on assistant messages (see + // internal/docs/cache-policy.md — 1170 assistant markers, 0 cached). A + // blind last-2 would often land a marker on an assistant turn and lose + // it, collapsing the double buffer back to a single (or zero) effective + // marker. For providers that take message-level markers (Anthropic, + // Bedrock) assistant markers are honored, so we take the last 2 outright. + // For everyone else we take the last 2 *cacheable* messages (user/tool), + // skipping assistant turns so both markers survive. // We deliberately do NOT mark a drifting midpoint or a fixed before-last-user - // INDEX: those shift every turn without tracking the tail. See - // internal/docs/cache-policy.md. + // INDEX: those shift every turn without tracking the tail. const targets: ModelMessage[] = [] const systemMsgs = msgs.filter((msg) => msg.role === "system") if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1]) + const assistantMarkersHonored = + model.providerID === "anthropic" || + model.providerID.includes("bedrock") || + model.api.npm === "@ai-sdk/amazon-bedrock" const nonSystem = msgs.filter((msg) => msg.role !== "system") - for (const msg of nonSystem.slice(-2)) targets.push(msg) + const cacheable = assistantMarkersHonored ? nonSystem : nonSystem.filter((msg) => msg.role !== "assistant") + for (const msg of cacheable.slice(-2)) targets.push(msg) for (const msg of unique(targets)) { const useMessageLevelOptions = diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index 2d3a061c8..402458c91 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -2205,6 +2205,36 @@ describe("ProviderTransform.message - cache control on gateway", () => { expect(result[2].providerOptions?.anthropic).toBeUndefined() expect(result[3].providerOptions?.anthropic).toBeUndefined() }) + + test("non-anthropic content-level path skips assistant turns so both markers survive", () => { + // Providers that place markers at content level (e.g. OpenRouter / proxies) + // can have cache_control on assistant messages silently dropped, so a blind + // last-2 would lose a marker when the tail is [..., user, assistant]. The + // selector must pick the last 2 cacheable (user/tool) messages instead. + const model = createModel({ + providerID: "openrouter", + api: { id: "anthropic/claude-sonnet-4", url: "https://openrouter.ai/api", npm: "@openrouter/ai-sdk-provider" }, + }) + const msgs = [ + { role: "system", content: [{ type: "text", text: "sys" }] }, + { role: "user", content: [{ type: "text", text: "first question" }] }, + { role: "assistant", content: [{ type: "text", text: "first answer" }] }, + { role: "user", content: [{ type: "text", text: "second question" }] }, + { role: "assistant", content: [{ type: "text", text: "second answer" }] }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + const hasMarker = (msg: any) => + !!msg.providerOptions?.openrouter || + msg.content?.some?.((c: any) => c.providerOptions?.openrouter) + + // The two user turns (index 1, 3) are marked; the assistant turns are not. + expect(hasMarker(result[1])).toBe(true) + expect(hasMarker(result[3])).toBe(true) + expect(hasMarker(result[2])).toBe(false) + expect(hasMarker(result[4])).toBe(false) + }) }) describe("ProviderTransform.tools", () => { From dbb520dd0de322faf162864bf0801ab6f6ae485f Mon Sep 17 00:00:00 2001 From: wqymi Date: Tue, 23 Jun 2026 14:02:39 +0800 Subject: [PATCH 4/5] revert(provider): drop role-based skip in cache-marker selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit skipped assistant turns when selecting the two tail breakpoints, on the theory that openai-compatible proxies silently drop cache_control on assistant messages. That premise doesn't hold: - providerOptions is namespaced per provider; a provider only reads its own namespace, so for protocols that don't consume cache_control (plain OpenAI), user AND assistant markers are equally absent from the payload — not an assistant-specific drop. - supportsCacheMarkers already returns false for @ai-sdk/openai and @ai-sdk/openai-compatible, so those never reach applyCaching at all (the protocol whitelist already exists, mirroring upstream RESPECTS_INLINE_HINTS). - The providers that do reach applyCaching (anthropic, bedrock, openrouter/ copilot/alibaba on Claude) all honor message-level assistant markers, so skipping assistant just discards a valid breakpoint. Select the last two messages by position again. Reframe the "why two" comment around the real win: when the last message is removed (retry, Ctrl-C, user edit/delete), the next-to-last marker is a still-present write the lookback can land on, bounding eviction to the removed message rather than the whole history. Cost is ~equal to a single marker (adjacent writes, no rewrite on hit). --- packages/opencode/src/provider/transform.ts | 37 ++++++++----------- .../opencode/test/provider/transform.test.ts | 15 ++++---- 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index 9f3d53af2..109e9b536 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -292,22 +292,22 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage // 2+3. The last TWO messages — a "rolling double buffer". Each turn marks // messages[-2] and messages[-1]; next turn the old [-1] is now [-2] and // still carries its marker, so the lookback gets a cache READ hit, while - // the new [-1] is the WRITE for the turn after. One marker alone breaks - // when an agentic turn appends >20 content blocks (tool spam pushes the - // prior write outside the lookback window) or when a tool-call retry / - // interrupt discards the last message — the second marker survives both. - // A third tail marker would write a segment never read independently, so - // two is the minimum that covers the old-tail/new-tail boundary. + // the new [-1] is the WRITE for the turn after. // - // Role matters on the openai-compatible proxy path: those proxies - // silently DROP cache_control on assistant messages (see - // internal/docs/cache-policy.md — 1170 assistant markers, 0 cached). A - // blind last-2 would often land a marker on an assistant turn and lose - // it, collapsing the double buffer back to a single (or zero) effective - // marker. For providers that take message-level markers (Anthropic, - // Bedrock) assistant markers are honored, so we take the last 2 outright. - // For everyone else we take the last 2 *cacheable* messages (user/tool), - // skipping assistant turns so both markers survive. + // Why two and not one: the second (next-to-last) marker is the safety + // net for the tail boundary. When the last message is removed — a + // tool-call retry, a Ctrl-C, or the user editing/deleting their latest + // message — a lone tail marker disappears with it, and how much of the + // surrounding prefix the provider then evicts depends on the upstream + // (Anthropic) KV-cache implementation. The next-to-last marker is a + // still-present, further-back write the next lookback can land on, so the + // worst case degrades to "recompute only the removed message" instead of + // "recompute the whole history". It also covers turns that append >20 + // blocks (tool spam pushes the prior write outside the lookback window). + // Cost is ~equal to a single marker: the two adjacent breakpoints write + // roughly the same incremental bytes as one, split in two, and a hit + // never rewrites. A third marker would write a segment never read + // independently, so two is the minimum that covers the boundary. // We deliberately do NOT mark a drifting midpoint or a fixed before-last-user // INDEX: those shift every turn without tracking the tail. const targets: ModelMessage[] = [] @@ -315,13 +315,8 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage const systemMsgs = msgs.filter((msg) => msg.role === "system") if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1]) - const assistantMarkersHonored = - model.providerID === "anthropic" || - model.providerID.includes("bedrock") || - model.api.npm === "@ai-sdk/amazon-bedrock" const nonSystem = msgs.filter((msg) => msg.role !== "system") - const cacheable = assistantMarkersHonored ? nonSystem : nonSystem.filter((msg) => msg.role !== "assistant") - for (const msg of cacheable.slice(-2)) targets.push(msg) + for (const msg of nonSystem.slice(-2)) targets.push(msg) for (const msg of unique(targets)) { const useMessageLevelOptions = diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index 402458c91..d0e9d3bcd 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -2206,11 +2206,9 @@ describe("ProviderTransform.message - cache control on gateway", () => { expect(result[3].providerOptions?.anthropic).toBeUndefined() }) - test("non-anthropic content-level path skips assistant turns so both markers survive", () => { - // Providers that place markers at content level (e.g. OpenRouter / proxies) - // can have cache_control on assistant messages silently dropped, so a blind - // last-2 would lose a marker when the tail is [..., user, assistant]. The - // selector must pick the last 2 cacheable (user/tool) messages instead. + test("content-level provider marks the last two messages regardless of role", () => { + // Providers that reach applyCaching honor message-level markers (incl. + // assistant), so the double-tail marks the last two messages by position. const model = createModel({ providerID: "openrouter", api: { id: "anthropic/claude-sonnet-4", url: "https://openrouter.ai/api", npm: "@openrouter/ai-sdk-provider" }, @@ -2229,11 +2227,12 @@ describe("ProviderTransform.message - cache control on gateway", () => { !!msg.providerOptions?.openrouter || msg.content?.some?.((c: any) => c.providerOptions?.openrouter) - // The two user turns (index 1, 3) are marked; the assistant turns are not. - expect(hasMarker(result[1])).toBe(true) + // The last two messages (index 3 user, 4 assistant) are both marked. expect(hasMarker(result[3])).toBe(true) + expect(hasMarker(result[4])).toBe(true) + // Earlier turns are not. + expect(hasMarker(result[1])).toBe(false) expect(hasMarker(result[2])).toBe(false) - expect(hasMarker(result[4])).toBe(false) }) }) From 2588478e6a680eb7a437797e1bc463df4029c3b2 Mon Sep 17 00:00:00 2001 From: wqymi Date: Tue, 23 Jun 2026 14:21:35 +0800 Subject: [PATCH 5/5] refactor(provider): unify cache-marker shape source for messages and tools applyCaching and tools() each built the per-provider cache-control marker independently, and tools() diverged: it wrote { copilot: { cacheControl } } where the copilot SDK actually reads copilot_cache_control, so the tool breakpoint was silently ineffective on github-copilot. Extract cacheMarkerOptions() as the single source of truth for the per-provider marker shapes (anthropic/openrouter cacheControl, bedrock cachePoint, openaiCompatible cache_control, copilot copilot_cache_control, alibaba). message() keeps attaching the full object and remapping; tools() resolves one SDK-keyed namespace via cacheMarkerFor(). Add a copilot tools test locking in the shape. --- packages/opencode/src/provider/transform.ts | 78 +++++++++++-------- .../opencode/test/provider/transform.test.ts | 12 +++ 2 files changed, 58 insertions(+), 32 deletions(-) diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index 109e9b536..f8837d964 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -257,31 +257,50 @@ function supportsCacheMarkers(model: Provider.Model): boolean { return false } -function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] { - // Only Anthropic and OpenRouter expose a cache-control TTL in their AI SDK; - // the other providers ignore an unknown `ttl` field, so we only thread it - // into those two branches. Default (unset) stays the provider 5m default. +// The cache-control marker shape differs per provider/SDK. This is the single +// source of truth, keyed by the SDK provider-options namespace. `applyCaching` +// attaches the whole object (keyed by stored providerID) and lets `message()` +// remap the active provider's namespace to its SDK key; `tools()` (which +// bypasses that remap) resolves a single namespace up front via `cacheMarkerFor`. +// Only Anthropic and OpenRouter expose a TTL in their AI SDK — the others ignore +// an unknown `ttl`, so we thread it only there. +function cacheMarkerOptions(model: Provider.Model) { const ttl = model.cachePromptTTL === "1h" ? { ttl: "1h" as const } : {} - const providerOptions = { - anthropic: { - cacheControl: { type: "ephemeral", ...ttl }, - }, - openrouter: { - cacheControl: { type: "ephemeral", ...ttl }, - }, - bedrock: { - cachePoint: { type: "default" }, - }, - openaiCompatible: { - cache_control: { type: "ephemeral" }, - }, - copilot: { - copilot_cache_control: { type: "ephemeral" }, - }, - alibaba: { - cacheControl: { type: "ephemeral" }, - }, + return { + anthropic: { cacheControl: { type: "ephemeral", ...ttl } }, + openrouter: { cacheControl: { type: "ephemeral", ...ttl } }, + bedrock: { cachePoint: { type: "default" } }, + openaiCompatible: { cache_control: { type: "ephemeral" } }, + copilot: { copilot_cache_control: { type: "ephemeral" } }, + alibaba: { cacheControl: { type: "ephemeral" } }, } +} + +// Resolve the marker for a single model, already keyed under the SDK namespace +// the AI SDK expects — i.e. the remap that `message()` performs for messages, +// done up front. Used by `tools()`, whose tools never pass through `message()`. +// Returns undefined for providers that don't take inline markers (callers gate +// on `supportsCacheMarkers` first, so this is just a type-safety fallback). +function cacheMarkerFor(model: Provider.Model): Record | undefined { + const shapes = cacheMarkerOptions(model) + const ns: keyof typeof shapes | undefined = + model.api.npm === "@ai-sdk/anthropic" || model.api.npm === "@ai-sdk/google-vertex/anthropic" + ? "anthropic" + : model.api.npm === "@openrouter/ai-sdk-provider" + ? "openrouter" + : model.api.npm === "@ai-sdk/amazon-bedrock" + ? "bedrock" + : model.api.npm === "@ai-sdk/github-copilot" + ? "copilot" + : model.api.npm === "@ai-sdk/alibaba" + ? "alibaba" + : undefined + if (!ns) return undefined + return { [ns]: shapes[ns] } +} + +function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] { + const providerOptions = cacheMarkerOptions(model) // Strategy: prefix caching is longest-common-prefix based with a backward // lookback window (Anthropic walks back ~20 blocks from a breakpoint to find @@ -470,21 +489,16 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re // `tools` → `system` → `messages`, so marking the LAST tool caches the entire // tool-schema block (often several KB) as a stable prefix that sits in front of // the system + message caches. Tools are passed to the SDK separately from -// `message()` and never go through its providerID→SDK-key remap, so we write -// the marker under the SDK key directly. Tool registration order is stable +// `message()` and never go through its providerID→SDK-key remap, so we resolve +// the SDK-keyed marker via `cacheMarkerFor`. Tool registration order is stable // (insertion order of the tools record), so "last tool" is deterministic. export function tools>(tools: T, model: Provider.Model): T { if (!supportsCacheMarkers(model)) return tools + const marker = cacheMarkerFor(model) + if (!marker) return tools const names = Object.keys(tools) if (names.length === 0) return tools - const ttl = model.cachePromptTTL === "1h" ? { ttl: "1h" as const } : {} - const marker = iife(() => { - if (model.api.npm === "@ai-sdk/amazon-bedrock") return { bedrock: { cachePoint: { type: "default" } } } - const key = sdkKey(model.api.npm) ?? model.providerID - return { [key]: { cacheControl: { type: "ephemeral", ...ttl } } } - }) - const last = tools[names[names.length - 1]] last.providerOptions = mergeDeep(last.providerOptions ?? {}, marker) return tools diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index d0e9d3bcd..9b249af21 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -2284,6 +2284,18 @@ describe("ProviderTransform.tools", () => { expect(result.bash.providerOptions).toEqual({ bedrock: { cachePoint: { type: "default" } } }) }) + test("uses copilot_cache_control shape for github-copilot", () => { + const model = createModel({ + providerID: "github-copilot", + api: { id: "claude-sonnet-4", url: "https://api.githubcopilot.com", npm: "@ai-sdk/github-copilot" }, + }) + const tools = { read: {}, bash: {} } as Record + + const result = ProviderTransform.tools(tools, model) + + expect(result.bash.providerOptions).toEqual({ copilot: { copilot_cache_control: { type: "ephemeral" } } }) + }) + test("no marker for providers that do not support cache markers", () => { const model = createModel({ providerID: "openai",